def compute_coverage(text: str, max_value: float, offset: int, attentions, size: tuple):
    """
    Aggregates attention maps for the current text piece (char, word, line)
    :param text: Text piece selected with offset after splitting DAN prediction
    :param max_value: Maximum "attention intensity" for parts of a text piece, used for normalization
    :param offset: Offset value to get the relevant part of text piece
    :param attentions: Attention weights of size (n_char, feature_height, feature_width)
    :param size: Target size (width, height) to resize the coverage vector. A falsy value skips the resize.
    :return: Coverage map as a uint8 array with values in [0, 255]
    """
    _, height, width = attentions.shape

    # Blank vector to accumulate weights for the current text.
    # Each attention map already has the (height, width) shape of the accumulator,
    # so it is added directly instead of being resized to its own size first.
    coverage_vector = np.zeros((height, width))
    for index in range(offset, offset + len(text)):
        coverage_vector = np.clip(coverage_vector + attentions[index], 0, 1)

    # Normalize coverage vector. The ratio is saturated at 1 so that the uint8
    # conversion cannot wrap around when the coverage exceeds max_value, and a
    # non-positive max_value yields an empty map instead of a division by zero.
    if max_value > 0:
        coverage_vector = np.clip(coverage_vector / max_value, 0, 1)
    else:
        coverage_vector = np.zeros((height, width))
    coverage_vector = (coverage_vector * 255).astype(np.uint8)

    # Resize it
    if size:
        coverage_vector = cv2.resize(coverage_vector, size)

    return coverage_vector
def parse_delimiters(delimiters):
    """
    Builds a compiled regular expression matching any of the given delimiters.
    :param delimiters: Iterable of delimiter patterns. They are joined with "|" without escaping.
    :return: Compiled regular expression
    """
    return re.compile(r"|".join(delimiters))


def blend_coverage(coverage_vector, image, mask, scale):
    """
    Blends current coverage_vector over original image, used to make an attention map.
    :param coverage_vector: Aggregated attention weights of the current text piece, resized to image. size: (image_height, image_width)
    :param image: Input image in PIL format
    :param mask: Mask of the image (of any color)
    :param scale: Scaling factor for the output gif image
    :return: Blended image in PIL format, resized by the scaling factor
    """
    height, width = coverage_vector.shape

    # Blend coverage vector with original image:
    # the coverage goes in the first (red) channel, the other two channels stay empty
    blank_array = np.zeros((height, width), dtype=np.uint8)
    coverage_vector = Image.fromarray(
        np.stack([coverage_vector, blank_array, blank_array], axis=2), "RGB"
    )
    blend = Image.composite(image, coverage_vector, mask)

    # Resize to save time.
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter.
    blend = blend.resize((int(width * scale), int(height * scale)), Image.LANCZOS)
    return blend