Skip to content
Snippets Groups Projects

Add predicted objects to predict command

Merged Thibault Lavigne requested to merge 36-add-predicted-objects-to-predict-command into main
1 file
+ 49
49
Compare changes
  • Side-by-side
  • Inline
+ 49
49
@@ -9,55 +9,6 @@ from dan import logger
from dan.utils import round_floats
def compute_coverage(text: str, max_value: float, offset: int, attentions, size: tuple):
    """
    Aggregate attention maps for the current text piece (char, word, line).

    :param text: Text piece selected with offset after splitting DAN prediction
    :param max_value: Maximum "attention intensity" for parts of a text piece, used for normalization
    :param offset: Offset value to get the relevant part of text piece
    :param attentions: Attention weights of size (n_char, feature_height, feature_width)
    :param size: Target size (width, height) to resize the coverage vector
    """
    _, feat_height, feat_width = attentions.shape
    # Accumulator for the per-character weights of this text piece.
    accumulated = np.zeros((feat_height, feat_width))
    for char_idx, _ in enumerate(text):
        char_weight = cv2.resize(
            attentions[char_idx + offset], (feat_width, feat_height)
        )
        # Saturate at 1 so overlapping characters do not blow up the map.
        accumulated = np.clip(accumulated + char_weight, 0, 1)
    # Normalize against the maximum intensity and convert to 8-bit grayscale.
    accumulated = (accumulated / max_value * 255).astype(np.uint8)
    # Optionally resize to the requested (width, height).
    if size:
        accumulated = cv2.resize(accumulated, size)
    return accumulated
def blend_coverage(coverage_vector, image, mask, scale):
    """
    Blend current coverage_vector over the original image, used to make an attention map.

    :param coverage_vector: Aggregated attention weights of the current text piece, resized to image. size: (n_char, image_height, image_width)
    :param image: Input image in PIL format
    :param mask: Mask of the image (of any color)
    :param scale: Scaling factor for the output gif image
    """
    height, width = coverage_vector.shape
    # Put the attention weights in the red channel of an otherwise black RGB image.
    blank_array = np.zeros((height, width)).astype(np.uint8)
    coverage_vector = Image.fromarray(
        np.stack([coverage_vector, blank_array, blank_array], axis=2), "RGB"
    )
    blend = Image.composite(image, coverage_vector, mask)
    # Resize to save time.
    # Image.LANCZOS is the same filter as the old Image.ANTIALIAS alias,
    # which was deprecated in Pillow 9.1 and removed in Pillow 10.
    blend = blend.resize((int(width * scale), int(height * scale)), Image.LANCZOS)
    return blend
def parse_delimiters(delimiters):
    """Compile a regex pattern matching any of the given delimiter strings."""
    pattern = "|".join(delimiters)
    return re.compile(pattern)
@@ -170,6 +121,55 @@ def get_predicted_polygons_with_confidence(
return polygons
def compute_coverage(text: str, max_value: float, offset: int, attentions, size: tuple):
    """
    Aggregate attention maps for the current text piece (char, word, line).

    :param text: Text piece selected with offset after splitting DAN prediction
    :param max_value: Maximum "attention intensity" for parts of a text piece, used for normalization
    :param offset: Offset value to get the relevant part of text piece
    :param attentions: Attention weights of size (n_char, feature_height, feature_width)
    :param size: Target size (width, height) to resize the coverage vector
    """
    _, height, width = attentions.shape
    # Start from an all-zero map and fold in one attention slice per character.
    coverage = np.zeros((height, width))
    for pos in range(offset, offset + len(text)):
        resized = cv2.resize(attentions[pos], (width, height))
        # Clip after every addition so the accumulated weight stays in [0, 1].
        coverage = np.clip(coverage + resized, 0, 1)
    # Scale to 8-bit grayscale relative to the maximum intensity.
    coverage = (coverage / max_value * 255).astype(np.uint8)
    # A falsy size (e.g. None) means: keep the feature-map resolution.
    if size:
        return cv2.resize(coverage, size)
    return coverage
def blend_coverage(coverage_vector, image, mask, scale):
    """
    Blend current coverage_vector over the original image, used to make an attention map.

    :param coverage_vector: Aggregated attention weights of the current text piece, resized to image. size: (n_char, image_height, image_width)
    :param image: Input image in PIL format
    :param mask: Mask of the image (of any color)
    :param scale: Scaling factor for the output gif image
    """
    height, width = coverage_vector.shape
    # Build an RGB image whose red channel carries the attention weights.
    blank_array = np.zeros((height, width)).astype(np.uint8)
    coverage_vector = Image.fromarray(
        np.stack([coverage_vector, blank_array, blank_array], axis=2), "RGB"
    )
    blend = Image.composite(image, coverage_vector, mask)
    # Resize to save time.
    # Image.LANCZOS replaces the Image.ANTIALIAS alias (deprecated in
    # Pillow 9.1, removed in Pillow 10); the resampling filter is identical.
    blend = blend.resize((int(width * scale), int(height * scale)), Image.LANCZOS)
    return blend
def compute_contour_metrics(coverage_vector, contour):
"""
Compute the contours's area and the mean value inside it.
Loading