diff --git a/dan/ocr/predict/__init__.py b/dan/ocr/predict/__init__.py
index 2e03fdeba9067048886a31378e1033868e6725a7..7a534e9e6e97b7f3379f2264bcd73d144cc96b8d 100644
--- a/dan/ocr/predict/__init__.py
+++ b/dan/ocr/predict/__init__.py
@@ -6,7 +6,7 @@
 Predict on an image using a trained DAN model.
 """
 
-import pathlib
+from pathlib import Path
 
 from dan.ocr.predict.attention import Level
 from dan.ocr.predict.inference import run
@@ -22,18 +22,30 @@ def add_predict_parser(subcommands) -> None:
     # Required arguments.
     parser.add_argument(
         "--image-dir",
-        type=pathlib.Path,
+        type=Path,
         help="Path to the folder where the images to predict are stored.",
     )
     parser.add_argument(
         "--model",
-        type=pathlib.Path,
+        type=Path,
         help="Path to the directory containing the model, the YAML parameters file and the charset file to use for prediction.",
         required=True,
     )
+    parser.add_argument(
+        "--font",
+        type=Path,
+        default=Path("fonts/LinuxLibertine.ttf"),
+        help="Path to the font file to use for the GIF of the attention map.",
+    )
+    parser.add_argument(
+        "--maximum-font-size",
+        type=int,
+        default=32,
+        help="Maximum font size to use for the GIF of the attention map.",
+    )
     parser.add_argument(
         "--output",
-        type=pathlib.Path,
+        type=Path,
         help="Path to the output folder.",
         required=True,
     )
diff --git a/dan/ocr/predict/attention.py b/dan/ocr/predict/attention.py
index 02ae378982cb049e279af07ff6941f1da5a699ec..f4368675a9b343fc3b9e11f8bd6a83e21c27be24 100644
--- a/dan/ocr/predict/attention.py
+++ b/dan/ocr/predict/attention.py
@@ -5,15 +5,19 @@
 import logging
 import re
 from enum import Enum
+from pathlib import Path
+from statistics import mean
 from typing import List, Tuple
 
 import cv2
 import matplotlib.pyplot as plt
 import numpy as np
 import torch
-from PIL import Image
+from PIL import Image, ImageDraw
 from torchvision.transforms.functional import to_pil_image
 
+from dan.ocr.utils import load_font
+
 logger = logging.getLogger(__name__)
 
 
@@ -427,6 +431,41 @@ def get_grid_search_contour(coverage_vector, bin_mask, height=50):
     return coord, confidence
 
 
+def search_font_size(
+    font: Path,
+    maximum_font_size: int,
+    text: str,
+    width: int,
+):
+    """
+    Search for the largest font size at which the text fits within the width of the GIF, starting from the maximum font size and decreasing.
+    Return None if no size fits.
+    :param font: Path to the font file to use for the GIF of the attention map.
+    :param maximum_font_size: Maximum font size to use for the GIF of the attention map.
+    :param text: Predicted text.
+    :param width: Image width.
+    """
+    font_size = maximum_font_size
+
+    font_param = None
+
+    # Try each font size, from the largest down, until the rendered text fits
+    while font_param is None:
+        loaded_font = load_font(font, font_size)
+
+        # Measure the horizontal space taken by the rendered text
+        _, _, right, _ = loaded_font.getbbox(text)
+
+        font_param = loaded_font if right < width else None
+
+        font_size -= 1
+
+        if font_size == 0:
+            logger.warning("No compatible font size found")
+            break
+
+    return font_param
+
+
 def plot_attention(
     image: torch.Tensor,
     text: str,
@@ -437,6 +476,8 @@ def plot_attention(
     alpha_factor: float,
     color_map: str,
     char_separators: re.Pattern,
+    font: Path,
+    maximum_font_size: int,
     max_object_height: int = 50,
     word_separators: re.Pattern = parse_delimiters(["\n", " "]),
     line_separators: re.Pattern = parse_delimiters(["\n"]),
@@ -454,6 +495,8 @@ def plot_attention(
     :param alpha_factor: Alpha factor that controls how much the attention map is shown to the user during prediction. (higher value means more transparency for the attention map, commonly between 0.5 and 1.0)
     :param color_map: Colormap to use for the attention map
     :param char_separators: Pattern used to find tokens of the charset
+    :param font: Path to the font file to use for the GIF of the attention map.
+    :param maximum_font_size: Maximum font size to use for the GIF of the attention map.
     :param max_object_height: Maximum height of predicted objects.
     :param word_separators: Pattern used to find words
     :param line_separators: Pattern used to find lines
@@ -486,22 +529,21 @@ def plot_attention(
         # Blend coverage vector with original image to make an attention map
         blended = blend_coverage(coverage_vector, image, scale, alpha_factor, color_map)
 
-        # Get polygons if flag is set:
-        if display_polygons:
-            # Draw the contour
-            _, contour = get_polygon(
-                text_piece,
-                max_value,
-                tot_len,
-                weights,
-                max_object_height=max_object_height,
-                size=(image.width, image.height),
-            )
+        # Draw the contour
+        polygon, contour = get_polygon(
+            text_piece,
+            max_value,
+            tot_len,
+            weights,
+            max_object_height=max_object_height,
+            size=(image.width, image.height),
+        )
 
-            if contour is not None:
-                # The image has been scaled so we need to scale the contour
-                contour = (contour * scale).astype(np.int32)
+        if contour is not None:
+            # The image has been scaled so we need to scale the contour
+            contour = (contour * scale).astype(np.int32)
 
+            if display_polygons:
                 # Draw the contour with a thickness based on the scale in red
                 cv2.drawContours(
                     blended := np.array(blended),
@@ -514,11 +556,33 @@ def plot_attention(
                 # Make the np.array with drawn contours back into a PIL image
                 blended = Image.fromarray(blended, "RGBA")
 
+            # Image size
+            width, height = blended.size
+
+            # Double the image width so there is free white space to write the text in
+            result = Image.new(image.mode, (width * 2, height), (255, 255, 255))
+            result.paste(blended, (0, 0))
+
+            draw = ImageDraw.Draw(result)
+
+            # Search the biggest compatible font size
+            font_param = search_font_size(font, maximum_font_size, text_piece, width)
+
+            if font_param is not None:
+                # Get the list of every height of every point of the contour
+                heights = [coord[0][1] for coord in contour.tolist()]
+
+                average_height = round(mean(heights))
+
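+                # Draw the predicted text in the added margin, at the contour's average height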
+                draw.text(
+                    (width, average_height), text_piece, (0, 0, 0), font=font_param
+                )
+
         # Keep track of text length
         tot_len += len(text_piece) + offset
 
         # Append the blended image to the list of attention maps to be used for the .gif
-        attention_map.append(blended)
+        # `result` only exists when a contour was found; otherwise keep the plain blended image
+        attention_map.append(result if contour is not None else blended)
 
     if not attention_map:
         return
diff --git a/dan/ocr/predict/inference.py b/dan/ocr/predict/inference.py
index dd734764c5cbc39c671536e68bf033df96a13337..34bb1da1c7c2484e3cab194123613ed8b5ba7444 100644
--- a/dan/ocr/predict/inference.py
+++ b/dan/ocr/predict/inference.py
@@ -24,6 +24,7 @@ from dan.ocr.predict.attention import (
     split_text_and_confidences,
 )
 from dan.ocr.transforms import get_preprocessing_transforms
+from dan.ocr.utils import load_font
 from dan.utils import (
     EntityType,
     ind_to_token,
@@ -340,6 +341,8 @@ def process_batch(
     image_batch: List[Path],
     dan_model: DAN,
     device: str,
+    font: Path,
+    maximum_font_size: int,
     output: Path,
     confidence_score: bool,
     confidence_score_levels: List[Level],
@@ -454,6 +457,8 @@ def process_batch(
                     display_polygons=predict_objects,
                     max_object_height=max_object_height,
                     outname=gif_filename,
+                    font=font,
+                    maximum_font_size=maximum_font_size,
                 )
                 result["attention_gif"] = gif_filename
 
@@ -465,6 +470,8 @@ def process_batch(
 def run(
     image_dir: Path,
     model: Path,
+    font: Path,
+    maximum_font_size: int,
     output: Path,
     confidence_score: bool,
     confidence_score_levels: List[Level],
@@ -491,6 +498,8 @@ def run(
     Predict a single image save the output
     :param image_dir: Path to the folder where the images to predict are stored.
     :param model: Path to the directory containing the model, the YAML parameters file and the charset file to use for prediction.
+    :param font: Path to the font file to use for the GIF of the attention map.
+    :param maximum_font_size: Maximum font size to use for the GIF of the attention map.
     :param output: Path to the output folder where the results will be saved.
     :param confidence_score: Whether to compute confidence score.
     :param confidence_score_levels: Levels of objects to extract.
@@ -529,12 +538,19 @@ def run(
         dynamic_mode=dynamic_mode,
     )
 
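+    # Fail fast if the font cannot be loaded, before any batch is processed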
+    try:
+        load_font(font, maximum_font_size)
+    except OSError as e:
+        raise FileNotFoundError(f"The font file is missing at path {font}") from e
+
     images = image_dir.rglob(f"*{image_extension}")
     for image_batch in list_to_batches(images, n=batch_size):
         process_batch(
             image_batch,
             dan_model,
             device,
+            font,
+            maximum_font_size,
             output,
             confidence_score,
             confidence_score_levels,
diff --git a/dan/ocr/utils.py b/dan/ocr/utils.py
index 6d22c183372c2325b62987df2565a1d34424c666..4a8a30ee8291e24164c79b0035f0d438982bf800 100644
--- a/dan/ocr/utils.py
+++ b/dan/ocr/utils.py
@@ -6,6 +6,7 @@ from pathlib import Path
 from typing import Dict, List, Optional
 
 import torch
+from PIL import ImageFont
 from prettytable import MARKDOWN, PrettyTable
 from torch.optim import Adam
 
@@ -104,3 +105,12 @@ def add_metrics_table_row(
         row.append(round(metric_value * 100, 2) if metric_value is not None else "−")
 
     table.add_row(row)
+
+
+def load_font(path: Path, size: int) -> ImageFont.FreeTypeFont:
+    """
+    Load a TrueType font at the given size.
+    :param path: Path to the font.
+    :param size: Size of the font.
+    """
+    return ImageFont.truetype(path, size)
diff --git a/docs/assets/example_line.gif b/docs/assets/example_line.gif
index 33f5118657ed3ab1a937ffcc0671f2ab31fe13df..0fa8c19f14f4918e5f8609571991a6b0334ef1b6 100644
--- a/docs/assets/example_line.gif
+++ b/docs/assets/example_line.gif
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc1d84b83107ca31e1c6790d6be9af55d9502902b5e64e3cf53b0ec649232aa5
-size 4712754
+oid sha256:9a1d31d94904f684df5f7e10092998c41bbb85994482486ebe25abaedaf95a83
+size 4412209
diff --git a/docs/assets/example_line_polygon.gif b/docs/assets/example_line_polygon.gif
index f86291321f86c79c78964ca4c91ab62da9d58f4f..cdef8ad43f61b96702e3b3d2d923b93c25fc648a 100644
--- a/docs/assets/example_line_polygon.gif
+++ b/docs/assets/example_line_polygon.gif
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d277020b943d8cd506514d90f50479bd513cd0eef76ca35651e9c37d9573652b
-size 4708787
+oid sha256:19dfd5559c8569c459baa1c3636efb2b9d72f2f0e55871ccfcefd8fc23c0c466
+size 4404726
diff --git a/docs/assets/example_word.gif b/docs/assets/example_word.gif
index 04f57f7cb54203a540dbdf9005f219b135116e9e..01a8443648a8d53d7278e49b19eafa838e07fb39 100644
--- a/docs/assets/example_word.gif
+++ b/docs/assets/example_word.gif
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bc087b7b209449ac333153856ba97707bdfedeef9c9a09e8100f03b697057e2e
-size 15762215
+oid sha256:8a09715f75077b0256699d69e432325202a6d3e8cc1ae59d38a5536d9824edb2
+size 17366393
diff --git a/docs/usage/predict/index.md b/docs/usage/predict/index.md
index 11b5dddbf9a27e8b1b36e408b819cd520d8b4679..95ad91680a45efbc51cfa0143c16f624957f7451 100644
--- a/docs/usage/predict/index.md
+++ b/docs/usage/predict/index.md
@@ -4,31 +4,33 @@
 
 Use the `teklia-dan predict` command to apply a trained DAN model on an image.
 
-| Parameter                   | Description                                                                                                                           | Type           | Default         |
-| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- | -------------- | --------------- |
-| `--image-dir`               | Path to the folder where the images to predict are stored. Must not be provided with `--image`.                                       | `pathlib.Path` |                 |
-| `--image-extension`         | The extension of the images in the folder. Ignored if `--image-dir` is not provided.                                                  | `str`          | .jpg            |
-| `--model`                   | Path to the directory containing the model, the YAML parameters file and the charset file to use for prediction.                      | `pathlib.Path` |                 |
-| `--output`                  | Path to the output folder. Results will be saved in this directory.                                                                   | `pathlib.Path` |                 |
-| `--tokens`                  | Path to a yaml file containing a mapping between starting tokens and end tokens. Needed for entities.                                 | `pathlib.Path` |                 |
-| `--temperature`             | Temperature scaling scalar parameter.                                                                                                 | `float`        | `1.0`           |
-| `--confidence-score`        | Whether to return confidence scores.                                                                                                  | `bool`         | `False`         |
-| `--confidence-score-levels` | Level to return confidence scores. Should be any combination of `["line", "word", "char", "ner"]`.                                    | `str`          |                 |
-| `--attention-map`           | Whether to plot attention maps.                                                                                                       | `bool`         | `False`         |
-| `--attention-map-scale`     | Image scaling factor before creating the GIF.                                                                                         | `float`        | `0.5`           |
-| `--alpha-factor`            | Alpha factor that controls how much the attention map is shown to the user during prediction.                                         | `float`        | `0.9`           |
-| `--color-map`               | A matplotlib colormap to use for the attention maps.                                                                                  | `str`          | `nipy_spectral` |
-| `--attention-map-level`     | Level to plot the attention maps. Should be in `["line", "word", "char", "ner"]`.                                                     | `str`          | `"line"`        |
-| `--predict-objects`         | Whether to return polygons coordinates.                                                                                               | `bool`         | `False`         |
-| `--max-object-height`       | Maximum height for predicted objects. If set, grid search segmentation will be applied and width will be normalized to element width. | `int`          |                 |
-| `--word-separators`         | List of word separators.                                                                                                              | `list`         | `[" ", "\n"]`   |
-| `--line-separators`         | List of line separators.                                                                                                              | `list`         | `["\n"]`        |
-| `--gpu-device`              | Use a specific GPU if available.                                                                                                      | `int`          |                 |
-| `--batch-size`              | Size of the batches for prediction.                                                                                                   | `int`          | `1`             |
-| `--start-token`             | Use a specific starting token at the beginning of the prediction. Useful when making predictions on different single pages.           | `str`          |                 |
-| `--use-language-model`      | Whether to use an explicit language model to rescore text hypotheses.                                                                 | `bool`         | `False`         |
-| `--compile-model`           | Whether to compile the model. Recommended to speed up inference.                                                                      | `bool`         | `False`         |
-| `--dynamic-mode`            | Whether to use the dynamic mode during model compilation. Recommended for prediction on images of variable size.                      | `bool`         | `False`         |
+| Parameter                   | Description                                                                                                                           | Type           | Default                    |
+| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- | -------------- | -------------------------- |
+| `--image-dir`               | Path to the folder where the images to predict are stored. Must not be provided with `--image`.                                       | `pathlib.Path` |                            |
+| `--image-extension`         | The extension of the images in the folder. Ignored if `--image-dir` is not provided.                                                  | `str`          | `.jpg`                     |
+| `--model`                   | Path to the directory containing the model, the YAML parameters file and the charset file to use for prediction.                      | `pathlib.Path` |                            |
+| `--font`                    | Path to the font file to use for the GIF of the attention map.                                                                        | `pathlib.Path` | `fonts/LinuxLibertine.ttf` |
+| `--maximum-font-size`       | Maximum font size to use for the GIF of the attention map.                                                                            | `int`          | `32`                       |
+| `--output`                  | Path to the output folder. Results will be saved in this directory.                                                                   | `pathlib.Path` |                            |
+| `--tokens`                  | Path to a yaml file containing a mapping between starting tokens and end tokens. Needed for entities.                                 | `pathlib.Path` |                            |
+| `--temperature`             | Temperature scaling scalar parameter.                                                                                                 | `float`        | `1.0`                      |
+| `--confidence-score`        | Whether to return confidence scores.                                                                                                  | `bool`         | `False`                    |
+| `--confidence-score-levels` | Level to return confidence scores. Should be any combination of `["line", "word", "char", "ner"]`.                                    | `str`          |                            |
+| `--attention-map`           | Whether to plot attention maps.                                                                                                       | `bool`         | `False`                    |
+| `--attention-map-scale`     | Image scaling factor before creating the GIF.                                                                                         | `float`        | `0.5`                      |
+| `--alpha-factor`            | Alpha factor that controls how much the attention map is shown to the user during prediction.                                         | `float`        | `0.9`                      |
+| `--color-map`               | A matplotlib colormap to use for the attention maps.                                                                                  | `str`          | `nipy_spectral`            |
+| `--attention-map-level`     | Level to plot the attention maps. Should be in `["line", "word", "char", "ner"]`.                                                     | `str`          | `"line"`                   |
+| `--predict-objects`         | Whether to return polygons coordinates.                                                                                               | `bool`         | `False`                    |
+| `--max-object-height`       | Maximum height for predicted objects. If set, grid search segmentation will be applied and width will be normalized to element width. | `int`          |                            |
+| `--word-separators`         | List of word separators.                                                                                                              | `list`         | `[" ", "\n"]`              |
+| `--line-separators`         | List of line separators.                                                                                                              | `list`         | `["\n"]`                   |
+| `--gpu-device`              | Use a specific GPU if available.                                                                                                      | `int`          |                            |
+| `--batch-size`              | Size of the batches for prediction.                                                                                                   | `int`          | `1`                        |
+| `--start-token`             | Use a specific starting token at the beginning of the prediction. Useful when making predictions on different single pages.           | `str`          |                            |
+| `--use-language-model`      | Whether to use an explicit language model to rescore text hypotheses.                                                                 | `bool`         | `False`                    |
+| `--compile-model`           | Whether to compile the model. Recommended to speed up inference.                                                                      | `bool`         | `False`                    |
+| `--dynamic-mode`            | Whether to use the dynamic mode during model compilation. Recommended for prediction on images of variable size.                      | `bool`         | `False`                    |
 
 The `--model` argument expects a directory with the following files:
 
@@ -36,6 +38,8 @@ The `--model` argument expects a directory with the following files:
 - a `charset.pkl` file,
 - a `parameters.yml` file corresponding to the `inference_parameters.yml` file generated during training.
 
+The default font used by `--font` is [Linux Libertine](https://gitlab.teklia.com/atr/dan/-/raw/visualization-transcription/fonts/LinuxLibertine.ttf?inline=false).
+
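+For example, a minimal sketch of a prediction that renders attention maps with a custom font (directory paths are placeholders, and `--attention-map` is assumed to be a bare boolean flag, as in the examples below):
+
+```shell
+teklia-dan predict \
+    --image-dir images/ \
+    --model models/ \
+    --output predict/ \
+    --attention-map \
+    --font fonts/LinuxLibertine.ttf \
+    --maximum-font-size 32
+```
+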
 ## Examples
 
 ### Predict with confidence scores
@@ -54,9 +58,9 @@ It will create the following JSON file named after the image in the `predict` fo
 
 ```json
 {
-  "text": "Manoeuvre Louis VIOLOTTE\nLaboureur d'autrui Antoine VIOLOTTE\nCouvreur en laves Antoine ROBERT\nVigneron d'autrui Jean POULET\nManoeuvre Claude BELORGEY\nVigneron d'autrui Pierre BERANGEY\nLaboureur d'autrui Antoine BERANGEY\nManoeuvre Fran\u00e7ois CORNU\nVigneron d'autrui Jean PANTIOT\nid id Philibert BONNE\nid id Jean TONICHON\nTaillandier Denis BONNE\nCharpentier Jean CHAPUIS\nLaboureur d'autrui Antoine BONNE\nTaillandier Fran\u00e7ois BONNE\nVigneron d'autrui Antoine TISSERAND\nLaboureur d'autrui Fran\u00e7ois TISSERAND\nid id Hubert TISSERAND\nManoeuvre Hubert BONNARDOT\nVigneron d'autrui Etienne CORNU\nRegratiere la veuve GUYARD\nManoeuvre Pierre MELINE\nSabotier Claude MOREAU\nVigneron d'autrui Denis ECARD\nid id JB CORNU",
+  "text": "\u24ddCoupez \u24dfLouis \u24d37.12.14 \u24e148877\n\u24ddFoutrain \u24dfAdolphe \u24d39.4.13 \u24e148877\n\u24ddGabala \u24dfFran\u00e7ois \u24d326.3.11 \u24e148877\n\u24ddBurglen \u24dfJoef \u24d328.11.12 \u24e148877\n\u24ddGradel \u24dfEdouard \u24d35.1.05 \u24e148877\n\u24ddDerocde \u24dfLouis \u24d329.8.07 \u24e148877\n\u24ddLonglet \u24dfGaston \u24d321.9.03 \u24e148877\n\u24ddGodefroy \u24dfAlbert \u24d31.10.07 \u24e148877",
   "confidences": {
-    "total": 1.0
+    "total": 0.99
   },
   "language_model": {},
   "attention_gif": "predict/example_line.gif"
@@ -80,15 +84,17 @@ It will create the following JSON file named after the image and a GIF showing a
 
 ```json
 {
-  "text": "Manoeuvre Louis VIOLOTTE\nLaboureur d'autrui Antoine VIOLOTTE\nCouvreur en laves Antoine ROBERT\nVigneron d'autrui Jean POULET\nManoeuvre Claude BELORGEY\nVigneron d'autrui Pierre BERANGEY\nLaboureur d'autrui Antoine BERANGEY\nManoeuvre Fran\u00e7ois CORNU\nVigneron d'autrui Jean PANTIOT\nid id Philibert BONNE\nid id Jean TONICHON\nTaillandier Denis BONNE\nCharpentier Jean CHAPUIS\nLaboureur d'autrui Antoine BONNE\nTaillandier Fran\u00e7ois BONNE\nVigneron d'autrui Antoine TISSERAND\nLaboureur d'autrui Fran\u00e7ois TISSERAND\nid id Hubert TISSERAND\nManoeuvre Hubert BONNARDOT\nVigneron d'autrui Etienne CORNU\nRegratiere la veuve GUYARD\nManoeuvre Pierre MELINE\nSabotier Claude MOREAU\nVigneron d'autrui Denis ECARD\nid id JB CORNU",
+  "text": "\u24ddCoupez \u24dfLouis \u24d37.12.14 \u24e148877\n\u24ddFoutrain \u24dfAdolphe \u24d39.4.13 \u24e148877\n\u24ddGabala \u24dfFran\u00e7ois \u24d326.3.11 \u24e148877\n\u24ddBurglen \u24dfJoef \u24d328.11.12 \u24e148877\n\u24ddGradel \u24dfEdouard \u24d35.1.05 \u24e148877\n\u24ddDerocde \u24dfLouis \u24d329.8.07 \u24e148877\n\u24ddLonglet \u24dfGaston \u24d321.9.03 \u24e148877\n\u24ddGodefroy \u24dfAlbert \u24d31.10.07 \u24e148877",
   "confidences": {
-    "total": 1.0
+    "total": 0.99
   },
   "language_model": {},
   "attention_gif": "predict/example_line.gif"
 }
 ```
 
+<!-- Generated with https://gitlab.teklia.com/atr/dan/-/blob/56b986696064e316490162f5c1fb97d2bc54aaa3/tests/data/extraction/images/double_page/test-page_1.jpg -->
+
 <img src="../../assets/example_line.gif" />
 
 ### Predict with confidence scores and word-level attention maps
@@ -110,15 +116,17 @@ It will create the following JSON file named after the image and a GIF showing a
 
 ```json
 {
-  "text": "Manoeuvre Louis VIOLOTTE\nLaboureur d'autrui Antoine VIOLOTTE\nCouvreur en laves Antoine ROBERT\nVigneron d'autrui Jean POULET\nManoeuvre Claude BELORGEY\nVigneron d'autrui Pierre BERANGEY\nLaboureur d'autrui Antoine BERANGEY\nManoeuvre Fran\u00e7ois CORNU\nVigneron d'autrui Jean PANTIOT\nid id Philibert BONNE\nid id Jean TONICHON\nTaillandier Denis BONNE\nCharpentier Jean CHAPUIS\nLaboureur d'autrui Antoine BONNE\nTaillandier Fran\u00e7ois BONNE\nVigneron d'autrui Antoine TISSERAND\nLaboureur d'autrui Fran\u00e7ois TISSERAND\nid id Hubert TISSERAND\nManoeuvre Hubert BONNARDOT\nVigneron d'autrui Etienne CORNU\nRegratiere la veuve GUYARD\nManoeuvre Pierre MELINE\nSabotier Claude MOREAU\nVigneron d'autrui Denis ECARD\nid id JB CORNU",
+  "text": "\u24ddCoupez \u24dfLouis \u24d37.12.14 \u24e148877\n\u24ddFoutrain \u24dfAdolphe \u24d39.4.13 \u24e148877\n\u24ddGabala \u24dfFran\u00e7ois \u24d326.3.11 \u24e148877\n\u24ddBurglen \u24dfJoef \u24d328.11.12 \u24e148877\n\u24ddGradel \u24dfEdouard \u24d35.1.05 \u24e148877\n\u24ddDerocde \u24dfLouis \u24d329.8.07 \u24e148877\n\u24ddLonglet \u24dfGaston \u24d321.9.03 \u24e148877\n\u24ddGodefroy \u24dfAlbert \u24d31.10.07 \u24e148877",
   "confidences": {
-    "total": 1.0
+    "total": 0.99
   },
   "language_model": {},
   "attention_gif": "predict/example_word.gif"
 }
 ```
 
+<!-- Generated with https://gitlab.teklia.com/atr/dan/-/blob/56b986696064e316490162f5c1fb97d2bc54aaa3/tests/data/extraction/images/double_page/test-page_1.jpg -->
+
 <img src="../../assets/example_word.gif" >
 
 ### Predict with line-level attention maps and extract polygons
@@ -138,34 +146,34 @@ It will create the following JSON file named after the image and a GIF showing a
 
 ```json
 {
-  "text": "Manoeuvre Louis VIOLOTTE\nLaboureur d'autrui Antoine VIOLOTTE\nCouvreur en laves Antoine ROBERT\nVigneron d'autrui Jean POULET\nManoeuvre Claude BELORGEY\nVigneron d'autrui Pierre BERANGEY\nLaboureur d'autrui Antoine BERANGEY\nManoeuvre Fran\u00e7ois CORNU\nVigneron d'autrui Jean PANTIOT\nid id Philibert BONNE\nid id Jean TONICHON\nTaillandier Denis BONNE\nCharpentier Jean CHAPUIS\nLaboureur d'autrui Antoine BONNE\nTaillandier Fran\u00e7ois BONNE\nVigneron d'autrui Antoine TISSERAND\nLaboureur d'autrui Fran\u00e7ois TISSERAND\nid id Hubert TISSERAND\nManoeuvre Hubert BONNARDOT\nVigneron d'autrui Etienne CORNU\nRegratiere la veuve GUYARD\nManoeuvre Pierre MELINE\nSabotier Claude MOREAU\nVigneron d'autrui Denis ECARD\nid id JB CORNU",
+  "text": "\u24ddCoupez \u24dfLouis \u24d37.12.14 \u24e148877\n\u24ddFoutrain \u24dfAdolphe \u24d39.4.13 \u24e148877\n\u24ddGabala \u24dfFran\u00e7ois \u24d326.3.11 \u24e148877\n\u24ddBurglen \u24dfJoef \u24d328.11.12 \u24e148877\n\u24ddGradel \u24dfEdouard \u24d35.1.05 \u24e148877\n\u24ddDerocde \u24dfLouis \u24d329.8.07 \u24e148877\n\u24ddLonglet \u24dfGaston \u24d321.9.03 \u24e148877\n\u24ddGodefroy \u24dfAlbert \u24d31.10.07 \u24e148877",
   "confidences": {
-    "total": 1.0
+    "total": 0.99
   },
   "language_model": {},
   "objects": [
     {
-      "confidence": 0.76,
+      "confidence": 0.19,
       "polygon": [
         [
-          572,
-          242
+          324,
+          183
         ],
         [
-          1157,
-          242
+          536,
+          183
         ],
         [
-          1157,
-          317
+          536,
+          293
         ],
         [
-          572,
-          317
+          324,
+          293
         ]
       ],
-      "text": "Manoeuvre Louis VIOLOTTE",
-      "text_confidence": 1.0
+      "text": "\u24ddCoupez \u24dfLouis \u24d37.12.14 \u24e148877",
+      "text_confidence": 0.99
     },
     ...
     ...
@@ -175,6 +183,8 @@ It will create the following JSON file named after the image and a GIF showing a
 }
 ```
 
+<!-- Generated with https://gitlab.teklia.com/atr/dan/-/blob/56b986696064e316490162f5c1fb97d2bc54aaa3/tests/data/extraction/images/double_page/test-page_1.jpg -->
+
 <img src="../../assets/example_line_polygon.gif" >
 
 ### Predict with an external n-gram language model
diff --git a/fonts/LinuxLibertine.ttf b/fonts/LinuxLibertine.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..3dee7e79b51596588ddb6573f574279b320429f0
Binary files /dev/null and b/fonts/LinuxLibertine.ttf differ
diff --git a/tests/data/prediction/0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84_line.gif b/tests/data/prediction/0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84_line.gif
new file mode 100644
index 0000000000000000000000000000000000000000..be510dc91270771f3fb238a49eb517fe8ecb752d
--- /dev/null
+++ b/tests/data/prediction/0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84_line.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7f38766a47ee709a099d83a087f4b189dd53b34dc43c3090daf2531101052b6
+size 19490
diff --git a/tests/test_attention.py b/tests/test_attention.py
index 503841bf0fbbb1745302ebf8519e22bf6b7187c7..008640bc65903e7fe374a2ba7663ec8d007564f6 100644
--- a/tests/test_attention.py
+++ b/tests/test_attention.py
@@ -1,6 +1,8 @@
 # Copyright Teklia (contact@teklia.com) & Denis Coquenet
 # This code is licensed under CeCILL-C
 
+import shutil
+
 import pytest
 
 from dan.ocr.predict.attention import (
@@ -9,7 +11,18 @@ from dan.ocr.predict.attention import (
     split_text,
     split_text_and_confidences,
 )
-from dan.utils import EntityType, parse_charset_pattern, parse_tokens_pattern
+from dan.ocr.predict.inference import run as run_prediction
+from dan.utils import (
+    EntityType,
+    parse_charset_pattern,
+    parse_tokens,
+    parse_tokens_pattern,
+)
+from tests import FIXTURES
+
+PREDICTION_DATA_PATH = FIXTURES / "prediction"
+
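+# The Linux Libertine font committed at the repository root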
+FONT_PATH = FIXTURES.parent.parent / "fonts/LinuxLibertine.ttf"
 
 
 @pytest.mark.parametrize(
@@ -147,3 +160,52 @@ def test_split_text_and_confidences(
     assert texts_conf == expected_split_text
     assert averages_conf == expected_mean_confidences
     assert offsets_conf == expected_offsets
+
+
+def test_plot_attention(
+    tmp_path,
+):
+
+    image_name = "0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84"
+
+    image_dir = tmp_path / "images"
+    image_dir.mkdir()
+    shutil.copyfile(
+        (PREDICTION_DATA_PATH / "images" / image_name).with_suffix(".png"),
+        (image_dir / image_name).with_suffix(".png"),
+    )
+
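+    # Run a prediction with line-level attention maps and polygons enabled to generate the GIF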
+    run_prediction(
+        image_dir=image_dir,
+        font=FONT_PATH,
+        maximum_font_size=32,
+        model=PREDICTION_DATA_PATH,
+        output=tmp_path,
+        confidence_score=False,
+        confidence_score_levels=[],
+        attention_map=True,
+        attention_map_level=Level.Line,
+        attention_map_scale=0.5,
+        alpha_factor=0.9,
+        color_map="nipy_spectral",
+        word_separators=[" ", "\n"],
+        line_separators=["\n"],
+        temperature=1.0,
+        predict_objects=True,
+        max_object_height=None,
+        image_extension=".png",
+        gpu_device=None,
+        batch_size=1,
+        tokens=parse_tokens(PREDICTION_DATA_PATH / "tokens.yml"),
+        start_token=None,
+        use_language_model=False,
+        compile_model=False,
+        dynamic_mode=False,
+    )
+
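+    # The generated GIF must match the committed fixture byte for byte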
+    outname = image_name + "_line.gif"
+
+    assert (tmp_path / outname).read_bytes() == (
+        PREDICTION_DATA_PATH / outname
+    ).read_bytes()
diff --git a/tests/test_prediction.py b/tests/test_prediction.py
index 36365097455199951d0fe0437782e0f3d32470a1..01125eec8fda21ed2ad8dd05b7e433625b245c49 100644
--- a/tests/test_prediction.py
+++ b/tests/test_prediction.py
@@ -23,6 +23,9 @@ from tests import FIXTURES
 
 PREDICTION_DATA_PATH = FIXTURES / "prediction"
 
+FONT_PATH = FIXTURES.parent.parent / "fonts/LinuxLibertine.ttf"
+MAXIMUM_FONT_SIZE = 32
+
 
 @pytest.mark.parametrize(
     "image_name, expected_prediction",
@@ -409,6 +412,8 @@ def test_run_prediction(
 
     run_prediction(
         image_dir=image_dir,
+        font=FONT_PATH,
+        maximum_font_size=MAXIMUM_FONT_SIZE,
         model=PREDICTION_DATA_PATH,
         output=tmp_path,
         confidence_score=bool(confidence_score),
@@ -608,6 +613,8 @@ def test_run_prediction_batch(
 
     run_prediction(
         image_dir=image_dir,
+        font=FONT_PATH,
+        maximum_font_size=MAXIMUM_FONT_SIZE,
         model=PREDICTION_DATA_PATH,
         output=tmp_path,
         confidence_score=True if confidence_score else False,
@@ -771,6 +778,8 @@ def test_run_prediction_language_model(
 
     run_prediction(
         image_dir=image_dir,
+        font=FONT_PATH,
+        maximum_font_size=MAXIMUM_FONT_SIZE,
         model=model_path,
         output=tmp_path,
         confidence_score=False,