diff --git a/README.md b/README.md
index cc24d3a2a38e44d7982c654a70b7d2d3acc311c9..f3526541b6e47690b0d4d5cf1b5e941206ef0a7c 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,7 @@ This is an open-source project, licensed using [the MIT license](https://opensou
 For development and testing purposes, it may be useful to install the project as an editable package with pip.
 
 - Use a virtualenv (e.g. with virtualenvwrapper `mkvirtualenv -a . dan`)
+- Initialize the [`Nerval`](https://gitlab.teklia.com/ner/nerval) submodule (e.g. `git submodule update --init --recursive`)
 - Install `dan` as a package (e.g. `pip install -e .`)
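+
+For example, the full sequence might look like:
+
+```shell
+mkvirtualenv -a . dan
+git submodule update --init --recursive
+pip install -e .
+```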
 
 ### Linter
diff --git a/dan/ocr/evaluate.py b/dan/ocr/evaluate.py
index 079b0e5d11455426132c410adf0f5383e9329e1f..f8f1fc743803967384f217b2c8f7f35e48525ca0 100644
--- a/dan/ocr/evaluate.py
+++ b/dan/ocr/evaluate.py
@@ -6,12 +6,16 @@ Evaluate a trained DAN model.
 import logging
 import random
 from argparse import ArgumentTypeError
+from itertools import chain
+from operator import attrgetter
 from pathlib import Path
 from typing import Dict, List
 
 import numpy as np
 import torch
 import torch.multiprocessing as mp
+from edlib import align, getNiceAlignment
+from prettytable import MARKDOWN, PrettyTable
 
 from dan.bio import convert
 from dan.ocr.manager.metrics import Inference
@@ -25,6 +29,7 @@ from nerval.utils import print_results
 logger = logging.getLogger(__name__)
 
 NERVAL_THRESHOLD = 0.30
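+# Number of worst predictions displayed after evaluation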
+NB_WORST_PREDICTIONS = 5
 
 
 def parse_threshold(value: str) -> float:
@@ -66,6 +71,38 @@ def add_evaluate_parser(subcommands) -> None:
     parser.set_defaults(func=run)
 
 
+def print_worst_predictions(all_inferences: Dict[str, List[Inference]]):
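+    """
+    Print a Markdown table of the worst predictions, sorted by WER,
+    with an alignment between each ground truth and its prediction.
+    """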
+    table = PrettyTable(
+        field_names=[
+            "Image name",
+            "WER",
+            "Alignment between ground truth - prediction",
+        ]
+    )
+    table.set_style(MARKDOWN)
+
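+    # Flatten the inferences of every split and keep those with the highest WER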
+    worst_inferences = sorted(
+        chain.from_iterable(all_inferences.values()),
+        key=attrgetter("wer"),
+        reverse=True,
+    )[:NB_WORST_PREDICTIONS]
+    for inference in worst_inferences:
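+        # Compute and format a character-level alignment between
+        # the ground truth and the prediction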
+        alignment = getNiceAlignment(
+            align(
+                inference.ground_truth,
+                inference.prediction,
+                task="path",
+            ),
+            inference.ground_truth,
+            inference.prediction,
+        )
+        alignment_str = f'{alignment["query_aligned"]}\n{alignment["matched_aligned"]}\n{alignment["target_aligned"]}'
+        table.add_row([inference.image, round(inference.wer * 100, 2), alignment_str])
+
+    print(f"\n#### {NB_WORST_PREDICTIONS} worst prediction(s)\n")
+    print(table)
+
+
 def eval_nerval(
     all_inferences: Dict[str, List[Inference]],
     tokens: Path,
@@ -159,6 +196,8 @@ def eval(rank, config: dict, nerval_threshold: float, mlflow_logging: bool):
             threshold=nerval_threshold,
         )
 
+    print_worst_predictions(all_inferences)
+
 
 def run(config: dict, nerval_threshold: float):
     update_config(config)
diff --git a/dan/ocr/manager/metrics.py b/dan/ocr/manager/metrics.py
index 988c90d073c95cb80a682cf3a916972da540233f..07cece06993cea371d6c4f3b34970ac63f8aa924 100644
--- a/dan/ocr/manager/metrics.py
+++ b/dan/ocr/manager/metrics.py
@@ -29,8 +29,10 @@ class Inference(NamedTuple):
     inferring again when we need to compute new metrics
     """
 
+    image: str
     ground_truth: str
     prediction: str
+    wer: float
 
 
 class MetricManager:
diff --git a/dan/ocr/manager/training.py b/dan/ocr/manager/training.py
index ad35f823e312c51a21975229fff0da38bac0545d..d436ead177c6f959fcc757c27722f970477df784 100644
--- a/dan/ocr/manager/training.py
+++ b/dan/ocr/manager/training.py
@@ -4,6 +4,7 @@ import os
 import random
 from copy import deepcopy
 from enum import Enum
+from itertools import repeat
 from pathlib import Path
 from time import time
 from typing import Dict, List, Tuple
@@ -768,7 +769,9 @@ class GenericTrainingManager:
             tokens=self.tokens,
         )
 
-        # Keep inferences in memory to evaluate with Nerval
+        # Keep inferences in memory to:
+        # - evaluate with Nerval
+        # - display worst predictions
         inferences = []
 
         with tqdm(total=len(loader.dataset)) as pbar:
@@ -796,7 +799,13 @@ class GenericTrainingManager:
                     pbar.update(len(batch_data["names"]) * self.nb_workers)
 
                     inferences.extend(
-                        map(Inference, batch_values["str_y"], batch_values["str_x"])
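+                        # display_values["wer"] is computed over the whole
+                        # batch, so every Inference built here shares it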
+                        map(
+                            Inference,
+                            batch_data["names"],
+                            batch_values["str_y"],
+                            batch_values["str_x"],
+                            repeat(display_values["wer"]),
+                        )
                     )
 
                 # log metrics in MLflow
diff --git a/docs/get_started/index.md b/docs/get_started/index.md
index ddc34d0fad9aca53dba03f1abc732d6e13057585..90e88151d4bc0991179d4e8068660404c251b803 100644
--- a/docs/get_started/index.md
+++ b/docs/get_started/index.md
@@ -26,6 +26,12 @@ To install DAN manually, you need to first clone via:
 git clone git@gitlab.teklia.com:atr/dan.git
 ```
 
+Next, initialize the [`Nerval`](https://gitlab.teklia.com/ner/nerval) submodule:
+
+```shell
+git submodule update --init --recursive
+```
+
 Then you can install it via pip:
 
 ```shell
diff --git a/docs/usage/evaluate/index.md b/docs/usage/evaluate/index.md
index 2153ebd776c0767b8e55edf8ffaa9cec2422f307..94d57d18c812acbe184c721c0de3e52ad196d8a2 100644
--- a/docs/usage/evaluate/index.md
+++ b/docs/usage/evaluate/index.md
@@ -12,15 +12,19 @@ To evaluate DAN on your dataset:
 This will, for each evaluated split:
 
 1. Create a YAML file with the evaluation results in the `results` subfolder of the `training.output_folder` indicated in your configuration.
-1. Print in the console a metrics Markdown table (see [table example below](#htr-evaluation)).
-1. Print in the console a [Nerval](https://gitlab.teklia.com/ner/nerval) metrics Markdown table, if the `dataset.tokens` parameter in your configuration is defined (see [table example below](#htr-and-ner-evaluation)).
+1. Print a metrics Markdown table in the console (see [HTR example below](#htr-evaluation)).
+1. Print a [Nerval](https://gitlab.teklia.com/ner/nerval) metrics Markdown table in the console, if the `dataset.tokens` parameter is defined in your configuration (see [HTR and NER example below](#htr-and-ner-evaluation)).
+1. Print the 5 worst predictions in the console (see [examples below](#examples)).
+
+!!! warning
+    Displaying the worst predictions does not support batch evaluation: if the `training.data.batch_size` parameter is not equal to `1`, the displayed `WER` is that of the **whole batch**, not of the individual image.
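+
+    To get a per-image `WER`, set the batch size to `1` in your configuration. A minimal excerpt, following the `training.data.batch_size` parameter path:
+
+    ```yaml
+    training:
+      data:
+        batch_size: 1 # evaluate one image at a time
+    ```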
 
 | Parameter            | Description                                                                                                                                                                                              | Type           | Default |
 | -------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------- | ------- |
 | `--config`           | Path to the configuration file.                                                                                                                                                                          | `pathlib.Path` |         |
 | `--nerval-threshold` | Distance threshold for the match between gold and predicted entity during Nerval evaluation. `0` would impose perfect matches, `1` would allow completely different strings to be considered as a match. | `float`        | `0.3`   |
 
-## Example output
+## Examples
 
 ### HTR evaluation
 
@@ -32,6 +36,14 @@ This will, for each evaluated split:
 | train |       x       |     x     |       x       |     x     |         x          |
 |  val  |       x       |     x     |       x       |     x     |         x          |
 | test  |       x       |     x     |       x       |     x     |         x          |
+
+#### 5 worst prediction(s)
+
+|   Image name   | WER | Alignment between ground truth - prediction |
+| :------------: | :-: | :-----------------------------------------: |
+| <image_id>.png |  x  |                      x                      |
+|                |     |                      |                      |
+|                |     |                      x                      |
 ```
 
 ### HTR and NER evaluation
@@ -67,4 +79,12 @@ This will, for each evaluated split:
 | :-----: | :-------: | :-----: | :-------: | :----: | :-: | :-----: |
 | Surname |     x     |    x    |     x     |   x    |  x  |    x    |
 |   All   |     x     |    x    |     x     |   x    |  x  |    x    |
+
+#### 5 worst prediction(s)
+
+|   Image name   | WER | Alignment between ground truth - prediction |
+| :------------: | :-: | :-----------------------------------------: |
+| <image_id>.png |  x  |                      x                      |
+|                |     |                      |                      |
+|                |     |                      x                      |
 ```
diff --git a/mkdocs.yml b/mkdocs.yml
index 758964cef9c45bec8799828241470a9f196ca57a..3ac3aea259d4d272fc8975909b3f723d811cddef 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -144,7 +144,7 @@ extra:
       link: https://teklia.com
     - icon: fontawesome/brands/gitlab
       name: Git repository for this project
-      link: https://gitlab.com/teklia/atr/dan
+      link: https://gitlab.teklia.com/atr/dan
     - icon: fontawesome/brands/linkedin
       name: Teklia @ LinkedIn
       link: https://www.linkedin.com/company/teklia
diff --git a/tests/data/evaluate/metrics_table.md b/tests/data/evaluate/metrics_table.md
index 107bef4188e44a6f8e5509c74b7ceb8d4ded4625..27bf53ad329cbd0441b030695b322728ce62b3f0 100644
--- a/tests/data/evaluate/metrics_table.md
+++ b/tests/data/evaluate/metrics_table.md
@@ -47,3 +47,20 @@
 | Chalumeau |     1     |    0    |    0.0    |  0.0   |   0   |    1    |
 |  Batiment |     1     |    1    |   100.0   | 100.0  | 100.0 |    1    |
 |    All    |     6     |    5    |   83.33   | 83.33  | 83.33 |    6    |
+
+#### 5 worst prediction(s)
+
+|                Image name                |  WER  |        Alignment between ground truth - prediction        |
+|:----------------------------------------:|:-----:|:---------------------------------------------------------:|
+| 2c242f5c-e979-43c4-b6f2-a6d4815b651d.png |  50.0 |             ⓈA ⒻCharles Ⓑ11 ⓁP ⒸC ⓀF ⓄA Ⓟ14331            |
+|                                          |       |             |.||||||||||||||||||||||||.||||.||            |
+|                                          |       |             Ⓢd ⒻCharles Ⓑ11 ⓁP ⒸC ⓀF Ⓞd Ⓟ14 31            |
+| 0dfe8bcd-ed0b-453e-bf19-cc697012296e.png | 26.67 |      ⓈTemplié ⒻMarcelle Ⓑ93 ⓁJ Ⓚch ⓄE dachyle-------      |
+|                                          |       |      ||||||||||||||||||||||||.|||||||||||.||.-------      |
+|                                          |       |      ⓈTemplié ⒻMarcelle Ⓑ93 ⓁS Ⓚch ⓄE dactylo Ⓟ18376      |
+| ffdec445-7f14-4f5f-be44-68d0844d0df1.png | 14.29 |            ⓈNaudin ⒻMarie Ⓑ53 ⓁS ⒸV ⓀBelle mère           |
+|                                          |       |            |||||||||||||||||||||||.||||||||||||           |
+|                                          |       |            ⓈNaudin ⒻMarie Ⓑ53 ⓁS Ⓒv ⓀBelle mère           |
+| 0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84.png |  12.5 | ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier Ⓟ-------12241 |
+|                                          |       | |||||||||||||||||||||||||||||||||||||||||||||-------||||| |
+|                                          |       | ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241 |