diff --git a/dan/ocr/evaluate.py b/dan/ocr/evaluate.py
index ce88f3153febfefcc99ef55a69b8953a0b4f3fe2..cb2bf6c8d1a6f1dc78fceb44e1fcf443eb762088 100644
--- a/dan/ocr/evaluate.py
+++ b/dan/ocr/evaluate.py
@@ -84,10 +84,11 @@ def add_evaluate_parser(subcommands) -> None:
         type=Path,
     )
 
+    pretty_splits = map(lambda split: f"`{split}`", SPLIT_NAMES)
     parser.add_argument(
         "--sets",
         dest="set_names",
-        help="Where to save evaluation results in JSON format.",
+        help=f"Sets to evaluate. Defaults to {', '.join(pretty_splits)}.",
         default=SPLIT_NAMES,
         nargs="+",
     )
diff --git a/docs/usage/evaluate/index.md b/docs/usage/evaluate/index.md
index c672b466ab2de0cad2e25440c4f6554dfb693faa..ad5fe49d6a239d9cd0d6f4eab0a11723c7f2a2aa 100644
--- a/docs/usage/evaluate/index.md
+++ b/docs/usage/evaluate/index.md
@@ -25,7 +25,7 @@ This will, for each evaluated split:
 | `--config`           | Path to the configuration file.                                                                                                                                                                          | `pathlib.Path` |                            |
 | `--nerval-threshold` | Distance threshold for the match between gold and predicted entity during Nerval evaluation. `0` would impose perfect matches, `1` would allow completely different strings to be considered as a match. | `float`        | `0.3`                      |
 | `--output-json`      | Where to save evaluation results in JSON format.                                                                                                                                                         | `pathlib.Path` | `None`                     |
-| `--sets`             | Which sets should be evaluated.                                                                                                                                                                          | `str`          | `["train", "val", "test"]` |
+| `--sets`             | Sets to evaluate. Defaults to `train`, `val`, `test`.                                                                                                                                                    | `list[str]`    | `["train", "val", "test"]` |
 
 ## Examples