
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.



Commits on Source: 2

28 files changed: +68 −68

Files

@@ -9,9 +9,9 @@
                 ["training", "train"]
             ]
         },
-        "val": {
-            "training-val": [
-                ["training", "val"]
+        "dev": {
+            "training-dev": [
+                ["training", "dev"]
             ]
         },
         "test": {
@@ -84,7 +84,7 @@
         "validation": {
             "eval_on_valid": true,
             "eval_on_valid_interval": 2,
-            "set_name_focus_metric": "training-val",
+            "set_name_focus_metric": "training-dev",
             "font": "fonts/LinuxLibertine.ttf",
             "maximum_font_size": 32,
             "nb_logged_images": 5
@@ -19,9 +19,9 @@
                 ["$dataset_name", "train"]
             ]
         },
-        "val": {
-            "$dataset_name-val": [
-                ["$dataset_name", "val"]
+        "dev": {
+            "$dataset_name-dev": [
+                ["$dataset_name", "dev"]
             ]
         },
         "test": {
@@ -96,7 +96,7 @@
             "eval_on_valid": true,
             "eval_on_valid_interval": 5,
             "eval_on_valid_start": 0,
-            "set_name_focus_metric": "$dataset_name-val",
+            "set_name_focus_metric": "$dataset_name-dev",
             "font": "fonts/LinuxLibertine.ttf",
             "maximum_font_size": 32,
             "nb_logged_images": 5,
@@ -9,9 +9,9 @@
                 ["training", "train"]
             ]
         },
-        "val": {
-            "training-val": [
-                ["training", "val"]
+        "dev": {
+            "training-dev": [
+                ["training", "dev"]
             ]
         },
         "test": {
@@ -85,7 +85,7 @@
             "eval_on_valid": true,
             "eval_on_valid_interval": 2,
             "eval_on_valid_start": 0,
-            "set_name_focus_metric": "training-val",
+            "set_name_focus_metric": "training-dev",
             "font": "fonts/LinuxLibertine.ttf",
             "maximum_font_size": 32,
             "nb_logged_images": 1
@@ -10,6 +10,6 @@ logging.basicConfig(
 )

 TRAIN_NAME = "train"
-VAL_NAME = "val"
+VAL_NAME = "dev"
 TEST_NAME = "test"
 SPLIT_NAMES = [TRAIN_NAME, VAL_NAME, TEST_NAME]
@@ -83,7 +83,7 @@ def logging_metrics(
     Log dictionary metrics in the Metrics section of MLflow

     :param display_values: dict, the dictionary containing the metrics to publish on MLflow
-    :param step: str, the step for which the metrics are to be published on Metrics section (ex: train, val, test). This will allow a better display on MLflow.
+    :param step: str, the step for which the metrics are to be published on Metrics section (ex: train, dev, test). This will allow a better display on MLflow.
     :param epoch: int, the current epoch.
     :param mlflow_logging: bool, allows you to verify that you have the authorization to log on MLflow, defaults to False
     :param is_master: bool, makes sure you're on the right thread, defaults to False
@@ -107,7 +107,7 @@ def logging_tags_metrics(
     Log dictionary metrics in the Tags section of MLflow

     :param display_values: dict, the dictionary containing the metrics to publish on MLflow
-    :param step: str, the step for which the metrics are to be published on Tags section (ex: train, val, test). This will allow a better display on MLflow.
+    :param step: str, the step for which the metrics are to be published on Tags section (ex: train, dev, test). This will allow a better display on MLflow.
     :param mlflow_logging: bool, allows you to verify that you have the authorization to log on MLflow, defaults to False
     :param is_master: bool, makes sure you're on the right thread, defaults to False
     """
@@ -6,7 +6,7 @@ There are a several steps to follow when training a DAN model.

 To extract the data, DAN uses an Arkindex export database in SQLite format. You will need to:

-1. Structure the data into splits (`train` / `val` / `test`) in a project dataset in [Arkindex](https://demo.arkindex.org/).
+1. Structure the data into splits (`train` / `dev` / `test`) in a project dataset in [Arkindex](https://demo.arkindex.org/).
 1. [Export the project](https://doc.arkindex.org/howto/export/) in SQLite format.
 1. Extract the data with the [extract command](../usage/datasets/extract.md).
 1. Download images with the [download command](../usage/datasets/download.md).
@@ -22,7 +22,7 @@ output/
 ├── labels.json
 ├── images
 │   ├── train
-│   ├── val
+│   ├── dev
 │   └── test
 └── language_model
     ├── corpus_characters.txt
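
As a quick sanity check of the renamed layout shown in the tree above, a minimal sketch; the `check_extraction_layout` helper is hypothetical (not part of DAN) and assumes the extraction wrote to `output/`, checking only the paths visible in the tree.

```python
from pathlib import Path


def check_extraction_layout(output: Path) -> None:
    """Raise if the expected extraction/download layout (with the dev split) is missing."""
    expected = [
        output / "labels.json",
        output / "language_model" / "corpus_characters.txt",
        *(output / "images" / split for split in ("train", "dev", "test")),
    ]
    missing = [str(path) for path in expected if not path.exists()]
    if missing:
        raise FileNotFoundError(f"Missing expected paths: {missing}")


check_extraction_layout(Path("output"))
```
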
@@ -38,7 +38,7 @@ The `--output` directory should have a `split.json` JSON-formatted file with a s
       "text": "ⓢCoufet ⓕBouis ⓑ07.12.14"
     },
   },
-  "val": {},
+  "dev": {},
   "test": {}
 }
 ```
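
A minimal sketch of loading such a `split.json` and checking that it uses the new split names; `load_split` is a hypothetical helper, not part of the DAN code base.

```python
import json
from pathlib import Path

# "dev" replaces the former "val" key; "train" and "test" are unchanged.
EXPECTED_SPLITS = {"train", "dev", "test"}


def load_split(path: Path) -> dict:
    split = json.loads(path.read_text())
    missing = EXPECTED_SPLITS - split.keys()
    if missing:
        raise ValueError(f"split.json is missing splits: {sorted(missing)}")
    return split


split = load_split(Path("output/split.json"))
print({name: len(entries) for name, entries in split.items()})
```
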
@@ -25,7 +25,7 @@ These files can be generated by the `teklia-dan dataset download` command. More
   "train": {
     "<image_path>": "\u24e2Coufet \u24d5Bouis \u24d107.12.14"
   },
-  "val": {},
+  "dev": {},
   "test": {}
 }
 ```
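
And a short sketch of reading the corresponding `labels.json`, counting labelled images per split; the `output/labels.json` path is assumed from the directory tree shown earlier.

```python
import json
from pathlib import Path

labels = json.loads(Path("output/labels.json").read_text())

# After the rename, the middle split is "dev" rather than "val".
for split in ("train", "dev", "test"):
    print(f"{split}: {len(labels.get(split, {}))} labelled images")
```
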
@@ -25,7 +25,7 @@ This will, for each evaluated split:
 | `--config`           | Path to the configuration file.                                                                                                                                                                          | `pathlib.Path` |                            |
 | `--nerval-threshold` | Distance threshold for the match between gold and predicted entity during Nerval evaluation. `0` would impose perfect matches, `1` would allow completely different strings to be considered as a match. | `float`        | `0.3`                      |
 | `--output-json`      | Where to save evaluation results in JSON format.                                                                                                                                                         | `pathlib.Path` | `None`                     |
-| `--sets`             | Sets to evaluate. Defaults to `train`, `val`, `test`.                                                                                                                                                    | `list[str]`    | `["train", "val", "test"]` |
+| `--sets`             | Sets to evaluate. Defaults to `train`, `dev`, `test`.                                                                                                                                                    | `list[str]`    | `["train", "dev", "test"]` |

 ## Examples

@@ -37,7 +37,7 @@ This will, for each evaluated split:
 | Split | CER (HTR-NER) | CER (HTR) | WER (HTR-NER) | WER (HTR) | WER (HTR no punct) |
 | :---: | :-----------: | :-------: | :-----------: | :-------: | :----------------: |
 | train |       x       |     x     |       x       |     x     |         x          |
-|  val  |       x       |     x     |       x       |     x     |         x          |
+|  dev  |       x       |     x     |       x       |     x     |         x          |
 | test  |       x       |     x     |       x       |     x     |         x          |

 #### 5 worst prediction(s)
@@ -57,7 +57,7 @@ This will, for each evaluated split:
 | Split | CER (HTR-NER) | CER (HTR) | WER (HTR-NER) | WER (HTR) | WER (HTR no punct) | NER |
 | :---: | :-----------: | :-------: | :-----------: | :-------: | :----------------: | :-: |
 | train |       x       |     x     |       x       |     x     |         x          |  x  |
-|  val  |       x       |     x     |       x       |     x     |         x          |  x  |
+|  dev  |       x       |     x     |       x       |     x     |         x          |  x  |
 | test  |       x       |     x     |       x       |     x     |         x          |  x  |

 #### Nerval evaluation
@@ -69,7 +69,7 @@ This will, for each evaluated split:
 | Surname |     x     |    x    |     x     |   x    |  x  |    x    |
 |   All   |     x     |    x    |     x     |   x    |  x  |    x    |

-##### val
+##### dev

 |   tag   | predicted | matched | Precision | Recall | F1  | Support |
 | :-----: | :-------: | :-----: | :-------: | :----: | :-: | :-----: |
@@ -39,12 +39,12 @@ Alternatively, you can find them in the `Time series` tab.

 The same metrics are computed on the validation set, except for the loss function:

-- `val/{dataset}-val_cer`: the CER.
-- `val/{dataset}-val_cer_no_token`: the CER ignoring punctuation marks.
-- `val/{dataset}-val_ner`: the CER ignoring characters (only NE tokens are considered).
-- `val/{dataset}-val_wer`. the WER.
-- `val/{dataset}-val_wer_no_punct`: the WER ignoring punctuation marks.
-- `val/{dataset}-val_wer_no_token`: the WER ignoring Named Entity (NE) tokens (only characters are considered).
+- `dev/{dataset}-dev_cer`: the CER.
+- `dev/{dataset}-dev_cer_no_token`: the CER ignoring punctuation marks.
+- `dev/{dataset}-dev_ner`: the CER ignoring characters (only NE tokens are considered).
+- `dev/{dataset}-dev_wer`. the WER.
+- `dev/{dataset}-dev_wer_no_punct`: the WER ignoring punctuation marks.
+- `dev/{dataset}-dev_wer_no_token`: the WER ignoring Named Entity (NE) tokens (only characters are considered).

 These metrics can be visualized in the `Scalars` tab in Tensorboard, under the `valid` section.
 <img src="../../../assets/tensorboard/example_scalars_val.png" />
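
A minimal sketch of how tags following this `dev/{dataset}-dev_*` pattern could be written with `torch.utils.tensorboard`; the dataset name, log directory and metric values are placeholders, and only `SummaryWriter.add_scalar` is a real API call here.

```python
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir="outputs/tensorboard")  # hypothetical log directory
dataset, epoch = "training", 0  # placeholders

# Tag pattern matches the renamed metrics listed above (formerly val/{dataset}-val_*).
for name, value in {"cer": 0.12, "wer": 0.34, "wer_no_punct": 0.30}.items():
    writer.add_scalar(f"dev/{dataset}-dev_{name}", value, epoch)
writer.close()
```
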
@@ -55,7 +55,7 @@ Statistics
 | Median |   0.0   |    0.0    | 0.0 |
 | Total  |    0    |     0     |  0  |

-# Val
+# Dev

 ## Images statistics

@@ -15,7 +15,7 @@
             0.4286
         ]
     ],
-    "val": [
+    "dev": [
         [
             "2c242f5c-e979-43c4-b6f2-a6d4815b651d.png",
             "\u24c8A \u24bbCharles \u24b711 \u24c1P \u24b8C \u24c0F \u24c4A \u24c514331",
@@ -3,7 +3,7 @@
 | Split | CER (HTR-NER) | CER (HTR) | WER (HTR-NER) | WER (HTR) | WER (HTR no punct) | NER  |
 |:-----:|:-------------:|:---------:|:-------------:|:---------:|:------------------:|:----:|
 | train |     18.89     |   21.05   |     26.67     |   26.67   |       26.67        | 7.14 |
-|  val  |      8.82     |   11.54   |      50.0     |    50.0   |        50.0        | 0.0  |
+|  dev  |      8.82     |   11.54   |      50.0     |    50.0   |        50.0        | 0.0  |
 |  test |      2.78     |    3.33   |     14.29     |   14.29   |       14.29        | 0.0  |

 #### Nerval evaluation
@@ -22,7 +22,7 @@
 | Surname   |         2 |       2 |     100.0 |  100.0 | 100.0 |       2 |
 | ALL       |        15 |      12 |      80.0 |  85.71 | 82.76 |      14 |

-##### val
+##### dev

 | tag       | predicted | matched | Precision | Recall |    F1 | Support |
 |:----------|----------:|--------:|----------:|-------:|------:|--------:|
@@ -380,11 +380,11 @@
             "text": "ⓢPressonet  ⓕMarie  ⓑ12"
         }
     },
-    "val": {
-        "val-page_1-line_1": {
+    "dev": {
+        "dev-page_1-line_1": {
             "dataset_id": "dataset_id",
             "image": {
-                "iiif_url": "{FIXTURES}/extraction/images/val-page_1-line_1.jpg",
+                "iiif_url": "{FIXTURES}/extraction/images/dev-page_1-line_1.jpg",
                 "polygon": [
                     [
                         0,
@@ -410,10 +410,10 @@
             },
             "text": "ⓢCiraud  ⓕAntoine  ⓑ34"
         },
-        "val-page_1-line_2": {
+        "dev-page_1-line_2": {
             "dataset_id": "dataset_id",
             "image": {
-                "iiif_url": "{FIXTURES}/extraction/images/val-page_1-line_2.jpg",
+                "iiif_url": "{FIXTURES}/extraction/images/dev-page_1-line_2.jpg",
                 "polygon": [
                     [
                         0,
@@ -439,10 +439,10 @@
             },
             "text": "ⓢCiraud  ⓕPriser  ⓑ34"
         },
-        "val-page_1-line_3": {
+        "dev-page_1-line_3": {
             "dataset_id": "dataset_id",
             "image": {
-                "iiif_url": "{FIXTURES}/extraction/images/val-page_1-line_3.jpg",
+                "iiif_url": "{FIXTURES}/extraction/images/dev-page_1-line_3.jpg",
                 "polygon": [
                     [
                         0,
@@ -3,7 +3,7 @@
         "images/0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84.png": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier Ⓟ12241",
         "images/0dfe8bcd-ed0b-453e-bf19-cc697012296e.png": "ⓈTemplié ⒻMarcelle Ⓑ93 ⓁJ Ⓚch ⓄE dachyle"
     },
-    "val": {
+    "dev": {
         "images/2c242f5c-e979-43c4-b6f2-a6d4815b651d.png": "ⓈA ⒻCharles Ⓑ11 ⓁP ⒸC ⓀF ⓄA Ⓟ14331"
     },
     "test": {
@@ -3,7 +3,7 @@
         "images/0a34e13a-4ab0-4a91-8d7c-b1d8fee32628.png": "The latter do not regard",
         "images/0a70e14f-feda-4607-989c-36cf581ddff5.png": "At the beginning of"
     },
-    "val": {
+    "dev": {
         "images/0a576062-303c-4893-a729-c09c92865d31.png": "One can remember with",
         "images/0b2457c8-81f1-4600-84d9-f8bf2822a991.png": "The play was no more"
     },
@@ -96,10 +96,10 @@ def test_download(
                 "images/train/dataset_id/train-page_2-line_2.jpg": "ⓢTerontussieux  ⓕJean  ⓑ2",
                 "images/train/dataset_id/train-page_2-line_3.jpg": "ⓢPressonet  ⓕMarie  ⓑ12",
             },
-            "val": {
-                "images/val/dataset_id/val-page_1-line_1.jpg": "ⓢCirau⁇  ⓕAntoine  ⓑ⁇⁇",
-                "images/val/dataset_id/val-page_1-line_2.jpg": "ⓢCirau⁇  ⓕPriser  ⓑ⁇⁇",
-                "images/val/dataset_id/val-page_1-line_3.jpg": "ⓢCirau⁇  ⓕElisa⁇et⁇  ⓑ⁇⁇",
+            "dev": {
+                "images/dev/dataset_id/dev-page_1-line_1.jpg": "ⓢCirau⁇  ⓕAntoine  ⓑ⁇⁇",
+                "images/dev/dataset_id/dev-page_1-line_2.jpg": "ⓢCirau⁇  ⓕPriser  ⓑ⁇⁇",
+                "images/dev/dataset_id/dev-page_1-line_3.jpg": "ⓢCirau⁇  ⓕElisa⁇et⁇  ⓑ⁇⁇",
             },
         },
     )
@@ -126,10 +126,14 @@ def test_download(
     IMAGE_DIR = output / "images"
     TEST_DIR = IMAGE_DIR / "test" / "dataset_id"
     TRAIN_DIR = IMAGE_DIR / "train" / "dataset_id"
-    VAL_DIR = IMAGE_DIR / "val" / "dataset_id"
+    VAL_DIR = IMAGE_DIR / "dev" / "dataset_id"

     expected_paths = [
         output / "charset.pkl",
+        # Images of dev folder
+        VAL_DIR / "dev-page_1-line_1.jpg",
+        VAL_DIR / "dev-page_1-line_2.jpg",
+        VAL_DIR / "dev-page_1-line_3.jpg",
         # Images of test folder
         TEST_DIR / "test-page_1-line_1.jpg",
         TEST_DIR / "test-page_1-line_2.jpg",
@@ -145,10 +149,6 @@ def test_download(
         TRAIN_DIR / "train-page_2-line_1.jpg",
         TRAIN_DIR / "train-page_2-line_2.jpg",
         TRAIN_DIR / "train-page_2-line_3.jpg",
-        # Images of val folder
-        VAL_DIR / "val-page_1-line_1.jpg",
-        VAL_DIR / "val-page_1-line_2.jpg",
-        VAL_DIR / "val-page_1-line_3.jpg",
         output / "labels.json",
         output / "split.json",
     ]
@@ -149,7 +149,7 @@ def test_eval_nerval(capsys, evaluate_config):


 @pytest.mark.parametrize(
-    "training_res, val_res, test_res",
+    "training_res, dev_res, test_res",
     (
         (
             {
@@ -202,7 +202,7 @@ def test_eval_nerval(capsys, evaluate_config):
 )
 @pytest.mark.parametrize("is_output_json", ((True, False)))
 def test_evaluate(
-    capsys, training_res, val_res, test_res, is_output_json, evaluate_config, tmp_path
+    capsys, training_res, dev_res, test_res, is_output_json, evaluate_config, tmp_path
 ):
     evaluate_path = FIXTURES / "evaluate"

@@ -224,7 +224,7 @@ def test_evaluate(

     # Check that the evaluation results are correct
     for split_name, expected_res in zip(
-        ["train", "val", "test"], [training_res, val_res, test_res]
+        ["train", "dev", "test"], [training_res, dev_res, test_res]
     ):
         filename = (
             evaluate_config["training"]["output_folder"]
@@ -409,7 +409,7 @@ def test_evaluate_language_model(
             },
         ),
         (
-            "val",
+            "dev",
             {
                 "nb_chars": 34,
                 "cer": 0.0882,
@@ -114,10 +114,10 @@ def test_language_model(
                 "images/train/dataset_id/train-page_2-line_2.jpg": "ⓢTerontussieux  ⓕJean  ⓑ2",
                 "images/train/dataset_id/train-page_2-line_3.jpg": "ⓢPressonet  ⓕMarie  ⓑ12",
             },
-            "val": {
-                "images/val/dataset_id/val-page_1-line_1.jpg": "ⓢCirau⁇  ⓕAntoine  ⓑ⁇⁇",
-                "images/val/dataset_id/val-page_1-line_2.jpg": "ⓢCirau⁇  ⓕPriser  ⓑ⁇⁇",
-                "images/val/dataset_id/val-page_1-line_3.jpg": "ⓢCirau⁇  ⓕElisa⁇et⁇  ⓑ⁇⁇",
+            "dev": {
+                "images/dev/dataset_id/dev-page_1-line_1.jpg": "ⓢCirau⁇  ⓕAntoine  ⓑ⁇⁇",
+                "images/dev/dataset_id/dev-page_1-line_2.jpg": "ⓢCirau⁇  ⓕPriser  ⓑ⁇⁇",
+                "images/dev/dataset_id/dev-page_1-line_3.jpg": "ⓢCirau⁇  ⓕElisa⁇et⁇  ⓑ⁇⁇",
             },
         },
     )
@@ -318,10 +318,10 @@ def test_language_model_subword_vocab_size(
                 "images/train/dataset_id/train-page_2-line_2.jpg": "ⓢTerontussieux  ⓕJean  ⓑ2",
                 "images/train/dataset_id/train-page_2-line_3.jpg": "ⓢPressonet  ⓕMarie  ⓑ12",
             },
-            "val": {
-                "images/val/dataset_id/val-page_1-line_1.jpg": "ⓢCirau⁇  ⓕAntoine  ⓑ⁇⁇",
-                "images/val/dataset_id/val-page_1-line_2.jpg": "ⓢCirau⁇  ⓕPriser  ⓑ⁇⁇",
-                "images/val/dataset_id/val-page_1-line_3.jpg": "ⓢCirau⁇  ⓕElisa⁇et⁇  ⓑ⁇⁇",
+            "dev": {
+                "images/dev/dataset_id/dev-page_1-line_1.jpg": "ⓢCirau⁇  ⓕAntoine  ⓑ⁇⁇",
+                "images/dev/dataset_id/dev-page_1-line_2.jpg": "ⓢCirau⁇  ⓕPriser  ⓑ⁇⁇",
+                "images/dev/dataset_id/dev-page_1-line_3.jpg": "ⓢCirau⁇  ⓕElisa⁇et⁇  ⓑ⁇⁇",
             },
         },
     )