Commit 88e83f99 authored and committed by Manon Blanco

Load the model via a path to a folder

parent 900e10a0
1 merge request: !305 Load the model via a path to a folder
@@ -61,15 +61,17 @@ from dan.ocr.predict.inference import DAN
image = cv2.cvtColor(cv2.imread(IMAGE_PATH), cv2.COLOR_BGR2RGB)
```
Then one can initialize and load the trained model with the parameters used during training.
Then one can initialize and load the trained model with the parameters used during training. The directory passed as a parameter should contain:
- a single model file `*.pt`,
- a single charset file `*.pkl`,
- a single parameters file `*parameters.yml`.
```python
model_path = "model.pt"
params_path = "parameters.yml"
charset_path = "charset.pkl"
model_path = "models"
model = DAN("cpu")
model.load(model_path, params_path, charset_path, mode="eval")
model.load(model_path, mode="eval")
```
To run the inference on a GPU, one can replace `cpu` with the name of the GPU. Finally, one can run the prediction:
......
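Complementing the snippet above, here is a minimal, hedged sketch of selecting a GPU device before loading; the `"cuda:0"` device name and the `models/` directory are illustrative values, not ones shipped with DAN:

```python
from pathlib import Path

import torch

from dan.ocr.predict.inference import DAN

# Use the first GPU when CUDA is available, otherwise fall back to CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Load the trained model from a directory containing *.pt, *.pkl and *parameters.yml files.
model = DAN(device)
model.load(Path("models"), mode="eval")
```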
@@ -31,20 +31,7 @@ def add_predict_parser(subcommands) -> None:
parser.add_argument(
"--model",
type=pathlib.Path,
help="Path to the model to use for prediction.",
required=True,
)
parser.add_argument(
"--parameters",
type=pathlib.Path,
help="Path to the YAML parameters file.",
required=True,
default="page",
)
parser.add_argument(
"--charset",
type=pathlib.Path,
help="Path to the charset file.",
help="Path to the directory containing the model, the YAML parameters file and the charset file to be used for prediction.",
required=True,
)
parser.add_argument(
......
@@ -49,20 +49,32 @@ class DAN:
def load(
self,
model_path: Path,
params_path: Path,
charset_path: Path,
path: Path,
mode: str = "eval",
use_language_model: bool = False,
) -> None:
"""
Load a trained model.
:param model_path: Path to the model.
:param params_path: Path to the parameters.
:param charset_path: Path to the charset.
:param path: Path to the directory containing the model, the YAML parameters file and the charset file.
:param mode: The mode to load the model (train or eval).
:param use_language_model: Whether to use an explicit language model to rescore text hypotheses.
"""
model_path = list(path.glob("*.pt"))
assert len(model_path) == 1, f"Found {len(model_path)} model(s) `.pt` in {path}"
model_path = model_path.pop()
params_path = list(path.glob("*parameters.yml"))
assert (
len(params_path) == 1
), f"Found {len(params_path)} parameter(s) `parameters.yml` in {path}"
params_path = params_path.pop()
charset_path = list(path.glob("*.pkl"))
assert (
len(charset_path) == 1
), f"Found {len(charset_path)} charset(s) `.pkl` in {path}"
charset_path = charset_path.pop()
parameters = yaml.safe_load(params_path.read_text())["parameters"]
parameters["decoder"]["device"] = self.device
@@ -410,8 +422,6 @@ def run(
image: Optional[Path],
image_dir: Optional[Path],
model: Path,
parameters: Path,
charset: Path,
output: Path,
confidence_score: bool,
confidence_score_levels: List[Level],
@@ -434,9 +444,7 @@ def run(
Predict a single image and save the output
:param image: Path to the image to predict.
:param image_dir: Path to the folder where the images to predict are stored.
:param model: Path to the model to use for prediction.
:param parameters: Path to the YAML parameters file.
:param charset: Path to the charset.
:param model: Path to the directory containing the model, the YAML parameters file and the charset file to be used for prediction.
:param output: Path to the output folder where the results will be saved.
:param confidence_score: Whether to compute confidence score.
:param attention_map: Whether to plot the attention map.
@@ -460,9 +468,7 @@ def run(
cuda_device = f":{gpu_device}" if gpu_device is not None else ""
device = f"cuda{cuda_device}" if torch.cuda.is_available() else "cpu"
dan_model = DAN(device, temperature)
dan_model.load(
model, parameters, charset, mode="eval", use_language_model=use_language_model
)
dan_model.load(model, mode="eval", use_language_model=use_language_model)
# Do not use LM with invalid LM weight
use_language_model = dan_model.lm_decoder is not None
......
@@ -22,16 +22,16 @@ output/
│ ├── train
│ ├── val
│ └── test
└── language_model
├── corpus_characters.txt
├── lexicon_characters.txt
├── corpus_subwords.txt
├── lexicon_subwords.txt
├── corpus_words.txt
├── lexicon_words.txt
├── subword_tokenizer.model
├── subword_tokenizer.vocab
└── tokens.txt
└── language_model
├── corpus_characters.txt
├── lexicon_characters.txt
├── corpus_subwords.txt
├── lexicon_subwords.txt
├── corpus_words.txt
├── lexicon_words.txt
├── subword_tokenizer.model
├── subword_tokenizer.vocab
└── tokens.txt
```
## 2. Train
......
@@ -9,9 +9,7 @@ Use the `teklia-dan predict` command to apply a trained DAN model on an image.
| `--image` | Path to the image to predict. Must not be provided with `--image-dir`. | `pathlib.Path` | |
| `--image-dir` | Path to the folder where the images to predict are stored. Must not be provided with `--image`. | `pathlib.Path` | |
| `--image-extension` | The extension of the images in the folder. Ignored if `--image-dir` is not provided. | `str` | .jpg |
| `--model` | Path to the model to use for prediction | `pathlib.Path` | |
| `--parameters` | Path to the YAML parameters file. | `pathlib.Path` | |
| `--charset` | Path to the charset file. | `pathlib.Path` | |
| `--model` | Path to the directory containing the model, the YAML parameters file and the charset file to be used for prediction | `pathlib.Path` | |
| `--output` | Path to the output folder. Results will be saved in this directory. | `pathlib.Path` | |
| `--tokens` | Path to a yaml file containing a mapping between starting tokens and end tokens. Needed for entities. | `pathlib.Path` | |
| `--temperature` | Temperature scaling scalar parameter. | `float` | `1.0` |
@@ -31,6 +29,12 @@ Use the `teklia-dan predict` command to apply a trained DAN model on an image.
## Examples
In the following examples, the `models` directory should contain (a setup sketch follows this list):
- a single model file `*.pt`,
- a single charset file `*.pkl`,
- a single parameters file `*parameters.yml`.
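As an illustration only, one way to assemble such a directory from an existing model, charset and parameters file (the source file names below are assumptions, mirroring the test setup in this commit):

```python
import shutil
from pathlib import Path

# Gather the three files into a `models/` directory; source file names are illustrative.
models = Path("models")
models.mkdir(exist_ok=True)
for src in ("model.pt", "charset.pkl", "parameters.yml"):
    shutil.copyfile(src, models / src)
```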
### Predict with confidence scores
To run a prediction with confidence scores, run this command:
@@ -38,9 +42,7 @@ To run a prediction with confidence scores, run this command:
```shell
teklia-dan predict \
--image example.jpg \
--model model.pt \
--parameters inference_parameters.yml \
--charset charset.pkl \
--model models \
--output predict/ \
--confidence-score
```
@@ -63,9 +65,7 @@ To run a prediction with confidence scores and plot line-level attention maps, r
```shell
teklia-dan predict \
--image example.jpg \
--model model.pt \
--parameters inference_parameters.yml \
--charset charset.pkl \
--model models \
--output predict/ \
--confidence-score \
--attention-map
@@ -92,9 +92,7 @@ To run a prediction with confidence scores and plot word-level attention maps, r
```shell
teklia-dan predict \
--image example.jpg \
--model model.pt \
--parameters inference_parameters.yml \
--charset charset.pkl \
--model models \
--output predict/ \
--confidence-score \
--attention-map \
@@ -123,9 +121,7 @@ To run a prediction, plot line-level attention maps, and extract polygons, run t
```shell
teklia-dan predict \
--image example.jpg \
--model model.pt \
--parameters inference_parameters.yml \
--charset charset.pkl \
--model models \
--output predict/ \
--attention-map \
--predict-objects
@@ -197,9 +193,7 @@ Then, run this command:
```shell
teklia-dan predict \
--image dan_humu_page/6e830f23-e70d-4399-8b94-f36ed3198575.jpg \
--model dan_humu_page/model.pt \
--parameters dan_humu_page/inference_parameters_char_lm.yml \
--charset dan_humu_page/charset.pkl \
--model models \
--use-language-model \
--output dan_humu_page/predict_char_lm/
```
@@ -235,9 +229,7 @@ Then, run this command:
```shell
teklia-dan predict \
--image dan_humu_page/6e830f23-e70d-4399-8b94-f36ed3198575.jpg \
--model dan_humu_page/model.pt \
--parameters dan_humu_page/inference_parameters_subword_lm.yml \
--charset dan_humu_page/charset.pkl \
--model models \
--use-language-model \
--output dan_humu_page/predict_subword_lm/
```
@@ -273,9 +265,7 @@ Then, run this command:
```shell
teklia-dan predict \
--image dan_humu_page/6e830f23-e70d-4399-8b94-f36ed3198575.jpg \
--model dan_humu_page/model.pt \
--parameters dan_humu_page/inference_parameters_word_lm.yml \
--charset dan_humu_page/charset.pkl \
--model models \
--use-language-model \
--output dan_humu_page/predict_word_lm/
```
......
@@ -41,12 +41,7 @@ def test_predict(image_name, expected_prediction):
device = "cpu"
dan_model = DAN(device)
dan_model.load(
model_path=PREDICTION_DATA_PATH / "popp_line_model.pt",
params_path=PREDICTION_DATA_PATH / "parameters.yml",
charset_path=PREDICTION_DATA_PATH / "charset.pkl",
mode="eval",
)
dan_model.load(path=PREDICTION_DATA_PATH, mode="eval")
image_path = PREDICTION_DATA_PATH / "images" / image_name
_, image = dan_model.preprocess(str(image_path))
@@ -301,9 +296,7 @@ def test_run_prediction(
run_prediction(
image=(PREDICTION_DATA_PATH / "images" / image_name).with_suffix(".png"),
image_dir=None,
model=PREDICTION_DATA_PATH / "popp_line_model.pt",
parameters=PREDICTION_DATA_PATH / "parameters.yml",
charset=PREDICTION_DATA_PATH / "charset.pkl",
model=PREDICTION_DATA_PATH,
output=tmp_path,
confidence_score=True if confidence_score else False,
confidence_score_levels=confidence_score if confidence_score else [],
@@ -497,9 +490,7 @@ def test_run_prediction_batch(
run_prediction(
image=None,
image_dir=image_dir,
model=PREDICTION_DATA_PATH / "popp_line_model.pt",
parameters=PREDICTION_DATA_PATH / "parameters.yml",
charset=PREDICTION_DATA_PATH / "charset.pkl",
model=PREDICTION_DATA_PATH,
output=tmp_path,
confidence_score=True if confidence_score else False,
confidence_score_levels=confidence_score if confidence_score else [],
@@ -640,16 +631,26 @@ def test_run_prediction_language_model(
)
# Update language_model_weight in parameters.yml
model_path = tmp_path / "models"
model_path.mkdir(exist_ok=True)
shutil.copyfile(
PREDICTION_DATA_PATH / "popp_line_model.pt",
model_path / "popp_line_model.pt",
)
shutil.copyfile(
PREDICTION_DATA_PATH / "charset.pkl",
model_path / "charset.pkl",
)
params = read_yaml(PREDICTION_DATA_PATH / "parameters.yml")
params["parameters"]["language_model"]["weight"] = language_model_weight
yaml.dump(params, (tmp_path / "parameters.yml").open("w"))
yaml.dump(params, (model_path / "parameters.yml").open("w"))
run_prediction(
image=None,
image_dir=image_dir,
model=PREDICTION_DATA_PATH / "popp_line_model.pt",
parameters=tmp_path / "parameters.yml",
charset=PREDICTION_DATA_PATH / "charset.pkl",
model=model_path,
output=tmp_path,
confidence_score=False,
confidence_score_levels=[],
......