diff --git a/README.md b/README.md index 15a18601ad574e77d3b9c22bbe39b0dffb097cd4..42e37351d3fb53ebbd1c76cbc7e55f3bf3563ffd 100644 --- a/README.md +++ b/README.md @@ -61,15 +61,17 @@ from dan.ocr.predict.inference import DAN image = cv2.cvtColor(cv2.imread(IMAGE_PATH), cv2.COLOR_BGR2RGB) ``` -Then one can initialize and load the trained model with the parameters used during training. +Then one can initialize and load the trained model with the parameters used during training. The directory passed as parameter should have: + +- a single model file `*.pt`, +- a single charset file `*.pkl`, +- a single parameters file `*parameters.yml`. ```python -model_path = "model.pt" -params_path = "parameters.yml" -charset_path = "charset.pkl" +model_path = "models" model = DAN("cpu") -model.load(model_path, params_path, charset_path, mode="eval") +model.load(model_path, mode="eval") ``` To run the inference on a GPU, one can replace `cpu` by the name of the GPU. In the end, one can run the prediction: diff --git a/dan/ocr/predict/__init__.py b/dan/ocr/predict/__init__.py index fd5359059abbc930ae5eb460fcd3d1b9e21d8e2e..3500fbed4a860b9e4799feb1f7d56c21d4d06e05 100644 --- a/dan/ocr/predict/__init__.py +++ b/dan/ocr/predict/__init__.py @@ -31,20 +31,7 @@ def add_predict_parser(subcommands) -> None: parser.add_argument( "--model", type=pathlib.Path, - help="Path to the model to use for prediction.", - required=True, - ) - parser.add_argument( - "--parameters", - type=pathlib.Path, - help="Path to the YAML parameters file.", - required=True, - default="page", - ) - parser.add_argument( - "--charset", - type=pathlib.Path, - help="Path to the charset file.", + help="Path to the directory containing the model, the YAML parameters file and the charset file to be used for prediction.", required=True, ) parser.add_argument( diff --git a/dan/ocr/predict/inference.py b/dan/ocr/predict/inference.py index e012a5bfda873de7738911475bcd519274d3f448..c73c8890318db98098d3ec3894cb35186ce25647 100644 --- a/dan/ocr/predict/inference.py +++ b/dan/ocr/predict/inference.py @@ -49,20 +49,32 @@ class DAN: def load( self, - model_path: Path, - params_path: Path, - charset_path: Path, + path: Path, mode: str = "eval", use_language_model: bool = False, ) -> None: """ Load a trained model. - :param model_path: Path to the model. - :param params_path: Path to the parameters. - :param charset_path: Path to the charset. + :param path: Path to the directory containing the model, the YAML parameters file and the charset file. :param mode: The mode to load the model (train or eval). :param use_language_model: Whether to use an explicit language model to rescore text hypotheses. """ + model_path = list(path.glob("*.pt")) + assert len(model_path) == 1, f"Found {len(model_path)} model(s) `.pt` in {path}" + model_path = model_path.pop() + + params_path = list(path.glob("*parameters.yml")) + assert ( + len(params_path) == 1 + ), f"Found {len(params_path)} parameter(s) `parameters.yml` in {path}" + params_path = params_path.pop() + + charset_path = list(path.glob("*.pkl")) + assert ( + len(charset_path) == 1 + ), f"Found {len(charset_path)} charset(s) `.pkl` in {path}" + charset_path = charset_path.pop() + parameters = yaml.safe_load(params_path.read_text())["parameters"] parameters["decoder"]["device"] = self.device @@ -410,8 +422,6 @@ def run( image: Optional[Path], image_dir: Optional[Path], model: Path, - parameters: Path, - charset: Path, output: Path, confidence_score: bool, confidence_score_levels: List[Level], @@ -434,9 +444,7 @@ def run( Predict a single image save the output :param image: Path to the image to predict. :param image_dir: Path to the folder where the images to predict are stored. - :param model: Path to the model to use for prediction. - :param parameters: Path to the YAML parameters file. - :param charset: Path to the charset. + :param model: Path to the directory containing the model, the YAML parameters file and the charset file to be used for prediction. :param output: Path to the output folder where the results will be saved. :param confidence_score: Whether to compute confidence score. :param attention_map: Whether to plot the attention map. @@ -460,9 +468,7 @@ def run( cuda_device = f":{gpu_device}" if gpu_device is not None else "" device = f"cuda{cuda_device}" if torch.cuda.is_available() else "cpu" dan_model = DAN(device, temperature) - dan_model.load( - model, parameters, charset, mode="eval", use_language_model=use_language_model - ) + dan_model.load(model, mode="eval", use_language_model=use_language_model) # Do not use LM with invalid LM weight use_language_model = dan_model.lm_decoder is not None diff --git a/docs/get_started/training.md b/docs/get_started/training.md index e28b6d314e73731d20d743dabd4d00857437b235..f3ae783e83c8218f9bc64a0b9419785011ade3ca 100644 --- a/docs/get_started/training.md +++ b/docs/get_started/training.md @@ -22,16 +22,16 @@ output/ │ ├── train │ ├── val │ └── test -├── language_model -│ ├── corpus_characters.txt -│ ├── lexicon_characters.txt -│ ├── corpus_subwords.txt -│ ├── lexicon_subwords.txt -│ ├── corpus_words.txt -│ ├── lexicon_words.txt -│ ├── subword_tokenizer.model -│ ├── subword_tokenizer.vocab -│ └── tokens.txt +└── language_model + ├── corpus_characters.txt + ├── lexicon_characters.txt + ├── corpus_subwords.txt + ├── lexicon_subwords.txt + ├── corpus_words.txt + ├── lexicon_words.txt + ├── subword_tokenizer.model + ├── subword_tokenizer.vocab + └── tokens.txt ``` ## 2. Train diff --git a/docs/usage/predict/index.md b/docs/usage/predict/index.md index 51f99d4a9d588af5abf5b78401e2ea5ecd609920..497d9f378840d2f6bf20c9c15e3be1bae76bcd78 100644 --- a/docs/usage/predict/index.md +++ b/docs/usage/predict/index.md @@ -9,9 +9,7 @@ Use the `teklia-dan predict` command to apply a trained DAN model on an image. | `--image` | Path to the image to predict. Must not be provided with `--image-dir`. | `pathlib.Path` | | | `--image-dir` | Path to the folder where the images to predict are stored. Must not be provided with `--image`. | `pathlib.Path` | | | `--image-extension` | The extension of the images in the folder. Ignored if `--image-dir` is not provided. | `str` | .jpg | -| `--model` | Path to the model to use for prediction | `pathlib.Path` | | -| `--parameters` | Path to the YAML parameters file. | `pathlib.Path` | | -| `--charset` | Path to the charset file. | `pathlib.Path` | | +| `--model` | Path to the directory containing the model, the YAML parameters file and the charset file to be used for prediction | `pathlib.Path` | | | `--output` | Path to the output folder. Results will be saved in this directory. | `pathlib.Path` | | | `--tokens` | Path to a yaml file containing a mapping between starting tokens and end tokens. Needed for entities. | `pathlib.Path` | | | `--temperature` | Temperature scaling scalar parameter. | `float` | `1.0` | @@ -31,6 +29,12 @@ Use the `teklia-dan predict` command to apply a trained DAN model on an image. ## Examples +In the following examples the `models` directory should have: + +- a single model file `*.pt`, +- a single charset file `*.pkl`, +- a single parameters file `*parameters.yml`. + ### Predict with confidence scores To run a prediction with confidence scores, run this command: @@ -38,9 +42,7 @@ To run a prediction with confidence scores, run this command: ```shell teklia-dan predict \ --image example.jpg \ - --model model.pt \ - --parameters inference_parameters.yml \ - --charset charset.pkl \ + --model models \ --output predict/ \ --confidence-score ``` @@ -63,9 +65,7 @@ To run a prediction with confidence scores and plot line-level attention maps, r ```shell teklia-dan predict \ --image example.jpg \ - --model model.pt \ - --parameters inference_parameters.yml \ - --charset charset.pkl \ + --model models \ --output predict/ \ --confidence-score \ --attention-map @@ -92,9 +92,7 @@ To run a prediction with confidence scores and plot word-level attention maps, r ```shell teklia-dan predict \ --image example.jpg \ - --model model.pt \ - --parameters inference_parameters.yml \ - --charset charset.pkl \ + --model models \ --output predict/ \ --confidence-score \ --attention-map \ @@ -123,9 +121,7 @@ To run a prediction, plot line-level attention maps, and extract polygons, run t ```shell teklia-dan predict \ --image example.jpg \ - --model model.pt \ - --parameters inference_parameters.yml \ - --charset charset.pkl \ + --model models \ --output predict/ \ --attention-map \ --predict-objects @@ -197,9 +193,7 @@ Then, run this command: ```shell teklia-dan predict \ --image dan_humu_page/6e830f23-e70d-4399-8b94-f36ed3198575.jpg \ - --model dan_humu_page/model.pt \ - --parameters dan_humu_page/inference_parameters_char_lm.yml \ - --charset dan_humu_page/charset.pkl \ + --model models \ --use-language-model \ --output dan_humu_page/predict_char_lm/ ``` @@ -235,9 +229,7 @@ Then, run this command: ```shell teklia-dan predict \ --image dan_humu_page/6e830f23-e70d-4399-8b94-f36ed3198575.jpg \ - --model dan_humu_page/model.pt \ - --parameters dan_humu_page/inference_parameters_subword_lm.yml \ - --charset dan_humu_page/charset.pkl \ + --model models \ --use-language-model \ --output dan_humu_page/predict_subword_lm/ ``` @@ -273,9 +265,7 @@ Then, run this command: ```shell teklia-dan predict \ --image dan_humu_page/6e830f23-e70d-4399-8b94-f36ed3198575.jpg \ - --model dan_humu_page/model.pt \ - --parameters dan_humu_page/inference_parameters_word_lm.yml \ - --charset dan_humu_page/charset.pkl \ + --model models \ --use-language-model \ --output dan_humu_page/predict_word_lm/ ``` diff --git a/tests/test_prediction.py b/tests/test_prediction.py index 6affeeb944d56eec9f44cf85b6805a09f8d0a7fc..f84b3406b68c02c7ec92e0046f4e9c84577d4908 100644 --- a/tests/test_prediction.py +++ b/tests/test_prediction.py @@ -41,12 +41,7 @@ def test_predict(image_name, expected_prediction): device = "cpu" dan_model = DAN(device) - dan_model.load( - model_path=PREDICTION_DATA_PATH / "popp_line_model.pt", - params_path=PREDICTION_DATA_PATH / "parameters.yml", - charset_path=PREDICTION_DATA_PATH / "charset.pkl", - mode="eval", - ) + dan_model.load(path=PREDICTION_DATA_PATH, mode="eval") image_path = PREDICTION_DATA_PATH / "images" / image_name _, image = dan_model.preprocess(str(image_path)) @@ -301,9 +296,7 @@ def test_run_prediction( run_prediction( image=(PREDICTION_DATA_PATH / "images" / image_name).with_suffix(".png"), image_dir=None, - model=PREDICTION_DATA_PATH / "popp_line_model.pt", - parameters=PREDICTION_DATA_PATH / "parameters.yml", - charset=PREDICTION_DATA_PATH / "charset.pkl", + model=PREDICTION_DATA_PATH, output=tmp_path, confidence_score=True if confidence_score else False, confidence_score_levels=confidence_score if confidence_score else [], @@ -497,9 +490,7 @@ def test_run_prediction_batch( run_prediction( image=None, image_dir=image_dir, - model=PREDICTION_DATA_PATH / "popp_line_model.pt", - parameters=PREDICTION_DATA_PATH / "parameters.yml", - charset=PREDICTION_DATA_PATH / "charset.pkl", + model=PREDICTION_DATA_PATH, output=tmp_path, confidence_score=True if confidence_score else False, confidence_score_levels=confidence_score if confidence_score else [], @@ -640,16 +631,26 @@ def test_run_prediction_language_model( ) # Update language_model_weight in parameters.yml + model_path = tmp_path / "models" + model_path.mkdir(exist_ok=True) + + shutil.copyfile( + PREDICTION_DATA_PATH / "popp_line_model.pt", + model_path / "popp_line_model.pt", + ) + shutil.copyfile( + PREDICTION_DATA_PATH / "charset.pkl", + model_path / "charset.pkl", + ) + params = read_yaml(PREDICTION_DATA_PATH / "parameters.yml") params["parameters"]["language_model"]["weight"] = language_model_weight - yaml.dump(params, (tmp_path / "parameters.yml").open("w")) + yaml.dump(params, (model_path / "parameters.yml").open("w")) run_prediction( image=None, image_dir=image_dir, - model=PREDICTION_DATA_PATH / "popp_line_model.pt", - parameters=tmp_path / "parameters.yml", - charset=PREDICTION_DATA_PATH / "charset.pkl", + model=model_path, output=tmp_path, confidence_score=False, confidence_score_levels=[],