Commit 721eef7d authored by Yoann Schneider

Merge branch 'model-compile' into 'main'

Add an option to compile models during inference

Closes #264

See merge request !382
Parents: 948176f3, c03abb5c
@@ -141,4 +141,16 @@ def add_predict_parser(subcommands) -> None:
         action="store_true",
         required=False,
     )
+    parser.add_argument(
+        "--compile-model",
+        help="Whether to compile the model. Recommended to speed up inference.",
+        action="store_true",
+        required=False,
+    )
+    parser.add_argument(
+        "--dynamic-mode",
+        help="Whether to use the dynamic mode during model compilation. Recommended for prediction on images of variable size.",
+        action="store_true",
+        required=False,
+    )
     parser.set_defaults(func=run)
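For context, a minimal standalone sketch (a hypothetical parser, not the project's real CLI wiring) of how these `action="store_true"` flags behave: omitted on the command line they default to `False`, and passing them sets the matching attribute to `True`, which is then forwarded to `run()`.

```python
import argparse

# Hypothetical parser reproducing only the two flags added above.
parser = argparse.ArgumentParser()
parser.add_argument("--compile-model", action="store_true", required=False)
parser.add_argument("--dynamic-mode", action="store_true", required=False)

args = parser.parse_args([])
print(args.compile_model, args.dynamic_mode)  # False False

args = parser.parse_args(["--compile-model", "--dynamic-mode"])
print(args.compile_model, args.dynamic_mode)  # True True
```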
@@ -52,12 +52,16 @@ class DAN:
         path: Path,
         mode: str = "eval",
         use_language_model: bool = False,
+        compile_model: bool = False,
+        dynamic_mode: bool = False,
     ) -> None:
         """
         Load a trained model.
         :param path: Path to the directory containing the model, the YAML parameters file and the charset file.
         :param mode: The mode to load the model (train or eval).
         :param use_language_model: Whether to use an explicit language model to rescore text hypotheses.
+        :param compile_model: Whether to compile the model.
+        :param dynamic_mode: Whether to use the dynamic mode during model compilation.
         """
         model_path = path / "model.pt"
         assert model_path.is_file(), f"File {model_path} not found"
@@ -84,6 +88,15 @@ class DAN:
         logger.debug(f"Loaded model {model_path}")
 
+        if compile_model:
+            torch.compiler.cudagraph_mark_step_begin()
+            encoder = torch.compile(encoder, dynamic=True if dynamic_mode else None)
+            torch.compiler.cudagraph_mark_step_begin()
+            decoder = torch.compile(decoder, dynamic=True if dynamic_mode else None)
+            logger.info("Encoder and decoder have been compiled")
+
         if mode == "train":
             encoder.train()
             decoder.train()
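A note on the `dynamic` argument used above: with `torch.compile`, `dynamic=None` (the default) first assumes static input shapes and only recompiles with dynamic shapes once a size change is detected, while `dynamic=True` traces with symbolic shapes from the start. A minimal sketch on a toy module (not DAN's encoder or decoder), assuming PyTorch ≥ 2.1:

```python
import torch

# Toy stand-in for the encoder/decoder; the real modules come from the
# loaded DAN checkpoint.
module = torch.nn.Conv2d(3, 8, kernel_size=3).eval()

# Mirrors `dynamic=True if dynamic_mode else None` in the diff above.
auto_shapes = torch.compile(module, dynamic=None)      # dynamic_mode=False
symbolic_shapes = torch.compile(module, dynamic=True)  # dynamic_mode=True

with torch.inference_mode():
    # Variable input heights, as with document images of different sizes:
    # the symbolic-shape variant avoids one recompilation per new size.
    for height in (128, 160, 192):
        symbolic_shapes(torch.randn(1, 3, height, 128))
```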
@@ -445,6 +458,8 @@ def run(
     tokens: Dict[str, EntityType],
     start_token: str,
     use_language_model: bool,
+    compile_model: bool,
+    dynamic_mode: bool,
 ) -> None:
     """
     Predict a single image and save the output
@@ -464,6 +479,8 @@
     :param tokens: NER tokens used.
     :param start_token: Use a specific starting token at the beginning of the prediction. Useful when making predictions on different single pages.
     :param use_language_model: Whether to use an explicit language model to rescore text hypotheses.
+    :param compile_model: Whether to compile the model.
+    :param dynamic_mode: Whether to use the dynamic mode during model compilation.
     """
     # Create output directory if necessary
     if not output.exists():
@@ -473,7 +490,13 @@
     cuda_device = f":{gpu_device}" if gpu_device is not None else ""
     device = f"cuda{cuda_device}" if torch.cuda.is_available() else "cpu"
     dan_model = DAN(device, temperature)
-    dan_model.load(model, mode="eval", use_language_model=use_language_model)
+    dan_model.load(
+        model,
+        mode="eval",
+        use_language_model=use_language_model,
+        compile_model=compile_model,
+        dynamic_mode=dynamic_mode,
+    )
 
     images = image_dir.rglob(f"*{image_extension}")
     for image_batch in list_to_batches(images, n=batch_size):
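For readers calling the Python API directly rather than the CLI, a hedged sketch of the equivalent `load` call with the new keywords. The `DAN` import path is not part of this diff, so the instance is passed in; the helper name and argument values are hypothetical.

```python
from pathlib import Path

def load_compiled(dan_model, model_dir: Path) -> None:
    """Hypothetical helper: load a DAN model with compilation enabled.

    `dan_model` is an instance of the `DAN` class patched above; its
    import path is not shown in this diff, so it is taken as a parameter.
    """
    dan_model.load(
        model_dir,              # directory with model.pt, parameters, charset
        mode="eval",
        use_language_model=False,
        compile_model=True,     # wrap encoder and decoder in torch.compile
        dynamic_mode=True,      # symbolic shapes for variable-size images
    )
```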
@@ -25,6 +25,8 @@ Use the `teklia-dan predict` command to apply a trained DAN model on an image.
 | `--batch-size` | Size of the batches for prediction. | `int` | `1` |
 | `--start-token` | Use a specific starting token at the beginning of the prediction. Useful when making predictions on different single pages. | `str` | |
 | `--use-language-model` | Whether to use an explicit language model to rescore text hypotheses. | `bool` | `False` |
+| `--compile-model` | Whether to compile the model. Recommended to speed up inference. | `bool` | `False` |
+| `--dynamic-mode` | Whether to use the dynamic mode during model compilation. Recommended for prediction on images of variable size. | `bool` | `False` |
 
 The `--model` argument expects a directory with the following files:
@@ -277,3 +279,28 @@ It will create the following JSON file named after the image in the `predict_wor
   }
 }
 ```
+
+### Speed up prediction with model compilation
+
+To speed up prediction, it is recommended to [compile models using `torch.compile`](https://pytorch.org/tutorials/intermediate/torch_compile_tutorial.html).
+
+Run this command to use this option:
+
+```shell
+teklia-dan predict \
+    --image-dir images/ \
+    --model models \
+    --output predict/ \
+    --compile-model
+```
+
+When predicting on images of variable size, it is recommended to enable the `dynamic` mode:
+
+```shell
+teklia-dan predict \
+    --image-dir images/ \
+    --model models \
+    --output predict/ \
+    --compile-model \
+    --dynamic-mode
+```
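One caveat worth keeping in mind (general `torch.compile` behaviour, not specific to this merge request): compilation happens lazily on the first forward pass, so the first image or batch pays the compilation cost and only later ones run faster. A small timing sketch on a toy model:

```python
import time
import torch

model = torch.nn.Linear(256, 256).eval()
compiled = torch.compile(model)

x = torch.randn(1, 256)
with torch.inference_mode():
    t0 = time.perf_counter()
    compiled(x)  # first call triggers compilation: noticeably slow
    print(f"first call:  {time.perf_counter() - t0:.3f}s")

    t0 = time.perf_counter()
    compiled(x)  # same input shape, cached graph: fast
    print(f"second call: {time.perf_counter() - t0:.5f}s")
```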
@@ -341,6 +341,8 @@ def test_run_prediction(
         tokens=parse_tokens(PREDICTION_DATA_PATH / "tokens.yml"),
         start_token=None,
         use_language_model=False,
+        compile_model=False,
+        dynamic_mode=False,
     )
 
     prediction = json.loads((tmp_path / image_name).with_suffix(".json").read_text())
@@ -534,6 +536,8 @@ def test_run_prediction_batch(
         tokens=parse_tokens(PREDICTION_DATA_PATH / "tokens.yml"),
         start_token=None,
         use_language_model=False,
+        compile_model=False,
+        dynamic_mode=False,
     )
 
     for image_name, expected_prediction in zip(image_names, expected_predictions):
@@ -693,6 +697,8 @@ def test_run_prediction_language_model(
         tokens=parse_tokens(PREDICTION_DATA_PATH / "tokens.yml"),
         start_token=None,
         use_language_model=True,
+        compile_model=False,
+        dynamic_mode=False,
     )
 
     for image_name, expected_prediction in zip(image_names, expected_predictions):