test_prediction.py

# -*- coding: utf-8 -*-

import json
import shutil

import numpy as np
import pytest
import yaml

from dan.ocr.predict.attention import Level
from dan.ocr.predict.inference import DAN
from dan.ocr.predict.inference import run as run_prediction
from dan.utils import parse_tokens, read_yaml
from tests import FIXTURES

PREDICTION_DATA_PATH = FIXTURES / "prediction"


@pytest.mark.parametrize(
    "image_name, expected_prediction",
    (
        (
            "0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84.png",
            {"text": ["ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241"]},
        ),
        (
            "0dfe8bcd-ed0b-453e-bf19-cc697012296e.png",
            {"text": ["ⓈTemplié ⒻMarcelle Ⓑ93 ⓁS Ⓚch ⓄE dactylo Ⓟ18376"]},
        ),
        (
            "2c242f5c-e979-43c4-b6f2-a6d4815b651d.png",
            {"text": ["Ⓢd ⒻCharles Ⓑ11 ⓁP ⒸC ⓀF Ⓞd Ⓟ14 31"]},
        ),
        (
            "ffdec445-7f14-4f5f-be44-68d0844d0df1.png",
            {"text": ["ⓈNaudin ⒻMarie Ⓑ53 ⓁS Ⓒv ⓀBelle mère"]},
        ),
    ),
)
@pytest.mark.parametrize("normalize", (True, False))
def test_predict(image_name, expected_prediction, normalize, tmp_path):
    # Update mean/std in parameters.yml
    model_path = tmp_path / "models"
    model_path.mkdir(exist_ok=True)

    shutil.copyfile(
        PREDICTION_DATA_PATH / "model.pt",
        model_path / "model.pt",
    )
    shutil.copyfile(
        PREDICTION_DATA_PATH / "charset.pkl",
        model_path / "charset.pkl",
    )

    params = read_yaml(PREDICTION_DATA_PATH / "parameters.yml")
    if not normalize:
        del params["parameters"]["mean"]
        del params["parameters"]["std"]
    yaml.dump(params, (model_path / "parameters.yml").open("w"))

    device = "cpu"

    dan_model = DAN(device)
    dan_model.load(path=model_path, mode="eval")

    image_path = PREDICTION_DATA_PATH / "images" / image_name
    _, image = dan_model.preprocess(str(image_path))

    input_tensor = image.unsqueeze(0)
    input_tensor = input_tensor.to(device)
    input_sizes = [image.shape[1:]]

    prediction = dan_model.predict(input_tensor, input_sizes)

    assert prediction == expected_prediction


@pytest.mark.parametrize(
    "image_name, confidence_score, temperature, expected_prediction",
    (
        (
            "0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84",
            None,
            1.0,
            {
                "text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
                "language_model": {},
                "confidences": {},
            },
        ),
        (
            "0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84",
            [Level.Word],
            1.0,
            {
                "text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
                "language_model": {},
                "confidences": {
                    "total": 1.0,
                    "word": [
                        {"text": "ⓈBellisson", "confidence": 1.0},
                        {"text": "ⒻGeorges", "confidence": 1.0},
                        {"text": "Ⓑ91", "confidence": 1.0},
                        {"text": "ⓁP", "confidence": 1.0},
                        {"text": "ⒸM", "confidence": 1.0},
                        {"text": "ⓀCh", "confidence": 1.0},
                        {"text": "ⓄPlombier", "confidence": 1.0},
                        {"text": "ⓅPatron?12241", "confidence": 1.0},
                    ],
                },
            },
        ),
        (
            "0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84",
            [Level.NER, Level.Word],
            3.5,
            {
                "text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
                "language_model": {},
                "confidences": {
                    "total": 0.93,
                    "ner": [
                        {"text": "ⓈBellisson ", "confidence": 0.92},
                        {"text": "ⒻGeorges ", "confidence": 0.94},
                        {"text": "Ⓑ91 ", "confidence": 0.93},
                        {"text": "ⓁP ", "confidence": 0.92},
                        {"text": "ⒸM ", "confidence": 0.93},
                        {"text": "ⓀCh ", "confidence": 0.95},
                        {"text": "ⓄPlombier ", "confidence": 0.93},
                        {"text": "ⓅPatron?12241", "confidence": 0.93},
                    ],
                    "word": [
                        {"text": "ⓈBellisson", "confidence": 0.93},
                        {"text": "ⒻGeorges", "confidence": 0.94},
                        {"text": "Ⓑ91", "confidence": 0.92},
                        {"text": "ⓁP", "confidence": 0.94},
                        {"text": "ⒸM", "confidence": 0.93},
                        {"text": "ⓀCh", "confidence": 0.96},
                        {"text": "ⓄPlombier", "confidence": 0.94},
                        {"text": "ⓅPatron?12241", "confidence": 0.93},
                    ],
                },
            },
        ),
        (
            "0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84",
            [Level.Line],
            1.0,
            {
                "text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
                "language_model": {},
                "confidences": {
                    "total": 1.0,
                    "line": [
                        {
                            "text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
                            "confidence": 1.0,
                        }
                    ],
                },
            },
        ),
        (
            "0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84",
            [Level.NER, Level.Line],
            3.5,
            {
                "text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
                "language_model": {},
                "confidences": {
                    "total": 0.93,
                    "ner": [
                        {"text": "ⓈBellisson ", "confidence": 0.92},
                        {"text": "ⒻGeorges ", "confidence": 0.94},
                        {"text": "Ⓑ91 ", "confidence": 0.93},
                        {"text": "ⓁP ", "confidence": 0.92},
                        {"text": "ⒸM ", "confidence": 0.93},
                        {"text": "ⓀCh ", "confidence": 0.95},
                        {"text": "ⓄPlombier ", "confidence": 0.93},
                        {"text": "ⓅPatron?12241", "confidence": 0.93},
                    ],
                    "line": [
                        {
                            "text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
                            "confidence": 0.93,
                        }
                    ],
                },
            },
        ),
        (
            "0dfe8bcd-ed0b-453e-bf19-cc697012296e",
            None,
            1.0,
            {
                "text": "ⓈTemplié ⒻMarcelle Ⓑ93 ⓁS Ⓚch ⓄE dactylo Ⓟ18376",
                "language_model": {},
                "confidences": {},
            },
        ),
        (
            "0dfe8bcd-ed0b-453e-bf19-cc697012296e",
            [Level.NER, Level.Char, Level.Word, Level.Line],
            1.0,
            {
                "text": "ⓈTemplié ⒻMarcelle Ⓑ93 ⓁS Ⓚch ⓄE dactylo Ⓟ18376",
                "language_model": {},
                "confidences": {
                    "total": 1.0,
                    "ner": [
                        {"text": "ⓈTemplié ", "confidence": 0.98},
                        {"text": "ⒻMarcelle ", "confidence": 1.0},
                        {"text": "Ⓑ93 ", "confidence": 1.0},
                        {"text": "ⓁS ", "confidence": 1.0},
                        {"text": "Ⓚch ", "confidence": 1.0},
                        {"text": "ⓄE dactylo ", "confidence": 1.0},
                        {"text": "Ⓟ18376", "confidence": 1.0},
                    ],
                    "char": [
                        {"text": "Ⓢ", "confidence": 1.0},
                        {"text": "T", "confidence": 1.0},
                        {"text": "e", "confidence": 1.0},
                        {"text": "m", "confidence": 1.0},
                        {"text": "p", "confidence": 1.0},
                        {"text": "l", "confidence": 1.0},
                        {"text": "i", "confidence": 1.0},
                        {"text": "é", "confidence": 0.85},
                        {"text": " ", "confidence": 1.0},
                        {"text": "Ⓕ", "confidence": 1.0},
                        {"text": "M", "confidence": 1.0},
                        {"text": "a", "confidence": 1.0},
                        {"text": "r", "confidence": 1.0},
                        {"text": "c", "confidence": 1.0},
                        {"text": "e", "confidence": 1.0},
                        {"text": "l", "confidence": 1.0},
                        {"text": "l", "confidence": 1.0},
                        {"text": "e", "confidence": 1.0},
                        {"text": " ", "confidence": 1.0},
                        {"text": "Ⓑ", "confidence": 1.0},
                        {"text": "9", "confidence": 1.0},
                        {"text": "3", "confidence": 1.0},
                        {"text": " ", "confidence": 1.0},
                        {"text": "Ⓛ", "confidence": 1.0},
                        {"text": "S", "confidence": 1.0},
                        {"text": " ", "confidence": 1.0},
                        {"text": "Ⓚ", "confidence": 1.0},
                        {"text": "c", "confidence": 1.0},
                        {"text": "h", "confidence": 1.0},
                        {"text": " ", "confidence": 1.0},
                        {"text": "Ⓞ", "confidence": 1.0},
                        {"text": "E", "confidence": 1.0},
                        {"text": " ", "confidence": 1.0},
                        {"text": "d", "confidence": 1.0},
                        {"text": "a", "confidence": 1.0},
                        {"text": "c", "confidence": 1.0},
                        {"text": "t", "confidence": 1.0},
                        {"text": "y", "confidence": 1.0},
                        {"text": "l", "confidence": 1.0},
                        {"text": "o", "confidence": 1.0},
                        {"text": " ", "confidence": 1.0},
                        {"text": "Ⓟ", "confidence": 1.0},
                        {"text": "1", "confidence": 1.0},
                        {"text": "8", "confidence": 1.0},
                        {"text": "3", "confidence": 1.0},
                        {"text": "7", "confidence": 1.0},
                        {"text": "6", "confidence": 1.0},
                    ],
                    "word": [
                        {"text": "ⓈTemplié", "confidence": 0.98},
                        {"text": "ⒻMarcelle", "confidence": 1.0},
                        {"text": "Ⓑ93", "confidence": 1.0},
                        {"text": "ⓁS", "confidence": 1.0},
                        {"text": "Ⓚch", "confidence": 1.0},
                        {"text": "ⓄE", "confidence": 1.0},
                        {"text": "dactylo", "confidence": 1.0},
                        {"text": "Ⓟ18376", "confidence": 1.0},
                    ],
                    "line": [
                        {
                            "text": "ⓈTemplié ⒻMarcelle Ⓑ93 ⓁS Ⓚch ⓄE dactylo Ⓟ18376",
                            "confidence": 1.0,
                        }
                    ],
                },
            },
        ),
        (
            "2c242f5c-e979-43c4-b6f2-a6d4815b651d",
            False,
            1.0,
            {
                "text": "Ⓢd ⒻCharles Ⓑ11 ⓁP ⒸC ⓀF Ⓞd Ⓟ14 31",
                "language_model": {},
                "confidences": {},
            },
        ),
        (
            "ffdec445-7f14-4f5f-be44-68d0844d0df1",
            False,
            1.0,
            {
                "text": "ⓈNaudin ⒻMarie Ⓑ53 ⓁS Ⓒv ⓀBelle mère",
                "language_model": {},
                "confidences": {},
            },
        ),
    ),
)
def test_run_prediction(
    image_name,
    confidence_score,
    temperature,
    expected_prediction,
    tmp_path,
):
    # Make tmpdir and copy needed image inside
    image_dir = tmp_path / "images"
    image_dir.mkdir()
    shutil.copyfile(
        (PREDICTION_DATA_PATH / "images" / image_name).with_suffix(".png"),
        (image_dir / image_name).with_suffix(".png"),
    )

    run_prediction(
        image_dir=image_dir,
        model=PREDICTION_DATA_PATH,
        output=tmp_path,
        confidence_score=True if confidence_score else False,
        confidence_score_levels=confidence_score if confidence_score else [],
        attention_map=False,
        attention_map_level=None,
        attention_map_scale=0.5,
        word_separators=[" ", "\n"],
        line_separators=["\n"],
        temperature=temperature,
        predict_objects=False,
        max_object_height=None,
        image_extension=".png",
        gpu_device=None,
        batch_size=1,
        tokens=parse_tokens(PREDICTION_DATA_PATH / "tokens.yml"),
        start_token=None,
        use_language_model=False,
    )

    prediction = json.loads((tmp_path / image_name).with_suffix(".json").read_text())
    assert prediction == expected_prediction


@pytest.mark.parametrize(
    "image_names, confidence_score, temperature, expected_predictions",
    (
        (
            ["0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84"],
            None,
            1.0,
            [
                {
                    "text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
                    "language_model": {},
                    "confidences": {},
                }
            ],
        ),
        (
            ["0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84"],
            [Level.Word],
            1.0,
            [
                {
                    "text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
                    "language_model": {},
                    "confidences": {
                        "total": 1.0,
                        "word": [
                            {"text": "ⓈBellisson", "confidence": 1.0},
                            {"text": "ⒻGeorges", "confidence": 1.0},
                            {"text": "Ⓑ91", "confidence": 1.0},
                            {"text": "ⓁP", "confidence": 1.0},
                            {"text": "ⒸM", "confidence": 1.0},
                            {"text": "ⓀCh", "confidence": 1.0},
                            {"text": "ⓄPlombier", "confidence": 1.0},
                            {"text": "ⓅPatron?12241", "confidence": 1.0},
                        ],
                    },
                }
            ],
        ),
        (
            [
                "0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84",
                "0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84",
            ],
            [Level.NER, Level.Word],
            1.0,
            [
                {
                    "text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
                    "language_model": {},
                    "confidences": {
                        "total": 1.0,
                        "ner": [
                            {"text": "ⓈBellisson ", "confidence": 1.0},
                            {"text": "ⒻGeorges ", "confidence": 1.0},
                            {"text": "Ⓑ91 ", "confidence": 1.0},
                            {"text": "ⓁP ", "confidence": 1.0},
                            {"text": "ⒸM ", "confidence": 1.0},
                            {"text": "ⓀCh ", "confidence": 1.0},
                            {"text": "ⓄPlombier ", "confidence": 1.0},
                            {"text": "ⓅPatron?12241", "confidence": 1.0},
                        ],
                        "word": [
                            {"text": "ⓈBellisson", "confidence": 1.0},
                            {"text": "ⒻGeorges", "confidence": 1.0},
                            {"text": "Ⓑ91", "confidence": 1.0},
                            {"text": "ⓁP", "confidence": 1.0},
                            {"text": "ⒸM", "confidence": 1.0},
                            {"text": "ⓀCh", "confidence": 1.0},
                            {"text": "ⓄPlombier", "confidence": 1.0},
                            {"text": "ⓅPatron?12241", "confidence": 1.0},
                        ],
                    },
                },
                {
                    "text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
                    "language_model": {},
                    "confidences": {
                        "total": 1.0,
                        "ner": [
                            {"text": "ⓈBellisson ", "confidence": 1.0},
                            {"text": "ⒻGeorges ", "confidence": 1.0},
                            {"text": "Ⓑ91 ", "confidence": 1.0},
                            {"text": "ⓁP ", "confidence": 1.0},
                            {"text": "ⒸM ", "confidence": 1.0},
                            {"text": "ⓀCh ", "confidence": 1.0},
                            {"text": "ⓄPlombier ", "confidence": 1.0},
                            {"text": "ⓅPatron?12241", "confidence": 1.0},
                        ],
                        "word": [
                            {"text": "ⓈBellisson", "confidence": 1.0},
                            {"text": "ⒻGeorges", "confidence": 1.0},
                            {"text": "Ⓑ91", "confidence": 1.0},
                            {"text": "ⓁP", "confidence": 1.0},
                            {"text": "ⒸM", "confidence": 1.0},
                            {"text": "ⓀCh", "confidence": 1.0},
                            {"text": "ⓄPlombier", "confidence": 1.0},
                            {"text": "ⓅPatron?12241", "confidence": 1.0},
                        ],
                    },
                },
            ],
        ),
        (
            ["0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84"],
            [Level.Word],
            1.0,
            [
                {
                    "text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
                    "language_model": {},
                    "confidences": {
                        "total": 1.0,
                        "word": [
                            {"text": "ⓈBellisson", "confidence": 1.0},
                            {"text": "ⒻGeorges", "confidence": 1.0},
                            {"text": "Ⓑ91", "confidence": 1.0},
                            {"text": "ⓁP", "confidence": 1.0},
                            {"text": "ⒸM", "confidence": 1.0},
                            {"text": "ⓀCh", "confidence": 1.0},
                            {"text": "ⓄPlombier", "confidence": 1.0},
                            {"text": "ⓅPatron?12241", "confidence": 1.0},
                        ],
                    },
                }
            ],
        ),
        (
            [
                "2c242f5c-e979-43c4-b6f2-a6d4815b651d",
                "ffdec445-7f14-4f5f-be44-68d0844d0df1",
            ],
            False,
            1.0,
            [
                {
                    "text": "Ⓢd ⒻCharles Ⓑ11 ⓁP ⒸC ⓀF Ⓞd Ⓟ14 31",
                    "language_model": {},
                    "confidences": {},
                },
                {
                    "text": "ⓈNaudin ⒻMarie Ⓑ53 ⓁS Ⓒv ⓀBelle mère",
                    "language_model": {},
                    "confidences": {},
                },
            ],
        ),
    ),
)
@pytest.mark.parametrize("batch_size", [1, 2])
def test_run_prediction_batch(
    image_names,
    confidence_score,
    temperature,
    expected_predictions,
    batch_size,
    tmp_path,
):
    # Make tmpdir and copy needed images inside
    image_dir = tmp_path / "images"
    image_dir.mkdir()
    for image_name in image_names:
        shutil.copyfile(
            (PREDICTION_DATA_PATH / "images" / image_name).with_suffix(".png"),
            (image_dir / image_name).with_suffix(".png"),
        )

    run_prediction(
        image_dir=image_dir,
        model=PREDICTION_DATA_PATH,
        output=tmp_path,
        confidence_score=True if confidence_score else False,
        confidence_score_levels=confidence_score if confidence_score else [],
        attention_map=False,
        attention_map_level=None,
        attention_map_scale=0.5,
        word_separators=[" ", "\n"],
        line_separators=["\n"],
        temperature=temperature,
        predict_objects=False,
        max_object_height=None,
        image_extension=".png",
        gpu_device=None,
        batch_size=batch_size,
        tokens=parse_tokens(PREDICTION_DATA_PATH / "tokens.yml"),
        start_token=None,
        use_language_model=False,
    )

    for image_name, expected_prediction in zip(image_names, expected_predictions):
        prediction = json.loads(
            (tmp_path / image_name).with_suffix(".json").read_text()
        )
        assert prediction == expected_prediction


@pytest.mark.parametrize(
    "image_names, language_model_weight, expected_predictions",
    (
        (
            [
                "0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84",
                "0dfe8bcd-ed0b-453e-bf19-cc697012296e",
                "2c242f5c-e979-43c4-b6f2-a6d4815b651d",
                "ffdec445-7f14-4f5f-be44-68d0844d0df1",
            ],
            1.0,
            [
                {
                    "text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
                    "language_model": {
                        "text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
                        "confidence": 0.92,
                    },
                },
                {
                    "text": "ⓈTemplié ⒻMarcelle Ⓑ93 ⓁS Ⓚch ⓄE dactylo Ⓟ18376",
                    "language_model": {
                        "text": "ⓈTemplié ⒻMarcelle Ⓑ93 ⓁS Ⓚch ⓄE dactylo Ⓟ18376",
                        "confidence": 0.88,
                    },
                },
                {
                    "text": "Ⓢd ⒻCharles Ⓑ11 ⓁP ⒸC ⓀF Ⓞd Ⓟ14 31",
                    "language_model": {
                        "text": "Ⓢd ⒻCharles Ⓑ11 ⓁP ⒸC ⓀF Ⓞd Ⓟ14 31",
                        "confidence": 0.86,
                    },
                },
                {
                    "text": "ⓈNaudin ⒻMarie Ⓑ53 ⓁS Ⓒv ⓀBelle mère",
                    "language_model": {
                        "text": "ⓈNaudin ⒻMarie Ⓑ53 ⓁS Ⓒv ⓀBelle mère",
                        "confidence": 0.89,
                    },
                },
            ],
        ),
        (
            [
                "0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84",
                "0dfe8bcd-ed0b-453e-bf19-cc697012296e",
                "2c242f5c-e979-43c4-b6f2-a6d4815b651d",
                "ffdec445-7f14-4f5f-be44-68d0844d0df1",
            ],
            2.0,
            [
                {
                    "text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
                    "language_model": {
                        "text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241",
                        "confidence": 0.90,
                    },
                },
                {
                    "text": "ⓈTemplié ⒻMarcelle Ⓑ93 ⓁS Ⓚch ⓄE dactylo Ⓟ18376",
                    "language_model": {
                        "text": "ⓈTemplié ⒻMarcelle Ⓑ93 ⓁS Ⓚch ⓄE dactylo Ⓟ18376",
                        "confidence": 0.84,
                    },
                },
                {
                    "text": "Ⓢd ⒻCharles Ⓑ11 ⓁP ⒸC ⓀF Ⓞd Ⓟ14 31",
                    "language_model": {
                        "text": "Ⓢd ⒻCharles Ⓑ11 ⓁP ⒸC ⓀF Ⓞd Ⓟ14331",
                        "confidence": 0.83,
                    },
                },
                {
                    "text": "ⓈNaudin ⒻMarie Ⓑ53 ⓁS Ⓒv ⓀBelle mère",
                    "language_model": {
                        "text": "ⓈNaudin ⒻMarie Ⓑ53 ⓁS Ⓒv ⓀBelle mère",
                        "confidence": 0.86,
                    },
                },
            ],
        ),
        (
            [
                "0a56e8b3-95cd-4fa5-a17b-5b0ff9e6ea84",
                "0dfe8bcd-ed0b-453e-bf19-cc697012296e",
                "2c242f5c-e979-43c4-b6f2-a6d4815b651d",
                "ffdec445-7f14-4f5f-be44-68d0844d0df1",
            ],
            0.0,
            [
                {"text": "ⓈBellisson ⒻGeorges Ⓑ91 ⓁP ⒸM ⓀCh ⓄPlombier ⓅPatron?12241"},
                {"text": "ⓈTemplié ⒻMarcelle Ⓑ93 ⓁS Ⓚch ⓄE dactylo Ⓟ18376"},
                {"text": "Ⓢd ⒻCharles Ⓑ11 ⓁP ⒸC ⓀF Ⓞd Ⓟ14 31"},
                {"text": "ⓈNaudin ⒻMarie Ⓑ53 ⓁS Ⓒv ⓀBelle mère"},
            ],
        ),
    ),
)
def test_run_prediction_language_model(
    image_names,
    language_model_weight,
    expected_predictions,
    tmp_path,
):
    # Make tmpdir and copy needed images inside
    image_dir = tmp_path / "images"
    image_dir.mkdir()
    for image_name in image_names:
        shutil.copyfile(
            (PREDICTION_DATA_PATH / "images" / image_name).with_suffix(".png"),
            (image_dir / image_name).with_suffix(".png"),
        )

    # Update language_model_weight in parameters.yml
    model_path = tmp_path / "models"
    model_path.mkdir(exist_ok=True)

    shutil.copyfile(
        PREDICTION_DATA_PATH / "model.pt",
        model_path / "model.pt",
    )
    shutil.copyfile(
        PREDICTION_DATA_PATH / "charset.pkl",
        model_path / "charset.pkl",
    )

    params = read_yaml(PREDICTION_DATA_PATH / "parameters.yml")
    params["parameters"]["language_model"]["weight"] = language_model_weight
    yaml.dump(params, (model_path / "parameters.yml").open("w"))

    run_prediction(
        image_dir=image_dir,
        model=model_path,
        output=tmp_path,
        confidence_score=False,
        confidence_score_levels=[],
        attention_map=[],
        attention_map_level=None,
        attention_map_scale=0.5,
        word_separators=[" ", "\n"],
        line_separators=["\n"],
        temperature=1.0,
        predict_objects=False,
        max_object_height=None,
        image_extension=".png",
        gpu_device=None,
        batch_size=1,
        tokens=parse_tokens(PREDICTION_DATA_PATH / "tokens.yml"),
        start_token=None,
        use_language_model=True,
    )

    for image_name, expected_prediction in zip(image_names, expected_predictions):
        prediction = json.loads(
            (tmp_path / image_name).with_suffix(".json").read_text()
        )
        assert prediction["text"] == expected_prediction["text"]

        if language_model_weight > 0:
            assert (
                prediction["language_model"]["text"]
                == expected_prediction["language_model"]["text"]
            )
            assert np.isclose(
                prediction["language_model"]["confidence"],
                expected_prediction["language_model"]["confidence"],
            )