Compare revisions: atr/dan

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (14)
Showing 719 additions and 84 deletions
*gif filter=lfs diff=lfs merge=lfs -text
......@@ -56,3 +56,6 @@ See the [dedicated section](https://teklia.gitlab.io/atr/dan/usage/train/) on th
### Synthetic data generation
See the [dedicated section](https://teklia.gitlab.io/atr/dan/usage/generate/) on the official DAN documentation.
### Model prediction
See the [dedicated section](https://teklia.gitlab.io/atr/dan/usage/predict/) on the official DAN documentation.
......@@ -5,6 +5,7 @@ import errno
from dan.datasets import add_dataset_parser
from dan.ocr import add_train_parser
from dan.ocr.line import add_generate_parser
from dan.predict import add_predict_parser
def get_parser():
......@@ -14,6 +15,7 @@ def get_parser():
add_dataset_parser(subcommands)
add_train_parser(subcommands)
add_generate_parser(subcommands)
add_predict_parser(subcommands)
return parser
......
......@@ -8,7 +8,6 @@ import sys
from datetime import date
from time import time
import mlflow
import numpy as np
import torch
import torch.distributed as dist
......@@ -22,10 +21,16 @@ from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
from dan.manager.metrics import MetricManager
from dan.mlflow import logging_metrics, logging_tags_metrics
from dan.ocr.utils import LM_ind_to_str
from dan.schedulers import DropoutScheduler
try:
import mlflow
from dan.mlflow import logging_metrics, logging_tags_metrics
except ImportError:
pass
class GenericTrainingManager:
def __init__(self, params):
......
......@@ -3,11 +3,22 @@ import os
from contextlib import contextmanager
import mlflow
from mlflow.exceptions import MlflowException
import requests
from mlflow.environment_variables import MLFLOW_HTTP_REQUEST_MAX_RETRIES
from dan import logger
def make_mlflow_request(mlflow_method, *args, **kwargs):
"""
Encapsulate MLflow HTTP requests to prevent them from crashing the whole training process.
"""
try:
mlflow_method(*args, **kwargs)
except requests.exceptions.ConnectionError as e:
logger.error(f"Call to `{str(mlflow_method)}` failed with error: {str(e)}")
def setup_environment(config: dict):
"""
Get the necessary variables from the config file and put them in the environment variables
......@@ -24,6 +35,13 @@ def setup_environment(config: dict):
if config_key in config:
os.environ[variable_name] = config[config_key]
# Check max retry setting
max_retries = MLFLOW_HTTP_REQUEST_MAX_RETRIES.get()
if max_retries and int(max_retries) <= 1:
logger.warning(
f"The maximum number of retries for MLflow HTTP requests is set to {max_retries}, which is low. Consider using a higher value."
)
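For reference, a sketch of the kind of `mlflow` configuration block this function consumes, mirroring the keys used in `dan/ocr/document/train.py` below (all values are placeholders; the exact config-key-to-environment-variable mapping is elided from this diff):

```python
from dan.mlflow import setup_environment

mlflow_config = {
    "run_name": "Test log DAN",
    "run_id": None,
    "s3_endpoint_url": "https://s3.example.com",
    "tracking_uri": "https://mlflow.example.com",
    "experiment_id": "0",
    "aws_access_key_id": "placeholder",
    "aws_secret_access_key": "placeholder",
}
# Exports the matching environment variables and warns if
# MLFLOW_HTTP_REQUEST_MAX_RETRIES is set to a low value.
setup_environment(mlflow_config)
```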
def logging_metrics(
display_values: dict,
......@@ -42,10 +60,11 @@ def logging_metrics(
:param is_master: bool, whether the current process is the master process (to log only once); defaults to False
"""
if mlflow_logging and is_master:
mlflow_values = {
f"{step}_{name}": value for name, value in display_values.items()
}
mlflow.log_metrics(mlflow_values, epoch)
make_mlflow_request(
mlflow_method=mlflow.log_metrics,
metrics={f"{step}_{name}": value for name, value in display_values.items()},
step=epoch,
)
def logging_tags_metrics(
......@@ -63,16 +82,18 @@ def logging_tags_metrics(
:param is_master: bool, whether the current process is the master process (to log only once); defaults to False
"""
if mlflow_logging and is_master:
mlflow_values = {
f"{step}_{name}": value for name, value in display_values.items()
}
mlflow.set_tags(mlflow_values)
make_mlflow_request(
mlflow_method=mlflow.set_tags,
tags={f"{step}_{name}": value for name, value in display_values.items()},
)
@contextmanager
def start_mlflow_run(config: dict):
"""
Create an MLflow execution context with the parameters contained in the config file
Create an MLflow execution context with the parameters contained in the config file.
Yields the active MLflow run, as well as a boolean saying whether a new one was created.
:param config: dict, the config of the model
"""
......@@ -80,16 +101,22 @@ def start_mlflow_run(config: dict):
# Set needed variables in environment
setup_environment(config)
run_name, run_id = config.get("run_name"), config.get("run_id")
if run_id:
logger.info(f"Will resume run ({run_id}).")
if run_name:
logger.warning(
"Run_name will be ignored since you specified a run_id to resume from."
)
# Set experiment from config
experiment_id = config.get("experiment_id")
assert experiment_id, "Missing MLflow experiment ID in the configuration"
try:
mlflow.set_experiment(experiment_id=experiment_id)
logger.info(f"Run Experiment ID : {experiment_id} on MLFlow")
except MlflowException as e:
logger.error(f"Couldn't set Mlflow experiment with ID: {experiment_id}")
raise e
# Start run
yield mlflow.start_run(run_name=config.get("run_name"))
yield mlflow.start_run(
run_id=run_id, run_name=run_name, experiment_id=experiment_id
), run_id is None
mlflow.end_run()
......@@ -5,19 +5,30 @@ import random
from copy import deepcopy
from pathlib import Path
import mlflow
import numpy as np
import torch
import torch.multiprocessing as mp
from torch.optim import Adam
from dan import logger
from dan.decoder import GlobalHTADecoder
from dan.manager.ocr import OCRDataset, OCRDatasetManager
from dan.manager.training import Manager
from dan.mlflow import start_mlflow_run
from dan.models import FCN_Encoder
from dan.schedulers import exponential_dropout_scheduler
from dan.transforms import aug_config
from dan.utils import MLflowNotInstalled
try:
import mlflow
MLFLOW = True
logger.info("MLflow Logging available.")
from dan.mlflow import make_mlflow_request, start_mlflow_run
except ImportError:
MLFLOW = False
logger = logging.getLogger(__name__)
......@@ -64,14 +75,14 @@ def get_config():
dataset_name = "esposalles"
dataset_level = "record"
dataset_variant = "_debug"
dataset_path = ""
dataset_path = "."
params = {
"mlflow": {
"dataset_name": dataset_name,
"run_name": "Test log DAN",
"run_id": None,
"s3_endpoint_url": "",
"tracking_uri": "",
"experiment_id": "9",
"experiment_id": "0",
"aws_access_key_id": "",
"aws_secret_access_key": "",
},
......@@ -94,6 +105,11 @@ def get_config():
(dataset_name, "val"),
],
},
"test": {
"{}-test".format(dataset_name): [
(dataset_name, "test"),
],
},
"config": {
"load_in_memory": True, # Load all images in CPU memory
"worker_per_gpu": 4, # Num of parallel processes per gpu for data loading
......@@ -218,12 +234,15 @@ def get_config():
},
}
return params
return params, dataset_name
def serialize_config(config):
"""
Serialize a dictionary to transform it into json and remove the credentials
Make every field of the configuration JSON-serializable and remove sensitive information.
- Classes are transformed using their name attribute
- Functions are cast to strings
"""
# Create a copy of the original config without erasing it
serialized_config = deepcopy(config)
......@@ -261,6 +280,20 @@ def serialize_config(config):
serialized_config["training_params"]["nb_gpu"] = str(
serialized_config["training_params"]["nb_gpu"]
)
if (
"synthetic_data" in config["dataset_params"]["config"]
and config["dataset_params"]["config"]["synthetic_data"]
):
# The probability scheduler is a function and needs to be cast to a string
serialized_config["dataset_params"]["config"]["synthetic_data"][
"proba_scheduler_function"
] = str(
serialized_config["dataset_params"]["config"]["synthetic_data"][
"proba_scheduler_function"
]
)
return serialized_config
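Concretely (a sketch; the exact string form of a serialized function depends on Python's default `repr`):

```python
from dan.ocr.document.train import get_config, serialize_config

config, dataset_name = get_config()
artifact = serialize_config(config)
# Functions are now plain strings, e.g. the dropout scheduler becomes
# something like "<function exponential_dropout_scheduler at 0x7f...>",
# so the whole dict can be logged with mlflow.log_dict as config.json.
```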
......@@ -269,29 +302,35 @@ def run():
Main program, training a new model, using a valid configuration
"""
config = get_config()
config_artifact = serialize_config(config)
labels_artifact = ""
dataset_name = config["mlflow"]["dataset_name"]
labels_path = (
Path(config_artifact["dataset_params"]["datasets"][dataset_name])
/ "labels.json"
)
config, dataset_name = get_config()
if config["mlflow"]:
with start_mlflow_run(config["mlflow"]) as run:
logger.info(f"Set tags to MLflow on {config['mlflow']['run_name']}")
mlflow.set_tags({"Dataset": config["mlflow"]["dataset_name"]})
if MLFLOW and "mlflow" in config:
labels_path = (
Path(config["dataset_params"]["datasets"][dataset_name]) / "labels.json"
)
with start_mlflow_run(config["mlflow"]) as (run, created):
if created:
logger.info(f"Started MLflow run with ID ({run.info.run_id})")
else:
logger.info(f"Resumed MLflow run with ID ({run.info.run_id})")
make_mlflow_request(
mlflow_method=mlflow.set_tags, tags={"Dataset": dataset_name}
)
# Get the labels json file
with open(labels_path) as json_file:
labels_artifact = json.load(json_file)
# Log MLflow artifacts
mlflow.log_dict(config_artifact, "config.json")
mlflow.log_dict(labels_artifact, "labels.json")
logger.info(f"Started MLflow run with ID ({run.info.run_id})")
for artifact, filename in [
(serialize_config(config), "config.json"),
(labels_artifact, "labels.json"),
]:
make_mlflow_request(
mlflow_method=mlflow.log_dict,
dictionary=artifact,
artifact_file=filename,
)
if (
config["training_params"]["use_ddp"]
and not config["training_params"]["force_cpu"]
......@@ -303,6 +342,11 @@ def run():
)
else:
train_and_test(0, config, True)
elif "mlflow" in config:
logger.error(
"Cannot log to MLflow as the `mlflow` module was not found in your environment."
)
raise MLflowNotInstalled()
else:
if (
config["training_params"]["use_ddp"]
......
# -*- coding: utf-8 -*-
"""
Predict on an image using a trained DAN model.
"""
import pathlib
from dan.predict.prediction import run
def add_predict_parser(subcommands) -> None:
parser = subcommands.add_parser(
"predict",
description=__doc__,
help=__doc__,
)
# Required arguments.
parser.add_argument(
"--image",
type=pathlib.Path,
help="Path to the image to predict.",
required=True,
)
parser.add_argument(
"--model",
type=pathlib.Path,
help="Path to the model to use for prediction.",
required=True,
)
parser.add_argument(
"--parameters",
type=pathlib.Path,
help="Path to the YAML parameters file.",
required=True,
default="page",
)
parser.add_argument(
"--charset",
type=pathlib.Path,
help="Path to the charset file.",
required=True,
)
parser.add_argument(
"--output",
type=pathlib.Path,
help="Path to the output folder.",
required=True,
)
# Optional arguments.
parser.add_argument(
"--scale",
type=float,
default=1.0,
required=False,
help="Image scaling factor before feeding it to DAN",
)
parser.add_argument(
"--confidence-score",
action="store_true",
help="Whether to return confidence scores.",
required=False,
)
parser.add_argument(
"--confidence-score-levels",
default="",
type=str,
nargs="+",
help="Levels of confidence scores. Should be a list of any combinaison of ['char', 'word', 'line'].",
required=False,
)
parser.add_argument(
"--attention-map",
action="store_true",
help="Whether to plot attention maps.",
required=False,
)
parser.add_argument(
"--attention-map-level",
type=str,
choices=["line", "word", "char"],
default="line",
help="Level of attention maps.",
required=False,
)
parser.add_argument(
"--attention-map-scale",
type=float,
default=0.5,
help="Image scaling factor before creating the GIF",
required=False,
)
parser.add_argument(
"--word-separators",
default=[" ", "\n"],
type=str,
nargs="+",
help="String separators used to split text into words.",
required=False,
)
parser.add_argument(
"--line-separators",
default=["\n"],
type=str,
nargs="+",
help="String separators used to split text into lines.",
required=False,
)
parser.set_defaults(func=run)
# -*- coding: utf-8 -*-
import re
import cv2
import numpy as np
from PIL import Image
from dan import logger
def split_text(text, level, word_separators, line_separators):
"""
Split text into a list of characters, words, or lines.
:param text: Text prediction from DAN
:param level: Level to visualize (char, word, line)
"""
# split into characters
if level == "char":
text_split = list(text)
offset = 0
# split into words
elif level == "word":
text_split = re.split(word_separators, text)
offset = 1
# split into lines
elif level == "line":
text_split = re.split(line_separators, text)
offset = 1
else:
logger.error("Level should be either 'char', 'word', or 'line'")
raise ValueError(f"Unknown level: {level}")
return text_split, offset
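A quick illustration of the returned `(pieces, offset)` contract (a sketch; separators are passed as a regex pattern, as produced by `parse_delimiters` in `dan/predict/prediction.py`):

```python
from dan.predict.attention import split_text

# "word"-level split: the offset of 1 accounts for the single separator
# character consumed between consecutive pieces, so attention weights can
# be indexed cumulatively in plot_attention.
pieces, offset = split_text("le 7 mars\n1848", "word", r"[ \n]", r"\n")
assert pieces == ["le", "7", "mars", "1848"] and offset == 1
```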
def plot_attention(
image,
text,
weights,
level,
scale,
outname,
word_separators=["\n", " "],
line_separators=["\n"],
):
"""
Create a GIF by blending attention maps into the image for each text piece (char, word, or line)
:param image: Input image in PIL format
:param text: Text predicted by DAN
:param weights: Attention weights of size (n_char, feature_height, feature_width)
:param level: Level to display (must be in [char, word, line])
:param scale: Scaling factor for the output gif image
:param outname: Name of the gif image
"""
height, width, _ = image.shape
attention_map = []
# Convert to PIL Image and create mask
mask = Image.new("L", (width, height), color=(110))
image = Image.fromarray(image)
# Split text into characters, words or lines
text_list, offset = split_text(text, level, word_separators, line_separators)
# Iterate on characters, words or lines
tot_len = 0
max_value = weights.sum(0).max()
for text_piece in text_list:
# blank vector to accumulate weights for the current word/line
coverage_vector = np.zeros((height, width))
for i in range(len(text_piece)):
local_weight = weights[i + tot_len]
local_weight = cv2.resize(local_weight, (width, height))
coverage_vector = np.clip(coverage_vector + local_weight, 0, 1)
# Keep track of text length
tot_len += len(text_piece) + offset
# Normalize coverage vector
coverage_vector = (coverage_vector / max_value * 255).astype(np.uint8)
# Blend coverage vector with original image
blank_array = np.zeros((height, width)).astype(np.uint8)
coverage_vector = Image.fromarray(
np.stack([coverage_vector, blank_array, blank_array], axis=2), "RGB"
)
blend = Image.composite(image, coverage_vector, mask)
# Resize to save time
blend = blend.resize((int(width * scale), int(height * scale)), Image.ANTIALIAS)
attention_map.append(blend)
attention_map[0].save(
outname,
save_all=True,
format="GIF",
append_images=attention_map[1:],
duration=1000,
loop=True,
)
# -*- coding: utf-8 -*-
import logging
import os
import pickle
import re
import cv2
import numpy as np
import torch
import yaml
from dan import logger
from dan.datasets.extract.utils import save_json
from dan.decoder import GlobalHTADecoder
from dan.models import FCN_Encoder
from dan.ocr.utils import LM_ind_to_str
from dan.predict.attention import plot_attention
from dan.utils import read_image, round_floats
class DAN:
......@@ -50,7 +55,7 @@ class DAN:
decoder = GlobalHTADecoder(parameters["decoder"]).to(self.device)
decoder.load_state_dict(checkpoint["decoder_state_dict"], strict=True)
logging.debug(f"Loaded model {model_path}")
logger.debug(f"Loaded model {model_path}")
if mode == "train":
encoder.train()
......@@ -66,13 +71,11 @@ class DAN:
self.mean, self.std = parameters["mean"], parameters["std"]
self.max_chars = parameters["max_char_prediction"]
def predict(self, input_image, confidences=False):
def preprocess(self, input_image):
"""
Run prediction on an input image.
:param input_image: The image to predict.
:param confidences: Return the characters probabilities.
Preprocess an input_image.
:param input_image: The input image to preprocess.
"""
# Preprocess image.
assert isinstance(
input_image, np.ndarray
), "Input image must be an np.array in RGB"
......@@ -80,12 +83,24 @@ class DAN:
if len(input_image.shape) < 3:
input_image = cv2.cvtColor(input_image, cv2.COLOR_GRAY2RGB)
reduced_size = [input_image.shape[:2]]
input_image = (input_image - self.mean) / self.std
input_image = np.expand_dims(input_image.transpose((2, 0, 1)), axis=0)
input_tensor = torch.from_numpy(input_image).to(self.device)
logging.debug("Image pre-processed")
return input_image
def predict(
self,
input_tensor,
input_sizes,
confidences=False,
attentions=False,
):
"""
Run prediction on an input image.
:param input_tensor: A batch of images to predict.
:param input_sizes: The original images sizes.
:param confidences: Return the characters probabilities.
:param attentions: Return characters attention weights.
"""
input_tensor.to(self.device)
start_token = len(self.charset) + 1
end_token = len(self.charset)
......@@ -102,6 +117,7 @@ class DAN:
whole_output = list()
confidence_scores = list()
attention_maps = list()
cache = None
hidden_predict = None
......@@ -125,7 +141,7 @@ class DAN:
features,
enhanced_features,
predicted_tokens,
reduced_size,
input_sizes,
predicted_tokens_len,
features_size,
start=0,
......@@ -134,6 +150,7 @@ class DAN:
num_pred=1,
)
whole_output.append(output)
attention_maps.append(weights)
confidence_scores.append(
torch.max(torch.softmax(pred[:, :], dim=1), dim=1).values
)
......@@ -155,9 +172,13 @@ class DAN:
if torch.all(reached_end):
break
# Concatenate tensors for each token
confidence_scores = (
torch.cat(confidence_scores, dim=1).cpu().detach().numpy()
)
attention_maps = torch.cat(attention_maps, dim=1).cpu().detach().numpy()
# Remove bot and eot tokens
predicted_tokens = predicted_tokens[:, 1:]
prediction_len[torch.eq(reached_end, False)] = self.max_chars - 1
predicted_tokens = [
......@@ -166,11 +187,124 @@ class DAN:
confidence_scores = [
confidence_scores[i, : prediction_len[i]].tolist() for i in range(b)
]
# Transform tokens to characters
predicted_text = [
LM_ind_to_str(self.charset, t, oov_symbol="") for t in predicted_tokens
]
logging.info("Image processed")
logger.info("Images processed")
out = {"text": predicted_text}
if confidences:
return predicted_text[0], confidence_scores[0]
return predicted_text[0]
out["confidences"] = confidence_scores
if attentions:
out["attentions"] = attention_maps
return out
def parse_delimiters(delimiters):
return re.compile(r"|".join(delimiters))
def compute_prob_by_separator(characters, probabilities, separator):
"""
Split text and confidences using separators and return a list of average confidence scores.
:param characters: list of characters.
:param probabilities: list of probabilities.
:param separator: regex for separators. Use parse_delimiters(["\n", " "]) for word confidences and parse_delimiters(["\n"]) for line confidences.
Returns a list of confidence scores.
"""
# match anything except separators, get start and end index
pattern = re.compile(f"[^{separator.pattern}]+")
matches = [(m.start(), m.end()) for m in re.finditer(pattern, characters)]
# Iterate over text pieces and compute mean confidence
return [np.mean(probabilities[start:end]) for (start, end) in matches]
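A small worked example (values are illustrative; the means are exact here to keep the output readable):

```python
from dan.predict.prediction import compute_prob_by_separator, parse_delimiters

text = "ab cd"
char_confidences = [1.0, 0.5, 1.0, 0.25, 0.75]
word_separators = parse_delimiters([" ", "\n"])

# One mean confidence per word: [(1.0 + 0.5) / 2, (0.25 + 0.75) / 2]
print(compute_prob_by_separator(text, char_confidences, word_separators))
# [0.75, 0.5]
```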
def run(
image,
model,
parameters,
charset,
output,
scale,
confidence_score,
confidence_score_levels,
attention_map,
attention_map_level,
attention_map_scale,
word_separators,
line_separators,
):
# Create output directory if necessary
if not os.path.exists(output):
os.mkdir(output)
# Load model
device = "cuda" if torch.cuda.is_available() else "cpu"
dan_model = DAN(device)
dan_model.load(model, parameters, charset, mode="eval")
# Load image and pre-process it
im = read_image(image, scale=scale)
logger.info("Image loaded.")
im_p = dan_model.preprocess(im)
logger.debug("Image pre-processed.")
# Convert to tensor of size (batch_size, channel, height, width) with batch_size=1
input_tensor = torch.tensor(im_p).permute(2, 0, 1).unsqueeze(0)
input_tensor = input_tensor.to(device)
input_sizes = [im.shape[:2]]
# Predict
prediction = dan_model.predict(
input_tensor,
input_sizes,
confidences=confidence_score,
attentions=attention_map,
)
text = prediction["text"][0]
result = {"text": text}
# Parse delimiters to regex
word_separators = parse_delimiters(word_separators)
line_separators = parse_delimiters(line_separators)
# Average character-based confidence scores
if confidence_score:
char_confidences = prediction["confidences"][0]
result["confidences"] = {"total": np.around(np.mean(char_confidences), 2)}
if "word" in confidence_score_levels:
word_probs = compute_prob_by_separator(
text, char_confidences, word_separators
)
result["confidences"].update({"word": round_floats(word_probs)})
if "line" in confidence_score_levels:
line_probs = compute_prob_by_separator(
text, char_confidences, line_separators
)
result["confidences"].update({"line": round_floats(line_probs)})
if "char" in confidence_score_levels:
result["confidences"].update({"char": round_floats(char_confidences)})
# Save gif with attention map
if attention_map:
gif_filename = f"{output}/{image.stem}_{attention_map_level}.gif"
logger.info(f"Creating attention GIF in {gif_filename}")
plot_attention(
image=im,
text=prediction["text"][0],
weights=prediction["attentions"][0],
level=attention_map_level,
scale=attention_map_scale,
word_separators=word_separators,
line_separators=line_separators,
outname=gif_filename,
)
result["attention_gif"] = gif_filename
json_filename = f"{output}/{image.stem}.json"
logger.info(f"Saving JSON prediction in {json_filename}")
save_json(json_filename, result)
......@@ -18,6 +18,12 @@ SEM_MATCHING_TOKENS_STR = {
SEM_MATCHING_TOKENS = {"": "", "": "", "": "", "": "", "": "", "": ""}
class MLflowNotInstalled(Exception):
"""
Raised when MLflow logging was requested but the module was not installed
"""
def randint(low, high):
"""
Call torch.randint to preserve randomness across dataloader workers
......@@ -179,3 +185,24 @@ def pad_image_width_random(img, new_width, padding_value, max_pad_left_ratio=1):
pad_right = np.ones((h, pad_right, c), dtype=img.dtype) * padding_value
img = np.concatenate([pad_left, img, pad_right], axis=1)
return img
def read_image(filename, scale=1.0):
"""
Read image and rescale it
:param filename: Image path
:param scale: Scaling factor before prediction
"""
image = cv2.cvtColor(cv2.imread(str(filename)), cv2.COLOR_BGR2RGB)
if scale != 1.0:
width = int(image.shape[1] * scale)
height = int(image.shape[0] * scale)
image = cv2.resize(image, (width, height), interpolation=cv2.INTER_AREA)
return image
def round_floats(float_list, decimals=2):
"""
Round a list of floats to a fixed number of decimals
"""
return [np.around(num, decimals) for num in float_list]
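A short usage sketch for these two helpers (`page.jpg` is a placeholder path):

```python
from dan.utils import read_image, round_floats

# Load an RGB image at half resolution before prediction.
image = read_image("page.jpg", scale=0.5)
print(image.shape)  # (height, width, 3)

# Round confidence scores to two decimals (the default).
scores = round_floats([0.12345, 0.98765])  # -> [0.12, 0.99]
```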
black==22.12.0
doc8==1.1.1
mkdocs==1.4.2
mkdocs-material==9.0.6
mkdocs-material==9.1.0
mkdocstrings==0.20.0
mkdocstrings-python==0.8.3
recommonmark==0.7.1
# Attention
::: dan.predict.attention
# Inference
::: dan.predict
::: dan.predict.prediction
......@@ -6,7 +6,10 @@ When `teklia-dan` is installed in your environment, you may use the following co
: To preprocess datasets from Arkindex for training. More details in [the dedicated section](./datasets/index.md).
`teklia-dan train`
: To train a new DAN model. More details in [the dedicated section](./train.md).
: To train a new DAN model. More details in [the dedicated section](./train/index.md).
`teklia-dan generate`
: To generate synthetic data to train DAN models. More details in [the dedicated section](./generate.md).
`teklia-dan predict`
: To run prediction on an image using a trained DAN model. More details in [the dedicated section](./predict.md).
# Predict
Use the `teklia-dan predict` command to run prediction on an image with a trained DAN model.
## Description of parameters
| Parameter | Description | Type | Default |
| --------------------------- | -------------------------------------------------------------------------------------------- | ------- | ------------- |
| `--image` | Path to the image to predict. | `Path` | |
| `--model` | Path to the model to use for prediction. | `Path` | |
| `--parameters` | Path to the YAML parameters file. | `Path` | |
| `--charset` | Path to the charset file. | `Path` | |
| `--output` | Path to the output folder. Results will be saved in this directory. | `Path` | |
| `--scale` | Image scaling factor before feeding it to DAN. | `float` | `1.0` |
| `--confidence-score` | Whether to return confidence scores. | `bool` | `False` |
| `--confidence-score-levels` | Levels at which to return confidence scores. Should be any combination of `["line", "word", "char"]`. | `str` | |
| `--attention-map` | Whether to plot attention maps. | `bool` | `False` |
| `--attention-map-scale` | Image scaling factor before creating the GIF. | `float` | `0.5` |
| `--attention-map-level` | Level to plot the attention maps. Should be in `["line", "word", "char"]`. | `str` | `"line"` |
| `--word-separators` | List of word separators. | `list` | `[" ", "\n"]` |
| `--line-separators` | List of line separators. | `list` | `["\n"]` |
## Examples
### Predict with confidence scores
To run a prediction with confidence scores, run this command:
```shell
teklia-dan predict \
--image dan_humu_page/example.jpg \
--model dan_humu_page/model.pt \
--parameters dan_humu_page/parameters.yml \
--charset dan_humu_page/charset.pkl \
--output dan_humu_page/predict/ \
--scale 0.5 \
--confidence-score
```
It will create the following JSON file named `dan_humu_page/predict/example.json`.
```json
{
"text": "Hansteensgt. 2 IV 28/4 - 19\nKj\u00e6re Gerhard.\nTak for Brevet om Boken og Haven\nog Crokus og Blaaveis og tak fordi\nDu vilde be mig derut sammen\nmed Kris og Ragna. Men vet Du\nda ikke, at Kris reiste med sin S\u00f8-\nster Fru Cr\u00f8ger til Lillehammer\nnogle Dage efter Begravelsen? Hen\ndes Address er Amtsingeni\u00f8r\nCr\u00f8ger. Hun skriver at de blir\nder til lidt ut i Mai. Nu er hun\nnoksaa medtat skj\u00f8nner jeg af Sorg\nog af L\u00e6ngsel, skriver saameget r\u00f8-\nrende om Oluf. Ragna har det\nherligt, skriver hun. Hun er bare\ngla, og det vet jeg, at \"Oluf er gla over,\nder hvor han nu er. Jeg har saa in-\nderlig ondt af hende, og om Du skrev\net Par Ord tror jeg det vilde gj\u00f8re\nhende godt. - Jeg gl\u00e6der mig over,\nat Du har skrevet en Bok, og\njeg er vis paa, at den er god.",
"confidence": 0.99
}
```
### Predict with confidence scores and line-level attention maps
To run a prediction with confidence scores and plot line-level attention maps, run this command:
```shell
teklia-dan predict \
--image dan_humu_page/example.jpg \
--model dan_humu_page/model.pt \
--parameters dan_humu_page/parameters.yml \
--charset dan_humu_page/charset.pkl \
--output dan_humu_page/predict/ \
--scale 0.5 \
--confidence-score \
--attention-map
```
It will create the following JSON file named `dan_humu_page/predict/example.json` and a GIF showing a line-level attention map `dan_humu_page/predict/example_line.gif`.
```json
{
"text": "Hansteensgt. 2 IV 28/4 - 19\nKj\u00e6re Gerhard.\nTak for Brevet om Boken og Haven\nog Crokus og Blaaveis og tak fordi\nDu vilde be mig derut sammen\nmed Kris og Ragna. Men vet Du\nda ikke, at Kris reiste med sin S\u00f8-\nster Fru Cr\u00f8ger til Lillehammer\nnogle Dage efter Begravelsen? Hen\ndes Address er Amtsingeni\u00f8r\nCr\u00f8ger. Hun skriver at de blir\nder til lidt ut i Mai. Nu er hun\nnoksaa medtat skj\u00f8nner jeg af Sorg\nog af L\u00e6ngsel, skriver saameget r\u00f8-\nrende om Oluf. Ragna har det\nherligt, skriver hun. Hun er bare\ngla, og det vet jeg, at \"Oluf er gla over,\nder hvor han nu er. Jeg har saa in-\nderlig ondt af hende, og om Du skrev\net Par Ord tror jeg det vilde gj\u00f8re\nhende godt. - Jeg gl\u00e6der mig over,\nat Du har skrevet en Bok, og\njeg er vis paa, at den er god.",
"confidence": 0.99,
"attention_gif": "dan_humu_page/predict/example_line.gif"
}
```
<img src="../../assets/example_line.gif" />
### Predict with confidence scores and word-level attention maps
To run a prediction with confidence scores and plot word-level attention maps, run this command:
```shell
teklia-dan predict \
--image dan_humu_page/example.jpg \
--model dan_humu_page/model.pt \
--parameters dan_humu_page/parameters.yml \
--charset dan_humu_page/charset.pkl \
--output dan_humu_page/predict/ \
--scale 0.5 \
--confidence-score \
--attention-map \
--attention-map-level word \
--attention-map-scale 0.5
```
It will create the following JSON file named `dan_humu_page/predict/example.json` and a GIF showing a word-level attention map `dan_humu_page/predict/example_word.gif`.
```json
{
"text": "Hansteensgt. 2 IV 28/4 - 19\nKj\u00e6re Gerhard.\nTak for Brevet om Boken og Haven\nog Crokus og Blaaveis og tak fordi\nDu vilde be mig derut sammen\nmed Kris og Ragna. Men vet Du\nda ikke, at Kris reiste med sin S\u00f8-\nster Fru Cr\u00f8ger til Lillehammer\nnogle Dage efter Begravelsen? Hen\ndes Address er Amtsingeni\u00f8r\nCr\u00f8ger. Hun skriver at de blir\nder til lidt ut i Mai. Nu er hun\nnoksaa medtat skj\u00f8nner jeg af Sorg\nog af L\u00e6ngsel, skriver saameget r\u00f8-\nrende om Oluf. Ragna har det\nherligt, skriver hun. Hun er bare\ngla, og det vet jeg, at \"Oluf er gla over,\nder hvor han nu er. Jeg har saa in-\nderlig ondt af hende, og om Du skrev\net Par Ord tror jeg det vilde gj\u00f8re\nhende godt. - Jeg gl\u00e6der mig over,\nat Du har skrevet en Bok, og\njeg er vis paa, at den er god.",
"confidence": 0.99,
"attention_gif": "dan_humu_page/predict/example_word.gif"
}
```
<img src="../../assets/example_word.gif" >
# Train
Use the `teklia-dan train` command to train a new DAN model.
Two subcommands are available depending on your dataset:
`line`
: Train a DAN model at line-level.
`document`
: Train a DAN model at document-level.
## Remarks (for pre-training and training)
All hyperparameters are specified and editable in the training scripts (their meaning is documented in comments).
Evaluation is performed just after training ends (training is stopped when the maximum elapsed time is reached or after a maximum number of epochs, as specified in the training script).
The output files are split into two subfolders:
`checkpoints`
: Contains model weights for the last trained epoch and for the epoch giving the best valid CER.
`results`
: Contains the tensorboard logs for loss and metrics, as well as a text file listing the hyperparameters used and the evaluation results.
# Train
Use the `teklia-dan train` command to train a new DAN model.
Two subcommands are available depending on your dataset:
`line`
: Train a DAN model at line-level and evaluate it.
`document`
: Train a DAN model at document-level and evaluate it.
## Examples
### Document
To train DAN on documents:
1. Set your training configuration in `dan/ocr/document/train.py`. Refer to the [dedicated section](parameters.md) for a description of parameters.
2. Run `teklia-dan train document`.
3. Look into evaluation results in the `output` folder:
* `checkpoints` contains model weights for the last trained epoch and for the epoch giving the best valid CER.
* `results` contains the tensorboard log file, the parameters file, and the evaluation results for the best epoch.
### Line
To train DAN on lines:
1. Set your training configuration in `dan/ocr/line/train.py`. Refer to the [dedicated section](parameters.md) for a description of parameters.
2. Run `teklia-dan train line`.
3. Look into evaluation results in the `output` folder:
* `checkpoints` contains model weights for the last trained epoch and for the epoch giving the best valid CER.
* `results` contains the tensorboard log file, the parameters file, and the evaluation results for the best epoch.
Note that it is possible to run `teklia-dan train document` to train DAN on text lines. However, the configuration must be updated when training on synthetic documents.
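As a concrete starting point for step 1 in the recipes above, the dataset selection happens near the top of `get_config()` (the values below mirror the debug defaults from this changeset):

```python
# In dan/ocr/document/train.py, inside get_config():
dataset_name = "esposalles"
dataset_level = "record"
dataset_variant = "_debug"
dataset_path = "."
```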
## Additional page
* [Jean Zay tutorial](jeanzay.md)
# Training on Jean Zay
See the [wiki](https://redmine.teklia.com/projects/research/wiki/Jean_Zay) for more details.
## Run a training job
Warning: there is no internet access (HTTP) during a job.
You can debug using an interactive job. The following command will get you a new terminal with 1 GPU for 1 hour: `srun --ntasks=1 --cpus-per-task=40 --gres=gpu:1 --time=01:00:00 --qos=qos_gpu-dev --pty bash -i`.
You should run the actual training using a passive/batch job:
* Run `sbatch train_dan.sh`.
* The `train_dan.sh` file should look like the example below.
```sh
#!/bin/bash
#SBATCH --constraint=v100-32g
#SBATCH --qos=qos_gpu-t4 # partition
#SBATCH --job-name=dan_training # name of the job
#SBATCH --gres=gpu:1 # number of GPUs per node
#SBATCH --cpus-per-task=10 # number of cores per tasks
#SBATCH --hint=nomultithread # we get physical cores not logical
#SBATCH --distribution=block:block # we pin the tasks on contiguous cores
#SBATCH --nodes=1 # number of nodes
#SBATCH --ntasks-per-node=1 # number of MPI tasks per node
#SBATCH --time=99:00:00 # max exec time
#SBATCH --output=dan_train_hugin_munin_page_%j.out # output log file
#SBATCH --error=dan_train_hugin_munin_page_%j.err # error log file
module purge # purging modules inherited by default
module load anaconda-py3
conda activate /gpfswork/rech/rxm/ubz97wr/.conda/envs/dan/
# print started commands
set -x
# execution
teklia-dan train document
```
## Supervise a job
* Use `squeue -u $USER`. This command should give an output similar to the one presented below.
```
(base) [ubz97wr@jean-zay1: ubz97wr]$ squeue -u $USER
JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON)
1762916 gpu_p13 pylaia_t ubz97wr R 23:07:54 1 r7i6n1
1762954 gpu_p13 pylaia_t ubz97wr R 22:35:57 1 r7i3n1
```
## Delete a job
* Use `scancel $JOBID` to cancel a specific job.
* Use `scancel -u $USER` to cancel all your jobs.