last requests

e8dfc5f6 · Yoann Schneider · da25ec1b · e8dfc5f6 · e8dfc5f6
Commit e8dfc5f6 authored 2 years ago by Yoann Schneider
--- a/dan/mlflow.py
+++ b/dan/mlflow.py
@@ -10,6 +10,9 @@ from dan import logger


 def make_mlflow_request(mlflow_method, *args, **kwargs):
+    """
+    Encapsulate MLflow HTTP requests to prevent them from crashing the whole training process.
+    """
    try:
        mlflow_method(*args, **kwargs)
    except requests.exceptions.ConnectionError as e:
@@ -50,11 +53,10 @@ def logging_metrics(
    :param is_master: bool, makes sure you're on the right thread, defaults to False
    """
    if mlflow_logging and is_master:
-        mlflow_values = {
-            f"{step}_{name}": value for name, value in display_values.items()
-        }
        make_mlflow_request(
-            mlflow_method=mlflow.log_metrics, metrics=mlflow_values, step=epoch
+            mlflow_method=mlflow.log_metrics, metrics={
+            f"{step}_{name}": value for name, value in display_values.items()
+        }, step=epoch
        )


@@ -73,10 +75,10 @@ def logging_tags_metrics(
    :param is_master: bool, makes sure you're on the right thread, defaults to False
    """
    if mlflow_logging and is_master:
-        mlflow_values = {
+        make_mlflow_request(mlflow_method=mlflow.set_tags, tags=
+                            {
            f"{step}_{name}": value for name, value in display_values.items()
-        }
-        make_mlflow_request(mlflow_method=mlflow.set_tags, tags=mlflow_values)
+        })


 @contextmanager
@@ -93,15 +95,7 @@ def start_mlflow_run(config: dict):
    # Set experiment from config
    experiment_id = config.get("experiment_id")
    assert experiment_id, "Missing MLflow experiment ID in the configuration"
-    try:
-        make_mlflow_request(
-            mlflow_method=mlflow.set_experiment, experiment_id=experiment_id
-        )
-        logger.info(f"Run Experiment ID : {experiment_id} on MLFlow")
-    except MlflowException as e:
-        logger.error(f"Couldn't set Mlflow experiment with ID: {experiment_id}")
-        raise e

    # Start run
-    yield mlflow.start_run(run_name=config.get("run_name"))
+    yield mlflow.start_run(run_name=config.get("run_name"), experiment_id=experiment_id)
    mlflow.end_run()
--- a/dan/ocr/document/train.py
+++ b/dan/ocr/document/train.py
@@ -24,7 +24,7 @@ try:

    MLFLOW = True
    logger.info("MLflow Logging available.")
-    from dan.mlflow import start_mlflow_run
+    from dan.mlflow import start_mlflow_run, make_mlflow_request
 except ImportError:
    MLFLOW = False

@@ -70,17 +70,17 @@ def get_config():
    """
    Retrieve model configuration
    """
-    dataset_name = "esposalles"
-    dataset_level = "page"
+    dataset_name = "synist"
+    dataset_level = "manual_text_lines"
    dataset_variant = ""
-    dataset_path = "/home/training_data/ATR_paragraph/Esposalles"
+    dataset_path = "."
    params = {
        "mlflow": {
            "dataset_name": dataset_name,
            "run_name": "Test log DAN",
            "s3_endpoint_url": "",
            "tracking_uri": "",
-            "experiment_id": "9",
+            "experiment_id": "0",
            "aws_access_key_id": "",
            "aws_secret_access_key": "",
        },
@@ -287,18 +287,25 @@ def run():
            / "labels.json"
        )
        with start_mlflow_run(config["mlflow"]) as run:
-            logger.info(f"Set tags to MLflow on {config['mlflow']['run_name']}")
-            mlflow.set_tags({"Dataset": config["mlflow"]["dataset_name"]})
+            logger.info(f"Started MLflow run with ID ({run.info.run_id})")
+            
+            make_mlflow_request(
+                mlflow_method=mlflow.set_tags,
+                tags={"Dataset": dataset_name}
+            )

            # Get the labels json file
            with open(labels_path) as json_file:
                labels_artifact = json.load(json_file)

            # Log MLflow artifacts
-            mlflow.log_dict(config_artifact, "config.json")
-            mlflow.log_dict(labels_artifact, "labels.json")
-
-            logger.info(f"Started MLflow run with ID ({run.info.run_id})")
+            for artifact, filename in [(config_artifact, "config.json"), (labels_artifact, "labels.json")]:
+                make_mlflow_request(
+                    mlflow_method=mlflow.log_dict,
+                    dictionary=artifact,
+                    artifact_file=filename,
+                )
+            
            if (
                config["training_params"]["use_ddp"]
                and not config["training_params"]["force_cpu"]