Support subword and word language models

Merged: Solene Tarride requested to merge subword-and-word-lm into main
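With this change the per-image prediction output gains a language_model entry alongside the existing text and confidences fields. As a rough illustration only (all values invented, and the keys under "confidences" depend on the requested confidence score levels), a per-image JSON produced with a word or subword LM enabled would look roughly like this:

# Illustrative sketch only: approximate shape of the per-image JSON that
# process_batch writes once a word/subword LM is enabled. All values are
# invented; keys under "confidences" depend on the requested levels.
example_result = {
    "text": "la maison est grande",
    "confidences": {
        "total": 0.95,
        "word": [
            {"text": "la", "confidence": 0.99},
            {"text": "maison", "confidence": 0.97},
            {"text": "est", "confidence": 0.94},
            {"text": "grande", "confidence": 0.90},
        ],
    },
    "language_model": {
        "text": "la maison est grande",
        "confidence": 0.93,
    },
}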
@@ -356,62 +356,62 @@ def process_batch(
logger.info("Prediction parsing...")
for idx, image_path in enumerate(image_batch):
predicted_text = prediction["text"][idx]
result = {"text": predicted_text}
# Return LM results
if use_language_model:
result["language_model"] = {
"text": prediction["language_model"]["text"][idx],
"confidence": prediction["language_model"]["confidence"][idx],
}
# Return extracted objects (coordinates, text, confidence)
if predict_objects:
result["objects"] = prediction["objects"][idx]
# Return mean confidence score
if confidence_score:
result["confidences"] = {}
char_confidences = prediction["confidences"][idx]
result["confidences"]["total"] = np.around(np.mean(char_confidences), 2)
for level in confidence_score_levels:
result["confidences"][level.value] = []
texts, confidences, _ = split_text_and_confidences(
predicted_text,
char_confidences,
level,
word_separators,
line_separators,
tokens,
)
for text, conf in zip(texts, confidences):
result["confidences"][level.value].append(
{"text": text, "confidence": conf}
result = {"text": predicted_text, "confidences": {}, "language_model": {}}
if predicted_text:
# Return LM results
if use_language_model:
result["language_model"] = {
"text": prediction["language_model"]["text"][idx],
"confidence": prediction["language_model"]["confidence"][idx],
}
# Return extracted objects (coordinates, text, confidence)
if predict_objects:
result["objects"] = prediction["objects"][idx]
# Return mean confidence score
if confidence_score:
char_confidences = prediction["confidences"][idx]
result["confidences"]["total"] = np.around(np.mean(char_confidences), 2)
for level in confidence_score_levels:
result["confidences"][level.value] = []
texts, confidences, _ = split_text_and_confidences(
predicted_text,
char_confidences,
level,
word_separators,
line_separators,
tokens,
)
# Save gif with attention map
if attention_map:
attentions = prediction["attentions"][idx]
gif_filename = f"{output}/{image_path.stem}_{attention_map_level}.gif"
logger.info(f"Creating attention GIF in {gif_filename}")
plot_attention(
image=visu_tensor[idx],
text=predicted_text,
weights=attentions,
level=attention_map_level,
scale=attention_map_scale,
word_separators=word_separators,
line_separators=line_separators,
tokens=tokens,
display_polygons=predict_objects,
threshold_method=threshold_method,
threshold_value=threshold_value,
max_object_height=max_object_height,
outname=gif_filename,
)
result["attention_gif"] = gif_filename
for text, conf in zip(texts, confidences):
result["confidences"][level.value].append(
{"text": text, "confidence": conf}
)
# Save gif with attention map
if attention_map:
attentions = prediction["attentions"][idx]
gif_filename = f"{output}/{image_path.stem}_{attention_map_level}.gif"
logger.info(f"Creating attention GIF in {gif_filename}")
plot_attention(
image=visu_tensor[idx],
text=predicted_text,
weights=attentions,
level=attention_map_level,
scale=attention_map_scale,
word_separators=word_separators,
line_separators=line_separators,
tokens=tokens,
display_polygons=predict_objects,
threshold_method=threshold_method,
threshold_value=threshold_value,
max_object_height=max_object_height,
outname=gif_filename,
)
result["attention_gif"] = gif_filename
json_filename = Path(output, image_path.stem).with_suffix(".json")
logger.info(f"Saving JSON prediction in {json_filename}")