Yoann Schneider · 2c286d00 · 4456cb0f · c1486349 · 2c286d00 · 4456cb0f
--- a/atr_data_generator/extract/base.py

+ 25

− 10
+++ b/atr_data_generator/extract/base.py

+ 25

− 10
 @@ -7,7 +7,7 @@ from pathlib import Path
 from typing import Any, Dict

 import numpy as np
-from arkindex_export import Dataset, Element, open_database
+from arkindex_export import Dataset, Element, Transcription, open_database
 from line_image_extractor.extractor import extract, read_img, save_img
 from line_image_extractor.image_utils import polygon_to_bbox, resize
 from PIL import Image
 @@ -107,7 +107,9 @@ class DataGenerator:
        bbox = polygon_to_bbox(polygon)
        # Skip if line is vertical and these are forbidden
        if self.filter.skip_vertical_lines and _is_vertical(bbox):
-            return
+            raise ValueError(
+                "Image is vertical and current configuration forbids these."
+            )

        # Extract the polygon in the image
        image = extract(
 @@ -129,11 +131,30 @@ class DataGenerator:
                )
            )
        # Save the image to disk
-        save_img(path=destination, img=image)
+        status = save_img(path=destination, img=image)
+        if not status:
+            raise ValueError("Image download failed.")

    def parse_image_path(self, image_path: Path):
        return str(image_path)

+    def process_child(self, child: Transcription, split: str, image_path: Path) -> None:
+        """
+        Download the child's image and store its transcription.
+        If anything goes wrong while getting the image, the child is skipped with a logged warning and its transcription is not stored.
+        """
+        try:
+            # Extract the image
+            self.get_image(child.element, image_path)
+        except Exception as e:
+            logger.warning(f"Skipping element ({child.id}): {e}")
+            return
+
+        # Store transcription
+        self.data[split][self.parse_image_path(image_path)] = self.parse_transcription(
+            child.text, split=split
+        )
+
    def process_parent(self, parent: Element, split: str):
        """
         Process every child under this parent element.
 @@ -151,13 +172,7 @@ class DataGenerator:
                / "images"
                / f"{parent.id}_{child.element.name.split('_')[-1]}_{child.element_id}.jpg"
            )
-            # Store transcription
-            self.data[split][
-                self.parse_image_path(image_path)
-            ] = self.parse_transcription(child.text, split=split)
-
-            # Extract the image
-            self.get_image(child.element, image_path)
+            self.process_child(child=child, split=split, image_path=image_path)

    def export(self):
        """