From f48d9f498dfcf78f439bb43c8326d94c884985ee Mon Sep 17 00:00:00 2001
From: Martin <maarand@teklia.com>
Date: Tue, 15 Feb 2022 17:50:35 +0100
Subject: [PATCH] Skip elements with multiple style classes instead of failing

---
 kaldi_data_generator/main.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/kaldi_data_generator/main.py b/kaldi_data_generator/main.py
index e341f86..f8ed850 100644
--- a/kaldi_data_generator/main.py
+++ b/kaldi_data_generator/main.py
@@ -180,6 +180,7 @@ class HTRDataGenerator:
 
     def get_accepted_zones(self, page_id: str):
         try:
+            skip_count = 0
             accepted_zones = []
             for elt in self.api_client.cached_paginate(
                 "ListElementChildren", id=page_id, with_classes=True
@@ -215,9 +216,14 @@ class HTRDataGenerator:
                         found_class = list(style_counts.keys())[0]
                         found_class = Style(found_class)
                     else:
-                        raise ValueError(
-                            f"Multiple style classes on the same element! {elt['id']} - {elem_classes}"
+                        logger.info(
+                            f"Multiple style classes on the same element! Skipping {elt['id']} - {elem_classes}"
                         )
+                        skip_count += 1
+                        continue
+                        # raise ValueError(
+                        #     f"Multiple style classes on the same element! {elt['id']} - {elem_classes}"
+                        # )
 
                     if found_class == self.style:
                         accepted_zones.append(elt["zone"]["id"])
@@ -225,8 +231,8 @@ class HTRDataGenerator:
                     accepted_zones.append(elt["zone"]["id"])
 
             logger.info(
-                "Number of accepted zone for page {} : {}".format(
-                    page_id, len(accepted_zones)
+                "Number of accepted zone for page {} : {} ; skipped {}".format(
+                    page_id, len(accepted_zones), skip_count
                 )
             )
             return accepted_zones
-- 
GitLab