From f48d9f498dfcf78f439bb43c8326d94c884985ee Mon Sep 17 00:00:00 2001 From: Martin <maarand@teklia.com> Date: Tue, 15 Feb 2022 17:50:35 +0100 Subject: [PATCH] Skip elements with multiple style classes instead of failing --- kaldi_data_generator/main.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/kaldi_data_generator/main.py b/kaldi_data_generator/main.py index e341f86..f8ed850 100644 --- a/kaldi_data_generator/main.py +++ b/kaldi_data_generator/main.py @@ -180,6 +180,7 @@ class HTRDataGenerator: def get_accepted_zones(self, page_id: str): try: + skip_count = 0 accepted_zones = [] for elt in self.api_client.cached_paginate( "ListElementChildren", id=page_id, with_classes=True @@ -215,9 +216,14 @@ class HTRDataGenerator: found_class = list(style_counts.keys())[0] found_class = Style(found_class) else: - raise ValueError( - f"Multiple style classes on the same element! {elt['id']} - {elem_classes}" + logger.info( + f"Multiple style classes on the same element! Skipping {elt['id']} - {elem_classes}" ) + skip_count += 1 + continue + # raise ValueError( + # f"Multiple style classes on the same element! {elt['id']} - {elem_classes}" + # ) if found_class == self.style: accepted_zones.append(elt["zone"]["id"]) @@ -225,8 +231,8 @@ class HTRDataGenerator: accepted_zones.append(elt["zone"]["id"]) logger.info( - "Number of accepted zone for page {} : {}".format( - page_id, len(accepted_zones) + "Number of accepted zone for page {} : {} ; skipped {}".format( + page_id, len(accepted_zones), skip_count ) ) return accepted_zones -- GitLab