Skip to content
Snippets Groups Projects
Commit d732d942 authored by Martin's avatar Martin
Browse files

don't filter vertical lines that have a rotation class

parent 9f61d8c8
No related branches found
No related tags found
1 merge request: !16 "Don't filter vertical lines with rotation class"
Pipeline #74303 failed
...@@ -169,8 +169,6 @@ class HTRDataGenerator: ...@@ -169,8 +169,6 @@ class HTRDataGenerator:
raise e raise e
def get_transcriptions(self, page_id: str, accepted_zones): def get_transcriptions(self, page_id: str, accepted_zones):
count = 0
count_skipped = 0
lines = [] lines = []
try: try:
for res in self.api_client.paginate( for res in self.api_client.paginate(
...@@ -210,14 +208,8 @@ class HTRDataGenerator: ...@@ -210,14 +208,8 @@ class HTRDataGenerator:
polygon=polygon, polygon=polygon,
text=text, text=text,
) )
if self.skip_vertical_lines:
rect = trans_data.rect
if rect.height > rect.width:
count_skipped += 1
continue
lines.append(trans_data) lines.append(trans_data)
count += 1
if self.should_rotate: if self.should_rotate:
classes_by_elem = self.get_children_classes(page_id) classes_by_elem = self.get_children_classes(page_id)
...@@ -237,7 +229,20 @@ class HTRDataGenerator: ...@@ -237,7 +229,20 @@ class HTRDataGenerator:
else: else:
logger.warning(f"No rotation classes on {trans.element_id}") logger.warning(f"No rotation classes on {trans.element_id}")
return (lines, count, count_skipped) count_skipped = 0
if self.skip_vertical_lines:
filtered_lines = []
for line in lines:
if line.is_vertical:
count_skipped += 1
continue
filtered_lines.append(line)
lines = filtered_lines
count = len(lines)
return lines, count, count_skipped
except ErrorResponse as e: except ErrorResponse as e:
logger.info( logger.info(
...@@ -769,9 +774,9 @@ def main(): ...@@ -769,9 +774,9 @@ def main():
skipped_ratio = data_generator.skipped_vertical_lines_count / ( skipped_ratio = data_generator.skipped_vertical_lines_count / (
data_generator.skipped_vertical_lines_count data_generator.skipped_vertical_lines_count
+ data_generator.accepted_lines_count + data_generator.accepted_lines_count
) ) * 100
logger.info( logger.info(
f"Skipped {data_generator.skipped_vertical_lines_count} vertical lines ({skipped_ratio}/1.0)" f"Skipped {data_generator.skipped_vertical_lines_count} vertical lines ({round(skipped_ratio, 2)}%)"
) )
else: else:
logger.info("Creating a split from already downloaded files") logger.info("Creating a split from already downloaded files")
......
...@@ -25,6 +25,15 @@ class TranscriptionData: ...@@ -25,6 +25,15 @@ class TranscriptionData:
self.rect = BoundingBox._make(cv2.boundingRect(self.polygon)) self.rect = BoundingBox._make(cv2.boundingRect(self.polygon))
@property
def is_vertical(self) -> bool:
    """
    Tell whether this line should be treated as vertical when filtering.

    A line that carries a rotation class is never reported as vertical.
    Without a rotation class, the line is vertical when its bounding
    rectangle is strictly taller than it is wide.
    """
    # Short-circuit: the rect is only inspected when no rotation class is set.
    return self.rotation_class is None and self.rect.height > self.rect.width
def __repr__(self): def __repr__(self):
return str(vars(self)) return str(vars(self))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment