Skip to content
Snippets Groups Projects
Commit 681f7ff5 authored by Martin Maarand
Browse files

Don't filter vertical lines with rotation class

parent 9f61d8c8
No related branches found
No related tags found
1 merge request: !16 Don't filter vertical lines with rotation class
Pipeline #74305 passed
...@@ -169,8 +169,6 @@ class HTRDataGenerator: ...@@ -169,8 +169,6 @@ class HTRDataGenerator:
raise e raise e
def get_transcriptions(self, page_id: str, accepted_zones): def get_transcriptions(self, page_id: str, accepted_zones):
count = 0
count_skipped = 0
lines = [] lines = []
try: try:
for res in self.api_client.paginate( for res in self.api_client.paginate(
...@@ -210,14 +208,8 @@ class HTRDataGenerator: ...@@ -210,14 +208,8 @@ class HTRDataGenerator:
polygon=polygon, polygon=polygon,
text=text, text=text,
) )
if self.skip_vertical_lines:
rect = trans_data.rect
if rect.height > rect.width:
count_skipped += 1
continue
lines.append(trans_data) lines.append(trans_data)
count += 1
if self.should_rotate: if self.should_rotate:
classes_by_elem = self.get_children_classes(page_id) classes_by_elem = self.get_children_classes(page_id)
...@@ -237,7 +229,20 @@ class HTRDataGenerator: ...@@ -237,7 +229,20 @@ class HTRDataGenerator:
else: else:
logger.warning(f"No rotation classes on {trans.element_id}") logger.warning(f"No rotation classes on {trans.element_id}")
return (lines, count, count_skipped) count_skipped = 0
if self.skip_vertical_lines:
filtered_lines = []
for line in lines:
if line.is_vertical:
count_skipped += 1
continue
filtered_lines.append(line)
lines = filtered_lines
count = len(lines)
return lines, count, count_skipped
except ErrorResponse as e: except ErrorResponse as e:
logger.info( logger.info(
...@@ -766,12 +771,12 @@ def main(): ...@@ -766,12 +771,12 @@ def main():
logger.info( logger.info(
f"Number of skipped pages: {data_generator.skipped_pages_count}" f"Number of skipped pages: {data_generator.skipped_pages_count}"
) )
skipped_ratio = data_generator.skipped_vertical_lines_count / ( _skipped_vertical_count = data_generator.skipped_vertical_lines_count
data_generator.skipped_vertical_lines_count _total_count = _skipped_vertical_count + data_generator.accepted_lines_count
+ data_generator.accepted_lines_count skipped_ratio = _skipped_vertical_count / _total_count * 100
)
logger.info( logger.info(
f"Skipped {data_generator.skipped_vertical_lines_count} vertical lines ({skipped_ratio}/1.0)" f"Skipped {data_generator.skipped_vertical_lines_count} vertical lines ({round(skipped_ratio, 2)}%)"
) )
else: else:
logger.info("Creating a split from already downloaded files") logger.info("Creating a split from already downloaded files")
......
...@@ -25,6 +25,15 @@ class TranscriptionData: ...@@ -25,6 +25,15 @@ class TranscriptionData:
self.rect = BoundingBox._make(cv2.boundingRect(self.polygon)) self.rect = BoundingBox._make(cv2.boundingRect(self.polygon))
@property
def is_vertical(self) -> bool:
    """
    Tell whether this line should be treated as vertical when filtering.

    A line counts as vertical only when no rotation class is attached to it
    (``rotation_class is None``) and its bounding rectangle is taller than
    it is wide. As soon as a rotation class is present, the geometric check
    is bypassed and the line is never reported as vertical.
    """
    # Short-circuit: the rect comparison is evaluated only without a rotation class.
    return self.rotation_class is None and self.rect.height > self.rect.width
def __repr__(self): def __repr__(self):
return str(vars(self)) return str(vars(self))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment