From 9ab6085f452fa1b5998b190c2a4e6e6bb4416987 Mon Sep 17 00:00:00 2001 From: Martin Maarand <maarand@teklia.com> Date: Fri, 29 Oct 2021 14:27:10 +0000 Subject: [PATCH] add skew extraction modes --- kaldi_data_generator/main.py | 40 ++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/kaldi_data_generator/main.py b/kaldi_data_generator/main.py index 920d517..0914fc9 100644 --- a/kaldi_data_generator/main.py +++ b/kaldi_data_generator/main.py @@ -51,6 +51,8 @@ class Extraction(Enum): min_area_rect: int = 2 deskew_polygon: int = 3 deskew_min_area_rect: int = 4 + skew_polygon: int = 5 + skew_min_area_rect: int = 6 class HTRDataGenerator: @@ -67,6 +69,7 @@ class HTRDataGenerator: accepted_worker_version_ids=None, transcription_type=TEXT_LINE, max_deskew_angle=45, + skew_angle=0, should_rotate=False, scale_x=None, scale_y_top=None, @@ -91,6 +94,7 @@ class HTRDataGenerator: self.skipped_vertical_lines_count = 0 self.accepted_lines_count = 0 self.max_deskew_angle = max_deskew_angle + self.skew_angle = skew_angle self.should_rotate = should_rotate if scale_x or scale_y_top or scale_y_bottom: self.should_resize_polygons = True @@ -397,6 +401,32 @@ class HTRDataGenerator: trimmed_img = self.rotate_and_trim(min_rect_img, rotate_angle) self._save_line_image(page_id, i, trimmed_img, manifest_fp, trans) + + elif self.extraction_mode == Extraction.skew_polygon: + for i, trans in enumerate(sorted_lines): + rotate_angle = self.skew_angle + + # get polygon image + polygon_img = extract_polygon_image( + img, polygon=trans.polygon, rect=trans.rect + ) + + trimmed_img = self.rotate_and_trim(polygon_img, rotate_angle) + + self._save_line_image(page_id, i, trimmed_img, manifest_fp, trans) + + elif self.extraction_mode == Extraction.skew_min_area_rect: + for i, trans in enumerate(sorted_lines): + rotate_angle = self.skew_angle + + min_rect_img = extract_min_area_rect_image( + img, polygon=trans.polygon, rect=trans.rect + ) + + trimmed_img = 
self.rotate_and_trim(min_rect_img, rotate_angle) + + self._save_line_image(page_id, i, trimmed_img, manifest_fp, trans) + else: raise ValueError(f"Unsupported extraction mode: {self.extraction_mode}") @@ -628,6 +658,15 @@ def create_parser(): "then that line won't be deskewed/rotated.", ) + parser.add_argument( + "--skew_angle", + type=int, + default=0, + help="Angle by which the line image will be rotated. Useful for data augmentation" + " - creating skewed text lines for a more robust model." + " Only used with skew_* extraction modes.", + ) + parser.add_argument( "--should_rotate", action="store_true", @@ -762,6 +801,7 @@ def main(): transcription_type=args.transcription_type, accepted_worker_version_ids=args.accepted_worker_version_ids, max_deskew_angle=args.max_deskew_angle, + skew_angle=args.skew_angle, should_rotate=args.should_rotate, scale_x=args.scale_x, scale_y_top=args.scale_y_top, -- GitLab