From 9ab6085f452fa1b5998b190c2a4e6e6bb4416987 Mon Sep 17 00:00:00 2001
From: Martin Maarand <maarand@teklia.com>
Date: Fri, 29 Oct 2021 14:27:10 +0000
Subject: [PATCH] add skew extraction modes

---
 kaldi_data_generator/main.py | 40 ++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/kaldi_data_generator/main.py b/kaldi_data_generator/main.py
index 920d517..0914fc9 100644
--- a/kaldi_data_generator/main.py
+++ b/kaldi_data_generator/main.py
@@ -51,6 +51,8 @@ class Extraction(Enum):
     min_area_rect: int = 2
     deskew_polygon: int = 3
     deskew_min_area_rect: int = 4
+    skew_polygon: int = 5
+    skew_min_area_rect: int = 6
 
 
 class HTRDataGenerator:
@@ -67,6 +69,7 @@ class HTRDataGenerator:
         accepted_worker_version_ids=None,
         transcription_type=TEXT_LINE,
         max_deskew_angle=45,
+        skew_angle=0,
         should_rotate=False,
         scale_x=None,
         scale_y_top=None,
@@ -91,6 +94,7 @@ class HTRDataGenerator:
         self.skipped_vertical_lines_count = 0
         self.accepted_lines_count = 0
         self.max_deskew_angle = max_deskew_angle
+        self.skew_angle = skew_angle
         self.should_rotate = should_rotate
         if scale_x or scale_y_top or scale_y_bottom:
             self.should_resize_polygons = True
@@ -397,6 +401,32 @@ class HTRDataGenerator:
                 trimmed_img = self.rotate_and_trim(min_rect_img, rotate_angle)
 
                 self._save_line_image(page_id, i, trimmed_img, manifest_fp, trans)
+
+        elif self.extraction_mode == Extraction.skew_polygon:
+            for i, trans in enumerate(sorted_lines):
+                rotate_angle = self.skew_angle
+
+                # get polygon image
+                polygon_img = extract_polygon_image(
+                    img, polygon=trans.polygon, rect=trans.rect
+                )
+
+                trimmed_img = self.rotate_and_trim(polygon_img, rotate_angle)
+
+                self._save_line_image(page_id, i, trimmed_img, manifest_fp, trans)
+
+        elif self.extraction_mode == Extraction.skew_min_area_rect:
+            for i, trans in enumerate(sorted_lines):
+                rotate_angle = self.skew_angle
+
+                min_rect_img = extract_min_area_rect_image(
+                    img, polygon=trans.polygon, rect=trans.rect
+                )
+
+                trimmed_img = self.rotate_and_trim(min_rect_img, rotate_angle)
+
+                self._save_line_image(page_id, i, trimmed_img, manifest_fp, trans)
+
         else:
             raise ValueError(f"Unsupported extraction mode: {self.extraction_mode}")
 
@@ -628,6 +658,15 @@ def create_parser():
         "then that line won't be deskewed/rotated.",
     )
 
+    parser.add_argument(
+        "--skew_angle",
+        type=int,
+        default=0,
+        help="Angle by which the line image will be rotated. Useful for data augmentation"
+        " - creating skewed text lines for a more robust model."
+        " Only used with skew_* extraction modes.",
+    )
+
     parser.add_argument(
         "--should_rotate",
         action="store_true",
@@ -762,6 +801,7 @@ def main():
             transcription_type=args.transcription_type,
             accepted_worker_version_ids=args.accepted_worker_version_ids,
             max_deskew_angle=args.max_deskew_angle,
+            skew_angle=args.skew_angle,
             should_rotate=args.should_rotate,
             scale_x=args.scale_x,
             scale_y_top=args.scale_y_top,
-- 
GitLab