Yoann Schneider · 400df69d · a547914d · 191a2a2f · efea7323 · fd6587de
--- a/dan/ocr/transforms.py

+ 21

− 32
+++ b/dan/ocr/transforms.py

+ 21

− 32
 @@ -6,6 +6,7 @@ from enum import Enum
 from random import randint

 import albumentations as A
+import cv2
 import numpy as np
 from albumentations.augmentations import (
    Affine,
 @@ -15,16 +16,16 @@ from albumentations.augmentations import (
    GaussianBlur,
    GaussNoise,
    Perspective,
+    RandomScale,
    Sharpen,
    ToGray,
 )
 from albumentations.core.transforms_interface import ImageOnlyTransform
-from cv2 import dilate, erode, resize
+from cv2 import dilate, erode
 from numpy import random
 from torch import Tensor
-from torch.distributions.uniform import Uniform
 from torchvision.transforms import Compose, ToPILImage
-from torchvision.transforms.functional import resize as resize_tensor
+from torchvision.transforms.functional import resize


 class Preprocessing(str, Enum):
 @@ -54,7 +55,7 @@ class FixedHeightResize:

    def __call__(self, img: Tensor) -> Tensor:
        size = (self.height, self._calc_new_width(img))
-        return resize_tensor(img, size, antialias=False)
+        return resize(img, size, antialias=False)

    def _calc_new_width(self, img: Tensor) -> int:
        aspect_ratio = img.shape[2] / img.shape[1]
 @@ -71,7 +72,7 @@ class FixedWidthResize:

    def __call__(self, img: Tensor) -> Tensor:
        size = (self._calc_new_height(img), self.width)
-        return resize_tensor(img, size, antialias=False)
+        return resize(img, size, antialias=False)

    def _calc_new_height(self, img: Tensor) -> int:
        aspect_ratio = img.shape[1] / img.shape[2]
 @@ -96,7 +97,7 @@ class MaxResize:
        ratio = min(height_ratio, width_ratio)
        new_width = int(width * ratio)
        new_height = int(height * ratio)
-        return resize_tensor(img, (new_height, new_width), antialias=False)
+        return resize(img, (new_height, new_width), antialias=False)


 class Dilation:
 @@ -156,29 +157,6 @@ class ErosionDilation(ImageOnlyTransform):
        )


-class DPIAdjusting(ImageOnlyTransform):
-    """
-    Resolution modification
-    """
-
-    def __init__(
-        self,
-        min_factor: float = 0.75,
-        max_factor: float = 1,
-        always_apply: bool = False,
-        p: float = 1.0,
-    ):
-        super(DPIAdjusting, self).__init__(always_apply, p)
-        self.min_factor = min_factor
-        self.max_factor = max_factor
-        self.p = p
-        self.always_apply = False
-
-    def apply(self, img: np.ndarray, **params):
-        factor = float(Uniform(self.min_factor, self.max_factor).sample())
-        return resize(img, None, fx=factor, fy=factor)
-
-
 def get_preprocessing_transforms(
    preprocessings: list, to_pil_image: bool = False
 ) -> Compose:
 @@ -212,7 +190,10 @@ def get_augmentation_transforms() -> A.Compose:
    """
    return A.Compose(
        [
-            DPIAdjusting(min_factor=0.75, max_factor=1),
+            # Random rescale factor in [0.75, 1.0] (scale_limit is an offset from 1)
+            RandomScale(
+                scale_limit=[-0.25, 0], always_apply=True, interpolation=cv2.INTER_AREA
+            ),
            A.SomeOf(
                [
                    ErosionDilation(min_kernel=1, max_kernel=4, iterations=1),
 @@ -220,10 +201,18 @@ def get_augmentation_transforms() -> A.Compose:
                    GaussianBlur(sigma_limit=2.5, p=1),
                    GaussNoise(var_limit=50**2, p=1),
                    ColorJitter(
-                        contrast=0.2, brightness=0.2, saturation=0.2, hue=0.2, p=1
+                        contrast=0.2,
+                        brightness=0.2,
+                        saturation=0.2,
+                        hue=0.2,
+                        p=1,
                    ),
                    ElasticTransform(
-                        alpha=20.0, sigma=5.0, alpha_affine=1.0, border_mode=0, p=1
+                        alpha=20.0,
+                        sigma=5.0,
+                        alpha_affine=1.0,
+                        border_mode=0,
+                        p=1,
                    ),
                    Sharpen(alpha=(0.0, 1.0), p=1),
                    Affine(shear={"x": (-20, 20), "y": (0, 0)}, p=1),