No more DPI adjusting

76b7b5f7 · Yoann Schneider · f1ebd3ca · 76b7b5f7 · 76b7b5f7
Commit 76b7b5f7 authored 1 year ago by Yoann Schneider
--- a/dan/ocr/transforms.py
+++ b/dan/ocr/transforms.py
@@ -15,16 +15,16 @@ from albumentations.augmentations import (
    GaussianBlur,
    GaussNoise,
    Perspective,
+    RandomScale,
    Sharpen,
    ToGray,
 )
 from albumentations.core.transforms_interface import ImageOnlyTransform
-from cv2 import dilate, erode, resize
+from cv2 import dilate, erode
 from numpy import random
 from torch import Tensor
-from torch.distributions.uniform import Uniform
 from torchvision.transforms import Compose, ToPILImage
-from torchvision.transforms.functional import resize as resize_tensor
+from torchvision.transforms.functional import resize


 class Preprocessing(str, Enum):
@@ -54,7 +54,7 @@ class FixedHeightResize:

    def __call__(self, img: Tensor) -> Tensor:
        size = (self.height, self._calc_new_width(img))
-        return resize_tensor(img, size, antialias=False)
+        return resize(img, size, antialias=False)

    def _calc_new_width(self, img: Tensor) -> int:
        aspect_ratio = img.shape[2] / img.shape[1]
@@ -71,7 +71,7 @@ class FixedWidthResize:

    def __call__(self, img: Tensor) -> Tensor:
        size = (self._calc_new_height(img), self.width)
-        return resize_tensor(img, size, antialias=False)
+        return resize(img, size, antialias=False)

    def _calc_new_height(self, img: Tensor) -> int:
        aspect_ratio = img.shape[1] / img.shape[2]
@@ -96,7 +96,7 @@ class MaxResize:
        ratio = min(height_ratio, width_ratio)
        new_width = int(width * ratio)
        new_height = int(height * ratio)
-        return resize_tensor(img, (new_height, new_width), antialias=False)
+        return resize(img, (new_height, new_width), antialias=False)


 class Dilation:
@@ -156,29 +156,6 @@ class ErosionDilation(ImageOnlyTransform):
        )


-class DPIAdjusting(ImageOnlyTransform):
-    """
-    Resolution modification
-    """
-
-    def __init__(
-        self,
-        min_factor: float = 0.75,
-        max_factor: float = 1,
-        always_apply: bool = False,
-        p: float = 1.0,
-    ):
-        super(DPIAdjusting, self).__init__(always_apply, p)
-        self.min_factor = min_factor
-        self.max_factor = max_factor
-        self.p = p
-        self.always_apply = False
-
-    def apply(self, img: np.ndarray, **params):
-        factor = float(Uniform(self.min_factor, self.max_factor).sample())
-        return resize(img, None, fx=factor, fy=factor)
-
-
 def get_preprocessing_transforms(
    preprocessings: list, to_pil_image: bool = False
 ) -> Compose:
@@ -212,22 +189,31 @@ def get_augmentation_transforms() -> A.Compose:
    """
    return A.Compose(
        [
-            DPIAdjusting(min_factor=0.75, max_factor=1),
+            # Scale between 0.75 and 1.0
+            RandomScale(scale_limit=[-0.25, 0], always_apply=True),
            A.SomeOf(
                [
                    ErosionDilation(min_kernel=1, max_kernel=4, iterations=1),
                    Perspective(scale=(0.05, 0.09), fit_output=True, p=0.4),
-                    GaussianBlur(sigma_limit=2.5, p=1),
-                    GaussNoise(var_limit=50**2, p=1),
+                    GaussianBlur(sigma_limit=2.5, always_apply=True),
+                    GaussNoise(var_limit=50**2, always_apply=True),
                    ColorJitter(
-                        contrast=0.2, brightness=0.2, saturation=0.2, hue=0.2, p=1
+                        contrast=0.2,
+                        brightness=0.2,
+                        saturation=0.2,
+                        hue=0.2,
+                        always_apply=True,
                    ),
                    ElasticTransform(
-                        alpha=20.0, sigma=5.0, alpha_affine=1.0, border_mode=0, p=1
+                        alpha=20.0,
+                        sigma=5.0,
+                        alpha_affine=1.0,
+                        border_mode=0,
+                        always_apply=True,
                    ),
-                    Sharpen(alpha=(0.0, 1.0), p=1),
-                    Affine(shear={"x": (-20, 20), "y": (0, 0)}, p=1),
-                    CoarseDropout(p=1),
+                    Sharpen(alpha=(0.0, 1.0), always_apply=True),
+                    Affine(shear={"x": (-20, 20), "y": (0, 0)}, always_apply=True),
+                    CoarseDropout(always_apply=True),
                    ToGray(p=0.5),
                ],
                n=2,

--- a/docs/usage/train/augmentation.md
+++ b/docs/usage/train/augmentation.md
@@ -107,12 +107,13 @@ This page lists data augmentation transforms used in DAN.
 | Examples                     | ![](../../assets/augmentations/line_dropout.png) ![](../../assets/augmentations/document_dropout.png)                                                                                      |
 | CPU time (seconds/10 images) | 0.02 (3013x128 pixels) / 0.02 (1116x581 pixels)                                                                                                                                            |

-### DPIAdjusting
+### RandomScale

-|             | DPIAdjusting                                                   |
-| ----------- | -------------------------------------------------------------- |
-| Description | This transformation downscales the image from a random factor. |
-| Comments    | Similar to the original DAN implementation.                    |
+|               | RandomScale                                                                                                                                                                         |
+| ------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Description   | This transformation downscales the image by a random factor.                                                                                                                        |
+| Comments      | The original DAN implementation reimplemented it as [DPIAdjusting](https://github.com/FactoDeepLearning/DAN/blob/da3046a1cc83e9be3e54dd31a5e74d6134d1ebdc/basic/transforms.py#L62). |
+| Documentation | See the [`albumentations` documentation](https://albumentations.ai/docs/api_reference/augmentations/geometric/resize/#albumentations.augmentations.geometric.resize.RandomScale)    |

 ### ToGray