No more DPI adjusting

6f3504c5 · Yoann Schneider · Mélodie Boillet · 620e8856 · 6f3504c5 · 6f3504c5
Commit 6f3504c5 authored 1 year ago by Yoann Schneider Committed by Mélodie Boillet 1 year ago
--- a/dan/ocr/transforms.py
+++ b/dan/ocr/transforms.py
@@ -6,6 +6,7 @@ from enum import Enum
 from random import randint

 import albumentations as A
+import cv2
 import numpy as np
 from albumentations.augmentations import (
    Affine,
@@ -15,16 +16,16 @@ from albumentations.augmentations import (
    GaussianBlur,
    GaussNoise,
    Perspective,
+    RandomScale,
    Sharpen,
    ToGray,
 )
 from albumentations.core.transforms_interface import ImageOnlyTransform
-from cv2 import dilate, erode, resize
+from cv2 import dilate, erode
 from numpy import random
 from torch import Tensor
-from torch.distributions.uniform import Uniform
 from torchvision.transforms import Compose, ToPILImage
-from torchvision.transforms.functional import resize as resize_tensor
+from torchvision.transforms.functional import resize


 class Preprocessing(str, Enum):
@@ -54,7 +55,7 @@ class FixedHeightResize:

    def __call__(self, img: Tensor) -> Tensor:
        size = (self.height, self._calc_new_width(img))
-        return resize_tensor(img, size, antialias=False)
+        return resize(img, size, antialias=False)

    def _calc_new_width(self, img: Tensor) -> int:
        aspect_ratio = img.shape[2] / img.shape[1]
@@ -71,7 +72,7 @@ class FixedWidthResize:

    def __call__(self, img: Tensor) -> Tensor:
        size = (self._calc_new_height(img), self.width)
-        return resize_tensor(img, size, antialias=False)
+        return resize(img, size, antialias=False)

    def _calc_new_height(self, img: Tensor) -> int:
        aspect_ratio = img.shape[1] / img.shape[2]
@@ -96,7 +97,7 @@ class MaxResize:
        ratio = min(height_ratio, width_ratio)
        new_width = int(width * ratio)
        new_height = int(height * ratio)
-        return resize_tensor(img, (new_height, new_width), antialias=False)
+        return resize(img, (new_height, new_width), antialias=False)


 class Dilation:
@@ -156,29 +157,6 @@ class ErosionDilation(ImageOnlyTransform):
        )


-class DPIAdjusting(ImageOnlyTransform):
-    """
-    Resolution modification
-    """
-
-    def __init__(
-        self,
-        min_factor: float = 0.75,
-        max_factor: float = 1,
-        always_apply: bool = False,
-        p: float = 1.0,
-    ):
-        super(DPIAdjusting, self).__init__(always_apply, p)
-        self.min_factor = min_factor
-        self.max_factor = max_factor
-        self.p = p
-        self.always_apply = False
-
-    def apply(self, img: np.ndarray, **params):
-        factor = float(Uniform(self.min_factor, self.max_factor).sample())
-        return resize(img, None, fx=factor, fy=factor)
-
-
 def get_preprocessing_transforms(
    preprocessings: list, to_pil_image: bool = False
 ) -> Compose:
@@ -212,7 +190,10 @@ def get_augmentation_transforms() -> A.Compose:
    """
    return A.Compose(
        [
-            DPIAdjusting(min_factor=0.75, max_factor=1),
+            # Scale between 0.75 and 1.0
+            RandomScale(
+                scale_limit=[-0.25, 0], always_apply=True, interpolation=cv2.INTER_AREA
+            ),
            A.SomeOf(
                [
                    ErosionDilation(min_kernel=1, max_kernel=4, iterations=1),
@@ -220,10 +201,18 @@ def get_augmentation_transforms() -> A.Compose:
                    GaussianBlur(sigma_limit=2.5, p=1),
                    GaussNoise(var_limit=50**2, p=1),
                    ColorJitter(
-                        contrast=0.2, brightness=0.2, saturation=0.2, hue=0.2, p=1
+                        contrast=0.2,
+                        brightness=0.2,
+                        saturation=0.2,
+                        hue=0.2,
+                        p=1,
                    ),
                    ElasticTransform(
-                        alpha=20.0, sigma=5.0, alpha_affine=1.0, border_mode=0, p=1
+                        alpha=20.0,
+                        sigma=5.0,
+                        alpha_affine=1.0,
+                        border_mode=0,
+                        p=1,
                    ),
                    Sharpen(alpha=(0.0, 1.0), p=1),
                    Affine(shear={"x": (-20, 20), "y": (0, 0)}, p=1),

--- a/docs/assets/augmentations/document_original.png
+++ b/docs/assets/augmentations/document_original.png
--- a/docs/assets/augmentations/document_random_scale.png
+++ b/docs/assets/augmentations/document_random_scale.png
--- a/docs/assets/augmentations/line_full_pipeline_2.png
+++ b/docs/assets/augmentations/line_full_pipeline_2.png
--- a/docs/assets/augmentations/line_original.png
+++ b/docs/assets/augmentations/line_original.png
--- a/docs/assets/augmentations/line_random_scale.png
+++ b/docs/assets/augmentations/line_random_scale.png
--- a/docs/usage/train/augmentation.md
+++ b/docs/usage/train/augmentation.md
@@ -107,14 +107,16 @@ This page lists data augmentation transforms used in DAN.
 | Examples                     | ![](../../assets/augmentations/line_dropout.png) ![](../../assets/augmentations/document_dropout.png)                                                                                      |
 | CPU time (seconds/10 images) | 0.02 (3013x128 pixels) / 0.02 (1116x581 pixels)                                                                                                                                            |

-### DPIAdjusting
+### Random Scale

-|             | DPIAdjusting                                                   |
-| ----------- | -------------------------------------------------------------- |
-| Description | This transformation downscales the image from a random factor. |
-| Comments    | Similar to the original DAN implementation.                    |
+|               | RandomScale                                                                                                                                                                         |
+| ------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Description   | This transformation downscales the image by a random factor.                                                                                                                        |
+| Comments      | The original DAN implementation reimplemented it as [DPIAdjusting](https://github.com/FactoDeepLearning/DAN/blob/da3046a1cc83e9be3e54dd31a5e74d6134d1ebdc/basic/transforms.py#L62). |
+| Documentation | See the [`albumentations` documentation](https://albumentations.ai/docs/api_reference/augmentations/geometric/resize/#albumentations.augmentations.geometric.resize.RandomScale).   |
+| Examples      | ![](../../assets/augmentations/line_random_scale.png) ![](../../assets/augmentations/document_random_scale.png)                                                                     |

-### ToGray
+### To Gray

 |                              | ToGray                                                                                                                                                          |
 | ---------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------- |

--- a/docs/usage/train/config.md
+++ b/docs/usage/train/config.md
@@ -134,7 +134,8 @@ The following configuration is used by default when using the `teklia-dan train`
 ```py
 transforms = A.Compose(
    [
-        DPIAdjusting(min_factor=0.75, max_factor=1),
+        # Scale between 0.75 and 1.0
+        RandomScale(scale_limit=[-0.25, 0], always_apply=True, interpolation=cv2.INTER_AREA),
        A.SomeOf(
            [
                ErosionDilation(min_kernel=1, max_kernel=4, iterations=1),

--- a/tests/data/training/models/best_0.pt
+++ b/tests/data/training/models/best_0.pt
--- a/tests/data/training/models/last_3.pt
+++ b/tests/data/training/models/last_3.pt