Mélodie Boillet
--- a/dan/transforms.py

+ 72

− 45
+++ b/dan/transforms.py

+ 72

− 45
 @@ -5,13 +5,12 @@ Each transform class defined here takes as input a PIL Image and returns the mod
 from enum import Enum
 from random import randint

+import albumentations as A
 import numpy as np
-from albumentations import SomeOf
 from albumentations.augmentations import (
    Affine,
    CoarseDropout,
    ColorJitter,
-    Downscale,
    ElasticTransform,
    GaussianBlur,
    GaussNoise,
 @@ -19,15 +18,15 @@ from albumentations.augmentations import (
    Sharpen,
    ToGray,
 )
-from cv2 import INTER_NEAREST, dilate, erode
+from albumentations.core.transforms_interface import ImageOnlyTransform
+from cv2 import dilate, erode
 from numpy import random
+from PIL import Image
 from torch import Tensor
-from torchvision.transforms import Compose, Normalize, ToPILImage, ToTensor
+from torch.distributions.uniform import Uniform
+from torchvision.transforms import Compose, ToPILImage
 from torchvision.transforms.functional import resize

-IMAGENET_MEAN = [0.485, 0.456, 0.406]
-IMAGENET_STD = [0.229, 0.224, 0.225]
-

 class Preprocessing(str, Enum):
    # If the image is bigger than the given size, resize it while keeping the original ratio
 @@ -119,30 +118,64 @@ class Erosion:
        return erode(np.array(x), self.kernel, iterations=self.iterations)


-class ErosionDilation:
+class ErosionDilation(ImageOnlyTransform):
    """
    Random erosion or dilation
    """

-    def __init__(self, min_kernel, max_kernel, iterations, p=1.0):
+    def __init__(
+        self,
+        min_kernel: int,
+        max_kernel: int,
+        iterations: int,
+        always_apply: bool = False,
+        p: float = 1.0,
+    ):
+        super(ErosionDilation, self).__init__(always_apply, p)
        self.min_kernel = min_kernel
        self.max_kernel = max_kernel
        self.iterations = iterations
        self.p = p
-        self.always_apply = False
+        self.always_apply = always_apply  # keep the caller's value instead of forcing False

-    def __call__(self, image, force_apply=False):
-        if not (random.random() <= self.p or self.always_apply or force_apply):
-            return {"image": image}
+    def apply(self, img: np.ndarray, **params):  # random kernel size, then erosion or dilation (50/50)
        kernel_h = randint(self.min_kernel, self.max_kernel)
        kernel_w = randint(self.min_kernel, self.max_kernel)
        kernel = np.ones((kernel_h, kernel_w), np.uint8)
        augmented_image = (
-            Erosion(kernel, iterations=self.iterations)(image)
+            Erosion(kernel, iterations=self.iterations)(img)
            if random.random() < 0.5
-            else Dilation(kernel=kernel, iterations=self.iterations)(image)
+            else Dilation(kernel=kernel, iterations=self.iterations)(img)
        )
-        return {"image": augmented_image}
+        return augmented_image
+
+
+class DPIAdjusting(ImageOnlyTransform):
+    """
+    Resolution modification: resize the image by a random factor sampled uniformly between min_factor and max_factor
+    """
+
+    def __init__(
+        self,
+        min_factor: float = 0.75,
+        max_factor: float = 1,
+        always_apply: bool = False,
+        p: float = 1.0,
+    ):
+        super(DPIAdjusting, self).__init__(always_apply, p)
+        self.min_factor = min_factor
+        self.max_factor = max_factor
+        self.p = p
+        self.always_apply = always_apply  # keep the caller's value instead of forcing False
+
+    def apply(self, img: np.ndarray, **params):
+        factor = float(Uniform(self.min_factor, self.max_factor).sample())  # one factor shared by width and height
+        img = Image.fromarray(img)
+        augmented_image = img.resize(
+            (int(np.ceil(img.width * factor)), int(np.ceil(img.height * factor))),  # sizes rounded up to whole pixels
+            Image.BILINEAR,
+        )
+        return np.array(augmented_image)


 def get_preprocessing_transforms(
 @@ -167,44 +200,38 @@ def get_preprocessing_transforms(
                )
            case Preprocessing.FixedWidthResize:
                transforms.append(FixedWidthResize(width=preprocessing["fixed_width"]))
-
    if to_pil_image:
        transforms.append(ToPILImage())
-
    return Compose(transforms)


-def get_augmentation_transforms() -> SomeOf:
+def get_augmentation_transforms() -> A.Compose:
    """
-    Returns a list of transformations to be applied to the image.
+    Returns the composition of augmentation transformations to be applied to the image.
    """
-    return SomeOf(
+    return A.Compose(
        [
-            Perspective(scale=(0.05, 0.09), fit_output=True),
-            GaussianBlur(sigma_limit=2.5),
-            GaussNoise(var_limit=50**2),
-            ColorJitter(contrast=0.2, brightness=0.2, saturation=0.2, hue=0.2),
-            ElasticTransform(alpha=20.0, sigma=5.0, alpha_affine=1.0, border_mode=0),
-            Sharpen(alpha=(0.0, 1.0)),
-            ErosionDilation(min_kernel=1, max_kernel=4, iterations=1),
-            Affine(shear={"x": (-20, 20), "y": (0, 0)}),
-            CoarseDropout(),
-            Downscale(scale_min=0.5, scale_max=0.9, interpolation=INTER_NEAREST),
-            ToGray(),
+            DPIAdjusting(min_factor=0.75, max_factor=1),
+            A.SomeOf(
+                [
+                    ErosionDilation(min_kernel=1, max_kernel=4, iterations=1),
+                    Perspective(scale=(0.05, 0.09), fit_output=True, p=0.4),
+                    GaussianBlur(sigma_limit=2.5, p=1),
+                    GaussNoise(var_limit=50**2, p=1),
+                    ColorJitter(
+                        contrast=0.2, brightness=0.2, saturation=0.2, hue=0.2, p=1
+                    ),
+                    ElasticTransform(
+                        alpha=20.0, sigma=5.0, alpha_affine=1.0, border_mode=0, p=1
+                    ),
+                    Sharpen(alpha=(0.0, 1.0), p=1),
+                    Affine(shear={"x": (-20, 20), "y": (0, 0)}, p=1),
+                    CoarseDropout(p=1),
+                    ToGray(p=0.5),
+                ],
+                n=2,
+                p=0.9,  # NOTE(review): the enclosing A.Compose is also given p=0.9 -- confirm the nested probabilities are intended
+            ),
        ],
-        n=2,
        p=0.9,
    )
-
-
-def get_normalization_transforms(from_pil_image: bool = False) -> Compose:
-    """
-    Returns a list of normalization transformations.
-    """
-    transforms = []
-
-    if from_pil_image:
-        transforms.append(ToTensor())
-
-    transforms.append(Normalize(IMAGENET_MEAN, IMAGENET_STD))
-    return Compose(transforms)