Skip to content
Snippets Groups Projects
Commit 6f3504c5 authored by Yoann Schneider's avatar Yoann Schneider :tennis: Committed by Mélodie Boillet
Browse files

No more DPI adjusting

parent 620e8856
No related branches found
No related tags found
1 merge request: !312 "No more DPI adjusting"
......@@ -6,6 +6,7 @@ from enum import Enum
from random import randint
import albumentations as A
import cv2
import numpy as np
from albumentations.augmentations import (
Affine,
......@@ -15,16 +16,16 @@ from albumentations.augmentations import (
GaussianBlur,
GaussNoise,
Perspective,
RandomScale,
Sharpen,
ToGray,
)
from albumentations.core.transforms_interface import ImageOnlyTransform
from cv2 import dilate, erode, resize
from cv2 import dilate, erode
from numpy import random
from torch import Tensor
from torch.distributions.uniform import Uniform
from torchvision.transforms import Compose, ToPILImage
from torchvision.transforms.functional import resize as resize_tensor
from torchvision.transforms.functional import resize
class Preprocessing(str, Enum):
......@@ -54,7 +55,7 @@ class FixedHeightResize:
def __call__(self, img: Tensor) -> Tensor:
size = (self.height, self._calc_new_width(img))
return resize_tensor(img, size, antialias=False)
return resize(img, size, antialias=False)
def _calc_new_width(self, img: Tensor) -> int:
aspect_ratio = img.shape[2] / img.shape[1]
......@@ -71,7 +72,7 @@ class FixedWidthResize:
def __call__(self, img: Tensor) -> Tensor:
size = (self._calc_new_height(img), self.width)
return resize_tensor(img, size, antialias=False)
return resize(img, size, antialias=False)
def _calc_new_height(self, img: Tensor) -> int:
aspect_ratio = img.shape[1] / img.shape[2]
......@@ -96,7 +97,7 @@ class MaxResize:
ratio = min(height_ratio, width_ratio)
new_width = int(width * ratio)
new_height = int(height * ratio)
return resize_tensor(img, (new_height, new_width), antialias=False)
return resize(img, (new_height, new_width), antialias=False)
class Dilation:
......@@ -156,29 +157,6 @@ class ErosionDilation(ImageOnlyTransform):
)
class DPIAdjusting(ImageOnlyTransform):
"""
Resolution modification
"""
def __init__(
self,
min_factor: float = 0.75,
max_factor: float = 1,
always_apply: bool = False,
p: float = 1.0,
):
super(DPIAdjusting, self).__init__(always_apply, p)
self.min_factor = min_factor
self.max_factor = max_factor
self.p = p
self.always_apply = False
def apply(self, img: np.ndarray, **params):
factor = float(Uniform(self.min_factor, self.max_factor).sample())
return resize(img, None, fx=factor, fy=factor)
def get_preprocessing_transforms(
preprocessings: list, to_pil_image: bool = False
) -> Compose:
......@@ -212,7 +190,10 @@ def get_augmentation_transforms() -> A.Compose:
"""
return A.Compose(
[
DPIAdjusting(min_factor=0.75, max_factor=1),
# Scale between 0.75 and 1.0
RandomScale(
scale_limit=[-0.25, 0], always_apply=True, interpolation=cv2.INTER_AREA
),
A.SomeOf(
[
ErosionDilation(min_kernel=1, max_kernel=4, iterations=1),
......@@ -220,10 +201,18 @@ def get_augmentation_transforms() -> A.Compose:
GaussianBlur(sigma_limit=2.5, p=1),
GaussNoise(var_limit=50**2, p=1),
ColorJitter(
contrast=0.2, brightness=0.2, saturation=0.2, hue=0.2, p=1
contrast=0.2,
brightness=0.2,
saturation=0.2,
hue=0.2,
p=1,
),
ElasticTransform(
alpha=20.0, sigma=5.0, alpha_affine=1.0, border_mode=0, p=1
alpha=20.0,
sigma=5.0,
alpha_affine=1.0,
border_mode=0,
p=1,
),
Sharpen(alpha=(0.0, 1.0), p=1),
Affine(shear={"x": (-20, 20), "y": (0, 0)}, p=1),
......
docs/assets/augmentations/document_original.png

213 KiB

docs/assets/augmentations/document_random_scale.png

748 KiB

docs/assets/augmentations/line_full_pipeline_2.png

204 KiB

docs/assets/augmentations/line_original.png

21.7 KiB

docs/assets/augmentations/line_random_scale.png

628 KiB

......@@ -107,14 +107,16 @@ This page lists data augmentation transforms used in DAN.
| Examples | ![](../../assets/augmentations/line_dropout.png) ![](../../assets/augmentations/document_dropout.png) |
| CPU time (seconds/10 images) | 0.02 (3013x128 pixels) / 0.02 (1116x581 pixels) |
### DPIAdjusting
### Random Scale
| | DPIAdjusting |
| ----------- | -------------------------------------------------------------- |
| Description | This transformation downscales the image by a random factor. |
| Comments | Similar to the original DAN implementation. |
| | RandomScale |
| ------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Description | This transformation downscales the image by a random factor. |
| Comments | The original DAN implementation reimplemented it as [DPIAdjusting](https://github.com/FactoDeepLearning/DAN/blob/da3046a1cc83e9be3e54dd31a5e74d6134d1ebdc/basic/transforms.py#L62). |
| Documentation | See the [`albumentations` documentation](https://albumentations.ai/docs/api_reference/augmentations/geometric/resize/#albumentations.augmentations.geometric.resize.RandomScale) |
| Examples | ![](../../assets/augmentations/line_random_scale.png) ![](../../assets/augmentations/document_random_scale.png) |
### ToGray
### To Gray
| | ToGray |
| ---------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------- |
......
......@@ -134,7 +134,8 @@ The following configuration is used by default when using the `teklia-dan train`
```py
transforms = A.Compose(
[
DPIAdjusting(min_factor=0.75, max_factor=1),
# Scale between 0.75 and 1.0
RandomScale(scale_limit=[-0.25, 0], always_apply=True, interpolation=cv2.INTER_AREA),
A.SomeOf(
[
ErosionDilation(min_kernel=1, max_kernel=4, iterations=1),
......
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment