From 55698f0c709e222f66e67e673bdc522ca1038986 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A9lodie?= <melo.boillet@gmail.com>
Date: Wed, 2 Aug 2023 07:57:21 +0200
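Subject: [PATCH] Load images using Torch

- dan/manager/ocr.py: in OCRCollateFunction, convert each batch image with
  torch.from_numpy(...).permute(2, 0, 1) before calling pad_images.
- dan/transforms.py: drop the Downscale import and remove the n=2 argument
  and leftover closing lines at the end of get_augmentation_transforms.
- docs/usage/train/augmentation.md: rename two section headings.
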
---
 dan/manager/ocr.py               | 5 ++++-
 dan/transforms.py                | 5 -----
 docs/usage/train/augmentation.md | 4 ++--
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/dan/manager/ocr.py b/dan/manager/ocr.py
index 9d2eb065..c0e832b3 100644
--- a/dan/manager/ocr.py
+++ b/dan/manager/ocr.py
@@ -244,7 +244,10 @@ class OCRCollateFunction:
         labels = [batch_data[i]["token_label"] for i in range(len(batch_data))]
         labels = pad_sequences_1D(labels, padding_value=self.label_padding_value).long()
 
-        imgs = [batch_data[i]["img"] for i in range(len(batch_data))]
+        imgs = [
+            torch.from_numpy(batch_data[i]["img"]).permute(2, 0, 1)
+            for i in range(len(batch_data))
+        ]
         imgs = pad_images(imgs)
 
         formatted_batch_data = {
diff --git a/dan/transforms.py b/dan/transforms.py
index ff39dcc6..90b073fb 100644
--- a/dan/transforms.py
+++ b/dan/transforms.py
@@ -11,7 +11,6 @@ from albumentations.augmentations import (
     Affine,
     CoarseDropout,
     ColorJitter,
-    Downscale,
     ElasticTransform,
     GaussianBlur,
     GaussNoise,
@@ -226,11 +225,7 @@ def get_augmentation_transforms() -> SomeOf:
             Downscale(scale_min=0.5, scale_max=0.9, interpolation=INTER_NEAREST),
             ToGray(),
         ],
-        n=2,
         p=0.9,
-        )
-    ],
-    p=0.9
     )
 
 
diff --git a/docs/usage/train/augmentation.md b/docs/usage/train/augmentation.md
index ab9f3870..2e86de9a 100644
--- a/docs/usage/train/augmentation.md
+++ b/docs/usage/train/augmentation.md
@@ -107,7 +107,7 @@ This page lists data augmentation transforms used in DAN.
 | Examples                     | ![](../../assets/augmentations/line_dropout.png) ![](../../assets/augmentations/document_dropout.png)                                                                                       |
 | CPU time (seconds/10 images) | 0.02 (3013x128 pixels) / 0.02 (1116x581 pixels)                                                                                                                                             |
 
-### Downscale
+### DPIAdjusting
 
 |                              | Downscale                                                                                                                                                           |
 | ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
@@ -117,7 +117,7 @@ This page lists data augmentation transforms used in DAN.
 | Examples                     | ![](../../assets/augmentations/line_downscale.png) ![](../../assets/augmentations/document_downscale.png)                                                           |
 | CPU time (seconds/10 images) | 0.03 (3013x128 pixels) / 0.03 (1116x581 pixels)                                                                                                                     |
 
-### Grayscale
+### ToGray
 
 |                              | Grayscale                                                                                                                                                        |
 | ---------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-- 
GitLab