Skip to content
Snippets Groups Projects

Fix version 0.2.0-dev3 and later

Merged Mélodie Boillet requested to merge fix-dev3 into main
All threads resolved!
Files
15
+ 41
15
# -*- coding: utf-8 -*-
import copy
import json
import os
@@ -21,18 +22,20 @@ class OCRDataset(Dataset):
charset,
tokens,
preprocessing_transforms,
normalization_transforms,
augmentation_transforms,
load_in_memory=False,
mean=None,
std=None,
):
self.set_name = set_name
self.charset = charset
self.tokens = tokens
self.load_in_memory = load_in_memory
self.mean = mean
self.std = std
# Pre-processing, augmentation, normalization
# Pre-processing, augmentation
self.preprocessing_transforms = preprocessing_transforms
self.normalization_transforms = normalization_transforms
self.augmentation_transforms = augmentation_transforms
# Factor to reduce the height and width of the feature vector before feeding the decoder.
@@ -54,20 +57,20 @@ class OCRDataset(Dataset):
"""
Return an item from the dataset (image and label)
"""
# Load preprocessed image
sample = dict(**self.samples[idx])
sample = copy.deepcopy(self.samples[idx])
if not self.load_in_memory:
sample["img"] = self.get_sample_img(idx)
# Convert to numpy
sample["img"] = np.array(sample["img"])
# Apply data augmentation
if self.augmentation_transforms:
sample["img"] = self.augmentation_transforms(image=np.array(sample["img"]))[
"image"
]
sample["img"] = self.augmentation_transforms(image=sample["img"])["image"]
# Image normalization
sample["img"] = self.normalization_transforms(sample["img"])
sample["img"] = (sample["img"] - self.mean) / self.std
# Get final height and width
sample["img_reduced_shape"], sample["img_position"] = self.compute_final_size(
@@ -119,21 +122,44 @@ class OCRDataset(Dataset):
return self.preprocessing_transforms(read_image(self.samples[i]["path"]))
def compute_std_mean(self):
    """
    Compute the per-channel mean and standard deviation of the whole dataset.

    Nothing is computed when both ``self.mean`` and ``self.std`` are already
    set: the stored values are returned as-is. Otherwise both statistics are
    (re)computed in two passes over ``self.samples``: the first pass
    accumulates the sum of pixel values, the second accumulates the squared
    deviation from the resulting mean.

    Images are reduced over their first two axes, so they are expected to be
    laid out as (height, width, channels); the number of channels is taken
    from the images instead of being fixed to 3.

    :return: The ``(mean, std)`` pair, also stored on ``self.mean`` and
        ``self.std``.
    :raises ValueError: If the dataset contains no pixel at all, or if the
        images do not all share the same trailing (channel) shape.
    """
    if self.mean is not None and self.std is not None:
        return self.mean, self.std

    nb_samples = len(self.samples)

    # First pass: per-channel sum of pixel values and total pixel count.
    total = 0.0
    nb_pixels = 0
    channel_shape = None
    for ind in range(nb_samples):
        img = np.array(self.get_sample_img(ind))
        if channel_shape is None:
            channel_shape = img.shape[2:]
        elif img.shape[2:] != channel_shape:
            # Broadcasting could otherwise silently mix incompatible images.
            raise ValueError(
                f"Inconsistent channel shape for sample {ind}: "
                f"expected {channel_shape}, got {img.shape[2:]}"
            )
        # Accumulate in float64 so integer images cannot overflow.
        total = total + np.sum(img, axis=(0, 1), dtype=np.float64)
        nb_pixels += int(np.prod(img.shape[:2]))

    if nb_pixels == 0:
        # Avoid a silent 0/0 that would propagate NaN into normalization.
        raise ValueError("Cannot compute mean and std of an empty dataset")

    self.mean = total / nb_pixels

    # Second pass: per-channel sum of squared deviations from the mean.
    diff = 0.0
    for ind in range(nb_samples):
        img = np.array(self.get_sample_img(ind))
        diff = diff + np.sum((img - self.mean) ** 2, axis=(0, 1))

    self.std = np.sqrt(diff / nb_pixels)
    return self.mean, self.std
def compute_final_size(self, img):
    """
    Compute the final image size and position after feature extraction.

    The image shape is divided element-wise by ``self.reduce_dims_factor`` and
    rounded up. The first two axes of ``img`` are used as height and width,
    so ``img`` is expected to be channel-last (height, width, channels).
    NOTE(review): ``self.reduce_dims_factor`` is defined outside this block;
    presumably a NumPy array with one factor per image axis — confirm.

    :param img: Image array exposing a ``shape`` attribute.
    :return: A ``(image_reduced_shape, image_position)`` pair.
        ``image_reduced_shape`` is the reduced shape (a list clamped to a
        minimum of 1 per axis for the "train" set, an integer array
        otherwise). ``image_position`` is ``[[0, height], [0, width]]``.
    """
    image_reduced_shape = np.ceil(img.shape / self.reduce_dims_factor).astype(int)

    if self.set_name == "train":
        # Never let a training sample collapse to an empty feature map.
        image_reduced_shape = [max(1, t) for t in image_reduced_shape]

    image_position = [
        [0, img.shape[0]],
        [0, img.shape[1]],
    ]
    return image_reduced_shape, image_position
Loading