diff --git a/dan/manager/ocr.py b/dan/manager/ocr.py
index 8c12c509c7467163514befe13d747916802f3cc8..d15627804241cd1761cac8c3a793d1458067ee3d 100644
--- a/dan/manager/ocr.py
+++ b/dan/manager/ocr.py
@@ -6,10 +6,11 @@ import pickle
 import cv2
 import numpy as np
 import torch
+from torch import randint
 
 from dan.manager.dataset import DatasetManager, GenericDataset, apply_preprocessing
 from dan.ocr.utils import LM_str_to_ind
-from dan.utils import pad_image, pad_images, pad_sequences_1D, randint
+from dan.utils import pad_image, pad_images, pad_sequences_1D
 
 
 class OCRDatasetManager(DatasetManager):
@@ -137,12 +138,12 @@
         min_pad = self.params["config"]["padding"]["min_pad"]
         max_pad = self.params["config"]["padding"]["max_pad"]
         pad_width = (
-            randint(min_pad, max_pad)
+            randint(min_pad, max_pad, (1,))
             if min_pad is not None and max_pad is not None
             else None
         )
         pad_height = (
-            randint(min_pad, max_pad)
+            randint(min_pad, max_pad, (1,))
             if min_pad is not None and max_pad is not None
             else None
         )
diff --git a/dan/predict/attention.py b/dan/predict/attention.py
index 6b85825125577760fb746fba1779637508908dc9..f1c4def08e445f01b585599136312e1950eece45 100644
--- a/dan/predict/attention.py
+++ b/dan/predict/attention.py
@@ -6,7 +6,6 @@ import numpy as np
 from PIL import Image
 
 from dan import logger
-from dan.utils import round_floats
 
 
 def parse_delimiters(delimiters):
@@ -78,7 +77,7 @@ def split_text_and_confidences(
         offset = 1
     else:
         logger.error("Level should be either 'char', 'word', or 'line'")
-    return texts, round_floats(probs), offset
+    return texts, [np.around(num, 2) for num in probs], offset
 
 
 def get_predicted_polygons_with_confidence(
diff --git a/dan/transforms.py b/dan/transforms.py
index c3ddac4e9d212264914b64a0e6da3b0c8f30c419..2c596e3c0e7f52c7816a481359702e7c64b95bec 100644
--- a/dan/transforms.py
+++ b/dan/transforms.py
@@ -9,6 +9,8 @@ import numpy as np
 from cv2 import dilate, erode, normalize
 from numpy import random
 from PIL import Image
+from torch import rand, randint
+from torch.distributions.uniform import Uniform
 from torchvision.transforms import (
     ColorJitter,
     GaussianBlur,
@@ -17,8 +19,6 @@
 )
 from torchvision.transforms.functional import InterpolationMode
 
-from dan.utils import rand, rand_uniform, randint
-
 
 class DPIAdjusting:
     """
@@ -173,14 +173,14 @@ def get_list_augmenters(img, aug_configs, fill_value):
     """
     augmenters = list()
     for aug_config in aug_configs:
-        if rand() > aug_config["proba"]:
+        if rand((1,)) > aug_config["proba"]:
             continue
         if aug_config["type"] == "dpi":
             valid_factor = False
             while not valid_factor:
-                factor = rand_uniform(
+                factor = Uniform(
                     aug_config["min_factor"], aug_config["max_factor"]
-                )
+                ).sample()
                 valid_factor = not (
                     (
                         "max_width" in aug_config
@@ -202,8 +202,12 @@
             augmenters.append(DPIAdjusting(factor))
 
         elif aug_config["type"] == "zoom_ratio":
-            ratio_h = rand_uniform(aug_config["min_ratio_h"], aug_config["max_ratio_h"])
-            ratio_w = rand_uniform(aug_config["min_ratio_w"], aug_config["max_ratio_w"])
+            ratio_h = Uniform(
+                aug_config["min_ratio_h"], aug_config["max_ratio_h"]
+            ).sample()
+            ratio_w = Uniform(
+                aug_config["min_ratio_w"], aug_config["max_ratio_w"]
+            ).sample()
             augmenters.append(
                 ZoomRatio(
                     ratio_h=ratio_h, ratio_w=ratio_w, keep_dim=aug_config["keep_dim"]
@@ -211,7 +215,7 @@
             )
 
         elif aug_config["type"] == "perspective":
-            scale = rand_uniform(aug_config["min_factor"], aug_config["max_factor"])
+            scale = Uniform(aug_config["min_factor"], aug_config["max_factor"]).sample()
             augmenters.append(
                 RandomPerspective(
                     distortion_scale=scale,
@@ -223,13 +227,20 @@
 
         elif aug_config["type"] == "elastic_distortion":
             kernel_size = (
-                randint(aug_config["min_kernel_size"], aug_config["max_kernel_size"])
-                // 2
-                * 2
-                + 1
+                randint(
+                    aug_config["min_kernel_size"], aug_config["max_kernel_size"], (1,)
+                ).item()
+            ) // 2 * 2 + 1
+            sigma = (
+                Uniform(aug_config["min_sigma"], aug_config["max_sigma"])
+                .sample()
+                .item()
+            )
+            alpha = (
+                Uniform(aug_config["min_alpha"], aug_config["max_alpha"])
+                .sample()
+                .item()
             )
-            sigma = rand_uniform(aug_config["min_sigma"], aug_config["max_sigma"])
-            alpha = rand_uniform(aug_config["min_alpha"], aug_config["max_alpha"])
             augmenters.append(
                 ElasticDistortion(
                     kernel_size=(kernel_size, kernel_size), sigma=sigma, alpha=alpha
@@ -237,9 +248,13 @@
             )
 
         elif aug_config["type"] == "dilation_erosion":
-            kernel_h = randint(aug_config["min_kernel"], aug_config["max_kernel"] + 1)
-            kernel_w = randint(aug_config["min_kernel"], aug_config["max_kernel"] + 1)
-            if randint(0, 2) == 0:
+            kernel_h = randint(
+                aug_config["min_kernel"], aug_config["max_kernel"] + 1, (1,)
+            )
+            kernel_w = randint(
+                aug_config["min_kernel"], aug_config["max_kernel"] + 1, (1,)
+            )
+            if randint(0, 2, (1,)) == 0:
                 augmenters.append(
                     Erosion((kernel_w, kernel_h), aug_config["iterations"])
                 )
@@ -261,9 +276,17 @@
         elif aug_config["type"] == "gaussian_blur":
             max_kernel_h = min(aug_config["max_kernel"], img.size[1])
             max_kernel_w = min(aug_config["max_kernel"], img.size[0])
-            kernel_h = randint(aug_config["min_kernel"], max_kernel_h + 1) // 2 * 2 + 1
-            kernel_w = randint(aug_config["min_kernel"], max_kernel_w + 1) // 2 * 2 + 1
-            sigma = rand_uniform(aug_config["min_sigma"], aug_config["max_sigma"])
+            kernel_h = (
+                randint(aug_config["min_kernel"], max_kernel_h + 1, (1,)).item()
+            ) // 2 * 2 + 1
+            kernel_w = (
+                randint(aug_config["min_kernel"], max_kernel_w + 1, (1,)).item()
+            ) // 2 * 2 + 1
+            sigma = (
+                Uniform(aug_config["min_sigma"], aug_config["max_sigma"])
+                .sample()
+                .item()
+            )
             augmenters.append(
                 GaussianBlur(kernel_size=(kernel_w, kernel_h), sigma=sigma)
             )
@@ -272,10 +295,10 @@
             augmenters.append(GaussianNoise(std=aug_config["std"]))
 
         elif aug_config["type"] == "sharpen":
-            alpha = rand_uniform(aug_config["min_alpha"], aug_config["max_alpha"])
-            strength = rand_uniform(
+            alpha = Uniform(aug_config["min_alpha"], aug_config["max_alpha"]).sample()
+            strength = Uniform(
                 aug_config["min_strength"], aug_config["max_strength"]
-            )
+            ).sample()
             augmenters.append(Sharpen(alpha=alpha, strength=strength))
 
         else:
@@ -289,7 +312,7 @@
     """
     Apply data augmentation strategy on input image
     """
-    if da_config["proba"] != 1 and rand() > da_config["proba"]:
+    if da_config["proba"] != 1 and rand((1,)) > da_config["proba"]:
         return img
 
     # Convert to PIL Image
diff --git a/dan/utils.py b/dan/utils.py
index 93243fcfd8f46876b74027e897c1faf31d5ff5b0..e4f18b733b26d4d65c10ed7ac4ecc260c3fc7459 100644
--- a/dan/utils.py
+++ b/dan/utils.py
@@ -1,8 +1,7 @@
 # -*- coding: utf-8 -*-
 import cv2
 import numpy as np
-import torch
-from torch.distributions.uniform import Uniform
+from torch import randint
 
 # Layout begin-token to end-token
 SEM_MATCHING_TOKENS = {"ⓘ": "Ⓘ", "ⓓ": "Ⓓ", "ⓢ": "Ⓢ", "ⓒ": "Ⓒ", "ⓟ": "Ⓟ", "ⓐ": "Ⓐ"}
@@ -14,27 +13,6 @@ class MLflowNotInstalled(Exception):
     """
 
 
-def randint(low, high):
-    """
-    call torch.randint to preserve random among dataloader workers
-    """
-    return int(torch.randint(low, high, (1,)))
-
-
-def rand():
-    """
-    call torch.rand to preserve random among dataloader workers
-    """
-    return float(torch.rand((1,)))
-
-
-def rand_uniform(low, high):
-    """
-    call torch uniform to preserve random among dataloader workers
-    """
-    return float(Uniform(low, high).sample())
-
-
 def pad_sequences_1D(data, padding_value):
     """
     Pad data with padding_value to get same length
@@ -68,8 +46,8 @@
         elif padding_mode == "random":
            xmax = longest_x - x_len
            ymax = longest_y - y_len
-            xi = randint(0, xmax) if xmax >= 1 else 0
-            yi = randint(0, ymax) if ymax >= 1 else 0
+            xi = randint(0, xmax, (1,)) if xmax >= 1 else 0
+            yi = randint(0, ymax, (1,)) if ymax >= 1 else 0
             padded_data[i, xi : xi + x_len, yi : yi + y_len, ...] = data[i]
         else:
             raise NotImplementedError("Undefined padding mode: {}".format(padding_mode))
@@ -118,8 +96,8 @@
     elif padding_mode == "tl":
         hi, wi = pad_height, pad_width
     elif padding_mode == "random":
-        hi = randint(0, pad_height) if pad_height >= 1 else 0
-        wi = randint(0, pad_width) if pad_width >= 1 else 0
+        hi = randint(0, pad_height, (1,)) if pad_height >= 1 else 0
+        wi = randint(0, pad_width, (1,)) if pad_width >= 1 else 0
     else:
         raise NotImplementedError("Undefined padding mode: {}".format(padding_mode))
     padded_image[hi : hi + h, wi : wi + w, ...] = image
@@ -145,10 +123,3 @@
     height = int(image.shape[0] * scale)
     image = cv2.resize(image, (width, height), interpolation=cv2.INTER_AREA)
     return image
-
-
-def round_floats(float_list, decimals=2):
-    """
-    Round list of floats with fixed decimals
-    """
-    return [np.around(num, decimals) for num in float_list]
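
For context on the substitution above (this note is not part of the patch): torch.randint and torch.rand return tensors rather than Python scalars, and Uniform(low, high).sample() returns a 0-dim tensor, which is why the new call sites pass an explicit (1,) shape and add .item() wherever a plain int or float is required. A minimal sketch of the equivalence, assuming the removed dan.utils wrappers behaved exactly as defined in the deleted code:

# Hypothetical illustration, not part of the repository: maps the removed
# dan.utils helpers onto the direct torch calls used throughout the diff.
import torch
from torch.distributions.uniform import Uniform

# randint(low, high) used to return int(torch.randint(low, high, (1,))).
# The direct call yields a 1-element tensor; .item() gives the Python int.
kernel = torch.randint(3, 10, (1,)).item() // 2 * 2 + 1  # forced to an odd kernel size

# rand() used to return float(torch.rand((1,))).
# The 1-element tensor can still be compared against a probability threshold.
skip = torch.rand((1,)) > 0.5  # 1-element boolean tensor, truthy in an `if`

# rand_uniform(low, high) used to return float(Uniform(low, high).sample()).
sigma = Uniform(0.5, 2.0).sample().item()  # Python float drawn uniformly from [0.5, 2.0)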