Skip to content
Snippets Groups Projects
Commit 2ecb07cd authored by Solene Tarride
Browse files

generate synthetic documents with curriculum (number of lines)

parent f021b5d0
No related branches found
No related tags found
1 merge request: !24 Train with synthetic documents
......@@ -388,11 +388,16 @@ class OCRDataset(GenericDataset):
)
)
else:
label = self.samples[randint(0, len(self))]["label"]
# Get a page-level transcription and split it by lines
texts = self.samples[randint(0, len(self))]["label"].split("\n")
# Select some lines to be generated
n_lines = min(len(texts), nb_lines_per_page)
i = randint(0, len(texts) - n_lines + 1)
texts = texts[i : i + n_lines]
# Generate the synthetic document (of n_lines)
pages.append(
generate_typed_text_paragraph_image(
config=self.params["config"]["synthetic_data"]["config"],
texts=label.split("\n"),
self.generate_typed_text_paragraph_image(
texts=texts,
same_font_size=True,
)
)
......@@ -432,6 +437,74 @@ class OCRDataset(GenericDataset):
text, self.params["config"]["synthetic_data"]["config"]
)
def generate_typed_text_paragraph_image(
    self, texts, padding_value=255, max_pad_left_ratio=0.1, same_font_size=False
):
    """
    Render every text as a line image and stack the lines into one image.

    The synthetic-data configuration is read from
    self.params["config"]["synthetic_data"]["config"].

    :param texts: sequence of strings, one per line to render.
    :param padding_value: forwarded to pad_image_width_random.
    :param max_pad_left_ratio: forwarded to pad_image_width_random.
    :param same_font_size: when True, a single font size is drawn once and
        shared by all lines (the font file is still drawn per line);
        otherwise each line is delegated to generate_typed_text_line_image.
    :return: list [image, label, n_col] where image is the line images
        concatenated along axis 0, label maps "sem", "begin" and "raw" to
        the newline-joined texts, and n_col is always 1.
    :raises ValueError: if texts is empty.
    """
    # Fail early with a clear message: without this guard an empty input
    # only surfaces later as "max() arg is an empty sequence".
    if not texts:
        raise ValueError("texts must contain at least one line to render")

    config = self.params["config"]["synthetic_data"]["config"]
    if same_font_size:
        txt_color = config["text_color_default"]
        bg_color = config["background_color_default"]
        # NOTE(review): the "+ 1" and the use of randint as a list index
        # suggest an exclusive upper bound (numpy-style) - confirm.
        font_size = randint(config["font_size_min"], config["font_size_max"] + 1)
        images = []
        for text in texts:
            font_path = config["valid_fonts"][
                randint(0, len(config["valid_fonts"]))
            ]
            fnt = ImageFont.truetype(font_path, font_size)
            # NOTE(review): ImageFont.getsize was removed in Pillow 10;
            # confirm the Pillow version this project pins.
            text_width, text_height = fnt.getsize(text)
            # Vertical paddings scale with the text height, horizontal ones
            # with the text width. The order (top, bottom, left, right) is
            # both the order of the random draws and the layout of the
            # padding list handed to the line renderer.
            padding_specs = (
                ("padding_top_ratio_min", "padding_top_ratio_max", text_height),
                (
                    "padding_bottom_ratio_min",
                    "padding_bottom_ratio_max",
                    text_height,
                ),
                ("padding_left_ratio_min", "padding_left_ratio_max", text_width),
                (
                    "padding_right_ratio_min",
                    "padding_right_ratio_max",
                    text_width,
                ),
            )
            padding = [
                int(rand_uniform(config[min_key], config[max_key]) * extent)
                for min_key, max_key, extent in padding_specs
            ]
            images.append(
                generate_typed_text_line_image_from_params(
                    text, fnt, bg_color, txt_color, config["color_mode"], padding
                )
            )
    else:
        # NOTE(review): the helper is called with the text only; confirm it
        # does not also require the synthetic-data config.
        images = [generate_typed_text_line_image(t) for t in texts]

    # Bring every line to the width of the widest one so they can be stacked.
    max_width = max(img.shape[1] for img in images)
    padded_images = [
        pad_image_width_random(
            img,
            max_width,
            padding_value=padding_value,
            max_pad_left_ratio=max_pad_left_ratio,
        )
        for img in images
    ]
    joined_text = "\n".join(texts)
    label = {
        "sem": joined_text,
        "begin": joined_text,
        "raw": joined_text,
    }
    return [np.concatenate(padded_images, axis=0), label, 1]  # image, label, n_col
class OCRCollateFunction:
"""
......@@ -618,70 +691,6 @@ def generate_typed_text_line_image_from_params(
return np.array(img)
def generate_typed_text_paragraph_image(
    config, texts, padding_value=255, max_pad_left_ratio=0.1, same_font_size=False
):
    """
    Render every text as a line image and stack the lines into one image.

    :param config: mapping holding the synthetic-data settings read here
        (default colors, font size bounds, valid fonts, padding ratio
        bounds and color mode).
    :param texts: sequence of strings, one per line to render.
    :param padding_value: forwarded to pad_image_width_random.
    :param max_pad_left_ratio: forwarded to pad_image_width_random.
    :param same_font_size: when True, a single font size is drawn once and
        shared by all lines (the font file is still drawn per line);
        otherwise each line is delegated to generate_typed_text_line_image.
    :return: list [image, label, n_col] where image is the line images
        concatenated along axis 0, label maps "sem", "begin" and "raw" to
        the newline-joined texts, and n_col is always 1.
    :raises ValueError: if texts is empty.
    """
    # Fail early with a clear message: without this guard an empty input
    # only surfaces later as "max() arg is an empty sequence".
    if not texts:
        raise ValueError("texts must contain at least one line to render")

    if same_font_size:
        txt_color = config["text_color_default"]
        bg_color = config["background_color_default"]
        # NOTE(review): the "+ 1" and the use of randint as a list index
        # suggest an exclusive upper bound (numpy-style) - confirm.
        font_size = randint(config["font_size_min"], config["font_size_max"] + 1)
        images = []
        for text in texts:
            font_path = config["valid_fonts"][randint(0, len(config["valid_fonts"]))]
            fnt = ImageFont.truetype(font_path, font_size)
            # NOTE(review): ImageFont.getsize was removed in Pillow 10;
            # confirm the Pillow version this project pins.
            text_width, text_height = fnt.getsize(text)
            # Vertical paddings scale with the text height, horizontal ones
            # with the text width. The order (top, bottom, left, right) is
            # both the order of the random draws and the layout of the
            # padding list handed to the line renderer.
            padding_specs = (
                ("padding_top_ratio_min", "padding_top_ratio_max", text_height),
                (
                    "padding_bottom_ratio_min",
                    "padding_bottom_ratio_max",
                    text_height,
                ),
                ("padding_left_ratio_min", "padding_left_ratio_max", text_width),
                (
                    "padding_right_ratio_min",
                    "padding_right_ratio_max",
                    text_width,
                ),
            )
            padding = [
                int(rand_uniform(config[min_key], config[max_key]) * extent)
                for min_key, max_key, extent in padding_specs
            ]
            images.append(
                generate_typed_text_line_image_from_params(
                    text, fnt, bg_color, txt_color, config["color_mode"], padding
                )
            )
    else:
        # NOTE(review): the helper is called with the text only; confirm it
        # does not also require the config argument.
        images = [generate_typed_text_line_image(t) for t in texts]

    # Bring every line to the width of the widest one so they can be stacked.
    max_width = max(img.shape[1] for img in images)
    padded_images = [
        pad_image_width_random(
            img,
            max_width,
            padding_value=padding_value,
            max_pad_left_ratio=max_pad_left_ratio,
        )
        for img in images
    ]
    joined_text = "\n".join(texts)
    label = {
        "sem": joined_text,
        "begin": joined_text,
        "raw": joined_text,
    }
    return [np.concatenate(padded_images, axis=0), label, 1]  # image, label, n_col
def char_in_font(unicode_char, font_path):
with TTFont(font_path) as font:
for cmap in font["cmap"].tables:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment