Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • atr/dan
1 result
Show changes
Commits on Source (2)
......@@ -9,7 +9,6 @@ import torch
from fontTools.ttLib import TTFont
from PIL import Image, ImageDraw, ImageFont
from dan import logger
from dan.manager.dataset import DatasetManager, GenericDataset, apply_preprocessing
from dan.ocr.utils import LM_str_to_ind
from dan.utils import (
......@@ -42,8 +41,6 @@ class OCRDatasetManager(DatasetManager):
and self.params["config"]["synthetic_data"]
):
self.synthetic_data = self.params["config"]["synthetic_data"]
if "config" in self.synthetic_data:
self.synthetic_data["config"]["valid_fonts"] = self.get_valid_fonts()
if "new_tokens" in params:
self.charset = sorted(
......@@ -104,34 +101,6 @@ class OCRDatasetManager(DatasetManager):
[s["img"].shape[1] for s in self.train_dataset.samples]
)
def get_valid_fonts(self):
    """
    Select fonts that are compatible with the alphabet

    Recursively walks ``self.synthetic_data["font_path"]`` and keeps every
    file whose name ends with ``.ttf`` and for which ``char_in_font`` is
    truthy for every character of ``self.charset``. The newline character
    is excluded from that check.

    :return: List of paths to the compatible font files, in ``os.walk`` order.
    """
    fonts_root = self.synthetic_data["font_path"]
    # Newline is excluded from the compatibility check.
    special_chars = ["\n"]
    alphabet = [char for char in self.charset if char not in special_chars]
    valid_fonts = []
    for folder, _, file_names in os.walk(fonts_root):
        for font_name in file_names:
            # Test the extension itself: a substring test would also accept
            # names such as "font.ttf.bak".
            if not font_name.endswith(".ttf"):
                continue
            # Separate name so the walked root directory is not shadowed.
            candidate = os.path.join(folder, font_name)
            # all() stops at the first unsupported character.
            if all(char_in_font(char, candidate) for char in alphabet):
                valid_fonts.append(candidate)
    logger.info(f"Found {len(valid_fonts)} fonts.")
    return valid_fonts
class OCRDataset(GenericDataset):
"""
......
......@@ -1042,7 +1042,6 @@ class OCRManager(GenericTrainingManager):
{
"path": sample["path"],
"label": chunk,
"nb_cols": 1,
}
)
......@@ -1055,7 +1054,6 @@ class OCRManager(GenericTrainingManager):
Image.fromarray(img).save(img_path)
gt[set_name][img_name] = {
"text": sample["label"],
"nb_cols": sample["nb_cols"] if "nb_cols" in sample else 1,
}
if "line_label" in sample:
gt[set_name][img_name]["lines"] = sample["line_label"]
......