Skip to content
Snippets Groups Projects

Rename images with line id

Merged Solene Tarride requested to merge rename-images-with-line-id into master
2 files
+ 23
- 7
Compare changes
  • Side-by-side
  • Inline
Files
2
@@ -165,10 +165,14 @@ class HTRDataGenerator:
def get_accepted_zones(self, page_id: str):
try:
accepted_zones = []
print(self.api_client.cached_paginate(
"ListElementChildren", id=page_id, with_classes=True
))
for elt in self.api_client.cached_paginate(
"ListElementChildren", id=page_id, with_classes=True
):
elem_classes = [c for c in elt["classes"] if c["state"] != "rejected"]
print(elt, elem_classes)
should_accept = True
if self.should_filter_by_class:
@@ -307,6 +311,7 @@ class HTRDataGenerator:
trans_data = TranscriptionData(
element_id=res["element"]["id"],
element_name=res["element"]["name"],
polygon=polygon,
text=text,
trans_id=res["id"],
@@ -380,18 +385,25 @@ class HTRDataGenerator:
}
def _save_line_image(
self, page_id, i, line_img, manifest_fp=None, trans: TranscriptionData = None
self, page_id, line_img, manifest_fp=None, trans: TranscriptionData = None
):
# Get line id
line_id = trans.element_id
# Get line number from its name
line_number = trans.element_name.split('_')[-1]
if self.should_rotate:
if trans.rotation_class:
rotate_angle = ROTATION_CLASSES_TO_ANGLES[trans.rotation_class]
line_img = rotate_and_trim(line_img, rotate_angle, WHITE)
if self.format == "kraken":
save_img(f"{self.out_line_dir}/{page_id}_{i}.png", line_img)
manifest_fp.write(f"{page_id}_{i}.png\n")
# Save image using the template {page_id}_{line_number}_{line_id}
# TODO: check whether zero-padding line_number to 3 digits (format spec 0>3) is enough
save_img(f"{self.out_line_dir}/{page_id}_{line_number:0>3}_{line_id}.png".format(), line_img)
manifest_fp.write(f"{page_id}_{line_number:0>3}_{line_id}.png\n")
else:
save_img(f"{self.out_line_img_dir}/{page_id}_{i}.jpg", line_img)
save_img(f"{self.out_line_img_dir}/{page_id}_{line_number:0>3}_{line_id}.jpg", line_img)
def extract_lines(self, page_id: str, image_data: dict):
if self.should_filter_by_class or self.should_filter_by_style:
@@ -441,7 +453,7 @@ class HTRDataGenerator:
# not needed for kaldi
manifest_fp = None
for i, trans in enumerate(sorted_lines):
for trans in sorted_lines:
extracted_img = extract(
img=img,
polygon=trans.polygon,
@@ -452,7 +464,8 @@ class HTRDataGenerator:
grayscale=self.grayscale,
)
self._save_line_image(page_id, i, extracted_img, manifest_fp, trans)
# Don't enumerate: read the line number from the element's name (e.g. line_xx) so that it matches Arkindex
self._save_line_image(page_id, extracted_img, manifest_fp, trans)
if self.format == "kraken":
manifest_fp.close()
Loading