Skip to content
Snippets Groups Projects
Commit 5f3fc3d8 authored by Chaza Abdelwahab
Browse files

tested changes

parent 5d328548
No related branches found
No related tags found
1 merge request: !21 — Draft: Resolve "support getting a certain element type's transcriptions and not just a page's"
Pipeline #74325 passed
......@@ -38,7 +38,6 @@ MANUAL = "manual"
TEXT_LINE = "text_line"
WHITE = 255
DEFAULT_RESCALE = 1.0
ROTATION_CLASSES_TO_ANGLES = {
"rotate_0": 0,
"rotate_left_90": 90,
......@@ -105,6 +104,7 @@ class HTRDataGenerator:
self.max_deskew_angle = max_deskew_angle
self.skew_angle = skew_angle
self.should_rotate = should_rotate
if scale_x or scale_y_top or scale_y_bottom:
self.should_resize_polygons = True
# use 1.0 as default - no resize, if not specified
......@@ -504,13 +504,16 @@ class HTRDataGenerator:
logger.debug(f"Page {page_id}")
self.extract_lines(page_id, image_data)
def run_volumes(self, volume_ids: list):
def run_volumes(self, volume_ids: list, element_type):
for volume_id in tqdm.tqdm(volume_ids):
logger.info(f"Volume {volume_id}")
pages = [
page
for page in self.api_client.cached_paginate(
"ListElementChildren", id=volume_id, recursive=True, type="page"
"ListElementChildren",
id=volume_id,
recursive=True,
type=element_type,
)
]
self.run_pages(pages)
......@@ -809,6 +812,13 @@ def create_parser():
help="Cache dir where to save the full size downloaded images. Change it to force redownload.",
)
parser.add_argument(
"--element_type",
type=str,
default="page",
help="The type of the element from which you want to extract the transcriptions",
)
return parser
......@@ -849,7 +859,7 @@ def main():
if args.pages:
data_generator.run_pages(args.pages)
if args.volumes:
data_generator.run_volumes(args.volumes)
data_generator.run_volumes(args.volumes, args.element_type)
if args.folders:
data_generator.run_folders(args.folders, args.volume_type)
if args.corpora:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment