Skip to content
Snippets Groups Projects
Commit aa982900 authored by Martin
Browse files

support choosing elements by arkindex selection

parent f2d4d49d
No related branches found
No related tags found
No related merge requests found
Pipeline #74335 failed
......@@ -7,6 +7,7 @@ import os
import random
from collections import Counter, defaultdict
from enum import Enum
from itertools import groupby
from pathlib import Path
from typing import List
......@@ -574,6 +575,16 @@ class HTRDataGenerator:
return trimmed_img
def run_selection(self):
    """Process every element of the user's current selection.

    Elements are fetched through ``self.api_client.paginate("ListSelection")``
    and dispatched by their ``"type"`` key: ``"page"`` elements go to
    ``run_pages``; ``"volume"`` and ``"folder"`` elements go to
    ``run_volumes``.

    Raises:
        ValueError: if the selection contains an element of any other type.
            The whole selection is validated before any processing starts,
            so nothing is processed when this is raised.
    """
    pages = []
    volumes = []
    # Bucket in a single pass instead of itertools.groupby: groupby only
    # merges *consecutive* items with equal keys, so an unsorted selection
    # would be split into many small groups and processed piecemeal.
    for elem in self.api_client.paginate("ListSelection"):
        elem_type = elem["type"]
        if elem_type == "page":
            pages.append(elem)
        elif elem_type in ("volume", "folder"):
            volumes.append(elem)
        else:
            raise ValueError(f"Unsupported element type {elem_type} in selection!")

    # Dispatch only non-empty buckets so an empty selection is a no-op.
    if pages:
        self.run_pages(pages)
    if volumes:
        self.run_volumes(volumes)
def run_pages(self, pages: list):
if all(isinstance(n, str) for n in pages):
for page in pages:
......@@ -840,6 +851,13 @@ def create_parser():
default="volume",
help="Volumes (1 level above page) may have a different name on corpora",
)
parser.add_argument(
"--selection",
action="store_true",
default=False,
help="Get elements from selection",
)
parser.add_argument(
"--skip_vertical_lines",
action="store_true",
......@@ -962,6 +980,8 @@ def main():
)
# extract all the lines and transcriptions
if args.selection:
data_generator.run_selection()
if args.pages:
data_generator.run_pages(args.pages)
if args.volumes:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment