Skip to content
Snippets Groups Projects
Commit a21bc941 authored by Martin
Browse files

select folders by ids

parent 2b036eb1
No related branches found
No related tags found
No related merge requests found
...@@ -165,6 +165,13 @@ class KaldiDataGenerator: ...@@ -165,6 +165,13 @@ class KaldiDataGenerator:
page_ids = [page['id'] for page in api_client.paginate('ListElementChildren', id=volume_id)] page_ids = [page['id'] for page in api_client.paginate('ListElementChildren', id=volume_id)]
self.run_pages(page_ids) self.run_pages(page_ids)
def run_folders(self, element_ids: list, volume_type: str):
    """Process every element of type `volume_type` found under the given folders.

    For each folder id, the children are listed recursively through the
    'ListElementChildren' endpoint, filtered on `volume_type`, and the ids
    of the returned elements are handed over to `run_volumes`.

    :param element_ids: ids of the folders to explore.
    :param volume_type: element type used to filter the folder children.
    """
    # tqdm only wraps the iterable to display progress over the folders.
    for elem_id in tqdm.tqdm(element_ids):
        logger.info(f"Folder {elem_id}")
        children = api_client.paginate(
            'ListElementChildren',
            id=elem_id,
            recursive=True,
            type=volume_type,
        )
        volume_ids = [child['id'] for child in children]
        self.run_volumes(volume_ids)
def run_corpora(self, corpus_ids: list, volume_type: str): def run_corpora(self, corpus_ids: list, volume_type: str):
for corpus_id in tqdm.tqdm(corpus_ids): for corpus_id in tqdm.tqdm(corpus_ids):
logger.info(f"Corpus {corpus_id}") logger.info(f"Corpus {corpus_id}")
...@@ -251,6 +258,9 @@ def create_parser(): ...@@ -251,6 +258,9 @@ def create_parser():
parser.add_argument('--corpora', nargs='*', parser.add_argument('--corpora', nargs='*',
help='List of corpus ids to be used, separated by spaces') help='List of corpus ids to be used, separated by spaces')
parser.add_argument('--folders', type=str, nargs='*',
help='List of folder ids to be used, separated by spaces. '
'Elements of `volume_type` will be searched recursively in these folders')
parser.add_argument('--volumes', nargs='*', parser.add_argument('--volumes', nargs='*',
help='List of volume ids to be used, separated by spaces') help='List of volume ids to be used, separated by spaces')
parser.add_argument('--pages', nargs='*', parser.add_argument('--pages', nargs='*',
...@@ -283,6 +293,8 @@ def main(): ...@@ -283,6 +293,8 @@ def main():
kaldi_data_generator.run_pages(args.pages) kaldi_data_generator.run_pages(args.pages)
if args.volumes: if args.volumes:
kaldi_data_generator.run_volumes(args.volumes) kaldi_data_generator.run_volumes(args.volumes)
if args.folders:
kaldi_data_generator.run_folders(args.folders, args.volume_type)
if args.corpora: if args.corpora:
kaldi_data_generator.run_corpora(args.corpora, args.volume_type) kaldi_data_generator.run_corpora(args.corpora, args.volume_type)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment