diff --git a/dan/datasets/extract/extract_from_arkindex.py b/dan/datasets/extract/extract_from_arkindex.py index b2617e8a4e228970e94ea8143a23754dd98d7411..1a2276b3f4d7fd1baafb84ae3f49668de8a946ab 100644 --- a/dan/datasets/extract/extract_from_arkindex.py +++ b/dan/datasets/extract/extract_from_arkindex.py @@ -266,9 +266,11 @@ class ArkindexExtractor: count = 0 for entity in entities["results"]: - start_token, end_token = self.tokens[ - entity["entity"]["metas"]["subtype"] - ] + matching_tokens = self.tokens[entity["entity"]["metas"]["subtype"]] + start_token, end_token = ( + matching_tokens["start"], + matching_tokens["end"], + ) text, count = insert_token( text, count, @@ -402,10 +404,6 @@ def run( if load_entities: assert tokens, "Please provide the entities to match." - # Get and initialize the parameters. - os.makedirs(IMAGES_DIR, exist_ok=True) - os.makedirs(LABELS_DIR, exist_ok=True) - # Login to arkindex. assert ( "ARKINDEX_API_URL" in os.environ