diff --git a/dan/datasets/extract/extract_from_arkindex.py b/dan/datasets/extract/extract_from_arkindex.py index b0274f9859c45dbaa50416a8305cb130a885c1dc..ea7860de60df6e6670f172f8d988361b820a1821 100644 --- a/dan/datasets/extract/extract_from_arkindex.py +++ b/dan/datasets/extract/extract_from_arkindex.py @@ -213,9 +213,11 @@ class ArkindexExtractor: count = 0 for entity in entities["results"]: - start_token, end_token = self.tokens[ - entity["entity"]["metas"]["subtype"] - ] + matching_tokens = self.tokens[entity["entity"]["metas"]["subtype"]] + start_token, end_token = ( + matching_tokens["start"], + matching_tokens["end"], + ) text, count = insert_token( text, count, @@ -349,10 +351,6 @@ def run( if load_entities: assert tokens, "Please provide the entities to match." - # Get and initialize the parameters. - os.makedirs(IMAGES_DIR, exist_ok=True) - os.makedirs(LABELS_DIR, exist_ok=True) - # Login to arkindex. assert ( "ARKINDEX_API_URL" in os.environ