From 58f3a2e34a18a0f004922f203ccece7305df0c08 Mon Sep 17 00:00:00 2001 From: Yoann Schneider <yschneider@teklia.com> Date: Wed, 16 Nov 2022 15:04:06 +0000 Subject: [PATCH] fix tokens retrieval --- dan/datasets/extract/extract_from_arkindex.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/dan/datasets/extract/extract_from_arkindex.py b/dan/datasets/extract/extract_from_arkindex.py index b0274f98..ea7860de 100644 --- a/dan/datasets/extract/extract_from_arkindex.py +++ b/dan/datasets/extract/extract_from_arkindex.py @@ -213,9 +213,11 @@ class ArkindexExtractor: count = 0 for entity in entities["results"]: - start_token, end_token = self.tokens[ - entity["entity"]["metas"]["subtype"] - ] + matching_tokens = self.tokens[entity["entity"]["metas"]["subtype"]] + start_token, end_token = ( + matching_tokens["start"], + matching_tokens["end"], + ) text, count = insert_token( text, count, @@ -349,10 +351,6 @@ def run( if load_entities: assert tokens, "Please provide the entities to match." - # Get and initialize the parameters. - os.makedirs(IMAGES_DIR, exist_ok=True) - os.makedirs(LABELS_DIR, exist_ok=True) - # Login to arkindex. assert ( "ARKINDEX_API_URL" in os.environ -- GitLab