From 58f3a2e34a18a0f004922f203ccece7305df0c08 Mon Sep 17 00:00:00 2001
From: Yoann Schneider <yschneider@teklia.com>
Date: Wed, 16 Nov 2022 15:04:06 +0000
Subject: [PATCH] fix tokens retrieval

---
 dan/datasets/extract/extract_from_arkindex.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/dan/datasets/extract/extract_from_arkindex.py b/dan/datasets/extract/extract_from_arkindex.py
index b0274f98..ea7860de 100644
--- a/dan/datasets/extract/extract_from_arkindex.py
+++ b/dan/datasets/extract/extract_from_arkindex.py
@@ -213,9 +213,11 @@ class ArkindexExtractor:
 
             count = 0
             for entity in entities["results"]:
-                start_token, end_token = self.tokens[
-                    entity["entity"]["metas"]["subtype"]
-                ]
+                matching_tokens = self.tokens[entity["entity"]["metas"]["subtype"]]
+                start_token, end_token = (
+                    matching_tokens["start"],
+                    matching_tokens["end"],
+                )
                 text, count = insert_token(
                     text,
                     count,
@@ -349,10 +351,6 @@ def run(
     if load_entities:
         assert tokens, "Please provide the entities to match."
 
-    # Get and initialize the parameters.
-    os.makedirs(IMAGES_DIR, exist_ok=True)
-    os.makedirs(LABELS_DIR, exist_ok=True)
-
     # Login to arkindex.
     assert (
         "ARKINDEX_API_URL" in os.environ
-- 
GitLab