Skip to content
Snippets Groups Projects
Commit 85deca6d authored by Yoann Schneider's avatar Yoann Schneider :tennis:
Browse files

final touches

parent 58f3a2e3
No related branches found
No related tags found
No related merge requests found
......@@ -180,6 +180,37 @@ class ArkindexExtractor:
else:
return self.split_names[2]
def extract_entities(self, transcription):
    """
    Wrap each entity of a transcription in its start/end tokens.

    Entities are requested through the ``ListTranscriptionEntities``
    operation, filtered on ``self.entity_worker_version``. The token pair
    used for an entity is looked up in ``self.tokens`` with the entity's
    ``metas.subtype`` value.

    :param transcription: Transcription payload; its ``id`` and ``text``
        keys are read.
    :return: The text produced by the successive ``insert_token`` calls,
        or ``None`` when the API reports zero entities.
    """
    response = self.client.request(
        "ListTranscriptionEntities",
        id=transcription["id"],
        worker_version=self.entity_worker_version,
    )

    # Guard clause: nothing to annotate, so warn and return None.
    if response["count"] == 0:
        logger.warning(
            f"No entities found on transcription ({transcription['id']})."
        )
        return None

    annotated = transcription["text"]
    # Counter threaded through insert_token; its exact meaning is defined
    # by that helper (presumably the cumulative shift caused by the tokens
    # already inserted -- confirm against insert_token).
    shift = 0
    for item in response["results"]:
        subtype = item["entity"]["metas"]["subtype"]
        markers = self.tokens[subtype]
        annotated, shift = insert_token(
            annotated,
            shift,
            markers["start"],
            markers["end"],
            offset=item["offset"],
            length=item["length"],
        )
    return annotated
def extract_transcription(
self,
element,
......@@ -198,37 +229,9 @@ class ArkindexExtractor:
transcription = transcriptions["results"].pop()
if self.load_entities:
entities = self.client.request(
"ListTranscriptionEntities",
id=transcription["id"],
worker_version=self.entity_worker_version,
)
if entities["count"] == 0:
logger.warning(
f"No entities found on transcription ({transcription['id']})."
)
return
else:
text = transcription["text"]
count = 0
for entity in entities["results"]:
matching_tokens = self.tokens[entity["entity"]["metas"]["subtype"]]
start_token, end_token = (
matching_tokens["start"],
matching_tokens["end"],
)
text, count = insert_token(
text,
count,
start_token,
end_token,
offset=entity["offset"],
length=entity["length"],
)
return self.extract_entities(transcription)
else:
text = transcription["text"].strip()
return text
return transcription["text"].strip()
def process_element(
self,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment