Skip to content
Snippets Groups Projects
Verified Commit f37ae1f0 authored by Yoann Schneider's avatar Yoann Schneider :tennis:
Browse files

final touches

parent 4af68829
No related branches found
No related tags found
2 merge requests: !23 Implement format command, !11 Implement extraction command
This commit is part of merge request !11. Comments created here will be created in the context of that merge request.
......@@ -233,6 +233,37 @@ class ArkindexExtractor:
else:
return self.split_names[2]
def extract_entities(self, transcription):
    """Return the transcription text with entity tokens inserted.

    Queries ``ListTranscriptionEntities`` for the given transcription,
    filtered by ``self.entity_worker_version``, then passes the text
    through ``insert_token`` once per returned entity, using the
    start/end tokens registered in ``self.tokens`` for the entity's
    subtype.

    :param transcription: Mapping providing at least the ``"id"`` and
        ``"text"`` keys.
    :return: The text produced by the successive ``insert_token`` calls,
        or ``None`` when the response reports zero entities.
    """
    response = self.client.request(
        "ListTranscriptionEntities",
        id=transcription["id"],
        worker_version=self.entity_worker_version,
    )

    # Guard clause: nothing to annotate, warn and hand back None.
    if response["count"] == 0:
        logger.warning(
            f"No entities found on transcription ({transcription['id']})."
        )
        return None

    annotated = transcription["text"]
    # Running value threaded through insert_token alongside the text;
    # presumably tracks the shift introduced by earlier insertions
    # (TODO confirm against insert_token).
    shift = 0
    for item in response["results"]:
        subtype_tokens = self.tokens[item["entity"]["metas"]["subtype"]]
        opening = subtype_tokens["start"]
        closing = subtype_tokens["end"]
        annotated, shift = insert_token(
            annotated,
            shift,
            opening,
            closing,
            offset=item["offset"],
            length=item["length"],
        )
    return annotated
def extract_transcription(
self,
element,
......@@ -251,37 +282,9 @@ class ArkindexExtractor:
transcription = transcriptions["results"].pop()
if self.load_entities:
entities = self.client.request(
"ListTranscriptionEntities",
id=transcription["id"],
worker_version=self.entity_worker_version,
)
if entities["count"] == 0:
logger.warning(
f"No entities found on transcription ({transcription['id']})."
)
return
else:
text = transcription["text"]
count = 0
for entity in entities["results"]:
matching_tokens = self.tokens[entity["entity"]["metas"]["subtype"]]
start_token, end_token = (
matching_tokens["start"],
matching_tokens["end"],
)
text, count = insert_token(
text,
count,
start_token,
end_token,
offset=entity["offset"],
length=entity["length"],
)
return self.extract_entities(transcription)
else:
text = transcription["text"].strip()
return text
return transcription["text"].strip()
def process_element(
self,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment