
Implement extraction command

Merged Yoann Schneider requested to merge implement-extraction-command into main
1 file  +33  −30
@@ -233,6 +233,37 @@ class ArkindexExtractor:
         else:
             return self.split_names[2]
 
+    def extract_entities(self, transcription):
+        entities = self.client.request(
+            "ListTranscriptionEntities",
+            id=transcription["id"],
+            worker_version=self.entity_worker_version,
+        )
+        if entities["count"] == 0:
+            logger.warning(
+                f"No entities found on transcription ({transcription['id']})."
+            )
+            return
+        else:
+            text = transcription["text"]
+            count = 0
+            for entity in entities["results"]:
+                matching_tokens = self.tokens[entity["entity"]["metas"]["subtype"]]
+                start_token, end_token = (
+                    matching_tokens["start"],
+                    matching_tokens["end"],
+                )
+                text, count = insert_token(
+                    text,
+                    count,
+                    start_token,
+                    end_token,
+                    offset=entity["offset"],
+                    length=entity["length"],
+                )
+            return text
+
     def extract_transcription(
         self,
         element,
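
Note for reviewers: the new `extract_entities` method relies on the `insert_token` helper, which this diff does not touch. As a reading aid, here is a minimal, hypothetical sketch of a helper matching the call site above, assuming `count` tracks how many characters previous insertions have already added so that the original Arkindex offsets stay valid; the real helper elsewhere in the repository may differ.

```python
# Hypothetical sketch only -- not the project's actual insert_token.
def insert_token(text, count, start_token, end_token, offset, length):
    """Wrap text[offset:offset + length] with start_token/end_token.

    `count` is the number of characters added by previous insertions,
    so entity offsets coming from the API stay valid across the loop.
    """
    start = count + offset
    end = start + length
    text = text[:start] + start_token + text[start:end] + end_token + text[end:]
    # Report the extra characters introduced by this insertion.
    return text, count + len(start_token) + len(end_token)
```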
@@ -251,37 +282,9 @@
         transcription = transcriptions["results"].pop()
 
         if self.load_entities:
-            entities = self.client.request(
-                "ListTranscriptionEntities",
-                id=transcription["id"],
-                worker_version=self.entity_worker_version,
-            )
-            if entities["count"] == 0:
-                logger.warning(
-                    f"No entities found on transcription ({transcription['id']})."
-                )
-                return
-            else:
-                text = transcription["text"]
-                count = 0
-                for entity in entities["results"]:
-                    matching_tokens = self.tokens[entity["entity"]["metas"]["subtype"]]
-                    start_token, end_token = (
-                        matching_tokens["start"],
-                        matching_tokens["end"],
-                    )
-                    text, count = insert_token(
-                        text,
-                        count,
-                        start_token,
-                        end_token,
-                        offset=entity["offset"],
-                        length=entity["length"],
-                    )
-                return text
+            return self.extract_entities(transcription)
-        else:
-            text = transcription["text"].strip()
-            return text
+        return transcription["text"].strip()
 
     def process_element(
         self,
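
To make the tagging loop concrete, here is a small, self-contained illustration of what `extract_entities` produces, using fabricated tokens, subtypes and entities in place of a real `ListTranscriptionEntities` response, together with the `insert_token` sketch above:

```python
# Fabricated example data -- not taken from a real Arkindex corpus.
tokens = {
    "surname": {"start": "Ⓢ", "end": "Ⓢ"},
    "firstname": {"start": "Ⓕ", "end": "Ⓕ"},
}
transcription = {"id": "fake-id", "text": "Dupont Jean"}
entities = {
    "count": 2,
    "results": [
        {"offset": 0, "length": 6, "entity": {"metas": {"subtype": "surname"}}},
        {"offset": 7, "length": 4, "entity": {"metas": {"subtype": "firstname"}}},
    ],
}

# Same loop body as the new extract_entities method.
text, count = transcription["text"], 0
for entity in entities["results"]:
    matching_tokens = tokens[entity["entity"]["metas"]["subtype"]]
    text, count = insert_token(
        text,
        count,
        matching_tokens["start"],
        matching_tokens["end"],
        offset=entity["offset"],
        length=entity["length"],
    )
print(text)  # ⓈDupontⓈ ⒻJeanⒻ (with the sketch above)
```

Behaviour should be unchanged by the refactor: the tagging loop is moved verbatim into `extract_entities`, and dropping the `else` branch in `extract_transcription` is safe because the `if self.load_entities` branch always returns.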