Skip to content
Snippets Groups Projects

Allow extracting elements holding transcriptions without entities

Merged Eva Bardou requested to merge extract-page-without-entities into main
All threads resolved!
Files
2
@@ -106,9 +106,10 @@ class ArkindexExtractor:
 raise NoTranscriptionError(element.id)
 transcription = random.choice(transcriptions)
+stripped_text = transcription.text.strip()
 if not self.tokens:
-return transcription.text.strip()
+return stripped_text
 entities = get_transcription_entities(
 transcription.id,
@@ -116,6 +117,9 @@ class ArkindexExtractor:
 supported_types=list(self.tokens),
 )
+if not entities.count():
+return stripped_text
 return self.translate(
 entities_to_xml(
 transcription.text, entities, entity_separators=self.entity_separators
Loading