Skip to content
Snippets Groups Projects
Commit 45ce3f9d authored by Eva Bardou's avatar Eva Bardou :frog:
Browse files

Allow extracting elements holding transcriptions without entities

parent 2e719c9e
No related branches found
No related tags found
1 merge request: !348 Allow extracting elements holding transcriptions without entities
......@@ -116,6 +116,9 @@ class ArkindexExtractor:
supported_types=list(self.tokens),
)
if not entities.count():
return transcription.text.strip()
return self.translate(
entities_to_xml(
transcription.text, entities, entity_separators=self.entity_separators
......
......@@ -425,6 +425,33 @@ def test_empty_transcription(allow_empty, mock_database):
extractor.extract_transcription(element_no_transcription)
def test_extract_transcription_no_tokens(mock_database):
    """Extract a text line's transcription with an extractor built without tokens.

    The extractor is created with ``tokens=None`` and the extracted
    transcription is expected to be the plain line text.
    """
    extractor_options = {
        "element_type": ["text_line"],
        "entity_separators": None,
        # No tokens provided to the extractor
        "tokens": None,
    }
    line_extractor = ArkindexExtractor(**extractor_options)
    text_line = Element.get_by_id("test-page_1-line_1")

    extracted_text = line_extractor.extract_transcription(text_line)

    assert extracted_text == "Coupez Bouis 7.12.14"
def test_extract_transcription_without_entities(mock_database):
    """Extract a text line's transcription after all of its entities are deleted.

    The extractor is configured with a tokens file, every
    ``TranscriptionEntity`` attached to the element's transcriptions is
    removed, and the extracted transcription is expected to be the plain
    line text.
    """
    tokens_path = EXTRACTION_DATA_PATH / "tokens.yml"
    line_extractor = ArkindexExtractor(
        element_type=["text_line"],
        entity_separators=None,
        tokens=tokens_path,
    )
    text_line = Element.get_by_id("test-page_1-line_1")

    # Delete all entities on the element's transcriptions while leaving
    # the transcriptions themselves intact.
    line_transcriptions = Transcription.select().where(
        Transcription.element == text_line
    )
    entity_cleanup = TranscriptionEntity.delete().where(
        TranscriptionEntity.transcription.in_(line_transcriptions)
    )
    entity_cleanup.execute()

    extracted_text = line_extractor.extract_transcription(text_line)

    assert extracted_text == "Coupez Bouis 7.12.14"
@pytest.mark.parametrize(
"nestation, xml_output, separators",
(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment