Skip to content
Snippets Groups Projects

Allow extracting elements holding transcriptions without entities

Merged Eva Bardou requested to merge extract-page-without-entities into main
All threads resolved!
1 file
+ 17
18
Compare changes
  • Side-by-side
  • Inline
+ 17
18
@@ -425,30 +425,29 @@ def test_empty_transcription(allow_empty, mock_database):
extractor.extract_transcription(element_no_transcription)
def test_extract_transcription_no_tokens(mock_database):
@pytest.mark.parametrize("tokens", (None, EXTRACTION_DATA_PATH / "tokens.yml"))
def test_extract_transcription_no_translation(mock_database, tokens):
extractor = ArkindexExtractor(
element_type=["text_line"],
entity_separators=None,
# No tokens provided to the extractor
tokens=None,
tokens=tokens,
)
element = Element.get_by_id("test-page_1-line_1")
assert extractor.extract_transcription(element) == "Coupez Bouis 7.12.14"
def test_extract_transcription_without_entities(mock_database):
extractor = ArkindexExtractor(
element_type=["text_line"],
entity_separators=None,
tokens=EXTRACTION_DATA_PATH / "tokens.yml",
)
element = Element.get_by_id("test-page_1-line_1")
# Deleting all entities on the element transcriptions while leaving the transcriptions intact
TranscriptionEntity.delete().where(
TranscriptionEntity.transcription.in_(
Transcription.select().where(Transcription.element == element)
)
).execute()
# Deleting one of the two transcriptions from the element
Transcription.get(
Transcription.element == element,
Transcription.worker_version_id == "worker_version_id",
).delete_instance(recursive=True)
# Deleting all entities on the element remaining transcription while leaving the transcription intact
if tokens:
TranscriptionEntity.delete().where(
TranscriptionEntity.transcription
== Transcription.select().where(Transcription.element == element).get()
).execute()
# Early return with only the element transcription text instead of a translation
assert extractor.extract_transcription(element) == "Coupez Bouis 7.12.14"
Loading