diff --git a/tests/test_extract.py b/tests/test_extract.py index 199041f8a1af9a9cd744684cc7c725aceea270b3..1bf92c3ac1e318a51a15e421408e74d5db83e3eb 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -425,30 +425,29 @@ def test_empty_transcription(allow_empty, mock_database): extractor.extract_transcription(element_no_transcription) -def test_extract_transcription_no_tokens(mock_database): +@pytest.mark.parametrize("tokens", (None, EXTRACTION_DATA_PATH / "tokens.yml")) +def test_extract_transcription_no_translation(mock_database, tokens): extractor = ArkindexExtractor( element_type=["text_line"], entity_separators=None, - # No tokens provided to the extractor - tokens=None, + tokens=tokens, ) - element = Element.get_by_id("test-page_1-line_1") - assert extractor.extract_transcription(element) == "Coupez Bouis 7.12.14" - -def test_extract_transcription_without_entities(mock_database): - extractor = ArkindexExtractor( - element_type=["text_line"], - entity_separators=None, - tokens=EXTRACTION_DATA_PATH / "tokens.yml", - ) element = Element.get_by_id("test-page_1-line_1") - # Deleting all entities on the element transcriptions while leaving the transcriptions intact - TranscriptionEntity.delete().where( - TranscriptionEntity.transcription.in_( - Transcription.select().where(Transcription.element == element) - ) - ).execute() + # Deleting one of the two transcriptions from the element + Transcription.get( + Transcription.element == element, + Transcription.worker_version_id == "worker_version_id", + ).delete_instance(recursive=True) + + # Deleting all entities on the element remaining transcription while leaving the transcription intact + if tokens: + TranscriptionEntity.delete().where( + TranscriptionEntity.transcription + == Transcription.select().where(Transcription.element == element).get() + ).execute() + + # Early return with only the element transcription text instead of a translation assert extractor.extract_transcription(element) == "Coupez Bouis 7.12.14"