diff --git a/dan/datasets/extract/arkindex.py b/dan/datasets/extract/arkindex.py index 93742351d4a73bd13f5299d1cc1bd5268472ae1e..12dff600a54c7b8768c973b8d759bae515a36911 100644 --- a/dan/datasets/extract/arkindex.py +++ b/dan/datasets/extract/arkindex.py @@ -106,9 +106,10 @@ class ArkindexExtractor: raise NoTranscriptionError(element.id) transcription = random.choice(transcriptions) + stripped_text = transcription.text.strip() if not self.tokens: - return transcription.text.strip() + return stripped_text entities = get_transcription_entities( transcription.id, @@ -117,7 +118,7 @@ class ArkindexExtractor: ) if not entities.count(): - return transcription.text.strip() + return stripped_text return self.translate( entities_to_xml(