diff --git a/dan/datasets/extract/exceptions.py b/dan/datasets/extract/exceptions.py index 7d3a8e33cc9be21c7f9b71bfc1244d4a8942023d..22c47a6c0faafe0f8aa1796fceae1f40a7c22fd4 100644 --- a/dan/datasets/extract/exceptions.py +++ b/dan/datasets/extract/exceptions.py @@ -49,15 +49,6 @@ class NoTranscriptionError(ElementProcessingError): return f"No transcriptions found on element ({self.element_id}) with this config. Skipping." -class MultipleTranscriptionsError(ElementProcessingError): - """ - Raised when there are more than one transcription on an element - """ - - def __str__(self) -> str: - return f"More than one transcription found on element ({self.element_id}) with this config." - - class UnknownLabelError(ProcessingError): """ Raised when the specified label is not known diff --git a/dan/datasets/extract/extract.py b/dan/datasets/extract/extract.py index d35b08d0400192bd31d6454e87e9b5687f246e08..b4dd1da9908ff04040919843a4559ead633d6813 100644 --- a/dan/datasets/extract/extract.py +++ b/dan/datasets/extract/extract.py @@ -18,7 +18,6 @@ from dan.datasets.extract.db import ( get_transcriptions, ) from dan.datasets.extract.exceptions import ( - MultipleTranscriptionsError, NoTranscriptionError, ProcessingError, UnknownLabelError, @@ -129,12 +128,10 @@ class ArkindexExtractor: transcriptions = get_transcriptions( element.id, self.transcription_worker_version ) - if len(transcriptions) > 1: - raise MultipleTranscriptionsError(element.id) - elif len(transcriptions) == 0: + if len(transcriptions) == 0: raise NoTranscriptionError(element.id) - transcription = transcriptions.pop() + transcription = random.choice(transcriptions) if self.load_entities: entities = get_transcription_entities(