From d52df58b046d8e364b2d5074dc7934c10da3cb29 Mon Sep 17 00:00:00 2001 From: manonBlanco <blanco@teklia.com> Date: Wed, 28 Jun 2023 16:30:49 +0200 Subject: [PATCH] Use a random transcription of an element when more than one found --- dan/datasets/extract/exceptions.py | 9 --------- dan/datasets/extract/extract.py | 7 ++----- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/dan/datasets/extract/exceptions.py b/dan/datasets/extract/exceptions.py index 7d3a8e33..22c47a6c 100644 --- a/dan/datasets/extract/exceptions.py +++ b/dan/datasets/extract/exceptions.py @@ -49,15 +49,6 @@ class NoTranscriptionError(ElementProcessingError): return f"No transcriptions found on element ({self.element_id}) with this config. Skipping." -class MultipleTranscriptionsError(ElementProcessingError): - """ - Raised when there are more than one transcription on an element - """ - - def __str__(self) -> str: - return f"More than one transcription found on element ({self.element_id}) with this config." - - class UnknownLabelError(ProcessingError): """ Raised when the specified label is not known diff --git a/dan/datasets/extract/extract.py b/dan/datasets/extract/extract.py index d35b08d0..b4dd1da9 100644 --- a/dan/datasets/extract/extract.py +++ b/dan/datasets/extract/extract.py @@ -18,7 +18,6 @@ from dan.datasets.extract.db import ( get_transcriptions, ) from dan.datasets.extract.exceptions import ( - MultipleTranscriptionsError, NoTranscriptionError, ProcessingError, UnknownLabelError, @@ -129,12 +128,10 @@ class ArkindexExtractor: transcriptions = get_transcriptions( element.id, self.transcription_worker_version ) - if len(transcriptions) > 1: - raise MultipleTranscriptionsError(element.id) - elif len(transcriptions) == 0: + if len(transcriptions) == 0: raise NoTranscriptionError(element.id) - transcription = transcriptions.pop() + transcription = random.choice(transcriptions) if self.load_entities: entities = get_transcription_entities( -- GitLab