From d52df58b046d8e364b2d5074dc7934c10da3cb29 Mon Sep 17 00:00:00 2001
From: manonBlanco <blanco@teklia.com>
Date: Wed, 28 Jun 2023 16:30:49 +0200
Subject: [PATCH] Use a random transcription of an element when more than one
 is found

---
 dan/datasets/extract/exceptions.py | 9 ---------
 dan/datasets/extract/extract.py    | 7 ++-----
 2 files changed, 2 insertions(+), 14 deletions(-)

diff --git a/dan/datasets/extract/exceptions.py b/dan/datasets/extract/exceptions.py
index 7d3a8e33..22c47a6c 100644
--- a/dan/datasets/extract/exceptions.py
+++ b/dan/datasets/extract/exceptions.py
@@ -49,15 +49,6 @@ class NoTranscriptionError(ElementProcessingError):
         return f"No transcriptions found on element ({self.element_id}) with this config. Skipping."
 
 
-class MultipleTranscriptionsError(ElementProcessingError):
-    """
-    Raised when there are more than one transcription on an element
-    """
-
-    def __str__(self) -> str:
-        return f"More than one transcription found on element ({self.element_id}) with this config."
-
-
 class UnknownLabelError(ProcessingError):
     """
     Raised when the specified label is not known
diff --git a/dan/datasets/extract/extract.py b/dan/datasets/extract/extract.py
index d35b08d0..b4dd1da9 100644
--- a/dan/datasets/extract/extract.py
+++ b/dan/datasets/extract/extract.py
@@ -18,7 +18,6 @@ from dan.datasets.extract.db import (
     get_transcriptions,
 )
 from dan.datasets.extract.exceptions import (
-    MultipleTranscriptionsError,
     NoTranscriptionError,
     ProcessingError,
     UnknownLabelError,
@@ -129,12 +128,10 @@ class ArkindexExtractor:
         transcriptions = get_transcriptions(
             element.id, self.transcription_worker_version
         )
-        if len(transcriptions) > 1:
-            raise MultipleTranscriptionsError(element.id)
-        elif len(transcriptions) == 0:
+        if len(transcriptions) == 0:
             raise NoTranscriptionError(element.id)
 
-        transcription = transcriptions.pop()
+        transcription = random.choice(transcriptions)
 
         if self.load_entities:
             entities = get_transcription_entities(
-- 
GitLab