From 0ab3f1891f90a5ec89435c49173368de1bbfb2c0 Mon Sep 17 00:00:00 2001
From: Erwan Rouchet <rouchet@teklia.com>
Date: Thu, 27 Jan 2022 12:44:10 +0100
Subject: [PATCH] Add TranscriptionEntity.confidence to SQLite exports

---
 arkindex/documents/export/structure.sql           |  6 ++++--
 .../documents/export/transcription_entity.sql     |  3 ++-
 .../documents/management/commands/load_export.py  |  3 ++-
 .../documents/tests/commands/test_load_export.py  |  8 ++++++++
 arkindex/documents/tests/tasks/test_export.py     | 15 ++++++++++++---
 5 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/arkindex/documents/export/structure.sql b/arkindex/documents/export/structure.sql
index b2bcbf34ac..f8c603d7ba 100644
--- a/arkindex/documents/export/structure.sql
+++ b/arkindex/documents/export/structure.sql
@@ -1,6 +1,6 @@
 PRAGMA foreign_keys = ON;
 
-CREATE TABLE export_version AS SELECT 2 AS version;
+CREATE TABLE export_version AS SELECT 3 AS version;
 
 CREATE TABLE image_server (
     id VARCHAR(37) NOT NULL,
@@ -114,12 +114,14 @@ CREATE TABLE transcription_entity (
     offset INTEGER NOT NULL,
     length INTEGER NOT NULL,
     worker_version_id VARCHAR(37),
+    confidence REAL,
     PRIMARY KEY (id),
     FOREIGN KEY (transcription_id) REFERENCES transcription (id) ON DELETE CASCADE,
     FOREIGN KEY (entity_id) REFERENCES entity (id) ON DELETE CASCADE,
     FOREIGN KEY (worker_version_id) REFERENCES worker_version (id) ON DELETE CASCADE,
     UNIQUE (transcription_id, entity_id, offset, length, worker_version_id),
-    CHECK (offset >= 0 AND length >= 0)
+    CHECK (offset >= 0 AND length >= 0),
+    CHECK (confidence IS NULL OR (confidence >= 0 AND confidence <= 1))
 );
 
 CREATE TABLE entity_role (
diff --git a/arkindex/documents/export/transcription_entity.sql b/arkindex/documents/export/transcription_entity.sql
index eac7954aa3..932b73d3f0 100644
--- a/arkindex/documents/export/transcription_entity.sql
+++ b/arkindex/documents/export/transcription_entity.sql
@@ -6,7 +6,8 @@ SELECT
     te.entity_id,
     te.offset,
     te.length,
-    te.worker_version_id
+    te.worker_version_id,
+    te.confidence
 FROM documents_transcriptionentity te
 INNER JOIN documents_entity entity ON (te.entity_id = entity.id)
 WHERE entity.corpus_id = '{corpus_id}'::uuid
diff --git a/arkindex/documents/management/commands/load_export.py b/arkindex/documents/management/commands/load_export.py
index 0bd95ddd27..b892b1f058 100644
--- a/arkindex/documents/management/commands/load_export.py
+++ b/arkindex/documents/management/commands/load_export.py
@@ -258,6 +258,7 @@ class Command(BaseCommand):
             offset=row["offset"],
             length=row["length"],
             worker_version_id=row["worker_version_id"],
+            confidence=row["confidence"],
         )]
 
     def convert_metadatas(self, row):
@@ -337,7 +338,7 @@ class Command(BaseCommand):
 
         # Check export version
         db_results = cursor.execute(SQL_VERSION_QUERY).fetchall()
-        if len(db_results) != 1 or db_results[0]["version"] != 2:
+        if len(db_results) != 1 or db_results[0]["version"] != 3:
             raise CommandError(f"The SQLite database {db_path} does not have the correct export version")
 
         # Retrieve corpus name
diff --git a/arkindex/documents/tests/commands/test_load_export.py b/arkindex/documents/tests/commands/test_load_export.py
index e89ea725e1..66f07c841a 100644
--- a/arkindex/documents/tests/commands/test_load_export.py
+++ b/arkindex/documents/tests/commands/test_load_export.py
@@ -148,6 +148,14 @@ class TestLoadExport(FixtureTestCase):
             worker_version=version,
         )
 
+        transcription.transcription_entities.create(
+            entity=entity2,
+            offset=0,
+            length=1,
+            worker_version=version,
+            confidence=0.42,
+        )
+
         export = self.corpus.exports.create(user=self.user)
 
         export_corpus(export)
diff --git a/arkindex/documents/tests/tasks/test_export.py b/arkindex/documents/tests/tasks/test_export.py
index 0197cd6f5f..5e62754bdd 100644
--- a/arkindex/documents/tests/tasks/test_export.py
+++ b/arkindex/documents/tests/tasks/test_export.py
@@ -111,6 +111,14 @@ class TestExport(FixtureTestCase):
             worker_version=version,
         )
 
+        transcription.transcription_entities.create(
+            entity=entity2,
+            offset=0,
+            length=1,
+            worker_version=version,
+            confidence=0.42,
+        )
+
         export = self.corpus.exports.create(user=self.user)
 
         export_corpus(export)
@@ -144,7 +152,7 @@ class TestExport(FixtureTestCase):
         )
 
         self.assertCountEqual(
-            db.execute("SELECT version FROM export_version").fetchall(), [(2, )]
+            db.execute("SELECT version FROM export_version").fetchall(), [(3, )]
         )
 
         self.assertCountEqual(
@@ -342,7 +350,7 @@ class TestExport(FixtureTestCase):
         )
 
         self.assertCountEqual(
-            db.execute("SELECT id, transcription_id, entity_id, offset, length, worker_version_id FROM transcription_entity").fetchall(),
+            db.execute("SELECT id, transcription_id, entity_id, offset, length, worker_version_id, confidence FROM transcription_entity").fetchall(),
             [
                 (
                     str(transcription_entity.id),
@@ -350,7 +358,8 @@ class TestExport(FixtureTestCase):
                     str(transcription_entity.entity_id),
                     transcription_entity.offset,
                     transcription_entity.length,
-                    str(transcription_entity.worker_version_id) if transcription_entity.worker_version_id else None
+                    str(transcription_entity.worker_version_id) if transcription_entity.worker_version_id else None,
+                    transcription_entity.confidence,
                 )
                 for transcription_entity in TranscriptionEntity.objects.filter(entity__corpus=self.corpus)
             ]
-- 
GitLab