From 0ab3f1891f90a5ec89435c49173368de1bbfb2c0 Mon Sep 17 00:00:00 2001 From: Erwan Rouchet <rouchet@teklia.com> Date: Thu, 27 Jan 2022 12:44:10 +0100 Subject: [PATCH] Add TranscriptionEntity.confidence to SQLite exports --- arkindex/documents/export/structure.sql | 6 ++++-- .../documents/export/transcription_entity.sql | 3 ++- .../documents/management/commands/load_export.py | 3 ++- .../documents/tests/commands/test_load_export.py | 8 ++++++++ arkindex/documents/tests/tasks/test_export.py | 15 ++++++++++++--- 5 files changed, 28 insertions(+), 7 deletions(-) diff --git a/arkindex/documents/export/structure.sql b/arkindex/documents/export/structure.sql index b2bcbf34ac..f8c603d7ba 100644 --- a/arkindex/documents/export/structure.sql +++ b/arkindex/documents/export/structure.sql @@ -1,6 +1,6 @@ PRAGMA foreign_keys = ON; -CREATE TABLE export_version AS SELECT 2 AS version; +CREATE TABLE export_version AS SELECT 3 AS version; CREATE TABLE image_server ( id VARCHAR(37) NOT NULL, @@ -114,12 +114,14 @@ CREATE TABLE transcription_entity ( offset INTEGER NOT NULL, length INTEGER NOT NULL, worker_version_id VARCHAR(37), + confidence REAL, PRIMARY KEY (id), FOREIGN KEY (transcription_id) REFERENCES transcription (id) ON DELETE CASCADE, FOREIGN KEY (entity_id) REFERENCES entity (id) ON DELETE CASCADE, FOREIGN KEY (worker_version_id) REFERENCES worker_version (id) ON DELETE CASCADE, UNIQUE (transcription_id, entity_id, offset, length, worker_version_id), - CHECK (offset >= 0 AND length >= 0) + CHECK (offset >= 0 AND length >= 0), + CHECK (confidence IS NULL OR (confidence >= 0 AND confidence <= 1)) ); CREATE TABLE entity_role ( diff --git a/arkindex/documents/export/transcription_entity.sql b/arkindex/documents/export/transcription_entity.sql index eac7954aa3..932b73d3f0 100644 --- a/arkindex/documents/export/transcription_entity.sql +++ b/arkindex/documents/export/transcription_entity.sql @@ -6,7 +6,8 @@ SELECT te.entity_id, te.offset, te.length, - te.worker_version_id + te.worker_version_id, + te.confidence FROM documents_transcriptionentity te INNER JOIN documents_entity entity ON (te.entity_id = entity.id) WHERE entity.corpus_id = '{corpus_id}'::uuid diff --git a/arkindex/documents/management/commands/load_export.py b/arkindex/documents/management/commands/load_export.py index 0bd95ddd27..b892b1f058 100644 --- a/arkindex/documents/management/commands/load_export.py +++ b/arkindex/documents/management/commands/load_export.py @@ -258,6 +258,7 @@ class Command(BaseCommand): offset=row["offset"], length=row["length"], worker_version_id=row["worker_version_id"], + confidence=row["confidence"], )] def convert_metadatas(self, row): @@ -337,7 +338,7 @@ class Command(BaseCommand): # Check export version db_results = cursor.execute(SQL_VERSION_QUERY).fetchall() - if len(db_results) != 1 or db_results[0]["version"] != 2: + if len(db_results) != 1 or db_results[0]["version"] != 3: raise CommandError(f"The SQLite database {db_path} does not have the correct export version") # Retrieve corpus name diff --git a/arkindex/documents/tests/commands/test_load_export.py b/arkindex/documents/tests/commands/test_load_export.py index e89ea725e1..66f07c841a 100644 --- a/arkindex/documents/tests/commands/test_load_export.py +++ b/arkindex/documents/tests/commands/test_load_export.py @@ -148,6 +148,14 @@ class TestLoadExport(FixtureTestCase): worker_version=version, ) + transcription.transcription_entities.create( + entity=entity2, + offset=0, + length=1, + worker_version=version, + confidence=0.42, + ) + export = self.corpus.exports.create(user=self.user) export_corpus(export) diff --git a/arkindex/documents/tests/tasks/test_export.py b/arkindex/documents/tests/tasks/test_export.py index 0197cd6f5f..5e62754bdd 100644 --- a/arkindex/documents/tests/tasks/test_export.py +++ b/arkindex/documents/tests/tasks/test_export.py @@ -111,6 +111,14 @@ class TestExport(FixtureTestCase): worker_version=version, ) + transcription.transcription_entities.create( + entity=entity2, + offset=0, + length=1, + worker_version=version, + confidence=0.42, + ) + export = self.corpus.exports.create(user=self.user) export_corpus(export) @@ -144,7 +152,7 @@ class TestExport(FixtureTestCase): ) self.assertCountEqual( - db.execute("SELECT version FROM export_version").fetchall(), [(2, )] + db.execute("SELECT version FROM export_version").fetchall(), [(3, )] ) self.assertCountEqual( @@ -342,7 +350,7 @@ class TestExport(FixtureTestCase): ) self.assertCountEqual( - db.execute("SELECT id, transcription_id, entity_id, offset, length, worker_version_id FROM transcription_entity").fetchall(), + db.execute("SELECT id, transcription_id, entity_id, offset, length, worker_version_id, confidence FROM transcription_entity").fetchall(), [ ( str(transcription_entity.id), @@ -350,7 +358,8 @@ class TestExport(FixtureTestCase): str(transcription_entity.entity_id), transcription_entity.offset, transcription_entity.length, - str(transcription_entity.worker_version_id) if transcription_entity.worker_version_id else None + str(transcription_entity.worker_version_id) if transcription_entity.worker_version_id else None, + transcription_entity.confidence, ) for transcription_entity in TranscriptionEntity.objects.filter(entity__corpus=self.corpus) ] -- GitLab