diff --git a/arkindex/documents/export/structure.sql b/arkindex/documents/export/structure.sql index b2bcbf34ac841ca6e7c2839461a588a184cfd828..f8c603d7bae1d2f6db056a89a1985471f9f213f4 100644 --- a/arkindex/documents/export/structure.sql +++ b/arkindex/documents/export/structure.sql @@ -1,6 +1,6 @@ PRAGMA foreign_keys = ON; -CREATE TABLE export_version AS SELECT 2 AS version; +CREATE TABLE export_version AS SELECT 3 AS version; CREATE TABLE image_server ( id VARCHAR(37) NOT NULL, @@ -114,12 +114,14 @@ CREATE TABLE transcription_entity ( offset INTEGER NOT NULL, length INTEGER NOT NULL, worker_version_id VARCHAR(37), + confidence REAL, PRIMARY KEY (id), FOREIGN KEY (transcription_id) REFERENCES transcription (id) ON DELETE CASCADE, FOREIGN KEY (entity_id) REFERENCES entity (id) ON DELETE CASCADE, FOREIGN KEY (worker_version_id) REFERENCES worker_version (id) ON DELETE CASCADE, UNIQUE (transcription_id, entity_id, offset, length, worker_version_id), - CHECK (offset >= 0 AND length >= 0) + CHECK (offset >= 0 AND length >= 0), + CHECK (confidence IS NULL OR (confidence >= 0 AND confidence <= 1)) ); CREATE TABLE entity_role ( diff --git a/arkindex/documents/export/transcription_entity.sql b/arkindex/documents/export/transcription_entity.sql index eac7954aa340786266fb5fcaea0422c4e9325dac..932b73d3f0f03c850bcf3b5df4e8b4cc8b49e408 100644 --- a/arkindex/documents/export/transcription_entity.sql +++ b/arkindex/documents/export/transcription_entity.sql @@ -6,7 +6,8 @@ SELECT te.entity_id, te.offset, te.length, - te.worker_version_id + te.worker_version_id, + te.confidence FROM documents_transcriptionentity te INNER JOIN documents_entity entity ON (te.entity_id = entity.id) WHERE entity.corpus_id = '{corpus_id}'::uuid diff --git a/arkindex/documents/management/commands/load_export.py b/arkindex/documents/management/commands/load_export.py index 0bd95ddd27337b91be020cc30c9eeeaac7726766..b892b1f058c16c43fba687cdc2598638be153f11 100644 --- a/arkindex/documents/management/commands/load_export.py +++ b/arkindex/documents/management/commands/load_export.py @@ -258,6 +258,7 @@ class Command(BaseCommand): offset=row["offset"], length=row["length"], worker_version_id=row["worker_version_id"], + confidence=row["confidence"], )] def convert_metadatas(self, row): @@ -337,7 +338,7 @@ class Command(BaseCommand): # Check export version db_results = cursor.execute(SQL_VERSION_QUERY).fetchall() - if len(db_results) != 1 or db_results[0]["version"] != 2: + if len(db_results) != 1 or db_results[0]["version"] != 3: raise CommandError(f"The SQLite database {db_path} does not have the correct export version") # Retrieve corpus name diff --git a/arkindex/documents/tests/commands/test_load_export.py b/arkindex/documents/tests/commands/test_load_export.py index e89ea725e1ee4ad3554816dd8798a02ec230d631..66f07c841ae059e5e8ae7970f4acef200871e4ae 100644 --- a/arkindex/documents/tests/commands/test_load_export.py +++ b/arkindex/documents/tests/commands/test_load_export.py @@ -148,6 +148,14 @@ class TestLoadExport(FixtureTestCase): worker_version=version, ) + transcription.transcription_entities.create( + entity=entity2, + offset=0, + length=1, + worker_version=version, + confidence=0.42, + ) + export = self.corpus.exports.create(user=self.user) export_corpus(export) diff --git a/arkindex/documents/tests/tasks/test_export.py b/arkindex/documents/tests/tasks/test_export.py index 0197cd6f5f96f298e272a59f4922e6d7a57beda2..5e62754bddda68e7f44d0e2eb1e1b286ce6eb921 100644 --- a/arkindex/documents/tests/tasks/test_export.py +++ b/arkindex/documents/tests/tasks/test_export.py @@ -111,6 +111,14 @@ class TestExport(FixtureTestCase): worker_version=version, ) + transcription.transcription_entities.create( + entity=entity2, + offset=0, + length=1, + worker_version=version, + confidence=0.42, + ) + export = self.corpus.exports.create(user=self.user) export_corpus(export) @@ -144,7 +152,7 @@ class TestExport(FixtureTestCase): ) self.assertCountEqual( - db.execute("SELECT version FROM export_version").fetchall(), [(2, )] + db.execute("SELECT version FROM export_version").fetchall(), [(3, )] ) self.assertCountEqual( @@ -342,7 +350,7 @@ class TestExport(FixtureTestCase): ) self.assertCountEqual( - db.execute("SELECT id, transcription_id, entity_id, offset, length, worker_version_id FROM transcription_entity").fetchall(), + db.execute("SELECT id, transcription_id, entity_id, offset, length, worker_version_id, confidence FROM transcription_entity").fetchall(), [ ( str(transcription_entity.id), @@ -350,7 +358,8 @@ class TestExport(FixtureTestCase): str(transcription_entity.entity_id), transcription_entity.offset, transcription_entity.length, - str(transcription_entity.worker_version_id) if transcription_entity.worker_version_id else None + str(transcription_entity.worker_version_id) if transcription_entity.worker_version_id else None, + transcription_entity.confidence, ) for transcription_entity in TranscriptionEntity.objects.filter(entity__corpus=self.corpus) ]