From efd96ba4f91bc2faa79a2916bb630992256c50e8 Mon Sep 17 00:00:00 2001
From: Erwan Rouchet <rouchet@teklia.com>
Date: Tue, 25 Apr 2023 11:56:35 +0000
Subject: [PATCH] Export WorkerRuns

---
 arkindex/documents/export/__init__.py         |  1 +
 arkindex/documents/export/classification.sql  |  3 +-
 arkindex/documents/export/element.sql         |  1 +
 arkindex/documents/export/entity.sql          |  3 +-
 arkindex/documents/export/indexes.sql         |  8 ++
 arkindex/documents/export/metadata.sql        |  3 +-
 arkindex/documents/export/structure.sql       | 45 ++++++++--
 arkindex/documents/export/transcription.sql   |  3 +-
 .../documents/export/transcription_entity.sql |  1 +
 arkindex/documents/export/worker_run.sql      | 46 ++++++++++
 .../management/commands/load_export.py        | 53 +++++++++++-
 .../tests/commands/test_load_export.py        | 31 +++++--
 arkindex/documents/tests/tasks/test_export.py | 85 +++++++++++++++++--
 13 files changed, 255 insertions(+), 28 deletions(-)
 create mode 100644 arkindex/documents/export/worker_run.sql

diff --git a/arkindex/documents/export/__init__.py b/arkindex/documents/export/__init__.py
index 89b8cbe2f9..779852419a 100644
--- a/arkindex/documents/export/__init__.py
+++ b/arkindex/documents/export/__init__.py
@@ -30,6 +30,7 @@ EXPORT_QUERIES = [
     'image_server',
     'image',
     'worker_version',
+    'worker_run',
     'element',
     'element_path',
     'transcription',
diff --git a/arkindex/documents/export/classification.sql b/arkindex/documents/export/classification.sql
index cb03484685..9cb1acb1cf 100644
--- a/arkindex/documents/export/classification.sql
+++ b/arkindex/documents/export/classification.sql
@@ -7,7 +7,8 @@ SELECT
     classification.confidence,
     -- SQLite has no boolean type, so high_confidence becomes an integer (0 or 1)
     classification.high_confidence::integer,
-    classification.worker_version_id
+    classification.worker_version_id,
+    classification.worker_run_id
 FROM documents_classification classification
 INNER JOIN documents_element element ON (element.id = classification.element_id)
 INNER JOIN documents_mlclass mlclass ON (mlclass.id = classification.ml_class_id)
diff --git a/arkindex/documents/export/element.sql b/arkindex/documents/export/element.sql
index c5ffe91505..3c6c1a7d1f 100644
--- a/arkindex/documents/export/element.sql
+++ b/arkindex/documents/export/element.sql
@@ -11,6 +11,7 @@ SELECT
     element.rotation_angle,
     element.mirrored::integer,
     element.worker_version_id,
+    element.worker_run_id,
     element.confidence
 FROM documents_element element
 INNER JOIN documents_elementtype type ON (element.type_id = type.id)
diff --git a/arkindex/documents/export/entity.sql b/arkindex/documents/export/entity.sql
index 3515b63f25..8949691cf6 100644
--- a/arkindex/documents/export/entity.sql
+++ b/arkindex/documents/export/entity.sql
@@ -5,7 +5,8 @@ SELECT
     entity.validated::integer,
     moderator.email,
     hstore_to_json(entity.metas),
-    entity.worker_version_id
+    entity.worker_version_id,
+    entity.worker_run_id
 FROM documents_entity entity
 LEFT JOIN users_user moderator ON (moderator.id = entity.moderator_id)
 WHERE entity.corpus_id = '{corpus_id}'::uuid
diff --git a/arkindex/documents/export/indexes.sql b/arkindex/documents/export/indexes.sql
index 84eab47a63..d0b28e3aa3 100644
--- a/arkindex/documents/export/indexes.sql
+++ b/arkindex/documents/export/indexes.sql
@@ -1,23 +1,30 @@
 CREATE INDEX image_server_id ON image (server_id);
 
+CREATE INDEX worker_run_worker_version_id ON worker_run (worker_version_id);
+
 CREATE INDEX element_image_id ON element (image_id);
 CREATE INDEX element_worker_version_id ON element (worker_version_id);
+CREATE INDEX element_worker_run_id ON element (worker_run_id);
 
 CREATE INDEX element_path_parent_id ON element_path (parent_id);
 CREATE INDEX element_path_child_id ON element_path (child_id);
 
 CREATE INDEX transcription_element_id ON transcription (element_id);
 CREATE INDEX transcription_worker_version_id ON transcription (worker_version_id);
+CREATE INDEX transcription_worker_run_id ON transcription (worker_run_id);
 
 CREATE INDEX classification_element_id ON classification (element_id);
 CREATE INDEX classification_worker_version_id ON classification (worker_version_id);
+CREATE INDEX classification_worker_run_id ON classification (worker_run_id);
 
 CREATE INDEX entity_worker_version_id ON entity (worker_version_id);
+CREATE INDEX entity_worker_run_id ON entity (worker_run_id);
 CREATE INDEX entity_type_id ON entity (type_id);
 
 CREATE INDEX transcription_entity_transcription_id ON transcription_entity (transcription_id);
 CREATE INDEX transcription_entity_entity_id ON transcription_entity (entity_id);
 CREATE INDEX transcription_entity_worker_version_id ON transcription_entity (worker_version_id);
+CREATE INDEX transcription_entity_worker_run_id ON transcription_entity (worker_run_id);
 
 CREATE INDEX entity_link_parent_id ON entity_link (parent_id);
 CREATE INDEX entity_link_child_id ON entity_link (child_id);
@@ -29,3 +36,4 @@ CREATE INDEX entity_role_child_type_id ON entity_role (child_type_id);
 CREATE INDEX metadata_element_id ON metadata (element_id);
 CREATE INDEX metadata_entity_id ON metadata (entity_id);
 CREATE INDEX metadata_worker_version_id ON metadata (worker_version_id);
+CREATE INDEX metadata_worker_run_id ON metadata (worker_run_id);
diff --git a/arkindex/documents/export/metadata.sql b/arkindex/documents/export/metadata.sql
index c4a9fa7021..714b73b208 100644
--- a/arkindex/documents/export/metadata.sql
+++ b/arkindex/documents/export/metadata.sql
@@ -5,7 +5,8 @@ SELECT
     metadata.type,
     metadata.value,
     metadata.entity_id,
-    metadata.worker_version_id
+    metadata.worker_version_id,
+    metadata.worker_run_id
 FROM documents_metadata metadata
 INNER JOIN documents_element element ON (element.id = metadata.element_id)
 WHERE element.corpus_id = '{corpus_id}'::uuid
diff --git a/arkindex/documents/export/structure.sql b/arkindex/documents/export/structure.sql
index b82d67c24a..f6d75f580f 100644
--- a/arkindex/documents/export/structure.sql
+++ b/arkindex/documents/export/structure.sql
@@ -1,6 +1,6 @@
 PRAGMA foreign_keys = ON;
 
-CREATE TABLE export_version AS SELECT 5 AS version;
+CREATE TABLE export_version AS SELECT 6 AS version;
 
 CREATE TABLE image_server (
     id VARCHAR(37) NOT NULL,
@@ -35,6 +35,20 @@ CREATE TABLE worker_version (
     PRIMARY KEY (id)
 );
 
+CREATE TABLE worker_run (
+    id VARCHAR(37) NOT NULL,
+    worker_version_id VARCHAR(37) NOT NULL,
+    model_version_id VARCHAR(37),
+    model_id VARCHAR(37),
+    model_name VARCHAR(100),
+    configuration_id VARCHAR(37),
+    configuration TEXT,
+    PRIMARY KEY (id),
+    FOREIGN KEY (worker_version_id) REFERENCES worker_version (id) ON DELETE CASCADE,
+    CHECK ((model_version_id IS NULL) = (model_id IS NULL) AND (model_id IS NULL) = (model_name IS NULL)),
+    CHECK ((configuration_id IS NULL) = (configuration IS NULL))
+);
+
 CREATE TABLE element (
     id VARCHAR(37) NOT NULL,
     created REAL NOT NULL,
@@ -46,14 +60,17 @@ CREATE TABLE element (
     rotation_angle INTEGER NOT NULL DEFAULT 0,
     mirrored INTEGER NOT NULL DEFAULT 0,
     worker_version_id VARCHAR(37),
+    worker_run_id VARCHAR(37),
     confidence REAL,
     PRIMARY KEY (id),
     FOREIGN KEY (image_id) REFERENCES image (id) ON DELETE CASCADE,
     FOREIGN KEY (worker_version_id) REFERENCES worker_version (id) ON DELETE CASCADE,
+    FOREIGN KEY (worker_run_id) REFERENCES worker_run (id) ON DELETE CASCADE,
     CHECK ((image_id IS NULL AND polygon IS NULL) OR (image_id IS NOT NULL AND polygon IS NOT NULL)),
     CHECK (rotation_angle >= 0 AND rotation_angle <= 359),
     CHECK (mirrored = 0 OR mirrored = 1),
-    CHECK (confidence IS NULL OR (confidence >= 0 AND confidence <= 1))
+    CHECK (confidence IS NULL OR (confidence >= 0 AND confidence <= 1)),
+    CHECK (worker_run_id IS NULL OR worker_version_id IS NOT NULL)
 );
 
 CREATE TABLE element_path (
@@ -75,9 +92,11 @@ CREATE TABLE transcription (
     confidence REAL,
     orientation TEXT NOT NULL DEFAULT 'horizontal-lr',
     worker_version_id VARCHAR(37),
+    worker_run_id VARCHAR(37),
     PRIMARY KEY (id),
     FOREIGN KEY (element_id) REFERENCES element (id) ON DELETE CASCADE,
     FOREIGN KEY (worker_version_id) REFERENCES worker_version (id) ON DELETE CASCADE,
+    FOREIGN KEY (worker_run_id) REFERENCES worker_run (id) ON DELETE CASCADE,
     CHECK (confidence IS NULL OR (confidence >= 0 AND confidence <= 1)),
     CHECK (orientation IN ('horizontal-lr', 'horizontal-rl', 'vertical-lr', 'vertical-rl'))
 );
@@ -91,11 +110,14 @@ CREATE TABLE classification (
     confidence REAL,
     high_confidence INTEGER NOT NULL DEFAULT 0,
     worker_version_id VARCHAR(37),
+    worker_run_id VARCHAR(37),
     PRIMARY KEY (id),
     FOREIGN KEY (element_id) REFERENCES element (id) ON DELETE CASCADE,
     FOREIGN KEY (worker_version_id) REFERENCES worker_version (id) ON DELETE CASCADE,
+    FOREIGN KEY (worker_run_id) REFERENCES worker_run (id) ON DELETE CASCADE,
     CHECK (confidence IS NULL OR (confidence >= 0 AND confidence <= 1)),
-    CHECK (high_confidence = 0 OR high_confidence = 1)
+    CHECK (high_confidence = 0 OR high_confidence = 1),
+    CHECK (worker_run_id IS NULL OR worker_version_id IS NOT NULL)
 );
 
 CREATE TABLE entity (
@@ -106,10 +128,13 @@ CREATE TABLE entity (
     moderator VARCHAR(255),
     metas TEXT,
     worker_version_id VARCHAR(37),
+    worker_run_id VARCHAR(37),
     PRIMARY KEY (id),
     FOREIGN KEY (worker_version_id) REFERENCES worker_version (id) ON DELETE CASCADE,
+    FOREIGN KEY (worker_run_id) REFERENCES worker_run (id) ON DELETE CASCADE,
     FOREIGN KEY (type_id) REFERENCES entity_type (id) ON DELETE CASCADE,
-    CHECK (validated = 0 OR validated = 1)
+    CHECK (validated = 0 OR validated = 1),
+    CHECK (worker_run_id IS NULL OR worker_version_id IS NOT NULL)
 );
 
 CREATE TABLE entity_type (
@@ -127,14 +152,17 @@ CREATE TABLE transcription_entity (
     offset INTEGER NOT NULL,
     length INTEGER NOT NULL,
     worker_version_id VARCHAR(37),
+    worker_run_id VARCHAR(37),
     confidence REAL,
     PRIMARY KEY (id),
     FOREIGN KEY (transcription_id) REFERENCES transcription (id) ON DELETE CASCADE,
     FOREIGN KEY (entity_id) REFERENCES entity (id) ON DELETE CASCADE,
     FOREIGN KEY (worker_version_id) REFERENCES worker_version (id) ON DELETE CASCADE,
-    UNIQUE (transcription_id, entity_id, offset, length, worker_version_id),
+    FOREIGN KEY (worker_run_id) REFERENCES worker_run (id) ON DELETE CASCADE,
+    UNIQUE (transcription_id, entity_id, offset, length, worker_version_id, worker_run_id),
     CHECK (offset >= 0 AND length >= 0),
-    CHECK (confidence IS NULL OR (confidence >= 0 AND confidence <= 1))
+    CHECK (confidence IS NULL OR (confidence >= 0 AND confidence <= 1)),
+    CHECK (worker_run_id IS NULL OR worker_version_id IS NOT NULL)
 );
 
 CREATE TABLE entity_role (
@@ -168,8 +196,11 @@ CREATE TABLE metadata (
     value TEXT NOT NULL,
     entity_id VARCHAR(37),
     worker_version_id VARCHAR(37),
+    worker_run_id VARCHAR(37),
     PRIMARY KEY (id),
     FOREIGN KEY (element_id) REFERENCES element (id) ON DELETE CASCADE,
     FOREIGN KEY (entity_id) REFERENCES entity (id) ON DELETE SET NULL,
-    FOREIGN KEY (worker_version_id) REFERENCES worker_version (id) ON DELETE CASCADE
+    FOREIGN KEY (worker_version_id) REFERENCES worker_version (id) ON DELETE CASCADE,
+    FOREIGN KEY (worker_run_id) REFERENCES worker_run (id) ON DELETE CASCADE,
+    CHECK (worker_run_id IS NULL OR worker_version_id IS NOT NULL)
 );
diff --git a/arkindex/documents/export/transcription.sql b/arkindex/documents/export/transcription.sql
index 251497e89d..0f71045d41 100644
--- a/arkindex/documents/export/transcription.sql
+++ b/arkindex/documents/export/transcription.sql
@@ -4,7 +4,8 @@ SELECT
     transcription.text,
     transcription.confidence,
     transcription.orientation,
-    transcription.worker_version_id
+    transcription.worker_version_id,
+    transcription.worker_run_id
 FROM documents_transcription transcription
 INNER JOIN documents_element element ON (element.id = transcription.element_id)
 WHERE element.corpus_id = '{corpus_id}'::uuid
diff --git a/arkindex/documents/export/transcription_entity.sql b/arkindex/documents/export/transcription_entity.sql
index 932b73d3f0..52c9984de6 100644
--- a/arkindex/documents/export/transcription_entity.sql
+++ b/arkindex/documents/export/transcription_entity.sql
@@ -7,6 +7,7 @@ SELECT
     te.offset,
     te.length,
     te.worker_version_id,
+    te.worker_run_id,
     te.confidence
 FROM documents_transcriptionentity te
 INNER JOIN documents_entity entity ON (te.entity_id = entity.id)
diff --git a/arkindex/documents/export/worker_run.sql b/arkindex/documents/export/worker_run.sql
new file mode 100644
index 0000000000..fab77a3314
--- /dev/null
+++ b/arkindex/documents/export/worker_run.sql
@@ -0,0 +1,46 @@
+-- This filters worker runs to only include those used in any of the kinds
+-- of ML results we have.  Doing it using LEFT JOIN would require 9 joins and
+-- fills up the RAM.  Adding DISTINCT to all the SELECT queries of the UNION
+-- slows this query down by ~20%.  Using multiple INs instead of a UNION makes
+-- this query twice as slow.
+
+-- Note that exports may fail if an ML result uses a WorkerRun whose
+-- WorkerVersion differs from the result's own worker_version_id, as that
+-- version may not have been exported and the FK constraint may then fail.
+SELECT
+    run.id,
+    run.version_id,
+    run.model_version_id,
+    model.id,
+    model.name,
+    run.configuration_id,
+    configuration.configuration
+FROM process_workerrun run
+LEFT JOIN process_workerconfiguration configuration ON configuration.id = run.configuration_id
+LEFT JOIN training_modelversion modelversion ON modelversion.id = run.model_version_id
+LEFT JOIN training_model model ON model.id = modelversion.model_id
+WHERE run.id IN (
+    SELECT worker_run_id FROM documents_element WHERE corpus_id = '{corpus_id}'::uuid
+UNION ALL
+    SELECT worker_run_id FROM documents_entity WHERE corpus_id = '{corpus_id}'::uuid
+UNION ALL
+    SELECT classification.worker_run_id
+    FROM documents_classification classification
+    INNER JOIN documents_element element ON (element.id = classification.element_id)
+    WHERE element.corpus_id = '{corpus_id}'::uuid
+UNION ALL
+    SELECT transcription.worker_run_id
+    FROM documents_transcription transcription
+    INNER JOIN documents_element element ON (element.id = transcription.element_id)
+    WHERE element.corpus_id = '{corpus_id}'::uuid
+UNION ALL
+    SELECT te.worker_run_id
+    FROM documents_transcriptionentity te
+    INNER JOIN documents_entity entity ON (te.entity_id = entity.id)
+    WHERE entity.corpus_id = '{corpus_id}'::uuid
+UNION ALL
+    SELECT md.worker_run_id
+    FROM documents_metadata md
+    INNER JOIN documents_element element ON (md.element_id = element.id)
+    WHERE element.corpus_id = '{corpus_id}'::uuid
+)
diff --git a/arkindex/documents/management/commands/load_export.py b/arkindex/documents/management/commands/load_export.py
index 9b325aaeb6..3342c44511 100644
--- a/arkindex/documents/management/commands/load_export.py
+++ b/arkindex/documents/management/commands/load_export.py
@@ -28,16 +28,27 @@ from arkindex.documents.models import (
     TranscriptionEntity,
 )
 from arkindex.images.models import Image, ImageServer
-from arkindex.process.models import Repository, Revision, Worker, WorkerType, WorkerVersion
+from arkindex.process.models import (
+    ProcessMode,
+    Repository,
+    Revision,
+    Worker,
+    WorkerConfiguration,
+    WorkerRun,
+    WorkerType,
+    WorkerVersion,
+)
+from arkindex.training.models import Model
 from arkindex.users.models import Role, User
 
-EXPORT_VERSION = 5
+EXPORT_VERSION = 6
 
 TABLE_NAMES = {
     'export_version',
     'image_server',
     'image',
     'worker_version',
+    'worker_run',
     'element',
     'element_path',
     'entity',
@@ -65,6 +76,7 @@ SQL_REPOSITORY_QUERY = "SELECT DISTINCT repository_url FROM worker_version"
 SQL_REVISION_QUERY = "SELECT DISTINCT revision, repository_url FROM worker_version"
 SQL_WORKER_TYPE_QUERY = "SELECT DISTINCT type FROM worker_version"
 SQL_WORKER_VERSION_QUERY = "SELECT * FROM worker_version"
+SQL_WORKER_RUN_QUERY = "SELECT * FROM worker_run"
 
 SQL_IMAGE_SERVER_QUERY = "SELECT * FROM image_server"
 SQL_IMAGE_QUERY = """
@@ -94,6 +106,7 @@ SQL_ELEMENT_QUERY = """
         rotation_angle,
         mirrored,
         worker_version_id,
+        worker_run_id,
         confidence
     FROM element
     LEFT JOIN image ON (image.id = element.image_id)
@@ -187,6 +200,7 @@ class Command(BaseCommand):
             rotation_angle=row["rotation_angle"],
             mirrored=row["mirrored"],
             worker_version_id=self.worker_version_map[row["worker_version_id"]] if row["worker_version_id"] else None,
+            worker_run_id=self.worker_run_map[row["worker_run_id"]] if row["worker_run_id"] else None,
             confidence=row["confidence"],
             corpus=self.corpus
         )]
@@ -215,6 +229,7 @@ class Command(BaseCommand):
             moderator=User.objects.get(email=row["moderator"]) if row["moderator"] else None,
             metas=json.loads(row["metas"]) if row["metas"] else None,
             worker_version_id=self.worker_version_map[row["worker_version_id"]] if row["worker_version_id"] else None,
+            worker_run_id=self.worker_run_map[row["worker_run_id"]] if row["worker_run_id"] else None,
             corpus=self.corpus
         )]
 
@@ -252,6 +267,7 @@ class Command(BaseCommand):
             confidence=row["confidence"],
             orientation=row["orientation"],
             worker_version_id=self.worker_version_map[row["worker_version_id"]] if row["worker_version_id"] else None,
+            worker_run_id=self.worker_run_map[row["worker_run_id"]] if row["worker_run_id"] else None,
         )]
 
     def convert_transcription_entities(self, row):
@@ -262,6 +278,7 @@ class Command(BaseCommand):
             offset=row["offset"],
             length=row["length"],
             worker_version_id=self.worker_version_map[row["worker_version_id"]] if row["worker_version_id"] else None,
+            worker_run_id=self.worker_run_map[row["worker_run_id"]] if row["worker_run_id"] else None,
             confidence=row["confidence"],
         )]
 
@@ -274,6 +291,7 @@ class Command(BaseCommand):
             value=row["value"],
             entity_id=row["entity_id"],
             worker_version_id=self.worker_version_map[row["worker_version_id"]] if row["worker_version_id"] else None,
+            worker_run_id=self.worker_run_map[row["worker_run_id"]] if row["worker_run_id"] else None,
         )]
 
     def convert_classifications(self, row):
@@ -286,6 +304,7 @@ class Command(BaseCommand):
             confidence=row["confidence"],
             high_confidence=row["high_confidence"],
             worker_version_id=self.worker_version_map[row["worker_version_id"]] if row["worker_version_id"] else None,
+            worker_run_id=self.worker_run_map[row["worker_run_id"]] if row["worker_run_id"] else None,
         )]
 
     def bulk_create_objects(self, ModelClass, convert_method, sql_query, ignore_conflicts=True):
@@ -407,6 +426,33 @@ class Command(BaseCommand):
             }
         )
 
+    def create_worker_run(self, row):
+        """Get or create, in the user's local process, the WorkerRun for a `worker_run` row."""
+        worker_version_id = self.worker_version_map[row['worker_version_id']]
+        model_version, configuration = None, None
+
+        if row['model_version_id']:
+            model, created = Model.objects.get_or_create(name=row['model_name'])
+            if created:
+                # Make the importing user an admin of the model that was just created
+                model.memberships.create(user=self.user, level=Role.Admin.value)
+
+            # Managers are not reachable from instances: use the model's `versions` relation
+            model_version, _ = model.versions.get_or_create(id=row['model_version_id'])
+
+        if row['configuration_id']:
+            configuration, _ = WorkerConfiguration.objects.get_or_create(
+                worker=Worker.objects.get(versions__id=worker_version_id),
+                configuration=json.loads(row['configuration']),
+            )
+
+        return self.local_process.worker_runs.get_or_create(
+            version_id=worker_version_id,
+            model_version=model_version,
+            configuration=configuration,
+            defaults={'parents': []},
+        )
+
     def create_image_server(self, row):
         return ImageServer.objects.get_or_create(
             url=row['url'],
@@ -479,6 +525,8 @@ class Command(BaseCommand):
         if not corpus_name:
             corpus_name = f"Corpus import {date}"
 
+        self.local_process, _ = self.user.processes.get_or_create(mode=ProcessMode.Local)
+
         self.stdout.write(f"Creating corpus {corpus_name}")
         with Timer() as t:
             # Create corpus
@@ -500,6 +548,7 @@ class Command(BaseCommand):
             self.worker_type_map = self.create_objects(WorkerType, self.create_worker_type, SQL_WORKER_TYPE_QUERY)
             self.worker_map = self.create_objects(Worker, self.create_worker, SQL_WORKER_VERSION_QUERY)
             self.worker_version_map = self.create_objects(WorkerVersion, self.create_worker_version, SQL_WORKER_VERSION_QUERY)
+            self.worker_run_map = self.create_objects(WorkerRun, self.create_worker_run, SQL_WORKER_RUN_QUERY)
 
             # Create images and servers
             self.image_server_map = self.create_objects(ImageServer, self.create_image_server, SQL_IMAGE_SERVER_QUERY)
diff --git a/arkindex/documents/tests/commands/test_load_export.py b/arkindex/documents/tests/commands/test_load_export.py
index 9a3a1cf594..f2056c3b25 100644
--- a/arkindex/documents/tests/commands/test_load_export.py
+++ b/arkindex/documents/tests/commands/test_load_export.py
@@ -12,7 +12,7 @@ from arkindex.documents.management.commands.load_export import Command
 from arkindex.documents.models import Corpus, Element, ElementPath, EntityType, Transcription
 from arkindex.documents.tasks import corpus_delete
 from arkindex.images.models import Image, ImageServer
-from arkindex.process.models import Repository, Worker, WorkerType, WorkerVersion
+from arkindex.process.models import ProcessMode, Repository, Worker, WorkerType, WorkerVersion
 from arkindex.project.tests import FixtureTestCase
 
 BASE_DIR = Path(__file__).absolute().parent
@@ -37,6 +37,9 @@ class TestLoadExport(FixtureTestCase):
             'process.worker': [],
             'process.revision': ['message', 'author'],
             'process.workerversion': ['configuration', 'state', 'docker_image', 'docker_image_iid'],
+            # The WorkerRuns lose their parents, use different worker versions that just got recreated,
+            # are assigned to the user's local process and not the original one
+            'process.workerrun': ['parents', 'version', 'process', 'summary'],
             'process.workertype': [],
             'images.imageserver': ['s3_bucket', 's3_region', 'created', 'updated', 'read_only'],
             'images.image': ['created', 'updated', 'hash', 'status'],
@@ -123,11 +126,17 @@ class TestLoadExport(FixtureTestCase):
 
         element = self.corpus.elements.get(name='Volume 1')
         transcription = Transcription.objects.first()
-        version = WorkerVersion.objects.get(worker__slug='reco')
+
+        reco_version = WorkerVersion.objects.get(worker__slug='reco')
+        reco_run = reco_version.worker_runs.get()
+        dla_version = WorkerVersion.objects.get(worker__slug='dla')
+        dla_run = dla_version.worker_runs.get()
 
         element.classifications.create(
             ml_class=self.corpus.ml_classes.create(name='Blah'),
             confidence=.55555555,
+            worker_version=dla_version,
+            worker_run=dla_run,
         )
 
         person_type = EntityType.objects.get(
@@ -161,14 +170,15 @@ class TestLoadExport(FixtureTestCase):
             entity=entity1,
             offset=1,
             length=1,
-            worker_version=version,
+            worker_version=reco_version,
+            worker_run=reco_run,
         )
 
         transcription.transcription_entities.create(
             entity=entity2,
             offset=0,
             length=1,
-            worker_version=version,
+            worker_version=reco_version,
             confidence=0.42,
         )
 
@@ -189,7 +199,7 @@ class TestLoadExport(FixtureTestCase):
         corpus_delete(self.corpus.id)
         Image.objects.all().delete()
         ImageServer.objects.all().delete()
-        WorkerVersion.objects.filter(id=version.id).delete()
+        WorkerVersion.objects.filter(id=reco_version.id).delete()
 
         call_command('load_export', db_path, '--email', self.user.email, '--corpus-name', 'My corpus')
 
@@ -199,12 +209,21 @@ class TestLoadExport(FixtureTestCase):
 
         corpus = Corpus.objects.get(name='My corpus')
 
+        local_process = self.user.processes.get(mode=ProcessMode.Local)
+
         data_before = self.clean_dump_data(
             dump_path_before,
             # Remap the corpus, imageserver and worker version IDs
             corpus={str(self.corpus.id): str(corpus.id)},
             server={self.imgsrv.id: ImageServer.objects.get().id},
-            worker_version={str(version.id): str(WorkerVersion.objects.get(worker__slug='reco').id)},
+            worker_version={
+                str(reco_version.id): str(WorkerVersion.objects.get(worker__slug='reco').id),
+                str(dla_version.id): str(WorkerVersion.objects.get(worker__slug='dla').id),
+            },
+            worker_run={
+                str(reco_run.id): str(local_process.worker_runs.get(version__worker__slug='reco').id),
+                str(dla_run.id): str(local_process.worker_runs.get(version__worker__slug='dla').id),
+            },
         )
         data_after = self.clean_dump_data(dump_path_after)
 
diff --git a/arkindex/documents/tests/tasks/test_export.py b/arkindex/documents/tests/tasks/test_export.py
index 3b35f95e56..00763ded34 100644
--- a/arkindex/documents/tests/tasks/test_export.py
+++ b/arkindex/documents/tests/tasks/test_export.py
@@ -41,6 +41,7 @@ TABLE_NAMES = {
     'transcription',
     'transcription_entity',
     'worker_version',
+    'worker_run',
 }
 
 
@@ -161,7 +162,7 @@ class TestExport(FixtureTestCase):
         )
 
         self.assertCountEqual(
-            db.execute("SELECT version FROM export_version").fetchall(), [(5, )]
+            db.execute("SELECT version FROM export_version").fetchall(), [(6, )]
         )
 
         self.assertCountEqual(
@@ -225,6 +226,7 @@ class TestExport(FixtureTestCase):
                 mirrored,
                 confidence,
                 worker_version_id,
+                worker_run_id,
                 image_id,
                 polygon
             FROM element
@@ -256,6 +258,11 @@ class TestExport(FixtureTestCase):
             else:
                 row.append(None)
 
+            if element.worker_run_id:
+                row.append(str(element.worker_run_id))
+            else:
+                row.append(None)
+
             if element.polygon:
                 row.append(str(element.image_id))
                 row.append([
@@ -286,7 +293,17 @@ class TestExport(FixtureTestCase):
         )
 
         self.assertCountEqual(
-            db.execute("SELECT id, element_id, text, confidence, orientation, worker_version_id FROM transcription").fetchall(),
+            db.execute("""
+                SELECT
+                    id,
+                    element_id,
+                    text,
+                    confidence,
+                    orientation,
+                    worker_version_id,
+                    worker_run_id
+                FROM transcription
+            """).fetchall(),
             [
                 (
                     str(transcription.id),
@@ -294,14 +311,27 @@ class TestExport(FixtureTestCase):
                     transcription.text,
                     transcription.confidence,
                     transcription.orientation.value,
-                    str(transcription.worker_version_id) if transcription.worker_version_id else None
+                    str(transcription.worker_version_id) if transcription.worker_version_id else None,
+                    str(transcription.worker_run_id) if transcription.worker_run_id else None,
                 )
                 for transcription in Transcription.objects.filter(element__corpus=self.corpus)
             ]
         )
 
         self.assertCountEqual(
-            db.execute("SELECT id, element_id, class_name, state, moderator, confidence, high_confidence, worker_version_id FROM classification").fetchall(),
+            db.execute("""
+                SELECT
+                    id,
+                    element_id,
+                    class_name,
+                    state,
+                    moderator,
+                    confidence,
+                    high_confidence,
+                    worker_version_id,
+                    worker_run_id
+                FROM classification
+            """).fetchall(),
             [
                 (
                     str(classification.id),
@@ -311,14 +341,26 @@ class TestExport(FixtureTestCase):
                     classification.moderator.email if classification.moderator else None,
                     classification.confidence,
                     int(classification.high_confidence),
-                    str(classification.worker_version_id) if classification.worker_version_id else None
+                    str(classification.worker_version_id) if classification.worker_version_id else None,
+                    str(classification.worker_run_id) if classification.worker_run_id else None,
                 )
                 for classification in Classification.objects.filter(element__corpus=self.corpus)
             ]
         )
 
         self.assertCountEqual(
-            db.execute("SELECT id, element_id, name, type, value, entity_id, worker_version_id FROM metadata").fetchall(),
+            db.execute("""
+                SELECT
+                    id,
+                    element_id,
+                    name,
+                    type,
+                    value,
+                    entity_id,
+                    worker_version_id,
+                    worker_run_id
+                FROM metadata
+            """).fetchall(),
             [
                 (
                     str(metadata.id),
@@ -327,14 +369,26 @@ class TestExport(FixtureTestCase):
                     metadata.type.value,
                     metadata.value,
                     str(metadata.entity_id) if metadata.entity_id else None,
-                    str(metadata.worker_version_id) if metadata.worker_version_id else None
+                    str(metadata.worker_version_id) if metadata.worker_version_id else None,
+                    str(metadata.worker_run_id) if metadata.worker_run_id else None,
                 )
                 for metadata in MetaData.objects.filter(element__corpus=self.corpus)
             ]
         )
 
         self.assertCountEqual(
-            db.execute("SELECT id, name, type_id, validated, moderator, metas, worker_version_id FROM entity").fetchall(),
+            db.execute("""
+                SELECT
+                    id,
+                    name,
+                    type_id,
+                    validated,
+                    moderator,
+                    metas,
+                    worker_version_id,
+                    worker_run_id
+                FROM entity
+            """).fetchall(),
             [
                 (
                     str(entity.id),
@@ -344,6 +398,7 @@ class TestExport(FixtureTestCase):
                     entity.moderator.email if entity.moderator else None,
                     json.dumps(entity.metas) if entity.metas else None,
                     str(entity.worker_version_id) if entity.worker_version_id else None,
+                    str(entity.worker_run_id) if entity.worker_run_id else None,
                 )
                 for entity in self.corpus.entities.all()
             ]
@@ -389,7 +444,18 @@ class TestExport(FixtureTestCase):
         )
 
         self.assertCountEqual(
-            db.execute("SELECT id, transcription_id, entity_id, offset, length, worker_version_id, confidence FROM transcription_entity").fetchall(),
+            db.execute("""
+                SELECT
+                    id,
+                    transcription_id,
+                    entity_id,
+                    offset,
+                    length,
+                    worker_version_id,
+                    worker_run_id,
+                    confidence
+                FROM transcription_entity
+            """).fetchall(),
             [
                 (
                     str(transcription_entity.id),
@@ -398,6 +464,7 @@ class TestExport(FixtureTestCase):
                     transcription_entity.offset,
                     transcription_entity.length,
                     str(transcription_entity.worker_version_id) if transcription_entity.worker_version_id else None,
+                    str(transcription_entity.worker_run_id) if transcription_entity.worker_run_id else None,
                     transcription_entity.confidence,
                 )
                 for transcription_entity in TranscriptionEntity.objects.filter(entity__corpus=self.corpus)
-- 
GitLab