From c35215fb91f51d5d4b2d8f364320530ff588fc3c Mon Sep 17 00:00:00 2001
From: mlbonhomme <bonhomme@teklia.com>
Date: Fri, 13 May 2022 15:21:03 +0000
Subject: [PATCH] support worker runs on metadata

---
 arkindex/documents/admin.py                   |   4 +-
 .../0053_metadata_worker_run_and_more.py      |  24 ++++
 arkindex/documents/models.py                  |  14 +++
 arkindex/documents/serializers/elements.py    |  33 ++++-
 arkindex/documents/tests/test_metadata.py     | 116 ++++++++++++++++--
 arkindex/sql_validation/indexer_prefetch.sql  |   3 +-
 6 files changed, 178 insertions(+), 16 deletions(-)
 create mode 100644 arkindex/documents/migrations/0053_metadata_worker_run_and_more.py

diff --git a/arkindex/documents/admin.py b/arkindex/documents/admin.py
index d77a8bef34..d7228c4abb 100644
--- a/arkindex/documents/admin.py
+++ b/arkindex/documents/admin.py
@@ -76,12 +76,12 @@ class MetaDataAdmin(admin.ModelAdmin):
     list_display = ('id', 'name', 'type', )
     list_filter = [('type', EnumFieldListFilter), ]
     readonly_fields = ('id', )
-    raw_id_fields = ('element', 'entity', 'worker_version')
+    raw_id_fields = ('element', 'entity', 'worker_version', 'worker_run')
 
 
 class MetaDataInline(admin.TabularInline):
     model = MetaData
-    raw_id_fields = ('entity', 'worker_version', )
+    raw_id_fields = ('entity', 'worker_version', 'worker_run')
 
 
 class ElementAdmin(admin.ModelAdmin):
diff --git a/arkindex/documents/migrations/0053_metadata_worker_run_and_more.py b/arkindex/documents/migrations/0053_metadata_worker_run_and_more.py
new file mode 100644
index 0000000000..e87139d38e
--- /dev/null
+++ b/arkindex/documents/migrations/0053_metadata_worker_run_and_more.py
@@ -0,0 +1,24 @@
+# Generated by Django 4.0.2 on 2022-05-13 14:54
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('dataimport', '0047_workerversion_model_usage'),
+        ('documents', '0052_element_worker_run_and_more'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='metadata',
+            name='worker_run',
+            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='metadatas', to='dataimport.workerrun'),
+        ),
+        migrations.AddConstraint(
+            model_name='metadata',
+            constraint=models.CheckConstraint(check=models.Q(('worker_version_id__isnull', False), ('worker_run_id__isnull', True), _connector='OR'), name='metadata_worker_run_requires_worker_version'),
+        ),
+    ]
diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py
index 8610e87b3e..d5f58c9e91 100644
--- a/arkindex/documents/models.py
+++ b/arkindex/documents/models.py
@@ -772,6 +772,13 @@ class MetaData(InterpretedDateMixin, models.Model):
         null=True,
         blank=True,
     )
+    worker_run = models.ForeignKey(
+        'dataimport.WorkerRun',
+        related_name='metadatas',
+        on_delete=models.DO_NOTHING,
+        null=True,
+        blank=True
+    )
 
     class Meta:
         ordering = ('element', 'name', 'id')
@@ -785,6 +792,13 @@ class MetaData(InterpretedDateMixin, models.Model):
                 # have to use RawSQL. https://code.djangoproject.com/ticket/31646
                 check=~Q(type=MetaType.Numeric) | RawSQL('value::double precision IS NOT NULL', params=(), output_field=models.BooleanField()),
                 name='metadata_numeric_values'
+            ),
+            # There can be a worker run ID only if there is a worker version ID,
+            # but there can be a worker version ID without a worker run ID (backwards compatibility).
+            # In other words, either the worker run ID is null, or the worker version ID is not null.
+            models.CheckConstraint(
+                check=Q(worker_version_id__isnull=False) | Q(worker_run_id__isnull=True),
+                name='metadata_worker_run_requires_worker_version',
             )
         ]
         indexes = [
diff --git a/arkindex/documents/serializers/elements.py b/arkindex/documents/serializers/elements.py
index 5987f7f88e..d50399d368 100644
--- a/arkindex/documents/serializers/elements.py
+++ b/arkindex/documents/serializers/elements.py
@@ -110,11 +110,12 @@ class MetaDataUpdateSerializer(MetaDataSerializer):
 
     class Meta:
         model = MetaData
-        fields = MetaDataSerializer.Meta.fields + ('entity_id', 'worker_version', )
-        read_only_fields = ('worker_version', )
+        fields = MetaDataSerializer.Meta.fields + ('entity_id', 'worker_version', 'worker_run_id', )
+        read_only_fields = ('worker_version', 'worker_run_id', )
         validators = [
             metadata_number_validator,
             metadata_url_validator,
+            WorkerRunOrVersionValidator(worker_version_field='worker_version', ),
         ]
 
     def __init__(self, *args, **kwargs):
@@ -126,10 +127,15 @@ class MetaDataUpdateSerializer(MetaDataSerializer):
         # self.instance can be a queryset if this serializer is used for a list; just ignore anything that isn't a metadata
         elif isinstance(self.instance, MetaData):
             corpus_id = self.instance.element.corpus_id
-
         if corpus_id:
             self.fields['entity_id'].queryset = Entity.objects.filter(corpus_id=corpus_id)
 
+    def validate(self, data):
+        worker_run = data.get('worker_run')
+        if worker_run is not None:
+            data['worker_version'] = WorkerVersion(id=worker_run.version_id)
+        return data
+
 
 class MetaDataCreateSerializer(MetaDataUpdateSerializer):
     worker_version = serializers.PrimaryKeyRelatedField(
@@ -138,6 +144,13 @@ class MetaDataCreateSerializer(MetaDataUpdateSerializer):
         allow_null=True,
         style={'base_template': 'input.html'},
     )
+    worker_run_id = serializers.PrimaryKeyRelatedField(
+        queryset=WorkerRun.objects.all(),
+        required=False,
+        allow_null=True,
+        style={'base_template': 'input.html'},
+        source='worker_run'
+    )
 
     class Meta(MetaDataUpdateSerializer.Meta):
         read_only_fields = ()
@@ -179,6 +192,16 @@ class MetaDataBulkSerializer(serializers.Serializer):
         allow_null=True,
         style={'base_template': 'input.html'},
     )
+    worker_run_id = serializers.PrimaryKeyRelatedField(
+        queryset=WorkerRun.objects.all(),
+        required=False,
+        allow_null=True,
+        style={'base_template': 'input.html'},
+        source='worker_run'
+    )
+
+    class Meta:
+        validators = [WorkerRunOrVersionValidator(worker_version_field='worker_version')]
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -197,6 +220,9 @@ class MetaDataBulkSerializer(serializers.Serializer):
 
     def validate(self, data):
         data = super().validate(data)
+        worker_run = data.get('worker_run')
+        if worker_run is not None:
+            data['worker_version'] = WorkerVersion(id=worker_run.version_id)
         request_metadata = self.make_metadata_tuples(data['metadata_list'])
         unique_metadata = set(request_metadata)
         if len(unique_metadata) != len(request_metadata):
@@ -216,6 +242,7 @@ class MetaDataBulkSerializer(serializers.Serializer):
                 id=uuid.uuid4(),
                 element=self.context['element'],
                 worker_version=validated_data.get('worker_version', None),
+                worker_run=validated_data.get('worker_run', None),
                 **m
             )
             for m in validated_data['metadata_list']
diff --git a/arkindex/documents/tests/test_metadata.py b/arkindex/documents/tests/test_metadata.py
index 0df3d4402e..4313dc4239 100644
--- a/arkindex/documents/tests/test_metadata.py
+++ b/arkindex/documents/tests/test_metadata.py
@@ -28,6 +28,7 @@ class TestMetaData(FixtureAPITestCase):
             )
         )
         cls.worker_version = WorkerVersion.objects.first()
+        cls.worker_run = cls.worker_version.worker_runs.first()
 
     def setUp(self):
         super().setUp()
@@ -47,7 +48,8 @@ class TestMetaData(FixtureAPITestCase):
                 'value': '123',
                 'dates': [],
                 'entity': None,
-                'worker_version': None
+                'worker_version': None,
+                'worker_run_id': None
             },
         ])
 
@@ -64,7 +66,8 @@ class TestMetaData(FixtureAPITestCase):
                 'value': '123',
                 'dates': [],
                 'entity': None,
-                'worker_version': None
+                'worker_version': None,
+                'worker_run_id': None
             },
         ])
 
@@ -98,6 +101,7 @@ class TestMetaData(FixtureAPITestCase):
                 'value': '123',
                 'entity': None,
                 'worker_version': None,
+                'worker_run_id': None
             },
             {
                 'id': str(entity_meta.id),
@@ -115,6 +119,7 @@ class TestMetaData(FixtureAPITestCase):
                     'worker_version_id': str(self.worker_version.id),
                 },
                 'worker_version': None,
+                'worker_run_id': None
             },
         ])
 
@@ -134,6 +139,7 @@ class TestMetaData(FixtureAPITestCase):
                 'value': '123',
                 'entity': None,
                 'worker_version': str(self.worker_version.id),
+                'worker_run_id': None
             },
         ])
 
@@ -201,6 +207,38 @@ class TestMetaData(FixtureAPITestCase):
         self.assertEqual(md.value, 'Texas')
         self.assertEqual(md.worker_version, self.worker_version)
 
+    def test_create_metadata_worker_run(self):
+        self.client.force_login(self.user)
+        response = self.client.post(
+            reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)}),
+            data={'type': 'location', 'name': 'location', 'value': 'Texas', 'worker_run_id': str(self.worker_run.id)}
+        )
+        self.assertEqual(response.status_code, status.HTTP_201_CREATED)
+        md = self.vol.metadatas.get(type=MetaType.Location, name='location')
+        self.assertEqual(md.value, 'Texas')
+        self.assertEqual(md.worker_run_id, self.worker_run.id)
+        self.assertEqual(md.worker_version, self.worker_version)
+
+    def test_create_metadata_worker_run_or_version(self):
+        self.client.force_login(self.user)
+        response = self.client.post(
+            reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)}),
+            data={'type': 'location', 'name': 'location', 'value': 'Texas', 'worker_run_id': str(self.worker_run.id), 'worker_version': str(self.worker_version.id)}
+        )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertDictEqual(response.json(), {'non_field_errors': ['Only one of `worker_version` and `worker_run_id` may be set.']})
+
+    def test_create_metadata_bad_worker_run(self):
+        self.client.force_login(self.user)
+        non_existent_worker_run_id = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
+        response = self.client.post(
+            reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)}),
+            data={'type': 'location', 'name': 'location', 'value': 'Texas', 'worker_run_id': non_existent_worker_run_id},
+            format='json'
+        )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertDictEqual(response.json(), {'worker_run_id': [f'Invalid pk "{non_existent_worker_run_id}" - object does not exist.']})
+
     def test_create_metadata_date(self):
         self.client.force_login(self.user)
         response = self.client.post(
@@ -221,7 +259,8 @@ class TestMetaData(FixtureAPITestCase):
                 'type': 'exact',
                 'year': 1885
             }],
-            'worker_version': None
+            'worker_version': None,
+            'worker_run_id': None
         })
 
     def test_create_metadata_empty(self):
@@ -388,6 +427,7 @@ class TestMetaData(FixtureAPITestCase):
             'value': '2019-12-04',
             'entity': None,
             'worker_version': None,
+            'worker_run_id': None
         })
         self.metadata.refresh_from_db()
         self.assertEqual(self.metadata.type, MetaType.Date)
@@ -413,6 +453,7 @@ class TestMetaData(FixtureAPITestCase):
             'value': '123',
             'entity': None,
             'worker_version': None,
+            'worker_run_id': None
         })
         self.metadata.refresh_from_db()
         self.assertEqual(self.metadata.type, MetaType.Date)
@@ -437,6 +478,7 @@ class TestMetaData(FixtureAPITestCase):
             'value': '2019-12-04',
             'entity': None,
             'worker_version': None,
+            'worker_run_id': None
         })
         self.metadata.refresh_from_db()
         self.assertEqual(self.metadata.type, MetaType.Text)
@@ -570,7 +612,8 @@ class TestMetaData(FixtureAPITestCase):
                 'value': '123',
                 'entity': None,
                 'dates': [],
-                'worker_version': None
+                'worker_version': None,
+                'worker_run_id': None
             }
         )
 
@@ -592,7 +635,8 @@ class TestMetaData(FixtureAPITestCase):
                 'value': 123.0,
                 'entity': None,
                 'dates': [],
-                'worker_version': None
+                'worker_version': None,
+                'worker_run_id': None
             }
         )
 
@@ -686,7 +730,8 @@ class TestMetaData(FixtureAPITestCase):
                 'value': '# Title\n## Subtitle\nbla',
                 'entity': None,
                 'dates': [],
-                'worker_version': None
+                'worker_version': None,
+                'worker_run_id': None
             }
         )
 
@@ -704,7 +749,8 @@ class TestMetaData(FixtureAPITestCase):
                 'value': '<h1>Title</h1>',
                 'entity': None,
                 'dates': [],
-                'worker_version': None
+                'worker_version': None,
+                'worker_run_id': None
             }
         )
 
@@ -722,7 +768,8 @@ class TestMetaData(FixtureAPITestCase):
                 'value': '&lt;style type="text/css"&gt;* { display: none !important; }&lt;/style&gt;',
                 'entity': None,
                 'dates': [],
-                'worker_version': None
+                'worker_version': None,
+                'worker_run_id': None
             }
         )
 
@@ -893,7 +940,8 @@ class TestMetaData(FixtureAPITestCase):
                     'dates': []
                 }
             ],
-            'worker_version': None
+            'worker_version': None,
+            'worker_run_id': None
         })
 
     def test_bulk_create_metadata_worker_version(self):
@@ -914,6 +962,53 @@ class TestMetaData(FixtureAPITestCase):
         self.assertEqual(md.value, 'Mandarin')
         self.assertEqual(md.worker_version, self.worker_version)
 
+    def test_bulk_create_metadata_worker_run(self):
+        self.client.force_login(self.user)
+        response = self.client.post(
+            reverse('api:element-metadata-bulk', kwargs={'pk': str(self.vol.id)}),
+            data={'metadata_list': [
+                {'type': 'text', 'name': 'language', 'value': 'Chinese'},
+                {'type': 'text', 'name': 'alt_language', 'value': 'Mandarin'}
+            ], 'worker_run_id': str(self.worker_run.id)},
+            format='json'
+        )
+        self.assertEqual(response.status_code, status.HTTP_201_CREATED)
+        md = self.vol.metadatas.get(type=MetaType.Text, name='language')
+        self.assertEqual(md.value, 'Chinese')
+        self.assertEqual(md.worker_run_id, self.worker_run.id)
+        self.assertEqual(md.worker_version, self.worker_version)
+        md = self.vol.metadatas.get(type=MetaType.Text, name='alt_language')
+        self.assertEqual(md.value, 'Mandarin')
+        self.assertEqual(md.worker_run_id, self.worker_run.id)
+        self.assertEqual(md.worker_version, self.worker_version)
+
+    def test_bulk_create_metadata_worker_run_or_version(self):
+        self.client.force_login(self.user)
+        response = self.client.post(
+            reverse('api:element-metadata-bulk', kwargs={'pk': str(self.vol.id)}),
+            data={'metadata_list': [
+                {'type': 'text', 'name': 'language', 'value': 'Chinese'},
+                {'type': 'text', 'name': 'alt_language', 'value': 'Mandarin'}
+            ], 'worker_run_id': str(self.worker_run.id), 'worker_version': str(self.worker_version.id)},
+            format='json'
+        )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertDictEqual(response.json(), {'non_field_errors': ['Only one of `worker_version` and `worker_run_id` may be set.']})
+
+    def test_bulk_create_metadata_bad_worker_run(self):
+        self.client.force_login(self.user)
+        non_existent_worker_run_id = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
+        response = self.client.post(
+            reverse('api:element-metadata-bulk', kwargs={'pk': str(self.vol.id)}),
+            data={'metadata_list': [
+                {'type': 'text', 'name': 'language', 'value': 'Chinese'},
+                {'type': 'text', 'name': 'alt_language', 'value': 'Mandarin'}
+            ], 'worker_run_id': non_existent_worker_run_id},
+            format='json'
+        )
+        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertDictEqual(response.json(), {'worker_run_id': [f'Invalid pk "{non_existent_worker_run_id}" - object does not exist.']})
+
     def test_bulk_create_metadata_empty(self):
         self.client.force_login(self.user)
         response = self.client.post(reverse('api:element-metadata-bulk', kwargs={'pk': str(self.vol.id)}))
@@ -974,7 +1069,8 @@ class TestMetaData(FixtureAPITestCase):
                             'entity_id': None,
                         },
                     ],
-                    'worker_version': None
+                    'worker_version': None,
+                    'worker_run_id': None
                 })
 
                 self.vol.metadatas.all().delete()
diff --git a/arkindex/sql_validation/indexer_prefetch.sql b/arkindex/sql_validation/indexer_prefetch.sql
index beb012cbdb..3e68a122c9 100644
--- a/arkindex/sql_validation/indexer_prefetch.sql
+++ b/arkindex/sql_validation/indexer_prefetch.sql
@@ -94,7 +94,8 @@ SELECT "documents_metadata"."id",
        "documents_metadata"."type",
        "documents_metadata"."value",
        "documents_metadata"."entity_id",
-       "documents_metadata"."worker_version_id"
+       "documents_metadata"."worker_version_id",
+       "documents_metadata"."worker_run_id"
 FROM "documents_metadata"
 WHERE "documents_metadata"."element_id" IN ('{page_id}'::uuid)
 ORDER BY "documents_metadata"."element_id" ASC,
-- 
GitLab