diff --git a/arkindex/dataimport/migrations/0011_refactor_payload_dataimport.py b/arkindex/dataimport/migrations/0011_refactor_payload_dataimport.py index 7ff7be8bb70e2abc02d245e4f0a74a2e424a8ecb..3342a7a42535cb04b9b0d303f41fec6d26a61cee 100644 --- a/arkindex/dataimport/migrations/0011_refactor_payload_dataimport.py +++ b/arkindex/dataimport/migrations/0011_refactor_payload_dataimport.py @@ -2,8 +2,11 @@ import django.db.models.deletion from django.db import migrations, models +from enumfields import Enum -from arkindex.dataimport.models import DataImportMode + +class OldDataImportMode(Enum): + Elements = 'elements' def populate_new_fields(apps, schema_editor): @@ -16,9 +19,11 @@ def populate_new_fields(apps, schema_editor): DataImport = apps.get_model('dataimport', 'DataImport') ElementType = apps.get_model('documents', 'ElementType') Element = apps.get_model('documents', 'Element') - for di in DataImport.objects.using(db_alias).filter( - mode=DataImportMode.Elements - ): + + # Switch to Enum to restore the removed Elements import mode to allow filtering on it + DataImport.mode.field.enum = OldDataImportMode + + for di in DataImport.objects.using(db_alias).filter(mode=OldDataImportMode.Elements): # There, we retrieve the payload elements that interest us to populate the new fields. We also remove the # attributes "ml_tools", "thumbnails" and "chunks" from the payload because they will no longer be useful. element, name_contains, elt_type, selection, best_class, _, _, _ = map( diff --git a/arkindex/dataimport/migrations/0030_remove_mode_elements.py b/arkindex/dataimport/migrations/0030_remove_mode_elements.py new file mode 100644 index 0000000000000000000000000000000000000000..c33c758584a65ff37e496f3649829acd62c71746 --- /dev/null +++ b/arkindex/dataimport/migrations/0030_remove_mode_elements.py @@ -0,0 +1,34 @@ +# Generated by Django 3.1.6 on 2021-02-24 10:38 + +from django.db import migrations +from enumfields import Enum + + +class OldDataImportMode(Enum): + Elements = 'elements' + + +def remove_mode_elements(apps, schema_editor): + DataImport = apps.get_model('dataimport', 'DataImport') + DataImportElement = apps.get_model('dataimport', 'DataImportElement') + + # Switch the enum to restore Elements, because the EnumField would otherwise not let us filter + DataImport.mode.field.enum = OldDataImportMode + + DataImportElement.objects.filter(dataimport__mode=OldDataImportMode.Elements).delete() + DataImport.objects.filter(mode=OldDataImportMode.Elements).delete() + + +class Migration(migrations.Migration): + + dependencies = [ + ('dataimport', '0029_worker_activity'), + ] + + operations = [ + migrations.RunPython( + remove_mode_elements, + reverse_code=migrations.RunPython.noop, + elidable=True, + ) + ] diff --git a/arkindex/dataimport/models.py b/arkindex/dataimport/models.py index 7db29a69498e6a716e40d3e6cac0de9bda380593..72969c69c1223bd8fbe40e8aecdc83176b57e6f3 100644 --- a/arkindex/dataimport/models.py +++ b/arkindex/dataimport/models.py @@ -25,7 +25,6 @@ class DataImportMode(Enum): Images = 'images' PDF = 'pdf' Repository = 'repository' - Elements = 'elements' IIIF = 'iiif' Workers = 'workers' Transkribus = 'transkribus' @@ -59,7 +58,7 @@ class DataImport(IndexableModel): ) # Used to define the element type during non-folder elements creation for Files/IIIF imports, - # or to filter elements by type with Elements imports + # or to filter elements by type with Workers imports element_type = models.ForeignKey( 'documents.ElementType', on_delete=models.SET_NULL, @@ -68,11 +67,11 @@ class DataImport(IndexableModel): related_name='element_imports', ) - # Used to filter out elements with a name that doesn't contain the substring, only on Elements imports + # Used to filter out elements with a name that doesn't contain the substring, only on Workers imports name_contains = models.CharField(null=True, blank=True, max_length=250) # Used to filter elements by best class, either having or not having a best class (boolean) - # or having a specific best class (UUID), only on Elements imports + # or having a specific best class (UUID), only on Workers imports best_class = models.CharField(null=True, blank=True, max_length=36) # Used to save a user's selection for Element workflows @@ -146,7 +145,7 @@ class DataImport(IndexableModel): """ Return a queryset of elements involved in this process """ - if self.mode not in (DataImportMode.Elements, DataImportMode.Workers): + if self.mode != DataImportMode.Workers: return Element.objects.none() elements = None @@ -193,7 +192,7 @@ class DataImport(IndexableModel): ''' ml_workflow_chunks = 1 import_task_name = 'import' - if self.mode == DataImportMode.Elements: + if self.mode == DataImportMode.Workers: import_task_name = 'initialisation' assert self.workflow is None, 'A workflow is already setup' @@ -213,7 +212,7 @@ class DataImport(IndexableModel): }, } - elif self.mode in (DataImportMode.Elements, DataImportMode.Workers): + elif self.mode == DataImportMode.Workers: if chunks is not None: assert isinstance(chunks, int) and chunks > 0, 'Chunks count should be an positive integer' assert chunks < 11, 'Import distribution is limited to 10 chunks' diff --git a/arkindex/dataimport/tests/test_imports.py b/arkindex/dataimport/tests/test_imports.py index e41846f05cd5af04d6d73cba3a3e9ef59ae770d5..22b0d80c9a9089a13b980f81d95dcd9ec6f4fc4e 100644 --- a/arkindex/dataimport/tests/test_imports.py +++ b/arkindex/dataimport/tests/test_imports.py @@ -990,7 +990,7 @@ class TestImports(FixtureAPITestCase): recipe_dump = yaml.safe_load(dataimport.workflow.recipe) self.assertCountEqual( recipe_dump['tasks'].keys(), - ['import', 'my_worker_1', 'my_worker_2', 'my_worker_3', 'thumbnails_1', 'thumbnails_2', 'thumbnails_3'] + ['initialisation', 'my_worker_1', 'my_worker_2', 'my_worker_3', 'thumbnails_1', 'thumbnails_2', 'thumbnails_3'] ) @override_settings(IMPORTS_WORKER_VERSION=uuid.uuid4()) diff --git a/arkindex/dataimport/tests/test_process_elements.py b/arkindex/dataimport/tests/test_process_elements.py index 9ce2f638ac590c308d46bc72a565f8fcd5a728f8..f7eae9f042c00c1263b94ce463abbaa883dd7222 100644 --- a/arkindex/dataimport/tests/test_process_elements.py +++ b/arkindex/dataimport/tests/test_process_elements.py @@ -172,7 +172,7 @@ class TestProcessElements(FixtureAPITestCase): super().setUp() self.dataimport = DataImport.objects.create( creator_id=self.user.id, - mode=DataImportMode.Elements, + mode=DataImportMode.Workers, corpus_id=self.private_corpus.id ) @@ -618,6 +618,8 @@ class TestProcessElements(FixtureAPITestCase): self.dataimport.load_children = True self.dataimport.save() self.client.force_login(self.superuser) + + # Every mode other than Workers never returns elements for mode in (DataImportMode.Images, DataImportMode.PDF, DataImportMode.Repository, DataImportMode.IIIF, DataImportMode.Transkribus): self.dataimport.mode = mode self.dataimport.save() @@ -629,23 +631,23 @@ class TestProcessElements(FixtureAPITestCase): elements = Element.objects.filter(corpus=self.private_corpus).order_by('name', 'type__slug') - for mode in (DataImportMode.Elements, DataImportMode.Workers): - self.dataimport.mode = mode - self.dataimport.save() - with self.assertNumQueries(6): - response = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) - self.assertEqual(response.status_code, status.HTTP_200_OK) - data = response.json() - self.assertEqual(data["count"], None) - self.assertEqual(data["next"], None) - self.assertCountEqual(data["results"], [ - { - 'id': str(element.id), - 'type': element.type.slug, - 'name': element.name - } - for element in elements - ]) + # The Workers mode returns some elements + self.dataimport.mode = DataImportMode.Workers + self.dataimport.save() + with self.assertNumQueries(6): + response = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id})) + self.assertEqual(response.status_code, status.HTTP_200_OK) + data = response.json() + self.assertIsNone(data["count"]) + self.assertIsNone(data["next"]) + self.assertCountEqual(data["results"], [ + { + 'id': str(element.id), + 'type': element.type.slug, + 'name': element.name + } + for element in elements + ]) def test_list_elements_cursor_pagination(self): """ diff --git a/arkindex/dataimport/tests/test_workflows_api.py b/arkindex/dataimport/tests/test_workflows_api.py index 9647822dbcad21a09c4e1305a03650b8154406b7..445db03ee5e7f2b9a41580f3159d554e0abe4a67 100644 --- a/arkindex/dataimport/tests/test_workflows_api.py +++ b/arkindex/dataimport/tests/test_workflows_api.py @@ -92,9 +92,9 @@ class TestWorkflows(FixtureAPITestCase): dataimport.refresh_from_db() self.assertIsNotNone(dataimport.workflow) - self.assertEqual(dataimport.workflow.recipes['import'].image, 'tasks:latest') + self.assertEqual(dataimport.workflow.recipes['initialisation'].image, 'tasks:latest') self.assertEqual( - dataimport.workflow.recipes['import'].command, + dataimport.workflow.recipes['initialisation'].command, 'python -m arkindex_tasks.init_elements ' f'{dataimport.id} ' '--chunks-number 1' @@ -376,7 +376,7 @@ class TestWorkflows(FixtureAPITestCase): self.assertIsNotNone(dataimport.workflow) self.assertEqual( - dataimport.workflow.recipes['import'].command, + dataimport.workflow.recipes['initialisation'].command, 'python -m arkindex_tasks.init_elements ' f'{dataimport.id} ' '--chunks-number 1' @@ -462,7 +462,7 @@ class TestWorkflows(FixtureAPITestCase): self.assertIsNotNone(dataimport.workflow) self.assertEqual( - dataimport.workflow.recipes['import'].command, + dataimport.workflow.recipes['initialisation'].command, 'python -m arkindex_tasks.init_elements ' f'{dataimport.id} ' '--chunks-number 3' @@ -470,7 +470,7 @@ class TestWorkflows(FixtureAPITestCase): for i in range(1, 4): self.assertEqual( dataimport.workflow.recipes[f'thumbnails_{i}'].command, - f'python3 -m arkindex_tasks.generate_thumbnails /data/import/elements_chunk_{i}.json' + f'python3 -m arkindex_tasks.generate_thumbnails /data/initialisation/elements_chunk_{i}.json' ) def test_workers_no_worker_runs(self): @@ -489,7 +489,7 @@ class TestWorkflows(FixtureAPITestCase): self.assertIsNotNone(dataimport_2.workflow) self.maxDiff = None self.assertDictEqual(yaml.safe_load(dataimport_2.workflow.recipe)['tasks'], { - 'import': + 'initialisation': { 'command': 'python -m arkindex_tasks.init_elements ' f'{dataimport_2.id} --chunks-number 1', @@ -525,7 +525,7 @@ class TestWorkflows(FixtureAPITestCase): self.assertIsNotNone(dataimport_2.workflow) self.maxDiff = None self.assertDictEqual(yaml.safe_load(dataimport_2.workflow.recipe)['tasks'], { - 'import': + 'initialisation': { 'command': 'python -m arkindex_tasks.init_elements ' f'{dataimport_2.id} --chunks-number 1', @@ -536,8 +536,8 @@ class TestWorkflows(FixtureAPITestCase): 'command': None, 'image': f'my_repo.fake/workers/worker/reco:{self.version_1.id}', 'artifact': str(self.version_1.docker_image.id), - 'parents': ['import'], - 'env': {'TASK_ELEMENTS': '/data/import/elements.json', 'WORKER_VERSION_ID': str(self.version_1.id)} + 'parents': ['initialisation'], + 'env': {'TASK_ELEMENTS': '/data/initialisation/elements.json', 'WORKER_VERSION_ID': str(self.version_1.id)} }, f'dla_{str(self.version_2.id)[0:6]}': { @@ -545,6 +545,6 @@ class TestWorkflows(FixtureAPITestCase): 'image': f'my_repo.fake/workers/worker/dla:{self.version_2.id}', 'artifact': str(self.version_1.docker_image.id), 'parents': [f'reco_{str(self.version_1.id)[0:6]}'], - 'env': {'TASK_ELEMENTS': '/data/import/elements.json', 'WORKER_VERSION_ID': str(self.version_2.id)} + 'env': {'TASK_ELEMENTS': '/data/initialisation/elements.json', 'WORKER_VERSION_ID': str(self.version_2.id)} } })