diff --git a/arkindex/dataimport/api.py b/arkindex/dataimport/api.py index d08dc9962652d24864971730a886273fadda46be..6bfa0d398528c8986e15caa99820f1e0bfe8e2ad 100644 --- a/arkindex/dataimport/api.py +++ b/arkindex/dataimport/api.py @@ -354,7 +354,7 @@ class CorpusWorkflow(SelectionMixin, CorpusACLMixin, CreateAPIView): serializer = self.get_serializer(data=request.data) serializer.is_valid(raise_exception=True) - corpus, element, process_name, name_contains, element_type, selection, best_class, load_children = map( + corpus, element, process_name, name_contains, element_type, selection, best_class, load_children, use_cache = map( lambda key: serializer.validated_data.pop(key, None), ( 'corpus', @@ -364,7 +364,8 @@ class CorpusWorkflow(SelectionMixin, CorpusACLMixin, CreateAPIView): 'element_type', 'selection', 'best_class', - 'load_children' + 'load_children', + 'use_cache', ) ) @@ -376,7 +377,8 @@ class CorpusWorkflow(SelectionMixin, CorpusACLMixin, CreateAPIView): element_type=element_type, element=element, best_class=best_class, - load_children=load_children + load_children=load_children, + use_cache=use_cache, ) # Ensure process elements do exists diff --git a/arkindex/dataimport/migrations/0031_dataimport_use_cache.py b/arkindex/dataimport/migrations/0031_dataimport_use_cache.py new file mode 100644 index 0000000000000000000000000000000000000000..10490ccfccaed22e8e532e69327d09606545b605 --- /dev/null +++ b/arkindex/dataimport/migrations/0031_dataimport_use_cache.py @@ -0,0 +1,18 @@ +# Generated by Django 3.1.5 on 2021-03-30 08:49 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('dataimport', '0030_remove_mode_elements'), + ] + + operations = [ + migrations.AddField( + model_name='dataimport', + name='use_cache', + field=models.BooleanField(default=False), + ), + ] diff --git a/arkindex/dataimport/models.py b/arkindex/dataimport/models.py index 
270ab2c3561f18f264add79d3694255a25062c4a..c95b4915bba057fa4560dc6d0d147bb73b0f1321 100644 --- a/arkindex/dataimport/models.py +++ b/arkindex/dataimport/models.py @@ -89,6 +89,9 @@ class DataImport(IndexableModel): collection_id = models.PositiveIntegerField(null=True, blank=True) build_entities = models.BooleanField(default=False) + # Use the elements cache when executing the workflow + use_cache = models.BooleanField(default=False) + class Meta: ordering = ['corpus', '-created'] @@ -223,6 +226,8 @@ class DataImport(IndexableModel): 'python', '-m', 'arkindex_tasks.init_elements', str(self.id), '--chunks-number', str(ml_workflow_chunks), ] + if self.use_cache: + args.append('--use-cache') tasks = { import_task_name: { diff --git a/arkindex/dataimport/serializers/imports.py b/arkindex/dataimport/serializers/imports.py index 6fffa27aabe94e054ef57386c61d4291ff329b27..0d029b338d575e1ec589bd3dd6963f46b8dafdd6 100644 --- a/arkindex/dataimport/serializers/imports.py +++ b/arkindex/dataimport/serializers/imports.py @@ -73,7 +73,8 @@ class DataImportSerializer(DataImportLightSerializer): 'folder_type', 'element_type', 'element_name_contains', - 'load_children' + 'load_children', + 'use_cache' ) read_only_fields = DataImportLightSerializer.Meta.read_only_fields + ( 'files', @@ -211,6 +212,7 @@ class ElementsWorkflowSerializer(serializers.Serializer): best_class = BestClassField(required=False) selection = serializers.BooleanField(default=False) load_children = serializers.BooleanField(default=False) + use_cache = serializers.BooleanField(default=False) def validate_corpus(self, corpus_id): try: diff --git a/arkindex/dataimport/tests/test_imports.py b/arkindex/dataimport/tests/test_imports.py index 67160a627dbbed7bf25cf948c1eb0dc8023d438b..9781ef323bd3a174b0a8ba24c7e90620e889c509 100644 --- a/arkindex/dataimport/tests/test_imports.py +++ b/arkindex/dataimport/tests/test_imports.py @@ -565,8 +565,8 @@ class TestImports(FixtureAPITestCase): self.client.force_login(self.user) dataimport = 
DataImport.objects.create(mode=DataImportMode.Workers, corpus=self.corpus, creator=self.user) self.assertTupleEqual( - (dataimport.name, dataimport.name_contains, dataimport.element_type, dataimport.load_children), - (None, None, None, False) + (dataimport.name, dataimport.name_contains, dataimport.element_type, dataimport.load_children, dataimport.use_cache), + (None, None, None, False, False) ) with self.assertNumQueries(9): response = self.client.patch( @@ -575,15 +575,16 @@ class TestImports(FixtureAPITestCase): 'name': 'newName', 'element_name_contains': 'AAA', 'element_type': 'page', - 'load_children': True + 'load_children': True, + 'use_cache': True, }, format='json' ) self.assertEqual(response.status_code, status.HTTP_200_OK) dataimport.refresh_from_db() self.assertTupleEqual( - (dataimport.name, dataimport.name_contains, dataimport.element_type, dataimport.load_children), - ('newName', 'AAA', self.page_type, True) + (dataimport.name, dataimport.name_contains, dataimport.element_type, dataimport.load_children, dataimport.use_cache), + ('newName', 'AAA', self.page_type, True, True) ) def test_update_process_reset_fields(self): @@ -659,6 +660,7 @@ class TestImports(FixtureAPITestCase): 'element_name_contains': 'AAA', 'element_type': 'page', 'load_children': True, + 'use_cache': False, 'id': str(dataimport.id), 'corpus': str(self.corpus.id), 'element': None, diff --git a/arkindex/dataimport/tests/test_repos.py b/arkindex/dataimport/tests/test_repos.py index 29a5cef91aba20727b813638924e4f415c5317fd..526cef5b33d76219b255df925e99620cfc164225 100644 --- a/arkindex/dataimport/tests/test_repos.py +++ b/arkindex/dataimport/tests/test_repos.py @@ -350,6 +350,7 @@ class TestRepositories(FixtureTestCase): 'element': None, 'element_type': None, 'load_children': False, + 'use_cache': False, 'element_name_contains': None, 'files': [], 'folder_type': None, @@ -390,6 +391,7 @@ class TestRepositories(FixtureTestCase): 'element': None, 'element_type': None, 'load_children': 
False, + 'use_cache': False, 'element_name_contains': None, 'files': [], 'folder_type': None, diff --git a/arkindex/dataimport/tests/test_workflows_api.py b/arkindex/dataimport/tests/test_workflows_api.py index 263d85443d596191deae56c72a221f28f49c682a..2f76fb6b035e94398267f653c2b5847425743f94 100644 --- a/arkindex/dataimport/tests/test_workflows_api.py +++ b/arkindex/dataimport/tests/test_workflows_api.py @@ -86,6 +86,7 @@ class TestWorkflows(FixtureAPITestCase): 'element_type': 'page', 'element': None, 'load_children': False, + 'use_cache': False, 'element_name_contains': None }) @@ -112,7 +113,8 @@ class TestWorkflows(FixtureAPITestCase): { 'corpus': str(self.corpus.id), 'element': str(self.volume.id), - 'load_children': True + 'load_children': True, + 'use_cache': True, }, format='json' ) @@ -136,6 +138,7 @@ class TestWorkflows(FixtureAPITestCase): 'folder_type': None, 'element_type': None, 'load_children': True, + 'use_cache': True, 'element_name_contains': None }) @@ -565,3 +568,42 @@ class TestWorkflows(FixtureAPITestCase): elements_ids, WorkerActivity.objects.filter(worker_version=self.version_2).values_list('element_id', flat=True) ) + + def test_create_process_use_cache_option(self): + """ + A process with the `use_cache` parameter creates an initialization task with the --use-cache flag + """ + dataimport_2 = self.corpus.imports.create(creator=self.user, mode=DataImportMode.Workers) + dataimport_2.worker_runs.create( + version=self.version_1, + parents=[], + ) + dataimport_2.use_cache = True + dataimport_2.save() + self.client.force_login(self.user) + with self.assertNumQueries(29): + response = self.client.post( + reverse('api:process-start', kwargs={'pk': str(dataimport_2.id)}) + ) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.json()['use_cache'], True) + dataimport_2.refresh_from_db() + self.assertEqual(dataimport_2.state, State.Unscheduled) + self.assertIsNotNone(dataimport_2.workflow) + 
self.assertDictEqual(yaml.safe_load(dataimport_2.workflow.recipe)['tasks'], { + 'initialisation': + { + 'command': 'python -m arkindex_tasks.init_elements ' + f'{dataimport_2.id} --chunks-number 1 ' + '--use-cache', + 'image': 'registry.gitlab.com/arkindex/tasks' + }, + f'reco_{str(self.version_1.id)[0:6]}': + { + 'command': None, + 'image': f'my_repo.fake/workers/worker/reco:{self.version_1.id}', + 'artifact': str(self.version_1.docker_image.id), + 'parents': ['initialisation'], + 'env': {'TASK_ELEMENTS': '/data/initialisation/elements.json', 'WORKER_VERSION_ID': str(self.version_1.id)} + }, + })