diff --git a/arkindex/dataimport/api.py b/arkindex/dataimport/api.py index f25bb0ae97fde83a0724e3faa36e2c490ed5b6fc..b19a3df19d00ffb2db924590956dd65ecc0195bf 100644 --- a/arkindex/dataimport/api.py +++ b/arkindex/dataimport/api.py @@ -256,7 +256,6 @@ class CorpusWorkflow(SelectionMixin, CreateAPIView): ) payload = { - 'corpus_id': str(corpus.id), # Re-serialize ML tools as JSON 'ml_tools': MLToolTaskSerializer(ml_tools, many=True).data, # Chunks and thumbnails parameters @@ -275,7 +274,7 @@ class CorpusWorkflow(SelectionMixin, CreateAPIView): if name: payload['name_contains'] = name if elt_type: - payload['elt_type'] = elt_type + payload['element_type'] = elt_type if element: payload['element'] = str(element.id) # Add information concerning import diff --git a/arkindex/dataimport/models.py b/arkindex/dataimport/models.py index d87ba38d4861afa468ddffc3cccc2442c1649c39..dc57f5769b2bc66a2a2a575ec5ee13ecec972f3c 100644 --- a/arkindex/dataimport/models.py +++ b/arkindex/dataimport/models.py @@ -28,9 +28,11 @@ class DataImport(IndexableModel): files = models.ManyToManyField('dataimport.DataFile', related_name='imports') revision = models.ForeignKey( 'dataimport.Revision', related_name='dataimports', on_delete=models.CASCADE, blank=True, null=True) - payload = JSONField(null=True, blank=True) workflow = models.OneToOneField('ponos.Workflow', on_delete=models.SET_NULL, null=True, blank=True) + # Docs on the payload's contents: https://wiki.vpn/en/arkindex/dev-doc/dataimport-payload + payload = JSONField(null=True, blank=True) + class Meta: ordering = ['corpus', '-created'] @@ -74,8 +76,6 @@ class DataImport(IndexableModel): elif self.mode == DataImportMode.Elements: assert self.payload, \ 'A payload is required to create build import workflow from Arkindex elements' - assert self.payload.get('corpus_id'), \ - '"corpus_id" is missing in Elements dataimport payload' chunks = self.payload.get('chunks') if chunks is not None: assert isinstance(chunks, int) and chunks > 0, 'Chunks count should be an positive integer' @@ -84,7 +84,7 @@ class DataImport(IndexableModel): args = [ 'python', '-m', 'arkindex_tasks.init_elements', - '--corpus-id', self.payload['corpus_id'], + '--corpus-id', str(self.corpus.id), '--chunks-number', str(ml_workflow_chunks), ] @@ -96,10 +96,10 @@ class DataImport(IndexableModel): if substring: args.append('--name-contains') args.append(shlex.quote(substring)) - elt_type = self.payload.get('elt_type') - if elt_type: + element_type = self.payload.get('element_type') + if element_type: args.append('--type') - args.append(shlex.quote(elt_type)) + args.append(shlex.quote(element_type)) best_class = self.payload.get('best_class') if best_class is not None: args.append('--best-class') @@ -186,17 +186,6 @@ class DataImport(IndexableModel): else: self.start() - @cached_property - def repo(self): - if self.revision: - return self.revision.repo - elif 'repo_id' in self.payload: - return Repository.objects.get(id=self.payload['repo_id']) - - @cached_property - def sha(self): - return self.revision.hash if self.revision else self.payload.get('sha') - @cached_property def ml_tools(self): if not self.payload or not self.payload.get('ml_tools'): @@ -326,10 +315,6 @@ class Revision(IndexableModel): creator=self.repo.credentials.user, corpus=self.repo.corpus, mode=DataImportMode.Repository, - payload={ - 'repo_id': str(self.repo.id), - 'sha': self.hash, - } ) dataimport.start() return dataimport diff --git a/arkindex/dataimport/providers.py b/arkindex/dataimport/providers.py index 40633b053b29bd6f0b837edded5f2a2d3fa07875..ddae906f30952732c0a73e027983cb225f0c7325 100644 --- a/arkindex/dataimport/providers.py +++ b/arkindex/dataimport/providers.py @@ -199,9 +199,6 @@ class GitLabProvider(GitProvider): creator=repo.credentials.user, mode=DataImportMode.Repository, revision=rev, - payload={ - 'repo_id': str(repo.id), - }, ) di.start() diff --git a/arkindex/dataimport/tests/test_gitlab_provider.py b/arkindex/dataimport/tests/test_gitlab_provider.py index 2363a24b7fb65f23f135fef747e739183eead42f..67ce705c94bf64743e00afdcd4ab3a2e22b80ccf 100644 --- a/arkindex/dataimport/tests/test_gitlab_provider.py +++ b/arkindex/dataimport/tests/test_gitlab_provider.py @@ -362,7 +362,7 @@ class TestGitLabProvider(FixtureTestCase): rev = self.repo.revisions.filter(hash=sha) self.assertFalse(rev.exists()) - repo_imports = DataImport.objects.filter(payload__repo_id=str(self.repo.id)) + repo_imports = DataImport.objects.filter(revision__repo_id=str(self.repo.id)) self.assertFalse(repo_imports.exists()) GitLabProvider(url='http://aaa', credentials=self.creds).handle_webhook(self.repo, request_mock) @@ -397,7 +397,7 @@ class TestGitLabProvider(FixtureTestCase): rev = self.repo.revisions.filter(hash=sha) self.assertTrue(rev.exists()) - repo_imports = DataImport.objects.filter(payload__repo_id=str(self.repo.id)) + repo_imports = DataImport.objects.filter(revision__repo_id=str(self.repo.id)) self.assertFalse(repo_imports.exists()) GitLabProvider(url='http://aaa', credentials=self.creds).handle_webhook(self.repo, request_mock) @@ -413,7 +413,7 @@ class TestGitLabProvider(FixtureTestCase): sha = '1337' rev = self.repo.revisions.filter(hash=sha) self.assertFalse(rev.exists()) - repo_imports = DataImport.objects.filter(payload__repo_id=str(self.repo.id)) + repo_imports = DataImport.objects.filter(revision__repo_id=str(self.repo.id)) glp = GitLabProvider(url='http://aaa', credentials=self.creds) request_mock = MagicMock() diff --git a/arkindex/dataimport/tests/test_workflows_api.py b/arkindex/dataimport/tests/test_workflows_api.py index df80eace2ad6cf9cf93c0c6d22638c32718954ae..ff5971ee789c6aa1e3bf24b02960b30d8040bade 100644 --- a/arkindex/dataimport/tests/test_workflows_api.py +++ b/arkindex/dataimport/tests/test_workflows_api.py @@ -71,7 +71,6 @@ class TestWorkflows(FixtureAPITestCase): 'files': [], 'mode': 'elements', 'payload': { - 'corpus_id': str(self.volume.corpus.id), 'ml_tools': [ { 'slug': 'line-detector', @@ -123,7 +122,6 @@ class TestWorkflows(FixtureAPITestCase): 'mode': 'elements', 'corpus': str(page.corpus.id), 'payload': { - 'corpus_id': str(page.corpus.id), 'chunks': 1, 'element': str(self.volume.id), 'ml_tools': [], @@ -139,7 +137,6 @@ class TestWorkflows(FixtureAPITestCase): }) def test_single_page_workflow(self, ml_get_mock): - page = self.pages[0] self.client.force_login(self.user) response = self.client.post( reverse('api:corpus-workflow'), @@ -154,7 +151,6 @@ class TestWorkflows(FixtureAPITestCase): self.assertEqual(response.status_code, status.HTTP_201_CREATED) data = response.json() self.assertDictEqual(data.get('payload'), { - 'corpus_id': str(page.corpus.id), 'chunks': 1, 'element': str(self.pages.first().id), 'folder_id': str(self.volume.id), @@ -207,7 +203,6 @@ class TestWorkflows(FixtureAPITestCase): 'chunks': 3, 'thumbnails': True, 'element': str(self.volume.id), - 'corpus_id': str(self.corpus.id), 'ml_tools': ml_tools, 'folder_id': str(self.volume.id) } @@ -309,7 +304,6 @@ class TestWorkflows(FixtureAPITestCase): dataimport = DataImport.objects.get(id=data['id']) self.assertDictEqual(dataimport.payload, { 'chunks': 1, - 'corpus_id': str(self.corpus.id), 'ml_tools': [], 'name_contains': self.pages.first().name[2:5], }) @@ -332,8 +326,7 @@ class TestWorkflows(FixtureAPITestCase): dataimport = DataImport.objects.get(id=data['id']) self.assertDictEqual(dataimport.payload, { 'chunks': 1, - 'corpus_id': str(self.corpus.id), - 'elt_type': 'page', + 'element_type': 'page', 'ml_tools': [] }) @@ -529,7 +522,6 @@ class TestWorkflows(FixtureAPITestCase): workflow = dataimport.workflow self.assertDictEqual(dataimport.payload, { 'chunks': 1, - 'corpus_id': str(self.corpus.id), 'ml_tools': [], 'elements': [str(self.pages.first().id)] })