diff --git a/arkindex/dataimport/tests/test_process_elements.py b/arkindex/dataimport/tests/test_process_elements.py index c0bc419ebe8cb06a4a0ed510643be142f394bc7c..bfc367349c9781bf37e768989faf0efc85ff1519 100644 --- a/arkindex/dataimport/tests/test_process_elements.py +++ b/arkindex/dataimport/tests/test_process_elements.py @@ -3,8 +3,8 @@ import uuid from django.urls import reverse from rest_framework import status -from arkindex.dataimport.models import DataImport, DataImportMode -from arkindex.documents.models import Classification, ClassificationState, Corpus, DataSource, Element, MLClass +from arkindex.dataimport.models import DataImport, DataImportMode, WorkerVersion +from arkindex.documents.models import Classification, ClassificationState, Corpus, Element, MLClass from arkindex.project.tests import FixtureAPITestCase @@ -127,44 +127,44 @@ class TestProcessElements(FixtureAPITestCase): cls.line_5.add_parent(cls.page_5) # Create best classes - source = DataSource.objects.first() - cls.coffee_source = MLClass.objects.create(name='C0FFEE', corpus=cls.private_corpus) - cls.food_source = MLClass.objects.create(name='F00D', corpus=cls.private_corpus) + worker_version = WorkerVersion.objects.get(worker__slug='reco') + cls.coffee_class = MLClass.objects.create(name='C0FFEE', corpus=cls.private_corpus) + cls.food_class = MLClass.objects.create(name='F00D', corpus=cls.private_corpus) Classification.objects.create( element=cls.folder_2, state=ClassificationState.Validated, - ml_class=cls.food_source, - source=source + ml_class=cls.food_class, + worker_version=worker_version, ) Classification.objects.create( element=cls.page_1, state=ClassificationState.Validated, - ml_class=cls.coffee_source, - source=source + ml_class=cls.coffee_class, + worker_version=worker_version, ) Classification.objects.create( element=cls.page_2, high_confidence=True, - ml_class=cls.food_source, - source=source + ml_class=cls.food_class, + worker_version=worker_version, ) 
Classification.objects.create( element=cls.page_3, state=ClassificationState.Validated, - ml_class=cls.food_source, - source=source + ml_class=cls.food_class, + worker_version=worker_version, ) Classification.objects.create( element=cls.page_5, high_confidence=True, - ml_class=cls.food_source, - source=source + ml_class=cls.food_class, + worker_version=worker_version, ) Classification.objects.create( element=cls.page_5, state=ClassificationState.Validated, - ml_class=cls.coffee_source , - source=source + ml_class=cls.coffee_class, + worker_version=worker_version, ) def setUp(self): @@ -323,7 +323,7 @@ class TestProcessElements(FixtureAPITestCase): ]) def test_filter_best_class_by_id(self): - self.dataimport.best_class = self.food_source.id + self.dataimport.best_class = self.food_class.id self.dataimport.save() elements = [self.page_5, self.page_3, self.folder_2, self.page_2] @@ -472,7 +472,7 @@ class TestProcessElements(FixtureAPITestCase): ]) def test_load_children_and_filter_best_class_by_id(self): - self.dataimport.best_class = self.food_source.id + self.dataimport.best_class = self.food_class.id self.dataimport.load_children = True self.dataimport.save() elements = [self.folder_2, self.page_2, self.page_3, self.page_5] diff --git a/arkindex/documents/tests/commands/test_reindex.py b/arkindex/documents/tests/commands/test_reindex.py index 0531056f530328711ffc2904d8ae94c9a07ec4de..ff2d90d73e49745d10b8872ebda08c411cfd5daa 100644 --- a/arkindex/documents/tests/commands/test_reindex.py +++ b/arkindex/documents/tests/commands/test_reindex.py @@ -3,7 +3,8 @@ from unittest.mock import call, patch from django.core.management import CommandError, call_command from django.test import override_settings -from arkindex.documents.models import DataSource, Element, Entity, EntityType, MLToolType, Transcription +from arkindex.dataimport.models import WorkerVersion +from arkindex.documents.models import Element, Entity, EntityType, Transcription from arkindex.project.elastic 
import ESElement, ESEntity, ESTranscription from arkindex.project.tests import FixtureTestCase from arkindex_common.enums import MetaType @@ -14,10 +15,10 @@ class TestReindexCommand(FixtureTestCase): @classmethod def setUpTestData(cls): super().setUpTestData() - source = DataSource.objects.create(type=MLToolType.NER, slug='entity', internal=True) cls.indexer_patch = patch('arkindex.documents.management.commands.reindex.Indexer') cls.vol = cls.corpus.elements.get(name="Volume 1") - cls.entity = cls.corpus.entities.create(type=EntityType.Misc, name='Dummy entity', source=source) + worker_version = WorkerVersion.objects.first() + cls.entity = cls.corpus.entities.create(type=EntityType.Misc, name='Dummy entity', worker_version=worker_version) page = cls.corpus.elements.get(name='Volume 1, page 1r') page.metadatas.create(name='Dummy metadata', value='Dummy', type=MetaType.Text, entity=cls.entity) diff --git a/arkindex/documents/tests/tasks/test_corpus_delete.py b/arkindex/documents/tests/tasks/test_corpus_delete.py index 53c2cd99c50b31d7d7756a43ef960acd56e699f3..db15d2c640ca964f2ef648e1087f8c50842d15c9 100644 --- a/arkindex/documents/tests/tasks/test_corpus_delete.py +++ b/arkindex/documents/tests/tasks/test_corpus_delete.py @@ -1,11 +1,10 @@ from django.db.models.signals import pre_delete from arkindex.dataimport.models import Repository, RepositoryType, WorkerVersion -from arkindex.documents.models import Corpus, DataSource, Element, Transcription +from arkindex.documents.models import Corpus, Element, Transcription from arkindex.documents.tasks import corpus_delete from arkindex.project.tests import FixtureTestCase from arkindex_common.enums import DataImportMode, EntityType, MetaType -from arkindex_common.ml_tool import MLToolType class TestDeleteCorpus(FixtureTestCase): @@ -32,12 +31,14 @@ class TestDeleteCorpus(FixtureTestCase): ) file_import.build_workflow() + cls.worker_version = WorkerVersion.objects.get(worker__slug='reco') + element_import = 
cls.corpus.imports.create( creator=cls.user, mode=DataImportMode.Workers, ) element_import.elements.add(element) - element_import.worker_runs.create(version=WorkerVersion.objects.first(), parents=[]) + element_import.worker_runs.create(version=cls.worker_version, parents=[]) entity1 = cls.corpus.entities.create(name='Magnemite', type=EntityType.Person) entity2 = cls.corpus.entities.create(name='Magneton', type=EntityType.Person) @@ -98,24 +99,14 @@ class TestDeleteCorpus(FixtureTestCase): name='A page', ) cls.page.classifications.create( - source=DataSource.objects.create( - type=MLToolType.Classifier, - slug='classeur', - revision='Early Access', - internal=False, - ), + worker_version=cls.worker_version, ml_class=cls.corpus2.ml_classes.create( name='klass', ), confidence=0.5, ) cls.page.transcriptions.create( - source=DataSource.objects.create( - type=MLToolType.Recognizer, - slug='reco', - revision='-1', - internal=False, - ), + worker_version=cls.worker_version, text='hi', score=0.75, ) @@ -159,11 +150,11 @@ class TestDeleteCorpus(FixtureTestCase): self.assertEqual(md.value, 'data') cl = self.page.classifications.get() - self.assertEqual(cl.source.slug, 'classeur') + self.assertEqual(cl.worker_version, self.worker_version) self.assertEqual(cl.ml_class.name, 'klass') self.assertEqual(cl.confidence, 0.5) ts = self.page.transcriptions.get() - self.assertEqual(ts.source.slug, 'reco') + self.assertEqual(ts.worker_version, self.worker_version) self.assertEqual(ts.text, 'hi') self.assertEqual(ts.score, 0.75) diff --git a/arkindex/documents/tests/tasks/test_reindex.py b/arkindex/documents/tests/tasks/test_reindex.py index 014d09795eca1596ee9ce89229af961eafc3ce7e..0f5f3fccfe045e1b3b11ce83638e62302c20f3df 100644 --- a/arkindex/documents/tests/tasks/test_reindex.py +++ b/arkindex/documents/tests/tasks/test_reindex.py @@ -3,11 +3,11 @@ from unittest.mock import patch from django.contrib.gis.geos import LinearRing from django.db.models import Q -from 
arkindex.documents.models import Corpus, DataSource, Element, Entity, Transcription +from arkindex.dataimport.models import WorkerVersion +from arkindex.documents.models import Corpus, Element, Entity, Transcription from arkindex.documents.tasks import reindex_start from arkindex.project.tests import FixtureTestCase from arkindex_common.enums import EntityType, MetaType -from arkindex_common.ml_tool import MLToolType @patch('arkindex.documents.tasks.Indexer') @@ -16,8 +16,7 @@ class TestReindex(FixtureTestCase): @classmethod def setUpTestData(cls): super().setUpTestData() - source = DataSource.objects.get(slug='test', type=MLToolType.Recognizer) - + cls.worker_version = WorkerVersion.objects.first() cls.folder = cls.corpus.elements.get(name='Volume 1') cls.folder.metadatas.create( type=MetaType.Text, @@ -26,7 +25,7 @@ class TestReindex(FixtureTestCase): entity=cls.corpus.entities.create( type=EntityType.Person, name='Some entity', - source=source, + worker_version=cls.worker_version, ) ) @@ -41,13 +40,13 @@ class TestReindex(FixtureTestCase): ts = element2.transcriptions.create( score=0.8, text='something', - source=source, + worker_version=cls.worker_version, ) ts.transcription_entities.create( entity=corpus2.entities.create( type=EntityType.Misc, name='Some other entity', - source=source, + worker_version=cls.worker_version, ), offset=0, length=1, diff --git a/arkindex/documents/tests/test_bulk_classification.py b/arkindex/documents/tests/test_bulk_classification.py index 05f7d64f3abf3625ad327fc646b19696d0aff6ee..e834fe55d91d354907336f30868dbcf0c0f4ec7d 100644 --- a/arkindex/documents/tests/test_bulk_classification.py +++ b/arkindex/documents/tests/test_bulk_classification.py @@ -15,18 +15,6 @@ class TestBulkClassification(FixtureAPITestCase): cls.private_corpus = Corpus.objects.create(name='private', public=False) cls.worker_version = WorkerVersion.objects.get(worker__slug='reco') - @classmethod - def setUpClass(cls): - super().setUpClass() - cls.src.internal = 
True - cls.src.save() - - @classmethod - def tearDownClass(cls): - super().tearDownClass() - cls.src.internal = False - cls.src.save() - def create_classifications_data(self, classifications, parent=None): return { "parent": parent or str(self.page.id), diff --git a/arkindex/documents/tests/test_bulk_element_transcriptions.py b/arkindex/documents/tests/test_bulk_element_transcriptions.py index 1361510f05e641126a7746379ce1f36e36d1f16c..71a2abb020dab77e49d5d8bb97e6e28173a13ced 100644 --- a/arkindex/documents/tests/test_bulk_element_transcriptions.py +++ b/arkindex/documents/tests/test_bulk_element_transcriptions.py @@ -361,8 +361,8 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): self.assertCountEqual( created_elts.values_list('transcriptions__text', 'transcriptions__worker_version'), [ - ('Hello world !', None, self.worker_version.id), - ('I <3 JavaScript', None, self.worker_version.id) + ('Hello world !', self.worker_version.id), + ('I <3 JavaScript', self.worker_version.id) ] ) self.assertEqual(delay_mock.call_count, 1) diff --git a/arkindex/documents/tests/test_classes.py b/arkindex/documents/tests/test_classes.py index 4984fef2ee2260147d3820905f99a995f7807382..d20945f3fb7bf5a687e5aaedbdfa6680a624c4f1 100644 --- a/arkindex/documents/tests/test_classes.py +++ b/arkindex/documents/tests/test_classes.py @@ -2,9 +2,9 @@ from django.test import override_settings from django.urls import reverse from rest_framework import status -from arkindex.documents.models import Classification, ClassificationState, Corpus, DataSource, Element, MLClass +from arkindex.dataimport.models import WorkerVersion +from arkindex.documents.models import Classification, ClassificationState, Corpus, Element, MLClass from arkindex.project.tests import FixtureAPITestCase -from arkindex_common.ml_tool import MLToolType class TestClasses(FixtureAPITestCase): @@ -21,14 +21,8 @@ class TestClasses(FixtureAPITestCase): self.parent = self.corpus.elements.create(type=self.folder_type) 
self.common_children = self.corpus.elements.create(type=self.folder_type) - source1 = DataSource.objects.get(slug='test', type=MLToolType.Recognizer) - source2 = DataSource.objects.create( - type=MLToolType.Classifier, - slug='source2', - name='classifier', - revision='123', - internal=False, - ) + self.version1 = WorkerVersion.objects.get(worker__slug='reco') + self.version2 = WorkerVersion.objects.get(worker__slug='dla') for elt_num in range(1, 13): elt = Element.objects.create( name='elt_{}'.format(elt_num), @@ -38,9 +32,9 @@ class TestClasses(FixtureAPITestCase): elt.add_parent(self.parent) self.common_children.add_parent(elt) for ml_class, score in zip((self.text, self.cover), (.7, .99)): - for source in (source1, source2): + for worker_version in (self.version1, self.version2): elt.classifications.create( - source_id=source.id, + worker_version=worker_version, ml_class_id=ml_class.id, confidence=score, high_confidence=bool(score == .99) @@ -245,8 +239,8 @@ class TestClasses(FixtureAPITestCase): self.assertEqual(data['count'], 12) for elt in data['results']: self.assertCountEqual( - list(map(lambda c: (c['source']['slug'], c['confidence']), elt['best_classes'])), - [('source2', .99), ('test', .99)] + list(map(lambda c: (c['worker_version'], c['confidence']), elt['best_classes'])), + [(str(self.version1.id), .99), (str(self.version2.id), .99)] ) def test_list_elements_best_classes_false(self): @@ -275,8 +269,8 @@ class TestClasses(FixtureAPITestCase): self.assertEqual(data['count'], 12) for elt in data['results']: self.assertCountEqual( - list(map(lambda c: (c['source']['slug'], c['confidence']), elt['best_classes'])), - [('source2', .99), ('test', .99)] + list(map(lambda c: (c['worker_version'], c['confidence']), elt['best_classes'])), + [(str(self.version1.id), .99), (str(self.version2.id), .99)] ) def test_element_children_best_classes(self): @@ -291,8 +285,8 @@ class TestClasses(FixtureAPITestCase): self.assertEqual(data['count'], 12) for elt in data['results']: 
self.assertCountEqual( - list(map(lambda c: (c['source']['slug'], c['confidence']), elt['best_classes'])), - [('source2', .99), ('test', .99)] + list(map(lambda c: (c['worker_version'], c['confidence']), elt['best_classes'])), + [(str(self.version1.id), .99), (str(self.version2.id), .99)] ) def test_rejected_best_classes(self): @@ -330,13 +324,18 @@ class TestClasses(FixtureAPITestCase): for elt in response.json()['results']: if elt['id'] == str(parent.id): self.assertCountEqual( - list(map(lambda c: (c['source']['slug'], c['confidence']), elt['best_classes'])), - [('source2', .99), ('test', .99), ('source2', .7), ('test', .7)] + list(map(lambda c: (c['worker_version'], c['confidence']), elt['best_classes'])), + [ + (str(self.version1.id), .99), + (str(self.version2.id), .99), + (str(self.version1.id), .7), + (str(self.version2.id), .7), + ] ) continue self.assertCountEqual( - list(map(lambda c: (c['source']['slug'], c['confidence']), elt['best_classes'])), - [('source2', .99), ('test', .99)] + list(map(lambda c: (c['worker_version'], c['confidence']), elt['best_classes'])), + [(str(self.version1.id), .99), (str(self.version2.id), .99)] ) def test_rejected_human_class(self): @@ -344,10 +343,8 @@ class TestClasses(FixtureAPITestCase): A manual classification rejected by a human may not appear in best classes """ self.populate_classified_elements() - data_source, _ = DataSource.objects.get_or_create(type=MLToolType.NER, slug="manual", internal=False) element = Element.objects.filter(type=self.classified.id).first() classif = element.classifications.create( - source_id=data_source.id, ml_class_id=self.text.id, confidence=1, high_confidence=True, @@ -360,13 +357,20 @@ class TestClasses(FixtureAPITestCase): for elt in response.json()['results']: if elt['id'] == str(element.id): self.assertCountEqual( - list(map(lambda c: (c['source']['slug'], c['confidence']), elt['best_classes'])), - [('manual', 1.0), ('test', .99), ('source2', .99)] + list(map(lambda c: 
(c['worker_version'], c['confidence']), elt['best_classes'])), + [ + (None, 1.0), + (str(self.version1.id), .99), + (str(self.version2.id), .99), + ] ) continue self.assertCountEqual( - list(map(lambda c: (c['source']['slug'], c['confidence']), elt['best_classes'])), - [('source2', .99), ('test', .99)] + list(map(lambda c: (c['worker_version'], c['confidence']), elt['best_classes'])), + [ + (str(self.version1.id), .99), + (str(self.version2.id), .99), + ] ) # Reject the manual classification classif.state = ClassificationState.Rejected @@ -378,16 +382,16 @@ class TestClasses(FixtureAPITestCase): self.assertEqual(response.status_code, status.HTTP_200_OK) for elt in response.json()['results']: self.assertCountEqual( - list(map(lambda c: (c['source']['slug'], c['confidence']), elt['best_classes'])), - [('source2', .99), ('test', .99)] + list(map(lambda c: (c['worker_version'], c['confidence']), elt['best_classes'])), + [(str(self.version1.id), .99), (str(self.version2.id), .99)] ) def test_class_filter_list_elements(self): self.populate_classified_elements() element = Element.objects.filter(type=self.classified.id).first() element.classifications.create( - source_id=DataSource.objects.create(type=MLToolType.NER, slug='ner', internal=False).id, - ml_class_id=self.text.id, + worker_version=self.version1, + ml_class=self.text, confidence=.1337, high_confidence=True, ) @@ -482,7 +486,7 @@ class TestClasses(FixtureAPITestCase): element = Element.objects.filter(type=self.classified.id).first() element.classifications.all().delete() element.classifications.create( - source_id=DataSource.objects.create(type=MLToolType.NER, slug='ner', internal=False).id, + worker_version=self.version2, ml_class_id=self.text.id, confidence=.1337, high_confidence=True, @@ -507,7 +511,7 @@ class TestClasses(FixtureAPITestCase): element = Element.objects.filter(type=self.classified.id).first() element.classifications.all().delete() element.classifications.create( - 
source_id=DataSource.objects.create(type=MLToolType.NER, slug='ner', internal=False).id, + worker_version=self.version2, ml_class_id=self.text.id, confidence=.1337, high_confidence=False, diff --git a/arkindex/documents/tests/test_entities.py b/arkindex/documents/tests/test_entities.py index ba6efbf003e9a86aac35d59d6a2b57f4a7e7e53a..1f72233cc5c611976a9f9fcc691cbd818cef3593 100644 --- a/arkindex/documents/tests/test_entities.py +++ b/arkindex/documents/tests/test_entities.py @@ -1,6 +1,7 @@ from django.core.exceptions import ValidationError -from arkindex.documents.models import Corpus, DataSource, Entity, EntityLink, EntityRole, MetaData, MLToolType +from arkindex.dataimport.models import WorkerVersion +from arkindex.documents.models import Corpus, Entity, EntityLink, EntityRole, MetaData from arkindex.project.tests import FixtureTestCase from arkindex_common.enums import EntityType, MetaType @@ -10,16 +11,21 @@ class TestSaveEntities(FixtureTestCase): @classmethod def setUpTestData(cls): super().setUpTestData() - source = DataSource.objects.create(type=MLToolType.NER, slug='entity', internal=False) + worker_version = WorkerVersion.objects.first() cls.corpus1 = Corpus.objects.create(name='corpus 1') cls.corpus2 = Corpus.objects.create(name='corpus 2') cls.parent = Entity.objects.create( name='parent', type=EntityType.Organization, corpus=cls.corpus1, - source=source + worker_version=worker_version, + ) + cls.child = Entity.objects.create( + type=EntityType.Person, + corpus=cls.corpus1, + name="child", + worker_version=worker_version, ) - cls.child = Entity.objects.create(type=EntityType.Person, corpus=cls.corpus1, name="child", source=source) cls.role = EntityRole.objects.create( parent_name='organization', child_name='person', diff --git a/arkindex/documents/tests/test_entities_api.py b/arkindex/documents/tests/test_entities_api.py index 2493f7bb6907292e964d7aa282e8c56f58720de3..ad84fa2132a5dc7caae2bf3bc2eee8831fbfa934 100644 --- 
a/arkindex/documents/tests/test_entities_api.py +++ b/arkindex/documents/tests/test_entities_api.py @@ -8,17 +8,7 @@ from elasticsearch.exceptions import NotFoundError from rest_framework import status from arkindex.dataimport.models import WorkerVersion -from arkindex.documents.models import ( - Corpus, - DataSource, - Element, - Entity, - EntityLink, - EntityRole, - EntityType, - MLToolType, - TranscriptionEntity, -) +from arkindex.documents.models import Corpus, Element, Entity, EntityLink, EntityRole, EntityType, TranscriptionEntity from arkindex.project.tests import FixtureAPITestCase from arkindex_common.enums import MetaType @@ -28,15 +18,9 @@ class TestEntitiesAPI(FixtureAPITestCase): @classmethod def setUpTestData(cls): super().setUpTestData() - cls.entity_source = DataSource.objects.create( - type=MLToolType.NER, - slug='entity', - name='Test NER', - internal=True, - ) - cls.source = DataSource.objects.get(slug='test', type=MLToolType.Recognizer) cls.private_corpus = Corpus.objects.create(name='private') - cls.worker_version = WorkerVersion.objects.get(worker__slug='reco') + cls.worker_version_1 = WorkerVersion.objects.get(worker__slug='reco') + cls.worker_version_2 = WorkerVersion.objects.get(worker__slug='dla') cls.page = cls.corpus.elements.get(name='Volume 1, page 1r') cls.element_type = cls.corpus.types.get(slug='text_line') @@ -46,13 +30,13 @@ class TestEntitiesAPI(FixtureAPITestCase): type=EntityType.Person, corpus=self.corpus, name='entity 1', - source_id=self.source.id + worker_version=self.worker_version_1, ) self.entity_bis = Entity.objects.create( type=EntityType.Location, corpus=self.corpus, name='entity 2', - source_id=self.source.id + worker_version=self.worker_version_2, ) self.role = EntityRole.objects.create( parent_name="parent", @@ -68,11 +52,11 @@ class TestEntitiesAPI(FixtureAPITestCase): type=self.element_type, name='Transcription', zone=zone, - source=self.source, + worker_version=self.worker_version_1, ) self.transcription = 
self.element.transcriptions.create( text='Some transcribed text', - source=self.source, + worker_version=self.worker_version_1, ) self.metadata = self.element.metadatas.create( name='test 1', @@ -129,7 +113,7 @@ class TestEntitiesAPI(FixtureAPITestCase): name='001', zone=zone, ) - elt_tr = elt.transcriptions.create(source_id=self.source.id, text='goodbye') + elt_tr = elt.transcriptions.create(worker_version=self.worker_version_1, text='goodbye') TranscriptionEntity.objects.create(transcription=elt_tr, entity=self.entity, offset=42, length=7) with self.assertNumQueries(8): response = self.client.get(reverse('api:entity-elements', kwargs={'pk': str(self.entity.id)})) @@ -239,8 +223,6 @@ class TestEntitiesAPI(FixtureAPITestCase): @patch('arkindex.project.triggers.tasks.reindex_start.delay') def test_create_entity_person(self, delay_mock): - self.entity_source.internal = True - self.entity_source.save() data = { 'name': 'entity', 'type': EntityType.Person.value, @@ -249,7 +231,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'key': 'value', 'other key': 'other value' }, - 'worker_version': str(self.worker_version.id) + 'worker_version': str(self.worker_version_1.id) } self.client.force_login(self.user) response = self.client.post(reverse('api:entity-create'), data=data, format='json') @@ -257,7 +239,7 @@ class TestEntitiesAPI(FixtureAPITestCase): entity = Entity.objects.get(id=response.json()['id']) self.assertEqual(entity.name, 'entity') self.assertEqual(entity.raw_dates, None) - self.assertEqual(entity.worker_version, self.worker_version) + self.assertEqual(entity.worker_version, self.worker_version_1) self.assertEqual(delay_mock.call_count, 1) self.assertEqual(delay_mock.call_args, call( corpus_id=None, @@ -273,8 +255,6 @@ class TestEntitiesAPI(FixtureAPITestCase): @patch('arkindex.project.triggers.tasks.reindex_start.delay') def test_create_entity_number(self, delay_mock): - self.entity_source.internal = True - self.entity_source.save() data = { 'name': '300g', 
'type': EntityType.Number.value, @@ -283,7 +263,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'key': 'value', 'other key': 'other value' }, - 'worker_version': str(self.worker_version.id) + 'worker_version': str(self.worker_version_1.id) } self.client.force_login(self.user) response = self.client.post(reverse('api:entity-create'), data=data, format='json') @@ -291,7 +271,7 @@ class TestEntitiesAPI(FixtureAPITestCase): entity = Entity.objects.get(id=response.json()['id']) self.assertEqual(entity.name, '300g') self.assertEqual(entity.raw_dates, None) - self.assertEqual(entity.worker_version, self.worker_version) + self.assertEqual(entity.worker_version, self.worker_version_1) self.assertEqual(delay_mock.call_count, 1) self.assertEqual(delay_mock.call_args, call( corpus_id=None, @@ -307,8 +287,6 @@ class TestEntitiesAPI(FixtureAPITestCase): @patch('arkindex.project.triggers.tasks.reindex_start.delay') def test_create_entity_date(self, delay_mock): - self.entity_source.internal = True - self.entity_source.save() data = { 'name': '1789', 'type': EntityType.Date.value, @@ -317,7 +295,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'key': 'value', 'other key': 'other value' }, - 'worker_version': str(self.worker_version.id) + 'worker_version': str(self.worker_version_1.id) } self.client.force_login(self.user) response = self.client.post(reverse('api:entity-create'), data=data, format='json') @@ -325,7 +303,7 @@ class TestEntitiesAPI(FixtureAPITestCase): entity = Entity.objects.get(id=response.json()['id']) self.assertEqual(entity.name, '1789') self.assertEqual(entity.raw_dates, entity.name) - self.assertEqual(entity.worker_version, self.worker_version) + self.assertEqual(entity.worker_version, self.worker_version_1) self.assertEqual(delay_mock.call_count, 1) self.assertEqual(delay_mock.call_args, call( corpus_id=None, @@ -348,7 +326,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'key': 'value', 'other key': 'other value' }, - 'ner': self.entity_source.slug + 
'worker_version': str(self.worker_version_1.id) } response = self.client.post(reverse('api:entity-create'), data=data, format='json') self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) @@ -363,15 +341,14 @@ class TestEntitiesAPI(FixtureAPITestCase): 'key': 'value', 'other key': 'other value' }, - 'worker_version': str(self.worker_version.id) + 'worker_version': str(self.worker_version_1.id) } self.client.force_login(self.user) response = self.client.post(reverse('api:entity-create'), data=data, format='json') self.assertEqual(response.status_code, status.HTTP_201_CREATED) entity = Entity.objects.get(id=response.json()['id']) self.assertEqual(entity.name, '1789') - self.assertEqual(entity.source, None) - self.assertEqual(entity.worker_version, self.worker_version) + self.assertEqual(entity.worker_version, self.worker_version_1) self.assertEqual(delay_mock.call_count, 1) self.assertEqual(delay_mock.call_args, call( corpus_id=None, @@ -390,7 +367,7 @@ class TestEntitiesAPI(FixtureAPITestCase): type=EntityType.Location, corpus=self.corpus, name="child", - source_id=self.source.id + worker_version=self.worker_version_1, ) data = { 'parent': str(self.entity.id), @@ -410,7 +387,7 @@ class TestEntitiesAPI(FixtureAPITestCase): type=EntityType.Location, corpus=self.corpus, name="child", - source_id=self.source.id + worker_version=self.worker_version_1, ) data = { 'parent': str(self.entity.id), @@ -425,7 +402,7 @@ class TestEntitiesAPI(FixtureAPITestCase): type=EntityType.Person, corpus=self.corpus, name="child", - source_id=self.source.id + worker_version=self.worker_version_1, ) data = { 'parent': str(self.entity.id), @@ -489,7 +466,7 @@ class TestEntitiesAPI(FixtureAPITestCase): type=EntityType.Person, corpus=self.private_corpus, name="a private entity", - source_id=self.source.id + worker_version=self.worker_version_1, ) self.tr_entities_sample.update({'entity': ent.id}) response = self.client.post( @@ -563,15 +540,7 @@ class 
TestEntitiesAPI(FixtureAPITestCase): 'metas': None, 'validated': self.entity_bis.validated, 'dates': [], - 'source': { - 'id': str(self.source.id), - 'type': self.source.type.value, - 'slug': self.source.slug, - 'name': self.source.name, - 'revision': self.source.revision, - 'internal': self.source.internal, - }, - 'worker_version_id': None, + 'worker_version_id': str(self.worker_version_2.id), }, 'length': self.transcriptionentity.length, 'offset': self.transcriptionentity.offset @@ -579,10 +548,6 @@ class TestEntitiesAPI(FixtureAPITestCase): ) def test_list_transcription_entities_worker_version(self): - self.entity_bis.source = None - self.entity_bis.worker_version = self.worker_version - self.entity_bis.save() - response = self.client.get(reverse('api:transcription-entities', kwargs={'pk': str(self.transcription.id)})) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertListEqual( @@ -595,8 +560,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'metas': None, 'validated': False, 'dates': [], - 'source': None, - 'worker_version_id': str(self.worker_version.id), + 'worker_version_id': str(self.worker_version_2.id), }, 'length': 8, 'offset': 2 @@ -645,15 +609,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'metas': None, 'validated': t.entity.validated, 'dates': [], - 'source': { - 'id': str(self.source.id), - 'type': self.source.type.value, - 'slug': self.source.slug, - 'name': self.source.name, - 'revision': self.source.revision, - 'internal': self.source.internal, - }, - 'worker_version_id': None, + 'worker_version_id': str(self.worker_version_1.id), }, 'offset': t.offset, 'length': t.length @@ -666,15 +622,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'metas': None, 'validated': m.entity.validated, 'dates': [], - 'source': { - 'id': str(self.source.id), - 'type': self.source.type.value, - 'slug': self.source.slug, - 'name': self.source.name, - 'revision': self.source.revision, - 'internal': self.source.internal, - }, - 'worker_version_id': 
None, + 'worker_version_id': str(self.worker_version_1.id), }, 'id': str(m.id), 'type': m.type.value, @@ -704,14 +652,10 @@ class TestEntitiesAPI(FixtureAPITestCase): self.assertEqual(response.json(), {'worker_version': ['This worker version does not exist.']}) def test_list_element_entities_worker_version(self): - self.entity.source = None - self.entity.worker_version = self.worker_version - self.entity.save() - with self.assertNumQueries(6): response = self.client.get( reverse('api:element-entities', kwargs={'pk': str(self.element.id)}), - data={'worker_version': str(self.worker_version.id)} + data={'worker_version': str(self.worker_version_1.id)} ) self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -737,8 +681,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'validated': False, 'dates': [], 'metas': None, - 'source': None, - 'worker_version_id': str(self.worker_version.id), + 'worker_version_id': str(self.worker_version_1.id), }, } ], @@ -870,15 +813,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'metas': None, 'validated': self.entity.validated, 'dates': [], - 'source': { - 'id': str(self.source.id), - 'type': self.source.type.value, - 'slug': self.source.slug, - 'name': self.source.name, - 'revision': self.source.revision, - 'internal': self.source.internal, - }, - 'worker_version_id': None, + 'worker_version_id': str(self.worker_version_1.id), }, 'child': { 'id': str(self.entity_bis.id), @@ -887,15 +822,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'metas': None, 'validated': self.entity_bis.validated, 'dates': [], - 'source': { - 'id': str(self.source.id), - 'type': self.source.type.value, - 'slug': self.source.slug, - 'name': self.source.name, - 'revision': self.source.revision, - 'internal': self.source.internal, - }, - 'worker_version_id': None, + 'worker_version_id': str(self.worker_version_2.id), }, 'role': { 'id': self.role.id, @@ -921,7 +848,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'name': 'entity', 'type': EntityType.Person.value, 
'corpus': str(self.corpus.id), - 'worker_version': str(self.worker_version.id) + 'worker_version': str(self.worker_version_1.id) } self.client.force_login(self.user) response = self.client.post(reverse('api:entity-create'), data=data, format='json') diff --git a/arkindex/documents/tests/test_indexer.py b/arkindex/documents/tests/test_indexer.py index 52a7d9a9a25be7670b7650d94ad30673814b2920..3981f62a9af86f10789f37e9df598e9313e7a5e1 100644 --- a/arkindex/documents/tests/test_indexer.py +++ b/arkindex/documents/tests/test_indexer.py @@ -3,8 +3,8 @@ from unittest.mock import MagicMock, call, patch from elasticsearch import Elasticsearch from elasticsearch.exceptions import NotFoundError +from arkindex.dataimport.models import WorkerVersion from arkindex.documents.indexer import Indexer -from arkindex.documents.models import DataSource, MLToolType from arkindex.project.tests import FixtureTestCase from arkindex_common.enums import EntityType @@ -14,9 +14,13 @@ class TestIndexer(FixtureTestCase): @classmethod def setUpTestData(cls): super().setUpTestData() - source = DataSource.objects.create(type=MLToolType.NER, slug='entity', internal=True) + worker_version = WorkerVersion.objects.first() for i in range(10): - cls.corpus.entities.create(name='ES Dummy {}'.format(i), type=EntityType.Misc, source=source) + cls.corpus.entities.create( + name=f'ES Dummy {i}', + type=EntityType.Misc, + worker_version=worker_version, + ) @patch('arkindex.documents.indexer.Elasticsearch') def test_drop_index(self, es_mock): diff --git a/arkindex/documents/tests/test_manifest.py b/arkindex/documents/tests/test_manifest.py index 659e5d351767a4fa2ad29ada75a8baa61a7316a6..4eaafa569977824d86e33eac4e62473d26dc0dcf 100644 --- a/arkindex/documents/tests/test_manifest.py +++ b/arkindex/documents/tests/test_manifest.py @@ -4,10 +4,10 @@ from django.urls import reverse from rest_framework import status from tripoli import IIIFValidator -from arkindex.documents.models import DataSource, Element +from 
arkindex.dataimport.models import WorkerVersion +from arkindex.documents.models import Element from arkindex.project.tests import FixtureAPITestCase from arkindex_common.enums import MetaType -from arkindex_common.ml_tool import MLToolType class TestFolderManifestSerializer(FixtureAPITestCase): @@ -135,11 +135,11 @@ class TestFolderManifestSerializer(FixtureAPITestCase): def test_with_classification(self): self.assertFalse(self.page.classifications.exists()) - source = DataSource.objects.get(slug='test', type=MLToolType.Recognizer) + worker_version = WorkerVersion.objects.first() text_class = self.corpus.ml_classes.create(name='text') cover_class = self.corpus.ml_classes.create(name='cover') - self.page.classifications.create(ml_class=text_class, confidence=0.42, source=source) - self.page.classifications.create(ml_class=cover_class, confidence=0.12, source=source) + self.page.classifications.create(ml_class=text_class, confidence=0.42, worker_version=worker_version) + self.page.classifications.create(ml_class=cover_class, confidence=0.12, worker_version=worker_version) response = self.client.get(reverse('api:folder-manifest', kwargs={'pk': self.vol.id})) self.assertEqual(response.status_code, status.HTTP_200_OK) diff --git a/arkindex/documents/tests/test_metadata.py b/arkindex/documents/tests/test_metadata.py index 989e83bcf0b5b261b1cebb32fdcfccb88a2788c1..1802992d19a86bd810c810957c45ae07b5a6666e 100644 --- a/arkindex/documents/tests/test_metadata.py +++ b/arkindex/documents/tests/test_metadata.py @@ -4,11 +4,11 @@ from django.test import override_settings from django.urls import reverse from rest_framework import status -from arkindex.documents.models import AllowedMetaData, Corpus, DataSource, MetaData +from arkindex.dataimport.models import WorkerVersion +from arkindex.documents.models import AllowedMetaData, Corpus, MetaData from arkindex.project.tests import FixtureAPITestCase from arkindex.users.models import User from arkindex_common.enums import 
EntityType, MetaType -from arkindex_common.ml_tool import MLToolType class TestMetaData(FixtureAPITestCase): @@ -31,7 +31,7 @@ class TestMetaData(FixtureAPITestCase): (MetaType.Reference, '_id'), ) ) - cls.source = DataSource.objects.get(slug='test', type=MLToolType.Recognizer) + cls.worker_version = WorkerVersion.objects.first() def setUp(self): super().setUp() @@ -480,7 +480,11 @@ class TestMetaData(FixtureAPITestCase): def test_create_metadata_entity(self): self.client.force_login(self.superuser) - entity = self.corpus.entities.create(name='Texas', type=EntityType.Location, source=self.source) + entity = self.corpus.entities.create( + name='Texas', + type=EntityType.Location, + worker_version=self.worker_version, + ) response = self.client.post( reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)}), data={'type': 'location', 'name': 'location', 'value': 'Texas', 'entity': entity.id} @@ -490,7 +494,11 @@ class TestMetaData(FixtureAPITestCase): def test_patch_metadata_entity(self): self.client.force_login(self.superuser) - entity = self.corpus.entities.create(name='Texas', type=EntityType.Location, source=self.source) + entity = self.corpus.entities.create( + name='Texas', + type=EntityType.Location, + worker_version=self.worker_version, + ) metadata = self.vol.metadatas.create(type=MetaType.Location, name='location', value='Texas') response = self.client.patch( reverse('api:metadata-edit', kwargs={'pk': str(metadata.id)}), @@ -502,7 +510,11 @@ class TestMetaData(FixtureAPITestCase): def test_patch_metadata_entity_none(self): self.client.force_login(self.superuser) - entity = self.corpus.entities.create(name='Texas', type=EntityType.Location, source=self.source) + entity = self.corpus.entities.create( + name='Texas', + type=EntityType.Location, + worker_version=self.worker_version, + ) metadata = self.vol.metadatas.create(type=MetaType.Location, name='location', value='Texas', entity=entity) response = self.client.patch( reverse('api:metadata-edit', 
kwargs={'pk': str(metadata.id)}), @@ -515,7 +527,11 @@ class TestMetaData(FixtureAPITestCase): def test_create_metadata_entity_corpus_check(self): self.client.force_login(self.superuser) - entity = self.private_corpus.entities.create(name='Texas', type=EntityType.Location, source=self.source) + entity = self.private_corpus.entities.create( + name='Texas', + type=EntityType.Location, + worker_version=self.worker_version, + ) response = self.client.post( reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)}), data={'type': 'location', 'name': 'location', 'value': 'Texas', 'entity': entity.id} @@ -524,7 +540,11 @@ class TestMetaData(FixtureAPITestCase): def test_patch_metadata_entity_corpus_check(self): self.client.force_login(self.superuser) - entity = self.private_corpus.entities.create(name='Texas', type=EntityType.Location, source=self.source) + entity = self.private_corpus.entities.create( + name='Texas', + type=EntityType.Location, + worker_version=self.worker_version, + ) metadata = self.vol.metadatas.create(type=MetaType.Location, name='location', value='Texas') response = self.client.patch( reverse('api:metadata-edit', kwargs={'pk': str(metadata.id)}), diff --git a/arkindex/documents/tests/test_moderation.py b/arkindex/documents/tests/test_moderation.py index 769525ad9e83182d440dea79f7f230aa68871a52..95fab997cf3c3dda3c92acffa8f37899d901d541 100644 --- a/arkindex/documents/tests/test_moderation.py +++ b/arkindex/documents/tests/test_moderation.py @@ -3,15 +3,7 @@ from django.urls import reverse from rest_framework import status from arkindex.dataimport.models import WorkerVersion -from arkindex.documents.models import ( - Classification, - ClassificationState, - Corpus, - DataSource, - Element, - MLClass, - MLToolType, -) +from arkindex.documents.models import Classification, ClassificationState, Corpus, Element, MLClass from arkindex.project.tests import FixtureAPITestCase from arkindex.users.models import User @@ -26,31 +18,9 @@ class 
TestClasses(FixtureAPITestCase): cls.act_type = cls.corpus.types.get(slug='act') cls.element = Element.objects.get(name='Volume 1, page 1v') cls.folder = cls.corpus.elements.get(name='Volume 1') - cls.worker_version = WorkerVersion.objects.get(worker__slug='dla') + cls.worker_version_1 = WorkerVersion.objects.get(worker__slug='dla') + cls.worker_version_2 = WorkerVersion.objects.get(worker__slug='reco') cls.internal_user = User.objects.get_by_natural_key('internal@internal.fr') - cls.classifier_source = DataSource.objects.create( - type=MLToolType.Classifier, - slug='some_classifier', - revision='1.3.3.7', - internal=False, - ) - - def _create_classification_from_source(self): - return self.element.classifications.create( - source=self.classifier_source, - ml_class=self.text, - confidence=.5, - ) - - def _serialized_source(self, classification): - return { - 'id': str(classification.source.id), - 'type': classification.source.type.value, - 'slug': classification.source.slug, - 'name': classification.source.name, - 'revision': classification.source.revision, - 'internal': classification.source.internal - } def test_manual_classification_creation(self): """ @@ -192,7 +162,7 @@ class TestClasses(FixtureAPITestCase): response = self.client.post(reverse('api:classification-create'), { 'element': str(self.element.id), 'ml_class': str(self.text.id), - 'worker_version': str(self.worker_version.id) + 'worker_version': str(self.worker_version_1.id) }) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) @@ -210,12 +180,12 @@ class TestClasses(FixtureAPITestCase): 'ml_class': str(self.text.id), 'confidence': 0.42, 'high_confidence': False, - 'worker_version': str(self.worker_version.id) + 'worker_version': str(self.worker_version_1.id) }) self.assertEqual(response.status_code, status.HTTP_201_CREATED) classification = self.element.classifications.get() - self.assertEqual(classification.worker_version, self.worker_version) + 
self.assertEqual(classification.worker_version, self.worker_version_1) self.assertEqual(classification.ml_class, self.text) self.assertEqual(classification.state, ClassificationState.Pending) self.assertEqual(classification.confidence, 0.42) @@ -227,15 +197,14 @@ class TestClasses(FixtureAPITestCase): response = self.client.post(reverse('api:classification-create'), { 'element': str(self.element.id), 'ml_class': str(self.text.id), - 'worker_version': str(self.worker_version.id), + 'worker_version': str(self.worker_version_1.id), 'confidence': 0.42, 'high_confidence': False, }) self.assertEqual(response.status_code, status.HTTP_201_CREATED) classification = self.element.classifications.get() - self.assertEqual(classification.source, None) - self.assertEqual(classification.worker_version, self.worker_version) + self.assertEqual(classification.worker_version, self.worker_version_1) self.assertEqual(classification.ml_class, self.text) self.assertEqual(classification.state, ClassificationState.Pending) self.assertEqual(classification.confidence, 0.42) @@ -250,14 +219,14 @@ class TestClasses(FixtureAPITestCase): response = self.client.post(reverse('api:classification-create'), { 'element': str(self.element.id), 'ml_class': str(self.text.id), - 'worker_version': str(self.worker_version.id), + 'worker_version': str(self.worker_version_1.id), 'confidence': 0, 'high_confidence': False, }) self.assertEqual(response.status_code, status.HTTP_201_CREATED) classification = self.element.classifications.get() - self.assertEqual(classification.worker_version, self.worker_version) + self.assertEqual(classification.worker_version, self.worker_version_1) self.assertEqual(classification.ml_class, self.text) self.assertEqual(classification.state, ClassificationState.Pending) self.assertEqual(classification.confidence, 0) @@ -265,7 +234,7 @@ class TestClasses(FixtureAPITestCase): def test_classification_validate(self): classification = self.element.classifications.create( - 
worker_version=self.worker_version, + worker_version=self.worker_version_1, ml_class=self.text, confidence=.1 ) @@ -277,8 +246,7 @@ class TestClasses(FixtureAPITestCase): self.assertDictEqual(response.json(), { 'id': str(classification.id), - 'source': None, - 'worker_version': str(self.worker_version.id), + 'worker_version': str(self.worker_version_1.id), 'ml_class': { 'id': str(classification.ml_class.id), 'name': classification.ml_class.name @@ -298,35 +266,10 @@ class TestClasses(FixtureAPITestCase): response = self.client.put(reverse('api:classification-validate', kwargs={'pk': classification.id})) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) - def test_source_classification_reject(self): - self.client.force_login(self.user) - classification = self._create_classification_from_source() - - with self.assertNumQueries(6): - response = self.client.put(reverse('api:classification-reject', kwargs={'pk': classification.id})) - self.assertEqual(response.status_code, status.HTTP_200_OK) - - self.assertDictEqual(response.json(), { - 'id': str(classification.id), - 'source': self._serialized_source(classification), - 'worker_version': None, - 'ml_class': { - 'id': str(classification.ml_class.id), - 'name': classification.ml_class.name - }, - 'state': ClassificationState.Rejected.value, - 'confidence': classification.confidence, - 'high_confidence': False - }) - - # Ensure moderator has been set - classification.refresh_from_db() - self.assertEqual(classification.moderator, self.user) - def test_worker_classification_reject(self): self.client.force_login(self.user) classification = self.element.classifications.create( - worker_version=self.worker_version, + worker_version=self.worker_version_1, ml_class=self.text, confidence=.1, ) @@ -337,8 +280,7 @@ class TestClasses(FixtureAPITestCase): self.assertDictEqual(response.json(), { 'id': str(classification.id), - 'source': None, - 'worker_version': str(self.worker_version.id), + 'worker_version': 
str(self.worker_version_1.id), 'ml_class': { 'id': str(classification.ml_class.id), 'name': classification.ml_class.name @@ -352,24 +294,6 @@ class TestClasses(FixtureAPITestCase): classification.refresh_from_db() self.assertEqual(classification.moderator, self.user) - def test_classification_reject_manual_source_delete(self): - """ - A rejected classifications from a manual source should be automatically deleted - """ - self.client.force_login(self.user) - classification = self.element.classifications.create( - source=DataSource.objects.create(slug='manual', type=MLToolType.Classifier, internal=False), - ml_class=self.text, - confidence=.42, - ) - - with self.assertNumQueries(5): - response = self.client.put(reverse('api:classification-reject', kwargs={'pk': classification.id})) - self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) - - with self.assertRaises(Classification.DoesNotExist): - classification.refresh_from_db() - def test_manual_classification_reject_delete(self): """ A classifications with no worker version should be deleted when rejected @@ -385,7 +309,7 @@ class TestClasses(FixtureAPITestCase): classification.refresh_from_db() def test_classification_reject_without_permissions(self): - classification = self._create_classification_from_source() + classification = self.element.classifications.create(ml_class=self.text, confidence=.42) with self.assertNumQueries(0): response = self.client.put(reverse('api:classification-reject', kwargs={'pk': classification.id})) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) @@ -404,7 +328,6 @@ class TestClasses(FixtureAPITestCase): self.assertDictEqual(response.json(), { 'id': str(classification.id), - 'source': self._serialized_source(classification), 'ml_class': { 'id': str(classification.ml_class.id), 'name': classification.ml_class.name @@ -422,7 +345,6 @@ class TestClasses(FixtureAPITestCase): self.assertDictEqual(response.json(), { 'id': str(classification.id), - 'source': 
self._serialized_source(classification), 'ml_class': { 'id': str(classification.ml_class.id), 'name': classification.ml_class.name @@ -546,26 +468,17 @@ class TestClasses(FixtureAPITestCase): classification = self.folder.classifications.get() self.assertEqual(classification.ml_class, self.text) self.assertEqual(classification.state, ClassificationState.Validated) - self.assertEqual(classification.source.slug, 'manual') + self.assertIsNone(classification.worker_version) self.assertEqual(classification.confidence, 1) self.assertTrue(self.element.classifications.exists()) classification = self.element.classifications.get() self.assertEqual(classification.ml_class, self.text) self.assertEqual(classification.state, ClassificationState.Validated) - self.assertEqual(classification.source.slug, 'manual') + self.assertIsNone(classification.worker_version) self.assertEqual(classification.confidence, 1) def test_classifications_selection_validate(self): - source_1 = DataSource.objects.get(slug='some_classifier') - source_2 = DataSource.objects.create( - type=MLToolType.NER, - slug='test', - name='Test NER', - revision='4.2', - internal=False, - ) - line = MLClass.objects.create(name='line', corpus=self.private_corpus) act_x = Element.objects.create( type=self.act_type, @@ -574,7 +487,7 @@ class TestClasses(FixtureAPITestCase): ) Classification.objects.create( element=act_x, - source=source_1, + worker_version=self.worker_version_1, state=ClassificationState.Pending, high_confidence=True, ml_class=line @@ -583,14 +496,14 @@ class TestClasses(FixtureAPITestCase): for e in [self.folder, self.element]: Classification.objects.create( element=e, - source=source_1, + worker_version=self.worker_version_1, state=ClassificationState.Pending, high_confidence=True, ml_class=self.text ) Classification.objects.create( element=e, - source=source_2, + worker_version=self.worker_version_2, state=ClassificationState.Pending, high_confidence=False, ml_class=self.text @@ -613,11 +526,11 @@ class 
TestClasses(FixtureAPITestCase): for e in [self.folder, self.element]: classification = e.classifications.get(state=ClassificationState.Validated) self.assertTrue(classification.high_confidence) - self.assertEqual(classification.source, source_1) + self.assertEqual(classification.worker_version, self.worker_version_1) classification = e.classifications.get(state=ClassificationState.Pending) self.assertFalse(classification.high_confidence) - self.assertEqual(classification.source, source_2) + self.assertEqual(classification.worker_version, self.worker_version_2) classification = act_x.classifications.get() self.assertEqual(classification.state, ClassificationState.Pending) diff --git a/arkindex/documents/tests/test_parents_elements.py b/arkindex/documents/tests/test_parents_elements.py index 3e65fb504b5b889e4c912ab7067477f25b2439bf..bbf9b7f756bedbeaabfcc2c78817cb1ddf795b52 100644 --- a/arkindex/documents/tests/test_parents_elements.py +++ b/arkindex/documents/tests/test_parents_elements.py @@ -4,9 +4,8 @@ from django.urls import reverse from rest_framework import status from arkindex.dataimport.models import WorkerVersion -from arkindex.documents.models import Corpus, DataSource, Element +from arkindex.documents.models import Corpus, Element from arkindex.project.tests import FixtureAPITestCase -from arkindex_common.ml_tool import MLToolType class TestParentsElements(FixtureAPITestCase): @@ -17,7 +16,6 @@ class TestParentsElements(FixtureAPITestCase): cls.vol = cls.corpus.elements.get(name='Volume 1') cls.private_corpus = Corpus.objects.create(name='private', public=False) cls.private_elt = cls.private_corpus.elements.create(type=cls.private_corpus.types.create(slug='type')) - cls.manual_source = DataSource.objects.create(type=MLToolType.Recognizer, slug='manual', internal=True) cls.worker_version = WorkerVersion.objects.get(worker__slug='reco') def setUp(self): diff --git a/arkindex/documents/tests/test_patch_elements.py 
b/arkindex/documents/tests/test_patch_elements.py index 993f0ad5e09d1e6fac42577aa7f15127eaaeab61..e622da2a3be99e9c072f22228a448b8ddd24474b 100644 --- a/arkindex/documents/tests/test_patch_elements.py +++ b/arkindex/documents/tests/test_patch_elements.py @@ -1,12 +1,11 @@ from django.urls import reverse from rest_framework import status -from arkindex.documents.models import Corpus, DataSource, Element +from arkindex.documents.models import Corpus, Element from arkindex.images.models import ImageServer from arkindex.project.aws import S3FileStatus from arkindex.project.tests import FixtureAPITestCase from arkindex.users.models import User -from arkindex_common.ml_tool import MLToolType class TestPatchElements(FixtureAPITestCase): @@ -28,7 +27,6 @@ class TestPatchElements(FixtureAPITestCase): ) cls.private_corpus = Corpus.objects.create(name='private', public=False) cls.private_elt = cls.private_corpus.elements.create(type=cls.private_corpus.types.create(slug='type')) - cls.manual_source = DataSource.objects.create(type=MLToolType.Recognizer, slug='manual', internal=True) def test_patch_element_unverified(self): """ diff --git a/arkindex/documents/tests/test_retrieve_elements.py b/arkindex/documents/tests/test_retrieve_elements.py index b9e593af2fcd171f7fe22ca53421569495d1dcfc..cd5566b3c274c9d966939af6888c1a8e11d56e80 100644 --- a/arkindex/documents/tests/test_retrieve_elements.py +++ b/arkindex/documents/tests/test_retrieve_elements.py @@ -1,10 +1,10 @@ from django.urls import reverse from rest_framework import status -from arkindex.documents.models import Classification, Corpus, DataSource, Entity, MLClass +from arkindex.dataimport.models import WorkerVersion +from arkindex.documents.models import Corpus, Entity, MLClass from arkindex.project.tests import FixtureAPITestCase from arkindex_common.enums import EntityType, MetaType -from arkindex_common.ml_tool import MLToolType class TestRetrieveElements(FixtureAPITestCase): @@ -14,6 +14,7 @@ class 
TestRetrieveElements(FixtureAPITestCase): super().setUpTestData() cls.vol = cls.corpus.elements.get(name='Volume 1') cls.private_corpus = Corpus.objects.create(name='private', public=False) + cls.worker_version = WorkerVersion.objects.get(worker__slug='reco') def setUp(self): self.page = self.corpus.elements.get(name='Volume 1, page 1r') @@ -25,9 +26,8 @@ class TestRetrieveElements(FixtureAPITestCase): ) def test_get_element(self): - data_source = DataSource.objects.get(slug='test', type=MLToolType.Recognizer) ml_class = MLClass.objects.create(name='text', corpus=self.corpus) - classification = Classification.objects.create(element=self.vol, source=data_source, ml_class=ml_class) + classification = self.vol.classifications.create(worker_version=self.worker_version, ml_class=ml_class) response = self.client.get(reverse('api:element-retrieve', kwargs={'pk': str(self.vol.id)})) self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -42,7 +42,6 @@ class TestRetrieveElements(FixtureAPITestCase): }, 'thumbnail_url': self.vol.thumbnail.s3_url, 'thumbnail_put_url': None, - 'source': None, 'worker_version': None, 'zone': None, 'metadata': [], @@ -52,15 +51,7 @@ class TestRetrieveElements(FixtureAPITestCase): 'confidence': None, 'high_confidence': False, 'state': 'pending', - 'worker_version': None, - 'source': { - 'id': str(data_source.id), - 'slug': 'test', - 'type': 'recognizer', - 'name': 'Test Recognizer', - 'revision': '4.2', - 'internal': False, - }, + 'worker_version': str(self.worker_version.id), 'ml_class': { 'id': str(ml_class.id), 'name': 'text', @@ -157,7 +148,7 @@ class TestRetrieveElements(FixtureAPITestCase): type=EntityType.Person, corpus=self.corpus, name='Marc', - source_id=DataSource.objects.get(slug='test', type=MLToolType.Recognizer).id + worker_version=self.worker_version, ) self.metadata.entity = entity self.metadata.save() diff --git a/arkindex/documents/tests/test_search.py b/arkindex/documents/tests/test_search.py index 
dd001a2ef4398a43b9755dd4555a0dffe1c2b52a..46d0702c9ab2045184abab58292303549118d57b 100644 --- a/arkindex/documents/tests/test_search.py +++ b/arkindex/documents/tests/test_search.py @@ -6,7 +6,8 @@ from django.urls import reverse from elasticsearch_dsl.connections import connections from rest_framework import status -from arkindex.documents.models import Corpus, DataSource, Element, MLToolType, Transcription +from arkindex.dataimport.models import WorkerVersion +from arkindex.documents.models import Corpus, Element, Transcription from arkindex.project.elastic import ESTranscription from arkindex.project.tests import FixtureAPITestCase from arkindex_common.enums import EntityType, MetaType @@ -237,9 +238,9 @@ class TestSearchAPI(FixtureAPITestCase): self.assertListEqual(conditions, [{'match': {'text': 'paris'}}]) def test_entity_search(self): - source = DataSource.objects.create(type=MLToolType.NER, slug='entity source', internal=True) - entity_1 = self.corpus.entities.create(type=EntityType.Person, name="an entity", source=source) - entity_2 = self.corpus.entities.create(type=EntityType.Location, name="somewhere", source=source) + worker_version = WorkerVersion.objects.first() + entity_1 = self.corpus.entities.create(type=EntityType.Person, name="an entity", worker_version=worker_version) + entity_2 = self.corpus.entities.create(type=EntityType.Location, name="somewhere", worker_version=worker_version) self.es_mock.count.return_value = {'count': 2} self.es_mock.search.return_value = self.build_es_response([ # Test the ES ordering is preserved by returning entities in non-alphabetical order diff --git a/arkindex/documents/tests/test_transcriptions.py b/arkindex/documents/tests/test_transcriptions.py index 7ff9d3a1e0ca7fb9b59128cc8d880502c22fdb01..861b2e2d0483f56f61d145ca4391e4ed62753935 100644 --- a/arkindex/documents/tests/test_transcriptions.py +++ b/arkindex/documents/tests/test_transcriptions.py @@ -2,10 +2,9 @@ from django.urls import reverse from 
rest_framework import status from arkindex.dataimport.models import WorkerVersion -from arkindex.documents.models import Corpus, DataSource +from arkindex.documents.models import Corpus from arkindex.project.tests import FixtureAPITestCase from arkindex.users.models import User -from arkindex_common.ml_tool import MLToolType class TestTranscriptions(FixtureAPITestCase): @@ -21,7 +20,6 @@ class TestTranscriptions(FixtureAPITestCase): cls.line = cls.corpus.elements.get(name='Text line') cls.private_corpus = Corpus.objects.create(name='Private') cls.private_page = cls.private_corpus.elements.create(type=cls.page.type) - cls.src = DataSource.objects.get(slug='test', type=MLToolType.Recognizer) # Create an user with a read right only on the private corpus cls.private_read_user = User.objects.create_user('a@bc.de', 'a') cls.private_read_user.verified_email = True @@ -53,14 +51,6 @@ class TestTranscriptions(FixtureAPITestCase): 'id': str(tr1.id), 'text': 'Lorem ipsum dolor sit amet', 'score': 1.0, - 'source': { - 'id': str(self.src.id), - 'type': 'recognizer', - 'slug': 'test', - 'name': 'Test Recognizer', - 'revision': '4.2', - 'internal': False, - }, 'worker_version_id': None, 'element': None, }, @@ -68,7 +58,6 @@ class TestTranscriptions(FixtureAPITestCase): 'id': str(tr2.id), 'text': 'something', 'score': 0.369, - 'source': None, 'worker_version_id': str(self.worker_version.id), 'element': None, } @@ -117,7 +106,6 @@ class TestTranscriptions(FixtureAPITestCase): 'id': str(worker_transcription.id), 'text': 'something', 'score': 0.369, - 'source': None, 'worker_version_id': str(self.worker_version.id), 'element': { 'id': str(self.page.id), diff --git a/arkindex/project/openapi/patch.yml b/arkindex/project/openapi/patch.yml index 1aa64baee2a7baf4eef3a4afefd0c8608af2e77a..187025b3f9f9d46cf17b768ca81db1bb3ae16721 100644 --- a/arkindex/project/openapi/patch.yml +++ b/arkindex/project/openapi/patch.yml @@ -100,11 +100,6 @@ paths: id: 55cd009d-cd4b-4ec2-a475-b060f98f9138 
corpus: - Role already exists in this corpus - /api/v1/corpus/{id}/ml-stats/: - delete: - # Will need https://gitlab.com/arkindex/backend/-/issues/86 to be removed - operationId: DestroyCorpusMLResults - description: Delete machine learning results on all elements of a corpus. /api/v1/element/{id}/: get: description: Retrieve a single element's informations and metadata @@ -115,11 +110,6 @@ paths: description: Rename an element delete: description: Delete a childless element - /api/v1/element/{id}/ml-stats/: - delete: - # Will need https://gitlab.com/arkindex/backend/-/issues/86 to be removed - operationId: DestroyElementMLResults - description: Delete machine learning results on an element and its direct children. /api/v1/elements/{id}/children/: delete: operationId: DestroyElementChildren diff --git a/arkindex/project/tests/test_elastic.py b/arkindex/project/tests/test_elastic.py index 05565df4ec8e00c7ffa93ecedee78f10f20200ee..4f5f7d854584ec70b8736ed266ed20b6c8758462 100644 --- a/arkindex/project/tests/test_elastic.py +++ b/arkindex/project/tests/test_elastic.py @@ -1,11 +1,10 @@ from unittest.mock import patch +from arkindex.dataimport.models import WorkerVersion from arkindex.documents.dates import DateType, InterpretedDate -from arkindex.documents.models import DataSource from arkindex.project.elastic import ESElement from arkindex.project.tests import FixtureAPITestCase from arkindex_common.enums import MetaType -from arkindex_common.ml_tool import MLToolType class TestESDocuments(FixtureAPITestCase): @@ -61,7 +60,7 @@ class TestESDocuments(FixtureAPITestCase): surface.add_parent(page) surface.transcriptions.create( text='invisible transcription', - source=DataSource.objects.get(slug='test', type=MLToolType.Recognizer), + worker_version=WorkerVersion.objects.get(worker__slug='reco'), ) texts = [tr['text'] for tr in ESElement.from_model(page).to_dict()['transcriptions']] self.assertNotIn('invisible transcription', texts)