diff --git a/arkindex/documents/api/ml.py b/arkindex/documents/api/ml.py index 60547b495137baabae825fe8352495d9e3346931..9980943970ecff969fcd2e7c95bce72312e3854a 100644 --- a/arkindex/documents/api/ml.py +++ b/arkindex/documents/api/ml.py @@ -19,13 +19,11 @@ from arkindex.documents.serializers.ml import ( CountMLClassSerializer, MLClassSerializer ) from arkindex.documents.pagexml import PageXmlParser -from arkindex.images.models import Zone from arkindex.images.importer import build_transcriptions, save_transcriptions from arkindex.project.filters import SafeSearchFilter from arkindex.project.mixins import SelectionMixin, CorpusACLMixin from arkindex.project.parsers import XMLParser from arkindex.project.permissions import IsVerified, IsAdminUser -from arkindex.project.polygon import Polygon from arkindex.project.triggers import reindex_start, ml_results_delete import os.path import logging @@ -36,6 +34,7 @@ logger = logging.getLogger(__name__) class TranscriptionCreate(CreateAPIView): """ Create a single transcription on an element + The transcription zone is defined by the element it is attached to """ serializer_class = TranscriptionCreateSerializer permission_classes = (IsVerified, ) @@ -50,18 +49,10 @@ class TranscriptionCreate(CreateAPIView): raise ValidationError("This endpoint can only import transcriptions.") element = serializer.validated_data['element'] - # Create a zone on the page's image - polygon = Polygon(serializer.validated_data['polygon']) - ts_zone, _ = Zone.objects.get_or_create( - image=element.zone.image, - polygon=polygon, - ) - ts, created = Transcription.objects.get_or_create( element=element, source=serializer.validated_data['source'], type=transcription_type, - zone=ts_zone, text=serializer.validated_data['text'], defaults={ 'score': serializer.validated_data['score'] diff --git a/arkindex/documents/serializers/ml.py b/arkindex/documents/serializers/ml.py index 
b49d0e9e3e7930200a90fcbde82a05db09f47b49..277832fcb3c8ab9745274fbb89e25baaa728e58c 100644 --- a/arkindex/documents/serializers/ml.py +++ b/arkindex/documents/serializers/ml.py @@ -176,7 +176,7 @@ class TranscriptionCreateSerializer(serializers.Serializer): queryset=Element.objects.filter(zone__isnull=False), ) source = DataSourceSlugField(tool_type=MLToolType.Recognizer) - polygon = PolygonField() + polygon = PolygonField(read_only=True) text = serializers.CharField() score = serializers.FloatField(min_value=0, max_value=1) type = EnumField(TranscriptionType) diff --git a/arkindex/documents/tests/test_transcription_create.py b/arkindex/documents/tests/test_transcription_create.py index 998f8fab541edde17b8c122c29850a0756e27d8f..e6bfc61f9e0281952c2504f792375e07be10664b 100644 --- a/arkindex/documents/tests/test_transcription_create.py +++ b/arkindex/documents/tests/test_transcription_create.py @@ -38,14 +38,13 @@ class TestTranscriptionCreate(FixtureAPITestCase): "type": "word", "element": str(self.page.id), "source": self.src.slug, - "polygon": [(0, 0), (100, 0), (100, 100), (0, 100), (0, 0)], "text": "NEKUDOTAYIM", "score": 0.83, }) self.assertEqual(response.status_code, status.HTTP_201_CREATED) new_ts = Transcription.objects.get(text="NEKUDOTAYIM", type=TranscriptionType.Word) - self.assertEqual(new_ts.zone.polygon, Polygon.from_coords(0, 0, 100, 100)) + self.assertEqual(new_ts.zone, None) self.assertEqual(new_ts.score, 0.83) self.assertEqual(new_ts.source, self.src) self.assertTrue(self.page.transcriptions.filter(pk=new_ts.id).exists()) @@ -62,33 +61,25 @@ class TestTranscriptionCreate(FixtureAPITestCase): }) @patch('arkindex.project.triggers.get_channel_layer') - def test_unique_zone(self, get_layer_mock): + def test_create_transcription_takes_no_zone(self, get_layer_mock): """ - Checks the view reuses zones when available + TranscriptionCreate endpoint does not create any zone for the transcription """ get_layer_mock.return_value.send = AsyncMock() + 
self.client.force_login(self.user) - ts = Transcription.objects.get(zone__image__path='img1', text="PARIS") response = self.client.post(reverse('api:transcription-create'), format='json', data={ - "type": "word", + "type": "page", "element": str(self.page.id), "source": self.src.slug, - "polygon": ts.zone.polygon.serialize(), - "text": "GLOUBIBOULGA", - "score": 0.8, + "polygon": [(0, 0), (42, 0), (42, 42), (0, 42), (0, 0)], + "text": "SQUARE", + "score": 0.42, }) self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(Transcription.objects.get(text="GLOUBIBOULGA").zone.id, ts.zone.id) - get_layer_mock().send.assert_called_once_with('reindex', { - 'type': 'reindex.start', - 'element': str(self.page.id), - 'corpus': None, - 'entity': None, - 'transcriptions': True, - 'elements': True, - 'entities': False, - 'drop': False, - }) + + new_ts = Transcription.objects.get(text="SQUARE", type=TranscriptionType.Page) + self.assertEqual(new_ts.zone, None) @patch('arkindex.project.triggers.get_channel_layer') def test_update_transcription(self, get_layer_mock): @@ -103,7 +94,6 @@ class TestTranscriptionCreate(FixtureAPITestCase): "type": "word", "element": str(self.page.id), "source": ts.source.slug, - "polygon": ts.zone.polygon.serialize(), "text": ts.text, "score": 0.99, }) @@ -123,10 +113,11 @@ class TestTranscriptionCreate(FixtureAPITestCase): 'drop': False, }) + @override_settings(ARKINDEX_FEATURES={'search': False}) def test_invalid_data(self): """ Checks the view validates data properly - (score between 0 and 1, minimum points in polygon, non-negative coordinates, non-existent element) + (score between 0 and 1, non-existent element) """ self.client.force_login(self.user) @@ -134,31 +125,38 @@ class TestTranscriptionCreate(FixtureAPITestCase): "type": "word", "element": str(self.page.id), "source": self.src.slug, - "polygon": [(0, 0), (0, 100), (100, 100), (100, 0), (0, 0)], "text": "NEKUDOTAYIM", "score": 0.83, } - # Negative score - 
post_data['score'] = -1 + # Assert data is valid response = self.client.post(reverse('api:transcription-create'), format='json', data=post_data) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + + # Negative score + data = {**post_data, 'score': -1} + response = self.client.post(reverse('api:transcription-create'), format='json', data=data) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), { + 'score': ['Ensure this value is greater than or equal to 0.'] + }) # Score over 100% - post_data['score'] = 2 - response = self.client.post(reverse('api:transcription-create'), format='json', data=post_data) + data = {**post_data, 'score': 2} + response = self.client.post(reverse('api:transcription-create'), format='json', data=data) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), { + 'score': ['Ensure this value is less than or equal to 1.'] + }) # Missing element - post_data['score'] = 0.83 - post_data['element'] = str(uuid.uuid4()) - response = self.client.post(reverse('api:transcription-create'), format='json', data=post_data) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - - # Not enough polygon points - post_data['polygon'] = [(0, 0), (100, 100)] - response = self.client.post(reverse('api:transcription-create'), format='json', data=post_data) + wrong_id = str(uuid.uuid4()) + data = {**post_data, 'element': wrong_id} + response = self.client.post(reverse('api:transcription-create'), format='json', data=data) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), { + 'element': [f'Invalid pk "{wrong_id}" - object does not exist.'] + }) @patch('arkindex.project.serializer_fields.MLTool.get') @patch('arkindex.project.triggers.get_channel_layer') @@ -410,6 +408,40 @@ class TestTranscriptionCreate(FixtureAPITestCase): self.assertEqual(response.status_code, 
status.HTTP_201_CREATED) self.assertFalse(get_layer_mock().send.called) + @patch('arkindex.project.serializer_fields.MLTool.get') + @patch('arkindex.project.triggers.get_channel_layer') + @override_settings(ARKINDEX_FEATURES={'search': False}) + def test_unique_zone(self, get_layer_mock, ml_get_mock): + """ + Checks the bulk view reuses zones when available + """ + ml_get_mock.return_value.type = self.src.type + ml_get_mock.return_value.slug = 'new_slug' + ml_get_mock.return_value.name = self.src.name + ml_get_mock.return_value.version = self.src.revision + self.src.internal = True + self.src.save() + + self.assertEqual(self.page.transcriptions.count(), 4) + existing_ts = self.page.transcriptions.get(zone__image__path='img1', text="PARIS") + data = { + "parent": str(self.page.id), + "recognizer": self.src.slug, + "transcriptions": [{ + "type": "word", + "text": "PARISH", + "score": 0.999, + "polygon": existing_ts.zone.polygon.serialize(), + }] + } + self.client.force_login(self.user) + response = self.client.put(reverse('api:transcription-bulk'), format='json', data=data) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + + # A new transcription has been added with the same zone + self.assertEqual(self.page.transcriptions.count(), 5) + self.assertEqual(Transcription.objects.get(text="PARISH").zone.id, existing_ts.zone.id) + @patch('arkindex.project.triggers.get_channel_layer') @override_settings(ARKINDEX_FEATURES={'search': False}) def test_create_transcription_no_search(self, get_layer_mock):