diff --git a/arkindex/documents/api/elements.py b/arkindex/documents/api/elements.py index a90505034460395328ac38c1d1833177f5f0e4e0..dbb7bc00af65f51711c41aa0f40328ffa07ff8e2 100644 --- a/arkindex/documents/api/elements.py +++ b/arkindex/documents/api/elements.py @@ -1651,7 +1651,8 @@ class ElementBulkCreate(CreateAPIView): .objects \ .using('default') \ .filter(corpus__in=Corpus.objects.writable(self.request.user)) \ - .only('id', 'corpus_id', 'image_id', 'rotation_angle', 'mirrored') + .select_related('image') \ + .only('id', 'corpus_id', 'rotation_angle', 'mirrored', 'image') def get_serializer_context(self): context = super().get_serializer_context() @@ -1699,7 +1700,7 @@ class ElementBulkCreate(CreateAPIView): type_id=element_data['type'], name=element_data['name'], worker_version=worker_version, - image_id=self.element.image_id, + image_id=self.element.image.id, polygon=element_data['polygon'], rotation_angle=self.element.rotation_angle, mirrored=self.element.mirrored diff --git a/arkindex/documents/serializers/elements.py b/arkindex/documents/serializers/elements.py index f6b89bed2bcbe108de9b512b7dc370780f66cd41..f57921a61b0ababcdd589fc88f3b574263c9071a 100644 --- a/arkindex/documents/serializers/elements.py +++ b/arkindex/documents/serializers/elements.py @@ -38,6 +38,7 @@ from arkindex.images.serializers import ZoneSerializer from arkindex.project.fields import Array from arkindex.project.mixins import SelectionMixin from arkindex.project.serializer_fields import LinearRingField +from arkindex.project.tools import polygon_outside_image from arkindex.project.triggers import move_selection from arkindex.users.models import Role from arkindex.users.utils import get_max_level @@ -467,7 +468,6 @@ class ElementParentSerializer(serializers.Serializer): child = data.get('child') parent = data.get('parent') - errors = defaultdict(list) if parent.corpus != child.corpus: errors['parent'].append("Parent is not from corpus '{}'".format(child.corpus.name)) if parent.id == child.id: @@ -595,6 +595,11 @@ class ElementSerializer(ElementSlimSerializer): (image.width, 0), (0, 0) ) + if polygon and image: + if polygon_outside_image(image, polygon): + raise ValidationError({ + 'polygon': ["An element's polygon must not exceed its image's bounds."] + }) validated_data.update(image=image, polygon=polygon) @@ -712,6 +717,10 @@ class ElementCreateSerializer(ElementLightSerializer): # will lead to many errors everywhere as this would create impossible polygons errors['image'].append('This image does not have valid dimensions.') + if polygon and image: + if polygon_outside_image(image, polygon): + errors['polygon'].append("An element's polygon must not exceed its image's bounds.") + if data.get('worker_version') and not self.context['request'].user.is_internal: errors['worker_version'].append('Only an internal user can create an element with a worker version.') @@ -883,6 +892,7 @@ class ElementBulkSerializer(serializers.Serializer): ) def validate(self, data): + errors = defaultdict() type_slugs = set(element['type'] for element in data['elements']) type_ids = dict( ElementType @@ -898,9 +908,19 @@ class ElementBulkSerializer(serializers.Serializer): f"Element types with slugs {', '.join(missing_types)} do not exist in the parent element's corpus" ]) - for element in data['elements']: + element_errors = {} + for i, element in enumerate(data['elements']): # Replace slugs with type IDs element['type'] = type_ids[element['type']] + # Check that polygons are not outside the image + if polygon_outside_image(self.context['element'].image, element['polygon']): + element_errors[i] = {'polygon': ["An element's polygon must not exceed its image's bounds."]} + + if element_errors: + errors['elements'] = element_errors + + if errors: + raise ValidationError(errors) return data diff --git a/arkindex/documents/serializers/ml.py b/arkindex/documents/serializers/ml.py index 62316e0f053b2505e6601f8ee2d14b77f2dc7e5f..c0ce007820d9aa30a10506526960aff70c18b974 100644 --- a/arkindex/documents/serializers/ml.py +++ b/arkindex/documents/serializers/ml.py @@ -1,4 +1,5 @@ import uuid +from collections import defaultdict from enum import Enum from django.conf import settings @@ -19,6 +20,7 @@ from arkindex.documents.models import ( ) from arkindex.documents.serializers.light import ElementZoneSerializer from arkindex.project.serializer_fields import EnumField, LinearRingField +from arkindex.project.tools import polygon_outside_image class ClassificationMode(Enum): @@ -353,6 +355,19 @@ class ElementTranscriptionsBulkSerializer(serializers.Serializer): # Use the parent types for validation as elements are in the same corpus self.fields['element_type'].queryset = ElementType.objects.filter(corpus=self.context['element'].corpus) + def validate(self, data): + errors = defaultdict(dict) + transcription_errors = {} + for i, transcription in enumerate(data['transcriptions']): + # Check that polygons are not outside the image + if polygon_outside_image(self.context['element'].image, transcription['polygon']): + transcription_errors[i] = {'polygon': ["A transcription element's polygon must not exceed its image's bounds."]} + if transcription_errors: + errors['transcriptions'] = transcription_errors + if errors: + raise ValidationError(errors) + return data + class AnnotatedElementSerializer(serializers.Serializer): """ diff --git a/arkindex/documents/tests/test_bulk_element_transcriptions.py b/arkindex/documents/tests/test_bulk_element_transcriptions.py index 4926761d7f9a3dbf64332e782fda0291ddaf91eb..a725abbfdd97b1fe49410a1d57fd5c2dab347411 100644 --- a/arkindex/documents/tests/test_bulk_element_transcriptions.py +++ b/arkindex/documents/tests/test_bulk_element_transcriptions.py @@ -7,6 +7,8 @@ from rest_framework import status from arkindex.dataimport.models import WorkerVersion from arkindex.documents.models import Corpus, Element, TextOrientation, Transcription +from arkindex.images.models import ImageServer +from arkindex.project.aws import S3FileStatus from arkindex.project.tests import FixtureAPITestCase @@ -26,6 +28,13 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): cls.private_corpus = Corpus.objects.create(name='Private') cls.private_page = cls.private_corpus.elements.create(type=cls.page.type) cls.worker_version = WorkerVersion.objects.get(worker__slug='reco') + cls.huge_image = ImageServer.objects.local.images.create( + path="kingdom/far/far/away", + status=S3FileStatus.Checked, + width=4200, + height=4200, + ) + cls.huge_page = Element.objects.create(corpus=cls.corpus, type=cls.page.type, image=cls.huge_image, polygon=[[0, 0], [0, 4200], [4200, 4200], [4200, 0], [0, 0]], name='huge_page') def test_bulk_transcriptions(self): """ @@ -109,7 +118,7 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): self.assertEqual(created_elts.count(), 1) self.client.force_login(self.internal_user) - with self.assertNumQueries(13): + with self.assertNumQueries(14): response = self.client.post( reverse('api:element-transcriptions-bulk', kwargs={'pk': self.page.id}), format='json', @@ -137,6 +146,7 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): ([[13, 37], [133, 37], [133, 137], [13, 137], [13, 37]], 'Hello world !', 0.1337), # Use line zone to create the second transcription (list(self.line.polygon), 'I <3 JavaScript', 0.42), + (list(self.line.polygon), 'Animula vagula blandula', 0.24), ] data = { 'element_type': 'text_line', @@ -149,7 +159,7 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): } self.client.force_login(self.internal_user) - with self.assertNumQueries(13): + with self.assertNumQueries(14): response = self.client.post( reverse('api:element-transcriptions-bulk', kwargs={'pk': self.page.id}), format='json', @@ -162,7 +172,8 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): self.assertEqual(created_elts.count(), 1) self.assertCountEqual(self.line.transcriptions.values_list('text', flat=True), [ 'A manual transcription', - 'I <3 JavaScript' + 'I <3 JavaScript', + 'Animula vagula blandula' ]) def test_bulk_transcriptions_existing_huge_polygon(self): @@ -171,10 +182,10 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): messy_line = self.corpus.elements.create( type=self.corpus.types.get(slug='text_line'), name=':bazar:', - image=self.page.image, + image=self.huge_image, polygon=polygon, ) - messy_line.add_parent(self.page) + messy_line.add_parent(self.huge_page) messy_line.transcriptions.create( text='Blah', @@ -182,7 +193,7 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): ) self.assertEqual(messy_line.transcriptions.count(), 1) - existing_element_ids = list(Element.objects.get_descending(self.page.id).values_list('id', flat=True)) + existing_element_ids = list(Element.objects.get_descending(self.huge_page.id).values_list('id', flat=True)) data = { 'element_type': 'text_line', 'worker_version': str(self.worker_version.id), @@ -196,15 +207,15 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): } self.client.force_login(self.internal_user) - with self.assertNumQueries(11): + with self.assertNumQueries(12): response = self.client.post( - reverse('api:element-transcriptions-bulk', kwargs={'pk': self.page.id}), + reverse('api:element-transcriptions-bulk', kwargs={'pk': self.huge_page.id}), format='json', data=data ) self.assertEqual(response.status_code, status.HTTP_201_CREATED) - created_elts = Element.objects.get_descending(self.page.id).exclude(id__in=existing_element_ids) + created_elts = Element.objects.get_descending(self.huge_page.id).exclude(id__in=existing_element_ids) # The existing text line has been reused self.assertFalse(created_elts.exists()) self.assertCountEqual(messy_line.transcriptions.values_list('text', flat=True), [ @@ -333,7 +344,7 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): } self.client.force_login(self.internal_user) - with self.assertNumQueries(13): + with self.assertNumQueries(14): response = self.client.post( reverse('api:element-transcriptions-bulk', kwargs={'pk': self.page.id}), format='json', @@ -560,3 +571,31 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): (1, '2', ((24.0, 42.0), (24.0, 142.0), (64.0, 142.0), (64.0, 42.0), (24.0, 42.0)), self.worker_version.id, True) ] ) + + def test_bulk_transcriptions_outside_image(self): + """ + Cannot create transcription elements with a polygon with one or more points outside the parent image + """ + transcriptions = [ + ([[13, 37], [133, 37], [133, 137], [13, 137], [13, 37]], 'Hello world !', 0.1337), + ([[24, 42], [64, 42], [64, 142], [2400, 142], [24, 42]], 'I <3 JavaScript', 0.42), + ] + data = { + 'element_type': 'text_line', + 'worker_version': str(self.worker_version.id), + 'transcriptions': [{ + 'polygon': poly, + 'text': text, + 'confidence': confidence, + } for poly, text, confidence in transcriptions] + } + self.client.force_login(self.user) + response = self.client.post( + reverse('api:element-transcriptions-bulk', kwargs={'pk': self.page.id}), + format='json', + data=data + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), { + 'transcriptions': {'1': {'polygon': ["A transcription element's polygon must not exceed its image's bounds."]}} + }) diff --git a/arkindex/documents/tests/test_bulk_elements.py b/arkindex/documents/tests/test_bulk_elements.py index 5a5551e4f49ca32c7634561d248714dcc38fdd87..1ccc832cdfb5885bf1656d3640c2bf557c43e2e6 100644 --- a/arkindex/documents/tests/test_bulk_elements.py +++ b/arkindex/documents/tests/test_bulk_elements.py @@ -333,3 +333,62 @@ class TestBulkElements(FixtureAPITestCase): created_elements = Element.objects.get_descending(self.mirrored_page.id) for elt in created_elements: self.assertEqual(elt.mirrored, True) + + def test_bulk_create_outside_image(self): + """ + Cannot create elements outside their image + """ + self.maxDiff = None + self.client.force_login(self.user) + payload = { + 'worker_version': str(self.worker_version.id), + 'elements': [ + { + 'name': 'A', + 'type': 'act', + 'polygon': [ + [0, 0], + [0, 1], + [1, 1], + [1, 0], + [0, 0], + ] + }, + { + 'name': 'B', + 'type': 'surface', + # Out of bounds + 'polygon': [ + [1003, 0], + [0, 1], + [1, 1], + [1, 0], + [0, 0], + ] + }, + { + 'name': 'C', + 'type': 'surface', + # Also out of bounds + 'polygon': [ + [0, 0], + [0, 9], + [9, 1200], + [9, 0], + [0, 0], + ] + } + ] + } + response = self.client.post( + reverse('api:elements-bulk-create', kwargs={'pk': str(self.element.id)}), + data=payload, + format='json', + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), { + 'elements': { + '1': {'polygon': ["An element's polygon must not exceed its image's bounds."]}, + '2': {'polygon': ["An element's polygon must not exceed its image's bounds."]} + } + }) diff --git a/arkindex/documents/tests/test_create_elements.py b/arkindex/documents/tests/test_create_elements.py index fb7ed3609d15cacbeec8c60c140dea3c6fe11401..dcc43407f0e2ea155d5cdd88f152e061cab78fb4 100644 --- a/arkindex/documents/tests/test_create_elements.py +++ b/arkindex/documents/tests/test_create_elements.py @@ -18,13 +18,19 @@ class TestCreateElements(FixtureAPITestCase): cls.page_type = cls.corpus.types.get(slug='page') cls.act_type = cls.corpus.types.get(slug='act') cls.vol = cls.corpus.elements.get(name='Volume 1') - cls.element = Element.objects.get(name='Volume 1, page 2r') + cls.page = Element.objects.get(name='Volume 1, page 2r') cls.image = ImageServer.objects.local.images.create( path="kingdom/far/away", status=S3FileStatus.Checked, width=42, height=42, ) + cls.huge_image = ImageServer.objects.local.images.create( + path="kingdom/far/far/away", + status=S3FileStatus.Checked, + width=4200, + height=4200, + ) cls.worker_version = WorkerVersion.objects.get(worker__slug='dla') # The image is always the same in API responses @@ -314,7 +320,7 @@ class TestCreateElements(FixtureAPITestCase): parent=str(self.vol.id), elt_type='page', name='The castle of my dreams again', - image=str(self.image.id), + image=str(self.huge_image.id), polygon=polygon ) with self.assertNumQueries(16): @@ -499,3 +505,23 @@ class TestCreateElements(FixtureAPITestCase): self.assertDictEqual(response.json(), { 'rotation_angle': [expected_message] }) + + def test_create_element_outside_image(self): + """ + Cannot create an elements outside of its image + """ + self.client.force_login(self.user) + polygon = [[32, 32], [32, 56], [56, 56], [56, 32], [32, 32]] + request = self.make_create_request( + parent=str(self.page.id), + elt_type='act', + name='Out Of Bounds', + image=str(self.image.id), + polygon=polygon + ) + with self.assertNumQueries(8): + response = self.client.post(**request) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), { + 'polygon': ["An element's polygon must not exceed its image's bounds."] + }) diff --git a/arkindex/documents/tests/test_patch_elements.py b/arkindex/documents/tests/test_patch_elements.py index faa3e805092b08ba270464c80f14054b8d76c470..a6c81de1bb16f5d11f1237070a13f2d508802658 100644 --- a/arkindex/documents/tests/test_patch_elements.py +++ b/arkindex/documents/tests/test_patch_elements.py @@ -19,6 +19,13 @@ class TestPatchElements(FixtureAPITestCase): cls.vol = cls.corpus.elements.get(name='Volume 1') cls.element = Element.objects.get(name='Volume 1, page 2r') cls.line = cls.corpus.elements.get(name='Text line') + cls.line_2 = Element.objects.create( + corpus=cls.corpus, + name="Line 2", + type=cls.line_type, + image=cls.line.image, + polygon=((0, 0), (0, 12), (10, 12), (10, 0), (0, 0)), + ) cls.image = ImageServer.objects.local.images.create( path="kingdom/far/away", status=S3FileStatus.Checked, @@ -161,19 +168,19 @@ class TestPatchElements(FixtureAPITestCase): def test_patch_element_image_preserve_polygon(self): self.client.force_login(self.user) - self.assertNotEqual(self.element.image, self.image) - expected_polygon = self.element.polygon.clone() + self.assertNotEqual(self.line_2.image, self.image) + expected_polygon = self.line_2.polygon.clone() response = self.client.patch( - reverse('api:element-retrieve', kwargs={'pk': str(self.element.id)}), + reverse('api:element-retrieve', kwargs={'pk': str(self.line_2.id)}), data={ 'image': str(self.image.id), }, format='json', ) self.assertEqual(response.status_code, status.HTTP_200_OK) - self.element.refresh_from_db() - self.assertEqual(self.element.image, self.image) - self.assertEqual(self.element.polygon, expected_polygon) + self.line_2.refresh_from_db() + self.assertEqual(self.line_2.image, self.image) + self.assertEqual(self.line_2.polygon, expected_polygon) def test_patch_element_new_zone(self): self.client.force_login(self.user) @@ -233,3 +240,17 @@ class TestPatchElements(FixtureAPITestCase): } } }) + + def test_patch_element_polygon_outside_image(self): + self.client.force_login(self.user) + response = self.client.patch( + reverse('api:element-retrieve', kwargs={'pk': str(self.element.id)}), + data={ + 'polygon': [[0, 1009], [10, 30], [30, 30], [30, 20], [10, 20]] + }, + format='json', + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), { + 'polygon': ["An element's polygon must not exceed its image's bounds."] + }) diff --git a/arkindex/documents/tests/test_put_elements.py b/arkindex/documents/tests/test_put_elements.py index 07a3e530cbc27459b68b29f06ab181e46e8e6f16..3c25525086e451fa27189529393cfff2b56000cb 100644 --- a/arkindex/documents/tests/test_put_elements.py +++ b/arkindex/documents/tests/test_put_elements.py @@ -22,8 +22,8 @@ class TestPatchElements(FixtureAPITestCase): cls.image = ImageServer.objects.local.images.create( path="kingdom/far/away", status=S3FileStatus.Checked, - width=42, - height=42, + width=3100, + height=3100, ) cls.private_corpus = Corpus.objects.create(name='private', public=False) cls.private_elt = cls.private_corpus.elements.create(type=cls.private_corpus.types.create(slug='type')) @@ -252,3 +252,19 @@ class TestPatchElements(FixtureAPITestCase): } } }) + + def test_put_element_polygon_outside_image(self): + self.client.force_login(self.user) + response = self.client.put( + reverse('api:element-retrieve', kwargs={'pk': str(self.element.id)}), + data={ + 'name': 'Untitled (2)', + 'type': 'text_line', + 'polygon': [[10, 20], [10, 30], [30, 30], [3000, 2000], [10, 20]] + }, + format='json', + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), { + 'polygon': ["An element's polygon must not exceed its image's bounds."] + }) diff --git a/arkindex/project/tools.py b/arkindex/project/tools.py index 192b3f548cb65245fc321af6a02b2b9ef1dcf034..f63c9488d25ed67dd85800458f7d1bef48eeec4e 100644 --- a/arkindex/project/tools.py +++ b/arkindex/project/tools.py @@ -68,6 +68,11 @@ def bounding_box(linestring): return int(x), int(y), int(width), int(height) +def polygon_outside_image(img, linestring): + x, y, w, h = bounding_box(linestring) + return (x > img.width or x + w > img.width or y > img.height or y + h > img.height) + + class BulkMap(Sized, Iterable): """ Apply a function on the full list of items as soon as an iterator is requested.