diff --git a/arkindex/documents/api/elements.py b/arkindex/documents/api/elements.py index 8c32535cd1f0a32c5916fc3367c9d116aa0f06e4..e2358b5911e8c5593365232763fabd99a2f5a283 100644 --- a/arkindex/documents/api/elements.py +++ b/arkindex/documents/api/elements.py @@ -1009,7 +1009,7 @@ class ElementBulkCreate(CreateAPIView): # Retrieve existing zones from the DB image_id = self.element.zone.image_id existing_zones = { - # Use WKB represensation to compare existing zones + # Use WKB representation to compare existing zones # to avoid comparing references or slower coordinates polygon.wkb: zone_id for polygon, zone_id in Zone.objects.filter(image_id=image_id).values_list('polygon', 'id') diff --git a/arkindex/documents/api/ml.py b/arkindex/documents/api/ml.py index fa4c3087ad18e4495bcb763b442b2c1e46ca2580..f25151096b5711bbc372602cd2772e3a1011a851 100644 --- a/arkindex/documents/api/ml.py +++ b/arkindex/documents/api/ml.py @@ -198,63 +198,73 @@ class ElementTranscriptionsBulk(CreateAPIView): worker_version = serializer.validated_data['worker_version'] annotations = serializer.validated_data['transcriptions'] - image = self.element.zone.image - # Retrieve existing zones from the DB + image_id = self.element.zone.image_id existing_zones = { - zone.polygon: zone - for zone in image.zones.all() + # Use WKB representation to compare existing zones + # to avoid comparing references or slower coordinates + polygon.wkb: zone_id + for polygon, zone_id in Zone.objects.using('default').filter(image_id=image_id).values_list('polygon', 'id') } # Retrieve or create required zones - missing_zones = [] + new_zones = [] for annotation in annotations: # Retrieve or create the zone on the image - annotation['zone'] = existing_zones.get(annotation['polygon']) - if not annotation['zone']: - annotation['zone'] = Zone( + zone_id = existing_zones.get(annotation['polygon'].wkb) + if not zone_id: + new_zone = Zone( id=uuid.uuid4(), - image_id=image.id, + image_id=image_id, polygon=annotation['polygon'], ) - existing_zones[annotation['polygon']] = annotation['zone'] - missing_zones.append(annotation['zone']) - Zone.objects.bulk_create(missing_zones) + zone_id = new_zone.id + existing_zones[annotation['polygon'].wkb] = zone_id + new_zones.append(new_zone) + annotation['zone_id'] = zone_id + + Zone.objects.bulk_create(new_zones) # Retrieve or create elements to attach transcriptions to missing_elements = [] missing_paths = [] # List existing direct children (with their zones and transcriptions) children = { - elt.zone.id: elt - for elt in Element.objects.get_descending(self.element.id).filter( - zone__image=image, + element.zone_id: element + for element in Element + .objects + .get_descending(self.element.id) + .using('default') + .filter( + zone__image_id=image_id, type=elt_type, paths__path__last=self.element.id ) + .only('id', 'zone_id') } - paths = self.element.paths.all() + # Load the paths immediately to avoid iterating over them for each element + paths = list(self.element.paths.all()) next_path_ordering = self.element.get_next_order(elt_type) for annotation in annotations: # Look for a direct children with the right type and zone - annotation['element'] = children.get(annotation['zone'].id) + annotation['element'] = children.get(annotation['zone_id']) if not annotation['element']: annotation['element'] = Element( id=uuid.uuid4(), - zone=annotation['zone'], - corpus=self.element.corpus, + zone_id=annotation['zone_id'], + corpus_id=self.element.corpus_id, type=elt_type, name=next_path_ordering + 1 ) # Specify the annotated element has been created annotation['created'] = True - children[annotation['zone'].id] = annotation['element'] + children[annotation['zone_id']] = annotation['element'] missing_elements.append(annotation['element']) for parent_path in paths: new_path = parent_path.path + [self.element.id] # Add the children to all of its parent paths missing_paths.append(ElementPath( - element_id=annotation['element'].id, + element=annotation['element'], path=new_path, ordering=next_path_ordering )) diff --git a/arkindex/documents/tests/test_bulk_element_transcriptions.py b/arkindex/documents/tests/test_bulk_element_transcriptions.py index 15077ebe10816dd9c69896e83d7d66a04314c3f2..0fb7b3ed14cdf5a60365aba2ec6ae66c84c53674 100644 --- a/arkindex/documents/tests/test_bulk_element_transcriptions.py +++ b/arkindex/documents/tests/test_bulk_element_transcriptions.py @@ -65,7 +65,7 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): } self.client.force_login(self.internal_user) - with self.assertNumQueries(19): + with self.assertNumQueries(17): response = self.client.post( reverse('api:element-transcriptions-bulk', kwargs={'pk': self.page.id}), format='json', @@ -198,7 +198,7 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): self.assertEqual(created_elts.count(), 1) self.client.force_login(self.internal_user) - with self.assertNumQueries(19): + with self.assertNumQueries(17): response = self.client.post( reverse('api:element-transcriptions-bulk', kwargs={'pk': self.page.id}), format='json', @@ -234,7 +234,7 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): } self.client.force_login(self.internal_user) - with self.assertNumQueries(19): + with self.assertNumQueries(17): response = self.client.post( reverse('api:element-transcriptions-bulk', kwargs={'pk': self.page.id}), format='json', @@ -409,7 +409,7 @@ class TestBulkElementTranscriptions(FixtureAPITestCase): } self.client.force_login(self.internal_user) - with self.assertNumQueries(19): + with self.assertNumQueries(17): response = self.client.post( reverse('api:element-transcriptions-bulk', kwargs={'pk': self.page.id}), format='json',