From f930fbf9306e836d2f5c70b541a55bbe35622096 Mon Sep 17 00:00:00 2001 From: Erwan Rouchet <rouchet@teklia.com> Date: Thu, 17 May 2018 12:52:50 +0200 Subject: [PATCH] Fix bulk_transcriptions --- arkindex/images/importer.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arkindex/images/importer.py b/arkindex/images/importer.py index 40b2edec2b..19822f4923 100644 --- a/arkindex/images/importer.py +++ b/arkindex/images/importer.py @@ -8,6 +8,7 @@ import re import gzip import logging import fnmatch +import uuid REGEX_INDEX = re.compile( b'^(?:line_(\d+) )?(.+) \d+ ([\de\-\.]+) (\d+) (\d+) (\d+) (\d+)') @@ -93,13 +94,12 @@ def bulk_transcriptions(image, page, items): # Build all TrBox from existing existing = { TrBox( - BoundingBox(zone.polygon), + BoundingBox(tr.zone.polygon), tr.line, tr.text, tr.score, ) - for tr in Transcription.objects.filter(zones__image=image).prefetch_related('zones') - for zone in tr.zones.all() + for tr in Transcription.objects.filter(zone__image=image).prefetch_related('zone') } # Calc needed TrBox to build @@ -111,7 +111,7 @@ def bulk_transcriptions(image, page, items): # Raw elements elements = Element.objects.bulk_create( - Element(type=ElementType.Transcription) + Element(type=ElementType.Transcription, zone_id=uuid.uuid4()) for _ in needed ) @@ -125,7 +125,7 @@ def bulk_transcriptions(image, page, items): score=n.score, ), Zone( - element_id=elt.id, + id=elt.zone_id, image=image, polygon=n.box.to_polygon(), ) @@ -133,6 +133,9 @@ def bulk_transcriptions(image, page, items): for elt, n in zip(elements, needed) ]) + # Create zones in bulk + Zone.objects.bulk_create(zones) + # Create transcriptions using a low-level bulk_create # as multi table is not supported yet by Django Transcription.objects.none()._batched_insert( @@ -145,9 +148,6 @@ def bulk_transcriptions(image, page, items): batch_size=None, ) - # Create zones in bulk - Zone.objects.bulk_create(zones) - # Create all links between transcription and page max_order_dl = ElementLink.objects.filter(parent=page).order_by('-order').first() max_order = 0 if max_order_dl is None else max_order_dl.order + 1 -- GitLab