Skip to content
Snippets Groups Projects
Commit 7ca6e065 authored by Bastien Abadie
Browse files

Support element paths in bulk_transcriptions

parent 617fcc31
No related branches found
No related tags found
1 merge request: !56 "Go Faster while creating transcription from API"
from arkindex.images.models import Zone, Image
from arkindex.documents.models import Transcription, ElementLink, ElementType, Element, Page
from arkindex.documents.models import Transcription, ElementLink, ElementType, Element, Page, ElementPath
from arkindex.project.tools import BoundingBox
from collections import namedtuple
from abc import ABC, abstractmethod
......@@ -60,10 +60,12 @@ def import_indexes(image, page, index_path, extension='jpg'):
logger.info('Created {} transcriptions '.format(len(new_transcriptions)))
def bulk_transcriptions(image, page, items):
def bulk_transcriptions(image, parent, items):
"""
Create transcriptions and zones in bulk
"""
assert isinstance(parent, Element)
# Link a transcription data with a bounding box
# This is hashable (box is hashable)
TrBox = namedtuple('TrBox', 'box, line, text, score')
......@@ -109,7 +111,7 @@ def bulk_transcriptions(image, page, items):
# Raw elements
elements = Element.objects.bulk_create(
Element(
corpus=page.corpus,
corpus=parent.corpus,
type=ElementType.Word,
name=n.text,
zone_id=uuid.uuid4()
......@@ -151,13 +153,23 @@ def bulk_transcriptions(image, page, items):
)
# Create all links between transcription and page
max_order_dl = ElementLink.objects.filter(parent=page).order_by('-order').first()
max_order_dl = ElementLink.objects.filter(parent=parent).order_by('-order').first()
max_order = 0 if max_order_dl is None else max_order_dl.order + 1
ElementLink.objects.bulk_create(
ElementLink(parent=page, child=elt, order=i)
ElementLink(parent=parent, child=elt, order=i)
for i, elt in enumerate(elements, max_order)
)
# Support ElementPath
paths = ElementPath.objects.filter(element=parent).values_list('path', flat=True)
if not paths:
paths = [[]] # Wonderful hack to handle no parents case
ElementPath.objects.bulk_create(
ElementPath(element=elt, path=[parent.id, ] + path)
for elt in elements
for path in paths
)
return transcriptions
......
from django.test import TestCase
from arkindex.documents.models import Corpus, Page, Transcription
from arkindex.documents.models import Corpus, Page, Transcription, Element, ElementType
from arkindex.images.models import ImageServer, Image, Zone
from arkindex.images.importer import bulk_transcriptions
......@@ -57,6 +57,13 @@ class TestBulkTranscriptions(TestCase):
self.assertEqual(out[0].zone.polygon, [(0, 0), (0, 100), (100, 100), (100, 0)])
self.assertEqual(out[1].zone.polygon, [(20, 20), (20, 120), (120, 120), (120, 20)])
# Check path
children = Element.objects.get_descending(self.page.id)
self.assertEqual(children.count(), 2)
ids = children.values_list('id', flat=True)
self.assertIn(out[0].id, ids)
self.assertIn(out[1].id, ids)
def test_bulk_transcriptions_unique(self):
"""Check bulk_transcriptions does not import the same transcriptions twice"""
items = [
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment