Skip to content
Snippets Groups Projects
Commit 63a4e47d authored by Bastien Abadie
Browse files

Merge branch 'index-pages-on-trs' into 'master'

Index transcriptions through a single method

See merge request !154
parents 04ae0b8b 4b46c990
No related branches found
No related tags found
1 merge request: !154 Index transcriptions through a single method
......@@ -6,13 +6,12 @@ from django.conf import settings
from django.db import transaction
from django.core.exceptions import ValidationError
from arkindex.project.celery import ReportingTask
from arkindex.documents.models import Element, ElementType, Page, Transcription, TranscriptionType
from arkindex.documents.models import Element, ElementType, Page, TranscriptionType
from arkindex.documents.importer import import_page
from arkindex.documents.indexer import Indexer
from arkindex.documents.tei import TeiParser
from arkindex.documents.tasks import generate_thumbnail
from arkindex.images.models import ImageServer, ImageStatus
from arkindex.images.importer import build_transcriptions, save_transcriptions
from arkindex.images.importer import build_transcriptions, save_transcriptions, index_transcriptions
from arkindex.dataimport.models import DataImport, DataImportState, DataImportMode, EventType
from arkindex.dataimport.config import ConfigFile
from arkindex.dataimport.filetypes import FileType
......@@ -233,13 +232,7 @@ def save_ml_results(self, results, **kwargs):
if trpolygons:
transcriptions, _ = save_transcriptions(*trpolygons)
self.report_message('Saved transcriptions for {}'.format(page))
Indexer().run_index(
settings.ES_INDEX_TRANSCRIPTIONS,
Transcription.INDEX_TYPE,
Transcription.objects.filter(
id__in=[t[0] for t in transcriptions],
),
)
index_transcriptions(transcriptions)
self.report_message('Indexed transcriptions for {}'.format(page))
return list(map(str, results.keys()))
......
......@@ -8,7 +8,7 @@ from arkindex.documents.models import Transcription, TranscriptionType
from arkindex.documents.serializers.transcriptions import TranscriptionsSerializer, TranscriptionCreateSerializer
from arkindex.documents.indexer import Indexer
from arkindex.images.models import Zone
from arkindex.images.importer import build_transcriptions, save_transcriptions
from arkindex.images.importer import build_transcriptions, save_transcriptions, index_transcriptions
from arkindex.project.polygon import Polygon
......@@ -39,13 +39,7 @@ class TranscriptionBulk(CreateAPIView):
)
if trpolygons:
transcriptions, _ = save_transcriptions(*trpolygons)
Indexer().run_index(
settings.ES_INDEX_TRANSCRIPTIONS,
Transcription.INDEX_TYPE,
Transcription.objects.filter(
id__in=[t[0] for t in transcriptions]
),
)
index_transcriptions(transcriptions)
class TranscriptionCreate(CreateAPIView):
......
......@@ -111,7 +111,7 @@ class TestTranscriptionCreate(FixtureAPITestCase):
response = self.client.post(reverse('api:transcription-create'), format='json', data=post_data)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
@patch('arkindex.documents.api.transcriptions.Indexer')
@patch('arkindex.images.importer.Indexer')
def test_create_bulk_transcription(self, indexer):
"""
Checks the view creates transcriptions, zones, links, paths and runs ES indexing
......@@ -143,4 +143,4 @@ class TestTranscriptionCreate(FixtureAPITestCase):
self.assertTrue(self.page.transcriptions.filter(pk=new_ts.id).exists())
# indexer called
self.assertTrue(indexer.return_value.run_index.called)
self.assertEqual(indexer.return_value.run_index.call_count, 2)
......@@ -3,9 +3,10 @@ from abc import ABC, abstractmethod
from django.db import connection
from django.conf import settings
from arkindex.images.models import Image
from arkindex.documents.models import TranscriptionType, Element, Page, ElementType
from arkindex.documents.models import TranscriptionType, Element, Page, ElementType, Transcription
from arkindex.project.polygon import Polygon
from arkindex.project.tools import Timer
from arkindex.documents.indexer import Indexer
import csv
import io
import os
......@@ -151,6 +152,32 @@ def save_transcriptions(*tr_polygons, delimiter='\t', quotechar='"'):
return (transcriptions, zones)
def index_transcriptions(transcriptions):
    '''
    Index newly saved transcriptions in ElasticSearch.

    Two indexing passes are run with the same Indexer instance:
    the transcriptions themselves (used for IIIF search), then the
    pages referenced by those transcriptions.

    transcriptions: iterable of tuples where item 0 is used as a
    Transcription id and item 1 as a Page id.
    Raises AssertionError when an item is not a tuple.
    '''
    # The input is traversed several times below; materialise it once so
    # that a one-shot iterable (e.g. a generator) is not exhausted by the
    # sanity check, which would silently leave nothing to index.
    transcriptions = list(transcriptions)
    assert all(isinstance(t, tuple) for t in transcriptions)

    transcription_ids = [t[0] for t in transcriptions]
    # The same page id may appear in several tuples; send each id only once
    page_ids = list({t[1] for t in transcriptions})

    # Index transcriptions directly (IIIF search)
    indexer = Indexer()
    indexer.run_index(
        settings.ES_INDEX_TRANSCRIPTIONS,
        Transcription.INDEX_TYPE,
        Transcription.objects.filter(id__in=transcription_ids),
    )

    # Index transcriptions in pages, only when at least one page matches
    pages = Page.objects.filter(id__in=page_ids)
    if pages.exists():
        indexer.run_index(
            settings.ES_INDEX_PAGES,
            Page.INDEX_TYPE,
            pages,
        )
class BaseIndexImporter(ABC):
"""Import index files (.idx.gz) as transcriptions."""
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment