diff --git a/arkindex/documents/admin.py b/arkindex/documents/admin.py index 1fbd4378efb2a568bc85d554a08e5e034fe12240..90ee001fb32cb70643b84861a281da8c8c58a955 100644 --- a/arkindex/documents/admin.py +++ b/arkindex/documents/admin.py @@ -2,11 +2,15 @@ from django.contrib import admin from django.conf.urls import url from django.urls import reverse from django.utils.html import format_html -from arkindex.documents.models import Page, Element, ElementLink, ElementType, Act, Transcription +from arkindex.documents.models import Corpus, Page, Element, ElementLink, ElementType, Act, Transcription from arkindex.documents.views import DumpActs from enumfields.admin import EnumFieldListFilter +class CorpusAdmin(admin.ModelAdmin): + list_display = ('id', 'name', ) + + class PageAdmin(admin.ModelAdmin): list_display = ('id', 'name', 'page_type', 'nb', 'direction', ) list_filter = [('page_type', EnumFieldListFilter)] @@ -21,8 +25,8 @@ class ElementLinkAdmin(admin.TabularInline): class ElementAdmin(admin.ModelAdmin): - list_display = ('id', 'name', 'type', 'element_actions') - list_filter = [('type', EnumFieldListFilter)] + list_display = ('id', 'name', 'type', 'corpus', 'element_actions') + list_filter = [('type', EnumFieldListFilter), 'corpus'] fields = ('id', 'type', 'name') readonly_fields = ('id', 'element_actions') inlines = [ElementLinkAdmin, ] @@ -59,6 +63,7 @@ class TranscriptionAdmin(admin.ModelAdmin): readonly_fields = ('id', ) +admin.site.register(Corpus, CorpusAdmin) admin.site.register(Page, PageAdmin) admin.site.register(Element, ElementAdmin) admin.site.register(Act, ActAdmin) diff --git a/arkindex/documents/api.py b/arkindex/documents/api.py index 163cbe73e9047de4e3ec43519325c5303d4d70e3..395a8352e570bfc8a1d72b7fd35788ad71aa4b03 100644 --- a/arkindex/documents/api.py +++ b/arkindex/documents/api.py @@ -8,7 +8,7 @@ from django.conf import settings from django.utils.decorators import method_decorator from django.views.decorators.cache import cache_page from arkindex.documents.serializers import \ - ElementSerializer, PageLightSerializer, \ + ElementSerializer, CorpusSerializer, PageLightSerializer, \ TranscriptionSearchResultSerializer, ActSearchResultSerializer, \ VolumeManifestSerializer, ActManifestSerializer, \ PageAnnotationListSerializer, PageActAnnotationListSerializer, \ @@ -16,7 +16,7 @@ from arkindex.documents.serializers import \ ActSerializer, ElementLinkSerializer, SurfaceSerializer, \ TextCreationSerializer from arkindex.documents.models import \ - Element, ElementType, Page, Act, Transcription, ElementLink + Element, ElementType, Page, Act, Transcription, ElementLink, Corpus from arkindex.documents.search import \ search_transcriptions_post, search_acts_post, search_transcriptions_filter_post from arkindex.documents.tasks import refresh_db_cache @@ -73,6 +73,15 @@ class ElementSurfaces(ListAPIView): .filter(type=ElementType.Surface) +class CorpusList(ListAPIView): + """ + List all available corpora + """ + serializer_class = CorpusSerializer + pagination_class = None + queryset = Corpus.objects.all().order_by('name') + + class SurfaceDetails(RetrieveAPIView): """ Get details for a specific surface @@ -152,6 +161,7 @@ class TextElementCreate(CreateAPIView): polygon=[tuple(x) for x in serializer.validated_data['polygon']], ) ts, created = Transcription.objects.get_or_create( + corpus=element.corpus, type=serializer.validated_data['type'], zone=ts_zone, text=serializer.validated_data['text'], @@ -186,10 +196,11 @@ class TranscriptionSearch(ListAPIView): def get_queryset(self): query = self.request.query_params.get('q') + corpus = self.request.query_params.get('corpus') return ESQuerySet( query=ESQuerySet.make_query( 'elastic/search_transcriptions.json', - ctx={'query': query}, + ctx={'query': query, 'corpus': corpus}, ), sort={"score": {"order": "desc", "mode": "max"}}, es_index=settings.ES_INDEX_TRANSCRIPTIONS, @@ -206,11 +217,12 @@ class ActSearch(ListAPIView): def get_queryset(self): query = self.request.query_params.get('q') + corpus = self.request.query_params.get('corpus') return ESQuerySet( _source=False, query=ESQuerySet.make_query( 'elastic/search_acts.json', - ctx={'query': query}, + ctx={'query': query, 'corpus': corpus}, ), es_index=settings.ES_INDEX_ACTS, es_type=Act.INDEX_TYPE, diff --git a/arkindex/documents/importer.py b/arkindex/documents/importer.py index 350153eda315a1e578a331ed1f9964ad786d86aa..37cabd7505606c2151f25c425058c21f331bc3ac 100644 --- a/arkindex/documents/importer.py +++ b/arkindex/documents/importer.py @@ -1,4 +1,5 @@ -from arkindex.documents.models import PageType, PageDirection, PageComplement, Page, ElementType, Element, ElementLink +from arkindex.documents.models import \ + PageType, PageDirection, PageComplement, Page, ElementType, Element, ElementLink, Corpus from arkindex.images.models import Image, ImageServer, Zone from arkindex.images.importer import bulk_transcriptions from abc import ABC, abstractmethod @@ -42,6 +43,7 @@ def import_page(volume, image, register, folio, order): element, _ = Element.objects.get_or_create( type=ElementType.Register, name=register, + corpus=volume.corpus, ) ElementLink.objects.get_or_create(parent=volume, child=element) @@ -57,9 +59,15 @@ def import_page(volume, image, register, folio, order): else: page_type, page_nb, page_direction, page_complement = parse_folio(folio) p = Page.objects.create( - folio=folio, name="Page {0} du volume {1}".format(folio, volume.name), - page_type=page_type, nb=page_nb, direction=page_direction, complement=page_complement, - zone=Zone.objects.create(polygon=poly, image=image)) + folio=folio, + name="Page {0} du volume {1}".format(folio, volume.name), + page_type=page_type, + nb=page_nb, + direction=page_direction, + complement=page_complement, + zone=Zone.objects.create(polygon=poly, image=image), + corpus=volume.corpus, + ) ElementLink.objects.get_or_create(parent=element, child=p, defaults={'order': order}) return p @@ -115,7 +123,7 @@ class ManifestsImporter(ABC): Parses JSON manifests and annotation data to import them in the database. """ - def __init__(self, imgserv, offline=False, annotations=True, volume_name=None): + def __init__(self, imgserv, corpus, offline=False, annotations=True, volume_name=None): """Initialize a manifest importer `imgserv` can be either one ImageServer or a list of ImageServers. When `volume_name` is set, it overrides the manifest volume name.""" @@ -128,6 +136,8 @@ class ManifestsImporter(ABC): self.offline = offline self.annotations = annotations self.volume_name = volume_name + assert isinstance(corpus, Corpus) + self.corpus = corpus # This dictionary associates canvas IDs with images and pages # Filled by parse_manifest ; used by parse_annotation_list @@ -197,8 +207,8 @@ class ManifestsImporter(ABC): # Create a volume and a register logger.debug("Creating volume {}".format(vol_name)) - vol, _ = Element.objects.get_or_create(name=vol_name, type=ElementType.Volume) - doc, _ = Element.objects.get_or_create(name=vol_name, type=ElementType.Register) + vol, _ = Element.objects.get_or_create(name=vol_name, type=ElementType.Volume, corpus=self.corpus) + doc, _ = Element.objects.get_or_create(name=vol_name, type=ElementType.Register, corpus=self.corpus) ElementLink.objects.get_or_create(parent=vol, child=doc) stream.seek(0) self.parse_manifest_canvases(stream, vol) @@ -298,27 +308,23 @@ class ManifestsImporter(ABC): # Fill data dictionary self.images_transcription_data[(image, page)].append({ - 'x': x, 'y': y, 'width': w, 'height': h, 'text': text, 'line': None, 'score': 1 + 'x': x, 'y': y, 'width': w, 'height': h, 'text': text, 'line': 0, 'score': 1 }) def save_transcriptions(self): - """To optimize transcription parsing, saving and indexing is done in bulk.""" + """To optimize transcription parsing, saving is done in bulk.""" if len(self.images_transcription_data) < 1: return - total_zones, total_transcriptions, total_indexes = 0, 0, 0 + total_zones, total_transcriptions = 0, 0 for (image, page), data in self.images_transcription_data.items(): new_transcriptions = bulk_transcriptions(image, page, data) total_transcriptions += len(new_transcriptions) logger.debug("Created {0} transcriptions for image {1}".format(len(new_transcriptions), image.path)) - index_count = image.index() - total_indexes += index_count - logger.debug("Created {0} indexes for image {1}".format(index_count, image.path)) - - logger.info("Saved {0} zones, {1} transcriptions and {2} indexes".format( - total_zones, total_transcriptions, total_indexes)) + logger.info("Saved {0} zones and {1} transcriptions".format( + total_zones, total_transcriptions)) # Reset self.images_transcription_data = {} @@ -376,8 +382,8 @@ class ManifestsImporter(ABC): class LocalManifestsImporter(ManifestsImporter): """Allows importing of local JSON files.""" - def __init__(self, imgserv, path, **kwargs): - super().__init__(imgserv, **kwargs) + def __init__(self, imgserv, corpus, path, **kwargs): + super().__init__(imgserv, corpus, **kwargs) self.path = path def get_json_files(self): @@ -399,8 +405,8 @@ class LocalManifestsImporter(ManifestsImporter): class URLManifestsImporter(ManifestsImporter): """Allows importing of remote JSON files.""" - def __init__(self, imgserv, url, **kwargs): - super().__init__(imgserv, **kwargs) + def __init__(self, imgserv, corpus, url, **kwargs): + super().__init__(imgserv, corpus, **kwargs) self.url = url def get_json_files(self): diff --git a/arkindex/documents/indexer.py b/arkindex/documents/indexer.py index 36182359c81804ef16a39cf6dcf799122fb5d58a..693857dabeac6761d9f7169ecc0a6d31701b43ac 100644 --- a/arkindex/documents/indexer.py +++ b/arkindex/documents/indexer.py @@ -31,7 +31,8 @@ class Indexer(object): "id": {"type": "text"}, "type": {"type": "text"}, "score": {"type": "float"}, - "text": {"type": "text"} + "text": {"type": "text"}, + "corpus": {"type": "text"} } } } diff --git a/arkindex/documents/management/commands/from_csv.py b/arkindex/documents/management/commands/from_csv.py index 83c89db1791ad8e4732e5a66631ec07c70288f90..f9d8c998a284a437f7f1cab093a0254afae2d58f 100755 --- a/arkindex/documents/management/commands/from_csv.py +++ b/arkindex/documents/management/commands/from_csv.py @@ -28,6 +28,11 @@ class Command(BaseCommand): help='Root folder for indexes (/home/data/indexes)', default='.', ) + parser.add_argument( + '--corpus-id', + required=True, + help='Slug of corpus to import volumes into' + ) parser.add_argument( '--col-name', help='Index of the volume name column', @@ -69,6 +74,7 @@ class Command(BaseCommand): task = import_manifests_annotations.delay( csv_data, options['index_root'], + options['corpus_id'], col_name=options['col_name'], col_url=options['col_url'], col_path=options['col_path'], diff --git a/arkindex/documents/management/commands/import_images.py b/arkindex/documents/management/commands/import_images.py index b0bcd315cbff5da7578073393b6463e75d86de65..7350e92f92d8debe8708af011acb8e6498b46cf8 100644 --- a/arkindex/documents/management/commands/import_images.py +++ b/arkindex/documents/management/commands/import_images.py @@ -1,6 +1,6 @@ from django.core.management.base import BaseCommand, CommandError from arkindex.images.models import ImageServer -from arkindex.documents.models import Element, ElementType +from arkindex.documents.models import Element, ElementType, Corpus from arkindex.documents.importer import import_page from arkindex.documents.tasks import refresh_db_cache import logging @@ -35,6 +35,12 @@ class Command(BaseCommand): help='Name of the volume to import images in', required=True, ) + parser.add_argument( + '--corpus-id', + type=str, + help='Slug of the corpus to import images in', + required=True, + ) parser.add_argument( '--offline', action='store_true', @@ -63,8 +69,9 @@ class Command(BaseCommand): except ImageServer.DoesNotExist: raise CommandError('No image server found !') - logger.info('Loading volume') - vol, _ = Element.objects.get_or_create(type=ElementType.Volume, name=options['volume_name']) + logger.info('Loading corpus and volume') + corpus = Corpus.objects.get(id=options['corpus_id']) + vol, _ = Element.objects.get_or_create(type=ElementType.Volume, name=options['volume_name'], corpus=corpus) for i, path in enumerate(options['image_list'].readlines(), 1): img = server.find_image(path.strip(), offline=options['offline']) diff --git a/arkindex/documents/management/commands/import_manifest.py b/arkindex/documents/management/commands/import_manifest.py index a1a278f5a7e3b3033f0be3b55d2649ecd72a9546..8e17af56a688bf00313abeabce3d9755901aec67 100644 --- a/arkindex/documents/management/commands/import_manifest.py +++ b/arkindex/documents/management/commands/import_manifest.py @@ -25,6 +25,12 @@ class Command(BaseCommand): help='IDs of IIIF servers where to find images', nargs='+' ) + parser.add_argument( + '--corpus-id', + type=str, + help='Slug of corpus to import manifests into', + required=True, + ) parser.add_argument( '--offline', action='store_true', @@ -54,6 +60,7 @@ class Command(BaseCommand): import_manifest.si( options['manifest_folder'], server_ids=options.get('iiif-server', []), + corpus_id=options['corpus_id'], offline=options['offline'], annotations=options['annotations'], volume_name=options.get('volume_name') diff --git a/arkindex/documents/migrations/0015_corpus.py b/arkindex/documents/migrations/0015_corpus.py new file mode 100644 index 0000000000000000000000000000000000000000..5c981357b5ca7857bae8cc4f1d459243b5724935 --- /dev/null +++ b/arkindex/documents/migrations/0015_corpus.py @@ -0,0 +1,60 @@ +# Generated by Django 2.0 on 2018-06-26 08:10 + +from django.db import migrations, models +import django.db.models.deletion + + +def init_corpus(apps, schema_editor): + ''' + Create default corpus "Himanis" + and assign it to every element + ''' + Corpus = apps.get_model('documents', 'Corpus') + Element = apps.get_model('documents', 'Element') + + himanis = Corpus.objects.create(id='himanis', name='Himanis') + Element.objects.update(corpus=himanis) + + +class Migration(migrations.Migration): + + dependencies = [ + ('documents', '0014_auto_20180625_1119'), + ] + + operations = [ + migrations.CreateModel( + name='Corpus', + fields=[ + ('id', models.CharField(max_length=250, primary_key=True, serialize=False)), + ('name', models.CharField(max_length=250)), + ], + ), + migrations.AddField( + model_name='element', + name='corpus', + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name='elements', + to='documents.Corpus', + ), + preserve_default=False, + ), + migrations.RunPython(init_corpus), + migrations.AlterField( + model_name='element', + name='corpus', + field=models.ForeignKey( + null=False, + on_delete=django.db.models.deletion.CASCADE, + related_name='elements', + to='documents.Corpus', + ), + preserve_default=False, + ), + migrations.AlterModelOptions( + name='corpus', + options={'verbose_name_plural': 'corpora'}, + ), + ] diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py index 1166ad43dfda767c390e6e7a23ad037e57d526e6..2a8722674de6000fa2568a7f783e60970f2f2c08 100644 --- a/arkindex/documents/models.py +++ b/arkindex/documents/models.py @@ -11,6 +11,20 @@ import itertools logger = logging.getLogger(__name__) +class Corpus(models.Model): + ''' + A group of elements, usually linked to users or projects + ''' + id = models.CharField(max_length=250, primary_key=True) + name = models.CharField(max_length=250) + + class Meta: + verbose_name_plural = 'corpora' + + def __str__(self): + return self.name + + class ElementType(Enum): Volume = 'volume' Register = 'register' @@ -59,6 +73,7 @@ class Element(IndexableModel): """ aka Unité Codicologique """ + corpus = models.ForeignKey('documents.Corpus', related_name='elements', on_delete=models.CASCADE) parents = models.ManyToManyField('self', through=ElementLink, symmetrical=False) type = EnumField(ElementType, max_length=50, db_index=True) name = models.CharField(max_length=250) @@ -296,7 +311,8 @@ class Act(Element): 'id': t.id, 'type': t.type.value, 'score': t.score, - 'text': t.text + 'text': t.text, + 'corpus': t.corpus_id, } for sublist in transcriptions for t in sublist ] @@ -334,4 +350,5 @@ class Transcription(Element): 'score': self.score, 'line': self.line, 'text': self.text, + 'corpus': self.corpus_id, } diff --git a/arkindex/documents/serializers.py b/arkindex/documents/serializers.py index 3fd5ea1c41f64b8559ad260b0af5faa059db61f4..8a29748ddac84526b812580ac4286b3f867da11c 100644 --- a/arkindex/documents/serializers.py +++ b/arkindex/documents/serializers.py @@ -2,7 +2,7 @@ from abc import ABC, abstractmethod from django.conf import settings from rest_framework import serializers from arkindex.documents.models import \ - Element, ElementType, ElementLink, Transcription, Page, PageType, PageDirection, Act + Element, ElementType, ElementLink, Transcription, Page, PageType, PageDirection, Act, Corpus from arkindex.documents.serializer_fields import EnumField, ViewerURLField from arkindex.images.models import Image, Zone from arkindex.images.serializers import ZoneSerializer, ImageSerializer @@ -139,6 +139,16 @@ class SurfaceSerializer(serializers.ModelSerializer): ) +class CorpusSerializer(serializers.ModelSerializer): + """ + Serialize a corpus + """ + + class Meta: + model = Corpus + fields = ('id', 'name') + + class TextCreationSerializer(serializers.Serializer): """ Allows for insertion of new transcriptions and zones diff --git a/arkindex/documents/surface.py b/arkindex/documents/surface.py index e75d849593dadba00f0d62ab9c9e99a1f7c94cfd..92010f84d91cd88020c06889ab0a50af0a3878ed 100644 --- a/arkindex/documents/surface.py +++ b/arkindex/documents/surface.py @@ -264,6 +264,7 @@ class SurfaceImporter(object): surface, _ = Element.objects.get_or_create( type=ElementType.Surface, zone=z, + corpus=page.corpus, defaults={'name': "Surface {}".format(surfacedata['id'])}) self.surfaces[page].append(surface) @@ -298,7 +299,8 @@ class SurfaceImporter(object): new_acts += 1 act = Act.objects.create( name="Act {} of volume {}".format(row.number, row.volume_name), - number=row.number) + number=row.number, + corpus=volume.corpus) self._create_element_link(volume, act) for page in act_pages: diff --git a/arkindex/documents/surface_link.py b/arkindex/documents/surface_link.py index 8e256164f969354c2cf8b0c9dfd5dc0866dee2d1..370842042c4ed16dd5b77e2ec5bafc1e9b10e7a3 100644 --- a/arkindex/documents/surface_link.py +++ b/arkindex/documents/surface_link.py @@ -215,6 +215,7 @@ class SurfaceLinker(object): name="Act {}".format(act_raw.number), number=act_raw.number, folio=act_raw.folio, + corpus=self.volume.corpus, ) self.created_acts += 1 diff --git a/arkindex/documents/tasks.py b/arkindex/documents/tasks.py index c73e6b64130748d0ed17959d28c2f8c566649f54..e0f6e4f5c27ab5da1d30ee677f45711f8bd30312 100644 --- a/arkindex/documents/tasks.py +++ b/arkindex/documents/tasks.py @@ -2,7 +2,7 @@ from celery import shared_task, group, chain from celery_once import QueueOnce from celery.utils.log import get_task_logger from django.conf import settings -from arkindex.documents.models import Element, Act, Transcription, ElementType +from arkindex.documents.models import Element, Act, Transcription, ElementType, Corpus from arkindex.documents.indexer import Indexer from arkindex.documents.importer import URLManifestsImporter, LocalManifestsImporter from arkindex.documents.surface import SurfaceImporter @@ -51,7 +51,7 @@ def reindex_transcriptions(bulk_size=400, volume_id=None): @shared_task -def import_manifest(path, server_ids=[], offline=False, annotations=False, volume_name=None): +def import_manifest(path, server_ids=[], corpus_id=None, offline=False, annotations=False, volume_name=None): """ Import a IIIF manifest from a local file or a URL """ @@ -60,6 +60,11 @@ def import_manifest(path, server_ids=[], offline=False, annotations=False, volum except Exception as e: raise ValueError("Image server not found: {}".format(e)) + try: + corpus = Corpus.objects.get(id=corpus_id) + except Exception as e: + raise ValueError("Corpus not found: {}".format(e)) + if any(path.startswith(scheme) for scheme in ('http://', 'https://',)): importer_class = URLManifestsImporter else: @@ -67,6 +72,7 @@ def import_manifest(path, server_ids=[], offline=False, annotations=False, volum importer_class( servers, + corpus, path, offline=offline, annotations=annotations, @@ -111,13 +117,14 @@ def import_annotations_csv(name, raw_path, source, index_root): @shared_task -def import_manifests_annotations(csv_data, index_root, col_name=0, col_url=1, col_path=2, col_source=3): +def import_manifests_annotations(csv_data, index_root, corpus_id, col_name=0, col_url=1, col_path=2, col_source=3): ''' Import both manifests & annotations from a parsed csv file (or whatever table) ''' manifests = [ - import_manifest.si(row[col_url], offline=True, annotations=False, volume_name=row[col_name]) + import_manifest.si( + row[col_url], offline=True, annotations=False, volume_name=row[col_name], corpus_id=corpus_id) for row in csv_data ] annotations = [ diff --git a/arkindex/documents/tests/test_annotation_list.py b/arkindex/documents/tests/test_annotation_list.py index 951ae700f4d0fa52fa3bf4d2bd6708e519ad319f..3fb53971d294d8189dfd9e9248d9e44a82376676 100644 --- a/arkindex/documents/tests/test_annotation_list.py +++ b/arkindex/documents/tests/test_annotation_list.py @@ -1,7 +1,7 @@ from django.urls import reverse from rest_framework.test import APITestCase from rest_framework import status -from arkindex.documents.models import Element, ElementLink, Page, Transcription, Act, ElementType +from arkindex.documents.models import Corpus, Element, ElementLink, Page, Transcription, Act, ElementType from arkindex.images.models import ImageServer, Image, Zone from arkindex.documents.cache import refresh_sync_only_for_unit_tests @@ -10,16 +10,17 @@ class TestPageAnnotationListSerializer(APITestCase): def setUp(self): # Create a page and an image with some transcriptions + self.corpus = Corpus.objects.create(id='test', name='Unit Tests') self.imgsrv = ImageServer.objects.create(name="Test Server", url="http://server") self.img = Image.objects.create(path='img', width=1337, height=42, server=self.imgsrv) pagezone = Zone.objects.create(polygon=[(0, 0), (1337, 0), (1337, 42), (42, 0), (0, 0)], image=self.img) - self.page = Page.objects.create(name="page", folio="page", zone=pagezone) + self.page = Page.objects.create(corpus=self.corpus, name="page", folio="page", zone=pagezone) self.z1 = Zone.objects.create( polygon=[(100, 200), (100, 300), (300, 300), (300, 200), (100, 200)], image=self.img) self.z2 = Zone.objects.create( polygon=[(50, 100), (50, 150), (150, 150), (150, 100), (50, 100)], image=self.img) - self.t1 = Transcription.objects.create(text="AAA", zone=self.z1) - self.t2 = Transcription.objects.create(text="BBB", zone=self.z2) + self.t1 = Transcription.objects.create(corpus=self.corpus, text="AAA", zone=self.z1) + self.t2 = Transcription.objects.create(corpus=self.corpus, text="BBB", zone=self.z2) ElementLink.objects.create(parent=self.page, child=self.t1, order=0) ElementLink.objects.create(parent=self.page, child=self.t2, order=1) refresh_sync_only_for_unit_tests() @@ -61,7 +62,7 @@ class TestPageAnnotationListSerializer(APITestCase): def test_empty_list(self): # An annotation list with nothing in it response = self.client.get(reverse('api:page-transcription-manifest', kwargs={ - 'pk': Page.objects.create(name="Empty Page").id + 'pk': Page.objects.create(corpus=self.corpus, name="Empty Page").id })) self.assertEqual(response.status_code, status.HTTP_200_OK) annotation_list = response.json() @@ -73,16 +74,17 @@ class TestPageActAnnotationListSerializer(APITestCase): def setUp(self): # Create a page and an image with some transcriptions + self.corpus = Corpus.objects.create(id='test', name='Unit Tests') imgsrv = ImageServer.objects.create(name="Test Server", url="http://server") img = Image.objects.create(path='img', width=1337, height=42, server=imgsrv) pagezone = Zone.objects.create(polygon=[(0, 0), (1337, 0), (1337, 42), (42, 0), (0, 0)], image=img) - self.page = Page.objects.create(name="page", folio="page", zone=pagezone) + self.page = Page.objects.create(corpus=self.corpus, name="page", folio="page", zone=pagezone) z1 = Zone.objects.create(polygon=[(100, 200), (100, 300), (300, 300), (300, 200), (100, 200)], image=img) z2 = Zone.objects.create(polygon=[(50, 100), (50, 150), (150, 150), (150, 100), (50, 100)], image=img) - a1 = Act.objects.create(name="a1", number="123") - a2 = Act.objects.create(name="a2", number="456") - s1 = Element.objects.create(type=ElementType.Surface, name="s1", zone=z1) - s2 = Element.objects.create(type=ElementType.Surface, name="s2", zone=z2) + a1 = Act.objects.create(corpus=self.corpus, name="a1", number="123") + a2 = Act.objects.create(corpus=self.corpus, name="a2", number="456") + s1 = Element.objects.create(corpus=self.corpus, type=ElementType.Surface, name="s1", zone=z1) + s2 = Element.objects.create(corpus=self.corpus, type=ElementType.Surface, name="s2", zone=z2) ElementLink.objects.create(parent=a1, child=s1, order=0) ElementLink.objects.create(parent=a2, child=s2, order=0) refresh_sync_only_for_unit_tests() @@ -124,7 +126,7 @@ class TestPageActAnnotationListSerializer(APITestCase): def test_empty_list(self): # An annotation list with nothing in it response = self.client.get(reverse('api:page-act-manifest', kwargs={ - 'pk': Page.objects.create(name="Empty Page").id + 'pk': Page.objects.create(corpus=self.corpus, name="Empty Page").id })) self.assertEqual(response.status_code, status.HTTP_200_OK) annotation_list = response.json() diff --git a/arkindex/documents/tests/test_cache_db.py b/arkindex/documents/tests/test_cache_db.py index a1aeddcd419ee98531d54119167a3e25bc1f9993..8cc49b31bdb257f86c20887ab302574cc243f042 100644 --- a/arkindex/documents/tests/test_cache_db.py +++ b/arkindex/documents/tests/test_cache_db.py @@ -1,7 +1,7 @@ from django.test import TestCase from django.db import connection from arkindex.documents.cache import CacheDB -from arkindex.documents.models import ElementPath, Element, ElementType, ElementLink +from arkindex.documents.models import Corpus, ElementPath, Element, ElementType, ElementLink import itertools @@ -11,12 +11,13 @@ class TestCacheDB(TestCase): ''' def setUp(self): - self.volA = Element.objects.create(type=ElementType.Volume, name='vol A') - self.volB = Element.objects.create(type=ElementType.Volume, name='vol B') - self.pageA = Element.objects.create(type=ElementType.Page, name='page A') - self.pageB = Element.objects.create(type=ElementType.Page, name='page B') - self.act = Element.objects.create(type=ElementType.Act, name='an act') - self.tr = Element.objects.create(type=ElementType.Line, name='some tr') + self.corpus = Corpus.objects.create(id='test', name='Unit Tests') + self.volA = Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name='vol A') + self.volB = Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name='vol B') + self.pageA = Element.objects.create(corpus=self.corpus, type=ElementType.Page, name='page A') + self.pageB = Element.objects.create(corpus=self.corpus, type=ElementType.Page, name='page B') + self.act = Element.objects.create(corpus=self.corpus, type=ElementType.Act, name='an act') + self.tr = Element.objects.create(corpus=self.corpus, type=ElementType.Line, name='some tr') ElementLink.objects.create(parent=self.volA, child=self.pageA, order=1) ElementLink.objects.create(parent=self.volB, child=self.pageB, order=1) diff --git a/arkindex/documents/tests/test_edit_elementpath.py b/arkindex/documents/tests/test_edit_elementpath.py index 75e2c3f393594904b7340a0de7499f9d0b0a098e..afb0d8f634da8fc702813fbf9b0f3c0792e4347a 100644 --- a/arkindex/documents/tests/test_edit_elementpath.py +++ b/arkindex/documents/tests/test_edit_elementpath.py @@ -1,5 +1,5 @@ from django.test import TestCase -from arkindex.documents.models import Element, ElementPath, ElementType, ElementLink +from arkindex.documents.models import Corpus, Element, ElementPath, ElementType, ElementLink from arkindex.documents.cache import refresh_sync_only_for_unit_tests import itertools @@ -8,6 +8,8 @@ class TestEditElementPath(TestCase): """ Test ElementPath editing algorithms """ + def setUp(self): + self.corpus = Corpus.objects.create(id='test', name='Unit Tests') def build_tree(self, tree, element_type=ElementType.Volume): """ @@ -30,7 +32,14 @@ class TestEditElementPath(TestCase): # Removes the need to add 'element': [] for root elements names = set(tree.keys()).union(*[parse_value(val) for val in tree.values()]) # Create elements - elements = {name: Element.objects.create(name=name, type=element_type) for name in names} + elements = { + name: Element.objects.create( + corpus=self.corpus, + name=name, + type=element_type, + ) + for name in names + } orders = {name: 0 for name in names} # Create links for child_name, value in tree.items(): @@ -146,8 +155,8 @@ class TestEditElementPath(TestCase): Check adding a parent works with an empty tree """ elements = { - 'A': Element.objects.create(name="A", type=ElementType.Volume), - 'B': Element.objects.create(name="A", type=ElementType.Volume), + 'A': Element.objects.create(corpus=self.corpus, name="A", type=ElementType.Volume), + 'B': Element.objects.create(corpus=self.corpus, name="A", type=ElementType.Volume), } self.check_parents(elements, 'A') self.check_parents(elements, 'B') diff --git a/arkindex/documents/tests/test_edit_link.py b/arkindex/documents/tests/test_edit_link.py index d1fae949856ea0ea027fc95146d8bccc780d726b..eddb517ca690ba4202ddd9cf781196956f5adee4 100644 --- a/arkindex/documents/tests/test_edit_link.py +++ b/arkindex/documents/tests/test_edit_link.py @@ -1,7 +1,7 @@ from django.urls import reverse from rest_framework.test import APITestCase from rest_framework import status -from arkindex.documents.models import Element, ElementType, ElementLink +from arkindex.documents.models import Corpus, Element, ElementType, ElementLink from arkindex.users.models import User @@ -9,9 +9,10 @@ class TestEditLink(APITestCase): """Tests for link creation, retrieval and removal views""" def setUp(self): - self.element1 = Element.objects.create(type=ElementType.Volume, name="Element 1") - self.element2 = Element.objects.create(type=ElementType.Volume, name="Element 2") - self.element3 = Element.objects.create(type=ElementType.Volume, name="Element 3") + self.corpus = Corpus.objects.create(id='test', name='Unit Tests') + self.element1 = Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name="Element 1") + self.element2 = Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name="Element 2") + self.element3 = Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name="Element 3") self.link1 = ElementLink.objects.create(parent=self.element1, child=self.element2, order=0) self.user = User.objects.create_user(email='user@user.com', password='P45$w0rD') diff --git a/arkindex/documents/tests/test_element_manager.py b/arkindex/documents/tests/test_element_manager.py index 436e3a5b116f58bd39a39ab925cfb0e94982cb58..d3e45a48eda70b3bae6d4313f3fd1ac9c2ef8f6e 100644 --- a/arkindex/documents/tests/test_element_manager.py +++ b/arkindex/documents/tests/test_element_manager.py @@ -1,5 +1,5 @@ from django.test import TestCase -from arkindex.documents.models import Element, ElementLink, ElementType +from arkindex.documents.models import Corpus, Element, ElementLink, ElementType from arkindex.documents.cache import refresh_sync_only_for_unit_tests @@ -7,11 +7,12 @@ class TestElementManager(TestCase): """Tests for ElementManager class""" def setUp(self): - self.reg = Element.objects.create(name="Register", type=ElementType.Register) - self.vol = Element.objects.create(name="Volume", type=ElementType.Volume) - self.p1 = Element.objects.create(name="Page1", type=ElementType.Page) - self.p2 = Element.objects.create(name="Page2", type=ElementType.Page) - self.act = Element.objects.create(name="Act", type=ElementType.Act) + self.corpus = Corpus.objects.create(id='test', name='Unit Tests') + self.reg = Element.objects.create(corpus=self.corpus, name="Register", type=ElementType.Register) + self.vol = Element.objects.create(corpus=self.corpus, name="Volume", type=ElementType.Volume) + self.p1 = Element.objects.create(corpus=self.corpus, name="Page1", type=ElementType.Page) + self.p2 = Element.objects.create(corpus=self.corpus, name="Page2", type=ElementType.Page) + self.act = Element.objects.create(corpus=self.corpus, name="Act", type=ElementType.Act) # Register --> Volume --> [Page1 --> Act, Page2] ElementLink.objects.create(parent=self.reg, child=self.vol, order=0) ElementLink.objects.create(parent=self.vol, child=self.p1, order=0) @@ -58,6 +59,6 @@ class TestElementManager(TestCase): def test_get_related_empty(self): # Use a element linked to nothing, expect empty list - alone = Element.objects.create(name="LonelyPage", type=ElementType.Page) + alone = Element.objects.create(corpus=self.corpus, name="LonelyPage", type=ElementType.Page) ids = Element.objects.get_related(alone.id) self.assertCountEqual(ids, []) diff --git a/arkindex/documents/tests/test_search_post.py b/arkindex/documents/tests/test_search_post.py index 03a67dced614e0c103108148c80b191aec8b7369..6c9dc214c4f255f7752affaad28166c6b8414744 100644 --- a/arkindex/documents/tests/test_search_post.py +++ b/arkindex/documents/tests/test_search_post.py @@ -1,5 +1,5 @@ from django.test import TestCase -from arkindex.documents.models import Element, ElementType, ElementLink, Transcription, Act, Page +from arkindex.documents.models import Corpus, Element, ElementType, ElementLink, Transcription, Act, Page from arkindex.images.models import ImageServer, Image, Zone from arkindex.documents.search import search_transcriptions_post, search_acts_post, search_transcriptions_filter_post from arkindex.documents.cache import refresh_sync_only_for_unit_tests @@ -10,6 +10,7 @@ class TestSearchPostProcess(TestCase): def setUp(self): # Create a server with three images + self.corpus = Corpus.objects.create(id='test', name='Unit Tests') self.imgsrv = ImageServer.objects.create(name="Test Server", url="http://server") self.img1 = Image.objects.create(path='img1', width=1000, height=1000, server=self.imgsrv) self.img2 = Image.objects.create(path='img2', width=1000, height=1000, server=self.imgsrv) @@ -19,23 +20,23 @@ class TestSearchPostProcess(TestCase): self.z3 = Zone.objects.create(polygon=[(0, 0), (1000, 0), (1000, 1000), (1000, 0), (0, 0)], image=self.img3) # Create a volume with two pages and another with one page - self.vol1 = Element.objects.create(name="Volume 1", type=ElementType.Volume) - self.vol2 = Element.objects.create(name="Volume 2", type=ElementType.Volume) - self.p1 = Page.objects.create(name="p1", folio="p1", zone=self.z1) - self.p2 = Page.objects.create(name="p2", folio="p2", zone=self.z2) - self.p3 = Page.objects.create(name="p3", folio="p3", zone=self.z3) + self.vol1 = Element.objects.create(corpus=self.corpus, name="Volume 1", type=ElementType.Volume) + self.vol2 = Element.objects.create(corpus=self.corpus, name="Volume 2", type=ElementType.Volume) + self.p1 = Page.objects.create(corpus=self.corpus, name="p1", folio="p1", zone=self.z1) + self.p2 = Page.objects.create(corpus=self.corpus, name="p2", folio="p2", zone=self.z2) + self.p3 = Page.objects.create(corpus=self.corpus, name="p3", folio="p3", zone=self.z3) ElementLink.objects.create(parent=self.vol1, child=self.p1, order=0) ElementLink.objects.create(parent=self.vol1, child=self.p2, order=1) ElementLink.objects.create(parent=self.vol2, child=self.p3, order=0) # Create a bunch of transcriptions - self.t1 = Transcription.objects.create(text="word", zone=Zone.objects.create( + self.t1 = Transcription.objects.create(corpus=self.corpus, text="word", zone=Zone.objects.create( polygon=[(10, 10), (20, 10), (20, 20), (10, 20), (10, 10)], image=self.img1)) - self.t2 = Transcription.objects.create(text="word", zone=Zone.objects.create( + self.t2 = Transcription.objects.create(corpus=self.corpus, text="word", zone=Zone.objects.create( polygon=[(110, 110), (120, 110), (120, 120), (110, 120), (110, 110)], image=self.img1)) - self.t3 = Transcription.objects.create(text="word", zone=Zone.objects.create( + self.t3 = Transcription.objects.create(corpus=self.corpus, text="word", zone=Zone.objects.create( polygon=[(210, 210), (220, 210), (220, 220), (210, 220), (210, 210)], image=self.img2)) - self.t4 = Transcription.objects.create(text="word", zone=Zone.objects.create( + self.t4 = Transcription.objects.create(corpus=self.corpus, text="word", zone=Zone.objects.create( polygon=[(310, 210), (320, 310), (320, 320), (310, 320), (310, 310)], image=self.img3)) ElementLink.objects.create(parent=self.p1, child=self.t1, order=0) ElementLink.objects.create(parent=self.p1, child=self.t2, order=1) @@ -43,11 +44,21 @@ class TestSearchPostProcess(TestCase): ElementLink.objects.create(parent=self.p3, child=self.t4, order=0) # Create an act with surfaces on the first volume - self.act = Act.objects.create(name="Act 42", number="42") + self.act = Act.objects.create(corpus=self.corpus, name="Act 42", number="42") self.sz1 = Zone.objects.create(polygon=[(0, 0), (500, 0), (500, 500), (0, 500), (0, 0)], image=self.img1) self.sz2 = Zone.objects.create(polygon=[(0, 0), (500, 0), (500, 500), (0, 500), (0, 0)], image=self.img2) - self.surf1 = Element.objects.create(type=ElementType.Surface, name="Surface 1", zone=self.sz1) - self.surf2 = Element.objects.create(type=ElementType.Surface, name="Surface 2", zone=self.sz2) + self.surf1 = Element.objects.create( + corpus=self.corpus, + type=ElementType.Surface, + name="Surface 1", + zone=self.sz1, + ) + self.surf2 = Element.objects.create( + corpus=self.corpus, + type=ElementType.Surface, + name="Surface 2", + zone=self.sz2, + ) ElementLink.objects.create(parent=self.vol1, child=self.act, order=2) ElementLink.objects.create(parent=self.act, child=self.surf1, order=0) ElementLink.objects.create(parent=self.act, child=self.surf2, order=1) diff --git a/arkindex/documents/tests/test_surface_importer.py b/arkindex/documents/tests/test_surface_importer.py index 42cc60dceb25c89472a1b7cd32fcdedbcf3e4f07..51d05bf5b1da86e85afc0a64a5822cdaa3bcf75a 100644 --- a/arkindex/documents/tests/test_surface_importer.py +++ b/arkindex/documents/tests/test_surface_importer.py @@ -1,16 +1,19 @@ from django.test import TestCase from arkindex.documents.surface import parse_himanis_volume_name, parse_act_folio -from arkindex.documents.models import Element, ElementType, Page, PageType, PageDirection +from arkindex.documents.models import Corpus, Element, ElementType, Page, PageType, PageDirection class TestSurfaceImporterFunctions(TestCase): """Test surface importing helper functions.""" + def setUp(self): + self.corpus = Corpus.objects.create(id='test', name='Unit Tests') + def test_parse_himanis_volume_name(self): - v42 = Element.objects.create(type=ElementType.Volume, name="Volume JJ 042") - v43 = Element.objects.create(type=ElementType.Volume, name="Volume JJ043") - v300a = Element.objects.create(type=ElementType.Volume, name="Volume JJ 300A") - v300b = Element.objects.create(type=ElementType.Volume, name="Volume JJ300B") + v42 = Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name="Volume JJ 042") + v43 = Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name="Volume JJ043") + v300a = Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name="Volume JJ 300A") + v300b = Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name="Volume JJ300B") self.assertEqual(parse_himanis_volume_name('JJ42'), v42) self.assertEqual(parse_himanis_volume_name('JJ43'), v43) self.assertEqual(parse_himanis_volume_name('JJ043'), v43) @@ -23,14 +26,14 @@ class TestSurfaceImporterFunctions(TestCase): def test_parse_act_folio(self): # Make two normal pages - p1 = Page.objects.create(name="Page 1", folio="0001r", + p1 = Page.objects.create(corpus=self.corpus, name="Page 1", folio="0001r", nb=1, direction=PageDirection.Recto, page_type=PageType.Page) - p2 = Page.objects.create(name="Page 2", folio="0100v", + p2 = Page.objects.create(corpus=self.corpus, name="Page 2", folio="0100v", nb=2, direction=PageDirection.Verso, page_type=PageType.Page) # A summary page that should be ignored - p3 = Page.objects.create(name="Page 2", folio="0100v", + p3 = Page.objects.create(corpus=self.corpus, name="Page 2", folio="0100v", nb=2, direction=PageDirection.Verso, page_type=PageType.Summary) pages = [p1, p2] diff --git a/arkindex/documents/tests/test_surface_linker.py b/arkindex/documents/tests/test_surface_linker.py index f37b3f152947ca0351233401f7086fff6cd6bcd1..5f3b8dddae97917b90645c9cb266b2099406182d 100644 --- a/arkindex/documents/tests/test_surface_linker.py +++ b/arkindex/documents/tests/test_surface_linker.py @@ -1,7 +1,8 @@ from unittest.mock import patch from django.test import TestCase from arkindex.documents.surface_link import parse_folios, ParsedFolio, SurfaceLinker -from arkindex.documents.models import Element, ElementType, ElementLink, Page, PageDirection, PageComplement, PageType +from arkindex.documents.models import Corpus, Element, ElementType, \ + ElementLink, Page, PageDirection, PageComplement, PageType from arkindex.images.models import ImageServer, Image, Zone from arkindex.documents.cache import refresh_sync_only_for_unit_tests @@ -10,13 +11,20 @@ class TestSurfaceLinkerFunctions(TestCase): """Test SurfaceLinker helper functions.""" def setUp(self): - vol = Element.objects.create(type=ElementType.Volume, name="Volume") - self.p1r = Page.objects.create(name="1r", folio="1r", nb=1, direction=PageDirection.Recto) - self.p1v = Page.objects.create(name="1v", folio="1v", nb=1, direction=PageDirection.Verso) - self.p2r = Page.objects.create(name="2r", folio="2r", nb=2, direction=PageDirection.Recto) + self.corpus = Corpus.objects.create(id='test', name='Unit Tests') + vol = Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name="Volume") + self.p1r = Page.objects.create(corpus=self.corpus, name="1r", folio="1r", nb=1, direction=PageDirection.Recto) + self.p1v = Page.objects.create(corpus=self.corpus, name="1v", folio="1v", nb=1, direction=PageDirection.Verso) + self.p2r = Page.objects.create(corpus=self.corpus, name="2r", folio="2r", nb=2, direction=PageDirection.Recto) self.p2bisr = Page.objects.create( - name="2bisr", folio="2bisr", nb=2, direction=PageDirection.Recto, complement=PageComplement.Bis) - self.p2v = Page.objects.create(name="2v", folio="2v", nb=2, direction=PageDirection.Verso) + corpus=self.corpus, + name="2bisr", + folio="2bisr", + nb=2, + direction=PageDirection.Recto, + complement=PageComplement.Bis, + ) + self.p2v = Page.objects.create(corpus=self.corpus, name="2v", folio="2v", nb=2, direction=PageDirection.Verso) for i, page in enumerate((self.p1r, self.p1v, self.p2r, self.p2bisr, self.p2v)): ElementLink.objects.create(parent=vol, child=page, order=i) refresh_sync_only_for_unit_tests() @@ -98,6 +106,7 @@ class TestSurfaceLinker(TestCase): def setUp(self): # Create a server with three images + self.corpus = Corpus.objects.create(id='test', name='Unit Tests') self.imgsrv = ImageServer.objects.create(name="Test Server", url="http://server") self.img1 = Image.objects.create(path='img1', width=1337, height=42, server=self.imgsrv) self.img2 = Image.objects.create(path='img2', width=255, height=420, server=self.imgsrv) @@ -117,8 +126,9 @@ class TestSurfaceLinker(TestCase): self.z3b = Zone.objects.create(polygon=[(0, 202), (418, 202), (418, 404), (0, 404), (0, 202)], image=self.img3) # Create a volume and 3 pages - self.vol = Element.objects.create(name="Volume Name", type=ElementType.Volume) + self.vol = Element.objects.create(corpus=self.corpus, name="Volume Name", type=ElementType.Volume) self.p1 = Page.objects.create( + corpus=self.corpus, name="p1", folio="p1", zone=self.z1, @@ -127,6 +137,7 @@ class TestSurfaceLinker(TestCase): page_type=PageType.Page, ) self.p2 = Page.objects.create( + corpus=self.corpus, name="p2", folio="p2", zone=self.z2, @@ -136,6 +147,7 @@ class TestSurfaceLinker(TestCase): page_type=PageType.Page, ) self.p3 = Page.objects.create( + corpus=self.corpus, name="p3", folio="p3", zone=self.z3, @@ -145,12 +157,42 @@ class TestSurfaceLinker(TestCase): ) # Create 6 surfaces - self.s1a = Element.objects.create(name="Surface P1 #1", type=ElementType.Surface, zone=self.z1a) - self.s1b = Element.objects.create(name="Surface P1 #2", type=ElementType.Surface, zone=self.z1b) - self.s2a = Element.objects.create(name="Surface P2 #1", type=ElementType.Surface, zone=self.z2a) - self.s2b = Element.objects.create(name="Surface P2 #2", type=ElementType.Surface, zone=self.z2b) - self.s3a = Element.objects.create(name="Surface P3 #1", type=ElementType.Surface, zone=self.z3a) - self.s3b = Element.objects.create(name="Surface P3 #2", type=ElementType.Surface, zone=self.z3b) + self.s1a = Element.objects.create( + corpus=self.corpus, + name="Surface P1 #1", + type=ElementType.Surface, + zone=self.z1a, + ) + self.s1b = Element.objects.create( + corpus=self.corpus, + name="Surface P1 #2", + type=ElementType.Surface, + zone=self.z1b, + ) + self.s2a = Element.objects.create( + corpus=self.corpus, + name="Surface P2 #1", + type=ElementType.Surface, + zone=self.z2a, + ) + self.s2b = Element.objects.create( + corpus=self.corpus, + name="Surface P2 #2", + type=ElementType.Surface, + zone=self.z2b, + ) + self.s3a = Element.objects.create( + corpus=self.corpus, + name="Surface P3 #1", + type=ElementType.Surface, + zone=self.z3a, + ) + self.s3b = Element.objects.create( + corpus=self.corpus, + name="Surface P3 #2", + type=ElementType.Surface, + zone=self.z3b, + ) # Link surfaces to pages links = { diff --git a/arkindex/documents/tests/test_text_create.py b/arkindex/documents/tests/test_text_create.py index c75c9bdaf57c1dd6cb395de6063702de5204aabb..817c7e51345ca72a3e0e96752a81b710fb9b58e6 100644 --- a/arkindex/documents/tests/test_text_create.py +++ b/arkindex/documents/tests/test_text_create.py @@ -2,7 +2,7 @@ from django.urls import reverse from unittest.mock import patch from rest_framework.test import APITestCase from rest_framework import status -from arkindex.documents.models import Page, Transcription, ElementLink, ElementPath, ElementType +from arkindex.documents.models import Corpus, Page, Transcription, ElementLink, ElementPath, ElementType from arkindex.images.models import ImageServer, Image, Zone from arkindex.users.models import User from arkindex.documents.cache import refresh_sync_only_for_unit_tests @@ -18,13 +18,14 @@ class TestTextElementCreate(APITestCase): """ Create a page and an image with one already included transcription """ + self.corpus = Corpus.objects.create(id='test', name='Unit Tests') self.imgsrv = ImageServer.objects.create(name="Test Server", url="http://server") self.img = Image.objects.create(path='img', width=1337, height=1337, server=self.imgsrv) pagezone = Zone.objects.create(polygon=[(0, 0), (1337, 0), (1337, 1337), (1337, 0), (0, 0)], image=self.img) - self.page = Page.objects.create(name="page", folio="page", zone=pagezone) + self.page = Page.objects.create(corpus=self.corpus, name="page", folio="page", zone=pagezone) self.ts_zone = Zone.objects.create( polygon=[(100, 200), (100, 300), (300, 300), (300, 200), (100, 200)], image=self.img) - self.ts = Transcription.objects.create(text="PAAMAYIM", score=0.5, zone=self.ts_zone) + self.ts = Transcription.objects.create(corpus=self.corpus, text="PAAMAYIM", score=0.5, zone=self.ts_zone) ElementLink.objects.create(parent=self.page, child=self.ts, order=0) self.user = User.objects.create_user(email='user@user.com', password='P45$w0rD') refresh_sync_only_for_unit_tests() diff --git a/arkindex/documents/tests/test_volume_manifest.py b/arkindex/documents/tests/test_volume_manifest.py index a0147573d3a4c9efe00fcd6d40b3e3b9f60ad872..2e258b74b7ec9f772e78244ea49f5c83ae10a262 100644 --- a/arkindex/documents/tests/test_volume_manifest.py +++ b/arkindex/documents/tests/test_volume_manifest.py @@ -2,7 +2,7 @@ from django.urls import reverse from rest_framework.test import APITestCase from rest_framework import status from tripoli import IIIFValidator -from arkindex.documents.models import Element, ElementLink, ElementType, Page +from arkindex.documents.models import Corpus, Element, ElementLink, ElementType, Page from arkindex.images.models import ImageServer, Image, Zone from arkindex.documents.cache import refresh_sync_only_for_unit_tests @@ -12,14 +12,15 @@ class TestVolumeManifestSerializer(APITestCase): def setUp(self): # Create a volume with two pages and a server with two images + self.corpus = Corpus.objects.create(id='test', name='Unit Tests') self.imgsrv = ImageServer.objects.create(name="Test Server", url="http://server") self.img1 = Image.objects.create(path='img1', width=1337, height=42, server=self.imgsrv) self.img2 = Image.objects.create(path='img2', width=255, height=420, server=self.imgsrv) self.z1 = Zone.objects.create(polygon=[(0, 0), (1337, 0), (1337, 42), (42, 0), (0, 0)], image=self.img1) self.z2 = Zone.objects.create(polygon=[(0, 0), (255, 0), (255, 420), (0, 420), (0, 0)], image=self.img2) - self.vol = Element.objects.create(name="Volume Name", type=ElementType.Volume) - self.p1 = Page.objects.create(name="p1", folio="p1", zone=self.z1) - self.p2 = Page.objects.create(name="p2", folio="p2", zone=self.z2) + self.vol = Element.objects.create(corpus=self.corpus, name="Volume Name", type=ElementType.Volume) + self.p1 = Page.objects.create(corpus=self.corpus, name="p1", folio="p1", zone=self.z1) + self.p2 = Page.objects.create(corpus=self.corpus, name="p2", folio="p2", zone=self.z2) ElementLink.objects.create(parent=self.vol, child=self.p1, order=0) ElementLink.objects.create(parent=self.vol, child=self.p2, order=1) refresh_sync_only_for_unit_tests() @@ -91,7 +92,7 @@ class TestVolumeManifestSerializer(APITestCase): def test_no_page(self): # A manifest for an empty volume response = self.client.get(reverse('api:volume-manifest', kwargs={ - 'pk': Element.objects.create(type=ElementType.Volume, name="Empty Volume").id + 'pk': Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name="Empty Volume").id })) self.assertEqual(response.status_code, status.HTTP_200_OK) manifest = response.json() diff --git a/arkindex/images/importer.py b/arkindex/images/importer.py index c4ebd5dc627a8b3f4564ab56ee35530ac5b980f5..a91864e16ad9d55194a927d4ab6c282fd2e846b6 100644 --- a/arkindex/images/importer.py +++ b/arkindex/images/importer.py @@ -108,7 +108,12 @@ def bulk_transcriptions(image, page, items): # Raw elements elements = Element.objects.bulk_create( - Element(type=ElementType.Word, name=n.text, zone_id=uuid.uuid4()) + Element( + corpus=page.corpus, + type=ElementType.Word, + name=n.text, + zone_id=uuid.uuid4() + ) for n in needed ) diff --git a/arkindex/images/tests.py b/arkindex/images/tests.py index a0c03b98d24cf5cfb5b03edc5c9f186361ac745c..645d88ad59880b4a1815d0b0c4c9f2dadf347abc 100644 --- a/arkindex/images/tests.py +++ b/arkindex/images/tests.py @@ -1,5 +1,5 @@ from django.test import TestCase -from arkindex.documents.models import Page, Transcription +from arkindex.documents.models import Corpus, Page, Transcription from arkindex.images.models import ImageServer, Image, Zone from arkindex.images.importer import bulk_transcriptions @@ -9,10 +9,11 @@ class TestBulkTranscriptions(TestCase): def setUp(self): # Create a page and an image + self.corpus = Corpus.objects.create(id='test', name='Unit Tests') self.imgsrv = ImageServer.objects.create(name="Test Server", url="http://server") self.img = Image.objects.create(path='img', width=1337, height=42, server=self.imgsrv) pagezone = Zone.objects.create(polygon=[(0, 0), (1337, 0), (1337, 42), (42, 0), (0, 0)], image=self.img) - self.page = Page.objects.create(name="page", folio="page", zone=pagezone) + self.page = Page.objects.create(corpus=self.corpus, name="page", folio="page", zone=pagezone) def test_bulk_transcriptions(self): items = [ diff --git a/arkindex/project/api_v1.py b/arkindex/project/api_v1.py index b17156a1cf99c66f008eb1c2d453097d50099293..4acafaa1b68a5d528ce5acf6f1a6924e0bdf8d63 100644 --- a/arkindex/project/api_v1.py +++ b/arkindex/project/api_v1.py @@ -1,7 +1,7 @@ from django.conf.urls import url from django.views.generic.base import RedirectView from arkindex.documents.api import \ - ElementsList, RelatedElementsList, ElementPages, ElementSurfaces, \ + ElementsList, RelatedElementsList, ElementPages, ElementSurfaces, CorpusList, \ VolumeManifest, ActManifest, \ PageAnnotationList, PageActAnnotationList, SurfaceAnnotationList, \ TranscriptionSearch, ActSearch, TranscriptionSearchAnnotationList, \ @@ -18,6 +18,7 @@ api = [ RelatedElementsList.as_view(), name='related-elements'), url(r'elements/$', ElementsList.as_view(), name='elements'), url(r'surface/(?P<pk>[\w\-]+)/?$', SurfaceDetails.as_view(), name='surface-details'), + url(r'corpus/$', CorpusList.as_view(), name='corpus'), # Manifests url(r'^manifest/(?P<pk>[\w\-]+)/pages/?$', diff --git a/arkindex/templates/elastic/search_acts.json b/arkindex/templates/elastic/search_acts.json index 8f6e3e0cbaae90c7ac606c6bed06ddc4bae737c1..9073b5ba2c0761c630b2974778c5f6b0957d196f 100644 --- a/arkindex/templates/elastic/search_acts.json +++ b/arkindex/templates/elastic/search_acts.json @@ -15,6 +15,13 @@ } } }, + {% if corpus %} + { + "match": { + "transcriptions.corpus": "{{ corpus }}" + } + }, + {% endif %} { "match": { "transcriptions.text": "{{ query }}" diff --git a/arkindex/templates/elastic/search_transcriptions.json b/arkindex/templates/elastic/search_transcriptions.json index 2135cc707494bb5c00049737d5933c7bf03b6a34..8ce0d2476cde78f2d40d375def58d191c132ea6a 100644 --- a/arkindex/templates/elastic/search_transcriptions.json +++ b/arkindex/templates/elastic/search_transcriptions.json @@ -6,6 +6,13 @@ "text": "{{ query }}" } }, + {% if corpus %} + { + "match": { + "corpus": "{{ corpus }}" + } + }, + {% endif %} { "range": { "score": {