From 69cab9403cbf5ecefdc30075e3ad521845bc6aee Mon Sep 17 00:00:00 2001
From: Bastien Abadie <bastien@nextcairn.com>
Date: Wed, 27 Jun 2018 14:36:58 +0000
Subject: [PATCH] Corpus: model + admin

---
 arkindex/documents/admin.py                   | 11 ++-
 arkindex/documents/api.py                     | 20 ++++--
 arkindex/documents/importer.py                | 46 ++++++------
 arkindex/documents/indexer.py                 |  3 +-
 .../documents/management/commands/from_csv.py |  6 ++
 .../management/commands/import_images.py      | 13 +++-
 .../management/commands/import_manifest.py    |  7 ++
 arkindex/documents/migrations/0015_corpus.py  | 60 ++++++++++++++++
 arkindex/documents/models.py                  | 19 ++++-
 arkindex/documents/serializers.py             | 12 +++-
 arkindex/documents/surface.py                 |  4 +-
 arkindex/documents/surface_link.py            |  1 +
 arkindex/documents/tasks.py                   | 15 ++--
 .../documents/tests/test_annotation_list.py   | 24 ++++---
 arkindex/documents/tests/test_cache_db.py     | 15 ++--
 .../documents/tests/test_edit_elementpath.py  | 17 +++--
 arkindex/documents/tests/test_edit_link.py    |  9 +--
 .../documents/tests/test_element_manager.py   | 15 ++--
 arkindex/documents/tests/test_search_post.py  | 37 ++++++----
 .../documents/tests/test_surface_importer.py  | 19 ++---
 .../documents/tests/test_surface_linker.py    | 70 +++++++++++++++----
 arkindex/documents/tests/test_text_create.py  |  7 +-
 .../documents/tests/test_volume_manifest.py   | 11 +--
 arkindex/images/importer.py                   |  7 +-
 arkindex/images/tests.py                      |  5 +-
 arkindex/project/api_v1.py                    |  3 +-
 arkindex/templates/elastic/search_acts.json   |  7 ++
 .../elastic/search_transcriptions.json        |  7 ++
 28 files changed, 352 insertions(+), 118 deletions(-)
 create mode 100644 arkindex/documents/migrations/0015_corpus.py

diff --git a/arkindex/documents/admin.py b/arkindex/documents/admin.py
index 1fbd4378ef..90ee001fb3 100644
--- a/arkindex/documents/admin.py
+++ b/arkindex/documents/admin.py
@@ -2,11 +2,15 @@ from django.contrib import admin
 from django.conf.urls import url
 from django.urls import reverse
 from django.utils.html import format_html
-from arkindex.documents.models import Page, Element, ElementLink, ElementType, Act, Transcription
+from arkindex.documents.models import Corpus, Page, Element, ElementLink, ElementType, Act, Transcription
 from arkindex.documents.views import DumpActs
 from enumfields.admin import EnumFieldListFilter
 
 
+class CorpusAdmin(admin.ModelAdmin):
+    list_display = ('id', 'name', )  # columns shown in the admin change list
+
+
 class PageAdmin(admin.ModelAdmin):
     list_display = ('id', 'name', 'page_type', 'nb', 'direction', )
     list_filter = [('page_type', EnumFieldListFilter)]
@@ -21,8 +25,8 @@ class ElementLinkAdmin(admin.TabularInline):
 
 
 class ElementAdmin(admin.ModelAdmin):
-    list_display = ('id', 'name', 'type', 'element_actions')
-    list_filter = [('type', EnumFieldListFilter)]
+    list_display = ('id', 'name', 'type', 'corpus', 'element_actions')
+    list_filter = [('type', EnumFieldListFilter), 'corpus']
     fields = ('id', 'type', 'name')
     readonly_fields = ('id', 'element_actions')
     inlines = [ElementLinkAdmin, ]
@@ -59,6 +63,7 @@ class TranscriptionAdmin(admin.ModelAdmin):
     readonly_fields = ('id', )
 
 
+admin.site.register(Corpus, CorpusAdmin)
 admin.site.register(Page, PageAdmin)
 admin.site.register(Element, ElementAdmin)
 admin.site.register(Act, ActAdmin)
diff --git a/arkindex/documents/api.py b/arkindex/documents/api.py
index 163cbe73e9..395a8352e5 100644
--- a/arkindex/documents/api.py
+++ b/arkindex/documents/api.py
@@ -8,7 +8,7 @@ from django.conf import settings
 from django.utils.decorators import method_decorator
 from django.views.decorators.cache import cache_page
 from arkindex.documents.serializers import \
-    ElementSerializer, PageLightSerializer, \
+    ElementSerializer, CorpusSerializer, PageLightSerializer, \
     TranscriptionSearchResultSerializer, ActSearchResultSerializer, \
     VolumeManifestSerializer, ActManifestSerializer, \
     PageAnnotationListSerializer, PageActAnnotationListSerializer, \
@@ -16,7 +16,7 @@ from arkindex.documents.serializers import \
     ActSerializer, ElementLinkSerializer, SurfaceSerializer, \
     TextCreationSerializer
 from arkindex.documents.models import \
-    Element, ElementType, Page, Act, Transcription, ElementLink
+    Element, ElementType, Page, Act, Transcription, ElementLink, Corpus
 from arkindex.documents.search import \
     search_transcriptions_post, search_acts_post, search_transcriptions_filter_post
 from arkindex.documents.tasks import refresh_db_cache
@@ -73,6 +73,15 @@ class ElementSurfaces(ListAPIView):
                               .filter(type=ElementType.Surface)
 
 
+class CorpusList(ListAPIView):
+    """
+    List all available corpora, ordered by name, in a single unpaginated response
+    """
+    serializer_class = CorpusSerializer
+    pagination_class = None
+    queryset = Corpus.objects.all().order_by('name')
+
+
 class SurfaceDetails(RetrieveAPIView):
     """
     Get details for a specific surface
@@ -152,6 +161,7 @@ class TextElementCreate(CreateAPIView):
             polygon=[tuple(x) for x in serializer.validated_data['polygon']],
         )
         ts, created = Transcription.objects.get_or_create(
+            corpus=element.corpus,
             type=serializer.validated_data['type'],
             zone=ts_zone,
             text=serializer.validated_data['text'],
@@ -186,10 +196,11 @@ class TranscriptionSearch(ListAPIView):
 
     def get_queryset(self):
         query = self.request.query_params.get('q')
+        corpus = self.request.query_params.get('corpus')
         return ESQuerySet(
             query=ESQuerySet.make_query(
                 'elastic/search_transcriptions.json',
-                ctx={'query': query},
+                ctx={'query': query, 'corpus': corpus},
             ),
             sort={"score": {"order": "desc", "mode": "max"}},
             es_index=settings.ES_INDEX_TRANSCRIPTIONS,
@@ -206,11 +217,12 @@ class ActSearch(ListAPIView):
 
     def get_queryset(self):
         query = self.request.query_params.get('q')
+        corpus = self.request.query_params.get('corpus')
         return ESQuerySet(
             _source=False,
             query=ESQuerySet.make_query(
                 'elastic/search_acts.json',
-                ctx={'query': query},
+                ctx={'query': query, 'corpus': corpus},
             ),
             es_index=settings.ES_INDEX_ACTS,
             es_type=Act.INDEX_TYPE,
diff --git a/arkindex/documents/importer.py b/arkindex/documents/importer.py
index 350153eda3..37cabd7505 100644
--- a/arkindex/documents/importer.py
+++ b/arkindex/documents/importer.py
@@ -1,4 +1,5 @@
-from arkindex.documents.models import PageType, PageDirection, PageComplement, Page, ElementType, Element, ElementLink
+from arkindex.documents.models import \
+    PageType, PageDirection, PageComplement, Page, ElementType, Element, ElementLink, Corpus
 from arkindex.images.models import Image, ImageServer, Zone
 from arkindex.images.importer import bulk_transcriptions
 from abc import ABC, abstractmethod
@@ -42,6 +43,7 @@ def import_page(volume, image, register, folio, order):
     element, _ = Element.objects.get_or_create(
         type=ElementType.Register,
         name=register,
+        corpus=volume.corpus,
     )
     ElementLink.objects.get_or_create(parent=volume, child=element)
 
@@ -57,9 +59,15 @@ def import_page(volume, image, register, folio, order):
     else:
         page_type, page_nb, page_direction, page_complement = parse_folio(folio)
         p = Page.objects.create(
-            folio=folio, name="Page {0} du volume {1}".format(folio, volume.name),
-            page_type=page_type, nb=page_nb, direction=page_direction, complement=page_complement,
-            zone=Zone.objects.create(polygon=poly, image=image))
+            folio=folio,
+            name="Page {0} du volume {1}".format(folio, volume.name),
+            page_type=page_type,
+            nb=page_nb,
+            direction=page_direction,
+            complement=page_complement,
+            zone=Zone.objects.create(polygon=poly, image=image),
+            corpus=volume.corpus,
+        )
 
     ElementLink.objects.get_or_create(parent=element, child=p, defaults={'order': order})
     return p
@@ -115,7 +123,7 @@ class ManifestsImporter(ABC):
     Parses JSON manifests and annotation data to import them in the database.
     """
 
-    def __init__(self, imgserv, offline=False, annotations=True, volume_name=None):
+    def __init__(self, imgserv, corpus, offline=False, annotations=True, volume_name=None):
         """Initialize a manifest importer
         `imgserv` can be either one ImageServer or a list of ImageServers.
         When `volume_name` is set, it overrides the manifest volume name."""
@@ -128,6 +136,8 @@ class ManifestsImporter(ABC):
         self.offline = offline
         self.annotations = annotations
         self.volume_name = volume_name
+        assert isinstance(corpus, Corpus)
+        self.corpus = corpus
 
         # This dictionary associates canvas IDs with images and pages
         # Filled by parse_manifest ; used by parse_annotation_list
@@ -197,8 +207,8 @@ class ManifestsImporter(ABC):
 
         # Create a volume and a register
         logger.debug("Creating volume {}".format(vol_name))
-        vol, _ = Element.objects.get_or_create(name=vol_name, type=ElementType.Volume)
-        doc, _ = Element.objects.get_or_create(name=vol_name, type=ElementType.Register)
+        vol, _ = Element.objects.get_or_create(name=vol_name, type=ElementType.Volume, corpus=self.corpus)
+        doc, _ = Element.objects.get_or_create(name=vol_name, type=ElementType.Register, corpus=self.corpus)
         ElementLink.objects.get_or_create(parent=vol, child=doc)
         stream.seek(0)
         self.parse_manifest_canvases(stream, vol)
@@ -298,27 +308,23 @@ class ManifestsImporter(ABC):
 
             # Fill data dictionary
             self.images_transcription_data[(image, page)].append({
-                'x': x, 'y': y, 'width': w, 'height': h, 'text': text, 'line': None, 'score': 1
+                'x': x, 'y': y, 'width': w, 'height': h, 'text': text, 'line': 0, 'score': 1
             })
 
     def save_transcriptions(self):
-        """To optimize transcription parsing, saving and indexing is done in bulk."""
+        """To optimize transcription parsing, saving is done in bulk."""
         if len(self.images_transcription_data) < 1:
             return
 
-        total_zones, total_transcriptions, total_indexes = 0, 0, 0
+        total_zones, total_transcriptions = 0, 0
 
         for (image, page), data in self.images_transcription_data.items():
             new_transcriptions = bulk_transcriptions(image, page, data)
             total_transcriptions += len(new_transcriptions)
             logger.debug("Created {0} transcriptions for image {1}".format(len(new_transcriptions), image.path))
 
-            index_count = image.index()
-            total_indexes += index_count
-            logger.debug("Created {0} indexes for image {1}".format(index_count, image.path))
-
-        logger.info("Saved {0} zones, {1} transcriptions and {2} indexes".format(
-            total_zones, total_transcriptions, total_indexes))
+        logger.info("Saved {0} zones and {1} transcriptions".format(
+            total_zones, total_transcriptions))
 
         # Reset
         self.images_transcription_data = {}
@@ -376,8 +382,8 @@ class ManifestsImporter(ABC):
 class LocalManifestsImporter(ManifestsImporter):
     """Allows importing of local JSON files."""
 
-    def __init__(self, imgserv, path, **kwargs):
-        super().__init__(imgserv, **kwargs)
+    def __init__(self, imgserv, corpus, path, **kwargs):
+        super().__init__(imgserv, corpus, **kwargs)
         self.path = path
 
     def get_json_files(self):
@@ -399,8 +405,8 @@ class LocalManifestsImporter(ManifestsImporter):
 class URLManifestsImporter(ManifestsImporter):
     """Allows importing of remote JSON files."""
 
-    def __init__(self, imgserv, url, **kwargs):
-        super().__init__(imgserv, **kwargs)
+    def __init__(self, imgserv, corpus, url, **kwargs):
+        super().__init__(imgserv, corpus, **kwargs)
         self.url = url
 
     def get_json_files(self):
diff --git a/arkindex/documents/indexer.py b/arkindex/documents/indexer.py
index 36182359c8..693857dabe 100644
--- a/arkindex/documents/indexer.py
+++ b/arkindex/documents/indexer.py
@@ -31,7 +31,8 @@ class Indexer(object):
                                     "id": {"type": "text"},
                                     "type": {"type": "text"},
                                     "score": {"type": "float"},
-                                    "text": {"type": "text"}
+                                    "text": {"type": "text"},
+                                    "corpus": {"type": "text"}
                                 }
                             }
                         }
diff --git a/arkindex/documents/management/commands/from_csv.py b/arkindex/documents/management/commands/from_csv.py
index 83c89db179..f9d8c998a2 100755
--- a/arkindex/documents/management/commands/from_csv.py
+++ b/arkindex/documents/management/commands/from_csv.py
@@ -28,6 +28,11 @@ class Command(BaseCommand):
             help='Root folder for indexes (/home/data/indexes)',
             default='.',
         )
+        parser.add_argument(
+            '--corpus-id',
+            required=True,
+            help='Slug of corpus to import volumes into'
+        )
         parser.add_argument(
             '--col-name',
             help='Index of the volume name column',
@@ -69,6 +74,7 @@ class Command(BaseCommand):
         task = import_manifests_annotations.delay(
             csv_data,
             options['index_root'],
+            options['corpus_id'],
             col_name=options['col_name'],
             col_url=options['col_url'],
             col_path=options['col_path'],
diff --git a/arkindex/documents/management/commands/import_images.py b/arkindex/documents/management/commands/import_images.py
index b0bcd315cb..7350e92f92 100644
--- a/arkindex/documents/management/commands/import_images.py
+++ b/arkindex/documents/management/commands/import_images.py
@@ -1,6 +1,6 @@
 from django.core.management.base import BaseCommand, CommandError
 from arkindex.images.models import ImageServer
-from arkindex.documents.models import Element, ElementType
+from arkindex.documents.models import Element, ElementType, Corpus
 from arkindex.documents.importer import import_page
 from arkindex.documents.tasks import refresh_db_cache
 import logging
@@ -35,6 +35,12 @@ class Command(BaseCommand):
             help='Name of the volume to import images in',
             required=True,
         )
+        parser.add_argument(
+            '--corpus-id',
+            type=str,
+            help='Slug of the corpus to import images in',
+            required=True,
+        )
         parser.add_argument(
             '--offline',
             action='store_true',
@@ -63,8 +69,9 @@ class Command(BaseCommand):
         except ImageServer.DoesNotExist:
             raise CommandError('No image server found !')
 
-        logger.info('Loading volume')
-        vol, _ = Element.objects.get_or_create(type=ElementType.Volume, name=options['volume_name'])
+        logger.info('Loading corpus and volume')
+        corpus = Corpus.objects.get(id=options['corpus_id'])
+        vol, _ = Element.objects.get_or_create(type=ElementType.Volume, name=options['volume_name'], corpus=corpus)
 
         for i, path in enumerate(options['image_list'].readlines(), 1):
             img = server.find_image(path.strip(), offline=options['offline'])
diff --git a/arkindex/documents/management/commands/import_manifest.py b/arkindex/documents/management/commands/import_manifest.py
index a1a278f5a7..8e17af56a6 100644
--- a/arkindex/documents/management/commands/import_manifest.py
+++ b/arkindex/documents/management/commands/import_manifest.py
@@ -25,6 +25,12 @@ class Command(BaseCommand):
             help='IDs of IIIF servers where to find images',
             nargs='+'
         )
+        parser.add_argument(
+            '--corpus-id',
+            type=str,
+            help='Slug of corpus to import manifests into',
+            required=True,
+        )
         parser.add_argument(
             '--offline',
             action='store_true',
@@ -54,6 +60,7 @@ class Command(BaseCommand):
             import_manifest.si(
                 options['manifest_folder'],
                 server_ids=options.get('iiif-server', []),
+                corpus_id=options['corpus_id'],
                 offline=options['offline'],
                 annotations=options['annotations'],
                 volume_name=options.get('volume_name')
diff --git a/arkindex/documents/migrations/0015_corpus.py b/arkindex/documents/migrations/0015_corpus.py
new file mode 100644
index 0000000000..5c981357b5
--- /dev/null
+++ b/arkindex/documents/migrations/0015_corpus.py
@@ -0,0 +1,60 @@
+# Generated by Django 2.0 on 2018-06-26 08:10
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+def init_corpus(apps, schema_editor):
+    '''
+    Data migration step: create the default corpus (id 'himanis', name 'Himanis')
+    and assign it to every existing element through one queryset update
+    '''
+    Corpus = apps.get_model('documents', 'Corpus')  # model from the migration app registry
+    Element = apps.get_model('documents', 'Element')  # model from the migration app registry
+
+    himanis = Corpus.objects.create(id='himanis', name='Himanis')
+    Element.objects.update(corpus=himanis)
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('documents', '0014_auto_20180625_1119'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='Corpus',
+            fields=[
+                ('id', models.CharField(max_length=250, primary_key=True, serialize=False)),
+                ('name', models.CharField(max_length=250)),
+            ],
+        ),
+        migrations.AddField(
+            model_name='element',
+            name='corpus',
+            field=models.ForeignKey(
+                null=True,
+                on_delete=django.db.models.deletion.CASCADE,
+                related_name='elements',
+                to='documents.Corpus',
+            ),
+            preserve_default=False,
+        ),
+        migrations.RunPython(init_corpus, migrations.RunPython.noop),  # noop reverse: column is dropped anyway
+        migrations.AlterField(
+            model_name='element',
+            name='corpus',
+            field=models.ForeignKey(
+                null=False,
+                on_delete=django.db.models.deletion.CASCADE,
+                related_name='elements',
+                to='documents.Corpus',
+            ),
+            preserve_default=False,
+        ),
+        migrations.AlterModelOptions(
+            name='corpus',
+            options={'verbose_name_plural': 'corpora'},
+        ),
+    ]
diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py
index 1166ad43df..2a8722674d 100644
--- a/arkindex/documents/models.py
+++ b/arkindex/documents/models.py
@@ -11,6 +11,20 @@ import itertools
 logger = logging.getLogger(__name__)
 
 
+class Corpus(models.Model):
+    '''
+    A group of elements, usually linked to users or projects
+    '''
+    id = models.CharField(max_length=250, primary_key=True)  # slug-style string key, e.g. 'himanis'
+    name = models.CharField(max_length=250)  # display name, also returned by __str__
+
+    class Meta:
+        verbose_name_plural = 'corpora'
+
+    def __str__(self):
+        return self.name
+
+
 class ElementType(Enum):
     Volume = 'volume'
     Register = 'register'
@@ -59,6 +73,7 @@ class Element(IndexableModel):
     """
     aka Unité Codicologique
     """
+    corpus = models.ForeignKey('documents.Corpus', related_name='elements', on_delete=models.CASCADE)
     parents = models.ManyToManyField('self', through=ElementLink, symmetrical=False)
     type = EnumField(ElementType, max_length=50, db_index=True)
     name = models.CharField(max_length=250)
@@ -296,7 +311,8 @@ class Act(Element):
                     'id': t.id,
                     'type': t.type.value,
                     'score': t.score,
-                    'text': t.text
+                    'text': t.text,
+                    'corpus': t.corpus_id,
                 }
                 for sublist in transcriptions for t in sublist
             ]
@@ -334,4 +350,5 @@ class Transcription(Element):
             'score': self.score,
             'line': self.line,
             'text': self.text,
+            'corpus': self.corpus_id,
         }
diff --git a/arkindex/documents/serializers.py b/arkindex/documents/serializers.py
index 3fd5ea1c41..8a29748dda 100644
--- a/arkindex/documents/serializers.py
+++ b/arkindex/documents/serializers.py
@@ -2,7 +2,7 @@ from abc import ABC, abstractmethod
 from django.conf import settings
 from rest_framework import serializers
 from arkindex.documents.models import \
-    Element, ElementType, ElementLink, Transcription, Page, PageType, PageDirection, Act
+    Element, ElementType, ElementLink, Transcription, Page, PageType, PageDirection, Act, Corpus
 from arkindex.documents.serializer_fields import EnumField, ViewerURLField
 from arkindex.images.models import Image, Zone
 from arkindex.images.serializers import ZoneSerializer, ImageSerializer
@@ -139,6 +139,16 @@ class SurfaceSerializer(serializers.ModelSerializer):
         )
 
 
+class CorpusSerializer(serializers.ModelSerializer):
+    """
+    Serialize a corpus as its id and its name
+    """
+
+    class Meta:
+        model = Corpus
+        fields = ('id', 'name')
+
+
 class TextCreationSerializer(serializers.Serializer):
     """
     Allows for insertion of new transcriptions and zones
diff --git a/arkindex/documents/surface.py b/arkindex/documents/surface.py
index e75d849593..92010f84d9 100644
--- a/arkindex/documents/surface.py
+++ b/arkindex/documents/surface.py
@@ -264,6 +264,7 @@ class SurfaceImporter(object):
                 surface, _ = Element.objects.get_or_create(
                     type=ElementType.Surface,
                     zone=z,
+                    corpus=page.corpus,
                     defaults={'name': "Surface {}".format(surfacedata['id'])})
                 self.surfaces[page].append(surface)
 
@@ -298,7 +299,8 @@ class SurfaceImporter(object):
                 new_acts += 1
                 act = Act.objects.create(
                     name="Act {} of volume {}".format(row.number, row.volume_name),
-                    number=row.number)
+                    number=row.number,
+                    corpus=volume.corpus)
                 self._create_element_link(volume, act)
 
             for page in act_pages:
diff --git a/arkindex/documents/surface_link.py b/arkindex/documents/surface_link.py
index 8e256164f9..370842042c 100644
--- a/arkindex/documents/surface_link.py
+++ b/arkindex/documents/surface_link.py
@@ -215,6 +215,7 @@ class SurfaceLinker(object):
                     name="Act {}".format(act_raw.number),
                     number=act_raw.number,
                     folio=act_raw.folio,
+                    corpus=self.volume.corpus,
                 )
                 self.created_acts += 1
 
diff --git a/arkindex/documents/tasks.py b/arkindex/documents/tasks.py
index c73e6b6413..e0f6e4f5c2 100644
--- a/arkindex/documents/tasks.py
+++ b/arkindex/documents/tasks.py
@@ -2,7 +2,7 @@ from celery import shared_task, group, chain
 from celery_once import QueueOnce
 from celery.utils.log import get_task_logger
 from django.conf import settings
-from arkindex.documents.models import Element, Act, Transcription, ElementType
+from arkindex.documents.models import Element, Act, Transcription, ElementType, Corpus
 from arkindex.documents.indexer import Indexer
 from arkindex.documents.importer import URLManifestsImporter, LocalManifestsImporter
 from arkindex.documents.surface import SurfaceImporter
@@ -51,7 +51,7 @@ def reindex_transcriptions(bulk_size=400, volume_id=None):
 
 
 @shared_task
-def import_manifest(path, server_ids=[], offline=False, annotations=False, volume_name=None):
+def import_manifest(path, server_ids=[], corpus_id=None, offline=False, annotations=False, volume_name=None):
     """
     Import a IIIF manifest from a local file or a URL
     """
@@ -60,6 +60,11 @@ def import_manifest(path, server_ids=[], offline=False, annotations=False, volum
     except Exception as e:
         raise ValueError("Image server not found: {}".format(e))
 
+    try:
+        corpus = Corpus.objects.get(id=corpus_id)
+    except Exception as e:
+        raise ValueError("Corpus not found: {}".format(e))
+
     if any(path.startswith(scheme) for scheme in ('http://', 'https://',)):
         importer_class = URLManifestsImporter
     else:
@@ -67,6 +72,7 @@ def import_manifest(path, server_ids=[], offline=False, annotations=False, volum
 
     importer_class(
         servers,
+        corpus,
         path,
         offline=offline,
         annotations=annotations,
@@ -111,13 +117,14 @@ def import_annotations_csv(name, raw_path, source, index_root):
 
 
 @shared_task
-def import_manifests_annotations(csv_data, index_root, col_name=0, col_url=1, col_path=2, col_source=3):
+def import_manifests_annotations(csv_data, index_root, corpus_id, col_name=0, col_url=1, col_path=2, col_source=3):
     '''
     Import both manifests & annotations
     from a parsed csv file (or whatever table)
     '''
     manifests = [
-        import_manifest.si(row[col_url], offline=True, annotations=False, volume_name=row[col_name])
+        import_manifest.si(
+            row[col_url], offline=True, annotations=False, volume_name=row[col_name], corpus_id=corpus_id)
         for row in csv_data
     ]
     annotations = [
diff --git a/arkindex/documents/tests/test_annotation_list.py b/arkindex/documents/tests/test_annotation_list.py
index 951ae700f4..3fb53971d2 100644
--- a/arkindex/documents/tests/test_annotation_list.py
+++ b/arkindex/documents/tests/test_annotation_list.py
@@ -1,7 +1,7 @@
 from django.urls import reverse
 from rest_framework.test import APITestCase
 from rest_framework import status
-from arkindex.documents.models import Element, ElementLink, Page, Transcription, Act, ElementType
+from arkindex.documents.models import Corpus, Element, ElementLink, Page, Transcription, Act, ElementType
 from arkindex.images.models import ImageServer, Image, Zone
 from arkindex.documents.cache import refresh_sync_only_for_unit_tests
 
@@ -10,16 +10,17 @@ class TestPageAnnotationListSerializer(APITestCase):
 
     def setUp(self):
         # Create a page and an image with some transcriptions
+        self.corpus = Corpus.objects.create(id='test', name='Unit Tests')
         self.imgsrv = ImageServer.objects.create(name="Test Server", url="http://server")
         self.img = Image.objects.create(path='img', width=1337, height=42, server=self.imgsrv)
         pagezone = Zone.objects.create(polygon=[(0, 0), (1337, 0), (1337, 42), (42, 0), (0, 0)], image=self.img)
-        self.page = Page.objects.create(name="page", folio="page", zone=pagezone)
+        self.page = Page.objects.create(corpus=self.corpus, name="page", folio="page", zone=pagezone)
         self.z1 = Zone.objects.create(
             polygon=[(100, 200), (100, 300), (300, 300), (300, 200), (100, 200)], image=self.img)
         self.z2 = Zone.objects.create(
             polygon=[(50, 100), (50, 150), (150, 150), (150, 100), (50, 100)], image=self.img)
-        self.t1 = Transcription.objects.create(text="AAA", zone=self.z1)
-        self.t2 = Transcription.objects.create(text="BBB", zone=self.z2)
+        self.t1 = Transcription.objects.create(corpus=self.corpus, text="AAA", zone=self.z1)
+        self.t2 = Transcription.objects.create(corpus=self.corpus, text="BBB", zone=self.z2)
         ElementLink.objects.create(parent=self.page, child=self.t1, order=0)
         ElementLink.objects.create(parent=self.page, child=self.t2, order=1)
         refresh_sync_only_for_unit_tests()
@@ -61,7 +62,7 @@ class TestPageAnnotationListSerializer(APITestCase):
     def test_empty_list(self):
         # An annotation list with nothing in it
         response = self.client.get(reverse('api:page-transcription-manifest', kwargs={
-            'pk': Page.objects.create(name="Empty Page").id
+            'pk': Page.objects.create(corpus=self.corpus, name="Empty Page").id
         }))
         self.assertEqual(response.status_code, status.HTTP_200_OK)
         annotation_list = response.json()
@@ -73,16 +74,17 @@ class TestPageActAnnotationListSerializer(APITestCase):
 
     def setUp(self):
         # Create a page and an image with some transcriptions
+        self.corpus = Corpus.objects.create(id='test', name='Unit Tests')
         imgsrv = ImageServer.objects.create(name="Test Server", url="http://server")
         img = Image.objects.create(path='img', width=1337, height=42, server=imgsrv)
         pagezone = Zone.objects.create(polygon=[(0, 0), (1337, 0), (1337, 42), (42, 0), (0, 0)], image=img)
-        self.page = Page.objects.create(name="page", folio="page", zone=pagezone)
+        self.page = Page.objects.create(corpus=self.corpus, name="page", folio="page", zone=pagezone)
         z1 = Zone.objects.create(polygon=[(100, 200), (100, 300), (300, 300), (300, 200), (100, 200)], image=img)
         z2 = Zone.objects.create(polygon=[(50, 100), (50, 150), (150, 150), (150, 100), (50, 100)], image=img)
-        a1 = Act.objects.create(name="a1", number="123")
-        a2 = Act.objects.create(name="a2", number="456")
-        s1 = Element.objects.create(type=ElementType.Surface, name="s1", zone=z1)
-        s2 = Element.objects.create(type=ElementType.Surface, name="s2", zone=z2)
+        a1 = Act.objects.create(corpus=self.corpus, name="a1", number="123")
+        a2 = Act.objects.create(corpus=self.corpus, name="a2", number="456")
+        s1 = Element.objects.create(corpus=self.corpus, type=ElementType.Surface, name="s1", zone=z1)
+        s2 = Element.objects.create(corpus=self.corpus, type=ElementType.Surface, name="s2", zone=z2)
         ElementLink.objects.create(parent=a1, child=s1, order=0)
         ElementLink.objects.create(parent=a2, child=s2, order=0)
         refresh_sync_only_for_unit_tests()
@@ -124,7 +126,7 @@ class TestPageActAnnotationListSerializer(APITestCase):
     def test_empty_list(self):
         # An annotation list with nothing in it
         response = self.client.get(reverse('api:page-act-manifest', kwargs={
-            'pk': Page.objects.create(name="Empty Page").id
+            'pk': Page.objects.create(corpus=self.corpus, name="Empty Page").id
         }))
         self.assertEqual(response.status_code, status.HTTP_200_OK)
         annotation_list = response.json()
diff --git a/arkindex/documents/tests/test_cache_db.py b/arkindex/documents/tests/test_cache_db.py
index a1aeddcd41..8cc49b31bd 100644
--- a/arkindex/documents/tests/test_cache_db.py
+++ b/arkindex/documents/tests/test_cache_db.py
@@ -1,7 +1,7 @@
 from django.test import TestCase
 from django.db import connection
 from arkindex.documents.cache import CacheDB
-from arkindex.documents.models import ElementPath, Element, ElementType, ElementLink
+from arkindex.documents.models import Corpus, ElementPath, Element, ElementType, ElementLink
 import itertools
 
 
@@ -11,12 +11,13 @@ class TestCacheDB(TestCase):
     '''
 
     def setUp(self):
-        self.volA = Element.objects.create(type=ElementType.Volume, name='vol A')
-        self.volB = Element.objects.create(type=ElementType.Volume, name='vol B')
-        self.pageA = Element.objects.create(type=ElementType.Page, name='page A')
-        self.pageB = Element.objects.create(type=ElementType.Page, name='page B')
-        self.act = Element.objects.create(type=ElementType.Act, name='an act')
-        self.tr = Element.objects.create(type=ElementType.Line, name='some tr')
+        self.corpus = Corpus.objects.create(id='test', name='Unit Tests')
+        self.volA = Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name='vol A')
+        self.volB = Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name='vol B')
+        self.pageA = Element.objects.create(corpus=self.corpus, type=ElementType.Page, name='page A')
+        self.pageB = Element.objects.create(corpus=self.corpus, type=ElementType.Page, name='page B')
+        self.act = Element.objects.create(corpus=self.corpus, type=ElementType.Act, name='an act')
+        self.tr = Element.objects.create(corpus=self.corpus, type=ElementType.Line, name='some tr')
 
         ElementLink.objects.create(parent=self.volA, child=self.pageA, order=1)
         ElementLink.objects.create(parent=self.volB, child=self.pageB, order=1)
diff --git a/arkindex/documents/tests/test_edit_elementpath.py b/arkindex/documents/tests/test_edit_elementpath.py
index 75e2c3f393..afb0d8f634 100644
--- a/arkindex/documents/tests/test_edit_elementpath.py
+++ b/arkindex/documents/tests/test_edit_elementpath.py
@@ -1,5 +1,5 @@
 from django.test import TestCase
-from arkindex.documents.models import Element, ElementPath, ElementType, ElementLink
+from arkindex.documents.models import Corpus, Element, ElementPath, ElementType, ElementLink
 from arkindex.documents.cache import refresh_sync_only_for_unit_tests
 import itertools
 
@@ -8,6 +8,8 @@ class TestEditElementPath(TestCase):
     """
     Test ElementPath editing algorithms
     """
+    def setUp(self):
+        self.corpus = Corpus.objects.create(id='test', name='Unit Tests')
 
     def build_tree(self, tree, element_type=ElementType.Volume):
         """
@@ -30,7 +32,14 @@ class TestEditElementPath(TestCase):
         # Removes the need to add 'element': [] for root elements
         names = set(tree.keys()).union(*[parse_value(val) for val in tree.values()])
         # Create elements
-        elements = {name: Element.objects.create(name=name, type=element_type) for name in names}
+        elements = {
+            name: Element.objects.create(
+                corpus=self.corpus,
+                name=name,
+                type=element_type,
+            )
+            for name in names
+        }
         orders = {name: 0 for name in names}
         # Create links
         for child_name, value in tree.items():
@@ -146,8 +155,8 @@ class TestEditElementPath(TestCase):
         Check adding a parent works with an empty tree
         """
         elements = {
-            'A': Element.objects.create(name="A", type=ElementType.Volume),
-            'B': Element.objects.create(name="A", type=ElementType.Volume),
+            'A': Element.objects.create(corpus=self.corpus, name="A", type=ElementType.Volume),
+            'B': Element.objects.create(corpus=self.corpus, name="B", type=ElementType.Volume),
         }
         self.check_parents(elements, 'A')
         self.check_parents(elements, 'B')
diff --git a/arkindex/documents/tests/test_edit_link.py b/arkindex/documents/tests/test_edit_link.py
index d1fae94985..eddb517ca6 100644
--- a/arkindex/documents/tests/test_edit_link.py
+++ b/arkindex/documents/tests/test_edit_link.py
@@ -1,7 +1,7 @@
 from django.urls import reverse
 from rest_framework.test import APITestCase
 from rest_framework import status
-from arkindex.documents.models import Element, ElementType, ElementLink
+from arkindex.documents.models import Corpus, Element, ElementType, ElementLink
 from arkindex.users.models import User
 
 
@@ -9,9 +9,10 @@ class TestEditLink(APITestCase):
     """Tests for link creation, retrieval and removal views"""
 
     def setUp(self):
-        self.element1 = Element.objects.create(type=ElementType.Volume, name="Element 1")
-        self.element2 = Element.objects.create(type=ElementType.Volume, name="Element 2")
-        self.element3 = Element.objects.create(type=ElementType.Volume, name="Element 3")
+        self.corpus = Corpus.objects.create(id='test', name='Unit Tests')
+        self.element1 = Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name="Element 1")
+        self.element2 = Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name="Element 2")
+        self.element3 = Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name="Element 3")
         self.link1 = ElementLink.objects.create(parent=self.element1, child=self.element2, order=0)
         self.user = User.objects.create_user(email='user@user.com', password='P45$w0rD')
 
diff --git a/arkindex/documents/tests/test_element_manager.py b/arkindex/documents/tests/test_element_manager.py
index 436e3a5b11..d3e45a48ed 100644
--- a/arkindex/documents/tests/test_element_manager.py
+++ b/arkindex/documents/tests/test_element_manager.py
@@ -1,5 +1,5 @@
 from django.test import TestCase
-from arkindex.documents.models import Element, ElementLink, ElementType
+from arkindex.documents.models import Corpus, Element, ElementLink, ElementType
 from arkindex.documents.cache import refresh_sync_only_for_unit_tests
 
 
@@ -7,11 +7,12 @@ class TestElementManager(TestCase):
     """Tests for ElementManager class"""
 
     def setUp(self):
-        self.reg = Element.objects.create(name="Register", type=ElementType.Register)
-        self.vol = Element.objects.create(name="Volume", type=ElementType.Volume)
-        self.p1 = Element.objects.create(name="Page1", type=ElementType.Page)
-        self.p2 = Element.objects.create(name="Page2", type=ElementType.Page)
-        self.act = Element.objects.create(name="Act", type=ElementType.Act)
+        self.corpus = Corpus.objects.create(id='test', name='Unit Tests')
+        self.reg = Element.objects.create(corpus=self.corpus, name="Register", type=ElementType.Register)
+        self.vol = Element.objects.create(corpus=self.corpus, name="Volume", type=ElementType.Volume)
+        self.p1 = Element.objects.create(corpus=self.corpus, name="Page1", type=ElementType.Page)
+        self.p2 = Element.objects.create(corpus=self.corpus, name="Page2", type=ElementType.Page)
+        self.act = Element.objects.create(corpus=self.corpus, name="Act", type=ElementType.Act)
         # Register --> Volume --> [Page1 --> Act, Page2]
         ElementLink.objects.create(parent=self.reg, child=self.vol, order=0)
         ElementLink.objects.create(parent=self.vol, child=self.p1, order=0)
@@ -58,6 +59,6 @@ class TestElementManager(TestCase):
 
     def test_get_related_empty(self):
         # Use a element linked to nothing, expect empty list
-        alone = Element.objects.create(name="LonelyPage", type=ElementType.Page)
+        alone = Element.objects.create(corpus=self.corpus, name="LonelyPage", type=ElementType.Page)
         ids = Element.objects.get_related(alone.id)
         self.assertCountEqual(ids, [])
diff --git a/arkindex/documents/tests/test_search_post.py b/arkindex/documents/tests/test_search_post.py
index 03a67dced6..6c9dc214c4 100644
--- a/arkindex/documents/tests/test_search_post.py
+++ b/arkindex/documents/tests/test_search_post.py
@@ -1,5 +1,5 @@
 from django.test import TestCase
-from arkindex.documents.models import Element, ElementType, ElementLink, Transcription, Act, Page
+from arkindex.documents.models import Corpus, Element, ElementType, ElementLink, Transcription, Act, Page
 from arkindex.images.models import ImageServer, Image, Zone
 from arkindex.documents.search import search_transcriptions_post, search_acts_post, search_transcriptions_filter_post
 from arkindex.documents.cache import refresh_sync_only_for_unit_tests
@@ -10,6 +10,7 @@ class TestSearchPostProcess(TestCase):
 
     def setUp(self):
         # Create a server with three images
+        self.corpus = Corpus.objects.create(id='test', name='Unit Tests')
         self.imgsrv = ImageServer.objects.create(name="Test Server", url="http://server")
         self.img1 = Image.objects.create(path='img1', width=1000, height=1000, server=self.imgsrv)
         self.img2 = Image.objects.create(path='img2', width=1000, height=1000, server=self.imgsrv)
@@ -19,23 +20,23 @@ class TestSearchPostProcess(TestCase):
         self.z3 = Zone.objects.create(polygon=[(0, 0), (1000, 0), (1000, 1000), (1000, 0), (0, 0)], image=self.img3)
 
         # Create a volume with two pages and another with one page
-        self.vol1 = Element.objects.create(name="Volume 1", type=ElementType.Volume)
-        self.vol2 = Element.objects.create(name="Volume 2", type=ElementType.Volume)
-        self.p1 = Page.objects.create(name="p1", folio="p1", zone=self.z1)
-        self.p2 = Page.objects.create(name="p2", folio="p2", zone=self.z2)
-        self.p3 = Page.objects.create(name="p3", folio="p3", zone=self.z3)
+        self.vol1 = Element.objects.create(corpus=self.corpus, name="Volume 1", type=ElementType.Volume)
+        self.vol2 = Element.objects.create(corpus=self.corpus, name="Volume 2", type=ElementType.Volume)
+        self.p1 = Page.objects.create(corpus=self.corpus, name="p1", folio="p1", zone=self.z1)
+        self.p2 = Page.objects.create(corpus=self.corpus, name="p2", folio="p2", zone=self.z2)
+        self.p3 = Page.objects.create(corpus=self.corpus, name="p3", folio="p3", zone=self.z3)
         ElementLink.objects.create(parent=self.vol1, child=self.p1, order=0)
         ElementLink.objects.create(parent=self.vol1, child=self.p2, order=1)
         ElementLink.objects.create(parent=self.vol2, child=self.p3, order=0)
 
         # Create a bunch of transcriptions
-        self.t1 = Transcription.objects.create(text="word", zone=Zone.objects.create(
+        self.t1 = Transcription.objects.create(corpus=self.corpus, text="word", zone=Zone.objects.create(
             polygon=[(10, 10), (20, 10), (20, 20), (10, 20), (10, 10)], image=self.img1))
-        self.t2 = Transcription.objects.create(text="word", zone=Zone.objects.create(
+        self.t2 = Transcription.objects.create(corpus=self.corpus, text="word", zone=Zone.objects.create(
             polygon=[(110, 110), (120, 110), (120, 120), (110, 120), (110, 110)], image=self.img1))
-        self.t3 = Transcription.objects.create(text="word", zone=Zone.objects.create(
+        self.t3 = Transcription.objects.create(corpus=self.corpus, text="word", zone=Zone.objects.create(
             polygon=[(210, 210), (220, 210), (220, 220), (210, 220), (210, 210)], image=self.img2))
-        self.t4 = Transcription.objects.create(text="word", zone=Zone.objects.create(
+        self.t4 = Transcription.objects.create(corpus=self.corpus, text="word", zone=Zone.objects.create(
             polygon=[(310, 210), (320, 310), (320, 320), (310, 320), (310, 310)], image=self.img3))
         ElementLink.objects.create(parent=self.p1, child=self.t1, order=0)
         ElementLink.objects.create(parent=self.p1, child=self.t2, order=1)
@@ -43,11 +44,21 @@ class TestSearchPostProcess(TestCase):
         ElementLink.objects.create(parent=self.p3, child=self.t4, order=0)
 
         # Create an act with surfaces on the first volume
-        self.act = Act.objects.create(name="Act 42", number="42")
+        self.act = Act.objects.create(corpus=self.corpus, name="Act 42", number="42")
         self.sz1 = Zone.objects.create(polygon=[(0, 0), (500, 0), (500, 500), (0, 500), (0, 0)], image=self.img1)
         self.sz2 = Zone.objects.create(polygon=[(0, 0), (500, 0), (500, 500), (0, 500), (0, 0)], image=self.img2)
-        self.surf1 = Element.objects.create(type=ElementType.Surface, name="Surface 1", zone=self.sz1)
-        self.surf2 = Element.objects.create(type=ElementType.Surface, name="Surface 2", zone=self.sz2)
+        self.surf1 = Element.objects.create(
+            corpus=self.corpus,
+            type=ElementType.Surface,
+            name="Surface 1",
+            zone=self.sz1,
+        )
+        self.surf2 = Element.objects.create(
+            corpus=self.corpus,
+            type=ElementType.Surface,
+            name="Surface 2",
+            zone=self.sz2,
+        )
         ElementLink.objects.create(parent=self.vol1, child=self.act, order=2)
         ElementLink.objects.create(parent=self.act, child=self.surf1, order=0)
         ElementLink.objects.create(parent=self.act, child=self.surf2, order=1)
diff --git a/arkindex/documents/tests/test_surface_importer.py b/arkindex/documents/tests/test_surface_importer.py
index 42cc60dceb..51d05bf5b1 100644
--- a/arkindex/documents/tests/test_surface_importer.py
+++ b/arkindex/documents/tests/test_surface_importer.py
@@ -1,16 +1,19 @@
 from django.test import TestCase
 from arkindex.documents.surface import parse_himanis_volume_name, parse_act_folio
-from arkindex.documents.models import Element, ElementType, Page, PageType, PageDirection
+from arkindex.documents.models import Corpus, Element, ElementType, Page, PageType, PageDirection
 
 
 class TestSurfaceImporterFunctions(TestCase):
     """Test surface importing helper functions."""
 
+    def setUp(self):
+        self.corpus = Corpus.objects.create(id='test', name='Unit Tests')
+
     def test_parse_himanis_volume_name(self):
-        v42 = Element.objects.create(type=ElementType.Volume, name="Volume JJ 042")
-        v43 = Element.objects.create(type=ElementType.Volume, name="Volume JJ043")
-        v300a = Element.objects.create(type=ElementType.Volume, name="Volume JJ 300A")
-        v300b = Element.objects.create(type=ElementType.Volume, name="Volume JJ300B")
+        v42 = Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name="Volume JJ 042")
+        v43 = Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name="Volume JJ043")
+        v300a = Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name="Volume JJ 300A")
+        v300b = Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name="Volume JJ300B")
         self.assertEqual(parse_himanis_volume_name('JJ42'), v42)
         self.assertEqual(parse_himanis_volume_name('JJ43'), v43)
         self.assertEqual(parse_himanis_volume_name('JJ043'), v43)
@@ -23,14 +26,14 @@ class TestSurfaceImporterFunctions(TestCase):
 
     def test_parse_act_folio(self):
         # Make two normal pages
-        p1 = Page.objects.create(name="Page 1", folio="0001r",
+        p1 = Page.objects.create(corpus=self.corpus, name="Page 1", folio="0001r",
                                  nb=1, direction=PageDirection.Recto,
                                  page_type=PageType.Page)
-        p2 = Page.objects.create(name="Page 2", folio="0100v",
+        p2 = Page.objects.create(corpus=self.corpus, name="Page 2", folio="0100v",
                                  nb=2, direction=PageDirection.Verso,
                                  page_type=PageType.Page)
         # A summary page that should be ignored
-        p3 = Page.objects.create(name="Page 2", folio="0100v",
+        p3 = Page.objects.create(corpus=self.corpus, name="Page 2", folio="0100v",
                                  nb=2, direction=PageDirection.Verso,
                                  page_type=PageType.Summary)
         pages = [p1, p2]
diff --git a/arkindex/documents/tests/test_surface_linker.py b/arkindex/documents/tests/test_surface_linker.py
index f37b3f1529..5f3b8dddae 100644
--- a/arkindex/documents/tests/test_surface_linker.py
+++ b/arkindex/documents/tests/test_surface_linker.py
@@ -1,7 +1,8 @@
 from unittest.mock import patch
 from django.test import TestCase
 from arkindex.documents.surface_link import parse_folios, ParsedFolio, SurfaceLinker
-from arkindex.documents.models import Element, ElementType, ElementLink, Page, PageDirection, PageComplement, PageType
+from arkindex.documents.models import Corpus, Element, ElementType, \
+    ElementLink, Page, PageDirection, PageComplement, PageType
 from arkindex.images.models import ImageServer, Image, Zone
 from arkindex.documents.cache import refresh_sync_only_for_unit_tests
 
@@ -10,13 +11,20 @@ class TestSurfaceLinkerFunctions(TestCase):
     """Test SurfaceLinker helper functions."""
 
     def setUp(self):
-        vol = Element.objects.create(type=ElementType.Volume, name="Volume")
-        self.p1r = Page.objects.create(name="1r", folio="1r", nb=1, direction=PageDirection.Recto)
-        self.p1v = Page.objects.create(name="1v", folio="1v", nb=1, direction=PageDirection.Verso)
-        self.p2r = Page.objects.create(name="2r", folio="2r", nb=2, direction=PageDirection.Recto)
+        self.corpus = Corpus.objects.create(id='test', name='Unit Tests')
+        vol = Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name="Volume")
+        self.p1r = Page.objects.create(corpus=self.corpus, name="1r", folio="1r", nb=1, direction=PageDirection.Recto)
+        self.p1v = Page.objects.create(corpus=self.corpus, name="1v", folio="1v", nb=1, direction=PageDirection.Verso)
+        self.p2r = Page.objects.create(corpus=self.corpus, name="2r", folio="2r", nb=2, direction=PageDirection.Recto)
         self.p2bisr = Page.objects.create(
-            name="2bisr", folio="2bisr", nb=2, direction=PageDirection.Recto, complement=PageComplement.Bis)
-        self.p2v = Page.objects.create(name="2v", folio="2v", nb=2, direction=PageDirection.Verso)
+            corpus=self.corpus,
+            name="2bisr",
+            folio="2bisr",
+            nb=2,
+            direction=PageDirection.Recto,
+            complement=PageComplement.Bis,
+        )
+        self.p2v = Page.objects.create(corpus=self.corpus, name="2v", folio="2v", nb=2, direction=PageDirection.Verso)
         for i, page in enumerate((self.p1r, self.p1v, self.p2r, self.p2bisr, self.p2v)):
             ElementLink.objects.create(parent=vol, child=page, order=i)
         refresh_sync_only_for_unit_tests()
@@ -98,6 +106,7 @@ class TestSurfaceLinker(TestCase):
 
     def setUp(self):
         # Create a server with three images
+        self.corpus = Corpus.objects.create(id='test', name='Unit Tests')
         self.imgsrv = ImageServer.objects.create(name="Test Server", url="http://server")
         self.img1 = Image.objects.create(path='img1', width=1337, height=42, server=self.imgsrv)
         self.img2 = Image.objects.create(path='img2', width=255, height=420, server=self.imgsrv)
@@ -117,8 +126,9 @@ class TestSurfaceLinker(TestCase):
         self.z3b = Zone.objects.create(polygon=[(0, 202), (418, 202), (418, 404), (0, 404), (0, 202)], image=self.img3)
 
         # Create a volume and 3 pages
-        self.vol = Element.objects.create(name="Volume Name", type=ElementType.Volume)
+        self.vol = Element.objects.create(corpus=self.corpus, name="Volume Name", type=ElementType.Volume)
         self.p1 = Page.objects.create(
+            corpus=self.corpus,
             name="p1",
             folio="p1",
             zone=self.z1,
@@ -127,6 +137,7 @@ class TestSurfaceLinker(TestCase):
             page_type=PageType.Page,
         )
         self.p2 = Page.objects.create(
+            corpus=self.corpus,
             name="p2",
             folio="p2",
             zone=self.z2,
@@ -136,6 +147,7 @@ class TestSurfaceLinker(TestCase):
             page_type=PageType.Page,
         )
         self.p3 = Page.objects.create(
+            corpus=self.corpus,
             name="p3",
             folio="p3",
             zone=self.z3,
@@ -145,12 +157,42 @@ class TestSurfaceLinker(TestCase):
         )
 
         # Create 6 surfaces
-        self.s1a = Element.objects.create(name="Surface P1 #1", type=ElementType.Surface, zone=self.z1a)
-        self.s1b = Element.objects.create(name="Surface P1 #2", type=ElementType.Surface, zone=self.z1b)
-        self.s2a = Element.objects.create(name="Surface P2 #1", type=ElementType.Surface, zone=self.z2a)
-        self.s2b = Element.objects.create(name="Surface P2 #2", type=ElementType.Surface, zone=self.z2b)
-        self.s3a = Element.objects.create(name="Surface P3 #1", type=ElementType.Surface, zone=self.z3a)
-        self.s3b = Element.objects.create(name="Surface P3 #2", type=ElementType.Surface, zone=self.z3b)
+        self.s1a = Element.objects.create(
+            corpus=self.corpus,
+            name="Surface P1 #1",
+            type=ElementType.Surface,
+            zone=self.z1a,
+        )
+        self.s1b = Element.objects.create(
+            corpus=self.corpus,
+            name="Surface P1 #2",
+            type=ElementType.Surface,
+            zone=self.z1b,
+        )
+        self.s2a = Element.objects.create(
+            corpus=self.corpus,
+            name="Surface P2 #1",
+            type=ElementType.Surface,
+            zone=self.z2a,
+        )
+        self.s2b = Element.objects.create(
+            corpus=self.corpus,
+            name="Surface P2 #2",
+            type=ElementType.Surface,
+            zone=self.z2b,
+        )
+        self.s3a = Element.objects.create(
+            corpus=self.corpus,
+            name="Surface P3 #1",
+            type=ElementType.Surface,
+            zone=self.z3a,
+        )
+        self.s3b = Element.objects.create(
+            corpus=self.corpus,
+            name="Surface P3 #2",
+            type=ElementType.Surface,
+            zone=self.z3b,
+        )
 
         # Link surfaces to pages
         links = {
diff --git a/arkindex/documents/tests/test_text_create.py b/arkindex/documents/tests/test_text_create.py
index c75c9bdaf5..817c7e5134 100644
--- a/arkindex/documents/tests/test_text_create.py
+++ b/arkindex/documents/tests/test_text_create.py
@@ -2,7 +2,7 @@ from django.urls import reverse
 from unittest.mock import patch
 from rest_framework.test import APITestCase
 from rest_framework import status
-from arkindex.documents.models import Page, Transcription, ElementLink, ElementPath, ElementType
+from arkindex.documents.models import Corpus, Page, Transcription, ElementLink, ElementPath, ElementType
 from arkindex.images.models import ImageServer, Image, Zone
 from arkindex.users.models import User
 from arkindex.documents.cache import refresh_sync_only_for_unit_tests
@@ -18,13 +18,14 @@ class TestTextElementCreate(APITestCase):
         """
         Create a page and an image with one already included transcription
         """
+        self.corpus = Corpus.objects.create(id='test', name='Unit Tests')
         self.imgsrv = ImageServer.objects.create(name="Test Server", url="http://server")
         self.img = Image.objects.create(path='img', width=1337, height=1337, server=self.imgsrv)
         pagezone = Zone.objects.create(polygon=[(0, 0), (1337, 0), (1337, 1337), (1337, 0), (0, 0)], image=self.img)
-        self.page = Page.objects.create(name="page", folio="page", zone=pagezone)
+        self.page = Page.objects.create(corpus=self.corpus, name="page", folio="page", zone=pagezone)
         self.ts_zone = Zone.objects.create(
             polygon=[(100, 200), (100, 300), (300, 300), (300, 200), (100, 200)], image=self.img)
-        self.ts = Transcription.objects.create(text="PAAMAYIM", score=0.5, zone=self.ts_zone)
+        self.ts = Transcription.objects.create(corpus=self.corpus, text="PAAMAYIM", score=0.5, zone=self.ts_zone)
         ElementLink.objects.create(parent=self.page, child=self.ts, order=0)
         self.user = User.objects.create_user(email='user@user.com', password='P45$w0rD')
         refresh_sync_only_for_unit_tests()
diff --git a/arkindex/documents/tests/test_volume_manifest.py b/arkindex/documents/tests/test_volume_manifest.py
index a0147573d3..2e258b74b7 100644
--- a/arkindex/documents/tests/test_volume_manifest.py
+++ b/arkindex/documents/tests/test_volume_manifest.py
@@ -2,7 +2,7 @@ from django.urls import reverse
 from rest_framework.test import APITestCase
 from rest_framework import status
 from tripoli import IIIFValidator
-from arkindex.documents.models import Element, ElementLink, ElementType, Page
+from arkindex.documents.models import Corpus, Element, ElementLink, ElementType, Page
 from arkindex.images.models import ImageServer, Image, Zone
 from arkindex.documents.cache import refresh_sync_only_for_unit_tests
 
@@ -12,14 +12,15 @@ class TestVolumeManifestSerializer(APITestCase):
 
     def setUp(self):
         # Create a volume with two pages and a server with two images
+        self.corpus = Corpus.objects.create(id='test', name='Unit Tests')
         self.imgsrv = ImageServer.objects.create(name="Test Server", url="http://server")
         self.img1 = Image.objects.create(path='img1', width=1337, height=42, server=self.imgsrv)
         self.img2 = Image.objects.create(path='img2', width=255, height=420, server=self.imgsrv)
         self.z1 = Zone.objects.create(polygon=[(0, 0), (1337, 0), (1337, 42), (42, 0), (0, 0)], image=self.img1)
         self.z2 = Zone.objects.create(polygon=[(0, 0), (255, 0), (255, 420), (0, 420), (0, 0)], image=self.img2)
-        self.vol = Element.objects.create(name="Volume Name", type=ElementType.Volume)
-        self.p1 = Page.objects.create(name="p1", folio="p1", zone=self.z1)
-        self.p2 = Page.objects.create(name="p2", folio="p2", zone=self.z2)
+        self.vol = Element.objects.create(corpus=self.corpus, name="Volume Name", type=ElementType.Volume)
+        self.p1 = Page.objects.create(corpus=self.corpus, name="p1", folio="p1", zone=self.z1)
+        self.p2 = Page.objects.create(corpus=self.corpus, name="p2", folio="p2", zone=self.z2)
         ElementLink.objects.create(parent=self.vol, child=self.p1, order=0)
         ElementLink.objects.create(parent=self.vol, child=self.p2, order=1)
         refresh_sync_only_for_unit_tests()
@@ -91,7 +92,7 @@ class TestVolumeManifestSerializer(APITestCase):
     def test_no_page(self):
         # A manifest for an empty volume
         response = self.client.get(reverse('api:volume-manifest', kwargs={
-            'pk': Element.objects.create(type=ElementType.Volume, name="Empty Volume").id
+            'pk': Element.objects.create(corpus=self.corpus, type=ElementType.Volume, name="Empty Volume").id
         }))
         self.assertEqual(response.status_code, status.HTTP_200_OK)
         manifest = response.json()
diff --git a/arkindex/images/importer.py b/arkindex/images/importer.py
index c4ebd5dc62..a91864e16a 100644
--- a/arkindex/images/importer.py
+++ b/arkindex/images/importer.py
@@ -108,7 +108,12 @@ def bulk_transcriptions(image, page, items):
 
         # Raw elements
         elements = Element.objects.bulk_create(
-            Element(type=ElementType.Word, name=n.text, zone_id=uuid.uuid4())
+            Element(
+                corpus=page.corpus,
+                type=ElementType.Word,
+                name=n.text,
+                zone_id=uuid.uuid4()
+            )
             for n in needed
         )
 
diff --git a/arkindex/images/tests.py b/arkindex/images/tests.py
index a0c03b98d2..645d88ad59 100644
--- a/arkindex/images/tests.py
+++ b/arkindex/images/tests.py
@@ -1,5 +1,5 @@
 from django.test import TestCase
-from arkindex.documents.models import Page, Transcription
+from arkindex.documents.models import Corpus, Page, Transcription
 from arkindex.images.models import ImageServer, Image, Zone
 from arkindex.images.importer import bulk_transcriptions
 
@@ -9,10 +9,11 @@ class TestBulkTranscriptions(TestCase):
 
     def setUp(self):
         # Create a page and an image
+        self.corpus = Corpus.objects.create(id='test', name='Unit Tests')
         self.imgsrv = ImageServer.objects.create(name="Test Server", url="http://server")
         self.img = Image.objects.create(path='img', width=1337, height=42, server=self.imgsrv)
         pagezone = Zone.objects.create(polygon=[(0, 0), (1337, 0), (1337, 42), (42, 0), (0, 0)], image=self.img)
-        self.page = Page.objects.create(name="page", folio="page", zone=pagezone)
+        self.page = Page.objects.create(corpus=self.corpus, name="page", folio="page", zone=pagezone)
 
     def test_bulk_transcriptions(self):
         items = [
diff --git a/arkindex/project/api_v1.py b/arkindex/project/api_v1.py
index b17156a1cf..4acafaa1b6 100644
--- a/arkindex/project/api_v1.py
+++ b/arkindex/project/api_v1.py
@@ -1,7 +1,7 @@
 from django.conf.urls import url
 from django.views.generic.base import RedirectView
 from arkindex.documents.api import \
-    ElementsList, RelatedElementsList, ElementPages, ElementSurfaces, \
+    ElementsList, RelatedElementsList, ElementPages, ElementSurfaces, CorpusList, \
     VolumeManifest, ActManifest, \
     PageAnnotationList, PageActAnnotationList, SurfaceAnnotationList, \
     TranscriptionSearch, ActSearch, TranscriptionSearchAnnotationList, \
@@ -18,6 +18,7 @@ api = [
         RelatedElementsList.as_view(), name='related-elements'),
     url(r'elements/$', ElementsList.as_view(), name='elements'),
     url(r'surface/(?P<pk>[\w\-]+)/?$', SurfaceDetails.as_view(), name='surface-details'),
+    url(r'corpus/$', CorpusList.as_view(), name='corpus'),
 
     # Manifests
     url(r'^manifest/(?P<pk>[\w\-]+)/pages/?$',
diff --git a/arkindex/templates/elastic/search_acts.json b/arkindex/templates/elastic/search_acts.json
index 8f6e3e0cba..9073b5ba2c 100644
--- a/arkindex/templates/elastic/search_acts.json
+++ b/arkindex/templates/elastic/search_acts.json
@@ -15,6 +15,13 @@
                                     }
                                 }
                             },
+                            {% if corpus %}
+                            {
+                                "match": {
+                                    "transcriptions.corpus": "{{ corpus }}"
+                                }
+                            },
+                            {% endif %}
                             {
                                 "match": {
                                     "transcriptions.text": "{{ query }}"
diff --git a/arkindex/templates/elastic/search_transcriptions.json b/arkindex/templates/elastic/search_transcriptions.json
index 2135cc7074..8ce0d2476c 100644
--- a/arkindex/templates/elastic/search_transcriptions.json
+++ b/arkindex/templates/elastic/search_transcriptions.json
@@ -6,6 +6,13 @@
                     "text": "{{ query }}"
                 }
             },
+            {% if corpus %}
+            {
+                "match": {
+                    "corpus": "{{ corpus }}"
+                }
+            },
+            {% endif %}
             {
                 "range": {
                     "score": {
-- 
GitLab