From 06dcfdc319517d4e7f0758c2e32d8acadf2c3e86 Mon Sep 17 00:00:00 2001
From: manon blanco <blanco@teklia.com>
Date: Mon, 20 Jul 2020 08:52:40 +0000
Subject: [PATCH] Destroy PageXmlTranscriptionsImport endpoint

---
 .pre-commit-config.yaml                       |    1 -
 arkindex/documents/api/ml.py                  |   79 --
 .../documents/fixtures/roles_transkribus.yml  |   90 --
 arkindex/documents/pagexml.py                 |  481 --------
 .../tests/pagexml_samples/create_blocks.xml   |   80 --
 .../tests/pagexml_samples/create_entities.xml |  275 -----
 .../pagexml_samples/first_and_last_name.xml   |  185 ---
 .../documents/tests/pagexml_samples/merge.xml |  115 --
 .../tests/pagexml_samples/regroup.xml         |  110 --
 .../tests/pagexml_samples/transcript.xml      |   90 --
 arkindex/documents/tests/test_pagexml.py      | 1027 -----------------
 arkindex/project/api_v1.py                    |    7 +-
 arkindex/project/parsers.py                   |   15 -
 base/requirements.txt                         |    1 -
 requirements.txt                              |    1 -
 15 files changed, 1 insertion(+), 2556 deletions(-)
 delete mode 100644 arkindex/documents/fixtures/roles_transkribus.yml
 delete mode 100644 arkindex/documents/pagexml.py
 delete mode 100644 arkindex/documents/tests/pagexml_samples/create_blocks.xml
 delete mode 100644 arkindex/documents/tests/pagexml_samples/create_entities.xml
 delete mode 100644 arkindex/documents/tests/pagexml_samples/first_and_last_name.xml
 delete mode 100644 arkindex/documents/tests/pagexml_samples/merge.xml
 delete mode 100644 arkindex/documents/tests/pagexml_samples/regroup.xml
 delete mode 100644 arkindex/documents/tests/pagexml_samples/transcript.xml
 delete mode 100644 arkindex/documents/tests/test_pagexml.py
 delete mode 100644 arkindex/project/parsers.py

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 16f3ccafcd..b99ec0aed3 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -16,7 +16,6 @@ repos:
       - id: check-symlinks
       - id: debug-statements
       - id: trailing-whitespace
-        exclude: '^arkindex/documents/tests/pagexml_samples/(.*).xml$'
       - id: check-yaml
         args: [--allow-multiple-documents]
       - id: mixed-line-ending
diff --git a/arkindex/documents/api/ml.py b/arkindex/documents/api/ml.py
index 46f7bc6a73..8a54779541 100644
--- a/arkindex/documents/api/ml.py
+++ b/arkindex/documents/api/ml.py
@@ -1,35 +1,27 @@
 from collections import defaultdict
-from django.conf import settings
 from django.db import transaction
 from django.db.models import Q, Count
-from django.shortcuts import get_object_or_404
 from rest_framework import status
-from rest_framework.mixins import CreateModelMixin
 from rest_framework.generics import \
     GenericAPIView, ListAPIView, CreateAPIView, UpdateAPIView, RetrieveDestroyAPIView, RetrieveUpdateDestroyAPIView
 from rest_framework.exceptions import ValidationError, PermissionDenied
 from rest_framework.response import Response
-from rest_framework.views import APIView
 from arkindex.documents.models import \
     Corpus, Element, ElementPath, DataSource, Classification, ClassificationState, Transcription, Entity, Right, MLClass
 from arkindex_common.ml_tool import MLToolType
-from arkindex_common.enums import EntityType
 from arkindex.documents.serializers.ml import (
     ClassificationsSerializer, ClassificationCreateSerializer, ClassificationSerializer,
     TranscriptionsSerializer, TranscriptionSerializer, TranscriptionCreateSerializer,
     DataSourceStatsSerializer, ClassificationsSelectionSerializer, ClassificationMode,
     CountMLClassSerializer, MLClassSerializer, ElementTranscriptionsBulkSerializer,
 )
-from arkindex.documents.pagexml import PageXmlParser
 from arkindex.images.importer import build_transcriptions, save_transcriptions
 from arkindex.images.models import Zone
 from arkindex.project.filters import SafeSearchFilter
 from arkindex.project.mixins import SelectionMixin, CorpusACLMixin
-from arkindex.project.parsers import XMLParser
 from arkindex.project.permissions import IsVerified, IsAdminUser
 from arkindex.project.triggers import reindex_start, ml_results_delete
 import uuid
-import os.path
 import logging
 
 logger = logging.getLogger(__name__)
@@ -531,77 +523,6 @@ class ClassificationReject(ClassificationModerationActionsMixin):
         return Response(serializer.data, status=status.HTTP_200_OK)
 
 
-class PageXmlTranscriptionsImport(CreateModelMixin, APIView):
-    parser_classes = (XMLParser, )
-    permission_classes = (IsVerified, )
-    openapi_overrides = {
-        'operationId': 'ImportPageXmlTranscriptions',
-        'description': 'Import transcriptions into Arkindex from region data in the PAGE XML format.',
-        'tags': ['transcriptions'],
-        'requestBody': {
-            'required': True,
-            'description': 'A PAGE XML document. TextRegion tags will be imported as Paragraph elements '
-                           'and TextLine tags will become Line elements. '
-                           'Transcriptions will be attached to those elements corresponding to the annotated text. '
-                           'The source of thoses entries will be set to `transkribus`. '
-                           'See https://github.com/PRImA-Research-Lab/PAGE-XML for more info '
-                           'about the PAGE XML format.',
-            'content': {
-                'application/xml': {
-                    'schema': {}
-                }
-            }
-        }
-    }
-
-    def get_queryset(self):
-        return Element.objects.filter(
-            corpus__in=Corpus.objects.writable(self.request.user),
-            zone__isnull=False,
-        )
-
-    def get_object(self):
-        """
-        Since we are inheriting from APIView, because GenericAPIView would break OpenAPI,
-        we have to rewrite get_object ourselves.
-        """
-        obj = get_object_or_404(self.get_queryset(), pk=self.kwargs['pk'])
-        self.check_object_permissions(self.request, obj)
-        return obj
-
-    def post(self, request, *args, **kwargs):
-        element = self.get_object()
-        try:
-            parser = PageXmlParser(request.data, element.corpus)
-        except AssertionError as e:
-            raise ValidationError('Could not parse PAGE XML document: {!s}'.format(e))
-
-        transcriptions = parser.save(element)
-
-        blocks = parser.create_blocks(transcriptions)
-        blocks = parser.merge(blocks)
-
-        path = os.path.join(settings.BASE_DIR, 'documents', 'fixtures', 'roles_transkribus.yml')
-        types = {
-            '_nom': EntityType.Person.value,
-            '_prenom': EntityType.Person.value,
-            '_lieu_residence': EntityType.Location.value,
-            '_profession': EntityType.Subject.value,
-        }
-        entities_id = parser.create_objects(blocks, element, path, types)
-        logger.info('{} blocks: {} entities created'.format(
-            len(blocks),
-            len(entities_id)
-        ))
-
-        reindex_start(element=element, transcriptions=True, elements=True, entities=True)
-
-        return Response(
-            status=status.HTTP_201_CREATED,
-            headers=self.get_success_headers(None),
-        )
-
-
 class MLStatsBase(object):
     serializer_class = DataSourceStatsSerializer
     permission_classes = (IsAdminUser, )
diff --git a/arkindex/documents/fixtures/roles_transkribus.yml b/arkindex/documents/fixtures/roles_transkribus.yml
deleted file mode 100644
index fd3bcdd050..0000000000
--- a/arkindex/documents/fixtures/roles_transkribus.yml
+++ /dev/null
@@ -1,90 +0,0 @@
-père de l'époux:
-  parent_name: père
-  child_name: enfant
-  parent_type: person
-  child_type: person
-  link_with: époux
-mère de l'époux:
-  parent_name: mère
-  child_name: enfant
-  parent_type: person
-  child_type: person
-  link_with: époux
-père de l'épouse:
-  parent_name: père
-  child_name: enfant
-  parent_type: person
-  child_type: person
-  link_with: épouse
-mère de l'épouse:
-  parent_name: mère
-  child_name: enfant
-  parent_type: person
-  child_type: person
-  link_with: épouse
-marraine:
-  parent_name: marraine
-  child_name: filleul
-  parent_type: person
-  child_type: person
-  link_with: sujet
-parrain:
-  parent_name: parrain
-  child_name: filleul
-  parent_type: person
-  child_type: person
-  link_with: sujet
-conjoint:
-  parent_name: conjoint
-  child_name: conjointe
-  parent_type: person
-  child_type: person
-  link_with: conjointe
-conjointe:
-  parent_name: conjointe
-  child_name: conjoint
-  parent_type: person
-  child_type: person
-  link_with: conjoint
-ex-conjointe:
-  parent_name: ex-conjointe
-  child_name: ex-conjoint
-  parent_type: person
-  child_type: person
-  link_with: conjoint
-ex-conjoint:
-  parent_name: ex-conjoint
-  child_name: ex-conjointe
-  parent_type: person
-  child_type: person
-  link_with: conjointe
-témoin:
-  parent_name: témoin
-  child_name: sujet
-  parent_type: person
-  child_type: person
-  link_with: sujet
-époux:
-  parent_name: époux
-  child_name: épouse
-  parent_type: person
-  child_type: person
-  link_with: épouse
-épouse:
-  parent_name: épouse
-  child_name: époux
-  parent_type: person
-  child_type: person
-  link_with: époux
-père:
-  parent_name: père
-  child_name: enfant
-  parent_type: person
-  child_type: person
-  link_with: sujet
-mère:
-  parent_name: mère
-  child_name: enfant
-  parent_type: person
-  child_type: person
-  link_with: sujet
\ No newline at end of file
diff --git a/arkindex/documents/pagexml.py b/arkindex/documents/pagexml.py
deleted file mode 100644
index 161acbdfce..0000000000
--- a/arkindex/documents/pagexml.py
+++ /dev/null
@@ -1,481 +0,0 @@
-from django.utils.functional import cached_property
-from django.db import transaction
-from arkindex_common.ml_tool import MLToolType
-from arkindex_common.pagexml import PageXmlPage
-from arkindex.project.polygon import Polygon
-from arkindex_common.enums import TranscriptionType
-from arkindex.documents.models import \
-    DataSource, Element, Entity, EntityRole, EntityLink, TranscriptionEntity
-import functools
-import string
-import Levenshtein
-import os.path
-import yaml
-import logging
-
-logger = logging.getLogger(__name__)
-
-
-def best_levenshtein(value, tests):
-    score = sorted([
-        (i, Levenshtein.ratio(value, i))
-        for i in tests
-    ], key=lambda x: x[1], reverse=True)
-    return score[0]
-
-
-def load_yaml_configuration(path):
-    assert os.path.exists(path)
-    with open(path) as f:
-        return yaml.safe_load(f)
-
-
-class PageXmlParser(object):
-
-    def __init__(self, path_or_xml, corpus):
-        self.pagexml_page = PageXmlPage(path_or_xml)
-        self.corpus = corpus
-
-    @cached_property
-    def paragraph_type(self):
-        paragraph, _ = self.corpus.types.get_or_create(
-            corpus=self.corpus,
-            slug='paragraph',
-            defaults={'display_name': 'Paragraph'}
-        )
-        return paragraph
-
-    @cached_property
-    def line_type(self):
-        line, _ = self.corpus.types.get_or_create(
-            corpus=self.corpus,
-            slug='line',
-            defaults={'display_name': 'Line'}
-        )
-        return line
-
-    @cached_property
-    def source(self):
-        ds, _ = DataSource.objects.get_or_create(
-            type=MLToolType.Recognizer,
-            slug='transkribus',
-            revision='2013-07-15',
-            internal=False,
-        )
-        return ds
-
-    def get_zone(self, region, parent):
-        poly = Polygon(region.points)
-        z, _ = parent.zone.image.zones.get_or_create(polygon=poly)
-        return z
-
-    def create_annotation(self, region, parent, element_type, transcription_type, name=''):
-        # Creates an element and its corresponding transcription from a region in one transaction
-        if region.text is None:
-            logger.warning('No text in region {}'.format(region.id))
-            return None, False
-
-        if region.points is None:
-            logger.warning('No points in region {}'.format(region.id))
-            return None, False
-
-        with transaction.atomic():
-            # Create or retrieve the corresponding element
-            element, created_elt = self.get_zone(region, parent).elements.get_or_create(
-                corpus=self.corpus,
-                type=element_type,
-                source=self.source,
-                defaults={'name': name}
-            )
-            # Attach the transcribed text to this element
-            transcription, created_tr = element.transcriptions.get_or_create(
-                element=parent,
-                text=region.text,
-                source=self.source,
-                type=transcription_type,
-                score=1 if region.confidence is None else region.confidence,
-            )
-            # Create the relation to the parent (Skipped in case it already exists)
-            element.add_parent(parent)
-
-        return element, transcription, (created_elt or created_tr)
-
-    def save(self, parent):
-        assert isinstance(parent, Element), 'Parent element should be an Arkindex element'
-
-        if self.pagexml_page.page.text_regions is None or not len(self.pagexml_page.page.text_regions):
-            logger.warning('No annotations to save')
-            return []
-
-        region_count, line_count, region_ts_count, line_ts_count = 0, 0, 0, 0
-        annotations = []
-        for region in self.pagexml_page.page.text_regions:
-            if len(region.points) < 2:
-                logger.warning('Ignoring region {} (not enough points in polygon)'.format(region.id))
-                continue
-
-            region_count += 1
-
-            # Create a paragraph element for the text regions
-            paragraph_element, transcription, created = self.create_annotation(
-                region,
-                parent,
-                self.paragraph_type,
-                TranscriptionType.Paragraph,
-                name=str(region_count)
-            )
-            region_ts_count += created
-            if transcription:
-                annotations.append((transcription.id, region))
-
-            for index, line in enumerate(region.lines, start=1):
-                if len(line.points) < 2:
-                    logger.warning('Ignoring line {} (not enough points in polygon)'.format(line.id))
-                    continue
-
-                line_count += 1
-                # Create a line element as a child of the paragraph element
-                _, transcription, created = self.create_annotation(
-                    line,
-                    paragraph_element,
-                    self.line_type,
-                    TranscriptionType.Line,
-                    name=str(index)
-                )
-                line_ts_count += created
-
-        logger.info(
-            f'Parsed {region_count} regions and {line_count} lines and created {region_ts_count} paragraph '
-            f'and {line_ts_count} line elements with a transcription.'
-        )
-        return annotations
-
-    def merge(self, blocks):
-        """
-        Merge all tags and return blocks
-            Regroup tag that continues
-            Regroup all first name with a last name
-            Create completed value
-        """
-        def all_tags(block):
-            for line in block['lines']:
-                for tag in line['tags']:
-                    yield tag
-
-        MERGE_FUNCTION = [
-            self.regroup,
-            self.first_and_last_name,
-            self.cesure
-        ]
-        all_blocks = {}
-        for block in blocks:
-            index_block = block['reading_order']
-            # Filter tags because we can have :
-            #   '_prenom' with 'continued'
-            #   and 'textStyle' after
-            #   and '_prenom' with 'continued' in the next line
-            #       => this broke groupby 'continued'
-            tags = list(filter(lambda tag: tag['name'].startswith('_'), all_tags(block)))
-            for method in MERGE_FUNCTION:
-                tags = method(tags)
-            all_blocks[index_block] = tags
-        return all_blocks
-
-    def regroup(self, items):
-        """
-        Regroup tag that continues
-        """
-        def _clean(accu, item):
-            assert isinstance(accu, list)
-
-            if accu:
-                last = accu[-1]
-                # name is type (_prenom, _nom, ...)
-                # last and item continued
-                # ------------- have the same name
-                # item is one line after last
-                if (last and 'continued' in last.keys() and 'continued' in item.keys()
-                        and last['name'] == item['name'] and last['reading_order'] + 1 == item['reading_order']):
-                    accu[-1]['value'] = None
-                    accu[-1]['length'] = 0
-                    accu[-1]['reading_order'] = item['reading_order']
-                    accu[-1]['items'].append(item)
-                    return accu
-
-            new_item = dict(item)
-            new_item['items'] = []
-            new_item['items'].append(item)
-            return accu + [new_item]
-
-        return functools.reduce(_clean, items, [])
-
-    def first_and_last_name(self, tags):
-        """
-        Regroup all first name with a last name
-        """
-        def add_used(tags):
-            all_tags = []
-            for tag in tags:
-                all_tags.append(
-                    {
-                        'used': False,
-                        'tag': tag
-                    }
-                )
-            return all_tags
-
-        def distance(first_name, last_name):
-            if first_name['offset_in_region'] < last_name['offset_in_region']:
-                lentgh = first_name['length']
-            else:
-                lentgh = last_name['length']
-            return abs(int(first_name['offset_in_region']) - int(last_name['offset_in_region'])) - int(lentgh)
-
-        def find_last_name(first_name, list_last_name):
-            score = sorted([
-                (last_name, distance(first_name, last_name['tag']))
-                for last_name in list_last_name
-            ], key=lambda x: x[1])
-            return score[0][0]
-
-        all_tags = []
-        last_names = add_used(filter(lambda tag: tag['name'] == '_nom', tags))
-
-        for tag in tags:
-            if tag['name'] == '_prenom' and last_names:
-                last_name = find_last_name(tag, last_names)
-                tag['items'] += last_name['tag']['items']
-                all_tags.append(tag)
-                last_name['used'] = True
-
-            elif tag['name'] != '_nom':
-                all_tags.append(tag)
-
-        all_tags += [tag['tag'] for tag in last_names if not tag['used']]
-
-        return all_tags
-
-    def cesure(self, tags):
-        """
-        Create completed value
-        """
-        for tag in tags:
-            value = ''
-            length = 0
-            for item in tag['items']:
-                item['value'] = item['value'].strip()
-                if len(item['value']) < 1:
-                    continue
-                if value.endswith('-') or item['value'].startswith('-'):
-                    value = value.rstrip(string.whitespace + '-')
-                    item['value'] = item['value'].lstrip(string.whitespace + '-')
-                    sep = '-' if item['value'][0].isupper() else ''
-                else:
-                    sep = ' ' if item['value'][0].isupper() else ''
-                value = '{}{}{}'.format(
-                    value,
-                    sep,
-                    item['value']
-                ).strip()
-                # +1 to count line break after this item
-                length += len(item['value']) + 1
-            tag['value'] = value
-            # -1 to remove the line break after the last item
-            tag['length'] = length - 1
-        return tags
-
-    def create_objects(self, blocks, element, path, types, ratio=0.90):
-        """
-        Create entity on transcription, role and link
-        """
-        entities = []
-        for tags in blocks.values():
-            new_entities, new_tags = self.create_entities(tags, element, types)
-            entities += new_entities
-
-            self.create_link_transcription_entity(new_tags)
-            tags = self.create_roles(new_tags, element, path, ratio)
-            self.create_links(tags, path, ratio)
-        return entities
-
-    def create_entities(self, tags, element, types):
-        """
-        Create entity according to 'name' in tag
-        """
-        entities = []
-        for tag in tags:
-            if tag['name'] in types.keys():
-                entity = Entity.objects.create(
-                    name=tag['value'],
-                    type=types[tag['name']],
-                    corpus_id=element.corpus_id,
-                    source_id=self.source.id
-                )
-                entities.append(entity.id)
-                tag['entity_id'] = entity.id
-        return entities, tags
-
-    def create_link_transcription_entity(self, tags):
-
-        def _clean(accu, item):
-            assert isinstance(accu, list)
-
-            if accu:
-                last = accu[-1]
-                # last item follows current item
-                if (last['offset_in_region'] + int(last['length']) + 1 == item['offset_in_region']):
-                    last['value'] = '{} {}'.format(
-                        last['value'],
-                        item['value']
-                    )
-                    last['length'] = len(last['value'])
-                    return accu
-
-            return accu + [item]
-
-        for tag in tags:
-            tag['items'] = sorted(tag['items'], key=lambda tag: tag['offset_in_region'])
-            if'entity_id' in tag.keys() and 'transcription_id' in tag.keys():
-                grouped_items = functools.reduce(_clean, tag['items'], [])
-                for item in grouped_items:
-                    TranscriptionEntity.objects.get_or_create(
-                        entity_id=tag['entity_id'],
-                        transcription_id=tag['transcription_id'],
-                        offset=item['offset_in_region'],
-                        length=item['length']
-                    )
-
-    def create_roles(self, tags, element, path, ratio):
-        """
-        Create role according to '_role' in tag
-        """
-        roles = load_yaml_configuration(path)
-        for tag in tags:
-            if '_role' in tag.keys():
-                # Entity with parent role
-                role = best_levenshtein(
-                    tag['_role'],
-                    roles.keys()
-                )
-                if role[1] > ratio:
-                    name = role[0]
-                    role, created = EntityRole.objects.get_or_create(
-                        parent_name=roles[name]['parent_name'],
-                        child_name=roles[name]['child_name'],
-                        parent_type=roles[name]['parent_type'],
-                        child_type=roles[name]['child_type'],
-                        corpus_id=element.corpus_id
-                    )
-                    tag['role_id'] = role.id
-                else:
-                    logger.warning(
-                        'Role {} not recognized for a parent role'.format(tag['_role'])
-                    )
-
-        return tags
-
-    def create_links(self, tags, path, ratio):
-        """
-        Create link between two entities with a role
-        """
-        links = load_yaml_configuration(path)
-        for tag in tags:
-            if 'role_id' in tag.keys():
-                link = best_levenshtein(
-                    tag['_role'],
-                    links.keys()
-                )
-                if link[1] > ratio:
-                    child = next(
-                        filter(lambda tag: '_role' in tag.keys() and tag['_role'] == links[link[0]]['link_with'], tags),
-                        False
-                    )
-                    if child:
-                        link, created = EntityLink.objects.get_or_create(
-                            parent_id=tag['entity_id'],
-                            child_id=child['entity_id'],
-                            role_id=tag['role_id']
-                        )
-                    else:
-                        logger.warning(
-                            'Child not found for role {}'.format(
-                                tag['_role']
-                            )
-                        )
-                else:
-                    logger.warning(
-                        'Role {} not recognized for a parent role'.format(tag['_role'])
-                    )
-        return tags
-
-    def create_blocks(self, transcriptions):
-        """
-        Create blocks with lines, and line with tags
-        Order tags according to 'offset'
-        Order lines and blocks according to 'reading_order'
-        """
-
-        def update_tag(tag, line, index, transcription_id):
-            if 'offset' in tag.keys():
-                # Save the text value
-                tag['value'] = line.text[
-                    int(tag['offset']):int(tag['length']) + int(tag['offset'])
-                ] if line.text else ''
-            tag['reading_order'] = index
-            tag['transcription_id'] = transcription_id
-            return tag
-
-        blocks = []
-        for transcription_id, region in transcriptions:
-            # Search the tag 'readingOrder' to select the index
-            tag_reading_order = next(filter(lambda tag: tag.name == 'readingOrder', region.tags))
-            index_region = int(tag_reading_order.index)
-            block = {
-                'reading_order': index_region,
-                'lines': []
-            }
-
-            for line in region.lines:
-                # Search the tag 'readingOrder' to select the index
-                tag_reading_order = next(filter(lambda tag: tag.name == 'readingOrder', line.tags))
-                index_line = int(tag_reading_order.index)
-                current_line = {
-                    'reading_order': index_line,
-                    'tags': [],
-                    'length': len(line.text) if line.text else 0
-                }
-                # Add all tags
-                tags = [
-                    update_tag(tag.as_dict(), line, index_line, transcription_id)
-                    for tag in line.tags
-                ]
-                current_line['tags'] = tags
-
-                block['lines'].append(current_line)
-                # Order tags with their offset
-                current_line['tags'] = sorted(
-                    current_line['tags'],
-                    key=lambda i: int(i['offset']) if 'offset' in i.keys() else -1
-                )
-
-            # Order lines with their reading order
-            block['lines'] = sorted(
-                block['lines'],
-                key=lambda i: i['reading_order']
-            )
-            blocks.append(block)
-
-        # Order blocks with their reading order
-        blocks = sorted(blocks, key=lambda i: i['reading_order'])
-
-        # Add an offset in region
-        for block in blocks:
-            length_region = 0
-            for line in block['lines']:
-                for tag in line['tags']:
-                    tag['offset_in_region'] = length_region
-                    tag['offset_in_region'] += int(tag['offset']) if 'offset' in tag.keys() else 0
-                # +1 to add line break
-                length_region += line['length'] + 1
-
-        return blocks
diff --git a/arkindex/documents/tests/pagexml_samples/create_blocks.xml b/arkindex/documents/tests/pagexml_samples/create_blocks.xml
deleted file mode 100644
index 9b71ec8f26..0000000000
--- a/arkindex/documents/tests/pagexml_samples/create_blocks.xml
+++ /dev/null
@@ -1,80 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
-<PcGts xmlns="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15/pagecontent.xsd">
-    <Metadata>
-        <Creator>TRP</Creator>
-        <Created>2018-10-01T09:25:16.139-04:00</Created>
-        <LastChange>2019-02-19T19:45:19.222+01:00</LastChange>
-    </Metadata>
-    <Page imageFilename="01R_CE101S01_1907_005.tif" imageWidth="2415" imageHeight="3936">
-        <ReadingOrder>
-            <OrderedGroup id="ro_1550601919253" caption="Regions reading order">
-                <RegionRefIndexed index="1" regionRef="TextRegion_1540299380975_9"/>
-                <RegionRefIndexed index="0" regionRef="TextRegion_1540299473514_23"/>
-            </OrderedGroup>
-        </ReadingOrder>
-        <Relations>
-            <Relation type="link">
-                <RegionRef regionRef="TextRegion_1540299473514_23"/>
-                <RegionRef regionRef="TextRegion_1540299380975_9"/>
-            </Relation>
-        </Relations>
-        <TextRegion orientation="0.0" type="marginalia" id="TextRegion_1540299380975_9" custom="readingOrder {index:1;} structure {type:marginalia;}">
-            <Coords points="12,34 56,78 910,1112"/>
-            <TextLine id="r1l9" custom="readingOrder {index:3;} _nom {offset:0; length:7;}">
-                <Coords points="12,34 56,78 910,1112"/>
-                <Baseline points="13,37 42,42 37,13"/>
-                <TextEquiv>
-                    <Unicode>Lemieux</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l8" custom="_prenom {offset:0; length:13; continued:true;_role:sujet;} readingOrder {index:2;}">
-                <Coords points="12,34 56,78 910,1112"/>
-                <Baseline points="13,37 42,42 37,13"/>
-                <TextEquiv>
-                    <Unicode>Pierre Siméon</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l7" custom="readingOrder {index:1;} _prenom {offset:0; length:12; continued:true;_role:sujet;}">
-                <Coords points="12,34 56,78 910,1112"/>
-                <Baseline points="13,37 42,42 37,13"/>
-                <TextEquiv>
-                    <Unicode>Louis Joseph</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l6" custom="readingOrder {index:0;} structure {type:ref;}">
-                <Coords points="12,34 56,78 910,1112"/>
-                <Baseline points="13,37 42,42 37,13"/>
-                <TextEquiv>
-                    <Unicode>B .1</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextEquiv>
-                <Unicode>B .1
-Louis Joseph
-Pierre Siméon
-Lemieux</Unicode>
-            </TextEquiv>
-        </TextRegion>
-        <TextRegion orientation="0.0" id="TextRegion_1540299473514_23" custom="readingOrder {index:0;}">
-            <Coords points="12,34 56,78 910,1112"/>
-            <TextLine id="r2l12" custom="_date {offset:3; length:30;_enregistrement:1;} readingOrder {index:0;}">
-                <Coords points="12,34 56,78 910,1112"/>
-                <Baseline points="13,37 42,42 37,13"/>
-                <TextEquiv>
-                    <Unicode>Le onze janvier mil neuf centsept</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l13" custom="readingOrder {index:1;} _prenom {offset:36; length:5; continued:true;_role:sujet;}">
-                <Coords points="12,34 56,78 910,1112"/>
-                <Baseline points="13,37 42,42 37,13"/>
-                <TextEquiv>
-                    <Unicode>nous prêtre soussigné avons baptisé Louis</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextEquiv>
-                <Unicode>Le onze janvier mil neuf centsept
-nous prêtre soussigné avons baptisé Louis</Unicode>
-            </TextEquiv>
-        </TextRegion>
-    </Page>
-</PcGts>
diff --git a/arkindex/documents/tests/pagexml_samples/create_entities.xml b/arkindex/documents/tests/pagexml_samples/create_entities.xml
deleted file mode 100644
index 4e1dd3ad33..0000000000
--- a/arkindex/documents/tests/pagexml_samples/create_entities.xml
+++ /dev/null
@@ -1,275 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
-<PcGts xmlns="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15/pagecontent.xsd">
-    <Metadata>
-        <Creator>prov=University of Rostock/Institute of Mathematics/CITlab/Tobias Gruening/tobias.gruening@uni-rostock.de:name=/net_tf/LA73_249_0mod360.pb:de.uros.citlab.segmentation.CITlab_LA_ML:v=?0.1
-TRP</Creator>
-        <Created>2019-02-28T13:35:12.029-05:00</Created>
-        <LastChange>2019-03-29T15:46:32.663+01:00</LastChange>
-    </Metadata>
-    <Page imageFilename="05S_CE501S27_1906_025.tif" imageWidth="2381" imageHeight="3888">
-        <ReadingOrder>
-            <OrderedGroup id="ro_1553870792710" caption="Regions reading order">
-                <RegionRefIndexed index="1" regionRef="TextRegion_1552404369903_2171"/>
-                <RegionRefIndexed index="0" regionRef="TextRegion_1552404379440_2176"/>
-            </OrderedGroup>
-        </ReadingOrder>
-        <TextRegion orientation="0.0" type="marginalia" id="TextRegion_1552404369903_2171" custom="readingOrder {index:1;}">
-            <Coords points="56,1837 516,1837 516,2348 56,2348"/>
-            <TextLine id="r1l6" custom="readingOrder {index:2;}">
-                <Coords points="153,2115 178,2115 203,2115 229,2115 254,2116 280,2117 305,2118 330,2119 356,2121 381,2122 407,2124 407,2061 381,2059 356,2058 330,2056 305,2055 280,2054 254,2053 229,2052 203,2052 178,2052 153,2052"/>
-                <Baseline points="153,2094 178,2094 203,2094 229,2094 254,2095 280,2096 305,2097 330,2098 356,2100 381,2101 407,2103"/>
-                <TextEquiv>
-                    <Unicode>epse du</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l7" custom="readingOrder {index:3;} _prenom {offset:0; length:6;_role:conjoint;} _nom {offset:7; length:4;}">
-                <Coords points="56,2225 81,2221 107,2218 133,2216 159,2214 185,2211 211,2211 237,2210 263,2209 289,2208 315,2209 341,2210 367,2210 393,2211 419,2211 419,2148 393,2148 367,2147 341,2147 315,2146 289,2145 263,2146 237,2147 211,2148 185,2148 159,2151 133,2153 107,2155 81,2158 56,2162"/>
-                <Baseline points="56,2204 81,2200 107,2197 133,2195 159,2193 185,2190 211,2190 237,2189 263,2188 289,2187 315,2188 341,2189 367,2189 393,2190 419,2190"/>
-                <TextEquiv>
-                    <Unicode>Pierre Guay</Unicode>
-                </TextEquiv>
-            </TextLine>
-             <TextLine id="r1l5" custom="readingOrder {index:1;} _nom {offset:0; length:6;} _prenom {offset:7; length:7;_role:sujet;}">
-                <Coords points="57,2019 82,2018 107,2017 132,2016 157,2015 182,2015 208,2015 233,2015 258,2015 283,2016 308,2016 333,2016 359,2018 384,2018 409,2019 434,2019 459,2021 484,2021 510,2022 510,1959 484,1958 459,1958 434,1956 409,1956 384,1955 359,1955 333,1953 308,1953 283,1953 258,1952 233,1952 208,1952 182,1952 157,1952 132,1953 107,1954 82,1955 57,1956"/>
-                <Baseline points="57,1998 82,1997 107,1996 132,1995 157,1994 182,1994 208,1994 233,1994 258,1994 283,1995 308,1995 333,1995 359,1997 384,1997 409,1998 434,1998 459,2000 484,2000 510,2001"/>
-                <TextEquiv>
-                    <Unicode>Boivin Onezime</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l8" custom="readingOrder {index:4;}">
-                <Coords points="189,2348 215,2343 241,2340 267,2336 293,2333 319,2331 345,2330 371,2328 397,2328 423,2328 423,2265 397,2265 371,2265 345,2267 319,2268 293,2270 267,2273 241,2277 215,2280 189,2285"/>
-                <Baseline points="189,2327 215,2322 241,2319 267,2315 293,2312 319,2310 345,2309 371,2307 397,2307 423,2307"/>
-                <TextEquiv>
-                    <Unicode>P.109</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l4" custom="readingOrder {index:0;} structure {type:ref;}">
-                <Coords points="170,1922 196,1923 223,1923 250,1923 276,1922 303,1920 330,1920 357,1920 357,1857 330,1857 303,1857 276,1859 250,1860 223,1860 196,1860 170,1859"/>
-                <Baseline points="170,1901 196,1902 223,1902 250,1902 276,1901 303,1899 330,1899 357,1899"/>
-                <TextEquiv>
-                    <Unicode>S. 15</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextEquiv>
-                <Unicode>S. 15
-Boivin Onezime
-epse du
-Pierre Guay
-P.109</Unicode>
-            </TextEquiv>
-        </TextRegion>
-        <TextRegion orientation="0.0" id="TextRegion_1552404379440_2176" custom="readingOrder {index:0;}">
-            <Coords points="516,1839 2337,1838 2337,3818 516,3819"/>
-            <TextLine id="r2l17" custom="readingOrder {index:0;} _date {offset:3; length:29;_enregistrement:1;}">
-                <Coords points="546,1916 632,1919 719,1922 806,1923 892,1925 979,1926 1066,1926 1152,1926 1239,1926 1326,1926 1413,1926 1499,1925 1586,1925 1673,1925 1759,1925 1846,1925 1933,1925 2019,1925 2106,1926 2193,1928 2280,1931 2280,1868 2193,1865 2106,1863 2019,1862 1933,1862 1846,1862 1759,1862 1673,1862 1586,1862 1499,1862 1413,1863 1326,1863 1239,1863 1152,1863 1066,1863 979,1863 892,1862 806,1860 719,1859 632,1856 546,1853"/>
-                <Baseline points="546,1895 632,1898 719,1901 806,1902 892,1904 979,1905 1066,1905 1152,1905 1239,1905 1326,1905 1413,1905 1499,1904 1586,1904 1673,1904 1759,1904 1846,1904 1933,1904 2019,1904 2106,1905 2193,1907 2280,1910"/>
-                <TextEquiv>
-                    <Unicode>Le treize Août mil neuf cent-six, nous</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l18" custom="readingOrder {index:1;}">
-                <Coords points="551,2021 637,2021 723,2021 809,2021 895,2021 981,2020 1067,2020 1153,2018 1239,2017 1325,2015 1411,2014 1497,2012 1583,2011 1669,2011 1755,2009 1841,2009 1927,2009 2013,2009 2099,2010 2185,2011 2271,2014 2271,1953 2185,1950 2099,1949 2013,1948 1927,1948 1841,1948 1755,1948 1669,1950 1583,1950 1497,1951 1411,1953 1325,1954 1239,1956 1153,1957 1067,1959 981,1959 895,1960 809,1960 723,1960 637,1960 551,1960"/>
-                <Baseline points="551,2001 637,2001 723,2001 809,2001 895,2001 981,2000 1067,2000 1153,1998 1239,1997 1325,1995 1411,1994 1497,1992 1583,1991 1669,1991 1755,1989 1841,1989 1927,1989 2013,1989 2099,1990 2185,1991 2271,1994"/>
-                <TextEquiv>
-                    <Unicode>Prêtre curé soussigné, avons inhumé, dans </Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l19" custom="readingOrder {index:2;}">
-                <Coords points="527,2117 614,2117 701,2117 788,2117 875,2117 962,2117 1049,2115 1136,2115 1223,2115 1310,2115 1397,2115 1484,2115 1571,2115 1658,2115 1745,2115 1832,2115 1919,2115 2006,2114 2093,2114 2180,2114 2267,2114 2267,2051 2180,2051 2093,2051 2006,2051 1919,2052 1832,2052 1745,2052 1658,2052 1571,2052 1484,2052 1397,2052 1310,2052 1223,2052 1136,2052 1049,2052 962,2054 875,2054 788,2054 701,2054 614,2054 527,2054"/>
-                <Baseline points="527,2096 614,2096 701,2096 788,2096 875,2096 962,2096 1049,2094 1136,2094 1223,2094 1310,2094 1397,2094 1484,2094 1571,2094 1658,2094 1745,2094 1832,2094 1919,2094 2006,2093 2093,2093 2180,2093 2267,2093"/>
-                <TextEquiv>
-                    <Unicode>le cimetière de cette paroisse, le corps</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l20" custom="_prenom {offset:3; length:7;_role:sujet;} _nom {offset:11; length:6;} readingOrder {index:3;} _prenom {offset:29; length:6;_role:conjoint;}">
-                <Coords points="539,2216 629,2213 719,2210 810,2208 900,2207 991,2205 1081,2205 1172,2205 1262,2205 1353,2205 1443,2205 1534,2205 1624,2207 1715,2207 1805,2207 1896,2205 1986,2205 2077,2204 2167,2202 2258,2199 2258,2136 2167,2139 2077,2141 1986,2142 1896,2142 1805,2144 1715,2144 1624,2144 1534,2142 1443,2142 1353,2142 1262,2142 1172,2142 1081,2142 991,2142 900,2144 810,2145 719,2147 629,2150 539,2153"/>
-                <Baseline points="539,2195 629,2192 719,2189 810,2187 900,2186 991,2184 1081,2184 1172,2184 1262,2184 1353,2184 1443,2184 1534,2184 1624,2186 1715,2186 1805,2186 1896,2184 1986,2184 2077,2183 2167,2181 2258,2178"/>
-                <TextEquiv>
-                    <Unicode>de Onézime Boivin, épouse de Pierre</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l21" custom="readingOrder {index:4;} _nom {offset:0; length:4;} _date {offset:14; length:10;_evenement:1;}">
-                <Coords points="545,2310 619,2307 694,2304 769,2302 844,2301 919,2299 994,2298 1069,2298 1144,2298 1219,2298 1294,2298 1368,2298 1443,2298 1518,2298 1593,2298 1668,2298 1743,2298 1818,2298 1893,2298 1968,2296 2043,2296 2043,2230 1968,2230 1893,2232 1818,2232 1743,2232 1668,2232 1593,2232 1518,2232 1443,2232 1368,2232 1294,2232 1219,2232 1144,2232 1069,2232 994,2232 919,2233 844,2235 769,2236 694,2238 619,2241 545,2244"/>
-                <Baseline points="545,2288 619,2285 694,2282 769,2280 844,2279 919,2277 994,2276 1069,2276 1144,2276 1219,2276 1294,2276 1368,2276 1443,2276 1518,2276 1593,2276 1668,2276 1743,2276 1818,2276 1893,2276 1968,2274 2043,2274"/>
-                <TextEquiv>
-                    <Unicode>Guay; décédée avant-hier à l'âge de</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l22" custom="readingOrder {index:5;} _lieu_residence {offset:30; length:10; continued:true;} _age {offset:0; length:27;} blackening {offset:23; length:4;}">
-                <Coords points="537,2400 621,2399 706,2398 791,2398 876,2397 960,2397 1045,2397 1130,2397 1215,2397 1299,2397 1384,2397 1469,2397 1554,2397 1638,2397 1723,2397 1808,2397 1893,2397 1977,2397 2062,2397 2147,2397 2232,2397 2232,2331 2147,2331 2062,2331 1977,2331 1893,2331 1808,2331 1723,2331 1638,2331 1554,2331 1469,2331 1384,2331 1299,2331 1215,2331 1130,2331 1045,2331 960,2331 876,2331 791,2332 706,2332 621,2333 537,2334"/>
-                <Baseline points="537,2378 621,2377 706,2376 791,2376 876,2375 960,2375 1045,2375 1130,2375 1215,2375 1299,2375 1384,2375 1469,2375 1554,2375 1638,2375 1723,2375 1808,2375 1893,2375 1977,2375 2062,2375 2147,2375 2232,2375"/>
-                <TextEquiv>
-                    <Unicode>soixante-un ans et dix mois à Manchester</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l23" custom="readingOrder {index:6;} _lieu_residence {offset:0; length:4; continued:true;}">
-                <Coords points="536,2497 619,2497 702,2496 785,2496 868,2496 951,2494 1034,2494 1117,2494 1200,2493 1283,2493 1366,2493 1449,2493 1532,2493 1615,2493 1698,2493 1781,2493 1864,2494 1947,2494 2030,2496 2113,2497 2196,2500 2196,2434 2113,2431 2030,2430 1947,2428 1864,2428 1781,2427 1698,2427 1615,2427 1532,2427 1449,2427 1366,2427 1283,2427 1200,2427 1117,2428 1034,2428 951,2428 868,2430 785,2430 702,2430 619,2431 536,2431"/>
-                <Baseline points="536,2475 619,2475 702,2474 785,2474 868,2474 951,2472 1034,2472 1117,2472 1200,2471 1283,2471 1366,2471 1449,2471 1532,2471 1615,2471 1698,2471 1781,2471 1864,2472 1947,2472 2030,2474 2113,2475 2196,2478"/>
-                <TextEquiv>
-                    <Unicode>N.H. Etats-Unis; et transportée pour</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l24" custom="readingOrder {index:7;}">
-                <Coords points="540,2594 630,2595 720,2595 811,2594 901,2594 992,2592 1082,2590 1173,2588 1263,2586 1354,2585 1444,2583 1535,2582 1625,2580 1716,2580 1806,2582 1897,2582 1987,2585 2078,2588 2168,2592 2259,2597 2259,2534 2168,2529 2078,2525 1987,2522 1897,2519 1806,2519 1716,2517 1625,2517 1535,2519 1444,2520 1354,2522 1263,2523 1173,2525 1082,2527 992,2529 901,2531 811,2531 720,2532 630,2532 540,2531"/>
-                <Baseline points="540,2573 630,2574 720,2574 811,2573 901,2573 992,2571 1082,2569 1173,2567 1263,2565 1354,2564 1444,2562 1535,2561 1625,2559 1716,2559 1806,2561 1897,2561 1987,2564 2078,2567 2168,2571 2259,2576"/>
-                <TextEquiv>
-                    <Unicode>inhumation sons permis de transport</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l25" custom="readingOrder {index:8;} textStyle {offset:12; length:1;fontSize:0.0; kerning:0; superscript:true;} strike_through {offset:14; length:17;} textStyle {offset:14; length:17;strikethrough:true; fontSize:0.0; kerning:0;}">
-                <Coords points="546,2679 633,2681 721,2682 808,2682 896,2682 984,2682 1071,2682 1159,2681 1246,2681 1334,2679 1422,2678 1509,2678 1597,2676 1684,2676 1772,2676 1860,2676 1947,2676 2035,2678 2122,2680 2210,2682 2298,2685 2298,2622 2210,2619 2122,2617 2035,2615 1947,2613 1860,2613 1772,2613 1684,2613 1597,2613 1509,2615 1422,2615 1334,2616 1246,2618 1159,2618 1071,2619 984,2619 896,2619 808,2619 721,2619 633,2618 546,2616"/>
-                <Baseline points="546,2658 633,2660 721,2661 808,2661 896,2661 984,2661 1071,2661 1159,2660 1246,2660 1334,2658 1422,2657 1509,2657 1597,2655 1684,2655 1772,2655 1860,2655 1947,2655 2035,2657 2122,2659 2210,2661 2298,2664"/>
-                <TextEquiv>
-                    <Unicode>délivré par x(par A. L. Godbois) A.L.Gad</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l26" custom="readingOrder {index:9;} unclear {offset:6; length:1;}">
-                <Coords points="534,2768 620,2770 707,2771 794,2771 880,2773 967,2773 1054,2773 1140,2773 1227,2773 1314,2773 1401,2773 1487,2771 1574,2771 1661,2771 1747,2771 1834,2771 1921,2772 2007,2773 2094,2773 2181,2774 2268,2776 2268,2707 2181,2705 2094,2704 2007,2704 1921,2703 1834,2702 1747,2702 1661,2702 1574,2702 1487,2702 1401,2704 1314,2704 1227,2704 1140,2704 1054,2704 967,2704 880,2704 794,2702 707,2702 620,2701 534,2699"/>
-                <Baseline points="534,2745 620,2747 707,2748 794,2748 880,2750 967,2750 1054,2750 1140,2750 1227,2750 1314,2750 1401,2750 1487,2748 1574,2748 1661,2748 1747,2748 1834,2748 1921,2749 2007,2750 2094,2750 2181,2751 2268,2753"/>
-                <TextEquiv>
-                    <Unicode>bois, entrepreneur de Manchester, comté</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l27" custom="readingOrder {index:10;}">
-                <Coords points="543,2863 630,2864 718,2866 806,2866 894,2864 981,2864 1069,2863 1157,2861 1245,2860 1332,2858 1420,2857 1508,2857 1596,2857 1683,2857 1771,2858 1859,2861 1947,2866 2034,2870 2122,2876 2210,2885 2324,2916 2324,2847 2210,2816 2122,2807 2034,2801 1947,2797 1859,2792 1771,2789 1683,2788 1596,2788 1508,2788 1420,2788 1332,2789 1245,2791 1157,2792 1069,2794 981,2795 894,2795 806,2797 718,2797 630,2795 543,2794"/>
-                <Baseline points="543,2840 630,2841 718,2843 806,2843 894,2841 981,2841 1069,2840 1157,2838 1245,2837 1332,2835 1420,2834 1508,2834 1596,2834 1683,2834 1771,2835 1859,2838 1947,2843 2034,2847 2122,2853 2210,2862 2322,2905"/>
-                <TextEquiv>
-                    <Unicode>de Hillsborough, N.H. Etats-Unis. Témoins</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l28" custom="readingOrder {index:11;} _prenom {offset:0; length:6;_role:témoin;} _nom {offset:7; length:4;} _prenom {offset:27; length:4;_role:témoin;} _nom {offset:32; length:4;}">
-                <Coords points="549,2960 637,2964 725,2967 813,2968 901,2969 989,2969 1077,2967 1165,2966 1253,2965 1341,2963 1429,2961 1517,2960 1605,2958 1693,2957 1781,2957 1869,2958 1957,2960 2045,2961 2133,2965 2221,2970 2328,2985 2328,2922 2221,2907 2133,2902 2045,2898 1957,2897 1869,2895 1781,2894 1693,2894 1605,2895 1517,2897 1429,2898 1341,2900 1253,2902 1165,2903 1077,2904 989,2906 901,2906 813,2905 725,2904 637,2901 549,2897"/>
-                <Baseline points="549,2939 637,2943 725,2946 813,2947 901,2948 989,2948 1077,2946 1165,2945 1253,2944 1341,2942 1429,2940 1517,2939 1605,2937 1693,2936 1781,2936 1869,2937 1957,2939 2045,2940 2133,2944 2221,2949 2326,2971"/>
-                <TextEquiv>
-                    <Unicode>Pierre Guay époux sus-dit; Omer Guay, et</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l29" custom="readingOrder {index:12;} _prenom {offset:0; length:7;_role:témoin;} _prenom {offset:9; length:8;_role:témoin;} _prenom {offset:19; length:6;_role:témoin;} _prenom {offset:27; length:6;_role:témoin;} _prenom {offset:35; length:4;_role:témoin;}">
-                <Coords points="554,3048 640,3051 726,3053 812,3054 899,3056 985,3056 1071,3057 1158,3057 1244,3057 1330,3057 1417,3057 1503,3057 1589,3057 1675,3057 1762,3057 1848,3059 1934,3060 2021,3062 2107,3065 2193,3067 2280,3071 2280,3008 2193,3004 2107,3002 2021,2999 1934,2997 1848,2996 1762,2994 1675,2994 1589,2994 1503,2994 1417,2994 1330,2994 1244,2994 1158,2994 1071,2994 985,2993 899,2993 812,2991 726,2990 640,2988 554,2985"/>
-                <Baseline points="554,3027 640,3030 726,3032 812,3033 899,3035 985,3035 1071,3036 1158,3036 1244,3036 1330,3036 1417,3036 1503,3036 1589,3036 1675,3036 1762,3036 1848,3038 1934,3039 2021,3041 2107,3044 2193,3046 2280,3050"/>
-                <TextEquiv>
-                    <Unicode>Liguori, Gonzague, Silvio, Azilda, Emma</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l30" custom="readingOrder {index:13;} _prenom {offset:0; length:5;_role:témoin;} _prenom {offset:7; length:6;_role:témoin;} _prenom {offset:15; length:6;_role:témoin;} blackening {offset:39; length:1;}">
-                <Coords points="546,3126 637,3131 729,3135 820,3138 912,3139 1003,3139 1095,3139 1186,3138 1278,3138 1369,3136 1461,3134 1552,3133 1644,3132 1735,3132 1827,3132 1918,3133 2010,3136 2101,3140 2193,3145 2285,3153 2285,3123 2193,3115 2101,3110 2010,3106 1918,3103 1827,3102 1735,3102 1644,3102 1552,3103 1461,3104 1369,3106 1278,3108 1186,3108 1095,3109 1003,3109 912,3109 820,3108 729,3105 637,3101 546,3096"/>
-                <Baseline points="546,3116 637,3121 729,3125 820,3128 912,3129 1003,3129 1095,3129 1186,3128 1278,3128 1369,3126 1461,3124 1552,3123 1644,3122 1735,3122 1827,3122 1918,3123 2010,3126 2101,3130 2193,3135 2285,3143"/>
-                <TextEquiv>
-                    <Unicode>Odile, Sylvia, Adwida : Tous enfants, de</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l32" custom="readingOrder {index:14;}">
-                <Coords points="537,3245 625,3245 713,3245 801,3245 889,3244 977,3242 1065,3242 1153,3241 1241,3241 1329,3239 1417,3239 1505,3239 1593,3239 1681,3241 1769,3242 1857,3244 1945,3245 2033,3248 2121,3253 2209,3257 2298,3263 2298,3194 2209,3188 2121,3184 2033,3179 1945,3176 1857,3175 1769,3173 1681,3172 1593,3170 1505,3170 1417,3170 1329,3170 1241,3172 1153,3172 1065,3173 977,3173 889,3175 801,3176 713,3176 625,3176 537,3176"/>
-                <Baseline points="537,3222 625,3222 713,3222 801,3222 889,3221 977,3219 1065,3219 1153,3218 1241,3218 1329,3216 1417,3216 1505,3216 1593,3216 1681,3218 1769,3219 1857,3221 1945,3222 2033,3225 2121,3230 2209,3234 2298,3240"/>
-                <TextEquiv>
-                    <Unicode>la défunte, aussi que des parents et amis</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l33" custom="readingOrder {index:15;}">
-                <Coords points="525,3339 618,3339 712,3339 805,3337 899,3336 993,3334 1086,3333 1180,3331 1274,3330 1367,3328 1461,3328 1554,3327 1648,3328 1742,3330 1835,3331 1929,3334 2023,3339 2116,3344 2210,3352 2304,3360 2304,3294 2210,3286 2116,3278 2023,3273 1929,3268 1835,3265 1742,3264 1648,3262 1554,3261 1461,3262 1367,3262 1274,3264 1180,3265 1086,3267 993,3268 899,3270 805,3271 712,3273 618,3273 525,3273"/>
-                <Baseline points="525,3317 618,3317 712,3317 805,3315 899,3314 993,3312 1086,3311 1180,3309 1274,3308 1367,3306 1461,3306 1554,3305 1648,3306 1742,3308 1835,3309 1929,3312 2023,3317 2116,3322 2210,3330 2304,3338"/>
-                <TextEquiv>
-                    <Unicode>un très grand nombre les suivants ont</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l34" custom="readingOrder {index:16;}">
-                <Coords points="533,3432 610,3431 687,3429 764,3428 841,3426 918,3425 995,3423 1072,3422 1149,3420 1226,3419 1303,3419 1380,3417 1457,3417 1534,3417 1611,3417 1688,3419 1765,3420 1842,3422 1919,3425 1996,3429 2073,3432 2073,3368 1996,3365 1919,3361 1842,3358 1765,3356 1688,3355 1611,3353 1534,3353 1457,3353 1380,3353 1303,3355 1226,3355 1149,3356 1072,3358 995,3359 918,3361 841,3362 764,3364 687,3365 610,3367 533,3368"/>
-                <Baseline points="533,3411 610,3410 687,3408 764,3407 841,3405 918,3404 995,3402 1072,3401 1149,3399 1226,3398 1303,3398 1380,3396 1457,3396 1534,3396 1611,3396 1688,3398 1765,3399 1842,3401 1919,3404 1996,3408 2073,3411"/>
-                <TextEquiv>
-                    <Unicode>signé avec nous : lecture faite.</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l35" custom="readingOrder {index:17;} textStyle {offset:0; length:1;fontSize:0.0; kerning:0; superscript:true;}">
-                <Coords points="588,3506 709,3516 780,3516 851,3516 922,3516 993,3516 1063,3515 1134,3515 1205,3513 1276,3513 1347,3512 1417,3512 1488,3512 1559,3510 1630,3510 1701,3510 1771,3512 1842,3512 1913,3513 1984,3513 2055,3515 2055,3451 1984,3449 1913,3449 1842,3448 1771,3448 1701,3446 1630,3446 1559,3446 1488,3448 1417,3448 1347,3448 1276,3449 1205,3449 1134,3451 1063,3451 993,3452 922,3452 851,3452 780,3452 709,3452 588,3442"/>
-                <Baseline points="585,3486 709,3495 780,3495 851,3495 922,3495 993,3495 1063,3494 1134,3494 1205,3492 1276,3492 1347,3491 1417,3491 1488,3491 1559,3489 1630,3489 1701,3489 1771,3491 1842,3491 1913,3492 1984,3492 2055,3494"/>
-                <TextEquiv>
-                    <Unicode>x deux mots et deux lettres rayés nuls</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="line_1553870772618_1314" custom="readingOrder {index:18;} structure {type:signature-mark;}">
-                <Coords points="624,3556 649,3554 681,3553 714,3552 747,3550 943,3550 976,3552 1041,3552 1074,3553 1113,3553 1113,3614 1074,3614 1041,3613 976,3613 943,3611 747,3611 714,3613 681,3614 649,3615 624,3617"/>
-                <Baseline points="624,3597 649,3595 681,3594 714,3593 747,3591 779,3591 812,3591 845,3591 878,3591 910,3591 943,3591 976,3593 1008,3593 1041,3593 1074,3594 1106,3594 1113,3594"/>
-                <TextEquiv>
-                    <Unicode>Omer Guay</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="line_1552917881829_666" custom="readingOrder {index:19;} structure {type:signature-mark;}">
-                <Coords points="1271,3549 1533,3549 1586,3550 1638,3550 1691,3552 1743,3552 1796,3553 1848,3553 1887,3554 1887,3615 1848,3614 1796,3614 1743,3613 1691,3613 1638,3611 1586,3611 1533,3610 1271,3610"/>
-                <Baseline points="1271,3590 1323,3590 1376,3590 1428,3590 1481,3590 1533,3590 1586,3591 1638,3591 1691,3593 1743,3593 1796,3594 1848,3594 1887,3595"/>
-                <TextEquiv>
-                    <Unicode>Liguori Guay</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="line_1552917880693_660" custom="readingOrder {index:20;} structure {type:signature-mark;} blackening {offset:3; length:2;}">
-                <Coords points="1931,3555 1953,3555 2006,3556 2111,3556 2163,3558 2216,3558 2268,3557 2334,3556 2334,3617 2268,3618 2216,3619 2163,3619 2111,3617 2006,3617 1953,3616 1931,3616"/>
-                <Baseline points="1931,3596 1953,3596 2006,3597 2058,3597 2111,3597 2163,3599 2216,3599 2268,3598 2334,3596"/>
-                <TextEquiv>
-                    <Unicode>Odile Guay</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="line_1552404356529_2159" custom="readingOrder {index:21;} structure {type:signature-mark;}">
-                <Coords points="548,3647 614,3643 680,3639 746,3637 812,3635 1010,3635 1076,3637 1142,3638 1144,3638 1144,3698 1142,3698 1076,3697 1010,3695 812,3695 746,3697 680,3699 614,3703 548,3707"/>
-                <Baseline points="548,3687 614,3683 680,3679 746,3677 812,3675 878,3675 944,3675 1010,3675 1076,3677 1142,3678 1144,3678"/>
-                <TextEquiv>
-                    <Unicode>Pierre Guay</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="line_1552404355305_2154" custom="readingOrder {index:22;} structure {type:signature-mark;}">
-                <Coords points="1262,3640 1274,3641 1340,3643 1406,3644 1604,3644 1670,3643 1736,3641 1802,3638 1868,3634 1868,3694 1802,3698 1736,3701 1670,3703 1604,3704 1406,3704 1340,3703 1274,3701 1262,3700"/>
-                <Baseline points="1262,3680 1274,3681 1340,3683 1406,3684 1472,3684 1538,3684 1604,3684 1670,3683 1736,3681 1802,3678 1868,3674"/>
-                <TextEquiv>
-                    <Unicode>Gonzigue Guay</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l41" custom="readingOrder {index:23;} structure {type:signature-mark;}">
-                <Coords points="591,3818 616,3816 641,3815 666,3814 692,3812 717,3812 742,3811 767,3811 793,3811 818,3811 843,3811 869,3811 894,3811 919,3811 944,3811 970,3811 995,3811 1020,3811 1046,3809 1046,3740 1020,3742 995,3742 970,3742 944,3742 919,3742 894,3742 869,3742 843,3742 818,3742 793,3742 767,3742 742,3742 717,3743 692,3743 666,3745 641,3746 616,3747 591,3749"/>
-                <Baseline points="591,3795 616,3793 641,3792 666,3791 692,3789 717,3789 742,3788 767,3788 793,3788 818,3788 843,3788 869,3788 894,3788 919,3788 944,3788 970,3788 995,3788 1020,3788 1046,3786"/>
-                <TextEquiv>
-                    <Unicode>Sylvio Guay</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="line_1553870634900_1282" custom="readingOrder {index:24;} structure {type:signature-mark;}">
-                <Coords points="1909,3691 1962,3685 2003,3683 2060,3684 2059,3734 2004,3733 1966,3735 1915,3741"/>
-                <Baseline points="1914,3736 1966,3730 2004,3728 2059,3729"/>
-                <TextEquiv>
-                    <Unicode>Ptre</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="line_1553870580630_1273" custom="readingOrder {index:25;} structure {type:signature-mark;}">
-                <Coords points="1355,3742 1415,3743 1483,3742 1552,3741 1612,3740 1667,3740 1731,3741 1772,3741 1829,3740 1890,3741 1956,3744 2008,3755 2060,3747 2101,3743 2159,3727 2171,3776 2110,3792 2062,3797 2005,3801 1954,3796 1890,3791 1829,3790 1772,3791 1731,3791 1667,3790 1612,3790 1553,3791 1484,3792 1415,3793 1354,3792"/>
-                <Baseline points="1354,3783 1415,3784 1484,3783 1553,3782 1612,3781 1667,3781 1731,3782 1772,3782 1829,3781 1890,3782 1954,3784 2006,3793 2063,3792 2109,3783 2170,3767"/>
-                <TextEquiv>
-                    <Unicode>Thos O Neill curé</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextEquiv>
-                <Unicode>Le treize Août mil neuf cent-six, nous
-Prêtre curé soussigné, avons inhumé, dans 
-le cimetière de cette paroisse, le corps
-de Onézime Boivin, épouse de Pierre
-Guay; décédée avant-hier à l'âge de
-soixante-un ans et dix mois à Manchester
-N.H. Etats-Unis; et transportée pour
-inhumation sons permis de transport
-délivré par x(par A. L. Godbois) A.L.Gad
-bois, entrepreneur de Manchester, comté
-de Hillsborough, N.H. Etats-Unis. Témoins
-Pierre Guay époux sus-dit; Omer Guay, et
-Liguori, Gonzague, Silvio, Azilda, Emma
-Odile, Sylvia, Adwida : Tous enfants, de
-la défunte, aussi que des parents et amis
-un très grand nombre les suivants ont
-signé avec nous : lecture faite.
-x deux mots et deux lettres rayés nuls
-Omer Guay
-Liguori Guay
-Odile Guay
-Pierre Guay
-Gonzigue Guay
-Sylvio Guay
-Ptre
-Thos O Neill curé</Unicode>
-            </TextEquiv>
-        </TextRegion>
-    </Page>
-</PcGts>
\ No newline at end of file
diff --git a/arkindex/documents/tests/pagexml_samples/first_and_last_name.xml b/arkindex/documents/tests/pagexml_samples/first_and_last_name.xml
deleted file mode 100644
index d4c1bf72ec..0000000000
--- a/arkindex/documents/tests/pagexml_samples/first_and_last_name.xml
+++ /dev/null
@@ -1,185 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
-<PcGts xmlns="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15/pagecontent.xsd">
-    <Metadata>
-        <Creator>prov=University of Rostock/Institute of Mathematics/CITlab/Tobias Gruening/tobias.gruening@uni-rostock.de:name=/net_tf/LA73_249_0mod360.pb:de.uros.citlab.segmentation.CITlab_LA_ML:v=?0.1
-TRP</Creator>
-        <Created>2019-02-28T13:35:11.448-05:00</Created>
-        <LastChange>2019-03-29T14:07:34.544+01:00</LastChange>
-    </Metadata>
-    <Page imageFilename="05S_CE501S27_1906_022.tif" imageWidth="2405" imageHeight="3912">
-        <ReadingOrder>
-            <OrderedGroup id="ro_1553864854598" caption="Regions reading order">
-                <RegionRefIndexed index="2" regionRef="TextRegion_1552403815227_1830"/>
-                <RegionRefIndexed index="0" regionRef="TextRegion_1552403812724_1826"/>
-                <RegionRefIndexed index="1" regionRef="TextRegion_1552403880332_1857"/>
-            </OrderedGroup>
-        </ReadingOrder>
-        <TextRegion orientation="0.0" type="marginalia" id="TextRegion_1552403815227_1830" custom="readingOrder {index:2;}">
-            <Coords points="71,1661 554,1662 557,2035 531,2036 531,2644 71,2644"/>
-            <TextLine id="r1l9" custom="readingOrder {index:0;} structure {type:ref;}">
-                <Coords points="198,1746 224,1740 251,1736 278,1733 305,1731 332,1731 359,1731 386,1731 413,1733 440,1734 467,1736 467,1681 440,1679 413,1678 386,1676 359,1676 332,1676 305,1676 278,1678 251,1681 224,1685 198,1691"/>
-                <Baseline points="198,1728 224,1722 251,1718 278,1715 305,1713 332,1713 359,1713 386,1713 413,1715 440,1716 467,1718"/>
-                <TextEquiv>
-                    <Unicode>S. 12</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l10" custom="readingOrder {index:1;} _nom {offset:0; length:7;}">
-                <Coords points="80,1823 105,1822 131,1821 157,1821 183,1820 209,1820 234,1820 260,1820 286,1820 312,1818 338,1818 363,1818 389,1818 415,1817 441,1817 467,1815 467,1760 441,1762 415,1762 389,1763 363,1763 338,1763 312,1763 286,1765 260,1765 234,1765 209,1765 183,1765 157,1766 131,1766 105,1767 80,1768"/>
-                <Baseline points="80,1805 105,1804 131,1803 157,1803 183,1802 209,1802 234,1802 260,1802 286,1802 312,1800 338,1800 363,1800 389,1800 415,1799 441,1799 467,1797"/>
-                <TextEquiv>
-                    <Unicode>Lapalme</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l11" custom="readingOrder {index:2;} _prenom {offset:0; length:14;_role:sujet;}">
-                <Coords points="71,1910 96,1913 121,1913 147,1914 172,1915 198,1914 223,1913 248,1913 274,1912 299,1910 325,1909 350,1909 375,1907 401,1906 426,1906 452,1904 477,1904 503,1903 503,1843 477,1844 452,1844 426,1846 401,1846 375,1847 350,1849 325,1849 299,1850 274,1852 248,1853 223,1853 198,1854 172,1855 147,1854 121,1853 96,1853 71,1850"/>
-                <Baseline points="71,1890 96,1893 121,1893 147,1894 172,1895 198,1894 223,1893 248,1893 274,1892 299,1890 325,1889 350,1889 375,1887 401,1886 426,1886 452,1884 477,1884 503,1883"/>
-                <TextEquiv>
-                    <Unicode>Charles-Albert</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l12" custom="readingOrder {index:3;}">
-                <Coords points="368,2011 401,2000 434,1997 544,1992 544,1932 434,1937 401,1940 368,1951"/>
-                <Baseline points="368,1991 401,1980 434,1977 535,1977"/>
-                <TextEquiv>
-                    <Unicode>P.107</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="line_1552403776822_1816" custom="readingOrder {index:4;} structure {type:signature-mark;}">
-                <Coords points="147,2324 312,2320 340,2321 527,2325 527,2385 320,2378 285,2382 147,2384"/>
-                <Baseline points="148,2362 211,2364 238,2364 264,2364 291,2364 312,2360 340,2361 368,2363 396,2366 425,2367 453,2369 481,2370 530,2370"/>
-                <TextEquiv>
-                    <Unicode>S. Chycoine</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l15" custom="readingOrder {index:5;}">
-                <Coords points="138,2498 163,2495 188,2491 213,2488 238,2486 263,2483 288,2481 314,2479 339,2478 364,2477 389,2477 414,2477 439,2479 465,2480 465,2420 439,2419 414,2417 389,2417 364,2417 339,2418 314,2419 288,2421 263,2423 238,2426 213,2428 188,2431 163,2435 138,2438"/>
-                <Baseline points="138,2478 163,2475 188,2471 213,2468 238,2466 263,2463 288,2461 314,2459 339,2458 364,2457 389,2457 414,2457 439,2459 465,2460"/>
-                <TextEquiv>
-                    <Unicode>voir folio 10</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextEquiv>
-                <Unicode>S. 12
-Lapalme
-Charles-Albert
-P.107
-S. Chycoine
-voir folio 10</Unicode>
-            </TextEquiv>
-        </TextRegion>
-        <TextRegion orientation="0.0" type="marginalia" id="TextRegion_1552403812724_1826" custom="readingOrder {index:0;}">
-            <Coords points="71,2644 541,2644 541,3296 71,3296"/>
-            <TextLine id="r1l16" custom="readingOrder {index:0;} structure {type:ref;}">
-                <Coords points="194,2776 219,2774 244,2770 269,2769 295,2767 320,2766 345,2766 370,2766 396,2767 396,2709 370,2708 345,2708 320,2708 295,2709 269,2711 244,2712 219,2716 194,2718"/>
-                <Baseline points="194,2757 219,2755 244,2751 269,2750 295,2748 320,2747 345,2747 370,2747 396,2748"/>
-                <TextEquiv>
-                    <Unicode>B. 31</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l17" custom="readingOrder {index:1;} _nom {offset:0; length:7;}">
-                <Coords points="105,2851 131,2851 158,2851 185,2851 212,2851 239,2850 266,2849 293,2848 320,2847 347,2846 374,2845 401,2845 428,2844 428,2787 401,2788 374,2788 347,2789 320,2790 293,2791 266,2792 239,2793 212,2794 185,2794 158,2794 131,2794 105,2794"/>
-                <Baseline points="105,2832 131,2832 158,2832 185,2832 212,2832 239,2831 266,2830 293,2829 320,2828 347,2827 374,2826 401,2826 428,2825"/>
-                <TextEquiv>
-                    <Unicode>Chaloux</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l18" custom="readingOrder {index:2;} gap {offset:2; length:0;} _prenom {offset:4; length:6; continued:true;_role:sujet;}">
-                <Coords points="126,2941 151,2941 177,2942 203,2944 229,2944 255,2944 280,2944 306,2945 332,2945 358,2945 384,2946 409,2947 435,2947 461,2948 487,2949 513,2950 513,2890 487,2889 461,2888 435,2887 409,2887 384,2886 358,2885 332,2885 306,2885 280,2884 255,2884 229,2884 203,2884 177,2882 151,2881 126,2881"/>
-                <Baseline points="126,2921 151,2921 177,2922 203,2924 229,2924 255,2924 280,2924 306,2925 332,2925 358,2925 384,2926 409,2927 435,2927 461,2928 487,2929 513,2930"/>
-                <TextEquiv>
-                    <Unicode>( ) Gérard</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l19" custom="readingOrder {index:3;} _prenom {offset:0; length:5; continued:true;_role:sujet;}">
-                <Coords points="170,3031 197,3031 225,3031 253,3029 281,3029 308,3028 336,3028 364,3028 392,3032 392,2972 364,2968 336,2968 308,2968 281,2969 253,2969 225,2971 197,2971 170,2971"/>
-                <Baseline points="170,3011 197,3011 225,3011 253,3009 281,3009 308,3008 336,3008 364,3008 392,3012"/>
-                <TextEquiv>
-                    <Unicode>Henri</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l20" custom="readingOrder {index:4;}">
-                <Coords points="357,3108 506,3093 508,3032 359,3043"/>
-                <Baseline points="357,3090 508,3079"/>
-                <TextEquiv>
-                    <Unicode>P.107</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l21" custom="readingOrder {index:5;} _lieu_residence {offset:2; length:4;}">
-                <Coords points="132,3229 213,3224 242,3223 271,3222 299,3220 328,3219 357,3216 357,3162 328,3165 299,3166 271,3168 242,3169 213,3170 132,3175"/>
-                <Baseline points="135,3212 213,3206 242,3205 271,3204 299,3202 328,3201 357,3198"/>
-                <TextEquiv>
-                    <Unicode>v N.H.</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l23" custom="readingOrder {index:6;} textStyle {offset:7; length:4;underlined:true; fontSize:0.0; kerning:0; superscript:true;} textStyle {offset:11; length:4;fontSize:0.0; kerning:0; subscript:true;}">
-                <Coords points="242,3296 269,3296 296,3294 323,3292 350,3290 377,3288 404,3285 431,3281 458,3278 485,3276 540,3266 540,3212 485,3222 458,3224 431,3227 404,3231 377,3234 350,3236 323,3238 296,3240 269,3242 242,3242"/>
-                <Baseline points="242,3278 269,3278 296,3276 323,3274 350,3272 377,3270 404,3267 431,3263 458,3260 485,3258 540,3245"/>
-                <TextEquiv>
-                    <Unicode>T.O.N. PtreCuré</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextEquiv>
-                <Unicode>B. 31
-Chaloux
-( ) Gérard
-Henri
-P.107
-v N.H.
-T.O.N. PtreCuré</Unicode>
-            </TextEquiv>
-        </TextRegion>
-        <TextRegion orientation="0.0" id="TextRegion_1552403880332_1857" custom="readingOrder {index:1;}">
-            <Coords points="547,813 701,813 1324,819 1576,823 2079,799 2334,808 2334,1644 2063,1650 1883,1654 1454,1642 1010,1650 766,1656 559,1660 547,1655"/>
-            <TextLine id="r2l10" custom="readingOrder {index:1;}">
-                <Coords points="590,986 671,987 753,987 834,987 916,989 998,989 1079,989 1161,989 1242,989 1324,989 1406,987 1487,987 1569,986 1650,984 1732,984 1814,983 1895,981 1977,980 2058,977 2140,975 2222,974 2222,911 2140,912 2058,914 1977,917 1895,918 1814,920 1732,921 1650,921 1569,923 1487,924 1406,924 1324,926 1242,926 1161,926 1079,926 998,926 916,926 834,924 753,924 671,924 590,923"/>
-                <Baseline points="590,965 671,966 753,966 834,966 916,968 998,968 1079,968 1161,968 1242,968 1324,968 1406,966 1487,966 1569,965 1650,963 1732,963 1814,962 1895,960 1977,959 2058,956 2140,954 2222,953"/>
-                <TextEquiv>
-                    <Unicode>Nous avons</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l11" custom="readingOrder {index:2;}">
-                <Coords points="566,1090 651,1089 736,1086 822,1084 907,1083 992,1081 1078,1080 1163,1080 1248,1078 1334,1078 1419,1077 1504,1077 1590,1077 1675,1075 1760,1075 1846,1075 1931,1075 2016,1074 2102,1074 2187,1074 2273,1072 2273,1006 2187,1008 2102,1008 2016,1008 1931,1009 1846,1009 1760,1009 1675,1009 1590,1011 1504,1011 1419,1011 1334,1012 1248,1012 1163,1014 1078,1014 992,1015 907,1017 822,1018 736,1020 651,1023 566,1024"/>
-                <Baseline points="566,1068 651,1067 736,1064 822,1062 907,1061 992,1059 1078,1058 1163,1058 1248,1056 1334,1056 1419,1055 1504,1055 1590,1055 1675,1053 1760,1053 1846,1053 1931,1053 2016,1052 2102,1052 2187,1052 2273,1050"/>
-                <TextEquiv>
-                    <Unicode>inhumé</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l12" custom="readingOrder {index:3;} _prenom {offset:14; length:6;_role:sujet;}">
-                <Coords points="554,1185 641,1185 728,1185 816,1183 903,1183 991,1182 1078,1180 1166,1179 1253,1177 1341,1176 1428,1174 1515,1173 1603,1171 1690,1170 1778,1170 1865,1168 1953,1168 2040,1168 2128,1168 2215,1168 2303,1170 2303,1104 2215,1102 2128,1102 2040,1102 1953,1102 1865,1102 1778,1104 1690,1104 1603,1105 1515,1107 1428,1108 1341,1110 1253,1111 1166,1113 1078,1114 991,1116 903,1117 816,1117 728,1119 641,1119 554,1119"/>
-                <Baseline points="554,1163 641,1163 728,1163 816,1161 903,1161 991,1160 1078,1158 1166,1157 1253,1155 1341,1154 1428,1152 1515,1151 1603,1149 1690,1148 1778,1148 1865,1146 1953,1146 2040,1146 2128,1146 2215,1146 2303,1148"/>
-                <TextEquiv>
-                    <Unicode>le corps d'un garçon issu</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l13" custom="readingOrder {index:4;} _prenom {offset:3; length:6;_role:père;} _nom {offset:10; length:3; continued:true;}">
-                <Coords points="576,1273 659,1273 743,1273 827,1273 911,1272 995,1272 1078,1272 1162,1270 1246,1269 1330,1269 1414,1267 1497,1266 1581,1264 1665,1263 1749,1263 1833,1261 1916,1260 2000,1258 2084,1255 2168,1254 2252,1252 2252,1186 2168,1188 2084,1189 2000,1192 1916,1194 1833,1195 1749,1197 1665,1197 1581,1198 1497,1200 1414,1201 1330,1203 1246,1203 1162,1204 1078,1206 995,1206 911,1206 827,1207 743,1207 659,1207 576,1207"/>
-                <Baseline points="576,1251 659,1251 743,1251 827,1251 911,1250 995,1250 1078,1250 1162,1248 1246,1247 1330,1247 1414,1245 1497,1244 1581,1242 1665,1241 1749,1241 1833,1239 1916,1238 2000,1236 2084,1233 2168,1232 2252,1230"/>
-                <TextEquiv>
-                    <Unicode>de Joseph Du-</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l14" custom="readingOrder {index:5;} _nom {offset:0; length:6; continued:true;}">
-                <Coords points="551,1374 635,1369 719,1366 803,1363 887,1360 971,1359 1055,1359 1140,1357 1224,1357 1308,1357 1392,1357 1476,1357 1560,1357 1644,1356 1729,1356 1813,1354 1897,1353 1981,1351 2065,1348 2149,1344 2234,1339 2234,1272 2149,1277 2065,1281 1981,1284 1897,1286 1813,1287 1729,1289 1644,1289 1560,1290 1476,1290 1392,1290 1308,1290 1224,1290 1140,1290 1055,1292 971,1292 887,1293 803,1296 719,1299 635,1302 551,1307"/>
-                <Baseline points="551,1352 635,1347 719,1344 803,1341 887,1338 971,1337 1055,1337 1140,1335 1224,1335 1308,1335 1392,1335 1476,1335 1560,1335 1644,1334 1729,1334 1813,1332 1897,1331 1981,1329 2065,1326 2149,1322 2234,1317"/>
-                <TextEquiv>
-                    <Unicode>moulin, et </Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l15" custom="readingOrder {index:6;} _prenom {offset:3; length:6;_role:mère;} _nom {offset:10; length:8;}">
-                <Coords points="569,1455 655,1456 741,1458 827,1459 914,1459 1000,1459 1086,1459 1172,1459 1259,1459 1345,1458 1431,1458 1517,1456 1604,1455 1690,1453 1776,1452 1862,1450 1949,1449 2035,1447 2121,1446 2207,1444 2294,1443 2294,1377 2207,1378 2121,1380 2035,1381 1949,1383 1862,1384 1776,1386 1690,1387 1604,1389 1517,1390 1431,1392 1345,1392 1259,1393 1172,1393 1086,1393 1000,1393 914,1393 827,1393 741,1392 655,1390 569,1389"/>
-                <Baseline points="569,1433 655,1434 741,1436 827,1437 914,1437 1000,1437 1086,1437 1172,1437 1259,1437 1345,1436 1431,1436 1517,1434 1604,1433 1690,1431 1776,1430 1862,1428 1949,1427 2035,1425 2121,1424 2207,1422 2294,1421"/>
-                <TextEquiv>
-                    <Unicode>de Valeda Paquette.</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextEquiv>
-                <Unicode>Nous avons
-inhumé
-le corps d'un garçon issu 
-de Joseph Du-
-moulin, et 
-de Valeda Paquette.</Unicode>
-            </TextEquiv>
-        </TextRegion>
-    </Page>
-</PcGts>
\ No newline at end of file
diff --git a/arkindex/documents/tests/pagexml_samples/merge.xml b/arkindex/documents/tests/pagexml_samples/merge.xml
deleted file mode 100644
index 3587fc11aa..0000000000
--- a/arkindex/documents/tests/pagexml_samples/merge.xml
+++ /dev/null
@@ -1,115 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
-<PcGts xmlns="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15/pagecontent.xsd">
-    <Metadata>
-        <Creator>prov=University of Rostock/Institute of Mathematics/CITlab/Tobias Gruening/tobias.gruening@uni-rostock.de:name=/net_tf/LA73_249_0mod360.pb:de.uros.citlab.segmentation.CITlab_LA_ML:v=?0.1
-TRP</Creator>
-        <Created>2019-02-28T13:35:11.826-05:00</Created>
-        <LastChange>2019-03-29T15:22:20.862+01:00</LastChange>
-    </Metadata>
-    <Page imageFilename="05S_CE501S27_1906_024.tif" imageWidth="2405" imageHeight="3912">
-        <ReadingOrder>
-            <OrderedGroup id="ro_1553869340958" caption="Regions reading order">
-                <RegionRefIndexed index="1" regionRef="TextRegion_1552404189500_2036"/>
-                <RegionRefIndexed index="0" regionRef="TextRegion_1552404252071_2084"/>
-            </OrderedGroup>
-        </ReadingOrder>
-        <TextRegion orientation="0.0" type="marginalia" id="TextRegion_1552404189500_2036" custom="readingOrder {index:1;}">
-            <Coords points="68,161 571,160 566,1259 63,1261"/>
-            <TextLine id="r1l1" custom="readingOrder {index:0;} structure {type:ref;}">
-                <Coords points="108,293 134,293 160,294 186,295 212,296 238,298 264,298 290,300 316,301 342,302 368,302 394,302 420,301 420,232 394,233 368,233 342,233 316,232 290,231 264,229 238,229 212,227 186,226 160,225 134,224 108,224"/>
-                <Baseline points="108,270 134,270 160,271 186,272 212,273 238,275 264,275 290,277 316,278 342,279 368,279 394,279 420,278"/>
-                <TextEquiv>
-                    <Unicode>B. 32</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l2" custom="readingOrder {index:1;} _nom {offset:0; length:6;}">
-                <Coords points="108,422 133,421 158,420 183,418 208,418 233,415 258,414 283,412 308,412 333,412 358,412 383,415 383,346 358,343 333,343 308,343 283,343 258,345 233,346 208,349 183,349 158,351 133,352 108,353"/>
-                <Baseline points="108,399 133,398 158,397 183,395 208,395 233,392 258,391 283,389 308,389 333,389 358,389 383,392"/>
-                <TextEquiv>
-                    <Unicode>Landry</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l3" custom="readingOrder {index:2;} _prenom {offset:0; length:11; continued:true;_role:sujet;}">
-                <Coords points="93,529 118,526 144,524 170,523 196,521 222,520 248,519 274,518 300,517 325,517 351,517 377,517 403,516 429,515 455,515 481,515 507,515 543,514 543,445 507,446 481,446 455,446 429,446 403,447 377,448 351,448 325,448 300,448 274,449 248,450 222,451 196,452 170,454 144,455 118,457 93,460"/>
-                <Baseline points="93,506 118,503 144,501 170,500 196,498 222,497 248,496 274,495 300,494 325,494 351,494 377,494 403,493 429,492 455,492 481,492 507,492 542,492"/>
-                <TextEquiv>
-                    <Unicode>Marie-Alice</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l4" custom="readingOrder {index:3;} _prenom {offset:0; length:7; continued:true;_role:sujet;}">
-                <Coords points="116,622 141,620 166,619 192,617 217,616 242,614 268,614 293,612 319,611 344,610 369,609 395,608 420,607 446,605 446,536 420,538 395,539 369,540 344,541 319,542 293,543 268,545 242,545 217,547 192,548 166,550 141,551 116,553"/>
-                <Baseline points="116,599 141,597 166,596 192,594 217,593 242,591 268,591 293,589 319,588 344,587 369,586 395,585 420,584 446,582"/>
-                <TextEquiv>
-                    <Unicode>Rosilda</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="line_1552404152412_2006" custom="readingOrder {index:4;}">
-                <Coords points="225,655 330,659 444,629 456,678 336,709 223,705"/>
-                <Baseline points="221,695 333,699 453,668"/>
-                <TextEquiv>
-                    <Unicode>P.108</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextEquiv>
-                <Unicode>B. 32
-Landry
-Marie-Alice
-Rosilda
-P.108</Unicode>
-            </TextEquiv>
-        </TextRegion>
-        <TextRegion orientation="0.0" id="TextRegion_1552404252071_2084" custom="readingOrder {index:0;}">
-            <Coords points="570,245 2359,241 2359,1379 2061,1373 1380,1367 570,1352"/>
-            <TextLine id="r2l1" custom="readingOrder {index:0;}">
-                <Coords points="587,323 673,323 759,322 846,322 932,322 1019,320 1105,320 1192,320 1278,320 1365,320 1451,319 1537,319 1624,319 1710,319 1797,319 1883,317 1970,317 2056,316 2143,316 2229,314 2328,323 2328,253 2229,244 2143,246 2056,246 1970,247 1883,247 1797,249 1710,249 1624,249 1537,249 1451,249 1365,250 1278,250 1192,250 1105,250 1019,250 932,252 846,252 759,252 673,253 587,253"/>
-                <Baseline points="587,300 673,300 759,299 846,299 932,299 1019,297 1105,297 1192,297 1278,297 1365,297 1451,296 1537,296 1624,296 1710,296 1797,296 1883,294 1970,294 2056,293 2143,293 2229,291 2329,304"/>
-                <TextEquiv>
-                    <Unicode>Nous</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l2" custom="readingOrder {index:1;}">
-                <Coords points="599,426 683,426 768,426 853,426 938,426 1023,424 1108,423 1193,422 1278,420 1363,420 1448,418 1532,417 1617,415 1702,414 1787,414 1872,414 1957,414 2042,414 2127,415 2212,417 2316,416 2316,350 2212,351 2127,349 2042,348 1957,348 1872,348 1787,348 1702,348 1617,349 1532,351 1448,352 1363,354 1278,354 1193,356 1108,357 1023,358 938,360 853,360 768,360 683,360 599,360"/>
-                <Baseline points="599,404 683,404 768,404 853,404 938,404 1023,402 1108,401 1193,400 1278,398 1363,398 1448,396 1532,395 1617,393 1702,392 1787,392 1872,392 1957,392 2042,392 2127,393 2212,395 2317,400"/>
-                <TextEquiv>
-                    <Unicode>prêtre curé soussigné, avons baptisé</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l3" custom="readingOrder {index:2;} _prenom {offset:0; length:19;_role:sujet;}">
-                <Coords points="596,516 682,518 769,520 856,522 943,523 1030,523 1117,523 1203,523 1290,522 1377,522 1464,520 1551,520 1638,519 1725,517 1811,516 1898,516 1985,514 2072,514 2159,514 2246,514 2351,516 2351,450 2246,448 2159,448 2072,448 1985,448 1898,450 1811,450 1725,451 1638,453 1551,454 1464,454 1377,456 1290,456 1203,457 1117,457 1030,457 943,457 856,456 769,454 682,452 596,450"/>
-                <Baseline points="596,494 682,496 769,498 856,500 943,501 1030,501 1117,501 1203,501 1290,500 1377,500 1464,498 1551,498 1638,497 1725,495 1811,494 1898,494 1985,492 2072,492 2159,492 2246,492 2350,494"/>
-                <TextEquiv>
-                    <Unicode>Marie-Alice-Rosilda, née</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l4" custom="readingOrder {index:3;} _prenom {offset:3; length:7;_role:père;} _nom {offset:11; length:6;}">
-                <Coords points="579,610 666,613 753,615 840,616 927,616 1014,616 1101,616 1188,615 1275,615 1362,613 1449,612 1536,611 1623,610 1710,609 1797,607 1884,606 1971,606 2058,606 2145,606 2232,606 2333,607 2333,540 2232,539 2145,539 2058,539 1971,539 1884,539 1797,540 1710,542 1623,543 1536,544 1449,545 1362,546 1275,548 1188,548 1101,549 1014,549 927,549 840,549 753,548 666,546 579,543"/>
-                <Baseline points="579,588 666,591 753,593 840,594 927,594 1014,594 1101,594 1188,593 1275,593 1362,591 1449,590 1536,589 1623,588 1710,587 1797,585 1884,584 1971,584 2058,584 2145,584 2232,584 2333,586"/>
-                <TextEquiv>
-                    <Unicode>de Antoine Landry</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l5" custom="readingOrder {index:4;} _prenom {offset:6; length:6;_role:mère;}">
-                <Coords points="579,703 665,705 751,706 838,708 924,709 1011,709 1097,709 1183,709 1270,708 1356,708 1443,708 1529,706 1615,706 1702,705 1788,705 1875,705 1961,705 2047,705 2134,706 2220,708 2307,709 2307,642 2220,641 2134,639 2047,638 1961,638 1875,638 1788,638 1702,638 1615,639 1529,639 1443,641 1356,641 1270,641 1183,642 1097,642 1011,642 924,642 838,641 751,639 665,638 579,636"/>
-                <Baseline points="579,681 665,683 751,684 838,686 924,687 1011,687 1097,687 1183,687 1270,686 1356,686 1443,686 1529,684 1615,684 1702,683 1788,683 1875,683 1961,683 2047,683 2134,684 2220,686 2307,687"/>
-                <TextEquiv>
-                    <Unicode>et de Zelpha</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l6" custom="readingOrder {index:5;} _nom {offset:0; length:10;}">
-                <Coords points="563,800 647,803 732,806 817,808 902,809 987,809 1072,811 1157,811 1242,809 1327,809 1412,808 1496,808 1581,806 1666,805 1751,805 1836,803 1921,803 2006,803 2091,803 2176,803 2261,805 2261,735 2176,733 2091,733 2006,733 1921,733 1836,733 1751,735 1666,735 1581,736 1496,738 1412,738 1327,739 1242,739 1157,741 1072,741 987,739 902,739 817,738 732,736 647,733 563,730"/>
-                <Baseline points="563,777 647,780 732,783 817,785 902,786 987,786 1072,788 1157,788 1242,786 1327,786 1412,785 1496,785 1581,783 1666,782 1751,782 1836,780 1921,780 2006,780 2091,780 2176,780 2261,782"/>
-                <TextEquiv>
-                    <Unicode>Beauchesne.</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextEquiv>
-                <Unicode>Nous
-avons baptisé
-Marie-Alice-Rosilda, née
-de Antoine Landry
-et de Zelpha
-Beauchesne.</Unicode>
-            </TextEquiv>
-        </TextRegion>
-    </Page>
-</PcGts>
\ No newline at end of file
diff --git a/arkindex/documents/tests/pagexml_samples/regroup.xml b/arkindex/documents/tests/pagexml_samples/regroup.xml
deleted file mode 100644
index 8bd15c83ef..0000000000
--- a/arkindex/documents/tests/pagexml_samples/regroup.xml
+++ /dev/null
@@ -1,110 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
-<PcGts xmlns="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15/pagecontent.xsd">
-    <Metadata>
-        <Creator>prov=University of Rostock/Institute of Mathematics/CITlab/Tobias Gruening/tobias.gruening@uni-rostock.de:name=/net_tf/LA73_249_0mod360.pb:de.uros.citlab.segmentation.CITlab_LA_ML:v=?0.1
-TRP</Creator>
-        <Created>2019-02-28T13:35:11.244-05:00</Created>
-        <LastChange>2019-03-29T16:18:59.654+01:00</LastChange>
-    </Metadata>
-    <Page imageFilename="05S_CE501S27_1906_021.tif" imageWidth="2381" imageHeight="3888">
-        <ReadingOrder>
-            <OrderedGroup id="ro_1553872739704" caption="Regions reading order">
-                <RegionRefIndexed index="1" regionRef="TextRegion_1552403600642_1718"/>
-                <RegionRefIndexed index="0" regionRef="TextRegion_1552403626791_1738"/>
-            </OrderedGroup>
-        </ReadingOrder>
-        <TextRegion orientation="0.0" type="marginalia" id="TextRegion_1552403600642_1718" custom="readingOrder {index:1;}">
-            <Coords points="45,2664 515,2664 515,3220 45,3220"/>
-            <TextLine id="r1l15" custom="readingOrder {index:0;} structure {type:ref;}">
-                <Coords points="134,2766 160,2766 186,2767 213,2767 239,2767 266,2767 292,2767 319,2766 345,2762 372,2758 372,2700 345,2704 319,2708 292,2709 266,2709 239,2709 213,2709 186,2709 160,2708 134,2708"/>
-                <Baseline points="134,2747 160,2747 186,2748 213,2748 239,2748 266,2748 292,2748 319,2747 345,2743 372,2739"/>
-                <TextEquiv>
-                    <Unicode>S. 10</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l16" custom="readingOrder {index:1;} _nom {offset:0; length:6;} textStyle {offset:0; length:6;strikethrough:true; fontSize:0.0; kerning:0;} strike_through {offset:0; length:6; continued:true;}">
-                <Coords points="111,2860 136,2862 162,2862 187,2863 213,2862 238,2862 264,2860 289,2860 315,2858 340,2857 366,2856 391,2854 417,2853 443,2851 443,2793 417,2795 391,2796 366,2798 340,2799 315,2800 289,2802 264,2802 238,2804 213,2804 187,2805 162,2804 136,2804 111,2802"/>
-                <Baseline points="111,2841 136,2843 162,2843 187,2844 213,2843 238,2843 264,2841 289,2841 315,2839 340,2838 366,2837 391,2835 417,2834 443,2832"/>
-                <TextEquiv>
-                    <Unicode>Dragon</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l17" custom="readingOrder {index:2;} _prenom {offset:0; length:5; continued:true;_role:sujet;} textStyle {offset:0; length:5;strikethrough:true; fontSize:0.0; kerning:0;} strike_through {offset:0; length:5; continued:true;}">
-                <Coords points="98,2959 123,2961 148,2961 174,2959 199,2959 224,2958 250,2956 275,2955 301,2953 326,2952 351,2950 377,2950 402,2949 428,2949 428,2891 402,2891 377,2892 351,2892 326,2894 301,2895 275,2897 250,2898 224,2900 199,2901 174,2901 148,2903 123,2903 98,2901"/>
-                <Baseline points="98,2940 123,2942 148,2942 174,2940 199,2940 224,2939 250,2937 275,2936 301,2934 326,2933 351,2931 377,2931 402,2930 428,2930"/>
-                <TextEquiv>
-                    <Unicode>Marie</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l18" custom="readingOrder {index:3;} _prenom {offset:0; length:7; continued:true;_role:sujet;} textStyle {offset:0; length:7;strikethrough:true; fontSize:0.0; kerning:0;} strike_through {offset:0; length:7; continued:true;}">
-                <Coords points="143,3055 168,3055 193,3054 219,3052 244,3050 270,3048 295,3046 321,3043 346,3040 372,3038 397,3036 423,3034 448,3034 474,3034 474,2976 448,2976 423,2976 397,2978 372,2980 346,2982 321,2985 295,2988 270,2990 244,2992 219,2994 193,2996 168,2997 143,2997"/>
-                <Baseline points="143,3036 168,3036 193,3035 219,3033 244,3031 270,3029 295,3027 321,3024 346,3021 372,3019 397,3017 423,3015 448,3015 474,3015"/>
-                <TextEquiv>
-                    <Unicode>Emillia</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l19" custom="readingOrder {index:4;} _prenom {offset:0; length:6; continued:true;_role:sujet;} textStyle {offset:0; length:6;strikethrough:true; fontSize:0.0; kerning:0;} strike_through {offset:0; length:6; continued:true;}">
-                <Coords points="171,3154 197,3156 223,3156 249,3156 276,3153 302,3151 328,3148 354,3146 381,3144 407,3143 433,3142 459,3142 486,3145 486,3087 459,3084 433,3084 407,3085 381,3086 354,3088 328,3090 302,3093 276,3095 249,3098 223,3098 197,3098 171,3096"/>
-                <Baseline points="171,3135 197,3137 223,3137 249,3137 276,3134 302,3132 328,3129 354,3127 381,3125 407,3124 433,3123 459,3123 486,3126"/>
-                <TextEquiv>
-                    <Unicode>Elmire</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextEquiv>
-                <Unicode>S. 10
-Dragon
-Marie
-Emillia
-Elmire</Unicode>
-            </TextEquiv>
-        </TextRegion>
-        <TextRegion orientation="0.0" id="TextRegion_1552403626791_1738" custom="readingOrder {index:0;}">
-            <Coords points="512,90 2318,88 2318,1435 512,1437"/>
-
-             <TextLine id="r2l6" custom="readingOrder {index:3;} _prenom {offset:6; length:5; continued:true;_role:mère;}">
-                <Coords points="509,730 595,729 682,727 769,726 855,724 942,723 1029,723 1115,721 1202,721 1289,720 1376,720 1462,718 1549,718 1636,717 1722,715 1809,715 1896,714 1982,712 2069,711 2156,708 2243,706 2243,640 2156,642 2069,645 1982,646 1896,648 1809,649 1722,649 1636,651 1549,652 1462,652 1376,654 1289,654 1202,655 1115,655 1029,657 942,657 855,658 769,660 682,661 595,663 509,664"/>
-                <Baseline points="509,708 595,707 682,705 769,704 855,702 942,701 1029,701 1115,699 1202,699 1289,698 1376,698 1462,696 1549,696 1636,695 1722,693 1809,693 1896,692 1982,690 2069,689 2156,686 2243,684"/>
-                <TextEquiv>
-                    <Unicode>et de Ermi-</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l4" custom="readingOrder {index:1;} _prenom {offset:5; length:13;_role:sujet;}">
-                <Coords points="527,524 614,525 702,528 790,530 878,531 966,531 1053,531 1141,531 1229,530 1317,530 1405,528 1492,527 1580,524 1668,522 1756,521 1844,518 1931,516 2019,513 2107,511 2195,509 2283,507 2283,443 2195,445 2107,447 2019,449 1931,452 1844,454 1756,457 1668,458 1580,460 1492,463 1405,464 1317,466 1229,466 1141,467 1053,467 966,467 878,467 790,466 702,464 614,461 527,460"/>
-                <Baseline points="527,503 614,504 702,507 790,509 878,510 966,510 1053,510 1141,510 1229,509 1317,509 1405,507 1492,506 1580,503 1668,501 1756,500 1844,497 1931,495 2019,492 2107,490 2195,488 2283,486"/>
-                <TextEquiv>
-                    <Unicode>tisé Marie-Evilina, fille de</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l5" custom="readingOrder {index:2;} _prenom {offset:0; length:7;_role:père;} _nom {offset:8; length:9;}">
-                <Coords points="510,635 598,632 687,629 776,626 865,624 953,622 1042,620 1131,620 1220,618 1308,617 1397,617 1486,615 1575,615 1663,614 1752,614 1841,612 1930,611 2018,609 2107,608 2196,606 2285,603 2285,539 2196,542 2107,544 2018,545 1930,547 1841,548 1752,550 1663,550 1575,551 1486,551 1397,553 1308,553 1220,554 1131,556 1042,556 953,558 865,560 776,562 687,565 598,568 510,571"/>
-                <Baseline points="510,614 598,611 687,608 776,605 865,603 953,601 1042,599 1131,599 1220,597 1308,596 1397,596 1486,594 1575,594 1663,593 1752,593 1841,591 1930,590 2018,588 2107,587 2196,585 2285,582"/>
-                <TextEquiv>
-                    <Unicode>Camille Thibeault</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l7" custom="readingOrder {index:4;} _prenom {offset:0; length:4; continued:true;_role:mère;} _nom {offset:5; length:8;}">
-                <Coords points="497,825 586,826 676,828 766,828 856,828 946,827 1036,826 1125,825 1215,823 1305,820 1395,819 1485,816 1575,814 1665,811 1754,810 1844,807 1934,805 2024,804 2114,801 2204,801 2294,799 2294,733 2204,735 2114,735 2024,738 1934,739 1844,741 1754,744 1665,745 1575,748 1485,750 1395,753 1305,754 1215,757 1125,759 1036,760 946,761 856,762 766,762 676,762 586,760 497,759"/>
-                <Baseline points="497,803 586,804 676,806 766,806 856,806 946,805 1036,804 1125,803 1215,801 1305,798 1395,797 1485,794 1575,792 1665,789 1754,788 1844,785 1934,783 2024,782 2114,779 2204,779 2294,777"/>
-                <TextEquiv>
-                    <Unicode>nise Champeau.</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l3" custom="readingOrder {index:0;}">
-                <Coords points="512,432 599,432 686,432 773,432 861,432 948,430 1035,430 1123,429 1210,427 1297,426 1385,425 1472,424 1559,423 1646,422 1734,421 1821,421 1908,420 1996,420 2083,421 2170,421 2272,423 2272,357 2170,355 2083,355 1996,354 1908,354 1821,355 1734,355 1646,356 1559,357 1472,358 1385,359 1297,360 1210,361 1123,363 1035,364 948,364 861,366 773,366 686,366 599,366 512,366"/>
-                <Baseline points="512,410 599,410 686,410 773,410 861,410 948,408 1035,408 1123,407 1210,405 1297,404 1385,403 1472,402 1559,401 1646,400 1734,399 1821,399 1908,398 1996,398 2083,399 2170,399 2274,404"/>
-                <TextEquiv>
-                    <Unicode>Nous avons bap-</Unicode>
-                </TextEquiv>
-            </TextLine>
-
-            <TextEquiv>
-                <Unicode>Nous avons bap-
-tisé Marie-Evilina, fille de
-Camille Thibeault
-et de Ermi-
-nise Champeau.
-</Unicode>
-            </TextEquiv>
-        </TextRegion>
-    </Page>
-</PcGts>
\ No newline at end of file
diff --git a/arkindex/documents/tests/pagexml_samples/transcript.xml b/arkindex/documents/tests/pagexml_samples/transcript.xml
deleted file mode 100644
index 93873def1a..0000000000
--- a/arkindex/documents/tests/pagexml_samples/transcript.xml
+++ /dev/null
@@ -1,90 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
-<PcGts xmlns="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15/pagecontent.xsd">
-    <Metadata>
-        <Creator>TRP</Creator>
-        <Created>2018-10-01T09:25:16.139-04:00</Created>
-        <LastChange>2019-02-19T19:45:19.222+01:00</LastChange>
-    </Metadata>
-    <Page imageFilename="01R_CE101S01_1907_005.tif" imageWidth="2415" imageHeight="3936">
-        <ReadingOrder>
-            <OrderedGroup id="ro_1550601919253" caption="Regions reading order">
-                <RegionRefIndexed index="0" regionRef="TextRegion_1540299380975_9"/>
-                <RegionRefIndexed index="1" regionRef="TextRegion_1540299473514_23"/>
-            </OrderedGroup>
-        </ReadingOrder>
-        <Relations>
-            <Relation type="link">
-                <RegionRef regionRef="TextRegion_1540299380975_9"/>
-                <RegionRef regionRef="TextRegion_1540299473514_23"/>
-            </Relation>
-        </Relations>
-        <TextRegion orientation="0.0" type="marginalia" id="TextRegion_1540299380975_9" custom="readingOrder {index:0;} structure {type:marginalia;}">
-            <Coords points="50,25 1200,25 1200,50 50,50"/>
-            <TextLine id="r1l6" custom="readingOrder {index:0;} structure {type:ref;}">
-                <Coords points="12,34 56,78 30,512"/>
-                <Baseline points="13,37 42,42 37,13"/>
-                <TextEquiv>
-                    <Unicode>B .1</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l7" custom="readingOrder {index:1;} _prenom {offset:0; length:12; continued:true;_role:sujet;}">
-                <Coords points="12,34 56,78 90,12"/>
-                <Baseline points="42,42 42,42 37,13"/>
-                <TextEquiv>
-                    <Unicode>Louis Joseph</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l8" custom="readingOrder {index:2;} _prenom {offset:0; length:13; continued:true;_role:sujet;}">
-                <Coords points="12,34 56,78 45,67"/>
-                <Baseline points="13,37 13,37 37,13"/>
-                <TextEquiv>
-                    <Unicode>Pierre Siméon</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r1l9" custom="readingOrder {index:3;} _nom {offset:0; length:7;}">
-                <Coords points="12,34 56,78 90,28"/>
-                <Baseline points="13,37 13,13 37,37"/>
-                <TextEquiv>
-                    <Unicode>Lemieux</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextEquiv>
-                <Unicode>B .1
-Louis Joseph
-Pierre Siméon
-Lemieux</Unicode>
-            </TextEquiv>
-        </TextRegion>
-        <TextRegion orientation="0.0" id="TextRegion_1540299473514_23" custom="readingOrder {index:1;}">
-            <Coords points="12,34 56,78 910,1112"/>
-            <TextLine id="r2l12" custom="readingOrder {index:0;} _date {offset:3; length:30;_enregistrement:1;}">
-                <Coords points="444,44 45,678 910,1112"/>
-                <Baseline points="13,37 11,22 37,13"/>
-                <TextEquiv>
-                    <Unicode>Le onze janvier mil neuf centsept</Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextLine id="r2l13" custom="readingOrder {index:1;} _prenom {offset:36; length:5; continued:true;_role:sujet;}">
-                <Coords points="12,34 42,78 910,11"/>
-                <Baseline points="13,37 33,44 37,13"/>
-                <TextEquiv>
-                    <Unicode>nous prêtre soussigné avons baptisé Louis</Unicode>
-                </TextEquiv>
-            </TextLine>
-	    <TextLine id="r2l14" custom="readingOrder {index: 2;}">
-                <Coords points=""/>
-                <Baseline points=""/>
-		<TextEquiv>
-                    <Unicode>        </Unicode>
-                </TextEquiv>
-            </TextLine>
-            <TextEquiv>
-                <Unicode>Le onze janvier mil neuf centsept
-nous prêtre soussigné avons baptisé Louis</Unicode>
-            </TextEquiv>
-        </TextRegion>
-        <TextRegion orientation="0.0" id="TextRegion_1540299473514_24" custom="readingOrder {index:2;}">
-            <Coords points=""/>
-        </TextRegion>
-    </Page>
-</PcGts>
diff --git a/arkindex/documents/tests/test_pagexml.py b/arkindex/documents/tests/test_pagexml.py
deleted file mode 100644
index 95ca6b7ebc..0000000000
--- a/arkindex/documents/tests/test_pagexml.py
+++ /dev/null
@@ -1,1027 +0,0 @@
-from pathlib import Path
-from unittest.mock import patch
-from mock import AsyncMock
-from django.urls import reverse
-from django.test import override_settings
-from rest_framework import status
-from arkindex.project.tests import FixtureAPITestCase
-from arkindex_common.enums import TranscriptionType, EntityType
-from arkindex.documents.models import Element, Entity, EntityRole, EntityLink, Transcription, TranscriptionEntity
-from arkindex.documents.pagexml import PageXmlParser
-
-FIXTURES = Path(__file__).absolute().parent / 'pagexml_samples'
-TRANSKRIBUS_ROLE = Path(__file__).absolute().parent / '../fixtures/roles_transkribus.yml'
-
-
-class TestPageXml(FixtureAPITestCase):
-
-    @classmethod
-    def setUpTestData(cls):
-        super().setUpTestData()
-        cls.page = cls.corpus.elements.get(name='Volume 2, page 1r')
-        cls.types = {
-            '_nom': EntityType.Person.value,
-            '_prenom': EntityType.Person.value,
-            '_lieu_residence': EntityType.Location.value,
-            '_profession': EntityType.Subject.value,
-        }
-
-    def all_tags(self, block):
-        for line in block['lines']:
-            for tag in line['tags']:
-                yield tag
-
-    def test_pagexml_import_requires_login(self):
-        with (FIXTURES / 'transcript.xml').open() as f:
-            resp = self.client.post(
-                reverse('api:pagexml-transcriptions', kwargs={'pk': str(self.page.id)}),
-                data=f.read(),
-                content_type='application/xml',
-            )
-        self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
-
-    @patch('arkindex.project.triggers.get_channel_layer')
-    def test_pagexml_import(self, get_layer_mock):
-        get_layer_mock.return_value.send = AsyncMock()
-        self.assertFalse(self.page.transcriptions.exists())
-        self.client.force_login(self.user)
-        with (FIXTURES / 'transcript.xml').open() as f:
-            resp = self.client.post(
-                reverse('api:pagexml-transcriptions', kwargs={'pk': str(self.page.id)}),
-                data=f.read(),
-                content_type='application/xml',
-            )
-        self.assertEqual(resp.status_code, status.HTTP_201_CREATED)
-
-        # Page should have 2 Paragraph elements containing respectively 3 and 2 Line elements
-        paragraphs = Element.objects.filter(paths__path__last=self.page.id).filter(type__slug='paragraph')
-        self.assertEqual(paragraphs.count(), 2)
-        self.assertCountEqual(paragraphs.values_list('transcriptions__type', 'transcriptions__text'), [
-            (TranscriptionType.Paragraph, (
-                'Le onze janvier mil neuf centsept\n'
-                'nous prêtre soussigné avons baptisé Louis'
-            )),
-            (TranscriptionType.Paragraph, (
-                'B .1\n'
-                'Louis Joseph\n'
-                'Pierre Siméon\n'
-                'Lemieux'
-            ))
-        ])
-
-        # Line elements have the same text
-        self.assertCountEqual(
-            [
-                list(Element.objects.filter(paths__path__last=id).order_by('name').values_list(
-                    'type__slug', 'name', 'transcriptions__type', 'transcriptions__text'
-                )) for id in paragraphs.values_list('id', flat=True)
-            ], [
-                [
-                    ('line', '1', TranscriptionType.Line, 'B .1'),
-                    ('line', '2', TranscriptionType.Line, 'Louis Joseph'),
-                    ('line', '3', TranscriptionType.Line, 'Pierre Siméon'),
-                    ('line', '4', TranscriptionType.Line, 'Lemieux'),
-                ], [
-                    ('line', '1', TranscriptionType.Line, 'Le onze janvier mil neuf centsept'),
-                    ('line', '2', TranscriptionType.Line, 'nous prêtre soussigné avons baptisé Louis')
-                ]
-            ]
-        )
-
-        # All transcriptions have a score of 100%
-        self.assertFalse(self.page.transcriptions.exclude(score=1.0).exists())
-
-        get_layer_mock().send.assert_called_once_with('reindex', {
-            'type': 'reindex.start',
-            'element': str(self.page.id),
-            'corpus': None,
-            'entity': None,
-            'transcriptions': True,
-            'entities': True,
-            'elements': True,
-            'drop': False,
-        })
-
-    @patch('arkindex.project.triggers.get_channel_layer')
-    @override_settings(ARKINDEX_FEATURES={'search': False})
-    def test_pagexml_import_no_search(self, get_layer_mock):
-        self.client.force_login(self.user)
-        with (FIXTURES / 'transcript.xml').open() as f:
-            resp = self.client.post(
-                reverse('api:pagexml-transcriptions', kwargs={'pk': str(self.page.id)}),
-                data=f.read(),
-                content_type='application/xml',
-            )
-        self.assertEqual(resp.status_code, status.HTTP_201_CREATED)
-        self.assertFalse(get_layer_mock().send.called)
-
-    def test_pagexml_import_requires_zone(self):
-        volume = self.corpus.elements.get(name='Volume 1')
-        self.assertIsNone(volume.zone)
-
-        self.client.force_login(self.user)
-        with (FIXTURES / 'transcript.xml').open() as f:
-            resp = self.client.post(
-                reverse('api:pagexml-transcriptions', kwargs={'pk': str(volume.id)}),
-                data=f.read(),
-                content_type='application/xml',
-            )
-        self.assertEqual(resp.status_code, status.HTTP_404_NOT_FOUND)
-
-    def test_pagexml_create_blocks(self):
-        parser = PageXmlParser(FIXTURES / 'create_blocks.xml', self.page.corpus)
-        annotations = parser.save(self.page)
-        blocks = parser.create_blocks(annotations)
-
-        text_region_0 = 'Le onze janvier mil neuf centsept\nnous prêtre soussigné avons baptisé Louis'
-        text_region_1 = 'B .1\nLouis Joseph\nPierre Siméon\nLemieux'
-        transcription_id_0 = Transcription.objects.get(text=text_region_0).id
-        transcription_id_1 = Transcription.objects.get(text=text_region_1).id
-
-        self.assertEqual(
-            blocks,
-            [
-                {
-                    'reading_order': 0,
-                    'lines': [
-                        {
-                            'reading_order': 0,
-                            'tags': [
-                                {
-                                    'name': 'readingOrder',
-                                    'index': '0',
-                                    'reading_order': 0,
-                                    'transcription_id': transcription_id_0,
-                                    'offset_in_region': 0
-                                },
-                                {
-                                    'name': '_date',
-                                    'offset': '3',
-                                    'length': '30',
-                                    '_enregistrement': '1',
-                                    'value': 'onze janvier mil neuf centsept',
-                                    'reading_order': 0,
-                                    'transcription_id': transcription_id_0,
-                                    'offset_in_region': 3
-                                }
-                            ],
-                            'length': 33
-                        },
-                        {
-                            'reading_order': 1,
-                            'tags': [
-                                {
-                                    'name': 'readingOrder',
-                                    'index': '1',
-                                    'reading_order': 1,
-                                    'transcription_id': transcription_id_0,
-                                    'offset_in_region': 34
-                                },
-                                {
-                                    'name': '_prenom',
-                                    'offset': '36',
-                                    'length': '5',
-                                    'continued': 'true',
-                                    '_role': 'sujet',
-                                    'value': 'Louis',
-                                    'reading_order': 1,
-                                    'transcription_id': transcription_id_0,
-                                    'offset_in_region': 70
-                                }
-                            ], 'length': 41
-                        }
-                    ]
-                },
-                {
-                    'reading_order': 1,
-                    'lines': [
-                        {
-                            'reading_order': 0,
-                            'tags': [
-                                {
-                                    'name': 'readingOrder',
-                                    'index': '0',
-                                    'reading_order': 0,
-                                    'transcription_id': transcription_id_1,
-                                    'offset_in_region': 0
-                                },
-                                {
-                                    'name': 'structure',
-                                    'type': 'ref',
-                                    'reading_order': 0,
-                                    'transcription_id': transcription_id_1,
-                                    'offset_in_region': 0
-                                }
-                            ],
-                            'length': 4
-                        },
-                        {
-                            'reading_order': 1,
-                            'tags': [
-                                {
-                                    'name': 'readingOrder',
-                                    'index': '1',
-                                    'reading_order': 1,
-                                    'transcription_id': transcription_id_1,
-                                    'offset_in_region': 5
-                                },
-                                {
-                                    'name': '_prenom',
-                                    'offset': '0',
-                                    'length': '12',
-                                    'continued': 'true',
-                                    '_role': 'sujet',
-                                    'value': 'Louis Joseph',
-                                    'reading_order': 1,
-                                    'transcription_id': transcription_id_1,
-                                    'offset_in_region': 5
-                                }
-                            ],
-                            'length': 12
-                        },
-                        {
-                            'reading_order': 2,
-                            'tags': [
-                                {
-                                    'name': 'readingOrder',
-                                    'index': '2',
-                                    'reading_order': 2,
-                                    'transcription_id': transcription_id_1,
-                                    'offset_in_region': 18
-                                },
-                                {
-                                    'name': '_prenom',
-                                    'offset': '0',
-                                    'length': '13',
-                                    'continued': 'true',
-                                    '_role': 'sujet',
-                                    'value': 'Pierre Siméon',
-                                    'reading_order': 2,
-                                    'transcription_id': transcription_id_1,
-                                    'offset_in_region': 18
-                                }
-                            ],
-                            'length': 13
-                        }, {
-                            'reading_order': 3,
-                            'tags': [
-                                {
-                                    'name': 'readingOrder',
-                                    'index': '3',
-                                    'reading_order': 3,
-                                    'transcription_id': transcription_id_1,
-                                    'offset_in_region': 32
-                                },
-                                {
-                                    'name': '_nom',
-                                    'offset': '0',
-                                    'length': '7',
-                                    'value': 'Lemieux',
-                                    'reading_order': 3,
-                                    'transcription_id': transcription_id_1,
-                                    'offset_in_region': 32
-                                }
-                            ],
-                            'length': 7
-                        }
-                    ]
-                }
-            ]
-        )
-
-    def test_pagexml_regroup(self):
-        parser = PageXmlParser(FIXTURES / 'regroup.xml', self.page.corpus)
-        transcriptions = parser.save(self.page)
-        blocks = parser.create_blocks(transcriptions)
-        self.assertEqual(len(blocks), 2)
-        self.assertEqual(len(blocks[0]['lines']), 5)
-        self.assertEqual(len(blocks[1]['lines']), 5)
-        tags = list(filter(lambda tag: tag['name'][0] == '_', self.all_tags(blocks[0])))
-
-        tags = parser.regroup(tags)
-        tags = parser.cesure(tags)
-
-        text_region = 'Nous avons bap-\ntisé Marie-Evilina, fille de\nCamille Thibeault\net de Ermi-\nnise Champeau.\n'
-        transcription_id = Transcription.objects.get(text=text_region).id
-
-        self.assertEqual(
-            tags,
-            [
-                {
-                    'name': '_prenom',
-                    'offset': '5',
-                    'length': 13,
-                    '_role': 'sujet',
-                    'value': 'Marie-Evilina',
-                    'reading_order': 1,
-                    'transcription_id': transcription_id,
-                    'offset_in_region': 21,
-                    'items': [
-                        {
-                            'name': '_prenom',
-                            'offset': '5',
-                            'length': '13',
-                            '_role': 'sujet',
-                            'value': 'Marie-Evilina',
-                            'reading_order': 1,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 21
-                        }
-                    ]
-                },
-                {
-                    'name': '_prenom',
-                    'offset': '0',
-                    'length': 7,
-                    '_role': 'père',
-                    'value': 'Camille',
-                    'reading_order': 2,
-                    'transcription_id': transcription_id,
-                    'offset_in_region': 45,
-                    'items': [
-                        {
-                            'name': '_prenom',
-                            'offset': '0',
-                            'length': '7',
-                            '_role': 'père',
-                            'value': 'Camille',
-                            'reading_order': 2,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 45
-                        }
-                    ]
-                },
-                {
-                    'name': '_nom',
-                    'offset': '8',
-                    'length': 9,
-                    'value': 'Thibeault',
-                    'reading_order': 2,
-                    'transcription_id': transcription_id,
-                    'offset_in_region': 53,
-                    'items': [
-                        {
-                            'name': '_nom',
-                            'offset': '8',
-                            'length': '9',
-                            'value': 'Thibeault',
-                            'reading_order': 2,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 53
-                        }
-                    ]
-                },
-                {
-                    'name': '_prenom',
-                    'offset': '6',
-                    'length': 10,
-                    'continued': 'true',
-                    '_role': 'mère',
-                    'value': 'Erminise',
-                    'reading_order': 4,
-                    'transcription_id': transcription_id,
-                    'offset_in_region': 69,
-                    'items': [
-                        {
-                            'name': '_prenom',
-                            'offset': '6',
-                            'length': '5',
-                            'continued': 'true',
-                            '_role': 'mère',
-                            'value': 'Ermi-',
-                            'reading_order': 3,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 69
-                        },
-                        {
-                            'name': '_prenom',
-                            'offset': '0',
-                            'length': '4',
-                            'continued': 'true',
-                            '_role': 'mère',
-                            'value': 'nise',
-                            'reading_order': 4,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 75
-                        }
-                    ]
-                },
-                {
-                    'name': '_nom',
-                    'offset': '5',
-                    'length': 8,
-                    'value': 'Champeau',
-                    'reading_order': 4,
-                    'transcription_id': transcription_id,
-                    'offset_in_region': 80,
-                    'items': [
-                        {
-                            'name': '_nom',
-                            'offset': '5',
-                            'length': '8',
-                            'value': 'Champeau',
-                            'reading_order': 4,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 80
-                        }
-                    ]
-                }
-            ]
-        )
-
-        tags = list(filter(lambda tag: tag['name'][0] == '_', self.all_tags(blocks[1])))
-        tags = parser.regroup(tags)
-        tags = parser.cesure(tags)
-
-        text_region = 'S. 10\nDragon\nMarie\nEmillia\nElmire'
-        transcription_id = Transcription.objects.get(text=text_region).id
-
-        self.assertEqual(
-            tags,
-            [
-                {
-                    'name': '_nom',
-                    'offset': '0',
-                    'length': 6,
-                    'value': 'Dragon',
-                    'reading_order': 1,
-                    'transcription_id': transcription_id,
-                    'offset_in_region': 6,
-                    'items': [
-                        {
-                            'name': '_nom',
-                            'offset': '0',
-                            'length': '6',
-                            'value': 'Dragon',
-                            'reading_order': 1,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 6
-                        }
-                    ]
-                },
-                {
-                    'name': '_prenom',
-                    'offset': '0',
-                    'length': 20,
-                    'continued': 'true',
-                    '_role': 'sujet',
-                    'value': 'Marie Emillia Elmire',
-                    'reading_order': 4,
-                    'transcription_id': transcription_id,
-                    'offset_in_region': 13,
-                    'items': [
-                        {
-                            'name': '_prenom',
-                            'offset': '0',
-                            'length': '5',
-                            'continued': 'true',
-                            '_role': 'sujet',
-                            'value': 'Marie',
-                            'reading_order': 2,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 13
-                        },
-                        {
-                            'name': '_prenom',
-                            'offset': '0',
-                            'length': '7',
-                            'continued': 'true',
-                            '_role': 'sujet',
-                            'value': 'Emillia',
-                            'reading_order': 3,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 19
-                        },
-                        {
-                            'name': '_prenom',
-                            'offset': '0',
-                            'length': '6',
-                            'continued': 'true',
-                            '_role': 'sujet',
-                            'value': 'Elmire',
-                            'reading_order': 4,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 27
-                        }
-                    ]
-                }
-            ]
-        )
-
-    def test_pagexml_first_and_last_name(self):
-        parser = PageXmlParser(FIXTURES / 'first_and_last_name.xml', self.page.corpus)
-        transcriptions = parser.save(self.page)
-        blocks = parser.create_blocks(transcriptions)
-        self.assertEqual(len(blocks), 3)
-        self.assertEqual(len(blocks[0]['lines']), 7)
-        self.assertEqual(len(blocks[1]['lines']), 6)
-        self.assertEqual(len(blocks[2]['lines']), 6)
-        tags = list(filter(lambda tag: tag['name'][0] == '_', self.all_tags(blocks[0])))
-
-        tags = parser.regroup(tags)
-        self.assertEqual(len(tags), 3)
-        tags = parser.first_and_last_name(tags)
-        tags = parser.cesure(tags)
-
-        text_region = 'B. 31\nChaloux\n( ) Gérard\nHenri\nP.107\nv N.H.\nT.O.N. PtreCuré'
-        transcription_id = Transcription.objects.get(text=text_region).id
-
-        self.assertEqual(
-            tags,
-            [
-                {
-                    'name': '_prenom',
-                    'offset': '4',
-                    'length': 20,
-                    'continued': 'true',
-                    '_role': 'sujet',
-                    'value': 'Gérard Henri Chaloux',
-                    'reading_order': 3,
-                    'transcription_id': transcription_id,
-                    'offset_in_region': 18,
-                    'items': [
-                        {
-                            'name': '_prenom',
-                            'offset': '4',
-                            'length': '6',
-                            'continued': 'true',
-                            '_role': 'sujet',
-                            'value': 'Gérard',
-                            'reading_order': 2,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 18
-                        },
-                        {
-                            'name': '_prenom',
-                            'offset': '0',
-                            'length': '5',
-                            'continued': 'true',
-                            '_role': 'sujet',
-                            'value': 'Henri',
-                            'reading_order': 3,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 25
-                        },
-                        {
-                            'name': '_nom',
-                            'offset': '0',
-                            'length': '7',
-                            'value': 'Chaloux',
-                            'reading_order': 1,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 6
-                        }
-                    ],
-                    '_role': 'sujet'
-                },
-                {
-                    'name': '_lieu_residence',
-                    'offset': '2',
-                    'length': 4,
-                    'value': 'N.H.',
-                    'reading_order': 5,
-                    'transcription_id': transcription_id,
-                    'offset_in_region': 39,
-                    'items': [
-                        {
-                            'name': '_lieu_residence',
-                            'offset': '2',
-                            'length': '4',
-                            'value': 'N.H.',
-                            'reading_order': 5,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 39
-                        }
-                    ]
-                }
-            ]
-        )
-
-        tags = list(filter(lambda tag: tag['name'][0] == '_', self.all_tags(blocks[1])))
-        tags = parser.regroup(tags)
-        self.assertEqual(len(tags), 5)
-        tags = parser.first_and_last_name(tags)
-        tags = parser.cesure(tags)
-
-        text_region = 'Nous avons\ninhumé\nle corps d\'un garçon issu \nde Joseph Du-\nmoulin, et \nde Valeda Paquette.'
-        transcription_id = Transcription.objects.get(text=text_region).id
-
-        self.assertEqual(
-            tags,
-            [
-                {
-                    'name': '_prenom',
-                    'offset': '14',
-                    'length': 17,
-                    '_role': 'sujet',
-                    'value': 'garçon Dumoulin',
-                    'reading_order': 3,
-                    'transcription_id': transcription_id,
-                    'offset_in_region': 32,
-                    'items': [
-                        {
-                            'name': '_prenom',
-                            'offset': '14',
-                            'length': '6',
-                            '_role': 'sujet',
-                            'value': 'garçon',
-                            'reading_order': 3,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 32
-                        },
-                        {
-                            'name': '_nom',
-                            'offset': '10',
-                            'length': '3',
-                            'continued': 'true',
-                            'value': 'Du-',
-                            'reading_order': 4,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 54
-                        },
-                        {
-                            'name': '_nom',
-                            'offset': '0',
-                            'length': '6',
-                            'continued': 'true',
-                            'value': 'moulin',
-                            'reading_order': 5,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 58
-                        }
-                    ]
-                },
-                {
-                    'name': '_prenom',
-                    'offset': '3',
-                    'length': 17,
-                    '_role': 'père',
-                    'value': 'Joseph Dumoulin',
-                    'reading_order': 4,
-                    'transcription_id': transcription_id,
-                    'offset_in_region': 47,
-                    'items': [
-                        {
-                            'name': '_prenom',
-                            'offset': '3',
-                            'length': '6',
-                            '_role': 'père',
-                            'value': 'Joseph',
-                            'reading_order': 4,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 47
-                        },
-                        {
-                            'name': '_nom',
-                            'offset': '10',
-                            'length': '3',
-                            'continued': 'true',
-                            'value': 'Du-',
-                            'reading_order': 4,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 54
-                        },
-                        {
-                            'name': '_nom',
-                            'offset': '0',
-                            'length': '6',
-                            'continued': 'true',
-                            'value': 'moulin',
-                            'reading_order': 5,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 58
-                        }
-                    ]
-                },
-                {
-                    'name': '_prenom',
-                    'offset': '3',
-                    'length': 15,
-                    '_role': 'mère',
-                    'value': 'Valeda Paquette',
-                    'reading_order': 6,
-                    'transcription_id': transcription_id,
-                    'offset_in_region': 73,
-                    'items': [
-                        {
-                            'name': '_prenom',
-                            'offset': '3',
-                            'length': '6',
-                            '_role': 'mère',
-                            'value': 'Valeda',
-                            'reading_order': 6,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 73
-                        },
-                        {
-                            'name': '_nom',
-                            'offset': '10',
-                            'length': '8',
-                            'value': 'Paquette',
-                            'reading_order': 6,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 80
-                        }
-                    ]
-                },
-            ]
-        )
-
-        tags = list(filter(lambda tag: tag['name'][0] == '_', self.all_tags(blocks[2])))
-        tags = parser.regroup(tags)
-        self.assertEqual(len(tags), 2)
-        tags = parser.first_and_last_name(tags)
-        tags = parser.cesure(tags)
-
-        text_region = 'S. 12\nLapalme\nCharles-Albert\nP.107\nS. Chycoine\nvoir folio 10'
-        transcription_id = Transcription.objects.get(text=text_region).id
-
-        self.assertEqual(
-            tags,
-            [
-                {
-                    'name': '_prenom',
-                    'offset': '0',
-                    'length': 22,
-                    '_role': 'sujet',
-                    'value': 'Charles-Albert Lapalme',
-                    'reading_order': 2,
-                    'transcription_id': transcription_id,
-                    'offset_in_region': 14,
-                    'items': [
-                        {
-                            'name': '_prenom',
-                            'offset': '0',
-                            'length': '14',
-                            '_role': 'sujet',
-                            'value': 'Charles-Albert',
-                            'reading_order': 2,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 14
-                        },
-                        {
-                            'name': '_nom',
-                            'offset': '0',
-                            'length': '7',
-                            'value': 'Lapalme',
-                            'reading_order': 1,
-                            'transcription_id': transcription_id,
-                            'offset_in_region': 6
-                        }
-                    ]
-                }
-            ]
-        )
-
-    def test_merge(self):
-        parser = PageXmlParser(FIXTURES / 'merge.xml', self.page.corpus)
-        transcriptions = parser.save(self.page)
-        blocks = parser.create_blocks(transcriptions)
-        blocks = parser.merge(blocks)
-
-        text_region_0 = 'Nous\navons baptisé\nMarie-Alice-Rosilda, née\nde Antoine Landry\net de Zelpha\nBeauchesne.'
-        text_region_1 = 'B. 32\nLandry\nMarie-Alice\nRosilda\nP.108'
-        transcription_id_0 = Transcription.objects.get(text=text_region_0).id
-        transcription_id_1 = Transcription.objects.get(text=text_region_1).id
-
-        self.assertEqual(
-            blocks,
-            {
-                0: [
-                    {
-                        'name': '_prenom',
-                        'offset': '0',
-                        'length': 26,
-                        '_role': 'sujet',
-                        'value': 'Marie-Alice-Rosilda Landry',
-                        'reading_order': 2,
-                        'transcription_id': transcription_id_0,
-                        'offset_in_region': 42,
-                        'items': [
-                            {
-                                'name': '_prenom',
-                                'offset': '0',
-                                'length': '19',
-                                '_role': 'sujet',
-                                'value': 'Marie-Alice-Rosilda',
-                                'reading_order': 2,
-                                'transcription_id': transcription_id_0,
-                                'offset_in_region': 42
-                            },
-                            {
-                                'name': '_nom',
-                                'offset': '11',
-                                'length': '6',
-                                'value': 'Landry',
-                                'reading_order': 3,
-                                'transcription_id': transcription_id_0,
-                                'offset_in_region': 78
-                            }
-                        ]
-                    },
-                    {
-                        'name': '_prenom',
-                        'offset': '3',
-                        'length': 14,
-                        '_role': 'père',
-                        'value': 'Antoine Landry',
-                        'reading_order': 3,
-                        'transcription_id': transcription_id_0,
-                        'offset_in_region': 70,
-                        'items': [
-                            {
-                                'name': '_prenom',
-                                'offset': '3',
-                                'length': '7',
-                                '_role': 'père',
-                                'value': 'Antoine',
-                                'reading_order': 3,
-                                'transcription_id': transcription_id_0,
-                                'offset_in_region': 70
-                            },
-                            {
-                                'name': '_nom',
-                                'offset': '11',
-                                'length': '6',
-                                'value': 'Landry',
-                                'reading_order': 3,
-                                'transcription_id': transcription_id_0,
-                                'offset_in_region': 78
-                            }
-                        ]
-                    },
-                    {
-                        'name': '_prenom',
-                        'offset': '6',
-                        'length': 17,
-                        '_role': 'mère',
-                        'value': 'Zelpha Beauchesne',
-                        'reading_order': 4,
-                        'transcription_id': transcription_id_0,
-                        'offset_in_region': 91,
-                        'items': [
-                            {
-                                'name': '_prenom',
-                                'offset': '6',
-                                'length': '6',
-                                '_role': 'mère',
-                                'value': 'Zelpha',
-                                'reading_order': 4,
-                                'transcription_id': transcription_id_0,
-                                'offset_in_region': 91
-                            },
-                            {
-                                'name': '_nom',
-                                'offset': '0',
-                                'length': '10',
-                                'value': 'Beauchesne',
-                                'reading_order': 5,
-                                'transcription_id': transcription_id_0,
-                                'offset_in_region': 98
-                            }
-                        ]
-                    }
-                ],
-                1: [
-                    {
-                        'name': '_prenom',
-                        'offset': '0',
-                        'length': 26,
-                        'continued': 'true',
-                        '_role': 'sujet',
-                        'value': 'Marie-Alice Rosilda Landry',
-                        'reading_order': 3,
-                        'transcription_id': transcription_id_1,
-                        'offset_in_region': 13,
-                        'items': [
-                            {
-                                'name': '_prenom',
-                                'offset': '0',
-                                'length': '11',
-                                'continued': 'true',
-                                '_role': 'sujet',
-                                'value': 'Marie-Alice',
-                                'reading_order': 2,
-                                'transcription_id': transcription_id_1,
-                                'offset_in_region': 13
-                            },
-                            {
-                                'name': '_prenom',
-                                'offset': '0',
-                                'length': '7',
-                                'continued': 'true',
-                                '_role': 'sujet',
-                                'value': 'Rosilda',
-                                'reading_order': 3,
-                                'transcription_id': transcription_id_1,
-                                'offset_in_region': 25
-                            },
-                            {
-                                'name': '_nom',
-                                'offset': '0',
-                                'length': '6',
-                                'value': 'Landry',
-                                'reading_order': 1,
-                                'transcription_id': transcription_id_1,
-                                'offset_in_region': 6
-                            }
-                        ]
-                    }
-                ]
-            }
-        )
-
-    def test_create_entities(self):
-        parser = PageXmlParser(FIXTURES / 'create_entities.xml', self.page.corpus)
-        transcriptions = parser.save(self.page)
-        blocks = parser.create_blocks(transcriptions)
-        blocks = parser.merge(blocks)
-
-        entities = []
-        for block in blocks:
-            entities += parser.create_entities(blocks[block], self.page, self.types)[0]
-
-        nb_entities = Entity.objects.filter(corpus_id=self.page.corpus.id).count()
-        self.assertEqual(len(entities), nb_entities)
-
-    def test_create_roles(self):
-        parser = PageXmlParser(FIXTURES / 'merge.xml', self.page.corpus)
-        transcriptions = parser.save(self.page)
-        blocks = parser.create_blocks(transcriptions)
-        blocks = parser.merge(blocks)
-
-        for block in blocks.values():
-            entities, tags = parser.create_entities(block, self.page, self.types)
-            parser.create_roles(tags, self.page, TRANSKRIBUS_ROLE, 0.90)
-
-        nb_roles = EntityRole.objects.filter(corpus_id=self.page.corpus.id).count()
-        self.assertEqual(2, nb_roles)
-
-    def test_create_links(self):
-        parser = PageXmlParser(FIXTURES / 'merge.xml', self.page.corpus)
-        transcriptions = parser.save(self.page)
-        blocks = parser.create_blocks(transcriptions)
-        blocks = parser.merge(blocks)
-
-        for block in blocks.values():
-            entities, tags = parser.create_entities(block, self.page, self.types)
-            tags = parser.create_roles(tags, self.page, TRANSKRIBUS_ROLE, 0.90)
-            parser.create_links(tags, TRANSKRIBUS_ROLE, 0.90)
-
-        nb_entities = Entity.objects.filter(corpus_id=self.page.corpus.id).count()
-        nb_roles = EntityRole.objects.filter(corpus_id=self.page.corpus.id).count()
-        nb_links = EntityLink.objects.filter(role__corpus__id=self.page.corpus.id).count()
-        self.assertEqual(4, nb_entities)
-        self.assertEqual(2, nb_roles)
-        self.assertEqual(2, nb_links)
-
-    def test_create_objects(self):
-        parser = PageXmlParser(FIXTURES / 'merge.xml', self.page.corpus)
-        annotations = parser.save(self.page)
-        blocks = parser.create_blocks(annotations)
-        blocks = parser.merge(blocks)
-        entities_id = parser.create_objects(blocks, self.page, TRANSKRIBUS_ROLE, self.types)
-
-        nb_entities = Entity.objects.filter(corpus_id=self.page.corpus.id).count()
-        nb_roles = EntityRole.objects.filter(corpus_id=self.page.corpus.id).count()
-        nb_links = EntityLink.objects.filter(role__corpus__id=self.page.corpus.id).count()
-        nb_transcription_entity = TranscriptionEntity.objects.filter(entity__id__in=entities_id).count()
-        self.assertEqual(4, nb_entities)
-        self.assertEqual(2, nb_roles)
-        self.assertEqual(2, nb_links)
-        self.assertEqual(5, nb_transcription_entity)
-
-    def test_cesure(self):
-        parser = PageXmlParser(FIXTURES / 'merge.xml', self.page.corpus)
-
-        personne = [
-            {
-                'items': [{'value': ' Marie    '}, {'value': ' Anne '}]
-            }
-        ]
-        completed_personne = parser.cesure(personne)
-        self.assertEqual(completed_personne[0]['value'], 'Marie Anne')
-
-        personne = [
-            {
-                'items': [{'value': ' Mari    '}, {'value': ' anne '}]
-            }
-        ]
-        completed_personne = parser.cesure(personne)
-        self.assertEqual(completed_personne[0]['value'], 'Marianne')
-
-        personne = [
-            {
-                'items': [{'value': '    Marie-  - '}, {'value': '---Anne '}]
-            }
-        ]
-        completed_personne = parser.cesure(personne)
-        self.assertEqual(completed_personne[0]['value'], 'Marie-Anne')
-
-        personne = [
-            {
-                'items': [{'value': '  Mari --'}, {'value': '- - anne   '}]
-            }
-        ]
-        completed_personne = parser.cesure(personne)
-        self.assertEqual(completed_personne[0]['value'], 'Marianne')
diff --git a/arkindex/project/api_v1.py b/arkindex/project/api_v1.py
index 20bd8a2eb3..892635a602 100644
--- a/arkindex/project/api_v1.py
+++ b/arkindex/project/api_v1.py
@@ -13,7 +13,7 @@ from arkindex.documents.api.elements import (
 from arkindex.documents.api.search import ElementSearch, EntitySearch
 from arkindex.documents.api.ml import (
     ClassificationCreate, ClassificationValidate, ClassificationReject, ClassificationBulk, ElementTranscriptionsBulk,
-    TranscriptionCreate, TranscriptionBulk, PageXmlTranscriptionsImport, ElementMLStats, CorpusMLStats,
+    TranscriptionCreate, TranscriptionBulk, ElementMLStats, CorpusMLStats,
     ManageClassificationsSelection, MLClassList, CorpusMLClassList, TranscriptionEdit
 )
 from arkindex.documents.api.entities import (
@@ -61,11 +61,6 @@ api = [
     ),
     path('element/<uuid:pk>/ml-stats/', ElementMLStats.as_view(), name='element-ml-stats'),
     path('element/<uuid:child>/parent/<uuid:parent>/', ElementParent.as_view(), name='element-parent'),
-    path(
-        'element/<uuid:pk>/transcriptions/xml/',
-        PageXmlTranscriptionsImport.as_view(),
-        name='pagexml-transcriptions',
-    ),
 
     # Corpora
     path('corpus/', CorpusList.as_view(), name='corpus'),
diff --git a/arkindex/project/parsers.py b/arkindex/project/parsers.py
deleted file mode 100644
index d55f8f7579..0000000000
--- a/arkindex/project/parsers.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from rest_framework.parsers import BaseParser
-from lxml import etree
-
-
-class XMLParser(BaseParser):
-    """
-    A basic XML parser without serializer support
-    """
-    media_type = 'application/xml'
-
-    def parse(self, stream, media_type=None, parser_context=None):
-        """
-        Parse the request body into a lxml Element
-        """
-        return etree.parse(stream).getroot()
diff --git a/base/requirements.txt b/base/requirements.txt
index ec332a88ef..8f79c4baee 100644
--- a/base/requirements.txt
+++ b/base/requirements.txt
@@ -4,7 +4,6 @@ Django==2.2.13
 elasticsearch==6.2.0
 hiredis==1.0.0
 ijson==2.3
-lxml==4.2.3
 openpyxl==2.4.9
 psycopg2==2.8.5
 python-Levenshtein==0.12.0
diff --git a/requirements.txt b/requirements.txt
index c8f09a28a0..4b0df2e263 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,7 +15,6 @@ django-enumfields==1.0.0
 django-redis==4.12.1
 djangorestframework==3.11.0
 elasticsearch-dsl>=6.0.0,<7.0.0
-et-xmlfile==1.0.1
 gitpython==3.0.8
 idna==2.6
 jdcal==1.3
-- 
GitLab