diff --git a/arkindex/dataimport/admin.py b/arkindex/dataimport/admin.py
index 70978a4399b6a1c0d40da0d3caf27777c405b05d..f4624405c9498a5c6b88b304c91fb895ab49ff00 100644
--- a/arkindex/dataimport/admin.py
+++ b/arkindex/dataimport/admin.py
@@ -30,7 +30,7 @@ class RevisionInline(admin.StackedInline):
 class RepositoryAdmin(admin.ModelAdmin):
     list_display = ('id', 'url', 'corpus')
     list_filter = ('corpus', )
-    fields = ('id', 'url', 'corpus', 'hook_token', 'watched_branches')
+    fields = ('id', 'url', 'corpus', 'hook_token')
     readonly_fields = ('id', )
     inlines = [RevisionInline, ]
 
diff --git a/arkindex/dataimport/api.py b/arkindex/dataimport/api.py
index 587a12fbda62faa63391180de934cd94d8ef417e..92df9250564ed55c8d68108212345a9b11f5a8f8 100644
--- a/arkindex/dataimport/api.py
+++ b/arkindex/dataimport/api.py
@@ -14,10 +14,11 @@ from rest_framework.exceptions import ValidationError
 from arkindex.project.mixins import CorpusACLMixin
 from arkindex.documents.models import Corpus, Right, Element, ElementType
 from arkindex.dataimport.models import \
-    DataImport, DataFile, DataImportState, DataImportMode, DataImportFailure, Repository
-from arkindex.dataimport.serializers import \
-    DataImportLightSerializer, DataImportSerializer, DataImportFailureSerializer, DataFileSerializer, \
-    RepositoryLightSerializer, RepositorySerializer, ExternalRepositorySerializer
+    DataImport, DataFile, DataImportState, DataImportMode, DataImportFailure, Repository, Event
+from arkindex.dataimport.serializers import (
+    DataImportLightSerializer, DataImportSerializer, DataImportFailureSerializer, DataFileSerializer,
+    RepositorySerializer, ExternalRepositorySerializer, EventSerializer
+)
 from arkindex.users.models import OAuthCredentials
 import hashlib
 import magic
@@ -251,7 +252,7 @@ class GitRepositoryImportHook(APIView):
 
 class RepositoryList(ListAPIView):
     permission_classes = (IsAuthenticated, )
-    serializer_class = RepositoryLightSerializer
+    serializer_class = RepositorySerializer
 
     def get_queryset(self):
         return Repository.objects.filter(
@@ -325,3 +326,14 @@ class RepositoryStartImport(RetrieveAPIView):
             raise ValidationError("An import is already running for the latest revision")
 
         return Response(data={'import_id': str(rev.start_import().id)})
+
+
+class ElementHistory(ListAPIView):
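+    """
+    List the import events (additions, edits, deletions) recorded for a
+    single element across repository revisions readable by the user.
+    """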
+    permission_classes = (IsAuthenticated, )
+    serializer_class = EventSerializer
+
+    def get_queryset(self):
+        return Event.objects.filter(
+            element_id=self.kwargs['pk'],
+            element__corpus__in=Corpus.objects.readable(self.request.user),
+        )
diff --git a/arkindex/dataimport/config.py b/arkindex/dataimport/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b5871c5a453321cc54a81750b0b0d85b69be025
--- /dev/null
+++ b/arkindex/dataimport/config.py
@@ -0,0 +1,189 @@
+import fnmatch
+import yaml
+from enum import Enum
+from django.core.validators import URLValidator
+from django.core.exceptions import ValidationError
+from django.utils.functional import cached_property
+from arkindex.documents.models import Corpus
+from arkindex.images.models import ImageServer
+
+
+class ImportType(Enum):
+    Volumes = 'volumes'
+    Transcriptions = 'transcriptions'
+    Surfaces = 'surfaces'
+    Acts = 'acts'
+    Metadata = 'metadata'
+
+
+class VolumesImportFormat(Enum):
+    IIIF = 'iiif'
+    TXT = 'txt'
+
+
+class TranscriptionsImportFormat(Enum):
+    IIIF = 'iiif'
+    Index = 'index'
+    GzippedIndex = 'index-gzip'
+
+
+class SurfacesImportFormat(Enum):
+    XML = 'xml'
+
+
+class ActsImportFormat(Enum):
+    CSV = 'csv'
+
+
+class MetadataImportFormat(Enum):
+    TEI = 'tei'
+
+
+class ConfigFile(object):
+    """
+    A .arkindex.yml configuration file
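+
+    A minimal example, inferred from the validation rules below
+    (names and paths are illustrative):
+
+        version: 1
+        branches:
+          - master
+        corpus:
+          name: My corpus
+          description: Demo corpus
+        volumes:
+          paths:
+            - "manifests/*.json"
+          format: iiif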
+    """
+
+    FILE_NAME = '.arkindex.yml'
+    REQUIRED_ITEMS = ('version', 'branches', 'corpus')
+    FORMAT_ENUMS = {
+        ImportType.Volumes: VolumesImportFormat,
+        ImportType.Transcriptions: TranscriptionsImportFormat,
+        ImportType.Surfaces: SurfacesImportFormat,
+        ImportType.Acts: ActsImportFormat,
+        ImportType.Metadata: MetadataImportFormat,
+    }
+
+    def __init__(self, data):
+        self._config = yaml.safe_load(data)
+        self.validate()
+        self.setattrs()
+
+    @staticmethod
+    def from_path(path):
+        with open(path, 'rb') as f:
+            return ConfigFile(f)
+
+    def validate(self):
+        """
+        Validate an Arkindex configuration file. Raises ValidationError on any invalid setting.
+        """
+
+        # Required first level items
+        for item in self.REQUIRED_ITEMS:
+            if item not in self._config:
+                raise ValidationError("Missing '{}' setting".format(item))
+
+        # Format version
+        if self._config['version'] != 1:
+            raise ValidationError("Unsupported format version '{}'".format(self._config['version']))
+
+        # Branches list
+        if any(not isinstance(branch, str) for branch in self._config['branches']):
+            raise ValidationError("Bad 'branches' format: should be a list of branch names")
+
+        # Corpus info
+        for item in ('name', 'description'):
+            if item not in self._config['corpus']:
+                raise ValidationError("Missing '{}' parameter in 'corpus'".format(item))
+
+        # At least one import type required
+        if not any(it.value in self._config for it in ImportType):
+            raise ValidationError("No import types were specified")
+
+        # Required 'paths' for each type
+        for it in ImportType:
+            if it.value not in self._config:
+                continue
+            if 'paths' not in self._config[it.value]:
+                raise ValidationError("Missing 'paths' parameter in '{}'".format(it.value))
+            if not all(isinstance(path, str) for path in self._config[it.value]['paths']):
+                raise ValidationError("Bad 'paths' format in '{}': should be a list of patterns".format(it.value))
+
+        # Import file formats
+        for import_type, format_enum in self.FORMAT_ENUMS.items():
+            if import_type.value not in self._config:
+                continue
+            if 'format' not in self._config[import_type.value]:
+                continue
+            try:
+                format_enum(self._config[import_type.value]['format'])
+            except ValueError:
+                raise ValidationError("Format setting in '{}' should be one of '{}'".format(
+                    import_type.value, "', '".join(fmt.value for fmt in format_enum)))
+
+        # Manifest import-specific validation
+        if ImportType.Volumes.value in self._config:
+            volumes = self._config[ImportType.Volumes.value]
+
+            if 'image_servers' in volumes:
+                if len(volumes['image_servers'].values()) != len(set(volumes['image_servers'].values())):
+                    raise ValidationError("Duplicate server URLs in 'volumes.image_servers'")
+
+                validate_url = URLValidator(
+                    schemes=['http', 'https'],
+                    message="Invalid IIIF server URL in '{}.image_servers'".format(ImportType.Volumes.value),
+                )
+                for url in volumes['image_servers'].values():
+                    validate_url(url)
+
+    def setattrs(self):
+        """
+        Set attributes on this instance from a validated configuration
+        """
+        self.version = self._config['version']
+        self.branches = self._config['branches']
+        self.imports = [it for it in ImportType if it.value in self._config]
+
+        # Default formats
+        self.volumes_format = VolumesImportFormat.IIIF
+        self.transcriptions_format = TranscriptionsImportFormat.GzippedIndex
+        self.surfaces_format = SurfacesImportFormat.XML
+        self.acts_format = ActsImportFormat.CSV
+        self.metadata_format = MetadataImportFormat.TEI
+
+        for import_type, format_enum in self.FORMAT_ENUMS.items():
+            if import_type.value not in self._config or 'format' not in self._config[import_type.value]:
+                continue
+            setattr(
+                self,
+                '{}_format'.format(import_type.value),
+                format_enum(self._config[import_type.value]['format']),
+            )
+
+        if ImportType.Volumes in self.imports:
+            self.volumes_lazy_checks = self._config[ImportType.Volumes.value].get('lazy_checks', False)
+            self.volumes_autoconvert_https = self._config[ImportType.Volumes.value].get('convert_https', False)
+
+    def path_match(self, import_type, path):
+        """
+        Check a given path matches any paths configured for a given import type.
+        """
+        if import_type not in self.imports:
+            return False
+        return any(fnmatch.fnmatch(path, pattern) for pattern in self._config[import_type.value]['paths'])
+
+    @cached_property
+    def corpus(self):
+        c, _ = Corpus.objects.get_or_create(name=self._config['corpus']['name'], defaults={'public': False})
+        if 'description' in self._config['corpus']:
+            c.description = self._config['corpus']['description']
+        if 'public' in self._config['corpus']:
+            c.public = bool(self._config['corpus']['public'])
+        c.save()
+        return c
+
+    @cached_property
+    def volumes_image_servers(self):
+        if ImportType.Volumes not in self.imports:
+            return []
+
+        servers_config = self._config[ImportType.Volumes.value].get('image_servers')
+        if not servers_config:
+            return list(ImageServer.objects.all())
+
+        servers = []
+        for name, url in servers_config.items():
+            s, _ = ImageServer.objects.get_or_create(url=url, defaults={'name': name})
+            servers.append(s)
+        return servers
diff --git a/arkindex/dataimport/filetypes.py b/arkindex/dataimport/filetypes.py
new file mode 100644
index 0000000000000000000000000000000000000000..b7af7bd234e1430c8584cd3e8ea2f51733a793e4
--- /dev/null
+++ b/arkindex/dataimport/filetypes.py
@@ -0,0 +1,221 @@
+import os
+import logging
+from abc import ABC, abstractmethod
+from arkindex.dataimport.config import ConfigFile, ImportType, VolumesImportFormat, TranscriptionsImportFormat
+from arkindex.dataimport.iiif import ManifestParser
+
+
+logger = logging.getLogger(__name__)
+
+
+class FileType(ABC):
+    """
+    A file type that can be handled by Git import workflows
+    """
+
+    @staticmethod
+    def identify(path, config):
+        """
+        Return a FileType class corresponding to a given file path, or None if it is unknown.
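+        For instance, with a config routing 'manifests/*.json' to IIIF volume
+        imports, identify('manifests/foo.json', config) returns ManifestFileType.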
+        """
+        return next((ft for ft in file_types if ft.match(path, config)), None)
+
+    @classmethod
+    @abstractmethod
+    def match(cls, path, config):
+        """
+        Returns True if the specified path matches this file type.
+        """
+
+    @classmethod
+    @abstractmethod
+    def handle(cls, dataimport, change_type, old_path, new_path, config):
+        """
+        Handle a Git diff on a single file. Should run synchronously.
+        """
+
+
+class ManifestFileType(FileType):
+    """
+    IIIF manifests describing volumes and pages, with the volume name as their file name
+    """
+
+    @classmethod
+    def match(cls, path, config):
+        assert isinstance(config, ConfigFile)
+        return config.path_match(ImportType.Volumes, path) and \
+            config.volumes_format == VolumesImportFormat.IIIF and \
+            path.endswith('.json')
+
+    @classmethod
+    def handle(cls, dataimport, change_type, old_path, new_path, config):
+        assert isinstance(config, ConfigFile)
+
+        # TODO: Handle a pure deletion diff
+        cls.run(dataimport, os.path.join(dataimport.revision.repo.clone_dir, new_path), config)
+
+    @classmethod
+    def run(cls, dataimport, path, config):
+        ManifestParser(
+            path,
+            dataimport.revision,
+            config.corpus,
+            lazy=config.volumes_lazy_checks,
+            servers=config.volumes_image_servers,
+            autocreate_servers=not config.volumes_image_servers,  # Autocreate if there are no listed servers
+            autoconvert_https=config.volumes_autoconvert_https,
+        ).run()
+
+
+class ManifestListFileType(FileType):
+    """
+    Lists of IIIF manifest URLs in text files
+    """
+
+    @classmethod
+    def match(cls, path, config):
+        assert isinstance(config, ConfigFile)
+        return config.path_match(ImportType.Volumes, path) and \
+            config.volumes_format == VolumesImportFormat.TXT and \
+            path.endswith('.txt')
+
+    @classmethod
+    def handle(cls, dataimport, change_type, old_path, new_path, config):
+        assert isinstance(config, ConfigFile)
+
+        # TODO: Handle a pure deletion diff
+        # TODO: Maybe look at the actual Git diff to see which manifests in the list were modified?
+        with open(os.path.join(dataimport.revision.repo.clone_dir, new_path)) as f:
+            paths = [line.strip() for line in f.read().splitlines() if line.strip()]
+
+        for path in paths:
+            logger.info('Parsing manifest {}'.format(path))
+            ManifestFileType.run(dataimport, path, config)
+
+
+class GzippedIndexFileType(FileType):
+    """
+    Index files, organized in folders corresponding to volumes and compressed with gzip,
+    with a part of the image path as their file name
+    """
+
+    @classmethod
+    def match(cls, path, config):
+        assert isinstance(config, ConfigFile)
+        return config.path_match(ImportType.Transcriptions, path) and \
+            config.transcriptions_format == TranscriptionsImportFormat.GzippedIndex and \
+            path.endswith('.idx.gz')
+
+    @classmethod
+    def handle(cls, dataimport, change_type, old_path, new_path, config):
+        logger.warning('Transcription imports are not yet supported')
+
+
+class IndexFileType(FileType):
+    """
+    Index files, organized in folders corresponding to volumes,
+    with a part of the image path as their file name
+    """
+
+    @classmethod
+    def match(cls, path, config):
+        assert isinstance(config, ConfigFile)
+        return config.path_match(ImportType.Transcriptions, path) and \
+            config.transcriptions_format == TranscriptionsImportFormat.Index and \
+            path.endswith('.idx')
+
+    @classmethod
+    def handle(cls, dataimport, change_type, old_path, new_path, config):
+        logger.warning('Transcription imports are not yet supported')
+
+
+class AnnotationListFileType(FileType):
+    """
+    IIIF annotation lists, organized in folders corresponding to volumes,
+    with a part of the image path as their file name
+    """
+
+    @classmethod
+    def match(cls, path, config):
+        assert isinstance(config, ConfigFile)
+        return config.path_match(ImportType.Transcriptions, path) and \
+            config.transcriptions_format == TranscriptionsImportFormat.IIIF and \
+            path.endswith('.json')
+
+    @classmethod
+    def handle(cls, dataimport, change_type, old_path, new_path, config):
+        logger.warning('Transcription imports are not yet supported')
+
+
+class ActsListFileType(FileType):
+    """
+    CSV files with a volume name as their filename and two columns: [act number, first folio]
+    """
+
+    @classmethod
+    def match(cls, path, config):
+        assert isinstance(config, ConfigFile)
+        return config.path_match(ImportType.Acts, path) and path.endswith('.csv')
+
+    @classmethod
+    def handle(cls, dataimport, change_type, old_path, new_path, config):
+        logger.warning('Acts imports are not yet supported')
+
+
+class SurfaceFileType(FileType):
+    """
+    XML surface files
+    """
+
+    @classmethod
+    def match(cls, path, config):
+        assert isinstance(config, ConfigFile)
+        return config.path_match(ImportType.Surfaces, path) and path.endswith('.xml')
+
+    @classmethod
+    def handle(cls, dataimport, change_type, old_path, new_path, config):
+        logger.warning('Surface imports are not yet supported')
+
+
+class MetadataFileType(FileType):
+    """
+    TEI-XML files
+    """
+
+    @classmethod
+    def match(cls, path, config):
+        assert isinstance(config, ConfigFile)
+        return config.path_match(ImportType.Metadata, path) and path.endswith('.xml')
+
+    @classmethod
+    def handle(cls, dataimport, change_type, old_path, new_path, config):
+        logger.warning('Metadata imports are not yet supported')
+
+
+class ConfigFileType(FileType):
+    """
+    An Arkindex repository configuration file
+    """
+
+    @classmethod
+    def match(cls, path, config):
+        return os.path.basename(path) == ConfigFile.FILE_NAME
+
+    @classmethod
+    def handle(cls, dataimport, change_type, old_path, new_path, config):
+        # Do nothing (config files are already handled in every other import)
+        return
+
+
+# Registered file types for the FileType.identify static method
+file_types = [
+    ManifestFileType,
+    ManifestListFileType,
+    GzippedIndexFileType,
+    IndexFileType,
+    AnnotationListFileType,
+    ActsListFileType,
+    SurfaceFileType,
+    MetadataFileType,
+    ConfigFileType,
+]
diff --git a/arkindex/dataimport/iiif.py b/arkindex/dataimport/iiif.py
new file mode 100644
index 0000000000000000000000000000000000000000..940e8afb4c859b03824be4f0367c124c20c3014a
--- /dev/null
+++ b/arkindex/dataimport/iiif.py
@@ -0,0 +1,387 @@
+import os
+import ijson
+import logging
+import requests
+import urllib.parse
+from io import BytesIO
+from django.core.validators import URLValidator
+from django.core.exceptions import ValidationError
+from django.db import transaction
+from arkindex.documents.models import Corpus, Element, ElementType, Page, MetaData, MetaType
+from arkindex.documents.importer import parse_folio
+from arkindex.images.models import ImageServer, Zone
+from arkindex.dataimport.models import Revision, EventType
+from arkindex.project.polygon import Polygon
+from arkindex.project.tools import random_string
+
+
+logger = logging.getLogger(__name__)
+
+
+class ManifestParser(object):
+    """
+    A class that parses a single IIIF manifest into a volume, a register, pages and images.
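+
+    Usage sketch, assuming an existing Revision and Corpus:
+
+        ManifestParser('volume.json', revision, corpus).run()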
+    """
+
+    def __init__(self, path, revision, corpus,
+                 lazy=False, servers=[],
+                 autocreate_servers=False, autoconvert_https=False, volume_name=None):
+        """
+        path: Path or URL to the IIIF manifest to parse.
+        revision: A Revision instance that should be used for change events on elements
+        corpus: An instance of Corpus which will hold all the elements.
+        lazy: Boolean indicating whether the parser should not perform checks on image existence
+        servers: An optional list of servers to restrict image server search
+        autocreate_servers: If the ImageServer for a given image URL cannot be found,
+                            will try to autodetect the server's URL and create new image servers
+        autoconvert_https: If the ImageServer for a given image URL cannot be found and the URL uses HTTP,
+                           will check the HTTPS version exists and try to match a server.
+        volume_name: Set to a non-empty str to override the default volume name.
+        """
+        self.path = path
+        assert isinstance(revision, Revision)
+        self.revision = revision
+        assert isinstance(corpus, Corpus)
+        self.corpus = corpus
+        assert isinstance(lazy, bool)
+        self.lazy = lazy
+        if servers:
+            assert isinstance(servers, list)
+            self.servers = servers
+        else:
+            self.servers = list(ImageServer.objects.all())
+        assert isinstance(autoconvert_https, bool)
+        self.autoconvert_https = autoconvert_https
+        assert isinstance(autocreate_servers, bool)
+        self.autocreate_servers = autocreate_servers
+        self.stream = None
+
+        # ImageServer instances that may get created on autocreate
+        self.created_servers = []
+
+        self.volume_changed, self.register_changed = False, False
+        self.volume_name = None
+        if volume_name:
+            assert isinstance(volume_name, str)
+            self.volume_name = volume_name
+
+    def _get_or_instance(self, model, defaults={}, **filters):
+        """
+        Like model.objects.get_or_create(),
+        except it creates a Python instance that is not saved into DB.
+        """
+        try:
+            return model.objects.get(**filters), False
+        except model.DoesNotExist:
+            filters.update(defaults)
+            kwargs = {  # Filter to remove Django lookups
+                k: v for k, v in filters.items()
+                if '__' not in k
+            }
+            return model(**kwargs), True
+
+    def _first_or_instance(self, model, defaults={}, **filters):
+        """
+        Like model.objects.get_or_create(),
+        except it does not fail if there are multiple items (using filter().first()),
+        and it creates a Python instance that is not saved into DB.
+        """
+        result = model.objects.filter(**filters).first()
+        if result:
+            return result, False
+        filters.update(defaults)
+        kwargs = {  # Filter to remove Django lookups
+            k: v for k, v in filters.items()
+            if '__' not in k
+        }
+        return model(**kwargs), True
+
+    def open(self):
+        """
+        Open a stream for the given manifest and save the volume name.
+        """
+        try:
+            URLValidator(schemes=['http', 'https'])(self.path)
+            resp = requests.get(self.path)
+            resp.raise_for_status()
+            # Cannot use stream=True here: the parser uses the seek(int) method which is unsupported
+            self.stream = BytesIO(resp.content)
+            name, ext = os.path.splitext(os.path.basename(urllib.parse.urlparse(self.path).path))
+        except ValidationError:
+            assert os.path.isfile(self.path), "File does not exist"
+            name, ext = os.path.splitext(os.path.basename(self.path))
+            self.stream = open(self.path, 'rb')
+        assert ext == '.json', "File does not have a JSON extension"
+        self.volume_name = self.volume_name or name
+
+    def close(self):
+        """
+        Close the stream if it is opened.
+        """
+        if self.stream and not self.stream.closed:
+            self.stream.close()
+
+    def check_manifest_type(self):
+        """
+        Check the file is an actual IIIF manifest.
+        """
+        jsonld_type = next((
+            value
+            for prefix, event, value in ijson.parse(self.stream)
+            if (prefix, event) == ('@type', 'string')
+        ), None)
+
+        if jsonld_type is None:
+            raise ValueError("Missing @type property in JSON data")
+
+        if jsonld_type != 'sc:Manifest':
+            raise ValueError("JSON file is not a IIIF manifest")
+
+        self.stream.seek(0)
+
+    def make_parents(self):
+        self.volume, new_volume = self._get_or_instance(
+            Element, type=ElementType.Volume, name=self.volume_name, corpus=self.corpus)
+        self.register, new_register = self._get_or_instance(
+            Element, type=ElementType.Register, name=self.volume_name, corpus=self.corpus)
+        self.is_new = new_volume and new_register
+
+    def find_image_server(self, image_url):
+        try:  # Look in the specified servers
+            return next(server for server in self.servers if image_url.startswith(server.url))
+        except StopIteration:
+            pass
+
+        splat = urllib.parse.urlsplit(image_url)
+        if self.autoconvert_https and splat.scheme == 'http':  # Try again but with HTTPS
+            # SplitResult is immutable: build a copy with _replace and reassemble with urlunsplit
+            splat = splat._replace(scheme='https')
+            new_url = urllib.parse.urlunsplit(splat)
+            logger.info("Trying '{}' instead of '{}'".format(new_url, image_url))
+            try:
+                # Look for a server before checking the server exists;
+                # no need to wait for a HTTP request if there is no associated server
+                serv = next(server for server in self.servers if new_url.startswith(server.url))
+                requests.head(new_url, timeout=5).raise_for_status()
+                return serv
+            except (requests.exceptions.RequestException, StopIteration):
+                pass
+
+        if not self.autocreate_servers:
+            return
+
+        logger.warning("No known image server for image {} - attempting autodetection".format(image_url))
+
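+        # e.g. an image at 'https://example.com/iiif/ark/page.jpg'
+        # yields the autodetected server URL 'https://example.com/iiif'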
+        if splat.path.startswith('/iiif'):
+            server_url = "{0}://{1}/iiif".format(splat.scheme, splat.netloc)
+            server_name = '_'.join((self.corpus.name, random_string(5)))
+            new_server = ImageServer(name=server_name, url=server_url)
+            self.created_servers.append(new_server)
+
+            logger.info("Created IIIF image server '{0}' with URL '{1}'".format(server_name, server_url))
+            return new_server
+
+    def parse_metadata(self):
+        """
+        Parse a manifest's metadata property into MetaData elements linked to the volume.
+        """
+        logger.info("Parsing metadata")
+        self.metadata = []
+
+        for item in ijson.items(self.stream, 'metadata.item'):
+            if not all(prop in item for prop in ('label', 'value')):
+                logger.warning('Metadata does not have the required label and value properties')
+                continue
+
+            md, created = self._get_or_instance(
+                MetaData,
+                element=self.volume,
+                type=MetaType.Text,
+                name=item['label'],
+                defaults={
+                    'revision': self.revision,
+                    'value': item['value'],
+                }
+            )
+            # Set volume as changed whenever any metadata is created
+            self.volume_changed = self.volume_changed or created
+            # Only update existing metadata revisions when values get updated
+            if not created and md.value != item['value']:
+                # Set volume as changed whenever any metadata is updated
+                self.volume_changed = True
+                md.value = item['value']
+                md.revision = self.revision
+            self.metadata.append(md)
+
+        self.stream.seek(0)
+
+    def parse_canvases(self):
+        """
+        Parse all canvases in all sequences of the manifest into Page
+        """
+        logger.info("Parsing canvases")
+        self.change_count, self.pages, self.images = 0, [], []
+        for canvas in ijson.items(self.stream, 'sequences.item.canvases.item'):
+            # Label contains the folio
+            folio = canvas.get('label')
+            if folio is None:
+                logger.warning(
+                    "Found an image canvas with ID {}, but no folio (label) was specified".format(canvas.get('@id')))
+                continue
+
+            if 'images' not in canvas or len(canvas['images']) < 1:
+                logger.warning("Canvas {} has no image".format(canvas.get('@id')))
+                continue
+
+            # Get the image resource
+            resource = canvas['images'][0].get('resource')
+            if resource is None:
+                logger.warning("Canvas {} has no image resource".format(canvas.get('@id')))
+                continue
+
+            # Go find the service ID to get the image URL
+            if 'service' not in resource:
+                logger.warning("Found an image resource with ID {} on canvas {}, but no service was specified".format(
+                    resource.get('@id'), canvas.get('@id')))
+                continue
+            if '@id' not in resource['service']:
+                logger.warning(
+                    "Found an image service on canvas {}, but no service ID was specified".format(canvas.get('@id')))
+                continue
+            service_id = resource['service']['@id']
+
+            # Find the right server
+            image_server = self.find_image_server(service_id)
+            if image_server is None:
+                logger.warning("No image server found for image {}".format(service_id))
+                continue
+
+            # Strip server URL to get just the path
+            image_path = service_id[len(image_server.url):].lstrip('/')
+            image = image_server.find_image(
+                image_path,
+                offline=self.lazy,
+                width=canvas['width'],
+                height=canvas['height'],
+            )
+            self.images.append(image)
+
+            page_name = "Page {0} du volume {1}".format(folio, self.volume.name)
+            page_type, page_nb, page_direction, page_complement = parse_folio(folio)
+
+            poly = Polygon.from_coords(0, 0, canvas['width'], canvas['height'])
+            zone, created = self._first_or_instance(
+                Zone,
+                image=image,
+                polygon=poly,
+                elements__type=ElementType.Page,
+                elements__corpus=self.corpus,
+            )
+
+            new_page = Page(
+                zone=zone,
+                corpus_id=self.corpus.id,
+                name=page_name,
+                folio=folio,
+                page_type=page_type,
+                nb=page_nb,
+                direction=page_direction,
+                complement=page_complement,
+            )
+
+            if created:
+                self.change_count += 1
+                self.pages.append((new_page, EventType.Addition))
+            else:
+                # Get a page in that zone
+                old_page = zone.elements.filter(type=ElementType.Page, corpus_id=self.corpus.id).first().page
+                if old_page.same_as(new_page):
+                    self.pages.append((old_page, None))
+                else:
+                    new_page.id = old_page.id
+                    new_page.created = old_page.created
+                    self.change_count += 1
+                    self.pages.append((new_page, EventType.Edit))
+
+            logger.debug('Parsed page {}'.format(folio))
+
+    def parse(self):
+        """
+        Run the full parsing process.
+        """
+        self.open()
+        self.check_manifest_type()
+        self.make_parents()
+        self.parse_metadata()
+        self.parse_canvases()
+
+    @transaction.atomic
+    def save(self):
+        logger.info('Saving volume and register...')
+        # Volumes and registers are matched by name: if a name changed, there is
+        # no way to tell it is the same element, so it can only be recreated.
+        self.volume.save()
+        self.register.save()
+        self.register.add_parent(self.volume)
+
+        # Volume metadata can on the other hand get updated or deleted
+        logger.info('Saving metadata...')
+        for md in self.metadata:
+            md.save()
+        deleted_metadatas = self.volume.metadatas.exclude(id__in=[md.id for md in self.metadata])
+        if deleted_metadatas.exists():
+            self.volume_changed = True
+            deleted_metadatas.delete()
+
+        if self.created_servers:
+            logger.info('Saving new servers...')
+            ImageServer.objects.bulk_create(self.created_servers)
+
+        if self.images:
+            logger.info('Saving images...')
+            for image in self.images:
+                image.save()
+
+        if self.pages:
+            logger.info('Saving changed pages...')
+            for i, (page, event_type) in enumerate(self.pages):
+                if event_type:  # Addition or edit
+                    # Set volume and register as changed since a page changed
+                    self.volume_changed, self.register_changed = True, True
+                    page.zone.save()
+                    page.save()
+                    page.events.create(
+                        revision=self.revision,
+                        type=event_type,
+                    )
+                # Create path or update ordering
+                page.add_parent(self.register, order=i)
+
+        if not self.is_new:  # Deleted pages cannot happen on a new volume
+            deleted_pages = Page.objects.get_descending(self.volume.id).exclude(id__in=[p.id for p, _ in self.pages])
+            if deleted_pages.exists():
+                # Set volume and register as changed since one or more pages got deleted
+                self.volume_changed, self.register_changed = True, True
+                logger.info('Removing {} deleted pages...'.format(len(deleted_pages)))
+                deleted_pages.delete()
+
+        if self.is_new or self.volume_changed:
+            self.volume.events.create(
+                revision=self.revision,
+                type=EventType.Addition if self.is_new else EventType.Edit,
+            )
+        if self.is_new or self.register_changed:
+            self.register.events.create(
+                revision=self.revision,
+                type=EventType.Addition if self.is_new else EventType.Edit,
+            )
+
+    def run(self):
+        try:
+            self.parse()
+            logger.info("Parsed volume {}: {} metadata in volume, {} pages ({} changed), {} new servers".format(
+                self.volume.name,
+                len(self.metadata), len(self.pages), self.change_count, len(self.created_servers)
+            ))
+            self.save()
+        finally:
+            self.close()
diff --git a/arkindex/dataimport/management/commands/import_repo.py b/arkindex/dataimport/management/commands/import_repo.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d3f343c15152874f6ac4c0874b0845c94544327
--- /dev/null
+++ b/arkindex/dataimport/management/commands/import_repo.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+from django.core.management.base import BaseCommand, CommandError
+from arkindex.dataimport.models import DataImport, DataImportMode, DataImportState, Repository
+from arkindex.dataimport.tasks import clone_repo, diff_repo, cleanup_repo
+from pprint import pprint
+
+
+class Command(BaseCommand):
+    help = 'Detect changes in a repository'
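+    # Usage sketch: manage.py import_repo <repository-uuid> [--hash <sha>] [--sync]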
+
+    def add_arguments(self, parser):
+        parser.add_argument(
+            'repository',
+            help='ID of the repository to check on',
+        )
+        parser.add_argument(
+            '--hash',
+            help='Hash of a revision in the repository to check on. Defaults to latest commit on master',
+            default=None,
+        )
+        parser.add_argument(
+            '--sync',
+            help='Run synchronously and show the diff, but do not actually import',
+            action='store_true',
+            default=False,
+        )
+
+    def handle(self, *args, **options):
+        try:
+            repo = Repository.objects.get(id=options['repository'])
+        except Repository.DoesNotExist:
+            raise CommandError('Repository {} not found'.format(options['repository']))
+
+        if repo.provider_class is None:
+            raise ValueError("No repository provider found for {}".format(repo.url))
+
+        if options['hash'] is not None:
+            rev, created = repo.provider.get_or_create_revision(repo, options['hash'])
+        else:
+            rev, created = repo.provider.get_or_create_latest_revision(repo)
+
+        if created:
+            print('Created revision {} "{}" on repository {}'.format(rev.hash, rev.message, repo.url))
+
+        di = DataImport.objects.create(
+            creator=repo.credentials.user,
+            corpus=repo.corpus,
+            mode=DataImportMode.Repository,
+            state=DataImportState.Configured,
+            revision=rev,
+            task_count=3,
+        )
+
+        if options['sync']:
+            # Run synchronously and print the results.
+            # For now, a synchronous run means no progress reports, no messages and no logging.
+            di.state = DataImportState.Running
+            di.save()
+            print('Cloning repo...')
+            clone_repo(di)
+            print('Computing diff...')
+            pprint(diff_repo(di))
+            print('Cleaning up...')
+            cleanup_repo(di)
+            di.state = DataImportState.Done
+            di.save()
+        else:
+            di.start()
diff --git a/arkindex/dataimport/migrations/0002_repository_revision.py b/arkindex/dataimport/migrations/0002_repository_revision.py
index e97c33f85bb45d43abf2e99cc6f2d6674cb886f9..9f14677c0680653b374405728b15419a9445d028 100644
--- a/arkindex/dataimport/migrations/0002_repository_revision.py
+++ b/arkindex/dataimport/migrations/0002_repository_revision.py
@@ -82,7 +82,8 @@ class Migration(migrations.Migration):
             name='watched_branches',
             field=arkindex.project.fields.ArrayField(
                 base_field=models.CharField(max_length=50),
-                default=arkindex.dataimport.models.repository_default_branches,
+                # arkindex.dataimport.models.repository_default_branches has been removed
+                default=lambda: ['refs/heads/master'],
                 size=None,
             ),
         ),
diff --git a/arkindex/dataimport/migrations/0008_events.py b/arkindex/dataimport/migrations/0008_events.py
new file mode 100644
index 0000000000000000000000000000000000000000..83d0746242876c6da45295efa18a3f2df153f918
--- /dev/null
+++ b/arkindex/dataimport/migrations/0008_events.py
@@ -0,0 +1,66 @@
+# Generated by Django 2.1 on 2018-10-02 08:33
+
+import arkindex.dataimport.models
+from django.db import migrations, models
+import django.db.models.deletion
+import django.utils.timezone
+import enumfields.fields
+import uuid
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('documents', '0025_avoid_doublons'),
+        ('dataimport', '0007_datafile_image'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='Event',
+            fields=[
+                ('id', models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False)),
+                ('type', enumfields.fields.EnumField(enum=arkindex.dataimport.models.EventType, max_length=10)),
+                ('element', models.ForeignKey(
+                    on_delete=django.db.models.deletion.CASCADE,
+                    related_name='events',
+                    to='documents.Element',
+                )),
+                ('revision', models.ForeignKey(
+                    on_delete=django.db.models.deletion.CASCADE,
+                    related_name='events',
+                    to='dataimport.Revision',
+                )),
+                ('created', models.DateTimeField(auto_now_add=True)),
+                ('updated', models.DateTimeField(auto_now=True)),
+            ],
+        ),
+        migrations.AddField(
+            model_name='revision',
+            name='elements',
+            field=models.ManyToManyField(related_name='revisions', through='dataimport.Event', to='documents.Element'),
+        ),
+        migrations.AlterUniqueTogether(
+            name='event',
+            unique_together={('element', 'revision')},
+        ),
+        migrations.AlterModelOptions(
+            name='event',
+            options={'ordering': ['element_id', 'created']},
+        ),
+        migrations.AddField(
+            model_name='revision',
+            name='created',
+            field=models.DateTimeField(auto_now_add=True, default=django.utils.timezone.now),
+            preserve_default=False,
+        ),
+        migrations.AddField(
+            model_name='revision',
+            name='updated',
+            field=models.DateTimeField(auto_now=True),
+        ),
+        migrations.RemoveField(
+            model_name='repository',
+            name='watched_branches',
+        ),
+    ]
diff --git a/arkindex/dataimport/models.py b/arkindex/dataimport/models.py
index 91af5dccbaa819b299d4ba38b595e18c62194071..b13d3bc8d1c84fe6ab92ffc114c2b923e60b8e99 100644
--- a/arkindex/dataimport/models.py
+++ b/arkindex/dataimport/models.py
@@ -9,7 +9,6 @@ from enumfields import EnumField, Enum
 from arkindex.project.celery import app as celery_app
 from arkindex.dataimport.providers import git_providers, get_provider
 from arkindex.project.models import IndexableModel
-from arkindex.project.fields import ArrayField
 import uuid
 import os
 import re
@@ -81,8 +80,8 @@ class DataImport(IndexableModel):
             return workflow
 
         elif self.mode == DataImportMode.Repository:
-            from arkindex.dataimport.tasks import download_repo, import_repo, cleanup_repo
-            return download_repo.si(self) | import_repo.si(self) | cleanup_repo.si(self)
+            from arkindex.dataimport.tasks import clone_repo, diff_repo, dispatch_imports, cleanup_repo
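+            # .si() makes a task ignore the previous result; dispatch_imports.s() instead
+            # receives the list of diffs returned by diff_repo as its first argument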
+            return clone_repo.si(self) | diff_repo.si(self) | dispatch_imports.s(self) | cleanup_repo.si(self)
 
         else:
             raise NotImplementedError
@@ -179,15 +178,6 @@ class DataFile(models.Model):
         return os.path.join(settings.MEDIA_ROOT, str(self.id))
 
 
-def repository_default_branches():
-    '''
-    This is needed to avoid re-using the same list instance
-    as Repository.watched_branches default on new instances
-    See Django warning postgres.E003
-    '''
-    return ['refs/heads/master']
-
-
 class Repository(models.Model):
     id = models.UUIDField(primary_key=True, default=uuid.uuid4)
     url = models.URLField(unique=True)
@@ -195,7 +185,6 @@ class Repository(models.Model):
     corpus = models.ForeignKey('documents.Corpus', on_delete=models.CASCADE, related_name='repos')
     credentials = models.ForeignKey(
         'users.OAuthCredentials', on_delete=models.CASCADE, related_name='repos', blank=True, null=True)
-    watched_branches = ArrayField(models.CharField(max_length=50), default=repository_default_branches)
     provider_name = models.CharField(
         max_length=50,
         choices=[(p.__name__, p.display_name) for p in git_providers],
@@ -217,13 +206,14 @@ class Repository(models.Model):
         return os.path.join(settings.CELERY_WORKING_DIR, str(self.id))
 
 
-class Revision(models.Model):
+class Revision(IndexableModel):
     id = models.UUIDField(primary_key=True, default=uuid.uuid4)
     repo = models.ForeignKey('dataimport.Repository', on_delete=models.CASCADE, related_name='revisions')
     hash = models.CharField(max_length=50)
     ref = models.CharField(max_length=50)
     message = models.TextField()
     author = models.CharField(max_length=50)
+    elements = models.ManyToManyField('documents.Element', through='dataimport.Event', related_name='revisions')
 
     class Meta:
         unique_together = (('repo', 'hash'), )
@@ -241,3 +231,25 @@ class Revision(models.Model):
         )
         dataimport.start()
         return dataimport
+
+    def __str__(self):
+        return '{} "{}" by {}'.format(self.hash[:8], self.message.splitlines()[0], self.author)
+
+
+class EventType(Enum):
+    Addition = 'A'
+    Edit = 'M'
+    Deletion = 'D'
+
+
+class Event(models.Model):
+    id = models.UUIDField(primary_key=True, default=uuid.uuid4)
+    created = models.DateTimeField(auto_now_add=True)
+    updated = models.DateTimeField(auto_now=True)
+    element = models.ForeignKey('documents.Element', on_delete=models.CASCADE, related_name='events')
+    revision = models.ForeignKey('dataimport.Revision', on_delete=models.CASCADE, related_name='events')
+    type = EnumField(EventType, max_length=10)
+
+    class Meta:
+        unique_together = (('element', 'revision'), )
+        ordering = ['element_id', 'created']
diff --git a/arkindex/dataimport/providers.py b/arkindex/dataimport/providers.py
index a73ce6c2a6be83628a18de7efb3e3cb3c77a1cd1..12457a72747c96819c29b65700d2a911a9530656 100644
--- a/arkindex/dataimport/providers.py
+++ b/arkindex/dataimport/providers.py
@@ -3,9 +3,11 @@ from django.urls import reverse
 from rest_framework.exceptions import NotAuthenticated, AuthenticationFailed, APIException, ValidationError
 from gitlab import Gitlab, GitlabGetError, GitlabCreateError
 from arkindex.documents.models import Corpus
+from arkindex.dataimport.config import ConfigFile
 import urllib.parse
 import base64
 import uuid
+import git
 
 
 class GitProvider(ABC):
@@ -33,16 +35,44 @@ class GitProvider(ABC):
         Create a Repository instance from an external repository
         """
 
+    @abstractmethod
+    def clone_repo(self, repo, dest_dir, **kwargs):
+        """
+        Get a git.Repo instance for a repository cloned in a given destination directory.
+        """
+
     @abstractmethod
     def download_archive(self, revision, path):
         """
         Download an archive for a given Revision instance.
         """
 
+    def get_or_create_revision(self, repo, sha):
+        from arkindex.dataimport.models import Revision
+        try:
+            return self.get_revision(repo, sha), False
+        except Revision.DoesNotExist:
+            return self.create_revision(repo, sha), True
+
+    def get_revision(self, repo, sha):
+        return repo.revisions.get(hash=sha)
+
+    @abstractmethod
+    def create_revision(self, repo, sha):
+        """
+        Create a Revision instance for a given commit hash of a given repository.
+        """
+
     @abstractmethod
     def get_or_create_latest_revision(self, repo):
         """
-        Get a Revision instance for the last revision on the main branch of a given repository.
+        Get or create a Revision instance for the last revision on the main branch of a given repository.
+        """
+
+    @abstractmethod
+    def get_file_content(self, repo, path, ref="master"):
+        """
+        Get the contents of a given file on a given repository.
         """
 
     @abstractmethod
@@ -57,6 +87,12 @@ class GitLabProvider(GitProvider):
     display_name = "GitLab"
     url = 'https://gitlab.com'
 
+    def _try_get_project(self, gl, id):
+        try:
+            return gl.projects.get(id)
+        except GitlabGetError as e:
+            raise APIException("Error while fetching GitLab project: {}".format(str(e)))
+
     def list_repos(self, query=None):
         if not self.credentials:
             raise NotAuthenticated()
@@ -68,10 +104,7 @@ class GitLabProvider(GitProvider):
         if not self.credentials and request:
             raise NotAuthenticated()
         gl = Gitlab(self.url, oauth_token=self.credentials.token)
-        try:
-            project = gl.projects.get(int(id))
-        except GitlabGetError as e:
-            raise APIException("Error while fetching GitLab project: {}".format(str(e)))
+        project = self._try_get_project(gl, int(id))
 
         from arkindex.dataimport.models import Repository
         if Repository.objects.filter(url=project.web_url).exists():
@@ -80,7 +113,6 @@ class GitLabProvider(GitProvider):
         repo = self.credentials.repos.create(
             corpus=corpus,
             url=project.web_url,
-            watched_branches=['refs/heads/{}'.format(project.default_branch)],
             hook_token=str(base64.b64encode(uuid.uuid4().bytes)),
             provider_name=self.__class__.__name__,
         )
@@ -98,26 +130,41 @@ class GitLabProvider(GitProvider):
 
         return repo
 
+    def clone_repo(self, repo, dest_dir, **kwargs):
+        parsed = list(urllib.parse.urlsplit(repo.url))
+
+        # Cloning over HTTPS with an OAuth token is an undocumented feature supported since GitLab 8.12
+        # https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/10677
+        parsed[1] = 'oauth2:{}@{}'.format(repo.credentials.token, parsed[1])
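+        # e.g. 'https://gitlab.com/user/repo' becomes 'https://oauth2:<token>@gitlab.com/user/repo'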
+
+        return git.Repo.clone_from(urllib.parse.urlunsplit(parsed), dest_dir, **kwargs)
+
     def download_archive(self, revision, path):
         gl = Gitlab(self.url, oauth_token=revision.repo.credentials.token)
-        try:
-            project = gl.projects.get(urllib.parse.urlsplit(revision.repo.url).path.strip('/'))
-        except GitlabGetError as e:
-            raise APIException("Error while fetching GitLab project: {}".format(str(e)))
+        project = self._try_get_project(gl, urllib.parse.urlsplit(revision.repo.url).path.strip('/'))
 
         with open(path, 'wb') as f:
             project.repository_archive(sha=revision.hash, streamed=True, action=f.write)
 
+    def create_revision(self, repo, sha):
+        gl = Gitlab(self.url, oauth_token=repo.credentials.token)
+        project = self._try_get_project(gl, urllib.parse.urlsplit(repo.url).path.strip('/'))
+
+        commit = project.commits.get(sha)
+
+        return repo.revisions.create(
+            hash=sha,
+            ref=commit.refs()[0]['name'],
+            message=commit.message,
+            author=commit.author_name,
+        )
+
     def get_or_create_latest_revision(self, repo):
         gl = Gitlab(self.url, oauth_token=repo.credentials.token)
-        try:
-            project = gl.projects.get(urllib.parse.urlsplit(repo.url).path.strip('/'))
-        except GitlabGetError as e:
-            raise APIException("Error while fetching GitLab project: {}".format(str(e)))
+        project = self._try_get_project(gl, urllib.parse.urlsplit(repo.url).path.strip('/'))
 
         latest_commit = project.commits.list()[0]
         return repo.revisions.get_or_create(
-            repo=repo,
             hash=latest_commit.id,
             defaults={
                 'ref': latest_commit.refs()[0]['name'],
@@ -126,6 +173,12 @@ class GitLabProvider(GitProvider):
             },
         )
 
+    def get_file_content(self, repo, path, ref="master"):
+        gl = Gitlab(self.url, oauth_token=repo.credentials.token)
+        project = self._try_get_project(gl, urllib.parse.urlsplit(repo.url).path.strip('/'))
+
+        return project.files.get(file_path=path, ref=ref).decode()
+
     def handle_webhook(self, repo, request):
         if 'HTTP_X_GITLAB_EVENT' not in request.META:
             raise ValidationError("Missing GitLab event type")
@@ -140,15 +193,20 @@ class GitLabProvider(GitProvider):
         assert isinstance(request.data, dict)
         assert request.data['object_kind'] == 'push'
 
-        if request.data['ref'] not in repo.watched_branches:
-            return
-
         # Already took care of this event
         if repo.revisions.filter(
                 ref=request.data['ref'],
                 hash=request.data['checkout_sha']).exists():
             return
 
+        # Filter on configured branches; ignore refs without an Arkindex configuration file
+        try:
+            config = ConfigFile(self.get_file_content(repo, ConfigFile.FILE_NAME, ref=request.data['ref']))
+        except GitlabGetError:
+            return
+        human_ref = request.data['ref']
+        if human_ref.startswith('refs/heads/'):
+            human_ref = human_ref[11:]
+        if human_ref not in config.branches:
+            return
+
         rev = repo.revisions.create(
             hash=request.data['checkout_sha'],
             ref=request.data['ref'],
diff --git a/arkindex/dataimport/serializers.py b/arkindex/dataimport/serializers.py
index 17c79039f97496403429c679fac97c947b8938c7..1280e8ebf90c0d93e075dceebe14efcaa3d56fdb 100644
--- a/arkindex/dataimport/serializers.py
+++ b/arkindex/dataimport/serializers.py
@@ -1,8 +1,10 @@
 from rest_framework import serializers
 from rest_framework.utils import model_meta
 from arkindex.project.serializer_fields import EnumField
-from arkindex.dataimport.models import \
-    DataImport, DataImportMode, DataImportState, DataImportFailure, DataFile, Repository, Revision
+from arkindex.dataimport.models import (
+    DataImport, DataImportMode, DataImportState, DataImportFailure, DataFile,
+    Repository, Revision, Event, EventType
+)
 from arkindex.documents.models import Corpus
 from arkindex.documents.serializers.light import ElementLightSerializer
 import gitlab.v4.objects
@@ -94,10 +96,13 @@ class RevisionSerializer(serializers.ModelSerializer):
     Serialize a repository revision
     """
 
+    date = serializers.DateTimeField(source='created')
+
     class Meta:
         model = Revision
         fields = (
             'id',
+            'date',
             'hash',
             'ref',
             'message',
@@ -191,7 +196,7 @@ class DataImportFailureSerializer(serializers.ModelSerializer):
         )
 
 
-class RepositoryLightSerializer(serializers.ModelSerializer):
+class RepositorySerializer(serializers.ModelSerializer):
     """
     Serialize a repository
     """
@@ -209,20 +214,6 @@ class RepositoryLightSerializer(serializers.ModelSerializer):
         }
 
 
-class RepositorySerializer(RepositoryLightSerializer):
-    """
-    Fully serialize a repository
-    """
-
-    class Meta(RepositoryLightSerializer.Meta):
-        fields = (
-            'id',
-            'url',
-            'corpus',
-            'watched_branches',
-        )
-
-
 class ExternalRepositorySerializer(serializers.BaseSerializer):
     """
     Serialize a Git repository from an external API
@@ -253,6 +244,23 @@ class ExternalRepositorySerializer(serializers.BaseSerializer):
 
         return {
             'id': data['id'],
-            'corpus': Corpus.objects.writable(self.request.user)
+            'corpus': Corpus.objects.writable(self.context['request'].user)
                                     .get(id=data['corpus'])
         }
+
+
+class EventSerializer(serializers.ModelSerializer):
+    """
+    Serialize a diff event for an element on a revision
+    """
+
+    type = EnumField(EventType)
+    revision = RevisionSerializer()
+
+    class Meta:
+        model = Event
+        fields = (
+            'id',
+            'type',
+            'revision',
+        )
diff --git a/arkindex/dataimport/tasks.py b/arkindex/dataimport/tasks.py
index 616f2bb053aed3b646f0b6bcea6ad561cde8c7a3..179a9cf3f3d34f851e338dcae28e6cc89dda2220 100644
--- a/arkindex/dataimport/tasks.py
+++ b/arkindex/dataimport/tasks.py
@@ -4,18 +4,24 @@ from celery.signals import task_postrun
 from celery.states import EXCEPTION_STATES
 from django.conf import settings
 from django.db import transaction
+from django.core.exceptions import ValidationError
 from arkindex.project.celery import ReportingTask
 from arkindex.documents.models import Element, ElementType, Page
 from arkindex.documents.importer import import_page
 from arkindex.documents.tei import TeiParser
 from arkindex.images.models import ImageServer, ImageStatus
 from arkindex.dataimport.models import DataImport, DataImportState, DataImportMode
+from arkindex.dataimport.config import ConfigFile
+from arkindex.dataimport.filetypes import FileType
+from collections import namedtuple
+from enum import Enum
 from PIL import Image
 import os
 import glob
 import logging
 import shutil
 import urllib.parse
+import git
 
 root_logger = logging.getLogger(__name__)
 logger = get_task_logger(__name__)
@@ -165,6 +171,7 @@ def download_repo(self, dataimport):
     repo_dir = dataimport.revision.repo.clone_dir
     if os.path.exists(repo_dir):
         shutil.rmtree(repo_dir)
+
     archive_path = "{}.tar.gz".format(repo_dir)
     commit_hash = dataimport.revision.hash
 
@@ -180,7 +187,139 @@ def download_repo(self, dataimport):
 
 
 @shared_task(bind=True, base=ReportingTask)
-def import_repo(self, dataimport):
+def clone_repo(self, dataimport):
+    assert isinstance(dataimport, DataImport)
+    assert dataimport.mode == DataImportMode.Repository
+    assert dataimport.revision is not None
+
+    repo_dir = dataimport.revision.repo.clone_dir
+    commit_hash = dataimport.revision.hash
+
+    if os.path.exists(repo_dir):
+        shutil.rmtree(repo_dir)
+
+    if dataimport.revision.repo.provider_class is None:
+        raise ValueError("No repository provider found for {}".format(dataimport.revision.repo.url))
+
+    self.report_progress(0, "Cloning repository...")
+    try:
+        repo = dataimport.revision.repo.provider.clone_repo(dataimport.revision.repo, repo_dir, no_checkout=True)
+    except Exception:
+        raise Exception("An error occured while cloning the repository.")
+
+    self.report_progress(0.5, "Checking out commit {}...".format(commit_hash))
+    try:
+        repo.head.reference = repo.create_head('commit_{}'.format(commit_hash), commit_hash)
+        repo.head.reset(index=True, working_tree=True)
+    except Exception:
+        raise Exception("An error occured while checking out commit {}".format(commit_hash))
+
+    config_path = os.path.join(repo_dir, ConfigFile.FILE_NAME)
+    if not os.path.isfile(config_path):
+        raise IOError("An Arkindex repository configuration file (.arkindex.yml) is required.")
+
+    try:
+        ConfigFile.from_path(config_path)
+    except ValidationError as e:
+        raise ValueError("Invalid Arkindex config file (.arkindex.yml): {}".format(str(e)))
+    except Exception:
+        raise Exception("An error occured while opening the .arkindex.yml file.")
+
+
+class DiffType(Enum):
+    Addition = 'A'
+    Modification = 'M'
+    Deletion = 'D'
+    Rename = 'R'
+    Transtype = 'T'
+    # The following types exist but should never happen in a simple cloning
+    Copy = 'C'
+    Unmerged = 'U'
+    BrokenPairing = 'B'
+    Unknown = 'X'
+
+
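+# Lightweight, picklable summary of a single git diff entry, passed between Celery tasks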
+SimpleDiff = namedtuple('SimpleDiff', 'type, old_path, new_path')
+
+
+@shared_task(bind=True, base=ReportingTask)
+def diff_repo(self, dataimport):
+    assert isinstance(dataimport, DataImport)
+    assert dataimport.mode == DataImportMode.Repository
+    assert dataimport.revision is not None
+
+    commit_hash = dataimport.revision.hash
+    repo = git.Repo(dataimport.revision.repo.clone_dir)
+    current_commit = repo.commit(commit_hash)
+
+    # Walk the commit's history, excluding the current commit, to collect all of its ancestors
+    parent_commits = {
+        c.hexsha: c
+        for c in repo.iter_commits(commit_hash)
+        if c.hexsha != commit_hash
+    }
+
+    # Look for revisions that match the commit hashes
+    parent_revisions = dataimport.revision.repo.revisions.filter(hash__in=parent_commits.keys()).order_by('-created')
+
+    if not parent_revisions.exists():
+        self.report_message("No known parent revision found.")
+        # No known revision, just return all the repo's files as additions
+        # Call git ls-files directly
+        return [SimpleDiff(DiffType.Addition, path, path) for path in repo.git.ls_files().splitlines()]
+
+    # Pick the Git commit from the latest revision and perform the diff
+    diffs = parent_commits[parent_revisions.first().hash].diff(current_commit)
+
+    # Return diff types and paths
+    return [SimpleDiff(DiffType(diff.change_type), diff.a_path, diff.b_path) for diff in diffs]
+
+
+@shared_task(bind=True, base=ReportingTask)
+def dispatch_imports(self, diffs, dataimport):
+    assert isinstance(dataimport, DataImport)
+    assert all(isinstance(diff, SimpleDiff) for diff in diffs)
+
+    handler = TaskLoggingHandler(self)
+    root_logger.addHandler(handler)
+
+    try:
+        config = ConfigFile.from_path(os.path.join(dataimport.revision.repo.clone_dir, ConfigFile.FILE_NAME))
+    except ValidationError as e:
+        raise ValueError("YAML configuration file validation failed: {}".format(str(e)))
+    except Exception:
+        raise Exception("An error occured while loading the .arkindex.yml file.")
+
+    self.report_progress(0, "Fetching file types...")
+    actions = [(
+        diff,
+        FileType.identify(diff.old_path, config),
+        FileType.identify(diff.new_path, config),
+    ) for diff in diffs]
+
+    count = len(actions)
+    for i, (diff, old_type, new_type) in enumerate(actions):
+        self.report_progress(i / count, "Parsing diff {} from '{}' to '{}'".format(
+            diff.type.value, diff.old_path, diff.new_path))
+
+        if not new_type:
+            self.report_message("Unknown file type for '{}'".format(diff.new_path), level=logging.WARNING)
+            continue
+
+        if old_type != new_type:
+            self.report_message("File type changes are not supported", level=logging.WARNING)
+            continue
+        try:
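+            # *diff unpacks the SimpleDiff fields (type, old_path, new_path)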
+            new_type.handle(dataimport, *diff, config)
+        except Exception as e:
+            self.report_message("Error while parsing diff {} from '{}' to '{}': {}".format(
+                diff.type.value, diff.old_path, diff.new_path, str(e)), level=logging.WARNING)
+
+    root_logger.removeHandler(handler)
+
+
+@shared_task(bind=True, base=ReportingTask)
+def import_metadata_repo(self, dataimport):
     handler = TaskLoggingHandler(self)
     root_logger.addHandler(handler)
 
diff --git a/arkindex/dataimport/tests/manifest_samples/.arkindex.yml b/arkindex/dataimport/tests/manifest_samples/.arkindex.yml
new file mode 100644
index 0000000000000000000000000000000000000000..558726ce7b8370daf6deee1bfb98c670f991a1f9
--- /dev/null
+++ b/arkindex/dataimport/tests/manifest_samples/.arkindex.yml
@@ -0,0 +1,14 @@
+version: 1
+branches:
+  - master
+
+corpus:
+  name: Unit Tests
+  description: Unit Tests corpus
+
+volumes:
+  format: iiif
+  paths:
+    - "*"
+  lazy_checks: true
+  autoconvert_https: false
diff --git a/arkindex/dataimport/tests/manifest_samples/base.json b/arkindex/dataimport/tests/manifest_samples/base.json
new file mode 100644
index 0000000000000000000000000000000000000000..f5bdb03b7fde48ea29b97dbf8fcb7c9c81523741
--- /dev/null
+++ b/arkindex/dataimport/tests/manifest_samples/base.json
@@ -0,0 +1,98 @@
+{
+    "@context": "http://iiif.io/api/presentation/2/context.json",
+    "@type": "sc:Manifest",
+    "@id": "http://server/manifest",
+    "viewingDirection": "left-to-right",
+    "viewingHint": "individuals",
+    "label": "Manifest label",
+    "description": "",
+    "thumbnail": {
+        "@id": "http://server/thumbnail/full/150,/0/default.jpg",
+        "service": {
+            "@context": "http://iiif.io/api/image/2/context.json",
+            "profile": "http://iiif.io/api/image/2/level2.json",
+            "@id": "http://server/thumbnail"
+        }
+    },
+    "metadata": [
+        {
+            "label": "Label 1",
+            "value": "Value 1"
+        },
+        {
+            "label": "Label 2",
+            "value": "Value 2"
+        },
+        {
+            "label": "Label 3",
+            "value": "Value 3"
+        }
+    ],
+    "license": "http://creativecommons.org/licenses/by-nc/3.0/deed.fr",
+    "attribution": "Archives nationales de PARIS",
+    "logo": "http://server/logo",
+    "related": [],
+    "seeAlso": "",
+    "within": "http://server/collection",
+    "sequences": [
+        {
+            "@id": "http://server/sequence",
+            "@type": "sc:Sequence",
+            "label": "Reproduction intégrale",
+            "canvases": [
+                {
+                    "@id": "http://server/canvas-1",
+                    "@type": "sc:Canvas",
+                    "label": "plat supérieur",
+                    "height": 1000,
+                    "width": 2000,
+                    "images": [
+                        {
+                            "@type": "oa:Annotation",
+                            "motivation": "sc:painting",
+                            "resource": {
+                                "@id": "http://server/image-1/full/full/0/default.jpg",
+                                "@type": "dctypes:Image",
+                                "format": "image/jpeg",
+                                "height": 1000,
+                                "width": 2000,
+                                "service": {
+                                    "@context": "http://iiif.io/api/image/2/context.json",
+                                    "@id": "http://server/image-1",
+                                    "profile": "http://iiif.io/api/image/2/level2.json"
+                                }
+                            },
+                            "on": "http://server/canvas-1"
+                        }
+                    ]
+                },
+                {
+                    "@id": "http://server/canvas-2",
+                    "@type": "sc:Canvas",
+                    "label": "001r",
+                    "height": 1000,
+                    "width": 2000,
+                    "images": [
+                        {
+                            "@type": "oa:Annotation",
+                            "motivation": "sc:painting",
+                            "resource": {
+                                "@id": "http://server/image-2/full/full/0/default.jpg",
+                                "@type": "dctypes:Image",
+                                "format": "image/jpeg",
+                                "height": 1000,
+                                "width": 2000,
+                                "service": {
+                                    "@context": "http://iiif.io/api/image/2/context.json",
+                                    "@id": "http://server/image-2",
+                                    "profile": "http://iiif.io/api/image/2/level2.json"
+                                }
+                            },
+                            "on": "http://server/canvas-2"
+                        }
+                    ]
+                }
+            ]
+        }
+    ]
+}
diff --git a/arkindex/dataimport/tests/manifest_samples/changed.json b/arkindex/dataimport/tests/manifest_samples/changed.json
new file mode 100644
index 0000000000000000000000000000000000000000..2c42ee53bd5c59000f6b59f47701c90d9b86d5e1
--- /dev/null
+++ b/arkindex/dataimport/tests/manifest_samples/changed.json
@@ -0,0 +1,98 @@
+{
+    "@context": "http://iiif.io/api/presentation/2/context.json",
+    "@type": "sc:Manifest",
+    "@id": "http://server/manifest",
+    "viewingDirection": "left-to-right",
+    "viewingHint": "individuals",
+    "label": "Manifest label",
+    "description": "",
+    "thumbnail": {
+        "@id": "http://server/thumbnail/full/150,/0/default.jpg",
+        "service": {
+            "@context": "http://iiif.io/api/image/2/context.json",
+            "profile": "http://iiif.io/api/image/2/level2.json",
+            "@id": "http://server/thumbnail"
+        }
+    },
+    "metadata": [
+        {
+            "label": "Label 1",
+            "value": "Updated value 1"
+        },
+        {
+            "label": "Label 2",
+            "value": "Value 2"
+        },
+        {
+            "label": "Label 4",
+            "value": "Value 4"
+        }
+    ],
+    "license": "http://creativecommons.org/licenses/by-nc/3.0/deed.fr",
+    "attribution": "Archives nationales de PARIS",
+    "logo": "http://server/logo",
+    "related": [],
+    "seeAlso": "",
+    "within": "http://server/collection",
+    "sequences": [
+        {
+            "@id": "http://server/sequence",
+            "@type": "sc:Sequence",
+            "label": "Reproduction intégrale",
+            "canvases": [
+                {
+                    "@id": "http://server/canvas-3",
+                    "@type": "sc:Canvas",
+                    "label": "plat supérieur",
+                    "height": 1000,
+                    "width": 2000,
+                    "images": [
+                        {
+                            "@type": "oa:Annotation",
+                            "motivation": "sc:painting",
+                            "resource": {
+                                "@id": "http://server/image-3/full/full/0/default.jpg",
+                                "@type": "dctypes:Image",
+                                "format": "image/jpeg",
+                                "height": 1000,
+                                "width": 2000,
+                                "service": {
+                                    "@context": "http://iiif.io/api/image/2/context.json",
+                                    "@id": "http://server/image-3",
+                                    "profile": "http://iiif.io/api/image/2/level2.json"
+                                }
+                            },
+                            "on": "http://server/image-3"
+                        }
+                    ]
+                },
+                {
+                    "@id": "http://server/canvas-2",
+                    "@type": "sc:Canvas",
+                    "label": "002r",
+                    "height": 1000,
+                    "width": 2000,
+                    "images": [
+                        {
+                            "@type": "oa:Annotation",
+                            "motivation": "sc:painting",
+                            "resource": {
+                                "@id": "http://server/image-2/full/full/0/default.jpg",
+                                "@type": "dctypes:Image",
+                                "format": "image/jpeg",
+                                "height": 1000,
+                                "width": 2000,
+                                "service": {
+                                    "@context": "http://iiif.io/api/image/2/context.json",
+                                    "@id": "http://server/image-2",
+                                    "profile": "http://iiif.io/api/image/2/level2.json"
+                                }
+                            },
+                            "on": "http://server/canvas-2"
+                        }
+                    ]
+                }
+            ]
+        }
+    ]
+}
diff --git a/arkindex/dataimport/tests/test_gitlab_provider.py b/arkindex/dataimport/tests/test_gitlab_provider.py
new file mode 100644
index 0000000000000000000000000000000000000000..aa65cbe983e49e1c21c39901eaa42796552a6b7a
--- /dev/null
+++ b/arkindex/dataimport/tests/test_gitlab_provider.py
@@ -0,0 +1,462 @@
+from arkindex.project.tests import FixtureTestCase, RedisMockMixin
+from arkindex.dataimport.models import Repository
+from arkindex.users.models import OAuthCredentials
+from arkindex.dataimport.providers import GitLabProvider
+from rest_framework.exceptions import APIException, NotAuthenticated, AuthenticationFailed, ValidationError
+from gitlab.exceptions import GitlabGetError, GitlabCreateError
+from unittest.mock import patch, MagicMock
+import yaml
+
+
+class TestGitLabProvider(RedisMockMixin, FixtureTestCase):
+    """
+    Test the GitLabProvider class
+    """
+
+    @classmethod
+    def setUpTestData(cls):
+        super().setUpTestData()
+        cls.creds = OAuthCredentials.objects.create(
+            user=cls.user,
+            provider_name='GitLabOAuthProvider',
+            provider_url='https://somewhere',
+            token='oauth-token'
+        )
+        cls.repo = Repository.objects.create(
+            url='http://gitlab/repo',
+            hook_token='hook-token',
+            corpus=cls.corpus,
+            credentials=cls.creds,
+            provider_name='GitLabProvider',
+        )
+        cls.rev = cls.repo.revisions.create(
+            hash='42',
+            ref='refs/heads/master',
+            message='a',
+            author='me',
+        )
+        cls.gl_patch = patch('arkindex.dataimport.providers.Gitlab')
+
+    def setUp(self):
+        super().setUp()
+        self.gl_mock = self.gl_patch.start()
+
+    def tearDown(self):
+        super().tearDown()
+        self.gl_patch.stop()
+
+    def test_list_repos(self):
+        """
+        Test GitLabProvider can list repositories from GitLab
+        """
+        GitLabProvider(url='http://aaa', credentials=self.creds).list_repos()
+
+        self.assertEqual(self.gl_mock.call_count, 1)
+        args, kwargs = self.gl_mock.call_args
+        self.assertTupleEqual(args, ('http://aaa', ))
+        self.assertDictEqual(kwargs, {'oauth_token': self.creds.token})
+
+        self.assertEqual(self.gl_mock().projects.list.call_count, 1)
+        args, kwargs = self.gl_mock().projects.list.call_args
+        self.assertTupleEqual(args, ())
+        self.assertDictEqual(kwargs, {'membership': True, 'search': None})
+
+    def test_list_repos_query(self):
+        """
+        Test GitLabProvider can search repositories from GitLab
+        """
+        GitLabProvider(url='http://aaa', credentials=self.creds).list_repos(query='meh')
+
+        self.assertEqual(self.gl_mock.call_count, 1)
+        args, kwargs = self.gl_mock.call_args
+        self.assertTupleEqual(args, ('http://aaa', ))
+        self.assertDictEqual(kwargs, {'oauth_token': self.creds.token})
+
+        self.assertEqual(self.gl_mock().projects.list.call_count, 1)
+        args, kwargs = self.gl_mock().projects.list.call_args
+        self.assertTupleEqual(args, ())
+        self.assertDictEqual(kwargs, {'membership': True, 'search': 'meh'})
+
+    def test_list_repos_requires_credentials(self):
+        """
+        Test GitLabProvider checks for credentials when requesting repositories list
+        """
+        with self.assertRaises(NotAuthenticated):
+            GitLabProvider(url='http://aaa').list_repos()
+
+    def test_create_repo(self):
+        """
+        Test GitLabProvider can create a Repository instance from a GitLab repo
+        """
+        self.gl_mock().projects.get.return_value.web_url = 'http://new_repo_url'
+        self.gl_mock().projects.get.return_value.default_branch = 'branchname'
+
+        request_mock = MagicMock()
+        request_mock.build_absolute_uri.return_value = 'http://hook'
+        new_repo = GitLabProvider(url='http://aaa', credentials=self.creds).create_repo(
+            id='1337', request=request_mock, corpus=self.corpus)
+
+        self.assertEqual(self.gl_mock().projects.get.call_count, 1)
+        args, kwargs = self.gl_mock().projects.get.call_args
+        self.assertTupleEqual(args, (1337, ))
+        self.assertDictEqual(kwargs, {})
+
+        self.assertEqual(new_repo.url, 'http://new_repo_url')
+        self.assertEqual(new_repo.corpus, self.corpus)
+        self.assertEqual(new_repo.provider_name, 'GitLabProvider')
+
+        self.assertEqual(self.gl_mock().projects.get().hooks.create.call_count, 1)
+        args, kwargs = self.gl_mock().projects.get().hooks.create.call_args
+        self.assertEqual(len(args), 1)
+        self.assertDictEqual(kwargs, {})
+        self.assertDictEqual(args[0], {
+            'url': 'http://hook',
+            'push_events': True,
+            'token': new_repo.hook_token,
+        })
+
+    def test_create_repo_requires_credentials(self):
+        """
+        Test GitLabProvider checks for credentials when requesting a repository creation
+        """
+        request_mock = MagicMock()
+        request_mock.build_absolute_uri.return_value = 'http://hook'
+        with self.assertRaises(NotAuthenticated):
+            GitLabProvider(url='http://aaa').create_repo(
+                id='repo_id', request=request_mock, corpus=self.corpus)
+
+    def test_create_repo_already_exists(self):
+        """
+        Test GitLabProvider checks for duplicate repositories
+        """
+        self.gl_mock().projects.get.return_value.web_url = 'http://new_repo_url'
+        self.gl_mock().projects.get.return_value.default_branch = 'branchname'
+
+        request_mock = MagicMock()
+        request_mock.build_absolute_uri.return_value = 'http://hook'
+        GitLabProvider(url='http://aaa', credentials=self.creds).create_repo(
+            id='1337', request=request_mock, corpus=self.corpus)
+
+        with self.assertRaises(ValidationError):
+            GitLabProvider(url='http://aaa', credentials=self.creds).create_repo(
+                id='1337', request=request_mock, corpus=self.corpus)
+
+    def test_create_repo_handle_get_error(self):
+        """
+        Test GitLabProvider handles GitLab repo GET errors
+        """
+        self.gl_mock().projects.get.side_effect = GitlabGetError
+
+        request_mock = MagicMock()
+        request_mock.build_absolute_uri.return_value = 'http://hook'
+
+        with self.assertRaises(APIException):
+            GitLabProvider(url='http://aaa', credentials=self.creds).create_repo(
+                id='1337', request=request_mock, corpus=self.corpus)
+
+        self.assertEqual(self.gl_mock().projects.get.call_count, 1)
+
+    def test_create_repo_handle_hook_create_error(self):
+        """
+        Test GitLabProvider handles GitLab hook creation errors
+        """
+        self.gl_mock().projects.get.return_value.web_url = 'http://new_repo_url'
+        self.gl_mock().projects.get.return_value.default_branch = 'branchname'
+        self.gl_mock().projects.get.return_value.hooks.create.side_effect = GitlabCreateError
+
+        request_mock = MagicMock()
+        request_mock.build_absolute_uri.return_value = 'http://hook'
+
+        with self.assertRaises(APIException):
+            GitLabProvider(url='http://aaa', credentials=self.creds).create_repo(
+                id='1337', request=request_mock, corpus=self.corpus)
+
+        self.assertEqual(self.gl_mock().projects.get.call_count, 1)
+        self.assertEqual(self.gl_mock().projects.get().hooks.create.call_count, 1)
+
+    @patch('arkindex.dataimport.providers.git')
+    def test_clone_repo(self, git_mock):
+        """
+        Test GitLabProvider can clone a repository
+        """
+        GitLabProvider(url='http://aaa', credentials=self.creds).clone_repo(self.repo, 'somewhere', a='a', b='b')
+
+        self.assertEqual(git_mock.Repo.clone_from.call_count, 1)
+        args, kwargs = git_mock.Repo.clone_from.call_args
+        self.assertTupleEqual(args, ('http://oauth2:oauth-token@gitlab/repo', 'somewhere'))
+        self.assertDictEqual(kwargs, {'a': 'a', 'b': 'b'})
+
+    @patch('arkindex.dataimport.providers.open')
+    def test_download_archive(self, open_mock):
+        """
+        Test GitLabProvider can download an archive of a revision onto a specified path
+        """
+        GitLabProvider(url='http://aaa', credentials=self.creds).download_archive(self.rev, 'somewhere')
+
+        self.assertEqual(self.gl_mock().projects.get.call_count, 1)
+        args, kwargs = self.gl_mock().projects.get.call_args
+        self.assertTupleEqual(args, ('repo', ))
+        self.assertDictEqual(kwargs, {})
+
+        self.assertEqual(open_mock.call_count, 1)
+        self.assertEqual(self.gl_mock().projects.get().repository_archive.call_count, 1)
+        args, kwargs = self.gl_mock().projects.get().repository_archive.call_args
+        self.assertTupleEqual(args, ())
+        self.assertDictEqual(kwargs, {
+            'sha': '42',
+            'streamed': True,
+            'action': open_mock().__enter__().write,
+        })
+
+    @patch('arkindex.dataimport.providers.open')
+    def test_download_archive_get_error(self, open_mock):
+        """
+        Test GitLabProvider can handle GitLab errors while downloading an archive
+        """
+        self.gl_mock().projects.get.side_effect = GitlabGetError
+
+        with self.assertRaises(APIException):
+            GitLabProvider(url='http://aaa', credentials=self.creds).download_archive(self.rev, 'somewhere')
+
+        self.assertEqual(self.gl_mock().projects.get.call_count, 1)
+        self.assertEqual(open_mock.call_count, 0)
+
+    def test_get_revision(self):
+        """
+        Test GitLabProvider can retrieve an existing Revision for a repo by hash
+        """
+        revision, created = GitLabProvider(url='http://aaa', credentials=self.creds) \
+            .get_or_create_revision(self.repo, '42')
+
+        self.assertEqual(revision, self.rev)
+        self.assertFalse(created)
+        self.assertEqual(self.gl_mock.call_count, 0)
+
+    def test_create_revision(self):
+        """
+        Test GitLabProvider can create a Revision instance for a repo by hash
+        """
+        self.gl_mock().projects.get.return_value.commits.get.return_value.refs.return_value = [
+            {'name': 'refs/heads/branch1'},
+            {'name': 'refs/heads/branch2'},
+        ]
+        self.gl_mock().projects.get.return_value.commits.get.return_value.message = 'commit message'
+        self.gl_mock().projects.get.return_value.commits.get.return_value.author_name = 'bob'
+
+        revision, created = GitLabProvider(url='http://aaa', credentials=self.creds) \
+            .get_or_create_revision(self.repo, '1337')
+
+        self.assertTrue(created)
+        self.assertEqual(revision.hash, '1337')
+        self.assertEqual(revision.ref, 'refs/heads/branch1')
+        self.assertEqual(revision.message, 'commit message')
+        self.assertEqual(revision.author, 'bob')
+
+        self.assertEqual(self.gl_mock().projects.get.call_count, 1)
+        self.assertEqual(self.gl_mock().projects.get().commits.get.call_count, 1)
+        args, kwargs = self.gl_mock().projects.get().commits.get.call_args
+        self.assertTupleEqual(args, ('1337', ))
+        self.assertDictEqual(kwargs, {})
+
+    def test_get_latest_revision(self):
+        """
+        Test GitLabProvider can get the latest revision on a repo
+        """
+        latest_commit = MagicMock()
+        latest_commit.id = '42'
+        latest_commit.refs.return_value = [
+            {'name': 'refs/heads/master'},
+        ]
+        latest_commit.message = 'a'
+        latest_commit.author_name = 'me'
+
+        self.gl_mock().projects.get.return_value.commits.list.return_value = [latest_commit, ]
+
+        revision, created = GitLabProvider(url='http://aaa', credentials=self.creds) \
+            .get_or_create_latest_revision(self.repo)
+
+        self.assertFalse(created)
+        self.assertEqual(revision, self.rev)
+
+        self.assertEqual(self.gl_mock().projects.get.call_count, 1)
+        self.assertEqual(self.gl_mock().projects.get().commits.list.call_count, 1)
+
+    def test_create_latest_revision(self):
+        """
+        Test GitLabProvider can create the latest revision on a repo
+        """
+        latest_commit = MagicMock()
+        latest_commit.id = '1337'
+        latest_commit.refs.return_value = [
+            {'name': 'refs/heads/branch1'},
+            {'name': 'refs/heads/branch2'},
+        ]
+        latest_commit.message = 'commit message'
+        latest_commit.author_name = 'bob'
+
+        self.gl_mock().projects.get.return_value.commits.list.return_value = [latest_commit, ]
+
+        revision, created = GitLabProvider(url='http://aaa', credentials=self.creds) \
+            .get_or_create_latest_revision(self.repo)
+
+        self.assertTrue(created)
+        self.assertEqual(revision.hash, '1337')
+        self.assertEqual(revision.ref, 'refs/heads/branch1')
+        self.assertEqual(revision.message, 'commit message')
+        self.assertEqual(revision.author, 'bob')
+
+        self.assertEqual(self.gl_mock().projects.get.call_count, 1)
+        self.assertEqual(self.gl_mock().projects.get().commits.list.call_count, 1)
+
+    def test_handle_webhook(self):
+        """
+        Test GitLabProvider correctly handles GitLab webhook push events
+        """
+        request_mock = MagicMock()
+        request_mock.META = {
+            'HTTP_X_GITLAB_EVENT': 'Push Hook',
+            'HTTP_X_GITLAB_TOKEN': 'hook-token',
+        }
+        request_mock.data = {
+            'object_kind': 'push',
+            'ref': 'refs/heads/master',
+            'checkout_sha': '1337',
+            'commits': [
+                {
+                    'message': 'commit message',
+                    'author': {
+                        'name': 'bob',
+                    }
+                }
+            ]
+        }
+        self.gl_mock().projects.get().files.get.return_value.decode.return_value = yaml.dump({
+            'version': 1,
+            'branches': ['master'],
+            'corpus': {'name': 'Unit Tests', 'description': 'Unit Tests', 'public': False},
+            'volumes': {'paths': ['*'], 'format': 'iiif'},
+        })
+
+        GitLabProvider(url='http://aaa', credentials=self.creds).handle_webhook(self.repo, request_mock)
+        self.assertGreater(self.redis.llen('celery'), 0)
+
+        self.assertEqual(self.gl_mock().projects.get().files.get.call_count, 1)
+        self.assertEqual(self.gl_mock().projects.get().files.get().decode.call_count, 1)
+
+        revision = self.repo.revisions.get(hash='1337')
+        self.assertEqual(revision.ref, 'refs/heads/master')
+        self.assertEqual(revision.message, 'commit message')
+        self.assertEqual(revision.author, 'bob')
+        self.assertEqual(len(revision.dataimports.all()), 1)
+
+    def test_handle_webhook_missing_headers(self):
+        """
+        Test GitLabProvider checks HTTP headers on webhooks
+        """
+        glp = GitLabProvider(url='http://aaa', credentials=self.creds)
+
+        request_mock = MagicMock()
+        request_mock.data = {
+            'object_kind': 'push',
+            'ref': 'refs/heads/master',
+            'checkout_sha': '1337',
+            'commits': [
+                {
+                    'message': 'commit message',
+                    'author': {
+                        'name': 'bob',
+                    }
+                }
+            ]
+        }
+
+        # Missing HTTP_X_GITLAB_TOKEN
+        request_mock.META = {
+            'HTTP_X_GITLAB_EVENT': 'Push Hook',
+        }
+        with self.assertRaises(NotAuthenticated):
+            glp.handle_webhook(self.repo, request_mock)
+
+        # Missing HTTP_X_GITLAB_EVENT
+        request_mock.META = {
+            'HTTP_X_GITLAB_TOKEN': 'hook-token',
+        }
+        with self.assertRaises(ValidationError):
+            glp.handle_webhook(self.repo, request_mock)
+
+        # Wrong HTTP_X_GITLAB_EVENT
+        request_mock.META = {
+            'HTTP_X_GITLAB_EVENT': 'Not a Push Hook',
+            'HTTP_X_GITLAB_TOKEN': 'hook-token',
+        }
+        with self.assertRaises(ValidationError):
+            glp.handle_webhook(self.repo, request_mock)
+
+        # Wrong HTTP_X_GITLAB_TOKEN
+        request_mock.META = {
+            'HTTP_X_GITLAB_EVENT': 'Push Hook',
+            'HTTP_X_GITLAB_TOKEN': 'not-the-hook-token',
+        }
+        with self.assertRaises(AuthenticationFailed):
+            glp.handle_webhook(self.repo, request_mock)
+
+    def test_handle_webhook_duplicate_events(self):
+        """
+        Test GitLabProvider checks for already handled events
+        """
+        request_mock = MagicMock()
+        request_mock.META = {
+            'HTTP_X_GITLAB_EVENT': 'Push Hook',
+            'HTTP_X_GITLAB_TOKEN': 'hook-token',
+        }
+        request_mock.data = {
+            'object_kind': 'push',
+            'ref': 'refs/heads/master',
+            'checkout_sha': '42',
+            'commits': [
+                {
+                    'message': 'a',
+                    'author': {
+                        'name': 'me',
+                    }
+                }
+            ]
+        }
+
+        GitLabProvider(url='http://aaa', credentials=self.creds).handle_webhook(self.repo, request_mock)
+        self.assertEqual(self.redis.llen('celery'), 0)
+
+    def test_handle_webhook_watched_branches(self):
+        """
+        Test GitLabProvider only accepts events for watched branches
+        """
+        request_mock = MagicMock()
+        request_mock.META = {
+            'HTTP_X_GITLAB_EVENT': 'Push Hook',
+            'HTTP_X_GITLAB_TOKEN': 'hook-token',
+        }
+        request_mock.data = {
+            'object_kind': 'push',
+            'ref': 'refs/heads/nope',
+            'checkout_sha': '1337',
+            'commits': [
+                {
+                    'message': 'commit message',
+                    'author': {
+                        'name': 'bob',
+                    }
+                }
+            ]
+        }
+        self.gl_mock().projects.get().files.get.return_value.decode.return_value = yaml.dump({
+            'version': 1,
+            'branches': ['master'],
+            'corpus': {'name': 'Unit Tests', 'description': 'Unit Tests', 'public': False},
+            'volumes': {'paths': ['*'], 'format': 'iiif'},
+        })
+
+        GitLabProvider(url='http://aaa', credentials=self.creds).handle_webhook(self.repo, request_mock)
+
+        self.assertEqual(self.gl_mock().projects.get().files.get.call_count, 1)
+        self.assertEqual(self.gl_mock().projects.get().files.get().decode.call_count, 1)
+        self.assertEqual(self.redis.llen('celery'), 0)
diff --git a/arkindex/dataimport/tests/test_iiif.py b/arkindex/dataimport/tests/test_iiif.py
new file mode 100644
index 0000000000000000000000000000000000000000..654d459ee24d341ddf76aa18af54d1f0004ff5cc
--- /dev/null
+++ b/arkindex/dataimport/tests/test_iiif.py
@@ -0,0 +1,271 @@
+from unittest.mock import patch
+from arkindex.project.tests import FixtureTestCase
+from arkindex.documents.models import Element, ElementType, Page, MetaType
+from arkindex.images.models import ImageStatus
+from arkindex.dataimport.models import EventType, DataImportMode, DataImportState
+from arkindex.dataimport.iiif import ManifestParser
+import os.path
+import git
+import shutil
+
+FIXTURES = os.path.join(
+    os.path.dirname(os.path.realpath(__file__)),
+    'manifest_samples',
+)
+
+
+class TestManifestParser(FixtureTestCase):
+
+    @classmethod
+    def setUpTestData(cls):
+        super().setUpTestData()
+        cls.creds = cls.user.credentials.create(
+            provider_name='GitLabOAuthProvider',
+            provider_url='https://somewhere',
+            token='oauth-token'
+        )
+        cls.repo = cls.creds.repos.create(
+            url='http://gitlab/repo',
+            hook_token='hook-token',
+            corpus=cls.corpus,
+            provider_name='GitLabProvider',
+        )
+        cls.rev = cls.repo.revisions.create(
+            hash='42',
+            ref='refs/heads/master',
+            message='a',
+            author='me',
+        )
+
+    def _assert_first_import(self, first_rev):
+        """
+        Check importing base.json
+        """
+
+        vol = Element.objects.get(type=ElementType.Volume, name='ParserTest')
+        reg = Element.objects.get(type=ElementType.Register, name='ParserTest')
+        pages = Page.objects.get_descending(vol.id)
+
+        # Volume metadata
+        self.assertEqual(vol.metadatas.count(), 3)
+        self.assertCountEqual(
+            vol.metadatas.values_list('name', 'type', 'revision', 'value'),
+            [
+                ('Label 1', MetaType.Text, first_rev.id, 'Value 1'),
+                ('Label 2', MetaType.Text, first_rev.id, 'Value 2'),
+                ('Label 3', MetaType.Text, first_rev.id, 'Value 3'),
+            ]
+        )
+        self.assertEqual(reg.metadatas.count(), 0)
+
+        # Pages
+        self.assertEqual(pages.count(), 2)
+        first_page, second_page = pages
+        self.assertEqual(first_page.folio, "plat supérieur")
+        self.assertEqual(second_page.folio, "001r")
+        self.assertEqual(first_page.zone.image.path, 'image-1')
+        self.assertEqual(second_page.zone.image.path, 'image-2')
+
+        for p in (first_page, second_page):
+            self.assertEqual(p.zone.polygon.x, 0)
+            self.assertEqual(p.zone.polygon.y, 0)
+            self.assertEqual(p.zone.polygon.width, 2000)
+            self.assertEqual(p.zone.polygon.height, 1000)
+            self.assertEqual(p.zone.image.status, ImageStatus.Unchecked)
+            self.assertEqual(p.zone.image.server, self.imgsrv)
+            self.assertEqual(p.zone.image.width, 2000)
+            self.assertEqual(p.zone.image.height, 1000)
+
+        # Events
+        for elt in (vol, reg, first_page, second_page):
+            self.assertEqual(elt.events.count(), 1)
+            event = elt.events.get()
+            self.assertEqual(event.type, EventType.Addition)
+            self.assertEqual(event.revision, first_rev)
+
+    def _assert_second_import(self, first_rev, second_rev):
+        """
+        Check importing changed.json after base.json
+        """
+        vol = Element.objects.get(type=ElementType.Volume, name='ParserTest')
+        reg = Element.objects.get(type=ElementType.Register, name='ParserTest')
+        pages = Page.objects.get_descending(vol.id)
+
+        # Volume metadata
+        self.assertEqual(vol.metadatas.count(), 3)
+        self.assertCountEqual(
+            vol.metadatas.values_list('name', 'type', 'revision', 'value'),
+            [
+                ('Label 1', MetaType.Text, second_rev.id, 'Updated value 1'),
+                ('Label 2', MetaType.Text, first_rev.id, 'Value 2'),
+                ('Label 4', MetaType.Text, second_rev.id, 'Value 4'),
+            ]
+        )
+        self.assertEqual(reg.metadatas.count(), 0)
+
+        # Pages
+        self.assertEqual(pages.count(), 2)
+        first_page, second_page = pages
+        self.assertEqual(first_page.folio, "plat supérieur")
+        self.assertEqual(second_page.folio, "002r")
+        self.assertEqual(first_page.zone.image.path, 'image-3')
+        self.assertEqual(second_page.zone.image.path, 'image-2')
+
+        for p in (first_page, second_page):
+            self.assertEqual(p.zone.polygon.x, 0)
+            self.assertEqual(p.zone.polygon.y, 0)
+            self.assertEqual(p.zone.polygon.width, 2000)
+            self.assertEqual(p.zone.polygon.height, 1000)
+            self.assertEqual(p.zone.image.status, ImageStatus.Unchecked)
+            self.assertEqual(p.zone.image.server, self.imgsrv)
+            self.assertEqual(p.zone.image.width, 2000)
+            self.assertEqual(p.zone.image.height, 1000)
+
+        # Events
+        self.assertCountEqual(
+            vol.events.values_list('type', 'revision'),
+            [
+                (EventType.Addition, first_rev.id),
+                (EventType.Edit, second_rev.id),
+            ],
+        )
+        self.assertCountEqual(
+            reg.events.values_list('type', 'revision'),
+            [
+                (EventType.Addition, first_rev.id),
+                (EventType.Edit, second_rev.id),
+            ],
+        )
+        self.assertCountEqual(
+            first_page.events.values_list('type', 'revision'),
+            [(EventType.Addition, second_rev.id)],
+        )
+        self.assertCountEqual(
+            second_page.events.values_list('type', 'revision'),
+            [
+                (EventType.Addition, first_rev.id),
+                (EventType.Edit, second_rev.id),
+            ],
+        )
+
+    def test_import_once(self):
+        """
+        Import a manifest file from scratch
+        """
+        self.assertFalse(Element.objects.filter(type=ElementType.Volume, name='ParserTest').exists())
+        self.assertFalse(Element.objects.filter(type=ElementType.Register, name='ParserTest').exists())
+        ManifestParser(
+            os.path.join(FIXTURES, 'base.json'),
+            self.rev,
+            self.corpus,
+            servers=[self.imgsrv],
+            lazy=True,
+            autocreate_servers=False,
+            autoconvert_https=False,
+            volume_name='ParserTest',
+        ).run()
+        self._assert_first_import(self.rev)
+
+    def test_import_changes(self):
+        """
+        Import a manifest file from scratch, then apply another manifest with some changes
+        """
+        self.assertFalse(Element.objects.filter(type=ElementType.Volume, name='ParserTest').exists())
+        self.assertFalse(Element.objects.filter(type=ElementType.Register, name='ParserTest').exists())
+
+        # First import
+        ManifestParser(
+            os.path.join(FIXTURES, 'base.json'),
+            self.rev,
+            self.corpus,
+            servers=[self.imgsrv],
+            lazy=True,
+            autocreate_servers=False,
+            autoconvert_https=False,
+            volume_name='ParserTest',
+        ).run()
+        self._assert_first_import(self.rev)
+
+        # Second import
+        new_rev = self.repo.revisions.create(
+            hash='1337',
+            ref='refs/heads/master',
+            message='b',
+            author='me',
+        )
+        ManifestParser(
+            os.path.join(FIXTURES, 'changed.json'),
+            new_rev,
+            self.corpus,
+            servers=[self.imgsrv],
+            lazy=True,
+            autocreate_servers=False,
+            autoconvert_https=False,
+            volume_name='ParserTest',
+        ).run()
+        self._assert_second_import(self.rev, new_rev)
+
+    @patch('arkindex.dataimport.providers.git.Repo.clone_from')
+    def test_git_import(self, clone_mock):
+        """
+        Import manifest files from a Git repo
+        """
+        # Create a Git repo
+        repo_dir = os.path.join(FIXTURES, 'repo')
+        if os.path.exists(repo_dir):
+            shutil.rmtree(repo_dir)
+        os.makedirs(repo_dir, exist_ok=True)
+        repo = git.Repo.init(repo_dir)
+
+        # Prevent cloning from anywhere else but this repo
+        clone_mock.side_effect = lambda src, dest, **kwargs: repo.clone(dest, **kwargs)
+
+        def copy_commit(message, src=(), dst=()):
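+            # Copy fixture files into the repo, stage them and return the resulting commit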
+            src = [os.path.join(FIXTURES, path) for path in src]
+            dst = [os.path.join(repo_dir, path) for path in dst]
+            list(map(shutil.copyfile, src, dst))
+            repo.index.add(dst)
+            return repo.index.commit(message)
+
+        def run_import(commit):
+            """
+            Create a revision and run a synchronous import
+            """
+            new_rev = self.repo.revisions.create(
+                hash=commit.hexsha,
+                message=commit.message,
+                ref='refs/heads/master',
+                author=commit.author,
+            )
+            workflow = new_rev.dataimports.create(
+                corpus=self.corpus,
+                creator=self.user,
+                state=DataImportState.Running,
+                mode=DataImportMode.Repository,
+            ).build_workflow()
+            # Run synchronously
+            workflow.apply()
+            return new_rev
+
+        # Make commits
+        first_commit = copy_commit(
+            'First commit',
+            src=['.arkindex.yml', 'base.json'],
+            dst=['.arkindex.yml', 'ParserTest.json'],
+        )
+        second_commit = copy_commit(
+            'Second commit',
+            src=['changed.json'],
+            dst=['ParserTest.json'],
+        )
+
+        # Run first import
+        first_rev = run_import(first_commit)
+        self._assert_first_import(first_rev)
+
+        # Run second import
+        second_rev = run_import(second_commit)
+        self._assert_second_import(first_rev, second_rev)
+
+        # Remove the repo
+        shutil.rmtree(repo_dir)
diff --git a/arkindex/dataimport/tests/test_tasks.py b/arkindex/dataimport/tests/test_tasks.py
index 9c4580c069f7174439578eb3493ad5d25ed4c7a7..8b3fcbd6811db1b077359eca51821c031ca3b560 100644
--- a/arkindex/dataimport/tests/test_tasks.py
+++ b/arkindex/dataimport/tests/test_tasks.py
@@ -1,7 +1,7 @@
 from django.core.management import call_command
 from arkindex.project.tests import RedisMockAPITestCase, FixtureMixin
 from arkindex.dataimport.tasks import save_ml_results
-from arkindex.documents.models import Page, Corpus, Element, ElementType
+from arkindex.documents.models import Page, Element, ElementType
 
 
 class TestTasks(FixtureMixin, RedisMockAPITestCase):
@@ -9,9 +9,8 @@ class TestTasks(FixtureMixin, RedisMockAPITestCase):
     Test data imports tasks
     """
     def test_save_ml_results(self):
-        corpus = Corpus.objects.create(name='test class')
-        dog = Page.objects.create(corpus=corpus, name='A dog')
-        cat = Page.objects.create(corpus=corpus, name='A cat')
+        dog = Page.objects.create(corpus=self.corpus, name='A dog')
+        cat = Page.objects.create(corpus=self.corpus, name='A cat')
 
         classification = {
             dog.id: {
diff --git a/arkindex/documents/admin.py b/arkindex/documents/admin.py
index 67aad328999ae8c6ac8c1839b81eb354a0fdd128..d016502169b3bb66663368673442b841fedd7ae6 100644
--- a/arkindex/documents/admin.py
+++ b/arkindex/documents/admin.py
@@ -4,6 +4,7 @@ from django.urls import reverse
 from django.utils.html import format_html
 from arkindex.documents.models import Corpus, Page, Element, ElementType, Act, Transcription, MetaData
 from arkindex.documents.views import DumpActs
+from arkindex.dataimport.models import Event
 from enumfields.admin import EnumFieldListFilter
 
 
@@ -11,11 +12,16 @@ class CorpusAdmin(admin.ModelAdmin):
     list_display = ('id', 'name', 'public', )
 
 
+class EventInline(admin.TabularInline):
+    model = Event
+
+
 class PageAdmin(admin.ModelAdmin):
     list_display = ('id', 'name', 'page_type', 'nb', 'direction', )
     list_filter = [('page_type', EnumFieldListFilter)]
     fields = ('id', 'name', 'folio', 'page_type', 'nb', 'direction', 'classification', 'text')
     readonly_fields = ('id', )
+    inlines = (EventInline, )
 
 
 class MetaDataInline(admin.TabularInline):
@@ -27,7 +33,7 @@ class ElementAdmin(admin.ModelAdmin):
     list_filter = [('type', EnumFieldListFilter), 'corpus']
     fields = ('id', 'type', 'name', 'corpus')
     readonly_fields = ('id', 'element_actions')
-    inlines = (MetaDataInline, )
+    inlines = (MetaDataInline, EventInline)
 
     def get_urls(self):
         urls = super().get_urls()
@@ -53,7 +59,7 @@ class ActAdmin(admin.ModelAdmin):
     list_display = ('id', 'name')
     fields = ('id', 'name', 'folio', 'number')
     readonly_fields = ('id', )
-    inlines = (MetaDataInline, )
+    inlines = (MetaDataInline, EventInline)
 
 
 class TranscriptionAdmin(admin.ModelAdmin):
diff --git a/arkindex/documents/apps.py b/arkindex/documents/apps.py
index ad5872afc6428463700cfb2e9884fe49c7405c63..22b2c724fdc7f9cc9f05e449f5341786e4cdcdd1 100644
--- a/arkindex/documents/apps.py
+++ b/arkindex/documents/apps.py
@@ -11,7 +11,8 @@ class DocumentsConfig(AppConfig):
 
     def _package_version(self, name):
         try:
-            return open(os.path.join(settings.BASE_DIR, '..', 'VERSION')).read()
+            with open(os.path.join(settings.BASE_DIR, '..', 'VERSION')) as f:
+                return f.read()
         except (OSError, AttributeError, ImproperlyConfigured) as e:
             # File not found or settings module not ready
             pass
diff --git a/arkindex/documents/importer.py b/arkindex/documents/importer.py
index 2f3fd3a3bf40e2c80ca19d87f019783af8ed2ac6..98cbd24727d257567c7878204d009ba100c85962 100644
--- a/arkindex/documents/importer.py
+++ b/arkindex/documents/importer.py
@@ -11,6 +11,7 @@ import os
 import fnmatch
 import uuid
 import ijson
+import warnings
 
 
 logger = logging.getLogger(__name__)
@@ -132,6 +133,11 @@ class ManifestsImporter(ABC):
         """Initialize a manifest importer
         `imgserv` can be either one ImageServer or a list of ImageServers.
         When `volume_name` is set, it overrides the manifest volume name."""
+
+        warnings.warn(
+            "ManifestsImporter and subclasses are deprecated; use arkindex.dataimport.iiif.ManifestParser instead",
+            category=DeprecationWarning, stacklevel=2)
+
         if isinstance(imgserv, ImageServer):
             self.imgserv = [imgserv]
         else:
diff --git a/arkindex/documents/migrations/0026_corpus_description.py b/arkindex/documents/migrations/0026_corpus_description.py
new file mode 100644
index 0000000000000000000000000000000000000000..1433476e6a024c16ce3465f61ea5948f9ccf20ad
--- /dev/null
+++ b/arkindex/documents/migrations/0026_corpus_description.py
@@ -0,0 +1,18 @@
+# Generated by Django 2.1 on 2018-10-02 13:51
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('documents', '0025_avoid_doublons'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='corpus',
+            name='description',
+            field=models.TextField(default=''),
+        ),
+    ]
diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py
index 5115a00d096eb15d7d6cf65040daa8e289d18ed8..48937a7061a029c4a781e5398f7ba17fc5261733 100644
--- a/arkindex/documents/models.py
+++ b/arkindex/documents/models.py
@@ -27,6 +27,7 @@ class Corpus(models.Model):
     '''
     id = models.UUIDField(default=uuid.uuid4, primary_key=True)
     name = models.CharField(max_length=250)
+    description = models.TextField(default="")
 
     # Is this corpus publicly readable ?
     public = models.BooleanField(default=False)
@@ -331,6 +332,19 @@ class Page(Element):
         self.type = ElementType.Page
         super().save(*args, **kwargs)
 
+    def same_as(self, other):
+        """
+        Python's rich comparison methods on this class are used to perform sorting,
+        so this method provides a separate content-equality check: it returns True
+        when there is no difference between the two pages.
+        Used by Git imports to tell whether a page has been modified in a newer revision.
+        """
+        # Fetch the Page subclass when comparing against a plain Element;
+        # anything that is not a Page at all cannot be equal
+        if not isinstance(other, Page):
+            if not isinstance(other, Element) or not hasattr(other, 'page'):
+                return False
+            other = other.page
+        return self.zone_id == other.zone_id and self.folio == other.folio
+
     def __lt__(self, other):
         if self.page_type == other.page_type:
             if self.nb == other.nb:
diff --git a/arkindex/documents/serializers/elements.py b/arkindex/documents/serializers/elements.py
index 4b0112d4e536304aeb4b33080925169fe3faaa39..6d660013ac5bfa3fcb29e7d88aecebfbf9ee1c07 100644
--- a/arkindex/documents/serializers/elements.py
+++ b/arkindex/documents/serializers/elements.py
@@ -69,6 +69,7 @@ class CorpusSerializer(serializers.ModelSerializer):
         fields = (
             'id',
             'name',
+            'description',
             'public',
             'rights',
         )
diff --git a/arkindex/documents/tests/test_corpus.py b/arkindex/documents/tests/test_corpus.py
index 8c5eaaf0fe9efd9e0fe8c2dce4b409fedb92ca3b..d0fb5dbbbe8f11ac48d6272e3bfbf9430e86f89a 100644
--- a/arkindex/documents/tests/test_corpus.py
+++ b/arkindex/documents/tests/test_corpus.py
@@ -30,6 +30,7 @@ class TestCorpus(FixtureAPITestCase):
                     'rights': ['read'],
                     'public': True,
                     'name': 'Unit Tests',
+                    'description': '',
                 }
             ]
         )
@@ -48,12 +49,14 @@ class TestCorpus(FixtureAPITestCase):
                     'rights': ['read', 'write'],
                     'public': False,
                     'name': 'B Private',
+                    'description': '',
                 },
                 {
                     'id': str(self.corpus_public.id),
                     'rights': ['read', 'write', 'admin'],
                     'public': True,
                     'name': 'Unit Tests',
+                    'description': '',
                 }
             ]
         )
@@ -72,6 +75,7 @@ class TestCorpus(FixtureAPITestCase):
                     'rights': ['read', 'write', 'admin'],
                     'public': c.public,
                     'name': c.name,
+                    'description': '',
                 }
                 for c in Corpus.objects.all().order_by('name')
             ]
diff --git a/arkindex/images/models.py b/arkindex/images/models.py
index 3fecb557ddda83ca70b9461a43d0ed3717edff91..007353308b5f8c1661bbe78c505c856d75503741 100644
--- a/arkindex/images/models.py
+++ b/arkindex/images/models.py
@@ -26,61 +26,30 @@ class ImageServer(models.Model):
     def __str__(self):
         return self.name
 
-    def find_image(self, path, offline=False, width=None, height=None):
+    def find_image(self, path, offline=False, width=None, height=None, save=True):
         """
-        Lookup an image on server
+        Look up an image on this server
         This is the preferred way to construct an image
         """
         img = None
         # Try direct access to path
-        try:
-            img = self.images.get(path=path)
-        except Image.DoesNotExist:
-            pass
+        img = self.images.filter(path=path).first()
 
         if img is None:
             # Try the url encoded path
-            try:
-                img = self.images.get(
-                    path=urllib.parse.quote_plus(path),
-                )
-            except Image.DoesNotExist:
-                pass
+            img = self.images.filter(
+                path=urllib.parse.quote_plus(path),
+            ).first()
 
-        # Support offline queries
-        if offline:
-            if img is None:
-                img = self.images.create(path=path, width=width, height=height)
-            return img
+        if img is None:
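+            # Not found in the database: build a new, unsaved image instance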
+            img = Image(server=self, path=path, width=width, height=height)
 
-        # Check the source
-        if not path.endswith('/'):
-            path += '/'
-        info_url = urllib.parse.urljoin(self.build_url(path), 'info.json')
-        resp = requests.get(info_url, allow_redirects=True)
-        resp.raise_for_status()
-        data = resp.json()
+        # Offline queries skip the remote IIIF check entirely;
+        # saving is handled below, so avoid a redundant save here
+        if not offline:
+            img.perform_check(save=False)
 
-        if img is None:
-            # Use Image id from IIIF server and create image
-            image_id = data.get('@id')
-            assert image_id is not None, \
-                'Missing image id in server response'
-            assert image_id.startswith(self.url), \
-                'Image id does not start with server url ({} vs. {})'.format(
-                    image_id, self.url)
-            image_path = image_id[len(self.url) + 1:]
-            img = self.images.create(path=image_path)
-
-        assert 'width' in data, 'Missing image width in server response'
-        assert 'height' in data, 'Missing image height in server response'
-        if img.width != data['width'] or img.height != data['height']:
-            # Missing width or height data in image
-            img.width = data['width']
-            img.height = data['height']
-
-        img.status = ImageStatus.Checked
-        img.save()
+        if save:
+            img.save()
 
         return img
 
@@ -144,6 +113,37 @@ class Image(IndexableModel):
             resp.raise_for_status()
             return PIL.Image.open(resp.raw)
 
+    def perform_check(self, save=True):
+        """
+        Check the image's existence and update width, height and status properties
+        """
+
+        # Ensure a trailing slash so urljoin appends info.json
+        # instead of replacing the last path segment
+        url = self.url
+        if not url.endswith('/'):
+            url += '/'
+        info_url = urllib.parse.urljoin(url, 'info.json')
+        resp = requests.get(info_url, allow_redirects=True)
+        if not resp.ok:
+            self.status = ImageStatus.Error
+            return
+
+        data = resp.json()
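+        # A valid IIIF Image Information response provides @id, width and height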
+        if any(item not in data for item in ('@id', 'width', 'height')):
+            self.status = ImageStatus.Error
+            return
+
+        image_id = data['@id']
+        assert image_id.startswith(self.url), \
+            'Image id does not start with server url ({} vs. {})'.format(image_id, self.url)
+
+        # Use image resource ID from IIIF server to update the image path if needed
+        self.path = image_id[len(self.url) + 1:]
+        self.width, self.height = int(data['width']), int(data['height'])
+
+        self.status = ImageStatus.Checked
+        if save:
+            self.save()
+
     def __str__(self):
         return '{} - {}'.format(self.id, self.url)
 
diff --git a/arkindex/project/api_v1.py b/arkindex/project/api_v1.py
index e49f47af73e2bb28a73e8371cef8642b710989d4..5d932eb86ad23d43e5aaadf4470fcb6657578429 100644
--- a/arkindex/project/api_v1.py
+++ b/arkindex/project/api_v1.py
@@ -1,21 +1,26 @@
 from django.conf.urls import url
 from django.views.generic.base import RedirectView
 
-from arkindex.documents.api.elements import \
-     ElementsList, RelatedElementsList, ElementRetrieve, ElementPages, ElementSurfaces, CorpusList, CorpusPages, \
-     ActEdit, PageDetails, SurfaceDetails
+from arkindex.documents.api.elements import (
+     ElementsList, RelatedElementsList, ElementRetrieve, ElementPages, ElementSurfaces,
+     CorpusList, CorpusPages, ActEdit, PageDetails, SurfaceDetails,
+)
 from arkindex.documents.api.search import PageSearch, ActSearch
 from arkindex.documents.api.transcriptions import TranscriptionCreate, TranscriptionBulk
-from arkindex.documents.api.iiif import \
-    VolumeManifest, ActManifest, PageAnnotationList, PageActAnnotationList, SurfaceAnnotationList, \
-    TranscriptionSearchAnnotationList
-from arkindex.dataimport.api import \
-    DataImportsList, DataImportDetails, DataImportFailures, DataImportDemo, \
-    DataFileList, DataFileRetrieve, DataFileUpload, \
-    GitRepositoryImportHook, RepositoryList, AvailableRepositoriesList, RepositoryRetrieve, RepositoryStartImport
-from arkindex.users.api import \
-    ProvidersList, CredentialsList, CredentialsRetrieve, \
+from arkindex.documents.api.iiif import (
+    VolumeManifest, ActManifest, PageAnnotationList, PageActAnnotationList, SurfaceAnnotationList,
+    TranscriptionSearchAnnotationList,
+)
+from arkindex.dataimport.api import (
+    DataImportsList, DataImportDetails, DataImportFailures, DataImportDemo,
+    DataFileList, DataFileRetrieve, DataFileUpload,
+    RepositoryList, RepositoryRetrieve, RepositoryStartImport,
+    GitRepositoryImportHook, AvailableRepositoriesList, ElementHistory,
+)
+from arkindex.users.api import (
+    ProvidersList, CredentialsList, CredentialsRetrieve,
     UserRetrieve, UserCreate, UserEmailLogin, UserEmailVerification
+)
 
 api = [
 
@@ -28,6 +33,7 @@ api = [
         RelatedElementsList.as_view(), name='related-elements'),
     url(r'elements/$', ElementsList.as_view(), name='elements'),
     url(r'element/(?P<pk>[\w\-]+)/$', ElementRetrieve.as_view(), name='element-retrieve'),
+    url(r'element/(?P<pk>[\w\-]+)/history/$', ElementHistory.as_view(), name='element-history'),
     url(r'page/(?P<pk>[\w\-]+)/$', PageDetails.as_view(), name='page-details'),
     url(r'surface/(?P<pk>[\w\-]+)/$', SurfaceDetails.as_view(), name='surface-details'),
     url(r'corpus/$', CorpusList.as_view(), name='corpus'),
diff --git a/arkindex/project/celery.py b/arkindex/project/celery.py
index 702479d4cbe07a09b0bc7216bc073f3573ff4814..0539a52d8d3deff25e6d3eedd8b5974a0af4de36 100644
--- a/arkindex/project/celery.py
+++ b/arkindex/project/celery.py
@@ -76,7 +76,9 @@ class ReportingTask(Task):
 
     def report_progress(self, progress, message=None):
         assert 0.0 <= progress <= 1.0
-        self.update_state(state='PROGRESS', meta={'progress': progress})
+        # State only works on tasks run in workers, not locally
+        if not self.request.is_eager and not self.request.called_directly:
+            self.update_state(state='PROGRESS', meta={'progress': progress})
 
         # Report message as info
         if not isinstance(message, str):
@@ -86,6 +88,9 @@ class ReportingTask(Task):
     def report_message(self, message, level=logging.INFO):
         assert isinstance(message, str)
         logger.log(msg=message, level=level)
+        # Do not use a result backend for tasks run locally
+        if self.request.is_eager or self.request.called_directly:
+            return
         self.backend.add_message(
             self.request.id,
             {
diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py
index 856019d9dd787af3377e19c149760e4bbbe0cd15..a8f9f60cef63f50af6bce66955520245e912b1bf 100644
--- a/arkindex/project/settings.py
+++ b/arkindex/project/settings.py
@@ -363,6 +363,11 @@ CORS_ORIGIN_WHITELIST = env2list('CORS_ORIGIN_WHITELIST')
 CORS_ALLOW_CREDENTIALS = True
 CORS_URLS_REGEX = r'^/api/.*$'
 
+# Show all warnings in debug mode
+if DEBUG:
+    import warnings
+    warnings.simplefilter('default')
+
 # Optional unit tests runner with code coverage
 try:
     import django_nose # noqa
diff --git a/requirements.txt b/requirements.txt
index 4c81764cd7d48b9fefa5c00354c851aacb620f1d..ec73c8bdf8dff1b456a0e57e9bd8b1b7a7be9e69 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -17,6 +17,7 @@ python-gitlab==1.5.1
 python-magic==0.4.15
 python-memcached==1.59
 pytz==2017.2
+PyYAML==3.13
 requests==2.18.4
 roman==2.0.0
 urllib3==1.22