From 211bdf65adfd79a27c5263ec85ac5655d9537875 Mon Sep 17 00:00:00 2001
From: Erwan Rouchet <rouchet@teklia.com>
Date: Fri, 27 Jul 2018 10:59:00 +0000
Subject: [PATCH] Import TEI metadata from Git repos

---
 Dockerfile                                   |  6 +-
 MANIFEST.in                                  |  1 +
 arkindex/dataimport/admin.py                 | 15 +++-
 arkindex/dataimport/api.py                   | 52 ++++++++++-
 .../migrations/0002_repository_revision.py   | 89 +++++++++++++++++++
 arkindex/dataimport/models.py                | 87 ++++++++++++++++--
 arkindex/dataimport/serializers.py           | 19 +++-
 arkindex/dataimport/tasks.py                 | 85 +++++++++++++++++-
 .../management/commands/import_tei.py        |  2 +-
 .../migrations/0020_metadata_revision.py     | 25 ++++++
 arkindex/documents/models.py                 |  1 +
 arkindex/documents/serializers.py            |  3 +
 arkindex/documents/tei.py                    | 61 +++++++------
 arkindex/documents/tests/test_act.py         |  9 +-
 arkindex/project/api_v1.py                   |  3 +-
 arkindex/project/settings.py                 |  2 +
 requirements.txt                             |  1 +
 setup.py                                     |  1 +
 18 files changed, 412 insertions(+), 50 deletions(-)
 create mode 100644 MANIFEST.in
 create mode 100644 arkindex/dataimport/migrations/0002_repository_revision.py
 create mode 100644 arkindex/documents/migrations/0020_metadata_revision.py

diff --git a/Dockerfile b/Dockerfile
index 188421e69d..f05773143f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -12,11 +12,11 @@ RUN addgroup -g 1000 teklia && adduser -D -u 1000 -G teklia ark
 RUN mkdir -p $PYTHON_EGG_CACHE && chmod a+rxw $PYTHON_EGG_CACHE
 
 # Allow access to medias and logs
-RUN mkdir -p /medias/staging /medias/iiif /logs
-RUN chown -R ark:teklia /medias /logs
+RUN mkdir -p /medias/staging /medias/iiif /logs /workers
+RUN chown -R ark:teklia /medias /logs /workers
 
 # Add system dependencies
-RUN apk add --update --no-cache postgresql-dev jpeg-dev build-base wget gzip zlib-dev libmagic libxml2-dev libxslt-dev
+RUN apk add --update --no-cache postgresql-dev jpeg-dev build-base wget gzip zlib-dev libmagic libxml2-dev libxslt-dev git
 
 # Setup frontend
 ENV FRONTEND_DIR="/frontend/dist"
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000000..3c88a74a70
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1 @@
+include arkindex/documents/*.xsl
diff --git a/arkindex/dataimport/admin.py b/arkindex/dataimport/admin.py
index 67485324db..9530bca1c2 100644
--- a/arkindex/dataimport/admin.py
+++ b/arkindex/dataimport/admin.py
@@ -1,6 +1,6 @@
 from django.contrib import admin
 from enumfields.admin import EnumFieldListFilter
-from arkindex.dataimport.models import DataImport, DataFile
+from arkindex.dataimport.models import DataImport, DataFile, Repository, Revision
 
 
 class DataFileInline(admin.StackedInline):
@@ -23,5 +23,18 @@ class DataFileAdmin(admin.ModelAdmin):
     inlines = [DataFileInline, ]
 
 
+class RevisionInline(admin.StackedInline):
+    model = Revision
+
+
+class RepositoryAdmin(admin.ModelAdmin):
+    list_display = ('id', 'url', 'user', 'corpus')
+    list_filter = ('corpus', )
+    fields = ('id', 'url', 'user', 'corpus', 'clone_user', 'clone_token', 'hook_token', 'watched_branches')
+    readonly_fields = ('id', )
+    inlines = [RevisionInline, ]
+
+
 admin.site.register(DataImport, DataImportAdmin)
 admin.site.register(DataFile, DataFileAdmin)
+admin.site.register(Repository, RepositoryAdmin)
diff --git a/arkindex/dataimport/api.py b/arkindex/dataimport/api.py
index c667164a10..4e88be861a 100644
--- a/arkindex/dataimport/api.py
+++ b/arkindex/dataimport/api.py
@@ -1,3 +1,4 @@
+from django.shortcuts import get_object_or_404
 from rest_framework.generics import \
     ListAPIView, ListCreateAPIView, RetrieveUpdateDestroyAPIView
 from rest_framework.views import APIView
@@ -5,9 +6,10 @@ from rest_framework.parsers import MultiPartParser, FileUploadParser
 from rest_framework.permissions import IsAuthenticated
 from rest_framework.response import Response
 from rest_framework import status
-from rest_framework.exceptions import ValidationError
+from rest_framework.exceptions import ValidationError, NotAuthenticated, AuthenticationFailed
 from arkindex.documents.models import Corpus
-from arkindex.dataimport.models import DataImport, DataFile, DataImportState, DataImportMode
+from arkindex.dataimport.models import \
+    DataImport, DataFile, DataImportState, DataImportMode, Repository, RepositorySource, Revision
 from arkindex.dataimport.serializers import \
     DataImportLightSerializer, DataImportSerializer, DataFileSerializer
 import hashlib
@@ -135,3 +137,49 @@ class DataFileUpload(APIView):
         return Response(
             data=DataFileSerializer(df).data,
             status=status.HTTP_201_CREATED)
+
+
+class GitRepositoryImportHook(APIView):
+    """
+    Handle Git push events
+    """
+
+    def post(self, request, pk=None, **kwargs):
+        repo = get_object_or_404(Repository, id=pk)
+
+        if repo.source == RepositorySource.GitLab:
+            if 'HTTP_X_GITLAB_EVENT' not in request.META:
+                raise ValidationError("Missing GitLab event type")
+            if request.META['HTTP_X_GITLAB_EVENT'] != 'Push Hook':
+                raise ValidationError("Unsupported GitLab event type")
+
+            if 'HTTP_X_GITLAB_TOKEN' not in request.META:
+                raise NotAuthenticated("Missing GitLab secret token")
+            if request.META['HTTP_X_GITLAB_TOKEN'] != repo.hook_token:
+                raise AuthenticationFailed("Invalid GitLab secret token")
+
+            assert isinstance(request.data, dict)
+            assert request.data['object_kind'] == 'push'
+
+            if request.data['ref'] not in repo.watched_branches:
+                return Response(status=status.HTTP_204_NO_CONTENT)
+
+            # Already took care of this event
+            if Revision.objects.filter(
+                    repo=repo,
+                    ref=request.data['ref'],
+                    hash=request.data['checkout_sha']).exists():
+                return Response(status=status.HTTP_204_NO_CONTENT)
+
+            rev = Revision.objects.create(
+                repo=repo,
+                hash=request.data['checkout_sha'],
+                ref=request.data['ref'],
+                message=request.data['commits'][-1]['message'],
+                author=request.data['commits'][-1]['author']['name'],
+            )
+        else:
+            raise NotImplementedError
+
+        rev.start_import()
+        return Response(status=status.HTTP_204_NO_CONTENT)
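
Example (not part of the patch): the hook above only reads a handful of keys
from the GitLab push payload: object_kind, ref, checkout_sha, and the last
commit's message and author. A minimal sketch of how it could be exercised
from a DRF test case, assuming an existing Repository fixture `self.repo`
with a gitlab.com URL, and assuming the route (added in api_v1.py below) is
reversible as 'api:import-hook':

    from django.urls import reverse
    from rest_framework.test import APITestCase


    class TestGitRepositoryImportHook(APITestCase):

        def test_gitlab_push(self):
            # Payload carrying only the keys the view reads; the SHA is made up
            payload = {
                'object_kind': 'push',
                'ref': 'refs/heads/master',  # in watched_branches by default
                'checkout_sha': 'a' * 40,
                'commits': [{'message': 'Add TEI files', 'author': {'name': 'Jane Doe'}}],
            }
            response = self.client.post(
                reverse('api:import-hook', kwargs={'pk': str(self.repo.id)}),
                payload,
                format='json',
                HTTP_X_GITLAB_EVENT='Push Hook',
                HTTP_X_GITLAB_TOKEN=self.repo.hook_token,
            )
            # The view answers 204 whether it starts an import or ignores the ref
            self.assertEqual(response.status_code, 204)
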
diff --git a/arkindex/dataimport/migrations/0002_repository_revision.py b/arkindex/dataimport/migrations/0002_repository_revision.py
new file mode 100644
index 0000000000..93169f0c49
--- /dev/null
+++ b/arkindex/dataimport/migrations/0002_repository_revision.py
@@ -0,0 +1,89 @@
+# Generated by Django 2.0 on 2018-07-26 09:46
+
+import arkindex.project.fields
+from django.conf import settings
+from django.db import migrations, models
+import django.db.models.deletion
+import uuid
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
+        ('dataimport', '0001_initial'),
+        ('documents', '0019_metadatas'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='Repository',
+            fields=[
+                ('id', models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False)),
+                ('url', models.URLField(unique=True)),
+                ('hook_token', models.CharField(max_length=250, unique=True)),
+                ('clone_user', models.CharField(max_length=100)),
+                ('clone_token', models.CharField(max_length=250)),
+                ('corpus', models.ForeignKey(
+                    on_delete=django.db.models.deletion.CASCADE,
+                    related_name='repos',
+                    to='documents.Corpus',
+                )),
+                ('user', models.ForeignKey(
+                    on_delete=django.db.models.deletion.CASCADE,
+                    related_name='repos',
+                    to=settings.AUTH_USER_MODEL,
+                )),
+            ],
+            options={
+                'verbose_name_plural': 'repositories',
+            },
+        ),
+        migrations.AlterModelOptions(
+            name='datafile',
+            options={'ordering': ['corpus', 'name']},
+        ),
+        migrations.AlterModelOptions(
+            name='dataimport',
+            options={'ordering': ['corpus', '-created']},
+        ),
+        migrations.CreateModel(
+            name='Revision',
+            fields=[
+                ('id', models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False)),
+                ('hash', models.CharField(max_length=50)),
+                ('ref', models.CharField(max_length=50)),
+                ('message', models.TextField()),
+                ('author', models.CharField(max_length=50)),
+                ('repo', models.ForeignKey(
+                    on_delete=django.db.models.deletion.CASCADE,
+                    related_name='revisions',
+                    to='dataimport.Repository',
+                )),
+            ],
+        ),
+        migrations.AddField(
+            model_name='dataimport',
+            name='revision',
+            field=models.OneToOneField(
+                blank=True,
+                null=True,
+                on_delete=django.db.models.deletion.CASCADE,
+                related_name='dataimport',
+                to='dataimport.Revision',
+            ),
+        ),
+        migrations.AlterUniqueTogether(
+            name='revision',
+            unique_together={('repo', 'hash')},
+        ),
+        migrations.AddField(
+            model_name='repository',
+            name='watched_branches',
+            field=arkindex.project.fields.ArrayField(
+                base_field=models.CharField(max_length=50),
+                default=['refs/heads/master'],
+                size=None,
+            ),
+        ),
+    ]
diff --git a/arkindex/dataimport/models.py b/arkindex/dataimport/models.py
index b1591fa56e..500a52ad5a 100644
--- a/arkindex/dataimport/models.py
+++ b/arkindex/dataimport/models.py
@@ -7,8 +7,10 @@ from celery.canvas import Signature
 from celery.result import AsyncResult, GroupResult
 from enumfields import EnumField, Enum
 from arkindex.project.models import IndexableModel
+from arkindex.project.fields import ArrayField
 import uuid
 import os
+import urllib.parse
 
 
 class DataImportState(Enum):
@@ -24,6 +26,7 @@ class DataImportMode(Enum):
     Annotations = 'annotations'
     Surfaces = 'surfaces'
     Acts = 'acts'
+    Repository = 'repository'
 
 
 class DataImport(IndexableModel):
@@ -36,6 +39,8 @@ class DataImport(IndexableModel):
     state = EnumField(DataImportState, default=DataImportState.Created, max_length=30)
     mode = EnumField(DataImportMode, max_length=30)
     files = models.ManyToManyField('dataimport.DataFile', related_name='imports')
+    revision = models.OneToOneField(
+        'dataimport.Revision', related_name='dataimport', on_delete=models.CASCADE, blank=True, null=True)
     payload = JSONField(null=True, blank=True)
     root_id = models.UUIDField(null=True, blank=True)
     task_count = models.PositiveSmallIntegerField(null=True, blank=True)
@@ -66,12 +71,15 @@ class DataImport(IndexableModel):
         return self.tasks[-1].result
 
     def build_workflow(self):
-        # Only Images import is supported
-        assert self.mode == DataImportMode.Images
-
-        # Prevent circular imports
-        from arkindex.dataimport.tasks import check_images, import_images
-        return check_images.s(self) | import_images.s(self)
+        if self.mode == DataImportMode.Images:
+            # Prevent circular imports
+            from arkindex.dataimport.tasks import check_images, import_images
+            return check_images.s(self) | import_images.s(self)
+        elif self.mode == DataImportMode.Repository:
+            from arkindex.dataimport.tasks import clone_repo, import_repo, cleanup_repo
+            return clone_repo.si(self) | import_repo.si(self) | cleanup_repo.si(self)
+        else:
+            raise NotImplementedError
 
     def get_task_count(self, signature):
         assert isinstance(signature, Signature)
@@ -131,3 +139,70 @@ class DataFile(models.Model):
     @property
     def staging_path(self):
         return os.path.join(settings.MEDIA_ROOT, str(self.id))
+
+
+class RepositorySource(Enum):
+    GitHub = 'github'
+    GitLab = 'gitlab'
+
+
+class Repository(models.Model):
+    id = models.UUIDField(primary_key=True, default=uuid.uuid4)
+    url = models.URLField(unique=True)
+    hook_token = models.CharField(max_length=250, unique=True)
+    clone_user = models.CharField(max_length=100)
+    clone_token = models.CharField(max_length=250)
+    corpus = models.ForeignKey('documents.Corpus', on_delete=models.CASCADE, related_name='repos')
+    user = models.ForeignKey('users.User', on_delete=models.CASCADE, related_name='repos')
+    watched_branches = ArrayField(models.CharField(max_length=50), default=['refs/heads/master'])
+
+    class Meta:
+        verbose_name_plural = 'repositories'
+
+    @property
+    def auth_url(self):
+        """Repository URL with added credentials"""
+        parsed = list(urllib.parse.urlsplit(self.url))
+        if '@' in parsed[1]:  # URL seems to already have credentials
+            return self.url
+        parsed[1] = '{}:{}@{}'.format(self.clone_user, self.clone_token, parsed[1])
+        return urllib.parse.urlunsplit(parsed)
+
+    @property
+    def source(self):
+        parsed = urllib.parse.urlsplit(self.url)
+        if parsed.netloc == 'gitlab.com':
+            return RepositorySource.GitLab
+        elif parsed.netloc == 'github.com':
+            return RepositorySource.GitHub
+        else:
+            raise ValueError('Unknown repository source')
+
+    @property
+    def clone_dir(self):
+        return os.path.join(settings.CELERY_WORKING_DIR, str(self.id))
+
+
+class Revision(models.Model):
+    id = models.UUIDField(primary_key=True, default=uuid.uuid4)
+    repo = models.ForeignKey('dataimport.Repository', on_delete=models.CASCADE, related_name='revisions')
+    hash = models.CharField(max_length=50)
+    ref = models.CharField(max_length=50)
+    message = models.TextField()
+    author = models.CharField(max_length=50)
+
+    class Meta:
+        unique_together = (('repo', 'hash'), )
+
+    @property
+    def commit_url(self):
+        return '{}/commit/{}'.format(self.repo.url.rstrip('/'), self.hash)
+
+    def start_import(self):
+        DataImport.objects.create(
+            creator=self.repo.user,
+            corpus=self.repo.corpus,
+            mode=DataImportMode.Repository,
+            state=DataImportState.Configured,
+            revision=self,
+        ).start()
diff --git a/arkindex/dataimport/serializers.py b/arkindex/dataimport/serializers.py
index 627fedc673..3a7a2f8e6a 100644
--- a/arkindex/dataimport/serializers.py
+++ b/arkindex/dataimport/serializers.py
@@ -1,7 +1,7 @@
 from rest_framework import serializers
 from rest_framework.utils import model_meta
 from arkindex.project.serializer_fields import EnumField
-from arkindex.dataimport.models import DataImport, DataImportMode, DataImportState, DataFile
+from arkindex.dataimport.models import DataImport, DataImportMode, DataImportState, DataFile, Revision
 import celery.states
 
 
@@ -145,3 +145,20 @@ class DataFileSerializer(serializers.ModelSerializer):
             'size',
         )
         read_only_fields = ('id', 'size', 'content_type', )
+
+
+class RevisionSerializer(serializers.ModelSerializer):
+    """
+    Serialize a repository revision
+    """
+
+    class Meta:
+        model = Revision
+        fields = (
+            'id',
+            'hash',
+            'ref',
+            'message',
+            'author',
+            'commit_url',
+        )
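
Example (not part of the patch): what the two Repository properties above
yield for a hypothetical repository; the URL and credentials are made up.

    repo = Repository(
        url='https://gitlab.com/teklia/arkindex-tei.git',
        clone_user='importer',
        clone_token='s3cr3t',
    )
    assert repo.source == RepositorySource.GitLab
    # auth_url injects the clone credentials into the netloc
    assert repo.auth_url == 'https://importer:s3cr3t@gitlab.com/teklia/arkindex-tei.git'
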
diff --git a/arkindex/dataimport/tasks.py b/arkindex/dataimport/tasks.py
index b565920393..63f69c9359 100644
--- a/arkindex/dataimport/tasks.py
+++ b/arkindex/dataimport/tasks.py
@@ -3,20 +3,36 @@ from celery.utils.log import get_task_logger
 from celery.signals import task_postrun
 from celery.states import EXCEPTION_STATES
 from django.conf import settings
+from django.db import transaction
 from arkindex.project.celery import ReportingTask
 from arkindex.documents.models import Element, ElementType
 from arkindex.documents.importer import import_page
+from arkindex.documents.tei import TeiParser
 from arkindex.images.models import ImageServer, ImageStatus
-from arkindex.dataimport.models import DataImport, DataImportState
+from arkindex.dataimport.models import DataImport, DataImportState, DataImportMode
 from PIL import Image
-from shutil import copyfile
 import os
+import glob
 import logging
+import shutil
+import git
 import urllib.parse
 
+root_logger = logging.getLogger(__name__)
 logger = get_task_logger(__name__)
 
 
+class TaskLoggingHandler(logging.Handler):
+
+    def __init__(self, task, level=logging.WARNING):
+        assert isinstance(task, ReportingTask)
+        super().__init__(level=level)
+        self.task = task
+
+    def emit(self, record):
+        self.task.report_message(record.getMessage(), level=record.levelno)
+
+
 @shared_task(bind=True, base=ReportingTask)
 def check_images(self, dataimport):
     assert isinstance(dataimport, DataImport)
@@ -75,7 +91,7 @@ def import_images(self, valid_files, dataimport, server_id=settings.LOCAL_IMAGES
         ext = '.jp2' if pillow_img.format == 'JPEG2000' else '.jpg'
         newfilename = str(datafile.id) + ext
 
-        copyfile(datafile.staging_path, os.path.join(dataimport.iiif_path, newfilename))
+        shutil.copyfile(datafile.staging_path, os.path.join(dataimport.iiif_path, newfilename))
 
         img, _ = server.images.get_or_create(
             path=urllib.parse.urljoin(dataimport.folder_name + '/', newfilename),
@@ -92,6 +108,69 @@
     return {'volume': str(vol.id)}
 
 
+@shared_task(bind=True, base=ReportingTask)
+def clone_repo(self, dataimport):
+    assert isinstance(dataimport, DataImport)
+    assert dataimport.mode == DataImportMode.Repository
+    assert dataimport.revision is not None
+
+    self.report_progress(0, "Cloning repository...")
+    repo_dir = dataimport.revision.repo.clone_dir
+    if os.path.exists(repo_dir):
+        shutil.rmtree(repo_dir)
+    repo = git.Repo.clone_from(dataimport.revision.repo.auth_url, repo_dir, no_checkout=True)
+
+    commit_hash = dataimport.revision.hash
+    self.report_progress(0.5, "Checking out commit {}...".format(commit_hash))
+    repo.head.reference = repo.create_head('commit_{}'.format(commit_hash), commit_hash)
+    repo.head.reset(index=True, working_tree=True)
+
+
+@shared_task(bind=True, base=ReportingTask)
+def import_repo(self, dataimport):
+    handler = TaskLoggingHandler(self)
+    root_logger.addHandler(handler)
+
+    self.report_progress(0, "Finding XML files...")
+    xml_files = glob.glob(os.path.join(dataimport.revision.repo.clone_dir, '**/*.xml'), recursive=True)
+
+    for i, xml_file in enumerate(xml_files, 1):
+        filename = os.path.basename(xml_file)
+        self.report_progress(i / len(xml_files), 'Importing file {} of {}: {}'.format(i, len(xml_files), filename))
+
+        try:
+            parser = TeiParser(xml_file)
+            parser.check()
+            matches = parser.match_database(dataimport.corpus)
+
+            for db_elt, tei_elt in matches:
+                with transaction.atomic():
+                    # Remove old metadatas
+                    db_elt.metadatas.all().delete()
+
+                    # Create new ones
+                    for name, meta in tei_elt.build_metadata().items():
+                        if not meta[1]:
+                            continue
+                        db_elt.metadatas.create(
+                            name=name,
+                            type=meta[0],
+                            value=meta[1],
+                            revision=dataimport.revision,
+                        )
+
+        except Exception as e:
+            self.report_message(
+                "Importing {} failed: {}".format(filename, str(e)), level=logging.WARNING)
+
+    root_logger.removeHandler(handler)
+
+
+@shared_task(bind=True, base=ReportingTask)
+def cleanup_repo(self, dataimport):
+    shutil.rmtree(dataimport.revision.repo.clone_dir)
+
+
 @task_postrun.connect
 def dataimport_postrun(task_id, task, state, args=(), **kwargs):
     '''
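
Example (not part of the patch): DataImport.build_workflow() chains the three
tasks above with immutable signatures, so no task receives the previous one's
return value; each gets the DataImport itself. Roughly equivalent to:

    # `di` is assumed to be a DataImport in Repository mode
    workflow = clone_repo.si(di) | import_repo.si(di) | cleanup_repo.si(di)
    workflow.apply_async()
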
diff --git a/arkindex/documents/management/commands/import_tei.py b/arkindex/documents/management/commands/import_tei.py
index 2858c54ea0..518a62c8b1 100644
--- a/arkindex/documents/management/commands/import_tei.py
+++ b/arkindex/documents/management/commands/import_tei.py
@@ -25,7 +25,7 @@ class Command(BaseCommand):
         parser.add_argument(
             '--corpus',
             type=str,
-            help='Slug of corpus to import manifests into',
+            help='Slug of corpus to import metadata into',
             required=True,
         )
         parser.add_argument(
diff --git a/arkindex/documents/migrations/0020_metadata_revision.py b/arkindex/documents/migrations/0020_metadata_revision.py
new file mode 100644
index 0000000000..6cdae89588
--- /dev/null
+++ b/arkindex/documents/migrations/0020_metadata_revision.py
@@ -0,0 +1,25 @@
+# Generated by Django 2.0 on 2018-07-25 09:52
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('dataimport', '0002_repository_revision'),
+        ('documents', '0019_metadatas'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='metadata',
+            name='revision',
+            field=models.ForeignKey(
+                blank=True,
+                null=True,
+                on_delete=django.db.models.deletion.CASCADE,
+                to='dataimport.Revision',
+            ),
+        ),
+    ]
diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py
index a9f916e2a0..1fb40615c7 100644
--- a/arkindex/documents/models.py
+++ b/arkindex/documents/models.py
@@ -415,6 +415,7 @@ class MetaData(models.Model):
     name = models.CharField(max_length=250)
     type = EnumField(MetaType, max_length=50, db_index=True)
     value = models.TextField()
+    revision = models.ForeignKey('dataimport.Revision', on_delete=models.CASCADE, blank=True, null=True)
 
     class Meta:
         ordering = ('element', 'name')
diff --git a/arkindex/documents/serializers.py b/arkindex/documents/serializers.py
index d78eddea56..567359b392 100644
--- a/arkindex/documents/serializers.py
+++ b/arkindex/documents/serializers.py
@@ -5,6 +5,7 @@ from arkindex.documents.models import \
     Element, ElementType, Transcription, Page, PageType, PageDirection, Act, Corpus, MetaData, MetaType
 from arkindex.images.models import Image, Zone
 from arkindex.images.serializers import ZoneSerializer, ImageSerializer
+from arkindex.dataimport.serializers import RevisionSerializer
 from arkindex.project.serializer_fields import EnumField, ViewerURLField
 from arkindex.project.tools import sslify_url
 import urllib.parse
@@ -15,6 +16,7 @@ class MetaDataSerializer(serializers.ModelSerializer):
     Serialises some Metadata for any Element
     """
     type = EnumField(MetaType)
+    revision = RevisionSerializer()
 
     class Meta:
         model = MetaData
@@ -23,6 +25,7 @@ class MetaDataSerializer(serializers.ModelSerializer):
             'type',
             'name',
             'value',
+            'revision',
         )
diff --git a/arkindex/documents/tei.py b/arkindex/documents/tei.py
index b5a8121763..a5b4ded094 100644
--- a/arkindex/documents/tei.py
+++ b/arkindex/documents/tei.py
@@ -247,9 +247,9 @@
         self.corpus = Corpus(root)
 
     def check(self):
-        logging.info(self.corpus)
+        logger.info(self.corpus)
         for tei in self.corpus.tei:
-            logging.info('{} - completion {:.1%}'.format(tei, tei.completion))
+            logger.info('{} - completion {:.1%}'.format(tei, tei.completion))
 
     def match_database(self, corpus):
         '''
@@ -262,37 +262,40 @@
         # Match volumes
         out = []
         for tei in self.corpus.tei:
+            if not tei.witness:
+                logger.warning('No witness in {}'.format(str(tei)))
+                continue
+
             tei_name = tei.witness.id or tei.witness.repository_id
             volume = find_closest(tei_name, volumes)
-            if volume:
-
-                out.append((volume, tei))
-                logger.info('Matched {} with {}'.format(volume, tei))
-
-                # Load volume acts
-                volume_acts = Element.objects.get_descending(volume.id, type=ElementType.Act)
-                if not volume_acts.exists():
-                    logger.warn('No acts in DB for {}'.format(volume))
+            if not volume:
+                logger.warning('No match for {}'.format(tei))
+                continue
+
+            out.append((volume, tei))
+            logger.info('Matched {} with {}'.format(volume, tei))
+
+            # Load volume acts
+            volume_acts = Element.objects.get_descending(volume.id, type=ElementType.Act)
+            if not volume_acts.exists():
+                logger.warning('No acts in DB for {}'.format(volume))
+                continue
+
+            # Match acts
+            for text in tei.texts:
+                if text.witness is None:
+                    logger.warning('No witness on text, skipping.')
                     continue
-                # Match acts
-                for text in tei.texts:
-                    if text.witness is None:
-                        logger.warn('No witness on text, skipping.')
-                        continue
-
-                    act = Act.objects.filter(
-                        id__in=volume_acts,
-                        number=text.witness.id,
-                    ).first()
-                    if act:
-                        out.append((act, text))
-                        logger.info('Matched {} with {}'.format(act, text))
-                    else:
-                        logger.warn('No match for {}'.format(text))
-
-            else:
-                logger.warn('No match for {}'.format(tei))
+                act = Act.objects.filter(
+                    id__in=volume_acts,
+                    number=text.witness.id,
+                ).first()
+                if act:
+                    out.append((act, text))
+                    logger.info('Matched {} with {}'.format(act, text))
+                else:
+                    logger.warning('No match for {}'.format(text))
 
         return out
diff --git a/arkindex/documents/tests/test_act.py b/arkindex/documents/tests/test_act.py
index d8cdfaece9..c4958d21e7 100644
--- a/arkindex/documents/tests/test_act.py
+++ b/arkindex/documents/tests/test_act.py
@@ -85,13 +85,16 @@ class TestAct(FixtureAPITestCase):
             [{'id': str(metas[1].id),
               'name': 'origin',
               'type': 'date',
-              'value': '2010/01'},
+              'value': '2010/01',
+              'revision': None},
              {'id': str(metas[2].id),
               'name': 'place',
               'type': 'location',
-              'value': 'somewhere'},
+              'value': 'somewhere',
+              'revision': None},
              {'id': str(metas[0].id),
               'name': 'test',
               'type': 'text',
-              'value': 'aha'}]
+              'value': 'aha',
+              'revision': None}]
         )
diff --git a/arkindex/project/api_v1.py b/arkindex/project/api_v1.py
index 9f00ae8136..608d65f7d5 100644
--- a/arkindex/project/api_v1.py
+++ b/arkindex/project/api_v1.py
@@ -7,7 +7,7 @@ from arkindex.documents.api import \
     TranscriptionSearch, ActSearch, TranscriptionSearchAnnotationList, \
     ActEdit, TranscriptionCreate, TranscriptionBulk, SurfaceDetails
 from arkindex.dataimport.api import \
-    DataImportsList, DataImportDetails, DataFileList, DataFileRetrieve, DataFileUpload
+    DataImportsList, DataImportDetails, DataFileList, DataFileRetrieve, DataFileUpload, GitRepositoryImportHook
 
 
 api = [
@@ -73,4 +73,5 @@ api = [
     url(r'^imports/files/(?P<pk>[\w\-]+)$', DataFileList.as_view(), name='file-list'),
     url(r'^imports/file/(?P<pk>[\w\-]+)$', DataFileRetrieve.as_view(), name='file-retrieve'),
     url(r'^imports/upload/(?P<pk>[\w\-]+)$', DataFileUpload.as_view(), name='file-upload'),
+    url(r'^imports/hook/(?P<pk>[\w\-]+)$', GitRepositoryImportHook.as_view(), name='import-hook'),
 ]
diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py
index 8677a611f9..be963cbad7 100644
--- a/arkindex/project/settings.py
+++ b/arkindex/project/settings.py
@@ -227,6 +227,7 @@ IIIF_TRANSCRIPTION_LIST = False
 # TEI XSLT file path
 TEI_XSLT_PATH = os.path.join(BASE_DIR, 'documents/teitohtml.xsl')
 
+
 # Cache into memcached
 CACHES = {
     'default': {
@@ -311,6 +312,7 @@ CELERY_ONCE = {
         'default_timeout': 3600,
     }
 }
+CELERY_WORKING_DIR = os.environ.get('CELERY_WORKING_DIR', os.path.join(BASE_DIR, 'workers'))
 
 # Email
 EMAIL_SUBJECT_PREFIX = '[Arkindex {}] '.format(ARKINDEX_ENV)
diff --git a/requirements.txt b/requirements.txt
index f723301e56..6e67d2223c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,6 +8,7 @@ djangorestframework==3.7.1
 django-webpack-loader==0.5.0
 elasticsearch==6.2.0
 et-xmlfile==1.0.1
+gitpython==2.1.11
 idna==2.6
 ijson==2.3
 jdcal==1.3
diff --git a/setup.py b/setup.py
index 54a67be927..7e120f314f 100644
--- a/setup.py
+++ b/setup.py
@@ -24,6 +24,7 @@ setup(
         'test': tests_requirements,
     },
     packages=find_packages(),
+    include_package_data=True,
     py_modules=['arkindex', ],
     scripts=[
         'arkindex/manage.py',
-- 
GitLab