diff --git a/Dockerfile b/Dockerfile
index 3ab098a477be9c787bdb7cbcd097a19cb16070ec..3fdb08f9a82f41077ac12d4950e40b4aa5dead3f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -26,7 +26,7 @@ RUN \
 # Install arkindex and its deps
 # Uses a source archive instead of full local copy to speedup docker build
 COPY dist/arkindex-*.tar.gz /tmp/arkindex.tar.gz
-RUN pip install /tmp/arkindex.tar.gz gunicorn
+RUN pip install /tmp/arkindex.tar.gz gunicorn && rm /tmp/arkindex.tar.gz
 
 # Allow access to medias and logs
 RUN mkdir -p /medias/staging /medias/iiif /logs /workers
diff --git a/arkindex/documents/api/elements.py b/arkindex/documents/api/elements.py
index f96dc06b794c51d1c79413293a963150bd2fab03..ed28e2131dbd470f75d7086d1a9e047857c3ad63 100644
--- a/arkindex/documents/api/elements.py
+++ b/arkindex/documents/api/elements.py
@@ -132,9 +132,10 @@ class PageDetails(RetrieveAPIView):
     serializer_class = PageSerializer
 
     def get_queryset(self):
-        return Page.objects.filter(
-            corpus__in=Corpus.objects.readable(self.request.user)
-        ).select_related('zone__image__server')
+        return Page.objects \
+            .filter(corpus__in=Corpus.objects.readable(self.request.user)) \
+            .select_related('zone__image__server', 'corpus') \
+            .prefetch_related('classifications__source', 'transcriptions__source')
 
     def get_object(self):
         page = super().get_object()
diff --git a/arkindex/documents/management/commands/delete_corpus.py b/arkindex/documents/management/commands/delete_corpus.py
index 65162304651bacdf48fde3046584913df54b27ba..c05450ff30ef84abc3328924f6060aac0ecaf33c 100644
--- a/arkindex/documents/management/commands/delete_corpus.py
+++ b/arkindex/documents/management/commands/delete_corpus.py
@@ -1,8 +1,11 @@
 from django.conf import settings
+from django.db.models.signals import pre_delete
 from ponos.management.base import PonosCommand
 from arkindex.project.argparse import CorpusArgument
+from arkindex.project.tools import disconnect_signal
 from arkindex.dataimport.models import DataImportFailure, Revision, Event
 from arkindex.documents.models import Element, ElementPath, Transcription, Classification, MetaData
+from arkindex.documents.signals import pre_delete_handler
 from arkindex.images.models import Zone
 import logging
 
@@ -35,7 +38,7 @@ class Command(PonosCommand):
         parser.add_argument(
             '--batch-size',
             type=int,
-            default=1000,
+            default=10000,
             help='Size of each batch of elements to delete',
         )
 
@@ -75,17 +78,20 @@ class Command(PonosCommand):
             logger.info('Would delete corpus {}'.format(corpus.name))
             return
 
-        logger.info('Deleting {} elements'.format(element_count))
-        deleted = 0
-        for i in range(0, element_count, batch_size):
-            elts = Element.objects.filter(id__in=element_ids[i:i+batch_size])
-            deleted += elts.count()
-            elts.delete()
-            logger.info('Deleted {} elements out of {}Â ({: >3}%)'.format(
-                deleted,
-                element_count,
-                int(100.0 * deleted / element_count),
-            ))
+        # Temporarily disconnect the pre_delete signal for Elements that removes paths
+        # Makes the deletion process 6 times faster
+        with disconnect_signal(pre_delete, sender=Element, receiver=pre_delete_handler):
+            logger.info('Deleting {} elements'.format(element_count))
+            deleted = 0
+            for i in range(0, element_count, batch_size):
+                elts = Element.objects.filter(id__in=element_ids[i:i+batch_size])
+                deleted += elts.count()
+                elts.delete()
+                logger.info('Deleted {} elements out of {}Â ({: >3}%)'.format(
+                    deleted,
+                    element_count,
+                    int(100.0 * deleted / element_count),
+                ))
 
         logger.info('Deleting corpus {}'.format(corpus.name))
         corpus.delete()
diff --git a/arkindex/documents/tests/commands/test_delete_corpus.py b/arkindex/documents/tests/commands/test_delete_corpus.py
index ff5a5091694615dfbdebe13b8540b6f7921e1cdc..83a83653ad297ab806d9743bdb8d51e07ed42c96 100644
--- a/arkindex/documents/tests/commands/test_delete_corpus.py
+++ b/arkindex/documents/tests/commands/test_delete_corpus.py
@@ -1,4 +1,5 @@
 from django.core.management import call_command
+from django.db.models.signals import pre_delete
 from arkindex_common.ml_tool import MLToolType
 from arkindex.project.tests import FixtureTestCase
 from arkindex.documents.models import Corpus, Element, Page, ElementType, TranscriptionType, MetaType, DataSource
@@ -98,11 +99,17 @@ class TestDeleteCorpus(FixtureTestCase):
         self.assertEqual(self.imgsrv.images.count(), 6)
 
     def test_run(self):
+        # Snapshot the receiver keys; comparing the receivers list with itself would always pass
+        receivers = {r[0] for r in pre_delete.receivers}
+
         call_command(
             'delete_corpus',
             self.corpus,
         )
 
+        # Ensure the command restores the signal receivers
+        self.assertEqual({r[0] for r in pre_delete.receivers}, receivers)
+
         self.assertEqual(Corpus.objects.count(), 1)
         with self.assertRaises(Corpus.DoesNotExist):
             self.corpus.refresh_from_db()
diff --git a/arkindex/project/tools.py b/arkindex/project/tools.py
index e49379393035f9084038eeb2d7ed503033949ae8..255482e23f8bfc210f8c0d0838b75e7f6c4a5bc6 100644
--- a/arkindex/project/tools.py
+++ b/arkindex/project/tools.py
@@ -114,3 +114,28 @@ class Timer(object):
         end = self.timer()
         self.elapsed = end - self.start
         self.delta = datetime.timedelta(seconds=self.elapsed)
+
+
+class disconnect_signal():
+    """
+    Context manager to temporarily disconnect a signal receiver.
+
+    Keyword arguments are forwarded unchanged to both ``signal.disconnect()``
+    on entry and ``signal.connect()`` on exit. The receiver is only
+    reconnected when it was actually disconnected on entry.
+    """
+
+    def __init__(self, signal, **kwargs):
+        self.signal = signal
+        self.kwargs = kwargs
+        self.disconnected = False
+
+    def __enter__(self):
+        # disconnect() returns True only when a receiver was removed
+        self.disconnected = self.signal.disconnect(**self.kwargs)
+
+    def __exit__(self, *args):
+        # Reconnect even when the body raised, but never connect a receiver
+        # that was not connected before entering the block
+        if self.disconnected:
+            self.signal.connect(**self.kwargs)