Skip to content
Snippets Groups Projects
Commit c03bc32b authored by Erwan Rouchet, committed by Bastien Abadie
Browse files

Optimizations

parent 3113bfc2
No related branches found
No related tags found
No related merge requests found
......@@ -26,7 +26,7 @@ RUN \
# Install arkindex and its deps
# Uses a source archive instead of full local copy to speedup docker build
COPY dist/arkindex-*.tar.gz /tmp/arkindex.tar.gz
RUN pip install /tmp/arkindex.tar.gz gunicorn
RUN pip install /tmp/arkindex.tar.gz gunicorn && rm /tmp/arkindex.tar.gz
# Allow access to medias and logs
RUN mkdir -p /medias/staging /medias/iiif /logs /workers
......
......@@ -132,9 +132,10 @@ class PageDetails(RetrieveAPIView):
serializer_class = PageSerializer
def get_queryset(self):
return Page.objects.filter(
corpus__in=Corpus.objects.readable(self.request.user)
).select_related('zone__image__server')
return Page.objects \
.filter(corpus__in=Corpus.objects.readable(self.request.user)) \
.select_related('zone__image__server', 'corpus') \
.prefetch_related('classifications__source', 'transcriptions__source')
def get_object(self):
page = super().get_object()
......
from django.conf import settings
from django.db.models.signals import pre_delete
from ponos.management.base import PonosCommand
from arkindex.project.argparse import CorpusArgument
from arkindex.project.tools import disconnect_signal
from arkindex.dataimport.models import DataImportFailure, Revision, Event
from arkindex.documents.models import Element, ElementPath, Transcription, Classification, MetaData
from arkindex.documents.signals import pre_delete_handler
from arkindex.images.models import Zone
import logging
......@@ -35,7 +38,7 @@ class Command(PonosCommand):
parser.add_argument(
'--batch-size',
type=int,
default=1000,
default=10000,
help='Size of each batch of elements to delete',
)
......@@ -75,17 +78,20 @@ class Command(PonosCommand):
logger.info('Would delete corpus {}'.format(corpus.name))
return
logger.info('Deleting {} elements'.format(element_count))
deleted = 0
for i in range(0, element_count, batch_size):
elts = Element.objects.filter(id__in=element_ids[i:i+batch_size])
deleted += elts.count()
elts.delete()
logger.info('Deleted {} elements out of {} ({: >3}%)'.format(
deleted,
element_count,
int(100.0 * deleted / element_count),
))
# Temporarily disconnect the pre_delete signal for Elements that removes paths
# Makes the deletion process 6 times faster
with disconnect_signal(pre_delete, sender=Element, receiver=pre_delete_handler):
logger.info('Deleting {} elements'.format(element_count))
deleted = 0
for i in range(0, element_count, batch_size):
elts = Element.objects.filter(id__in=element_ids[i:i+batch_size])
deleted += elts.count()
elts.delete()
logger.info('Deleted {} elements out of {} ({: >3}%)'.format(
deleted,
element_count,
int(100.0 * deleted / element_count),
))
logger.info('Deleting corpus {}'.format(corpus.name))
corpus.delete()
......
from django.core.management import call_command
from django.db.models.signals import pre_delete
from arkindex_common.ml_tool import MLToolType
from arkindex.project.tests import FixtureTestCase
from arkindex.documents.models import Corpus, Element, Page, ElementType, TranscriptionType, MetaType, DataSource
......@@ -98,11 +99,16 @@ class TestDeleteCorpus(FixtureTestCase):
self.assertEqual(self.imgsrv.images.count(), 6)
def test_run(self):
receivers = pre_delete.receivers
call_command(
'delete_corpus',
self.corpus,
)
# Ensure the command restores the signal receivers
self.assertEqual(pre_delete.receivers, receivers)
self.assertEqual(Corpus.objects.count(), 1)
with self.assertRaises(Corpus.DoesNotExist):
self.corpus.refresh_from_db()
......
......@@ -114,3 +114,19 @@ class Timer(object):
end = self.timer()
self.elapsed = end - self.start
self.delta = datetime.timedelta(seconds=self.elapsed)
class disconnect_signal():
    """
    Context manager to temporarily disconnect a signal receiver.

    On entry, ``signal.disconnect(**kwargs)`` is called; on exit (normal or
    through an exception), ``signal.connect(**kwargs)`` is called with the
    same keyword arguments to restore the receiver.

    The receiver is only reconnected when the initial disconnection actually
    removed something, so that leaving the block never connects a receiver
    that was not connected before entering it.

    :param signal: Object exposing ``connect`` and ``disconnect`` methods.
    :param kwargs: Keyword arguments forwarded unchanged to both
        ``disconnect`` and ``connect``; they must be accepted by both
        methods (e.g. ``sender=...``, ``receiver=...``).
    """

    def __init__(self, signal, **kwargs):
        self.signal = signal
        self.kwargs = kwargs
        # Set in __enter__; tells __exit__ whether there is anything to restore
        self.was_connected = False

    def __enter__(self):
        # disconnect() is expected to return False when no matching receiver
        # was found. Any other result (including None, for signal-like objects
        # that do not report a status) is treated as a successful disconnection
        # so the receiver still gets restored on exit.
        result = self.signal.disconnect(**self.kwargs)
        self.was_connected = result is not False
        return self

    def __exit__(self, *args):
        # Returns None, so an exception raised inside the block propagates
        # after the receiver has been restored.
        if self.was_connected:
            self.signal.connect(**self.kwargs)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment