Skip to content
Snippets Groups Projects
Commit 2a543dea authored by Valentin Rigal's avatar Valentin Rigal Committed by Erwan Rouchet
Browse files

Remove thumbnails creation management from backend

parent 4e8bd210
No related branches found
No related tags found
No related merge requests found
......@@ -112,7 +112,6 @@ Aside from the usual Django commands, some custom commands are available via `ma
* `import_annotations`: Import index files from a folder into a specific volume;
* `import_acts`: Import XML surface files and CSV act files;
* `delete_corpus`: Delete a big corpus using a Ponos task;
* `generate_thumbnails`: Generate thumbnails for volumes;
* `reindex`: Run asynchronous tasks on the Celery worker to reindex transcriptions in ElasticSearch;
* `telegraf`: A special command with InfluxDB-compatible output for Grafana statistics.
......
#!/usr/bin/env python3
from django.core.management.base import CommandError
from django.conf import settings
from ponos.management.base import PonosCommand
from arkindex.project.argparse import CorpusArgument, ElementArgument
from arkindex.documents.models import Element
class Command(PonosCommand):
    """
    Management command that (re)generates thumbnails for elements.

    Elements are selected with --all, --corpus, or --corpus together with
    --element; --force is forwarded to every thumbnail generation call.
    """
    help = '(Re)generate thumbnails for elements'
    docker_image = settings.ARKINDEX_APP_IMAGE
    base_recipe = settings.PONOS_RECIPE

    def add_arguments(self, parser):
        """
        Declare the --all, --corpus, --element and --force options
        in addition to the arguments declared by the parent command
        """
        super().add_arguments(parser)
        parser.add_argument(
            '--all',
            help='Create thumbnails for every volume in every corpus',
            action='store_true',
            default=False,
        )
        parser.add_argument(
            '--corpus',
            help='ID or part of the name of the corpus to fetch volumes from',
            type=CorpusArgument(),
        )
        parser.add_argument(
            '--element',
            help='ID or part of the name of a single element to build a thumbnail for',
            type=ElementArgument(type__folder=True),
        )
        parser.add_argument(
            '--force',
            help='Recreate thumbnails even if they already exist',
            action='store_true',
            default=False,
        )

    def validate_args(self, corpus=None, element=None, all=False, force=False, **options):
        """
        Check that the selection options are consistent and resolve the elements to process

        Returns a dict with an 'elements' entry (a queryset of folder elements,
        or a one-item list when --element is used) and the 'force' flag;
        its keys match the parameters of run().
        Raises CommandError when --all is combined with --corpus or --element,
        when neither --all nor --corpus is given, or when the requested
        element does not belong to the requested corpus.
        """
        # `all` shadows the builtin, but the name is imposed by the --all option
        if all:
            if corpus or element:
                raise CommandError('--all cannot be used together with --corpus or --element')
            return {'elements': Element.objects.filter(type__folder=True), 'force': force}

        if not corpus:
            raise CommandError('--corpus is required when not using --all')

        if element:
            if not corpus.elements.filter(pk=element.pk).exists():
                raise CommandError('Element {} is not in corpus {}'.format(element, corpus))
            elts = [element]
        else:
            elts = corpus.elements.filter(type__folder=True)

        return {'elements': elts, 'force': force}

    def run(self, elements=(), force=False):
        """
        Generate a thumbnail for each of the given elements

        The default for `elements` is an immutable empty tuple rather than a
        shared mutable list. A failure on one element is reported on stdout
        and does not stop the processing of the remaining elements.
        """
        # TODO: Create a workflow instead when Ponos initial artifacts are implemented
        for element in elements:
            try:
                element.generate_thumbnail(force=force)
            except Exception as e:
                # Deliberately broad: this is a best-effort batch, keep going
                self.stdout.write(self.style.ERROR('Failed to generate thumbnail for {}: {}'.format(element.id, e)))
......@@ -261,13 +261,6 @@ class Element(IndexableModel):
from arkindex.images.models import Thumbnail # Prevent circular imports
return Thumbnail(self)
def generate_thumbnail(self, **kwargs):
    '''
    Log the operation, then create the thumbnail of this element.
    Keyword arguments are passed unchanged to the thumbnail's create method.
    '''
    message = "Creating thumbnail for element {}".format(self)
    logger.info(message)
    thumb = self.thumbnail
    thumb.create(**kwargs)
def __str__(self):
    '''
    Describe the element as "<display name of its type>: <its name>"
    '''
    type_label = self.type.display_name
    return '{}: {}'.format(type_label, self.name)
......
from django.core.management import call_command
from django.core.management.base import CommandError
from unittest.mock import patch, call
from arkindex.project.tests import FixtureTestCase
from arkindex.documents.models import Corpus
class TestGenerateThumbnailsCommand(FixtureTestCase):
    """
    Test the generate_thumbnails command.

    arkindex.images.models.Thumbnail is replaced by a mock for every test,
    so no thumbnail is actually built.
    """

    @classmethod
    def setUpTestData(cls):
        """
        Fetch the two fixture volumes and create a third volume in another corpus
        """
        super().setUpTestData()
        cls.vol1 = cls.corpus.elements.get(name="Volume 1")
        cls.vol2 = cls.corpus.elements.get(name="Volume 2")
        corpus2 = Corpus.objects.create(name='Other corpus')
        cls.vol3 = corpus2.elements.create(
            type=corpus2.types.create(slug='volume', folder=True),
            name='Volume 3',
        )
        cls.thumb_patch = patch('arkindex.images.models.Thumbnail')

    def setUp(self):
        """
        Start the Thumbnail patch and make sure it is always stopped
        """
        super().setUp()
        self.thumb_mock = self.thumb_patch.start()
        # Registered as a cleanup so the patch is undone even when tearDown fails;
        # cleanups run after tearDown, which keeps the original stop order
        self.addCleanup(self.thumb_patch.stop)

    def test_start_corpus(self):
        """
        Test generate_thumbnails runs generation for each volume of a corpus
        """
        call_command(
            'generate_thumbnails',
            corpus=self.corpus,
        )
        self.assertCountEqual(self.thumb_mock.call_args_list, [
            call(self.vol1),
            call(self.vol2),
        ])
        self.assertEqual(self.thumb_mock().create.call_count, 2)

    def test_start_element(self):
        """
        Test generate_thumbnails runs generation for an element
        """
        call_command(
            'generate_thumbnails',
            corpus=self.corpus,
            element=self.vol1,
        )
        self.assertCountEqual(self.thumb_mock.call_args_list, [
            call(self.vol1),
        ])
        self.assertEqual(self.thumb_mock().create.call_count, 1)

    @patch('arkindex.documents.models.Element.generate_thumbnail')
    def test_force(self, gen_mock):
        """
        Test generate_thumbnails passes the --force argument to tasks
        """
        call_command(
            'generate_thumbnails',
            corpus=self.corpus,
            element=self.vol1,
            force=True,
        )
        self.assertCountEqual(gen_mock.call_args_list, [
            call(force=True),
        ])

    def test_all(self):
        """
        Test generate_thumbnails picks all volumes when using --all
        """
        call_command(
            'generate_thumbnails',
            all=True,
        )
        self.assertCountEqual(self.thumb_mock.call_args_list, [
            call(self.vol1),
            call(self.vol2),
            call(self.vol3),
        ])
        self.assertEqual(self.thumb_mock().create.call_count, 3)

    def test_all_xor_corpus(self):
        """
        Test generate_thumbnails does not allow --all and --corpus simultaneously
        """
        with self.assertRaisesRegex(CommandError, r'--all.+--corpus'):
            call_command(
                'generate_thumbnails',
                all=True,
                corpus=self.corpus,
            )

    def test_corpus_required(self):
        """
        Test generate_thumbnails requires either --all or --corpus
        """
        with self.assertRaisesRegex(CommandError, '--corpus'):
            call_command('generate_thumbnails')

    def test_element_in_corpus(self):
        """
        Test generate_thumbnails requires --element to be inside --corpus
        """
        with self.assertRaisesRegex(CommandError, 'not in corpus'):
            call_command(
                'generate_thumbnails',
                corpus=self.corpus,
                element=self.vol3,
            )
......@@ -6,13 +6,11 @@ from django.db.models.functions import Concat, Substr
from django.utils.functional import cached_property
from django.utils.text import slugify
from arkindex.dataimport.models import DataFile
from arkindex.documents.models import Element
from arkindex.images.managers import ImageServerManager
from arkindex.project.models import IndexableModel
from arkindex.project.fields import StripSlashURLField, LStripTextField, MD5HashField
from arkindex.project.polygon import PolygonField
from arkindex.project.aws import S3FileMixin, S3FileModelMixin, S3FileStatus
from botocore.client import ClientError
from io import BytesIO
from PIL import Image as PillowImage
import logging
......@@ -353,67 +351,6 @@ class Thumbnail(S3FileMixin):
def s3_key(self):
    """
    S3 key of this object, which is simply its name
    """
    key = self.name
    return key
def create(self, width=900, height=400, max_element_count=3, force=False):
    """
    Build the thumbnail of an Element and upload it through this object's S3 object

    At most `max_element_count` elements are taken from
    Element.objects.get_descending(...), keeping only those that are neither
    folders nor hidden and that have a zone. Their images are resized,
    cropped around their center and pasted side by side on a `width` x `height`
    canvas that is saved as JPEG.
    Unless `force` is set, nothing happens when the S3 object can already be loaded.
    Raises Element.DoesNotExist when no usable element is found, and re-raises
    any ClientError from the existence check whose code is not '404'.
    TODO: Remove this after generate_thumbnails starts using a Ponos workflow
    """
    if not force:
        try:
            self.s3_object.load()
        except ClientError as e:
            # Only a 404 means the thumbnail is missing; anything else is a real failure
            if e.response['Error']['Code'] != '404':
                raise
        else:
            # load() succeeded, so the thumbnail already exists
            return

    # Select the first elements that can provide an image
    candidates = Element.objects.get_descending(self.element.id).filter(
        type__folder=False,
        type__hidden=False,
        zone__isnull=False,
    )
    pages = candidates.prefetch_related('zone__image__server')[:max_element_count]
    if not pages:
        raise Element.DoesNotExist("No pages found for thumbnail generation")

    # Each source image gets an equal horizontal share of the canvas
    slot_width = int(width / len(pages))

    # Every image is opened before the canvas is created
    sources = [page.zone.image.pillow_open(max_width=slot_width) for page in pages]
    canvas = PillowImage.new('RGB', (width, height))

    for index, source in enumerate(sources):
        # Scale so that the image covers its whole slot in both directions
        src_width, src_height = source.size
        scale = max(slot_width / src_width, height / src_height)
        scaled = source.resize(
            (int(src_width * scale), int(src_height * scale)),
            PillowImage.BICUBIC,
        )

        # Keep the central slot-sized area
        scaled_width, scaled_height = scaled.size
        left = int((scaled_width - slot_width) / 2)
        top = int((scaled_height - height) / 2)
        tile = scaled.crop((left, top, left + slot_width, top + height))

        # Slots are laid out from left to right
        canvas.paste(tile, (index * slot_width, 0))

    # Serialize in memory and rewind before uploading
    buffer = BytesIO()
    canvas.save(buffer, format='jpeg')
    buffer.seek(0)
    self.s3_object.upload_fileobj(buffer)
class Zone(IndexableModel):
"""
......
from arkindex.project.tests import FixtureTestCase
from arkindex.documents.models import Element
from unittest.mock import patch, call
from django.test import override_settings
from botocore.exceptions import ClientError
@override_settings(LOCAL_IMAGESERVER_ID=999, IIIF_DOWNLOAD_TIMEOUT=(13, 37))
......@@ -52,111 +50,3 @@ class TestThumbnail(FixtureTestCase):
self.s3_mock.meta.client.generate_presigned_url.call_args,
call('get_object', Params={'Bucket': 'derp', 'Key': 'meme.jpg'}),
)
@patch('arkindex.images.models.BytesIO')
@patch('arkindex.images.models.PillowImage')
@patch('arkindex.images.models.requests')
def test_create(self, requests_mock, pil_mock, bytes_mock):
    """
    Test Thumbnail.create builds and uploads a thumbnail when S3 answers 404
    """
    # Arrange: no thumbnail on S3, three downloadable 1000x1000 images
    missing = ClientError({'Error': {'Code': '404'}}, 'get_object')
    self.s3_mock.Object.return_value.load.side_effect = missing
    requests_mock.get.return_value.content = b'something'
    opened = pil_mock.open.return_value
    opened.size = (1000, 1000)
    resized = opened.resize.return_value
    resized.size = (400, 400)

    self.vol1.thumbnail.create()

    # One 900x400 canvas, three source images fetched at 300px wide
    canvas = pil_mock.new.return_value
    self.assertEqual(pil_mock.new.call_count, 1)
    self.assertEqual(pil_mock.new.call_args, call('RGB', (900, 400)))
    self.assertEqual(pil_mock.open.call_count, 3)
    self.assertEqual(requests_mock.get.call_count, 3)
    self.assertEqual(requests_mock.get.call_args_list, [
        call('http://server/img1/full/300,/0/default.jpg', timeout=(13, 37)),
        call('http://server/img2/full/300,/0/default.jpg', timeout=(13, 37)),
        call('http://server/img3/full/300,/0/default.jpg', timeout=(13, 37)),
    ])

    # Each image is resized, then cropped to a 300x400 tile
    self.assertEqual(opened.resize.call_count, 3)
    self.assertListEqual(
        opened.resize.call_args_list,
        [call((400, 400), pil_mock.BICUBIC)] * 3,
    )
    self.assertEqual(resized.crop.call_count, 3)
    self.assertEqual(
        resized.crop.call_args_list,
        [call((50, 0, 350, 400))] * 3,
    )

    # Tiles are pasted side by side
    tile = resized.crop.return_value
    self.assertEqual(canvas.paste.call_count, 3)
    self.assertEqual(
        canvas.paste.call_args_list,
        [call(tile, (offset, 0)) for offset in (0, 300, 600)],
    )

    # BytesIO is used three times for opening images, then once for the thumbnail save
    buffer = bytes_mock.return_value
    self.assertEqual(bytes_mock.call_count, 4)
    self.assertEqual(bytes_mock.call_args_list, [call(b'something')] * 3 + [call()])
    self.assertEqual(canvas.save.call_count, 1)
    self.assertEqual(canvas.save.call_args, call(buffer, format='jpeg'))
    self.assertEqual(buffer.seek.call_count, 1)
    self.assertEqual(buffer.seek.call_args, call(0))

    # The rewound buffer is uploaded once
    s3_object = self.s3_mock.Object.return_value
    self.assertEqual(self.s3_mock.Object.call_count, 1)
    self.assertEqual(s3_object.upload_fileobj.call_count, 1)
    self.assertEqual(s3_object.upload_fileobj.call_args, call(buffer))
@patch('arkindex.images.models.PillowImage')
def test_create_exists(self, pil_mock):
    """
    Test Thumbnail.create does nothing when the thumbnail exists (S3 load does not raise)
    """
    s3_object = self.s3_mock.Object.return_value
    s3_object.load.side_effect = None

    self.vol1.thumbnail.create()

    # The existence check ran once and nothing was uploaded
    self.assertEqual(self.s3_mock.Object.call_count, 1)
    self.assertEqual(s3_object.load.call_count, 1)
    self.assertEqual(s3_object.upload_fileobj.call_count, 0)
    # Pillow was never involved
    for pillow_entry_point in (pil_mock.new, pil_mock.open):
        self.assertEqual(pillow_entry_point.call_count, 0)
def test_create_exception(self):
    """
    Test Thumbnail.create lets S3 errors other than 404 propagate
    """
    failure = ClientError({'Error': {'Code': '999'}}, 'get_object')
    self.s3_mock.Object.return_value.load.side_effect = failure

    with self.assertRaises(ClientError):
        self.vol1.thumbnail.create()
@patch('arkindex.images.models.BytesIO')
@patch('arkindex.images.models.PillowImage')
@patch('arkindex.images.models.requests')
def test_create_force(self, requests_mock, pil_mock, bytes_mock):
    """
    Test Thumbnail.create skips the existence check and rebuilds with force=True
    """
    requests_mock.get.return_value.content = b'something'
    opened = pil_mock.open.return_value
    opened.size = (1000, 1000)
    opened.resize.return_value.size = (400, 400)

    self.vol1.thumbnail.create(force=True)

    s3_object = self.s3_mock.Object.return_value
    self.assertEqual(self.s3_mock.Object.call_count, 1)
    # No load() call: the existing thumbnail is not looked up
    self.assertEqual(s3_object.load.call_count, 0)
    self.assertEqual(pil_mock.new.call_count, 1)
    self.assertEqual(pil_mock.open.call_count, 3)
    self.assertEqual(s3_object.upload_fileobj.call_count, 1)
    self.assertEqual(s3_object.upload_fileobj.call_args, call(bytes_mock.return_value))
def test_create_empty(self):
    """
    Test Thumbnail.create raises Element.DoesNotExist when no page is left
    """
    missing = ClientError({'Error': {'Code': '404'}}, 'get_object')
    self.s3_mock.Object.return_value.load.side_effect = missing

    # Delete every element matching the filters used here to select pages
    page_filters = {
        'type__folder': False,
        'type__hidden': False,
        'zone__isnull': False,
    }
    Element.objects.get_descending(self.vol1.id).filter(**page_filters).delete()

    with self.assertRaises(Element.DoesNotExist):
        self.vol1.thumbnail.create()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment