diff --git a/README.md b/README.md index bea7f5901075405c6be33d566a0853a7bcad35aa..487f752689b105328b5c5d32873bce10133841ff 100644 --- a/README.md +++ b/README.md @@ -112,7 +112,6 @@ Aside from the usual Django commands, some custom commands are available via `ma * `import_annotations`: Import index files from a folder into a specific volume; * `import_acts`: Import XML surface files and CSV act files; * `delete_corpus`: Delete a big corpus using a Ponos task; -* `generate_thumbnails`: Generate thumbnails for volumes; * `reindex`: Run asynchronous tasks on the Celery worker to reindex transcriptions in ElasticSearch; * `telegraf`: A special command with InfluxDB-compatible output for Grafana statistics. diff --git a/arkindex/documents/management/commands/generate_thumbnails.py b/arkindex/documents/management/commands/generate_thumbnails.py deleted file mode 100644 index aae709178c71dc6afc447f8049440361078415ec..0000000000000000000000000000000000000000 --- a/arkindex/documents/management/commands/generate_thumbnails.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python3 -from django.core.management.base import CommandError -from django.conf import settings -from ponos.management.base import PonosCommand -from arkindex.project.argparse import CorpusArgument, ElementArgument -from arkindex.documents.models import Element - - -class Command(PonosCommand): - help = '(Re)generate thumbnails for elements' - docker_image = settings.ARKINDEX_APP_IMAGE - base_recipe = settings.PONOS_RECIPE - - def add_arguments(self, parser): - super().add_arguments(parser) - parser.add_argument( - '--all', - help='Create thumbnails for every volume in every corpus', - action='store_true', - default=False, - ) - parser.add_argument( - '--corpus', - help='ID or part of the name of the corpus to fetch volumes from', - type=CorpusArgument(), - ) - parser.add_argument( - '--element', - help='ID or part of the name of a single element to build a thumbnail for', - type=ElementArgument(type__folder=True), - ) - parser.add_argument( - '--force', - help='Recreate thumbnails even if they already exist', - action='store_true', - default=False, - ) - - def validate_args(self, corpus=None, element=None, all=False, force=False, **options): - if all: - if corpus or element: - raise CommandError('--all cannot be used together with --corpus or --element') - return {'elements': Element.objects.filter(type__folder=True), 'force': force} - - if not corpus: - raise CommandError('--corpus is required when not using --all') - - if element: - if not corpus.elements.filter(pk=element.pk).exists(): - raise CommandError('Element {} is not in corpus {}'.format(element, corpus)) - elts = [element, ] - else: - elts = corpus.elements.filter(type__folder=True) - - return {'elements': elts, 'force': force} - - def run(self, elements=[], force=False): - # TODO: Create a workflow instead when Ponos initial artifacts are implemented - for element in elements: - try: - element.generate_thumbnail(force=force) - except Exception as e: - self.stdout.write(self.style.ERROR('Failed to generate thumbnail for {}: {}'.format(element.id, e))) diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py index b3790405be35bf966d6a252192b1653c4caefbaa..60e140697b2ace7c1dd7e2dbf99cfd87ff853821 100644 --- a/arkindex/documents/models.py +++ b/arkindex/documents/models.py @@ -261,13 +261,6 @@ class Element(IndexableModel): from arkindex.images.models import Thumbnail # Prevent circular imports return Thumbnail(self) - def generate_thumbnail(self, **kwargs): - ''' - Build a thumbnail for this element - ''' - logger.info("Creating thumbnail for element {}".format(self)) - self.thumbnail.create(**kwargs) - def __str__(self): return '{}: {}'.format(self.type.display_name, self.name) diff --git a/arkindex/documents/tests/commands/test_generate_thumbnails.py b/arkindex/documents/tests/commands/test_generate_thumbnails.py deleted file mode 100644 index f54afd14e74058abb13ed90cbe5a8a9c2a8f2cfa..0000000000000000000000000000000000000000 --- a/arkindex/documents/tests/commands/test_generate_thumbnails.py +++ /dev/null @@ -1,118 +0,0 @@ -from django.core.management import call_command -from django.core.management.base import CommandError -from unittest.mock import patch, call -from arkindex.project.tests import FixtureTestCase -from arkindex.documents.models import Corpus - - -class TestGenerateThumbnailsCommand(FixtureTestCase): - """ - Test the generate_thumbnails command. - """ - - @classmethod - def setUpTestData(cls): - super().setUpTestData() - cls.vol1 = cls.corpus.elements.get(name="Volume 1") - cls.vol2 = cls.corpus.elements.get(name="Volume 2") - corpus2 = Corpus.objects.create(name='Other corpus') - cls.vol3 = corpus2.elements.create( - type=corpus2.types.create(slug='volume', folder=True), - name='Volume 3', - ) - cls.thumb_patch = patch('arkindex.images.models.Thumbnail') - - def setUp(self): - super().setUp() - self.thumb_mock = self.thumb_patch.start() - - def tearDown(self): - super().tearDown() - self.thumb_patch.stop() - - def test_start_corpus(self): - """ - Test generate_thumbnails runs generation for each volume of a corpus - """ - call_command( - 'generate_thumbnails', - corpus=self.corpus, - ) - self.assertCountEqual(self.thumb_mock.call_args_list, [ - call(self.vol1), - call(self.vol2), - ]) - self.assertEqual(self.thumb_mock().create.call_count, 2) - - def test_start_element(self): - """ - Test generate_thumbnails runs generation for an element - """ - call_command( - 'generate_thumbnails', - corpus=self.corpus, - element=self.vol1, - ) - self.assertCountEqual(self.thumb_mock.call_args_list, [ - call(self.vol1), - ]) - self.assertEqual(self.thumb_mock().create.call_count, 1) - - @patch('arkindex.documents.models.Element.generate_thumbnail') - def test_force(self, gen_mock): - """ - Test generate_thumbnails passes the --force argument to tasks - """ - call_command( - 'generate_thumbnails', - corpus=self.corpus, - element=self.vol1, - force=True, - ) - self.assertCountEqual(gen_mock.call_args_list, [ - call(force=True), - ]) - - def test_all(self): - """ - Test generate_thumbnails picks all volumes when using --all - """ - call_command( - 'generate_thumbnails', - all=True, - ) - self.assertCountEqual(self.thumb_mock.call_args_list, [ - call(self.vol1), - call(self.vol2), - call(self.vol3), - ]) - self.assertEqual(self.thumb_mock().create.call_count, 3) - - def test_all_xor_corpus(self): - """ - Test generate_thumbnails does not allow --all and --corpus simultaneously - """ - with self.assertRaisesRegex(CommandError, r'--all.+--corpus'): - call_command( - 'generate_thumbnails', - all=True, - corpus=self.corpus, - ) - - def test_corpus_required(self): - """ - Test generate_thumbnails requires either --all or --corpus - """ - with self.assertRaisesRegex(CommandError, '--corpus'): - call_command('generate_thumbnails') - - def test_element_in_corpus(self): - """ - Test generate_thumbnails requires --element to be inside --corpus - """ - with self.assertRaisesRegex(CommandError, 'not in corpus'): - call_command( - 'generate_thumbnails', - corpus=self.corpus, - element=self.vol3, - ) diff --git a/arkindex/images/models.py b/arkindex/images/models.py index 29471b1f7e2304a93e9c0b2c29b22223c4598d43..c962edc4ce2732b36d041591ffa74751aca42d04 100644 --- a/arkindex/images/models.py +++ b/arkindex/images/models.py @@ -6,13 +6,11 @@ from django.db.models.functions import Concat, Substr from django.utils.functional import cached_property from django.utils.text import slugify from arkindex.dataimport.models import DataFile -from arkindex.documents.models import Element from arkindex.images.managers import ImageServerManager from arkindex.project.models import IndexableModel from arkindex.project.fields import StripSlashURLField, LStripTextField, MD5HashField from arkindex.project.polygon import PolygonField from arkindex.project.aws import S3FileMixin, S3FileModelMixin, S3FileStatus -from botocore.client import ClientError from io import BytesIO from PIL import Image as PillowImage import logging @@ -353,67 +351,6 @@ class Thumbnail(S3FileMixin): def s3_key(self): return self.name - def create(self, width=900, height=400, max_element_count=3, force=False): - """ - Generate a thumbnail for an Element and store it in the IIIF server - - TODO: Remove this after generate_thumbnails starts using a Ponos workflow - """ - - if not force: - try: - self.s3_object.load() - # This did not raise anything so the thumbnail exists - return - except ClientError as e: - if e.response['Error']['Code'] != '404': - raise - - # Get at most 'max_element_count' first pages - pages = Element.objects \ - .get_descending(self.element.id) \ - .filter( - type__folder=False, - type__hidden=False, - zone__isnull=False, - ) \ - .prefetch_related('zone__image__server')[:max_element_count] - if not pages: - raise Element.DoesNotExist("No pages found for thumbnail generation") - - # Width of a single image in the thumbnail - single_width = int(width / len(pages)) - - # Open images with Pillow - images = [p.zone.image.pillow_open(max_width=single_width) for p in pages] - - # Create an image - thumbnail = PillowImage.new('RGB', (width, height)) - - # Resize, crop and assemble the images together horizontally - offset = 0 - for img in images: - # Resize - imgwidth, imgheight = img.size - ratio = max(single_width / imgwidth, height / imgheight) - newsize = int(imgwidth * ratio), int(imgheight * ratio) - img = img.resize(newsize, PillowImage.BICUBIC) - - # Crop - imgwidth, imgheight = img.size - left = int((imgwidth - single_width) / 2) - top = int((imgheight - height) / 2) - img = img.crop((left, top, left + single_width, top + height)) - - # Assemble - thumbnail.paste(img, (offset, 0)) - offset += single_width - - b = BytesIO() - thumbnail.save(b, format='jpeg') - b.seek(0) - self.s3_object.upload_fileobj(b) - class Zone(IndexableModel): """ diff --git a/arkindex/images/tests/test_thumbnail.py b/arkindex/images/tests/test_thumbnail.py index fa1699d12eaf3b34f3fdf262a5157949eb9c4573..1ba75758c30c3f0339b81533f93a7dffd4f42676 100644 --- a/arkindex/images/tests/test_thumbnail.py +++ b/arkindex/images/tests/test_thumbnail.py @@ -1,8 +1,6 @@ from arkindex.project.tests import FixtureTestCase -from arkindex.documents.models import Element from unittest.mock import patch, call from django.test import override_settings -from botocore.exceptions import ClientError @override_settings(LOCAL_IMAGESERVER_ID=999, IIIF_DOWNLOAD_TIMEOUT=(13, 37)) @@ -52,111 +50,3 @@ class TestThumbnail(FixtureTestCase): self.s3_mock.meta.client.generate_presigned_url.call_args, call('get_object', Params={'Bucket': 'derp', 'Key': 'meme.jpg'}), ) - - @patch('arkindex.images.models.BytesIO') - @patch('arkindex.images.models.PillowImage') - @patch('arkindex.images.models.requests') - def test_create(self, requests_mock, pil_mock, bytes_mock): - """ - Test Thumbnail.create creates a thumbnail if it does not exist - """ - self.s3_mock.Object.return_value.load.side_effect = ClientError({'Error': {'Code': '404'}}, 'get_object') - requests_mock.get.return_value.content = b'something' - pil_mock.open.return_value.size = (1000, 1000) - pil_mock.open.return_value.resize.return_value.size = (400, 400) - - self.vol1.thumbnail.create() - - self.assertEqual(pil_mock.new.call_count, 1) - self.assertEqual(pil_mock.new.call_args, call('RGB', (900, 400))) - self.assertEqual(pil_mock.open.call_count, 3) - self.assertEqual(requests_mock.get.call_count, 3) - self.assertEqual(requests_mock.get.call_args_list, [ - call('http://server/img1/full/300,/0/default.jpg', timeout=(13, 37)), - call('http://server/img2/full/300,/0/default.jpg', timeout=(13, 37)), - call('http://server/img3/full/300,/0/default.jpg', timeout=(13, 37)), - ]) - self.assertEqual(pil_mock.open().resize.call_count, 3) - self.assertListEqual( - pil_mock.open().resize.call_args_list, - [call((400, 400), pil_mock.BICUBIC)] * 3, - ) - self.assertEqual(pil_mock.open().resize().crop.call_count, 3) - self.assertEqual( - pil_mock.open().resize().crop.call_args_list, - [call((50, 0, 350, 400))] * 3, - ) - self.assertEqual(pil_mock.new().paste.call_count, 3) - self.assertEqual(pil_mock.new().paste.call_args_list, [ - call(pil_mock.open().resize().crop(), (0, 0)), - call(pil_mock.open().resize().crop(), (300, 0)), - call(pil_mock.open().resize().crop(), (600, 0)), - ]) - - # BytesIO is used three times for opening images, then once for the thumbnail save - self.assertEqual(bytes_mock.call_count, 4) - self.assertEqual(bytes_mock.call_args_list, [call(b'something')] * 3 + [call()]) - self.assertEqual(pil_mock.new().save.call_count, 1) - self.assertEqual( - pil_mock.new().save.call_args, - call(bytes_mock(), format='jpeg'), - ) - self.assertEqual(bytes_mock().seek.call_count, 1) - self.assertEqual(bytes_mock().seek.call_args, call(0)) - self.assertEqual(self.s3_mock.Object.call_count, 1) - self.assertEqual(self.s3_mock.Object().upload_fileobj.call_count, 1) - self.assertEqual(self.s3_mock.Object().upload_fileobj.call_args, call(bytes_mock())) - - @patch('arkindex.images.models.PillowImage') - def test_create_exists(self, pil_mock): - """ - Test Thumbnail.create ignores creation if the thumbnail exists (if S3 does not raise 404) - """ - self.s3_mock.Object.return_value.load.side_effect = None - self.vol1.thumbnail.create() - self.assertEqual(self.s3_mock.Object.call_count, 1) - self.assertEqual(self.s3_mock.Object().load.call_count, 1) - self.assertEqual(self.s3_mock.Object().upload_fileobj.call_count, 0) - self.assertEqual(pil_mock.new.call_count, 0) - self.assertEqual(pil_mock.open.call_count, 0) - - def test_create_exception(self): - """ - Test Thumbnail.create raises any error that is not 404 - """ - self.s3_mock.Object.return_value.load.side_effect = ClientError({'Error': {'Code': '999'}}, 'get_object') - with self.assertRaises(ClientError): - self.vol1.thumbnail.create() - - @patch('arkindex.images.models.BytesIO') - @patch('arkindex.images.models.PillowImage') - @patch('arkindex.images.models.requests') - def test_create_force(self, requests_mock, pil_mock, bytes_mock): - """ - Test Thumbnail.create ignores existing thumbnails with force=True - """ - requests_mock.get.return_value.content = b'something' - pil_mock.open.return_value.size = (1000, 1000) - pil_mock.open.return_value.resize.return_value.size = (400, 400) - - self.vol1.thumbnail.create(force=True) - - self.assertEqual(self.s3_mock.Object.call_count, 1) - self.assertEqual(self.s3_mock.Object().load.call_count, 0) - self.assertEqual(pil_mock.new.call_count, 1) - self.assertEqual(pil_mock.open.call_count, 3) - self.assertEqual(self.s3_mock.Object().upload_fileobj.call_count, 1) - self.assertEqual(self.s3_mock.Object().upload_fileobj.call_args, call(bytes_mock())) - - def test_create_empty(self): - """ - Test Thumbnail.create fails if there are no pages - """ - self.s3_mock.Object.return_value.load.side_effect = ClientError({'Error': {'Code': '404'}}, 'get_object') - Element.objects.get_descending(self.vol1.id).filter( - type__folder=False, - type__hidden=False, - zone__isnull=False, - ).delete() - with self.assertRaises(Element.DoesNotExist): - self.vol1.thumbnail.create()