# test_generate_thumbnails.py
# -*- coding: utf-8 -*-
import json
import tempfile
from pathlib import Path
from unittest import TestCase
from unittest.mock import patch

import requests_mock
from PIL import Image, ImageChops

from arkindex_tasks.generate_thumbnails import ThumbnailGenerator

from .image_helpers import root_mean_square

SAMPLES = Path(__file__).absolute().parent / "samples"


@patch(
    "arkindex_tasks.generate_thumbnails.ThumbnailGenerator.upload_thumbnail.retry.wait.wait_fixed",
    new=0,
)
class TestThumbnailGenerator(TestCase):
    def setUp(self):
        self.path = None

    def _write_file(self, data):
        self.path = Path(tempfile.mkstemp()[1])
        with self.path.open("w") as f:
            json.dump(data, f)
        return self.path

    def tearDown(self):
        if self.path:
            self.path.unlink()

    def test_file_exists(self):
        """
        Building a generator from ``/dev/null`` must fail with a "does not exist" assertion.
        """
        unusable_path = Path("/dev/null")
        with self.assertRaisesRegex(AssertionError, "does not exist"):
            ThumbnailGenerator(unusable_path)

    def test_file_is_list(self):
        """
        A JSON file whose top-level value is a dict must be rejected by the constructor.
        """
        json_path = self._write_file({"not": "a list"})
        with self.assertRaisesRegex(AssertionError, "should hold a list"):
            ThumbnailGenerator(json_path)

    def test_get_folder_requires_id(self):
        """
        ``get_folder`` must raise an assertion error for an element dict without an ``id`` key.
        """
        json_path = self._write_file([{"id": "volumeid"}])
        element_without_id = {"without": "an ID"}
        with self.assertRaisesRegex(AssertionError, "Missing element ID"):
            generator = ThumbnailGenerator(json_path)
            generator.get_folder(element_without_id)

    @requests_mock.Mocker()
    def test_get_folder_not_found(self, mock):
        """
        ``get_folder`` is expected to return ``None`` when the element endpoint answers 404.
        """
        json_path = self._write_file([{"id": "notfound"}])
        mock.get("/api/v1/element/notfound/", status_code=404)

        generator = ThumbnailGenerator(json_path)
        folder = generator.get_folder({"id": "notfound"})
        self.assertIsNone(folder)

    @requests_mock.Mocker()
    def test_get_folder_not_a_folder(self, mock):
        """
        ``get_folder`` is expected to return ``None`` for an element whose type has ``folder`` set to False.
        """
        json_path = self._write_file([{"id": "pageid"}])
        page_payload = {"id": "pageid", "type": "page", "corpus": {"id": "corpusid"}}
        mock.get("/api/v1/element/pageid/", json=page_payload)

        page_type = {"slug": "page", "display_name": "Page", "folder": False}
        generator = ThumbnailGenerator(json_path)
        # Set the corpora directly on the generator instead of mocking the corpus endpoint
        generator.corpora = [{"id": "corpusid", "types": [page_type]}]

        folder = generator.get_folder({"id": "pageid"})
        self.assertIsNone(folder)

    @requests_mock.Mocker()
    def test_get_folder(self, mock):
        """
        ``get_folder`` is expected to return the API payload for an element whose type is a folder.
        """
        json_path = self._write_file([{"id": "volumeid"}])
        expected_folder = {
            "id": "volumeid",
            "type": "volume",
            "corpus": {"id": "corpusid"},
        }
        mock.get("/api/v1/element/volumeid/", json=expected_folder)

        volume_type = {"slug": "volume", "display_name": "Volume", "folder": True}
        generator = ThumbnailGenerator(json_path)
        # Set the corpora directly on the generator instead of mocking the corpus endpoint
        generator.corpora = [{"id": "corpusid", "types": [volume_type]}]

        folder = generator.get_folder({"id": "volumeid"})
        self.assertDictEqual(folder, expected_folder)

    @requests_mock.Mocker()
    def test_get_first_images_max(self, mock):
        """
        With three children listed and ``n=2``, only the first two images are expected
        to be fetched: one listing request plus two image requests.
        """
        self._write_file([{"id": "volumeid"}])
        mock.get(
            "/api/v1/elements/volumeid/children/?folder=False&with_zone=True",
            # Require an exact match of the entire query string, not just a portion of it
            complete_qs=True,
            json={
                "count": 3,
                "number": 1,
                "results": [
                    {"zone": {"image": {"url": "https://url1", "width": 1000}}},
                    {"zone": {"image": {"url": "https://url2", "width": 1000}}},
                    {"zone": {"image": {"url": "https://url3", "width": 1000}}},
                ],
            },
        )
        # Serve the sample bytes through `content` rather than an open file object
        # passed as `body`, so that no file handle is left open after the test.
        for index in (1, 2):
            mock.get(
                f"https://url{index}/full/50,/0/default.jpg",
                content=(SAMPLES / f"img{index}.jpg").read_bytes(),
            )

        expected_images = [Image.open(SAMPLES / f"img{index}.jpg") for index in (1, 2)]
        for image in expected_images:
            # Close the reference images once the test is over
            self.addCleanup(image.close)

        gen = ThumbnailGenerator(self.path)
        self.assertListEqual(
            gen.get_first_images("volumeid", n=2, width=50, height=40),
            expected_images,
        )
        self.assertEqual(mock.call_count, 3)

    @requests_mock.Mocker()
    def test_get_first_images_deduplicates_url(self, mock):
        """
        When every child points to the same image URL, that image is expected to be
        downloaded and returned only once.
        """
        self._write_file([{"id": "volumeid"}])
        # Both the direct and the recursive listings return the same two children,
        # which share a single image URL.
        duplicated_children = {
            "count": 3,
            "number": 1,
            "results": [
                {"zone": {"image": {"url": "https://url1", "width": 1000}}},
                {"zone": {"image": {"url": "https://url1", "width": 1000}}},
            ],
        }
        mock.get(
            "/api/v1/elements/volumeid/children/?folder=False&with_zone=True",
            complete_qs=True,
            json=duplicated_children,
        )
        mock.get(
            "/api/v1/elements/volumeid/children/?folder=False&with_zone=True&recursive=True",
            complete_qs=True,
            json=duplicated_children,
        )
        # Serve the sample bytes through `content` rather than an open file object
        # passed as `body`, so that no file handle is left open after the test.
        mock.get(
            "https://url1/full/50,/0/default.jpg",
            content=(SAMPLES / "img1.jpg").read_bytes(),
        )

        expected_image = Image.open(SAMPLES / "img1.jpg")
        # Close the reference image once the test is over
        self.addCleanup(expected_image.close)

        gen = ThumbnailGenerator(self.path)
        self.assertListEqual(
            gen.get_first_images("volumeid", n=2, width=50, height=40),
            [expected_image],
        )
        # Two listing requests and a single image download
        self.assertEqual(mock.call_count, 3)

    @requests_mock.Mocker()
    def test_get_first_images_handles_errors(self, mock):
        """
        When an image cannot be fetched, its element is skipped and the search goes on
        until n images have been retrieved or no elements are left.
        """
        self._write_file([{"id": "volumeid"}])
        children_url = "/api/v1/elements/volumeid/children/?folder=False&with_zone=True"
        recursive_url = f"{children_url}&recursive=True"
        mock.get(
            children_url,
            complete_qs=True,
            json={
                "count": 3,
                "number": 1,
                # This should not be called, since only the first non-recursive page should be retrieved
                "next": "/nowhere",
                "results": [
                    {"zone": {"image": {"url": "https://url1", "width": 1000}}},
                    {"zone": {"image": {"url": "https://url2", "width": 1000}}},
                    {"zone": {"image": {"url": "https://url3", "width": 1000}}},
                ],
            },
        )
        # The recursive listing is split over three pages, one child per page
        mock.get(
            recursive_url,
            complete_qs=True,
            json={
                "count": 3,
                "number": 1,
                "next": f"{recursive_url}&page=2",
                "results": [
                    {"zone": {"image": {"url": "https://url1", "width": 1000}}},
                ],
            },
        )
        mock.get(
            f"{recursive_url}&page=2",
            complete_qs=True,
            json={
                "count": 3,
                "number": 1,
                "next": f"{recursive_url}&page=3",
                "results": [
                    {"zone": {"image": {"url": "https://url2", "width": 1000}}},
                ],
            },
        )
        mock.get(
            f"{recursive_url}&page=3",
            complete_qs=True,
            json={
                "count": 3,
                "number": 1,
                "next": None,
                "results": [
                    {"zone": {"image": {"url": "https://url3", "width": 1000}}},
                ],
            },
        )
        # The first two images fail with different HTTP errors; only the third is served
        mock.get("https://url1/full/50,/0/default.jpg", status_code=404)
        mock.get("https://url2/full/50,/0/default.jpg", status_code=502)
        # Serve the sample bytes through `content` rather than an open file object
        # passed as `body`, so that no file handle is left open after the test.
        mock.get(
            "https://url3/full/50,/0/default.jpg",
            content=(SAMPLES / "img3.jpg").read_bytes(),
        )

        expected_image = Image.open(SAMPLES / "img3.jpg")
        # Close the reference image once the test is over
        self.addCleanup(expected_image.close)

        gen = ThumbnailGenerator(self.path)
        self.assertListEqual(
            gen.get_first_images("volumeid", n=2, width=50, height=40),
            [expected_image],
        )
        self.assertEqual(mock.call_count, 7)

    @requests_mock.Mocker()
    def test_open_image(self, mock):
        """
        An image narrower than ``max_width`` is expected to be requested at its own
        width, and the opened image must be pixel-identical to the served sample.
        """
        self._write_file([{"id": "volumeid"}])
        # Serve the sample bytes through `content` rather than an open file object
        # passed as `body`, so that no file handle is left open after the test.
        mock.get(
            "http://someimage/full/50,/0/default.jpg",
            content=(SAMPLES / "img1.jpg").read_bytes(),
        )
        gen = ThumbnailGenerator(self.path)

        expected_img = Image.open(str(SAMPLES / "img1.jpg"))
        # Close the reference image once the test is over
        self.addCleanup(expected_img.close)
        actual_img = gen.open_image(
            "http://someimage",
            image_width=50,
            max_width=100,
        )

        # See https://effbot.org/zone/pil-comparing-images.htm
        # getbbox() returns None when the difference image is entirely black,
        # i.e. when both images have identical pixels.
        self.assertIsNone(ImageChops.difference(expected_img, actual_img).getbbox())
        self.assertEqual(mock.call_count, 1)

    @requests_mock.Mocker()
    def test_open_image_max_width(self, mock):
        """
        For an image wider than ``max_width``, the request is expected to use
        ``max_width`` (100) instead of the image width (1000).
        """
        self._write_file([{"id": "volumeid"}])
        # Only the 100-pixel URL is registered, so any other width would fail to match.
        # The bytes are served through `content` rather than an open file object
        # passed as `body`, so that no file handle is left open after the test.
        mock.get(
            "http://someimage/full/100,/0/default.jpg",
            content=(SAMPLES / "img1.jpg").read_bytes(),
        )
        gen = ThumbnailGenerator(self.path)
        gen.open_image(
            "http://someimage",
            image_width=1000,
            max_width=100,
        )
        self.assertEqual(mock.call_count, 1)

    @requests_mock.Mocker()
    def test_generate_thumbnail(self, mock):
        """
        The thumbnail built from the three sample images must stay close to the
        reference ``thumb.jpg``, with a root mean square of at most 10.0.
        """
        self._write_file([{"id": "volumeid"}])
        images = [Image.open(SAMPLES / f"img{n}.jpg") for n in range(1, 4)]
        expected_thumbnail = Image.open(str(SAMPLES / "thumb.jpg"))
        # Image.open keeps the underlying file open; close every image after the test
        for image in (*images, expected_thumbnail):
            self.addCleanup(image.close)

        gen = ThumbnailGenerator(self.path)
        actual_thumbnail = gen.generate_thumbnail(images)

        self.assertLessEqual(
            root_mean_square(expected_thumbnail, actual_thumbnail), 10.0
        )

    @requests_mock.Mocker()
    def test_upload_thumbnail(self, mock):
        """
        The upload is expected to be retried after 502 and 503 responses until it
        succeeds, for three PUT requests in total.
        """
        self._write_file([{"id": "volumeid"}])
        # Successive responses returned by the mocked storage endpoint
        mock.put(
            "http://s3/somewhere",
            [{"status_code": 502}, {"status_code": 503}, {"status_code": 200}],
        )
        thumbnail = Image.open(str(SAMPLES / "thumb.jpg"))
        # Image.open keeps the underlying file open; close it after the test
        self.addCleanup(thumbnail.close)

        gen = ThumbnailGenerator(self.path)
        gen.upload_thumbnail(thumbnail=thumbnail, url="http://s3/somewhere")
        self.assertEqual(mock.call_count, 3)

    @requests_mock.Mocker()
    def test_run(self, mock):
        """
        Run the generator over a missing element, a page and a volume, then check the
        exact ordered list of HTTP requests that were made.
        """

        def child(url, width):
            # Minimal child element payload: only the image URL and width are provided
            return {"zone": {"image": {"url": url, "width": width}}}

        self._write_file([{"id": "notfound"}, {"id": "pageid"}, {"id": "volumeid"}])
        mock.get(
            "/api/v1/corpus/",
            json=[
                {
                    "id": "corpusid",
                    "types": [
                        {"slug": "volume", "display_name": "Volume", "folder": True},
                        {"slug": "page", "display_name": "Page", "folder": False},
                    ],
                }
            ],
        )
        mock.get("/api/v1/element/notfound/", status_code=404)
        mock.get(
            "/api/v1/element/pageid/",
            json={"id": "pageid", "type": "page", "corpus": {"id": "corpusid"}},
        )
        mock.get(
            "/api/v1/element/volumeid/",
            json={
                "id": "volumeid",
                "name": "Some volume",
                "type": "volume",
                "corpus": {"id": "corpusid"},
                "thumbnail_put_url": "http://s3/somewhere",
            },
        )
        # Direct children: two images only
        mock.get(
            "/api/v1/elements/volumeid/children/?folder=False&with_zone=True",
            complete_qs=True,
            json={
                "count": 2,
                "number": 1,
                "results": [
                    child("http://url1", 500),
                    child("http://url2", 750),
                ],
            },
        )
        # Recursive children: the same two images plus two more
        mock.get(
            "/api/v1/elements/volumeid/children/?folder=False&with_zone=True&recursive=True",
            complete_qs=True,
            json={
                "count": 4,
                "number": 1,
                "results": [
                    child("http://url1", 500),
                    child("http://url2", 750),
                    child("http://url3", 1000),
                    child("http://url4", 1000),
                ],
            },
        )
        # Serve the sample bytes through `content` rather than open file objects
        # passed as `body`, so that no file handle is left open after the test.
        # url3 is registered at width 900 although the listing reports 1000, and
        # url4 is not registered at all.
        for index, width in ((1, 500), (2, 750), (3, 900)):
            mock.get(
                f"http://url{index}/full/{width},/0/default.jpg",
                content=(SAMPLES / f"img{index}.jpg").read_bytes(),
            )
        # The first upload attempt fails, the second one succeeds
        mock.put("http://s3/somewhere", [{"status_code": 502}, {"status_code": 200}])

        gen = ThumbnailGenerator(self.path)
        gen.run()

        self.assertListEqual(
            [(req.method, req.url) for req in mock.request_history],
            [
                ("GET", "https://arkindex.teklia.com/api/v1/corpus/"),
                ("GET", "https://arkindex.teklia.com/api/v1/element/notfound/"),
                ("GET", "https://arkindex.teklia.com/api/v1/element/pageid/"),
                ("GET", "https://arkindex.teklia.com/api/v1/element/volumeid/"),
                (
                    "GET",
                    "https://arkindex.teklia.com/api/v1/elements/volumeid/children/?folder=False&with_zone=True",
                ),
                ("GET", "http://url1/full/500,/0/default.jpg"),
                ("GET", "http://url2/full/750,/0/default.jpg"),
                (
                    "GET",
                    "https://arkindex.teklia.com/api/v1/elements/volumeid/children/?folder=False&recursive=True&with_zone=True",
                ),
                ("GET", "http://url3/full/900,/0/default.jpg"),
                ("PUT", "http://s3/somewhere"),
                ("PUT", "http://s3/somewhere"),
            ],
        )