Skip to content
Snippets Groups Projects
test_worker.py 9.55 KiB
Newer Older
Eva Bardou's avatar
Eva Bardou committed
import importlib
import io
import logging
from pathlib import Path
Eva Bardou's avatar
Eva Bardou committed

from PIL import Image
Eva Bardou's avatar
Eva Bardou committed

from arkindex_worker.models import Element

SAMPLES = Path(__file__).absolute().parent / "samples"
Eva Bardou's avatar
Eva Bardou committed


def test_import():
    """
    The worker module can be imported and exposes the expected class and method.
    The import goes through importlib at runtime to avoid parsing issues.
    """
    module = importlib.import_module("worker_thumbnails_generator.worker")
    assert hasattr(module, "ThumbnailsGenerator")

    generator_cls = module.ThumbnailsGenerator
    assert hasattr(generator_cls, "process_element")


def test_get_first_images_max_width(mock_worker, pages_payload, responses):
    """
    With first_n=2, only the first two images are fetched and a single
    children listing is enough to find them
    """
    mock_worker.first_n = 2

    folder = Element({"id": "folder_id"})
    mock_worker.api_client.add_response(
        "ListElementChildren", id=folder.id, folder=False, response=pages_payload
    )

    # first_n=2, so only two image downloads are registered on the image server
    downloads = [
        (
            "https://iiifserver/image1/full/900,/0/default.jpg",
            SAMPLES / "image1.jpg",
        ),
        (
            "https://iiifserver/image2/full/900,/0/default.jpg",
            SAMPLES / "image2.jpg",
        ),
    ]
    for url, sample in downloads:
        responses.get(url, body=sample.read_bytes())

    images = mock_worker.get_first_images(folder)
    assert images == [Image.open(sample) for _, sample in downloads]

    # The first page of children already holds everything: no recursive listing
    assert len(mock_worker.api_client.history) == 1
    assert len(mock_worker.api_client.responses) == 0

    assert len(responses.calls) == 2
    performed = []
    for call in responses.calls:
        performed.append((call.request.method, call.request.url))
    assert performed == [("GET", url) for url, _ in downloads]


def test_get_first_images_deduplicates_url(
    mock_worker, page_1_payload, page_2_payload, responses
):
    """
    Two elements sharing the same image URL only trigger a single download
    """
    image_url = "https://iiifserver/image1/full/900,/0/default.jpg"

    # Make page 2 reuse the image of page 1, so it must only be retrieved once
    page_2_payload["zone"]["image"]["url"] = page_1_payload["zone"]["image"]["url"]

    folder = Element({"id": "folder_id"})
    # Same children for the direct listing, then for the recursive one
    for extra_filters in ({}, {"recursive": True}):
        mock_worker.api_client.add_response(
            "ListElementChildren",
            id=folder.id,
            folder=False,
            **extra_filters,
            response=[page_1_payload, page_2_payload],
        )

    responses.get(image_url, body=(SAMPLES / "image1.jpg").read_bytes())

    images = mock_worker.get_first_images(folder)
    assert images == [Image.open(SAMPLES / "image1.jpg")]

    # Both registered listings were consumed
    assert len(mock_worker.api_client.history) == 2
    assert len(mock_worker.api_client.responses) == 0

    assert len(responses.calls) == 1
    performed = []
    for call in responses.calls:
        performed.append((call.request.method, call.request.url))
    assert performed == [("GET", image_url)]


def test_get_first_images_handles_errors(mock_worker, pages_payload, responses):
    """
    Images that cannot be downloaded are skipped, until either first_n images
    have been retrieved or there are no more elements to try
    """
    url_image_1 = "https://iiifserver/image1/full/900,/0/default.jpg"
    url_image_2 = "https://iiifserver/image2/full/900,/0/default.jpg"
    url_image_3 = "https://iiifserver/image3/full/900,/0/default.jpg"

    folder = Element({"id": "folder_id"})
    # Direct children listing first, then the recursive one
    for extra_filters in ({}, {"recursive": True}):
        mock_worker.api_client.add_response(
            "ListElementChildren",
            id=folder.id,
            folder=False,
            **extra_filters,
            response=pages_payload,
        )

    # Only the first image is served properly, the two others are in error
    responses.get(url_image_1, body=(SAMPLES / "image1.jpg").read_bytes())
    responses.get(url_image_2, status=404)
    responses.get(url_image_3, status=500)

    images = mock_worker.get_first_images(folder)
    # We were looking for 3 images but the second and third ones failed and we reached the end of the child list
    assert images == [Image.open(SAMPLES / "image1.jpg")]

    assert len(mock_worker.api_client.history) == 2
    assert len(mock_worker.api_client.responses) == 0

    # The image server is called once for each image, failing calls are retried 2 times
    assert len(responses.calls) == 7
    expected_calls = (
        [("GET", url_image_1)]
        # First call fails with a 404, it is retried twice
        + [("GET", url_image_2)] * 3
        # First call fails with a 500, it is retried twice
        + [("GET", url_image_3)] * 3
    )
    performed = []
    for call in responses.calls:
        performed.append((call.request.method, call.request.url))
    assert performed == expected_calls


def test_generate_thumbnail(mock_worker):
    """
    The thumbnail generated from the three sample images is identical,
    once saved as JPEG, to the reference thumbnail stored in the samples
    """
    images = []
    for n in (1, 2, 3):
        images.append(Image.open(SAMPLES / f"image{n}.jpg"))

    thumbnail = mock_worker.generate_thumbnail(images)

    # Serialize the result in memory so it can be compared to the reference file
    buffer = io.BytesIO()
    thumbnail.save(buffer, format="JPEG")

    expected_bytes = (SAMPLES / "thumbnail.jpg").read_bytes()
    assert expected_bytes == buffer.getvalue()


def test_process_element_not_folder(caplog, mock_worker, page_1_payload):
    """
    Processing an element that is not a folder only logs that it is skipped
    """
    mock_worker.process_element(Element(page_1_payload))

    # Keep the level and message of each record, the logger name is ignored
    logged = []
    for _logger_name, level, message in caplog.record_tuples:
        logged.append((level, message))

    assert logged == [
        (logging.INFO, "Skipping page 1 (page_1) since it is not a folder")
    ]


def test_process_element_no_images_found(caplog, mock_worker):
    """
    A folder whose children listings are both empty logs a warning and
    the thumbnail generation is skipped
    """
    folder = Element({"id": "folder_id", "type": "folder", "name": "My folder"})

    # Neither the direct listing nor the recursive one returns any element
    for extra_filters in ({}, {"recursive": True}):
        mock_worker.api_client.add_response(
            "ListElementChildren",
            id=folder.id,
            folder=False,
            **extra_filters,
            response=[],
        )

    mock_worker.process_element(folder)

    # Keep the level and message of each record, the logger name is ignored
    logged = []
    for _logger_name, level, message in caplog.record_tuples:
        logged.append((level, message))

    expected_info = (
        logging.INFO,
        "Downloading images of the first elements in folder My folder (folder_id)",
    )
    expected_warning = (
        logging.WARNING,
        "No elements with a valid image were found for folder My folder (folder_id) - skipping generation",
    )
    assert logged == [expected_info, expected_warning]


def test_process_element(
    caplog, mock_worker, page_1_payload, page_2_payload, pages_payload, responses
):
    """
    Full run on a folder: three images are downloaded (the third one is only
    found through the recursive listing), then the thumbnail is uploaded,
    with the upload being retried after a first 502 error
    """
    upload_url = "https://s3/somewhere"
    folder = Element(
        {
            "id": "folder_id",
            "type": "folder",
            "name": "My folder",
            "thumbnail_put_url": upload_url,
        }
    )

    # We want 3 images, we found 2 on the first children page
    mock_worker.api_client.add_response(
        "ListElementChildren",
        id=folder.id,
        folder=False,
        response=[page_1_payload, page_2_payload],
    )
    # The third image is available when using recursive=True
    mock_worker.api_client.add_response(
        "ListElementChildren",
        id=folder.id,
        folder=False,
        recursive=True,
        response=pages_payload,
    )

    # All three images to build the thumbnail
    downloads = [
        (
            "https://iiifserver/image1/full/900,/0/default.jpg",
            SAMPLES / "image1.jpg",
        ),
        (
            "https://iiifserver/image2/full/900,/0/default.jpg",
            SAMPLES / "image2.jpg",
        ),
        (
            "https://iiifserver/image3/full/900,/0/default.jpg",
            SAMPLES / "image3.jpg",
        ),
    ]
    for url, sample in downloads:
        responses.get(url, body=sample.read_bytes())

    # Uploading the thumbnail can fail a few times, that will be retried
    for status in (502, 200):
        responses.put(upload_url, status=status)

    mock_worker.process_element(folder)

    assert len(mock_worker.api_client.history) == 2
    assert len(mock_worker.api_client.responses) == 0

    # Three downloads followed by the failed upload and its successful retry
    assert len(responses.calls) == 5
    performed = []
    for call in responses.calls:
        performed.append((call.request.method, call.request.url))
    assert performed == [("GET", url) for url, _ in downloads] + [
        ("PUT", upload_url)
    ] * 2

    # Anything from " in 0:00" onwards is cut from each message before comparing
    logged = []
    for _logger_name, level, message in caplog.record_tuples:
        logged.append((level, message.split(" in 0:00")[0]))

    assert logged == [
        (
            logging.INFO,
            "Downloading images of the first elements in folder My folder (folder_id)",
        ),
        (
            logging.INFO,
            "Downloaded image https://iiifserver/image1/full/900,/0/default.jpg - size=300x400",
        ),
        (
            logging.INFO,
            "Downloaded image https://iiifserver/image2/full/900,/0/default.jpg - size=300x400",
        ),
        (
            logging.INFO,
            "Downloaded image https://iiifserver/image3/full/900,/0/default.jpg - size=300x400",
        ),
        (logging.INFO, "Generating thumbnail for folder My folder (folder_id)"),
        (logging.INFO, "Uploading thumbnail for folder My folder (folder_id)"),
        (
            logging.WARNING,
            "Request to https://s3/somewhere failed (HTTPError('502 Server Error: Bad Gateway for url: https://s3/somewhere')), retrying in 2.0 seconds",
        ),
        (logging.INFO, "Uploaded image to https://s3/somewhere"),
    ]