import importlib
import io
import logging
from pathlib import Path

from PIL import Image

from arkindex_worker.models import Element

SAMPLES = Path(__file__).absolute().parent / "samples"


def _iiif_url(index):
    """Return the IIIF URL these tests use for sample image number ``index``."""
    return f"https://iiifserver/image{index}/full/900,/0/default.jpg"


def _sample_path(index):
    """Return the path of sample image number ``index`` in the samples folder."""
    return SAMPLES / f"image{index}.jpg"


def _mock_children(mock_worker, folder, children, **filters):
    """Queue a mocked ``ListElementChildren`` response for ``folder``.

    Extra keyword arguments (e.g. ``recursive=True``) are forwarded as-is so
    that the mocked request only carries the filters the caller asked for.
    """
    mock_worker.api_client.add_response(
        "ListElementChildren",
        id=folder.id,
        folder=False,
        **filters,
        response=children,
    )


def _http_calls(responses):
    """Return the ``(method, url)`` pairs of the HTTP calls recorded so far."""
    return [(call.request.method, call.request.url) for call in responses.calls]


def _level_and_message(caplog):
    """Return the captured log records as ``(level, message)`` pairs."""
    return [record[1:] for record in caplog.record_tuples]


def test_import():
    """Import our newly created module, through importlib to avoid parsing issues"""
    module = importlib.import_module("worker_thumbnails_generator.worker")

    assert hasattr(module, "ThumbnailsGenerator")
    assert hasattr(module.ThumbnailsGenerator, "process_element")


def test_get_first_images_max_width(mock_worker, pages_payload, responses):
    """With first_n=2, only two images are fetched and one children listing is made."""
    mock_worker.first_n = 2
    folder = Element({"id": "folder_id"})
    _mock_children(mock_worker, folder, pages_payload)

    # Only retrieving the first 2 images as first_n=2
    wanted = (1, 2)
    for index in wanted:
        responses.get(_iiif_url(index), body=_sample_path(index).read_bytes())

    images = mock_worker.get_first_images(folder)

    assert images == [Image.open(_sample_path(index)) for index in wanted]

    # No need to list children recursively as we have everything we need in the first page
    assert len(mock_worker.api_client.history) == 1
    assert len(mock_worker.api_client.responses) == 0

    assert len(responses.calls) == 2
    assert _http_calls(responses) == [("GET", _iiif_url(index)) for index in wanted]


def test_get_first_images_deduplicates_url(
    mock_worker, page_1_payload, page_2_payload, responses
):
    """Two children sharing one image URL lead to a single download."""
    # Page 1 and 2 point to the same image, we will retrieve it once
    shared_url = page_1_payload["zone"]["image"]["url"]
    page_2_payload["zone"]["image"]["url"] = shared_url

    folder = Element({"id": "folder_id"})
    _mock_children(mock_worker, folder, [page_1_payload, page_2_payload])
    _mock_children(
        mock_worker, folder, [page_1_payload, page_2_payload], recursive=True
    )

    responses.get(_iiif_url(1), body=_sample_path(1).read_bytes())

    images = mock_worker.get_first_images(folder)

    assert images == [Image.open(_sample_path(1))]

    assert len(mock_worker.api_client.history) == 2
    assert len(mock_worker.api_client.responses) == 0

    assert len(responses.calls) == 1
    assert _http_calls(responses) == [("GET", _iiif_url(1))]


def test_get_first_images_handles_errors(mock_worker, pages_payload, responses):
    """
    In case of errors, images are skipped until we got first_n images
    or no elements are left
    """
    folder = Element({"id": "folder_id"})
    _mock_children(mock_worker, folder, pages_payload)
    _mock_children(mock_worker, folder, pages_payload, recursive=True)

    responses.get(_iiif_url(1), body=_sample_path(1).read_bytes())
    responses.get(_iiif_url(2), status=404)
    responses.get(_iiif_url(3), status=500)

    images = mock_worker.get_first_images(folder)

    # We were looking for 3 images but the second and third ones failed
    # and we reached the end of the child list
    assert images == [Image.open(_sample_path(1))]

    assert len(mock_worker.api_client.history) == 2
    assert len(mock_worker.api_client.responses) == 0

    # The image server is called once for each image, failing calls are retried 2 times
    assert len(responses.calls) == 7

    expected_calls = [("GET", _iiif_url(1))]
    # First call fails with a 404, it is retried twice
    expected_calls += [("GET", _iiif_url(2))] * 3
    # First call fails with a 500, it is retried twice
    expected_calls += [("GET", _iiif_url(3))] * 3
    assert _http_calls(responses) == expected_calls


def test_generate_thumbnail(mock_worker):
    """The thumbnail built from the three samples matches the reference JPEG bytes."""
    sources = [Image.open(_sample_path(index)) for index in (1, 2, 3)]

    thumbnail = mock_worker.generate_thumbnail(sources)

    # Save the generated thumbnail to bytes to compare it
    buffer = io.BytesIO()
    thumbnail.save(buffer, format="JPEG")
    assert (SAMPLES / "thumbnail.jpg").read_bytes() == buffer.getvalue()


def test_process_element_not_folder(caplog, mock_worker, page_1_payload):
    """Processing a page element only logs that it was skipped."""
    mock_worker.process_element(Element(page_1_payload))

    assert _level_and_message(caplog) == [
        (logging.INFO, "Skipping page 1 (page_1) since it is not a folder")
    ]


def test_process_element_no_images_found(caplog, mock_worker):
    """A folder whose children listings are empty logs a warning about skipping generation."""
    folder = Element({"id": "folder_id", "type": "folder", "name": "My folder"})
    _mock_children(mock_worker, folder, [])
    _mock_children(mock_worker, folder, [], recursive=True)

    mock_worker.process_element(folder)

    assert _level_and_message(caplog) == [
        (
            logging.INFO,
            "Downloading images of the first elements in folder My folder (folder_id)",
        ),
        (
            logging.WARNING,
            "No elements with a valid image were found for folder My folder (folder_id) - skipping generation",
        ),
    ]


def test_process_element(
    caplog, mock_worker, page_1_payload, page_2_payload, pages_payload, responses
):
    """A folder gets its three images downloaded and its thumbnail uploaded,
    with the failed upload being retried."""
    upload_url = "https://s3/somewhere"
    folder = Element(
        {
            "id": "folder_id",
            "type": "folder",
            "name": "My folder",
            "thumbnail_put_url": upload_url,
        }
    )

    # We want 3 images, we found 2 on the first children page
    _mock_children(mock_worker, folder, [page_1_payload, page_2_payload])
    # The third image is available when using recursive=True
    _mock_children(mock_worker, folder, pages_payload, recursive=True)

    # All three images to build the thumbnail
    indexes = (1, 2, 3)
    for index in indexes:
        responses.get(_iiif_url(index), body=_sample_path(index).read_bytes())

    # Uploading the thumbnail can fail a few times, that will be retried
    responses.put(upload_url, status=502)
    responses.put(upload_url, status=200)

    mock_worker.process_element(folder)

    assert len(mock_worker.api_client.history) == 2
    assert len(mock_worker.api_client.responses) == 0

    assert len(responses.calls) == 5
    assert _http_calls(responses) == [
        *[("GET", _iiif_url(index)) for index in indexes],
        ("PUT", upload_url),
        ("PUT", upload_url),
    ]

    # Download messages end with an elapsed time, which is stripped before comparing
    logged = [
        (level, message.partition(" in 0:00")[0])
        for _, level, message in caplog.record_tuples
    ]
    assert logged == [
        (
            logging.INFO,
            "Downloading images of the first elements in folder My folder (folder_id)",
        ),
        *[
            (
                logging.INFO,
                f"Downloaded image {_iiif_url(index)} - size=300x400",
            )
            for index in indexes
        ],
        (logging.INFO, "Generating thumbnail for folder My folder (folder_id)"),
        (logging.INFO, "Uploading thumbnail for folder My folder (folder_id)"),
        (
            logging.WARNING,
            "Request to https://s3/somewhere failed (HTTPError('502 Server Error: Bad Gateway for url: https://s3/somewhere')), retrying in 2.0 seconds",
        ),
        (logging.INFO, "Uploaded image to https://s3/somewhere"),
    ]