Newer
Older
import io
import logging
from pathlib import Path
from arkindex_worker.models import Element
# Absolute path to the "samples" directory located next to this test module;
# the tests read their input images and the reference thumbnail from there.
SAMPLES = Path(__file__).absolute().parent.joinpath("samples")
def test_import():
    """Check that the worker module can be imported and exposes its entry points.

    The module is loaded through importlib to avoid parsing issues.
    """
    module = importlib.import_module("worker_thumbnails_generator.worker")

    # The worker class must exist and provide the per-element hook
    assert hasattr(module, "ThumbnailsGenerator")
    generator_cls = module.ThumbnailsGenerator
    assert hasattr(generator_cls, "process_element")
def test_get_first_images_max_width(mock_worker, pages_payload, responses):
    """With first_n=2, only the first two images of the children are downloaded."""
    image_1_url = "https://iiifserver/image1/full/900,/0/default.jpg"
    image_2_url = "https://iiifserver/image2/full/900,/0/default.jpg"
    sample_1 = SAMPLES / "image1.jpg"
    sample_2 = SAMPLES / "image2.jpg"

    mock_worker.first_n = 2
    parent = Element({"id": "folder_id"})
    mock_worker.api_client.add_response(
        "ListElementChildren",
        id=parent.id,
        folder=False,
        response=pages_payload,
    )

    # Only retrieving the first 2 images as first_n=2
    for url, sample in ((image_1_url, sample_1), (image_2_url, sample_2)):
        responses.get(url, body=sample.read_bytes())

    images = mock_worker.get_first_images(parent)

    assert images == [Image.open(sample_1), Image.open(sample_2)]

    # No need to list children recursively as we have everything we need in the first page
    assert len(mock_worker.api_client.history) == 1
    assert len(mock_worker.api_client.responses) == 0

    assert len(responses.calls) == 2
    performed = [(c.request.method, c.request.url) for c in responses.calls]
    assert performed == [("GET", image_1_url), ("GET", image_2_url)]
def test_get_first_images_deduplicates_url(
    mock_worker, page_1_payload, page_2_payload, responses
):
    """Two children sharing the same image URL lead to a single download."""
    shared_url = "https://iiifserver/image1/full/900,/0/default.jpg"
    sample = SAMPLES / "image1.jpg"

    # Page 1 and 2 point to the same image, we will retrieve it once
    page_2_payload["zone"]["image"]["url"] = page_1_payload["zone"]["image"]["url"]

    parent = Element({"id": "folder_id"})
    # Register the direct listing first, then the recursive one, with the same children
    for extra_filters in ({}, {"recursive": True}):
        mock_worker.api_client.add_response(
            "ListElementChildren",
            id=parent.id,
            folder=False,
            **extra_filters,
            response=[page_1_payload, page_2_payload],
        )

    responses.get(shared_url, body=sample.read_bytes())

    images = mock_worker.get_first_images(parent)

    assert images == [Image.open(sample)]

    assert len(mock_worker.api_client.history) == 2
    assert len(mock_worker.api_client.responses) == 0

    assert len(responses.calls) == 1
    performed = [(c.request.method, c.request.url) for c in responses.calls]
    assert performed == [("GET", shared_url)]
def test_get_first_images_handles_errors(mock_worker, pages_payload, responses):
    """
    In case of errors, images are skipped until we got first_n images or no elements are left
    """
    image_1_url = "https://iiifserver/image1/full/900,/0/default.jpg"
    image_2_url = "https://iiifserver/image2/full/900,/0/default.jpg"
    image_3_url = "https://iiifserver/image3/full/900,/0/default.jpg"
    sample = SAMPLES / "image1.jpg"

    parent = Element({"id": "folder_id"})
    # Register the direct listing first, then the recursive one, with the same payload
    for extra_filters in ({}, {"recursive": True}):
        mock_worker.api_client.add_response(
            "ListElementChildren",
            id=parent.id,
            folder=False,
            **extra_filters,
            response=pages_payload,
        )

    # The first image is served normally, the two others always fail
    responses.get(image_1_url, body=sample.read_bytes())
    for failing_url, error_status in ((image_2_url, 404), (image_3_url, 500)):
        responses.get(failing_url, status=error_status)

    images = mock_worker.get_first_images(parent)

    # We were looking for 3 images but the second and third ones failed and we reached the end of the child list
    assert images == [Image.open(sample)]

    assert len(mock_worker.api_client.history) == 2
    assert len(mock_worker.api_client.responses) == 0

    # The image server is called once for each image, failing calls are retried 2 times
    assert len(responses.calls) == 7
    performed = [(c.request.method, c.request.url) for c in responses.calls]
    assert performed == (
        [("GET", image_1_url)]
        # First call fails with a 404, it is retried twice
        + [("GET", image_2_url)] * 3
        # First call fails with a 500, it is retried twice
        + [("GET", image_3_url)] * 3
    )
def test_generate_thumbnail(mock_worker):
    """The thumbnail built from the three sample images matches the stored reference file."""
    sources = []
    for n in range(1, 4):
        sources.append(Image.open(SAMPLES / f"image{n}.jpg"))

    thumbnail = mock_worker.generate_thumbnail(sources)

    # Serialize the generated thumbnail as JPEG in memory so it can be compared byte by byte
    with io.BytesIO() as buffer:
        thumbnail.save(buffer, format="JPEG")
        encoded = buffer.getvalue()

    reference = (SAMPLES / "thumbnail.jpg").read_bytes()
    assert reference == encoded
def test_process_element_not_folder(caplog, mock_worker, page_1_payload):
    """Processing an element that is not a folder only logs that it is skipped."""
    mock_worker.process_element(Element(page_1_payload))

    # Drop the logger name, only the level and the message matter here
    logged = [(level, message) for _, level, message in caplog.record_tuples]
    assert logged == [
        (logging.INFO, "Skipping page 1 (page_1) since it is not a folder")
    ]
def test_process_element_no_images_found(caplog, mock_worker):
    """A folder whose children listings are empty logs a warning and skips generation."""
    parent = Element({"id": "folder_id", "type": "folder", "name": "My folder"})

    # Both the direct and the recursive children listings come back empty
    for extra_filters in ({}, {"recursive": True}):
        mock_worker.api_client.add_response(
            "ListElementChildren",
            id=parent.id,
            folder=False,
            **extra_filters,
            response=[],
        )

    mock_worker.process_element(parent)

    # Drop the logger name, only the level and the message matter here
    logged = [(level, message) for _, level, message in caplog.record_tuples]
    assert logged == [
        (
            logging.INFO,
            "Downloading images of the first elements in folder My folder (folder_id)",
        ),
        (
            logging.WARNING,
            "No elements with a valid image were found for folder My folder (folder_id) - skipping generation",
        ),
    ]
def test_process_element(
    caplog, mock_worker, page_1_payload, page_2_payload, pages_payload, responses
):
    """Full run on a folder: three images are downloaded, then the thumbnail is uploaded."""
    upload_url = "https://s3/somewhere"
    downloads = [
        ("https://iiifserver/image1/full/900,/0/default.jpg", "image1.jpg"),
        ("https://iiifserver/image2/full/900,/0/default.jpg", "image2.jpg"),
        ("https://iiifserver/image3/full/900,/0/default.jpg", "image3.jpg"),
    ]

    parent = Element(
        {
            "id": "folder_id",
            "type": "folder",
            "name": "My folder",
            "thumbnail_put_url": upload_url,
        }
    )

    # We want 3 images, we found 2 on the first children page
    mock_worker.api_client.add_response(
        "ListElementChildren",
        id=parent.id,
        folder=False,
        response=[page_1_payload, page_2_payload],
    )
    # The third image is available when using recursive=True
    mock_worker.api_client.add_response(
        "ListElementChildren",
        id=parent.id,
        folder=False,
        recursive=True,
        response=pages_payload,
    )

    # All three images to build the thumbnail
    for image_url, sample_name in downloads:
        responses.get(image_url, body=(SAMPLES / sample_name).read_bytes())

    # Uploading the thumbnail can fail a few times, that will be retried
    for upload_status in (502, 200):
        responses.put(upload_url, status=upload_status)

    mock_worker.process_element(parent)

    assert len(mock_worker.api_client.history) == 2
    assert len(mock_worker.api_client.responses) == 0

    assert len(responses.calls) == 5
    performed = [(c.request.method, c.request.url) for c in responses.calls]
    expected_calls = [("GET", image_url) for image_url, _ in downloads]
    # One failed upload followed by the successful retry
    expected_calls += [("PUT", upload_url)] * 2
    assert performed == expected_calls

    # Cut each message before " in 0:00" so that timing details are not compared
    logged = [
        (level, message.partition(" in 0:00")[0])
        for _, level, message in caplog.record_tuples
    ]
    assert logged == [
        (
            logging.INFO,
            "Downloading images of the first elements in folder My folder (folder_id)",
        ),
        (
            logging.INFO,
            "Downloaded image https://iiifserver/image1/full/900,/0/default.jpg - size=300x400",
        ),
        (
            logging.INFO,
            "Downloaded image https://iiifserver/image2/full/900,/0/default.jpg - size=300x400",
        ),
        (
            logging.INFO,
            "Downloaded image https://iiifserver/image3/full/900,/0/default.jpg - size=300x400",
        ),
        (logging.INFO, "Generating thumbnail for folder My folder (folder_id)"),
        (logging.INFO, "Uploading thumbnail for folder My folder (folder_id)"),
        (
            logging.WARNING,
            "Request to https://s3/somewhere failed (HTTPError('502 Server Error: Bad Gateway for url: https://s3/somewhere')), retrying in 2.0 seconds",
        ),
        (logging.INFO, "Uploaded image to https://s3/somewhere"),
    ]