Skip to content
Snippets Groups Projects

New DatasetExtractor using a DatasetWorker

Merged Eva Bardou requested to merge dataset-worker into main
All threads resolved!
2 files
+ 25
− 14
Compare changes
  • Side-by-side
  • Inline
Files
2
+ 11
− 6
@@ -19,11 +19,13 @@ def test_process_split(tmp_path, downloaded_images):
worker = DatasetExtractor()
# Parse some arguments
worker.args = Namespace(database=None)
worker.data_folder = tmp_path
worker.configure_cache()
worker.cached_images = dict()
# Where to save the downloaded images
worker.image_folder = tmp_path
worker.images_folder = tmp_path / "images"
worker.images_folder.mkdir(parents=True)
first_page_id = UUID("e26e6803-18da-4768-be30-a0a68132107c")
second_page_id = UUID("c673bd94-96b1-4a2e-8662-a4d806940b5f")
@@ -80,11 +82,6 @@ def test_process_split(tmp_path, downloaded_images):
== f"https://europe-gamma.iiif.teklia.com/iiif/2/public%2Fiam%2F{page_name}.png"
)
assert sorted(tmp_path.rglob("*")) == [
tmp_path / f"{first_image_id}.jpg",
tmp_path / f"{second_image_id}.jpg",
]
# Should have created 17 transcriptions
assert CachedTranscription.select().count() == 17
# Check transcription of first line on first page
@@ -127,3 +124,11 @@ def test_process_split(tmp_path, downloaded_images):
assert tr_entity.length == 23
assert tr_entity.confidence == 1.0
assert tr_entity.worker_run_id is None
# Full structure of the archive
assert sorted(tmp_path.rglob("*")) == [
tmp_path / "db.sqlite",
tmp_path / "images",
tmp_path / "images" / f"{first_image_id}.jpg",
tmp_path / "images" / f"{second_image_id}.jpg",
]
Loading