
New DatasetExtractor using a DatasetWorker

Merged Eva Bardou requested to merge dataset-worker into main
2 files changed: +16 −13
@@ -11,13 +11,11 @@ from arkindex_worker.cache import (
     CachedTranscription,
     CachedTranscriptionEntity,
 )
+from worker_generic_training_dataset.db import retrieve_element
 from worker_generic_training_dataset.worker import DatasetExtractor
 
 
 def test_process_split(tmp_path, downloaded_images):
-    # Parent is train folder
-    parent_id: UUID = UUID("a0c4522d-2d80-4766-a01c-b9d686f41f6a")
-
     worker = DatasetExtractor()
     # Parse some arguments
     worker.args = Namespace(database=None)
@@ -27,18 +25,22 @@ def test_process_split(tmp_path, downloaded_images):
     # Where to save the downloaded images
     worker.image_folder = tmp_path
 
-    worker.process_split("train", parent_id)
-
-    # Should have created 20 elements in total
-    assert CachedElement.select().count() == 20
-
-    first_page_id = UUID("e26e6803-18da-4768-be30-a0a68132107c")
-    second_page_id = UUID("c673bd94-96b1-4a2e-8662-a4d806940b5f")
-
-    # Should have created two pages under root folder
-    assert (
-        CachedElement.select().where(CachedElement.parent_id == parent_id).count() == 2
-    )
+    first_page_id = UUID("e26e6803-18da-4768-be30-a0a68132107c")
+    second_page_id = UUID("c673bd94-96b1-4a2e-8662-a4d806940b5f")
+
+    worker.process_split(
+        "train",
+        [
+            retrieve_element(first_page_id),
+            retrieve_element(second_page_id),
+        ],
+    )
+
+    # Should have created 19 elements in total
+    assert CachedElement.select().count() == 19
+
+    # Should have created two pages at root
+    assert CachedElement.select().where(CachedElement.parent_id.is_null()).count() == 2
 
     # Should have created 8 text_lines under first page
     assert (
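For context, the updated test exercises a process_split that now receives the split name together with a list of elements fetched with retrieve_element, instead of the previous split name plus parent element id. The sketch below only illustrates that calling convention; it is not the worker's actual implementation, and the Element stand-in class, the class name and the print-based body are hypothetical.

# Minimal sketch of the new calling convention exercised by the test above.
# Everything here (the Element stand-in, the class name, the body) is
# hypothetical; only the signature shape comes from the merge request diff.
from typing import NamedTuple


class Element(NamedTuple):
    # Stand-in for whatever retrieve_element() returns from the local database.
    id: str
    type: str


class DatasetExtractorSketch:
    def process_split(self, split_name: str, elements: list[Element]) -> None:
        # Old signature: process_split("train", parent_id) walked the children
        # of a parent folder. New signature: the caller passes the elements of
        # the split directly.
        for element in elements:
            print(f"[{split_name}] caching element {element.id} ({element.type})")


if __name__ == "__main__":
    worker = DatasetExtractorSketch()
    worker.process_split(
        "train",
        [
            Element(id="e26e6803-18da-4768-be30-a0a68132107c", type="page"),
            Element(id="c673bd94-96b1-4a2e-8662-a4d806940b5f", type="page"),
        ],
    )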