diff --git a/arkindex_worker/worker/training.py b/arkindex_worker/worker/training.py index ffe52d6f9632882743dbc7a5340d4fc5b1c5ef66..bacd1dde8f451206ff68169e0364191cd17b4387 100644 --- a/arkindex_worker/worker/training.py +++ b/arkindex_worker/worker/training.py @@ -50,10 +50,14 @@ def create_archive(path: DirPath) -> Tuple[Path, Hash, FileSize, Hash]: # Create an uncompressed tar archive with all the needed files # Files hierarchy ifs kept in the archive. - + file_list = [] with tarfile.open(path_to_tar_archive, "w") as tar: - tar.add(path) - file_list = [member for member in tar.getnames() if os.path.isfile(member)] + for p in path.glob("**/*"): + x = p.relative_to(path) + tar.add(p, arcname=x, recursive=False) + # Only keep files when computing the hash + if p.is_file(): + file_list.append(p) # Sort by path file_list.sort() diff --git a/tests/conftest.py b/tests/conftest.py index 8b6f16ef834712c769badc8308143c81e58a3a88..1ab4047abefe4013e8ed78b527bc6df5b3053966 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -26,7 +26,7 @@ from arkindex_worker.worker import BaseWorker, ElementsWorker from arkindex_worker.worker.transcription import TextOrientation FIXTURES_DIR = Path(__file__).resolve().parent / "data" -SAMPLES_DIR = Path("tests") / "samples" +SAMPLES_DIR = Path(__file__).resolve().parent / "samples" __yaml_cache = {} @@ -282,7 +282,7 @@ def model_file_dir(): @pytest.fixture def model_file_dir_with_subfolder(): - return SAMPLES_DIR / "model_files_with_subfolder" + return SAMPLES_DIR / "root_folder" @pytest.fixture diff --git a/tests/samples/model_files_with_subfolder/model-best/model_file.pth b/tests/samples/root_folder/model_file.pth similarity index 100% rename from tests/samples/model_files_with_subfolder/model-best/model_file.pth rename to tests/samples/root_folder/model_file.pth diff --git a/tests/samples/model_files_with_subfolder/model-last/model_file.pth b/tests/samples/root_folder/subfolder1/model_file.pth similarity index 100% rename from tests/samples/model_files_with_subfolder/model-last/model_file.pth rename to tests/samples/root_folder/subfolder1/model_file.pth diff --git a/tests/samples/root_folder/subfolder2/model_file.pth b/tests/samples/root_folder/subfolder2/model_file.pth new file mode 100644 index 0000000000000000000000000000000000000000..cc78ba3026c620f5d0e8c5b65071ae8ae2dfe157 --- /dev/null +++ b/tests/samples/root_folder/subfolder2/model_file.pth @@ -0,0 +1 @@ +Wow this is actually the data of the best model ever created on Arkindex \ No newline at end of file diff --git a/tests/test_elements_worker/test_training.py b/tests/test_elements_worker/test_training.py index 4448554da951fcaaa87ccde715af8c31865a92fd..551fdf6d42121733d68a00fc3ec659153919583b 100644 --- a/tests/test_elements_worker/test_training.py +++ b/tests/test_elements_worker/test_training.py @@ -56,7 +56,7 @@ def test_create_archive_with_subfolder(model_file_dir_with_subfolder): ): assert os.path.exists(zst_archive_path), "The archive was not created" assert ( - hash == "e2fa86cefc33b24502ad4151a638dd29" + hash == "3e453881404689e6e125144d2db3e605" ), "Hash was not properly computed" assert 300 < size < 1500