diff --git a/worker_generic_training_dataset/utils.py b/worker_generic_training_dataset/utils.py index 91ba65536c62106a01f9dfc9957e3c06dc0f377b..01ee0f9c5d112d78403f460ee47207d766f4a932 100644 --- a/worker_generic_training_dataset/utils.py +++ b/worker_generic_training_dataset/utils.py @@ -7,7 +7,6 @@ from urllib.parse import urljoin import cv2 import imageio.v2 as iio -from arkindex_worker.utils import close_delete_file, create_tar_archive, zstd_compress from worker_generic_training_dataset.exceptions import ImageDownloadError logger = logging.getLogger(__name__) @@ -53,11 +52,3 @@ def download_image(element, folder: Path): tries += 1 except Exception as e: raise ImageDownloadError(element.id, e) - - -def create_tar_zstd_archive(folder_path, destination: Path): - tar_fd, tar_archive, _ = create_tar_archive(folder_path) - - _, _, _ = zstd_compress(tar_archive, destination) - - close_delete_file(tar_fd, tar_archive) diff --git a/worker_generic_training_dataset/worker.py b/worker_generic_training_dataset/worker.py index 1bc7bd5ad3d0929670db9db37dce99fbaf2f75f9..30120a142d8c518169d5c14acecafb46f266446f 100644 --- a/worker_generic_training_dataset/worker.py +++ b/worker_generic_training_dataset/worker.py @@ -21,6 +21,7 @@ from arkindex_worker.cache import ( ) from arkindex_worker.cache import db as cache_database from arkindex_worker.cache import init_cache_db +from arkindex_worker.utils import create_tar_zst_archive from arkindex_worker.worker.base import BaseWorker from worker_generic_training_dataset.db import ( get_children, @@ -29,10 +30,7 @@ from worker_generic_training_dataset.db import ( retrieve_element, retrieve_entities, ) -from worker_generic_training_dataset.utils import ( - create_tar_zstd_archive, - download_image, -) +from worker_generic_training_dataset.utils import download_image logger = logging.getLogger(__name__) @@ -278,7 +276,8 @@ class DatasetExtractor(BaseWorker): # TAR + ZSTD Image folder and store as task artifact zstd_archive_path = self.work_dir / "arkindex_data.zstd" logger.info(f"Compressing the images to {zstd_archive_path}") - create_tar_zstd_archive(folder_path=image_folder, destination=zstd_archive_path) + + create_tar_zst_archive(source=image_folder, destination=zstd_archive_path) # Cleanup image folder shutil.rmtree(image_folder)