From f3a8c17dbd935d75e4b13459f63a2dac3933a629 Mon Sep 17 00:00:00 2001 From: EvaBardou <bardou@teklia.com> Date: Thu, 26 Oct 2023 18:23:52 +0200 Subject: [PATCH] Nit and bump --- requirements.txt | 2 +- tests/test_worker.py | 2 +- worker_generic_training_dataset/worker.py | 14 +++++++++----- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/requirements.txt b/requirements.txt index 069ba1a..2fc24b3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -arkindex-base-worker @ git+https://gitlab.teklia.com/workers/base-worker.git@master +arkindex-base-worker==0.3.5rc4 arkindex-export==0.1.7 diff --git a/tests/test_worker.py b/tests/test_worker.py index c64e177..2b4651c 100644 --- a/tests/test_worker.py +++ b/tests/test_worker.py @@ -19,7 +19,7 @@ def test_process_split(tmp_path, downloaded_images): worker = DatasetExtractor() # Parse some arguments worker.args = Namespace(database=None) - worker.data_folder = tmp_path + worker.data_folder_path = tmp_path worker.configure_cache() worker.cached_images = dict() diff --git a/worker_generic_training_dataset/worker.py b/worker_generic_training_dataset/worker.py index 3e54552..20c0934 100644 --- a/worker_generic_training_dataset/worker.py +++ b/worker_generic_training_dataset/worker.py @@ -62,7 +62,8 @@ class DatasetExtractor(DatasetWorker): self.download_latest_export() def configure_storage(self) -> None: - self.data_folder = Path(tempfile.mkdtemp(suffix="-arkindex-data")) + self.data_folder = tempfile.TemporaryDirectory(suffix="-arkindex-data") + self.data_folder_path = Path(self.data_folder.name) # Initialize db that will be written self.configure_cache() @@ -71,7 +72,7 @@ class DatasetExtractor(DatasetWorker): self.cached_images = dict() # Where to save the downloaded images - self.images_folder = self.data_folder / "images" + self.images_folder = self.data_folder_path / "images" self.images_folder.mkdir(parents=True) logger.info(f"Images will be saved at `{self.images_folder}`.") @@ -80,8 +81,8 @@ class DatasetExtractor(DatasetWorker): Create an SQLite database compatible with base-worker cache and initialize it. """ self.use_cache = True - self.cache_path: Path = self.data_folder / "db.sqlite" - logger.info(f"Cached database will be saved at `{self.data_folder}`.") + self.cache_path: Path = self.data_folder_path / "db.sqlite" + logger.info(f"Cached database will be saved at `{self.cache_path}`.") init_cache_db(self.cache_path) @@ -318,7 +319,10 @@ class DatasetExtractor(DatasetWorker): # TAR + ZSTD the cache and the images folder, and store as task artifact zstd_archive_path: Path = self.work_dir / f"{dataset.id}.zstd" logger.info(f"Compressing the images to {zstd_archive_path}") - create_tar_zst_archive(source=self.data_folder, destination=zstd_archive_path) + create_tar_zst_archive( + source=self.data_folder_path, destination=zstd_archive_path + ) + self.data_folder.cleanup() def main(): -- GitLab