diff --git a/requirements.txt b/requirements.txt
index 069ba1a1b7ad9d90a3d426ed9a0a8fb3c8ad4fc7..2fc24b331041a7d09932164a1865a9765a4a82c4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,2 @@
-arkindex-base-worker @ git+https://gitlab.teklia.com/workers/base-worker.git@master
+arkindex-base-worker==0.3.5rc4
 arkindex-export==0.1.7
diff --git a/tests/test_worker.py b/tests/test_worker.py
index c64e177f343a3a661fc237c8c7f0a420cce821ef..2b4651cf5be92644af93e13ff1e091844f470937 100644
--- a/tests/test_worker.py
+++ b/tests/test_worker.py
@@ -19,7 +19,7 @@ def test_process_split(tmp_path, downloaded_images):
     worker = DatasetExtractor()
     # Parse some arguments
     worker.args = Namespace(database=None)
-    worker.data_folder = tmp_path
+    worker.data_folder_path = tmp_path
     worker.configure_cache()
     worker.cached_images = dict()
 
diff --git a/worker_generic_training_dataset/worker.py b/worker_generic_training_dataset/worker.py
index 3e545522c75e6b27a04a9e4a80154e20ba35e18f..20c09348c8e0d88bc97e16babadd90772fed3a37 100644
--- a/worker_generic_training_dataset/worker.py
+++ b/worker_generic_training_dataset/worker.py
@@ -62,7 +62,8 @@ class DatasetExtractor(DatasetWorker):
         self.download_latest_export()
 
     def configure_storage(self) -> None:
-        self.data_folder = Path(tempfile.mkdtemp(suffix="-arkindex-data"))
+        self.data_folder = tempfile.TemporaryDirectory(suffix="-arkindex-data")
+        self.data_folder_path = Path(self.data_folder.name)
 
         # Initialize db that will be written
         self.configure_cache()
@@ -71,7 +72,7 @@ class DatasetExtractor(DatasetWorker):
         self.cached_images = dict()
 
         # Where to save the downloaded images
-        self.images_folder = self.data_folder / "images"
+        self.images_folder = self.data_folder_path / "images"
         self.images_folder.mkdir(parents=True)
         logger.info(f"Images will be saved at `{self.images_folder}`.")
 
@@ -80,8 +81,8 @@ class DatasetExtractor(DatasetWorker):
         Create an SQLite database compatible with base-worker cache and initialize it.
         """
         self.use_cache = True
-        self.cache_path: Path = self.data_folder / "db.sqlite"
-        logger.info(f"Cached database will be saved at `{self.data_folder}`.")
+        self.cache_path: Path = self.data_folder_path / "db.sqlite"
+        logger.info(f"Cached database will be saved at `{self.cache_path}`.")
 
         init_cache_db(self.cache_path)
 
@@ -318,7 +319,10 @@ class DatasetExtractor(DatasetWorker):
         # TAR + ZSTD the cache and the images folder, and store as task artifact
         zstd_archive_path: Path = self.work_dir / f"{dataset.id}.zstd"
         logger.info(f"Compressing the images to {zstd_archive_path}")
-        create_tar_zst_archive(source=self.data_folder, destination=zstd_archive_path)
+        create_tar_zst_archive(
+            source=self.data_folder_path, destination=zstd_archive_path
+        )
+        self.data_folder.cleanup()
 
 
 def main():