Skip to content
Snippets Groups Projects
Commit f3a8c17d authored by Eva Bardou :frog:
Browse files

Nit and bump

parent 316f067b
No related branches found
No related tags found
1 merge request: !8 New DatasetExtractor using a DatasetWorker
Pipeline #138726 passed
......@@ -19,7 +19,7 @@ def test_process_split(tmp_path, downloaded_images):
worker = DatasetExtractor()
# Parse some arguments
worker.args = Namespace(database=None)
worker.data_folder = tmp_path
worker.data_folder_path = tmp_path
worker.configure_cache()
worker.cached_images = dict()
......
......@@ -62,7 +62,8 @@ class DatasetExtractor(DatasetWorker):
self.download_latest_export()
def configure_storage(self) -> None:
self.data_folder = Path(tempfile.mkdtemp(suffix="-arkindex-data"))
self.data_folder = tempfile.TemporaryDirectory(suffix="-arkindex-data")
self.data_folder_path = Path(self.data_folder.name)
# Initialize db that will be written
self.configure_cache()
......@@ -71,7 +72,7 @@ class DatasetExtractor(DatasetWorker):
self.cached_images = dict()
# Where to save the downloaded images
self.images_folder = self.data_folder / "images"
self.images_folder = self.data_folder_path / "images"
self.images_folder.mkdir(parents=True)
logger.info(f"Images will be saved at `{self.images_folder}`.")
......@@ -80,8 +81,8 @@ class DatasetExtractor(DatasetWorker):
Create an SQLite database compatible with base-worker cache and initialize it.
"""
self.use_cache = True
self.cache_path: Path = self.data_folder / "db.sqlite"
logger.info(f"Cached database will be saved at `{self.data_folder}`.")
self.cache_path: Path = self.data_folder_path / "db.sqlite"
logger.info(f"Cached database will be saved at `{self.cache_path}`.")
init_cache_db(self.cache_path)
......@@ -318,7 +319,10 @@ class DatasetExtractor(DatasetWorker):
# TAR + ZSTD the cache and the images folder, and store as task artifact
zstd_archive_path: Path = self.work_dir / f"{dataset.id}.zstd"
logger.info(f"Compressing the images to {zstd_archive_path}")
create_tar_zst_archive(source=self.data_folder, destination=zstd_archive_path)
create_tar_zst_archive(
source=self.data_folder_path, destination=zstd_archive_path
)
self.data_folder.cleanup()
def main():
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment