Skip to content
Snippets Groups Projects

Update dataset archive extension to `.tar.zst`

Merged Eva Bardou requested to merge ext-tar-zst into main
1 file
+4
-4
Compare changes
  • Side-by-side
  • Inline
@@ -355,11 +355,11 @@ class DatasetExtractor(DatasetWorker):
 casted_elements = list(map(_format_element, elements))
 self.process_split(split_name, casted_elements)
-# TAR + ZSTD the cache and the images folder, and store as task artifact
-zstd_archive_path: Path = self.work_dir / f"{dataset.id}.zstd"
-logger.info(f"Compressing the images to {zstd_archive_path}")
+# TAR + ZST the cache and the images folder, and store as task artifact
+zst_archive_path: Path = self.work_dir / f"{dataset.id}.tar.zst"
+logger.info(f"Compressing the images to {zst_archive_path}")
 create_tar_zst_archive(
-source=self.data_folder_path, destination=zstd_archive_path
+source=self.data_folder_path, destination=zst_archive_path
 )
 self.data_folder.cleanup()
Loading