Skip to content
Snippets Groups Projects

Only use files when computing model archive hash

Merged Thibault Lavigne requested to merge only-use-files-when-computing-model-archive-hash into master
All threads resolved!
5 files
+ 37
9
Compare changes
  • Side-by-side
  • Inline
Files
5
@@ -30,9 +30,7 @@ FileSize = NewType("FileSize", int)
@contextmanager
def create_archive(
path: DirPath, use_parent_folder: bool = False
) -> Tuple[Path, Hash, FileSize, Hash]:
def create_archive(path: DirPath) -> Tuple[Path, Hash, FileSize, Hash]:
"""
Create a tar archive from the files at the given location then compress it to a zst archive.
@@ -54,17 +52,21 @@ def create_archive(
# Files hierarchy is kept in the archive.
file_list = []
with tarfile.open(path_to_tar_archive, "w") as tar:
if use_parent_folder:
# Only publish contents of the given folder without parent folder
tar.add(path, arcname=".")
else:
tar.add(path)
file_list = [member for member in tar.getnames() if os.path.isfile(member)]
for p in path.glob("**/*"):
x = p.relative_to(path)
tar.add(p, arcname=x, recursive=False)
if p.is_dir():
continue
file_list.append(p)
# Sort by path
file_list.sort()
# Compute hash of the files
for file_path in file_list:
print("BOOM PATH", file_path)
with open(file_path, "rb") as file_data:
for chunk in iter(lambda: file_data.read(CHUNK_SIZE), b""):
content_hasher.update(chunk)
@@ -100,6 +102,7 @@ class TrainingMixin(object):
model_id: str,
tag: Optional[str] = None,
description: Optional[str] = None,
use_parent_folder=False,
):
"""
This method creates a model archive and its associated hash,
Loading