Skip to content
Snippets Groups Projects
Commit fcfdfcbe authored by Martin
Browse files

add sorts to make the splits reproducible

parent ada7640d
No related branches found
No related tags found
1 merge request: !10 "Make the splits reproducible"
Pipeline #74286 passed
......@@ -368,7 +368,8 @@ class KaldiPartitionSplitter:
self.use_existing_split = use_existing_split
def page_level_split(self, line_ids: list) -> dict:
page_ids = list({"_".join(line_id.split("_")[:-1]) for line_id in line_ids})
# need to sort again, because `set` will lose the order
page_ids = sorted({"_".join(line_id.split("_")[:-1]) for line_id in line_ids})
random.Random(SEED).shuffle(page_ids)
page_count = len(page_ids)
......@@ -398,7 +399,7 @@ class KaldiPartitionSplitter:
lines_path = Path(f"{self.out_dir_base}/Lines")
line_ids = [
str(file.relative_to(lines_path).with_suffix(""))
for file in lines_path.glob("**/*.jpg")
for file in sorted(lines_path.glob("**/*.jpg"))
]
if self.use_existing_split:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment