diff --git a/kaldi_data_generator/kaldi_data_generator.py b/kaldi_data_generator/kaldi_data_generator.py
index 5156c7735c3adf1af8714b0d75bba6a490cf6ba4..df09faf0379606944d667d66dca25d3b48c359d9 100644
--- a/kaldi_data_generator/kaldi_data_generator.py
+++ b/kaldi_data_generator/kaldi_data_generator.py
@@ -368,7 +368,8 @@ class KaldiPartitionSplitter:
         self.use_existing_split = use_existing_split
 
     def page_level_split(self, line_ids: list) -> dict:
-        page_ids = list({"_".join(line_id.split("_")[:-1]) for line_id in line_ids})
+        # sort: `set` order is arbitrary and the seeded shuffle needs a stable input
+        page_ids = sorted({"_".join(line_id.split("_")[:-1]) for line_id in line_ids})
         random.Random(SEED).shuffle(page_ids)
         page_count = len(page_ids)
 
@@ -398,7 +399,7 @@ class KaldiPartitionSplitter:
         lines_path = Path(f"{self.out_dir_base}/Lines")
         line_ids = [
             str(file.relative_to(lines_path).with_suffix(""))
-            for file in lines_path.glob("**/*.jpg")
+            for file in sorted(lines_path.glob("**/*.jpg"))
         ]
 
         if self.use_existing_split: