diff --git a/bio_parser/__init__.py b/bio_parser/__init__.py
index e142d5a75614f7be2c8b9f89afe60064e177db3c..7769488795dbb6de906c6e5895bd001f322c6f60 100644
--- a/bio_parser/__init__.py
+++ b/bio_parser/__init__.py
@@ -17,3 +17,6 @@ logging.basicConfig(
 # Add colorful tracebacks to crash with elegance
 # https://rich.readthedocs.io/en/latest/traceback.html
 traceback.install()
+
+# Reserved name for global statistics
+GLOBAL_STAT_NAME = "total"
diff --git a/bio_parser/parse/exceptions.py b/bio_parser/parse/exceptions.py
new file mode 100644
index 0000000000000000000000000000000000000000..6009799d4369d4c47384ab365fb5cb9cf5bc8a47
--- /dev/null
+++ b/bio_parser/parse/exceptions.py
@@ -0,0 +1,38 @@
+"""Exceptions raised during file parsing."""
+from pathlib import Path
+
+
+class FileProcessingError(Exception):
+    """Base error raised when a problem is encountered while parsing a file."""
+
+    filename: Path
+    """
+    Path of the file being processed.
+    """
+
+    def __init__(self, filename: Path, *args: object) -> None:
+        super().__init__(*args)  # remaining positional args are forwarded to Exception
+        self.filename = filename
+
+
+class InvalidFile(FileProcessingError):
+    """Raised when a BIO file is not valid; the message names the offending file."""
+
+    def __str__(self) -> str:
+        return f"BIO file {self.filename} is not valid"
+
+
+class ForbiddenEntityName(FileProcessingError):
+    """Raised when a file uses an entity name reserved for global statistics."""
+
+    entity_name: str
+    """
+    Forbidden entity name encountered.
+    """
+
+    def __init__(self, filename: Path, entity_name: str, *args: object) -> None:
+        super().__init__(filename, *args)  # positional: a keyword here collides with *args
+        self.entity_name = entity_name
+
+    def __str__(self) -> str:
+        return f"Invalid entity name {self.entity_name}: reserved for global statistics ({self.filename})."
diff --git a/bio_parser/utils.py b/bio_parser/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..a5f485cb51097d800e7555cb8e2e9670b413f256
--- /dev/null
+++ b/bio_parser/utils.py
@@ -0,0 +1,112 @@
+"""Utils functions."""
+
+import logging
+from operator import attrgetter
+from pathlib import Path
+
+from bio_parser.parse.document import Document
+from bio_parser.parse.exceptions import ForbiddenEntityName, InvalidFile
+
+logger = logging.getLogger(__name__)
+
+
+def check_complete(labels: list[Path], predictions: list[Path]):
+    """Check that label and prediction BIO files pair up by file name, else raise FileNotFoundError.
+
+    Args:
+        labels: List of label BIO files (only their file names are compared).
+        predictions: List of prediction BIO files (only their file names are compared).
+    """
+    # Collect the file names (without directories) on each side.
+    label_filenames = {label.name for label in labels}
+    prediction_filenames = {prediction.name for prediction in predictions}
+
+    # Raise a single error listing the files missing on either side.
+    if label_filenames != prediction_filenames:
+        messages = []
+        missing_label_files = prediction_filenames.difference(label_filenames)
+        missing_pred_files = label_filenames.difference(prediction_filenames)
+        if len(missing_pred_files) > 0:
+            messages.append(f"Missing prediction files: {missing_pred_files}.")
+        if len(missing_label_files) > 0:
+            messages.append(f"Missing label files: {missing_label_files}.")
+        raise FileNotFoundError("\n".join(messages))
+
+
+def check_valid_bio(
+    bio_files: list[Path], global_stat_name: str | None = None
+) -> list[Document]:
+    """Check that BIO files exist and are valid, and parse them.
+
+    Args:
+        bio_files (list[Path]): List of BIO files to check.
+        global_stat_name (str | None, optional): Entity name to forbid; nothing is forbidden when None or empty. Defaults to None.
+
+    Raises:
+        FileNotFoundError: A file could not be found.
+        InvalidFile: A file could not be parsed by `Document.from_file`.
+        ForbiddenEntityName: An entity named `global_stat_name` is used in a file.
+
+    Returns:
+        list[Document]: The parsed documents, in the same order as `bio_files`.
+    """
+    parsed = []
+    for filename in bio_files:
+        # Raise an error if the document does not exist
+        if not filename.exists():
+            raise FileNotFoundError(
+                f"BIO file {filename} does not exist.",
+            )
+
+        # Wrap any parsing failure in InvalidFile, keeping the original error as its cause
+        try:
+            document = Document.from_file(filename)
+        except Exception as e:
+            raise InvalidFile(filename) from e
+
+        # Raise an error if an entity is named global_stat_name (entity[0] is presumably its name)
+        if global_stat_name and global_stat_name in {
+            entity[0] for entity in document.entities
+        }:
+            raise ForbiddenEntityName(filename=filename, entity_name=global_stat_name)
+        parsed.append(document)
+    return parsed
+
+
+def load_dataset(
+    label_dir: Path,
+    prediction_dir: Path,
+) -> list[tuple[Document, Document]]:
+    """Load and pair the `*.bio` files of a label directory and a prediction directory.
+
+    Args:
+        label_dir (Path): Path to the label directory.
+        prediction_dir (Path): Path to the prediction directory.
+
+    Returns:
+        list[tuple[Document, Document]]: A list of tuples containing each label and its corresponding prediction Document.
+    """
+    sorted_labels = sorted(label_dir.glob("*.bio"), key=attrgetter("name"))
+    sorted_predictions = sorted(prediction_dir.glob("*.bio"), key=attrgetter("name"))
+
+    # Raise FileNotFoundError if either directory has no *.bio file (both are reported)
+    if not (sorted_labels and sorted_predictions):
+        messages = []
+        if not sorted_labels:
+            messages.append(f"Empty label directory: {label_dir}.")
+        if not sorted_predictions:
+            messages.append(f"Empty prediction directory: {prediction_dir}.")
+        raise FileNotFoundError("\n".join(messages))
+
+    # Check that every label file has a same-named prediction file, and vice versa
+    check_complete(sorted_labels, sorted_predictions)
+
+    logger.info("Loading labels...")
+    labels = check_valid_bio(sorted_labels)
+
+    logger.info("Loading prediction...")
+    predictions = check_valid_bio(sorted_predictions)
+
+    logger.info("The dataset is complete and valid.")
+    # Both lists are sorted by file name, so zip pairs each label with its prediction
+    return list(zip(labels, predictions))
diff --git a/docs/reference/parse/exceptions.md b/docs/reference/parse/exceptions.md
new file mode 100644
index 0000000000000000000000000000000000000000..8f3ef960e179ed7f197186cb151d86cbc206cb0d
--- /dev/null
+++ b/docs/reference/parse/exceptions.md
@@ -0,0 +1,3 @@
+# Exceptions
+
+::: bio_parser.parse.exceptions
\ No newline at end of file
diff --git a/docs/reference/utils.md b/docs/reference/utils.md
new file mode 100644
index 0000000000000000000000000000000000000000..972dce9f72788955ca360f5a0be679e55fa83ea7
--- /dev/null
+++ b/docs/reference/utils.md
@@ -0,0 +1,3 @@
+# Utils
+
+::: bio_parser.utils
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index bc0dff87b554bb20d8d38e6086c895c4565695b0..e7eb4efe29d2dd5faf4a18e5051d2529ec044370 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,6 +53,8 @@ ignore = [
     "D417",
     # May cause some conflicts
     "COM812",
+    # Missing docstring in __init__ and other magic methods
+    "D105", "D107",
 ]
 select = [
     # pycodestyle
diff --git a/tests/__init__.py b/tests/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..c0a662153efc99ebddde92082e540481eb46e360 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -0,0 +1,3 @@
+from pathlib import Path
+
+FIXTURES = Path(__file__).with_name("fixtures")
diff --git a/tests/conftest.py b/tests/conftest.py
deleted file mode 100644
index c0a662153efc99ebddde92082e540481eb46e360..0000000000000000000000000000000000000000
--- a/tests/conftest.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from pathlib import Path
-
-FIXTURES = Path(__file__).with_name("fixtures")
diff --git a/tests/fixtures/utils/bad_entity_name.bio b/tests/fixtures/utils/bad_entity_name.bio
new file mode 100644
index 0000000000000000000000000000000000000000..82e85d7c66ce7d7f5ce9c5b65c49d1ded28b3625
--- /dev/null
+++ b/tests/fixtures/utils/bad_entity_name.bio
@@ -0,0 +1,5 @@
+This 0
+entity 0
+type B-total
+is I-total
+reserved I-total
\ No newline at end of file
diff --git a/tests/fixtures/utils/bad_format.bio b/tests/fixtures/utils/bad_format.bio
new file mode 100644
index 0000000000000000000000000000000000000000..416a0b6e5c55a12743303476ac8997da401bd6ca
--- /dev/null
+++ b/tests/fixtures/utils/bad_format.bio
@@ -0,0 +1,5 @@
+This 0
+is 0
+a 0
+bad I-adj
+format 0
\ No newline at end of file
diff --git a/tests/fixtures/utils/labels/example_0.bio b/tests/fixtures/utils/labels/example_0.bio
new file mode 100644
index 0000000000000000000000000000000000000000..a922b118f09a51046ef179df4b6b99a5bff2d4a0
--- /dev/null
+++ b/tests/fixtures/utils/labels/example_0.bio
@@ -0,0 +1,5 @@
+What O
+a O
+nice O
+toolbox O
+! O
\ No newline at end of file
diff --git a/tests/fixtures/utils/labels/example_1.bio b/tests/fixtures/utils/labels/example_1.bio
new file mode 100644
index 0000000000000000000000000000000000000000..122f1c0d2c5e2d1b430d7debe4498b2a4187cb6b
--- /dev/null
+++ b/tests/fixtures/utils/labels/example_1.bio
@@ -0,0 +1,37 @@
+Dissapte O
+a O
+5 O
+rebere O
+de O
+Joan B-husband_name
+Massuet B-husband_surname
+pages B-husband_occupation
+del O
+regne B-husband_location
+de I-husband_location
+frança I-husband_location
+ha= O
+bitant O
+en O
+Collsabadell B-husband_location
+ab O
+Joana B-wife_name
+donsella B-wife_state
+filla O
+de O
+Bathomeu B-wifes_father_name
+Pi= B-wifes_father_surname
+joan I-wifes_father_surname
+texidor B-wifes_father_occupation
+de I-wifes_father_occupation
+llana I-wifes_father_occupation
+de O
+Sta B-wifes_father_location
+Maria I-wifes_father_location
+de I-wifes_father_location
+Palau I-wifes_father_location
+tordera I-wifes_father_location
+y O
+de O
+Elisabeth B-wifes_mother_name
+defuncts O
\ No newline at end of file
diff --git a/tests/fixtures/utils/labels/example_2.bio b/tests/fixtures/utils/labels/example_2.bio
new file mode 100644
index 0000000000000000000000000000000000000000..f9a0330e1c0c5b7026ecc2588b3f19271f88037f
--- /dev/null
+++ b/tests/fixtures/utils/labels/example_2.bio
@@ -0,0 +1,88 @@
+So O
+he O
+put O
+up O
+for O
+the O
+night B-time
+at O
+The B-fac
+Admiral's I-fac
+Head I-fac
+, O
+that O
+famous O
+Portsmouth B-gpe
+hostelry O
+, O
+second B-ordinal
+only O
+in O
+historic O
+interest O
+to O
+The B-fac
+George I-fac
+, O
+unhappily O
+destroyed O
+by O
+German B-norp
+bombs O
+during O
+the O
+last O
+war O
+. O
+Having O
+deposited O
+his O
+baggage O
+and O
+unpacked O
+his O
+overnight-bag O
+he O
+went O
+in O
+search O
+of O
+a O
+drink O
+. O
+The O
+lower O
+bar O
+was O
+empty O
+, O
+save O
+for O
+the O
+lady O
+known O
+by O
+all O
+habitue O
+*?2s O
+as O
+' O
+Seaweed B-per
+' O
+, O
+and O
+a O
+youngish O
+, O
+sharp-eyed O
+man O
+who O
+was O
+staring O
+moodily O
+into O
+a O
+gin O
+and O
+tonic O
+. O
\ No newline at end of file
diff --git a/tests/fixtures/utils/labels/example_3.bio b/tests/fixtures/utils/labels/example_3.bio
new file mode 100644
index 0000000000000000000000000000000000000000..c8eecd1e5d6a06edd5c22282dc0dfa06701acbd5
--- /dev/null
+++ b/tests/fixtures/utils/labels/example_3.bio
@@ -0,0 +1,19 @@
+SAINT-LOUIS B-intitule
+en I-intitule
+l'ISLE I-intitule
+(Les I-intitule
+administrateurs I-intitule
+de I-intitule
+la I-intitule
+compagnie I-intitule
+de I-intitule
+charité I-intitule
+des I-intitule
+pauvres I-intitule
+de I-intitule
+l'église I-intitule
+royale I-intitule
+de) I-intitule
+X1A B-cote_serie
+4701 B-cote_article
+41 B-precisions_sur_cote
\ No newline at end of file
diff --git a/tests/fixtures/utils/labels/example_4.bio b/tests/fixtures/utils/labels/example_4.bio
new file mode 100644
index 0000000000000000000000000000000000000000..d49037bcf492ed37a220cb0e1205ceced9549e22
--- /dev/null
+++ b/tests/fixtures/utils/labels/example_4.bio
@@ -0,0 +1,5 @@
+d° B-surname
+Jeannine B-firstname
+17 B-birth_date
+P B-location_of_birth
+f B-link
\ No newline at end of file
diff --git a/tests/fixtures/utils/predictions/example_0.bio b/tests/fixtures/utils/predictions/example_0.bio
new file mode 100644
index 0000000000000000000000000000000000000000..a922b118f09a51046ef179df4b6b99a5bff2d4a0
--- /dev/null
+++ b/tests/fixtures/utils/predictions/example_0.bio
@@ -0,0 +1,5 @@
+What O
+a O
+nice O
+toolbox O
+! O
\ No newline at end of file
diff --git a/tests/fixtures/utils/predictions/example_1.bio b/tests/fixtures/utils/predictions/example_1.bio
new file mode 100644
index 0000000000000000000000000000000000000000..122f1c0d2c5e2d1b430d7debe4498b2a4187cb6b
--- /dev/null
+++ b/tests/fixtures/utils/predictions/example_1.bio
@@ -0,0 +1,37 @@
+Dissapte O
+a O
+5 O
+rebere O
+de O
+Joan B-husband_name
+Massuet B-husband_surname
+pages B-husband_occupation
+del O
+regne B-husband_location
+de I-husband_location
+frança I-husband_location
+ha= O
+bitant O
+en O
+Collsabadell B-husband_location
+ab O
+Joana B-wife_name
+donsella B-wife_state
+filla O
+de O
+Bathomeu B-wifes_father_name
+Pi= B-wifes_father_surname
+joan I-wifes_father_surname
+texidor B-wifes_father_occupation
+de I-wifes_father_occupation
+llana I-wifes_father_occupation
+de O
+Sta B-wifes_father_location
+Maria I-wifes_father_location
+de I-wifes_father_location
+Palau I-wifes_father_location
+tordera I-wifes_father_location
+y O
+de O
+Elisabeth B-wifes_mother_name
+defuncts O
\ No newline at end of file
diff --git a/tests/fixtures/utils/predictions/example_2.bio b/tests/fixtures/utils/predictions/example_2.bio
new file mode 100644
index 0000000000000000000000000000000000000000..0da0ac3818975fd907922818cbb3791909044436
--- /dev/null
+++ b/tests/fixtures/utils/predictions/example_2.bio
@@ -0,0 +1,90 @@
+So O
+he O
+put O
+up O
+for O
+the O
+night O
+at O
+the O
+Admiral's B-fac
+Head O
+, O
+that O
+famour O
+Portsmarith B-gpe
+hostelry O
+, O
+secand O
+only O
+in O
+historic O
+interest O
+to O
+the O
+George B-gpe
+, O
+unhappily O
+destrayed O
+by O
+German B-norp
+lomber O
+during O
+the O
+last O
+war O
+. O
+Having O
+deposited O
+his O
+buggage O
+and O
+cmpacked O
+his O
+overnight-leg O
+he O
+went O
+in O
+search O
+af O
+a O
+drink O
+. O
+The O
+lower O
+bar O
+was O
+empty O
+, O
+save O
+for O
+the O
+lady O
+known O
+by O
+all O
+hahitue O
+? B-percent
+? O
+'s O
+as O
+' O
+Seaweed B-work_of_art
+, O
+, O
+anda O
+youngish O
+, O
+sharp O
+eyed O
+man O
+who O
+was O
+storing O
+moodily O
+into O
+a O
+gin O
+and O
+tonic O
+. O
\ No newline at end of file
diff --git a/tests/fixtures/utils/predictions/example_3.bio b/tests/fixtures/utils/predictions/example_3.bio
new file mode 100644
index 0000000000000000000000000000000000000000..2bb5920bd778e7efb5f27f68beea2caa2b625b1c
--- /dev/null
+++ b/tests/fixtures/utils/predictions/example_3.bio
@@ -0,0 +1,22 @@
+SAINT-LOUIS B-intitule
+ent I-intitule
+ISLE I-intitule
+(les I-intitule
+administrateurs I-intitule
+de I-intitule
+la I-intitule
+compagnie I-intitule
+de I-intitule
+Charité I-intitule
+des I-intitule
+pauvres I-intitule
+de I-intitule
+l'église I-intitule
+royale I-intitule
+de) I-intitule
+8 B-date
+janvier I-date
+1771 I-date
+X1A B-cote_serie
+4701 B-cote_article
+41 B-precisions_sur_cote
\ No newline at end of file
diff --git a/tests/fixtures/utils/predictions/example_4.bio b/tests/fixtures/utils/predictions/example_4.bio
new file mode 100644
index 0000000000000000000000000000000000000000..2b27cabdcd18680d33b55390ad683aa10f45dbef
--- /dev/null
+++ b/tests/fixtures/utils/predictions/example_4.bio
@@ -0,0 +1,5 @@
+d° B-surname
+Jeaniine B-firstname
+17 B-birth_date
+P B-location_of_birth
+f B-link
\ No newline at end of file
diff --git a/tests/parse/__init__.py b/tests/parse/__init__.py
index 62facd4acb6d8fd508632266c4e06640cf7e892c..be6a932cc2da4165f3b8f519bdfbdddb155822d0 100644
--- a/tests/parse/__init__.py
+++ b/tests/parse/__init__.py
@@ -1,3 +1,3 @@
-from tests.conftest import FIXTURES
+from tests import FIXTURES
 
 DATA_DIR = FIXTURES / "parse"
diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..0162bffb89ba6bbac3f07a2be5b2ba6d2f1f92e9
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,160 @@
+import pytest
+from bio_parser.parse.document import Document
+from bio_parser.utils import load_dataset, check_valid_bio, check_complete
+from tests import FIXTURES
+
+DATA = FIXTURES / "utils"
+
+
+@pytest.mark.parametrize(
+    "filenames",
+    (
+        (
+            [
+                DATA / "bad_format.bio",
+            ]
+        ),
+        (
+            [
+                DATA / "bad_entity_name.bio",
+            ]
+        ),
+        (
+            [
+                DATA / "labels" / "example_0.bio",
+                DATA / "labels" / "example_1.bio",
+                DATA / "labels" / "example_2.bio",
+                DATA / "bad_entity_name.bio",
+            ]
+        ),
+    ),
+)
+def test_check_valid_bio_raise(filenames):
+    with pytest.raises(Exception):
+        check_valid_bio(filenames, global_stat_name="total")  # enable the reserved-name check
+
+
+@pytest.mark.parametrize(
+    "filenames",
+    (
+        (
+            [
+                DATA / "labels" / "example_0.bio",
+                DATA / "labels" / "example_1.bio",
+                DATA / "labels" / "example_2.bio",
+            ]
+        ),
+        (
+            [
+                DATA / "predictions" / "example_0.bio",
+                DATA / "predictions" / "example_1.bio",
+                DATA / "predictions" / "example_2.bio",
+            ]
+        ),
+        ([]),
+    ),
+)
+def test_check_valid_bio(filenames):
+    check_valid_bio(filenames)
+
+
+@pytest.mark.parametrize(
+    "labels, predictions",
+    (
+        (
+            [
+                DATA / "labels" / "example_0.bio",
+                DATA / "labels" / "example_1.bio",
+                DATA / "labels" / "example_2.bio",
+            ],
+            [
+                DATA / "predictions" / "example_0.bio",
+                DATA / "predictions" / "example_1.bio",
+                DATA / "predictions" / "example_2.bio",
+            ],
+        ),
+        (
+            [],
+            [],
+        ),
+    ),
+)
+def test_check_complete(labels, predictions):
+    check_complete(labels, predictions)
+
+
+@pytest.mark.parametrize(
+    "labels, predictions, message",
+    (
+        (
+            [
+                DATA / "labels" / "example_0.bio",
+                DATA / "labels" / "example_1.bio",
+                DATA / "labels" / "example_2.bio",
+            ],
+            [
+                DATA / "predictions" / "example_0.bio",
+                DATA / "predictions" / "example_1.bio",
+            ],
+            "Missing prediction files: {'example_2.bio'}.",
+        ),
+        (
+            [
+                DATA / "labels" / "example_0.bio",
+                DATA / "labels" / "example_2.bio",
+            ],
+            [
+                DATA / "predictions" / "example_0.bio",
+                DATA / "predictions" / "example_1.bio",
+                DATA / "predictions" / "example_2.bio",
+            ],
+            "Missing label files: {'example_1.bio'}.",
+        ),
+        (
+            [
+                DATA / "labels" / "example_0.bio",
+                DATA / "labels" / "example_2.bio",
+            ],
+            [
+                DATA / "predictions" / "example_1.bio",
+                DATA / "predictions" / "example_2.bio",
+            ],
+            "Missing prediction files: {'example_0.bio'}.\nMissing label files: {'example_1.bio'}.",
+        ),
+    ),
+)
+def test_check_complete_raise(labels, predictions, message):
+    with pytest.raises(FileNotFoundError, match=message):
+        check_complete(labels, predictions)
+
+
+def test_load_dataset():
+    label_dir = DATA / "labels"
+    prediction_dir = DATA / "predictions"
+    documents = load_dataset(label_dir, prediction_dir)
+    for i in range(3):
+        filename = f"example_{i}.bio"
+        assert documents[i] == (
+            Document.from_file(label_dir / filename),
+            Document.from_file(prediction_dir / filename),
+        )
+
+
+@pytest.mark.parametrize(
+    "label_dir, prediction_dir, message",
+    (
+        (
+            DATA / "labels_empty",
+            DATA / "predictions",
+            "Empty label directory",
+        ),
+        (
+            DATA / "labels",
+            DATA / "predictions_empty",
+            "Empty prediction directory",
+        ),
+    ),
+)
+def test_load_empty_dataset(label_dir, prediction_dir, message):
+    with pytest.raises(FileNotFoundError, match=f"^{message}: .*"):
+        load_dataset(label_dir, prediction_dir)