diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d0e03edff4763ef7ec4b616915b7447ea584c59d..77842d92a1ca6f9bd7f09f4fd0605e8961914930 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -3,7 +3,6 @@ repos:
     rev: v4.5.0
     hooks:
       - id: check-ast
-      - id: check-docstring-first
       - id: check-executables-have-shebangs
       - id: check-merge-conflict
       - id: check-symlinks
diff --git a/README.md b/README.md
index abf5283684939cea4d0f81d82fe5da51fbe08e00..05718c8d6512883458dc5233b5fad43ebd87a86a 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,6 @@
-# BIO Parser
+# BIO2 Parser
+
+**Disclaimer**: This package only supports BIO2 and doesn't support BIO (yet). More on the distinction between formats in [Wikipedia](https://en.wikipedia.org/wiki/Inside%E2%80%93outside%E2%80%93beginning_(tagging)).
 
 ## Documentation
 
diff --git a/bio_parser/__init__.py b/bio_parser/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..e142d5a75614f7be2c8b9f89afe60064e177db3c 100644
--- a/bio_parser/__init__.py
+++ b/bio_parser/__init__.py
@@ -0,0 +1,19 @@
+import logging
+import sys
+
+from rich import traceback
+from rich.console import Console
+from rich.logging import RichHandler
+
+# Colorful logging
+# https://rich.readthedocs.io/en/latest/logging.html
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(message)s",
+    datefmt="[%X]",
+    handlers=[RichHandler(console=Console(file=sys.stderr))],
+)
+
+# Add colorful tracebacks to crash with elegance
+# https://rich.readthedocs.io/en/latest/traceback.html
+traceback.install()
diff --git a/bio_parser/cli.py b/bio_parser/cli.py
index b37574a16daca85d1089f0413744004d439063ab..15d01c6301f8367a1697d702d1c97a7d1cbaf5b2 100644
--- a/bio_parser/cli.py
+++ b/bio_parser/cli.py
@@ -1,4 +1,7 @@
 import argparse
+import signal
+
+from bio_parser.parse import add_validate_parser
 
 
 def main():
@@ -7,15 +10,17 @@ def main():
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
 
-    # To add a sub-command, you can un-comment this snippet
-    # More information on https://docs.python.org/3/library/argparse.html#sub-commands
-    # commands = parser.add_subparsers(help="Explain your sub commands globally here")
-    # my_command = commands.add_parser("commandX", help="Do something")
-    # my_command.set_defaults(func=command_main)
-    # my_command.add_argument("element_id", type=uuid.UUID)
+    commands = parser.add_subparsers()
+    add_validate_parser(commands)
 
     args = vars(parser.parse_args())
     if "func" in args:
-        args.pop("func")(**args)
+        # Run the subcommand's function
+        try:
+            status = args.pop("func")(**args)
+            parser.exit(status=status)
+        except KeyboardInterrupt:
+            # Quit silently on ^C with the conventional SIGINT exit status (128 + 2 = 130)
+            parser.exit(status=128 + signal.SIGINT)
     else:
         parser.print_help()
diff --git a/bio_parser/parse/__init__.py b/bio_parser/parse/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..06ae06dc92286f48f1a6652f6d3d3244933c70d4
--- /dev/null
+++ b/bio_parser/parse/__init__.py
@@ -0,0 +1,28 @@
+"""
+Validate a given BIO file.
+"""
+
+from argparse import ArgumentParser, ArgumentTypeError
+from pathlib import Path
+
+from bio_parser.parse.validate import run
+
+
+def _check_bio_ext(filename: str) -> Path:
+    filepath = Path(filename)
+    if filepath.suffix != ".bio":
+        raise ArgumentTypeError(f"BIO files should have the `.bio` extension: `{filename}`")
+    return filepath
+
+
+def add_validate_parser(subcommands):
+    parser: ArgumentParser = subcommands.add_parser(
+        "validate",
+        help=__doc__,
+        description=__doc__,
+    )
+    parser.set_defaults(func=run)
+
+    parser.add_argument(
+        "filepaths", help="Files to validate.", type=_check_bio_ext, nargs="*"
+    )
diff --git a/bio_parser/parse/document.py b/bio_parser/parse/document.py
new file mode 100644
index 0000000000000000000000000000000000000000..983c5ae551bb55d85164961e7f9271bff3ed2b0f
--- /dev/null
+++ b/bio_parser/parse/document.py
@@ -0,0 +1,393 @@
+"""Parse BIO files."""
+import logging
+import re
+from dataclasses import dataclass, field
+from enum import Enum
+from itertools import pairwise
+from operator import attrgetter
+from pathlib import Path
+
+PARSE_TOKEN = re.compile(r"(?P<text>\S+) (?P<tag>[IOB])(-(?P<ent>\S+))?$")
+"""Regex that parses a line of a BIO file"""
+
+_logger = logging.getLogger(__name__)
+
+
+class Tag(Enum):
+    """Supported Beginning-Inside-Outside tags."""
+
+    BEGINNING = "B"
+    INSIDE = "I"
+    OUTSIDE = "O"
+
+
+def _make_ner_label(tag: Tag, label: str | None) -> str:
+    """Create the corresponding IOB label from the given tag and label.
+
+    Args:
+        tag (Tag): Beginning-Inside-Outside tag.
+        label (str | None): Label of the token.
+
+    Returns:
+        str: Corresponding IOB label.
+
+    Examples:
+        >>> _make_ner_label(tag=Tag.BEGINNING, label="GPE")
+        'B-GPE'
+        >>> _make_ner_label(tag=Tag.INSIDE, label="GPE")
+        'I-GPE'
+        >>> _make_ner_label(tag=Tag.OUTSIDE, label=None)
+        'O'
+
+    """
+    if tag == Tag.OUTSIDE:
+        assert label is None, f"Invalid label `{label}` with tag `{tag.value}`"
+        return tag.value
+
+    assert label, f"No named entity label found with tag `{tag.value}`"
+
+    return f"{tag.value}-{label}"
+
+
+@dataclass(slots=True)
+class Token:
+    """Token as tokenized in the BIO document."""
+
+    idx: int
+    """Index of the token in the document."""
+    text: str
+    """Text representation of the token."""
+
+    @property
+    def _data(self) -> re.Match:
+        parsed = PARSE_TOKEN.match(self.text)
+        assert parsed is not None, "Could not parse annotation."
+        return parsed
+
+    @property
+    def word(self) -> str:
+        """Text content of the token.
+
+        Examples:
+            >>> Token(idx=0, text="Chicken B-Animal").word
+            'Chicken'
+        """
+        return self._data.group("text")
+
+    @property
+    def label(self) -> str | None:
+        """Named entity type of this token.
+
+        Examples:
+            >>> Token(idx=0, text="Chicken B-Animal").label
+            'Animal'
+        """
+        return self._data.group("ent")
+
+    @property
+    def tag(self) -> Tag:
+        """IOB code of named entity tag.
+
+        Examples:
+            >>> Token(idx=0, text="Chicken B-Animal").tag
+            <Tag.BEGINNING: 'B'>
+        """
+        return Tag(self._data.group("tag"))
+
+    @property
+    def iob_label(self) -> str:
+        """IOB label (Tag + Entity).
+
+        Examples:
+            >>> Token(idx=0, text="Chicken B-Animal").iob_label
+            'B-Animal'
+        """
+        return _make_ner_label(tag=self.tag, label=self.label)
+
+    @property
+    def labels(self) -> list[str]:
+        """Character-level IOB labels.
+
+        Examples:
+            >>> Token(idx=0, text="Some B-PER").labels
+            ['B-PER', 'I-PER', 'I-PER', 'I-PER']
+
+            >>> Token(idx=1, text="one I-PER").labels
+            ['I-PER', 'I-PER', 'I-PER']
+        """
+        if self.tag == Tag.OUTSIDE:
+            return [self.iob_label] * len(self.word)
+        return [self.iob_label] + [
+            _make_ner_label(tag=Tag.INSIDE, label=self.label),
+        ] * (len(self.word) - 1)
+
+    @property
+    def chars(self) -> list[str]:
+        """The list of characters making up the token.
+
+        Examples:
+            >>> Token(idx=0, text="Chicken B-Animal").chars
+            ['C', 'h', 'i', 'c', 'k', 'e', 'n']
+        """
+        return list(self.word)
+
+
+@dataclass(slots=True)
+class Span:
+    """Representation of a Named Entity Span."""
+
+    tokens: list[Token] = field(default_factory=list)
+    """List of tokens in the Span"""
+
+    @property
+    def text(self) -> str:
+        """Join every word of the span by a whitespace.
+
+        Examples:
+            >>> Span(tokens=[
+            ...         Token(idx=0, text="Chicken B-Animal"),
+            ...         Token(idx=1, text="run I-Animal")
+            ... ]).text
+            'Chicken run'
+        """
+        return " ".join(map(attrgetter("word"), self.tokens))
+
+    @property
+    def label(self) -> str | None:
+        """The named entity type of this span. All tokens composing the span have the same.
+
+        Examples:
+            >>> Span(tokens=[
+            ...         Token(idx=0, text="Chicken B-Animal"),
+            ...         Token(idx=1, text="run I-Animal")
+            ... ]).label
+            'Animal'
+        """
+        if not self.tokens:
+            return
+        return self.tokens[0].label
+
+    @property
+    def idx(self) -> int | None:
+        """The index of the first token of the span.
+
+        Examples:
+            >>> Span(tokens=[
+            ...         Token(idx=0, text="Chicken B-Animal"),
+            ...         Token(idx=1, text="run I-Animal")
+            ... ]).idx
+            0
+        """
+        if not self.tokens:
+            return None
+        return self.tokens[0].idx
+
+    @property
+    def end(self) -> int | None:
+        """The index of the first token after the span.
+
+        Examples:
+            >>> Span(tokens=[
+            ...         Token(idx=0, text="Chicken B-Animal"),
+            ...         Token(idx=1, text="run I-Animal")
+            ... ]).end
+            2
+        """
+        if not self.tokens:
+            return
+        return self.tokens[-1].idx + 1
+
+    def add_token(self, token: Token) -> None:
+        """Add the provided token to this span. The token's label must match the Span's.
+
+        Args:
+            token (Token): Token to add to this span.
+        """
+        if self.label:
+            assert (
+                token.label == self.label
+            ), "This token doesn't have the same label as this span."
+        self.tokens.append(token)
+
+    @property
+    def labels(self) -> list[str]:
+        """Character-level IOB labels.
+
+        Examples:
+            >>> Span(tokens=[
+            ...         Token(idx=0, text="Chicken B-Animal"),
+            ...         Token(idx=1, text="run I-Animal")
+            ... ]).labels
+            ['B-Animal', 'I-Animal', 'I-Animal', 'I-Animal', 'I-Animal', 'I-Animal', 'I-Animal', 'I-Animal', 'I-Animal', 'I-Animal', 'I-Animal']
+        """
+        if not self.tokens:
+            return []
+
+        return [_make_ner_label(tag=Tag.BEGINNING, label=self.label)] + [
+            _make_ner_label(tag=Tag.INSIDE, label=self.label),
+        ] * (len(self.text) - 1)
+
+    @property
+    def chars(self) -> list[str]:
+        """Characters making up the span.
+
+        Examples:
+            >>> Span(
+            ...     tokens=[
+            ...             Token(idx=0, text="Chicken B-Animal"),
+            ...             Token(idx=1, text="run I-Animal")
+            ...     ]
+            ... ).chars
+            ['C', 'h', 'i', 'c', 'k', 'e', 'n', ' ', 'r', 'u', 'n']
+        """
+        return list(self.text)
+
+
+@dataclass(slots=True)
+class Document:
+    """Representation of a BIO document."""
+
+    bio_repr: str
+    """Full BIO representation of the Document"""
+    tokens: list[Token] = field(default_factory=list)
+    """List of the tokens in the Document"""
+
+    spans: list[Span] = field(default_factory=list)
+    """List of the spans in the Document"""
+
+    def __post_init__(self):
+        """Parses the tokens and the entity spans in the document."""
+        span: Span | None = None
+        for idx, line in enumerate(self.bio_repr.splitlines()):
+            try:
+                token = Token(idx=idx, text=line)
+                self.tokens.append(token)
+                # Build spans
+                match token.tag:
+                    case Tag.OUTSIDE:
+                        # Close current span if present
+                        if span:
+                            self.spans.append(span)
+                            span = None
+                    case Tag.INSIDE:
+                        assert span, f"Found `{Tag.INSIDE}` before `{Tag.BEGINNING}`."
+                        span.add_token(token)
+                    case Tag.BEGINNING:
+                        # Close current span if present
+                        if span:
+                            self.spans.append(span)
+                        # Start new one
+                        span = Span()
+                        span.add_token(token)
+            except AssertionError as e:
+                _logger.error(f"Error on token n°{token.idx}: {e}")
+                raise Exception(str(e)) from e
+
+        # Last span
+        if span and span.tokens:
+            self.spans.append(span)
+
+    @property
+    def words(self) -> list[str]:
+        """List of words making up the document."""
+        return list(map(attrgetter("word"), self.tokens))
+
+    @property
+    def entities(self) -> list[tuple[str, str]]:
+        """List of entities making up the document."""
+        return list(
+            map(attrgetter("label", "text"), filter(attrgetter("label"), self.spans)),
+        )
+
+    @property
+    def word_entities(self) -> list[tuple[str, str]]:
+        """List of entities in the words making up the document."""
+        return list(
+            map(attrgetter("label", "word"), filter(attrgetter("label"), self.tokens)),
+        )
+
+    @property
+    def text(self) -> str:
+        """Join every word of the span by a whitespace."""
+        return " ".join(map(attrgetter("word"), self.tokens))
+
+    @property
+    def char_labels(self) -> list[str]:
+        r"""Character-level IOB labels.
+
+        Spaces between two tokens with the same label get the same label, others get 'O'.
+
+        Examples:
+            The space between 'I' and 'run' is tagged as 'I-Animal', because it's the same named entity label.
+            >>> Document(bio_repr="I B-Animal\nrun I-Animal").char_labels
+            ['B-Animal', 'I-Animal', 'I-Animal', 'I-Animal', 'I-Animal']
+
+            The space between 'run' and 'fast' is tagged as 'O', because it's not the same label.
+            >>> Document(bio_repr="run B-Animal\nfast O").char_labels
+            ['B-Animal', 'I-Animal', 'I-Animal', 'O', 'O', 'O', 'O', 'O']
+        """
+        tags = []
+        for token, next_token in pairwise(self.tokens + [None]):
+            # Add token tags
+            tags.extend(token.labels)
+            if next_token and token.label == next_token.label:
+                tags.append(next_token.iob_label)
+            elif next_token:
+                tags.append(Tag.OUTSIDE.value)
+        return tags
+
+    @property
+    def word_labels(self) -> list[str]:
+        r"""Word-level IOB labels.
+
+        Spaces between two tokens with the same label get the same label, others get 'O'.
+
+        Examples:
+            The space between 'I' and 'run' is tagged as 'I-Animal', because it's the same named entity label.
+            >>> Document(bio_repr="I B-Animal\nrun I-Animal").word_labels
+            ['Animal', 'Animal', 'Animal']
+
+            The space between 'run' and 'fast' is tagged as 'O', because it's not the same label.
+            >>> Document(bio_repr="run B-Animal\nfast O").word_labels
+            ['Animal', 'O', 'O']
+        """
+        tags = []
+        for token, next_token in pairwise(self.tokens + [None]):
+            # Add token tags
+            tags.append(token.label or Tag.OUTSIDE.value)
+
+            # Token of the next space
+            if (
+                # This is not the last token
+                next_token
+                # This token is not tagged as O
+                and token.tag != Tag.OUTSIDE
+                # Same label between consecutive tokens
+                and token.label == next_token.label
+            ):
+                tags.append(token.label)
+            elif next_token:
+                tags.append(Tag.OUTSIDE.value)
+        return tags
+
+    @property
+    def chars(self) -> list[str]:
+        r"""Characters making up the token.
+
+        Examples:
+            >>> Document(bio_repr="I B-Animal\nrun I-Animal").chars
+            ['I', ' ', 'r', 'u', 'n']
+        """
+        return list(self.text)
+
+    @classmethod
+    def from_file(cls, filepath: Path) -> "Document":
+        """Load a Document from a IOB file.
+
+        Args:
+            filepath (Path): Path to the file to load.
+
+        Returns:
+            Document: Parsed document
+        """
+        return Document(filepath.read_text())
diff --git a/bio_parser/parse/validate.py b/bio_parser/parse/validate.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f83192e82329f741ca5071ec912a6dc1a9c66c2
--- /dev/null
+++ b/bio_parser/parse/validate.py
@@ -0,0 +1,26 @@
+"""Validates the construction of the BIO file."""
+import json
+import logging
+from dataclasses import asdict
+from pathlib import Path
+
+from bio_parser.parse.document import Document
+
+logger = logging.getLogger(__name__)
+
+
+def run(filepaths: list[Path]) -> None:
+    """Validate the construction of multiple BIO files.
+
+    Args:
+        filepaths (list[Path]): Files to check.
+    """
+    for filepath in filepaths:
+        logger.info(f"Parsing file @ `{filepath}`")
+        try:
+            doc = Document.from_file(filepath)
+            filepath.with_suffix(".json").write_text(json.dumps(asdict(doc), indent=2))
+        except Exception as e:
+            logger.error(f"Could not load the file @ `{filepath}`: {e}")
+        else:
+            logger.info(f"The file @ `{filepath}` is valid!")
diff --git a/docs/reference/parse/document.md b/docs/reference/parse/document.md
new file mode 100644
index 0000000000000000000000000000000000000000..24a7d4a9c71ef7692a350058a6af63e3933c70a4
--- /dev/null
+++ b/docs/reference/parse/document.md
@@ -0,0 +1,3 @@
+# Document
+
+::: bio_parser.parse.document
\ No newline at end of file
diff --git a/docs/reference/parse/validate.md b/docs/reference/parse/validate.md
new file mode 100644
index 0000000000000000000000000000000000000000..a7da7f6d6f4025b7ad4e4f1e07cc68616221018a
--- /dev/null
+++ b/docs/reference/parse/validate.md
@@ -0,0 +1,3 @@
+# Validate
+
+::: bio_parser.parse.validate
\ No newline at end of file
diff --git a/docs/usage/index.md b/docs/usage/index.md
new file mode 100644
index 0000000000000000000000000000000000000000..d9c5583da89c08c46e0fc5993c4ef1309f8663cd
--- /dev/null
+++ b/docs/usage/index.md
@@ -0,0 +1,6 @@
+# Usage
+
+When `bio-parser` is installed in your environment, you may use the following commands:
+
+`bio-parser validate`
+: To parse and validate the structure of one or more BIO files. More details in the [dedicated page](./validate.md).
diff --git a/docs/usage/validate.md b/docs/usage/validate.md
new file mode 100644
index 0000000000000000000000000000000000000000..1bf35b1467c5c414b95703e38bed7a1b22aec320
--- /dev/null
+++ b/docs/usage/validate.md
@@ -0,0 +1,55 @@
+# Validation
+
+Use the `bio-parser validate` command to parse and validate the structure of one or more BIO2 files.
+
+## Supported format
+
+The BIO2 format is a common tagging format in NER (Named entities recognition) tasks. More details about it on [Wikipedia](https://en.wikipedia.org/wiki/Inside%E2%80%93outside%E2%80%93beginning_(tagging)).
+
+An example of such a tagging format is given below.
+```plaintext
+Alex B-PER
+is O
+going O
+to O
+Los B-LOC
+Angeles I-LOC
+in O
+California B-LOC
+```
+
+## Usage
+
+You can specify one or more paths to your BIO files. The extension used has to be `.bio`.
+The parser will check them one by one and report the first error encountered.
+
+```shell
+$ bio-parser validate input.bio
+[12:37:20] INFO     Parsing file @ `input.bio`                                                                                            validate.py:19
+           INFO     The file @ `input.bio` is valid!                                                                                      validate.py:25
+```
+
+With multiple files:
+```shell
+$ bio-parser validate input1.bio input2.bio
+[12:37:20] INFO     Parsing file @ `input1.bio`                                                                                            validate.py:19
+           INFO     The file @ `input1.bio` is valid!                                                                                      validate.py:25
+[12:37:20] INFO     Parsing file @ `input2.bio`                                                                                            validate.py:19
+           INFO     The file @ `input2.bio` is valid!                                                                                      validate.py:25
+```
+
+With an invalid file.
+```shell
+$ bio-parser validate invalid.bio
+[12:41:16] INFO     Parsing file @ `invalid.bio`                                                                                                               validate.py:19
+           ERROR    Error on token n°0: Found `Tag.INSIDE` before `Tag.BEGINNING`.                                                                            document.py:283
+           ERROR    Could not load the file @ `invalid.bio`:                                                                                                   validate.py:24
```
+```
+
+In addition to validating the structure of the file, a JSON representation of the BIO file is also saved at the same location.
+
+This JSON file has three keys:
+
+- `bio_repr`: The string in BIO format passed to the command,
+- `tokens`: the list of tokens in the file, with their index and text,
+- `spans`: the list of NER entities found and their tokens.
diff --git a/mkdocs.yml b/mkdocs.yml
index d4caef47259954802b7299fca1824909552ef532..d12830219d4a8bf05be273048a587c34f9825d58 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -61,6 +61,9 @@ nav:
   - Get started:
     - get_started/index.md
     - Development: get_started/development.md
+  - Usage:
+    - usage/index.md
+    - Validation: usage/validate.md
   # defer to literate-nav
   - Code Reference: reference/
 
diff --git a/pyproject.toml b/pyproject.toml
index ecad63fdd828e504d5ce8dcf2138a6784f467bf2..bc0dff87b554bb20d8d38e6086c895c4565695b0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,6 +51,8 @@ ignore = [
     # On top of the Google convention, disable `D417`, which requires
     # documentation for every function parameter.
     "D417",
+    # May cause some conflicts
+    "COM812",
 ]
 select = [
     # pycodestyle
diff --git a/requirements.txt b/requirements.txt
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..9bc85868e90ab73e44625aa50199c64c222351f6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1 @@
+rich==13.7.0
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000000000000000000000000000000000000..c0a662153efc99ebddde92082e540481eb46e360
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,3 @@
+from pathlib import Path
+
+FIXTURES = Path(__file__).with_name("fixtures")
diff --git a/tests/fixtures/parse/valid.bio b/tests/fixtures/parse/valid.bio
new file mode 100644
index 0000000000000000000000000000000000000000..c924d42b0355ca8379baae7ac9be122deb57b556
--- /dev/null
+++ b/tests/fixtures/parse/valid.bio
@@ -0,0 +1,7 @@
+San B-GPE
+Francisco I-GPE
+considers O
+banning B-VERB
+sidewalk O
+delivery O
+robots O
\ No newline at end of file
diff --git a/tests/parse/__init__.py b/tests/parse/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..62facd4acb6d8fd508632266c4e06640cf7e892c
--- /dev/null
+++ b/tests/parse/__init__.py
@@ -0,0 +1,3 @@
+from tests.conftest import FIXTURES
+
+DATA_DIR = FIXTURES / "parse"
diff --git a/tests/parse/test_document.py b/tests/parse/test_document.py
new file mode 100644
index 0000000000000000000000000000000000000000..496a046861d67b1427cb0a42744704769abf3dbe
--- /dev/null
+++ b/tests/parse/test_document.py
@@ -0,0 +1,200 @@
+from bio_parser.parse.document import Document, Span, Tag, Token
+from tests.parse import DATA_DIR
+import pytest
+
+from bio_parser.parse.document import _make_ner_label
+
+FILEPATH = DATA_DIR / "valid.bio"
+
+
+@pytest.fixture
+def document():
+    return Document.from_file(FILEPATH)
+
+
+@pytest.mark.parametrize(
+    "tag, label, output",
+    (
+        (Tag.OUTSIDE, None, "O"),
+        (Tag.BEGINNING, "GPE", "B-GPE"),
+        (Tag.INSIDE, "GPE", "I-GPE"),
+    ),
+)
+def test_make_ner_label(tag: Tag, label: str, output: str):
+    assert _make_ner_label(tag=tag, label=label) == output
+
+
+@pytest.mark.parametrize(
+    "tag, label, error",
+    (
+        (Tag.OUTSIDE, "GPE", "Invalid label `GPE` with tag `O`"),
+        (Tag.BEGINNING, None, "No named entity label found with tag `B`"),
+        (Tag.INSIDE, None, "No named entity label found with tag `I`"),
+    ),
+)
+def test_make_ner_label_invalid(tag: Tag, label: str, error: str):
+    with pytest.raises(AssertionError, match=error):
+        _ = _make_ner_label(tag=tag, label=label)
+
+
+def test_parse_document(document: Document):
+    # Check words
+    assert document.words == [
+        "San",
+        "Francisco",
+        "considers",
+        "banning",
+        "sidewalk",
+        "delivery",
+        "robots",
+    ]
+
+    # Check entities
+    assert document.entities == [
+        ("GPE", "San Francisco"),
+        ("VERB", "banning"),
+    ]
+
+    # Check word entities
+    assert document.word_entities == [
+        ("GPE", "San"),
+        ("GPE", "Francisco"),
+        ("VERB", "banning"),
+    ]
+
+    # Check text
+    assert document.text == "San Francisco considers banning sidewalk delivery robots"
+
+    # Check labels
+    assert document.char_labels == ["B-GPE"] + ["I-GPE"] * len("an Francisco") + [
+        "O"
+    ] * len(" considers ") + ["B-VERB"] + ["I-VERB"] * len("anning") + ["O"] * len(
+        " sidewalk delivery robots"
+    )
+    print(document.word_labels)
+    assert document.word_labels == [
+        "GPE",
+        "GPE",
+        "GPE",
+        "O",
+        "O",
+        "O",
+        "VERB",
+        "O",
+        "O",
+        "O",
+        "O",
+        "O",
+        "O",
+    ]
+
+    # Check chars
+    assert document.chars == list(
+        "San Francisco considers banning sidewalk delivery robots"
+    )
+
+
+def test_parse_span(document: Document):
+    span: Span = document.spans[0]
+
+    # Check text
+    assert span.text == "San Francisco"
+
+    # Check label
+    assert span.label == "GPE"
+
+    # Check idx
+    assert span.idx == 0
+
+    # Check end
+    assert span.end == 2
+
+    # Check chars
+    assert span.chars == list("San Francisco")
+
+    # Check labels
+    assert span.labels == ["B-GPE"] + ["I-GPE"] * len("an Francisco")
+
+
+def test_parse_token(document: Document):
+    # B- token
+    token: Token = document.spans[0].tokens[0]
+
+    # Check word
+    assert token.word == "San"
+
+    # Check label
+    assert token.label == "GPE"
+
+    # Check label
+    assert token.tag == Tag.BEGINNING
+
+    # Check IOB Label
+    assert token.iob_label == "B-GPE"
+
+    # Check labels
+    assert token.labels == ["B-GPE", "I-GPE", "I-GPE"]
+
+    # Check chars
+    assert token.chars == ["S", "a", "n"]
+
+    # I- token
+    token: Token = document.spans[0].tokens[1]
+
+    # Check word
+    assert token.word == "Francisco"
+
+    # Check label
+    assert token.label == "GPE"
+
+    # Check label
+    assert token.tag == Tag.INSIDE
+
+    # Check IOB Label
+    assert token.iob_label == "I-GPE"
+
+    # Check labels
+    assert token.labels == [
+        "I-GPE",
+        "I-GPE",
+        "I-GPE",
+        "I-GPE",
+        "I-GPE",
+        "I-GPE",
+        "I-GPE",
+        "I-GPE",
+        "I-GPE",
+    ]
+
+    # Check chars
+    assert token.chars == ["F", "r", "a", "n", "c", "i", "s", "c", "o"]
+
+    # O token
+    token: Token = document.tokens[-1]
+
+    # Check word
+    assert token.word == "robots"
+
+    # Check label
+    assert token.label is None
+
+    # Check label
+    assert token.tag == Tag.OUTSIDE
+
+    # Check IOB Label
+    assert token.iob_label == "O"
+
+    # Check labels
+    assert token.labels == ["O", "O", "O", "O", "O", "O"]
+
+    # Check chars
+    assert token.chars == ["r", "o", "b", "o", "t", "s"]
+
+
+@pytest.mark.parametrize(
+    "annotation",
+    ("Something something", "Something A-GPE", "Something GPE-A", "Something A"),
+)
+def test_invalid_token(annotation: str):
+    with pytest.raises(AssertionError, match="Could not parse annotation"):
+        _ = Token(idx=0, text=annotation).word
diff --git a/tests/parse/test_validate.py b/tests/parse/test_validate.py
new file mode 100644
index 0000000000000000000000000000000000000000..b9ef73f02e6bd67fb60a201ab99d59f98cb8e864
--- /dev/null
+++ b/tests/parse/test_validate.py
@@ -0,0 +1,38 @@
+import json
+from bio_parser.parse.validate import run as validate
+from tests.parse import DATA_DIR
+
+
+def test_valid():
+    filepath = DATA_DIR / "valid.bio"
+    validate([filepath])
+
+    # A JSON file should have been generated
+    output = filepath.with_suffix(".json")
+    assert output.exists()
+
+    # Check content of JSON
+    assert json.loads(output.read_text()) == {
+        "bio_repr": "San B-GPE\nFrancisco I-GPE\nconsiders O\nbanning B-VERB\nsidewalk O\ndelivery O\nrobots O",
+        "tokens": [
+            {"idx": 0, "text": "San B-GPE"},
+            {"idx": 1, "text": "Francisco I-GPE"},
+            {"idx": 2, "text": "considers O"},
+            {"idx": 3, "text": "banning B-VERB"},
+            {"idx": 4, "text": "sidewalk O"},
+            {"idx": 5, "text": "delivery O"},
+            {"idx": 6, "text": "robots O"},
+        ],
+        "spans": [
+            {
+                "tokens": [
+                    {"idx": 0, "text": "San B-GPE"},
+                    {"idx": 1, "text": "Francisco I-GPE"},
+                ]
+            },
+            {"tokens": [{"idx": 3, "text": "banning B-VERB"}]},
+        ],
+    }
+
+    # Cleanup
+    output.unlink()
diff --git a/tests/test_cli.py b/tests/test_cli.py
deleted file mode 100644
index f4f53619168f8993841e5a85193b424a60085554..0000000000000000000000000000000000000000
--- a/tests/test_cli.py
+++ /dev/null
@@ -1,2 +0,0 @@
-def test_dummy():
-    assert True