diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3703391f03074a9efb507832eeba643a8e901b62..13d006c50a5e63a20844abf496da099f270eb336 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -8,7 +8,7 @@ repos:
     rev: 22.6.0
     hooks:
     - id: black
-  - repo: https://gitlab.com/pycqa/flake8
+  - repo: https://github.com/pycqa/flake8
     rev: 3.9.2
     hooks:
       - id: flake8
diff --git a/README.md b/README.md
index af2f58c55727ef243b41b2a261bc03f5498504f9..e66d25e031e0cb4b79d073a886ad973095721ee4 100644
--- a/README.md
+++ b/README.md
@@ -104,19 +104,19 @@ The available arguments are
 
 | Parameter                      | Description                                                                         | Type     | Default |
 | ------------------------------ | ----------------------------------------------------------------------------------- | -------- | ------- |
-| `--parent`                       | UUID of the folder to import from Arkindex. You may specify multiple UUIDs.         | str/uuid |         |
-| `--element-type`                 | Type of the elements to extract. You may specify multiple types.                    | str      |         |
-| `--output`                       | Folder where the data will be generated. Must exist.                                | Path     |         |
-| `--load-entities`                | Extract text with their entities. Needed for NER tasks.                             | bool     | False   |
-| `--tokens`                       | Mapping between starting tokens and end tokens. Needed for NER tasks.               | Path     |         |
-| `--use-existing-split`           | Use the specified folder IDs for the dataset split.                                 | bool     |         |
-| `--train-folder`                 | ID of the training folder to import from Arkindex.                                  | uuid     |         |
-| `--val-folder`                   | ID of the validation folder to import from Arkindex.                                | uuid     |         |
-| `--test-folder`                  | ID of the training folder to import from Arkindex.                                  | uuid     |         |
-| `--transcription-worker-version` | Filter transcriptions by worker_version. Use ‘manual’ for manual filtering.         | str/uuid |         |
-| `--entity-worker-version`        | Filter transcriptions entities by worker_version. Use ‘manual’ for manual filtering | str/uuid |         |
-| `--train-prob`                   | Training set split size                                                             | float    | 0,7     |
-| `--val-prob`                     | Validation set split size                                                           | float    | 0,15    |
+| `--parent`                       | UUID of the folder to import from Arkindex. You may specify multiple UUIDs.         | `str/uuid` |         |
+| `--element-type`                 | Type of the elements to extract. You may specify multiple types.                    | `str`      |         |
+| `--output`                       | Folder where the data will be generated. Must exist.                                | `Path`     |         |
+| `--load-entities`                | Extract text with their entities. Needed for NER tasks.                             | `bool`     | `False`   |
+| `--tokens`                       | Mapping between starting tokens and end tokens. Needed for NER tasks.               | `Path`     |         |
+| `--use-existing-split`           | Use the specified folder IDs for the dataset split.                                 | `bool`     |         |
+| `--train-folder`                 | ID of the training folder to import from Arkindex.                                  | `uuid`     |         |
+| `--val-folder`                   | ID of the validation folder to import from Arkindex.                                | `uuid`     |         |
+| `--test-folder`                  | ID of the testing folder to import from Arkindex.                                   | `uuid`     |         |
+| `--transcription-worker-version` | Filter transcriptions by `worker_version`. Use `manual` for manual filtering.       | `str/uuid` |         |
+| `--entity-worker-version`        | Filter transcription entities by `worker_version`. Use `manual` for manual filtering. | `str/uuid` |         |
+| `--train-prob`                   | Training set split size                                                             | `float`    | `0.7`     |
+| `--val-prob`                     | Validation set split size                                                           | `float`    | `0.15`    |
 
 The `--tokens` argument expects a file with the following format.
 ```yaml
@@ -171,7 +171,7 @@ To use the data from three folders as **training**, **validation** and **testing
 ```shell
 teklia-dan extract \
     --use-existing-split \
-    --train-folder 2275529a-1ec5-40ce-a516-42ea7ada858c 
+    --train-folder 2275529a-1ec5-40ce-a516-42ea7ada858c \
     --val-folder af9b38b5-5d95-417d-87ec-730537cb1898 \
     --test-folder 6ff44957-0e65-48c5-9d77-a178116405b2 \
     --element-type page \
@@ -193,4 +193,3 @@ teklia-dan extract \
 
 #### Synthetic data generation
 `teklia-dan generate` with multiple arguments
-
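The `--tokens` argument in the README table above maps entity names to starting and end tokens for NER extraction. A minimal sketch of loading such a YAML mapping follows; the entity names, token characters, `start`/`end` key layout, and the `parse_token_mapping` helper are all illustrative assumptions, not the project's documented schema or API.

```python
# Illustrative sketch only: the entity names, token characters and the
# start/end key layout are assumptions about the --tokens YAML file,
# not the project's documented schema.
import yaml

TOKENS_YAML = """
surname:
  start: "\u24c8"  # hypothetical start token
  end: "\u24e2"    # hypothetical end token
date:
  start: "\u24b9"
  end: "\u24d3"
"""

def parse_token_mapping(raw: str) -> dict:
    """Return {entity: (start_token, end_token)} from the YAML text."""
    data = yaml.safe_load(raw)
    return {name: (spec["start"], spec["end"]) for name, spec in data.items()}

start, end = parse_token_mapping(TOKENS_YAML)["surname"]
print(f"{start}Dupont{end}")  # an entity wrapped with its start/end tokens
```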
diff --git a/dan/datasets/extract/extract_from_arkindex.py b/dan/datasets/extract/extract_from_arkindex.py
index 5c3818be99b27ed43bb803fac02c4c0da4a9995b..b0274f9859c45dbaa50416a8305cb130a885c1dc 100644
--- a/dan/datasets/extract/extract_from_arkindex.py
+++ b/dan/datasets/extract/extract_from_arkindex.py
@@ -4,17 +4,18 @@
 Extract dataset from Arkindex using API.
 """
 
-from collections import defaultdict
 import logging
 import os
 import pathlib
 import random
 import uuid
+from collections import defaultdict
 
 import imageio.v2 as iio
 from arkindex import ArkindexClient, options_from_env
 from tqdm import tqdm
 
+from dan import logger
 from dan.datasets.extract.utils import (
     insert_token,
     parse_tokens,
@@ -23,9 +24,6 @@ from dan.datasets.extract.utils import (
     save_text,
 )
 
-from dan import logger
-
-
 IMAGES_DIR = "images"  # Subpath to the images directory.
 LABELS_DIR = "labels"  # Subpath to the labels directory.
 MANUAL_SOURCE = "manual"
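The `IMAGES_DIR` and `LABELS_DIR` constants in `extract_from_arkindex.py` suggest the extractor writes images and labels as subpaths of the `--output` folder. A sketch of that layout, assuming one subfolder per dataset split; the split names and the `create_output_tree` helper are assumptions, not the module's actual code.

```python
# Sketch of the output layout implied by IMAGES_DIR / LABELS_DIR; the
# per-split subfolders and this helper are assumptions, not the
# extractor's actual implementation.
import pathlib

IMAGES_DIR = "images"  # Subpath to the images directory.
LABELS_DIR = "labels"  # Subpath to the labels directory.

def create_output_tree(output: pathlib.Path) -> None:
    """Create images/ and labels/ subfolders for each dataset split."""
    for subdir in (IMAGES_DIR, LABELS_DIR):
        for split in ("train", "val", "test"):
            (output / subdir / split).mkdir(parents=True, exist_ok=True)

create_output_tree(pathlib.Path("my_dataset"))
```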
diff --git a/dan/datasets/extract/utils.py b/dan/datasets/extract/utils.py
index 4b1aa93f07d042aa2d895083d3590adfa995af75..6d47943fb8fb2ce72b4567348368d9e3e9c28952 100644
--- a/dan/datasets/extract/utils.py
+++ b/dan/datasets/extract/utils.py
@@ -1,9 +1,9 @@
 # -*- coding: utf-8 -*-
-import yaml
 import json
 import random
 
 import cv2
+import yaml
 
 random.seed(42)
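`utils.py` seeds Python's RNG with `random.seed(42)`, which together with the `--train-prob`/`--val-prob` defaults (0.7 / 0.15) implies a deterministic random split of the extracted elements. A minimal sketch of such a split; `split_dataset` is a hypothetical helper, not the function the module exposes.

```python
# Minimal sketch of a deterministic split matching the seeded RNG in
# dan/datasets/extract/utils.py and the --train-prob / --val-prob
# defaults; split_dataset is a hypothetical helper, not the project's.
import random

random.seed(42)  # same fixed seed as in dan/datasets/extract/utils.py

def split_dataset(element_ids, train_prob=0.7, val_prob=0.15):
    """Shuffle once, then slice into train/val/test subsets."""
    ids = list(element_ids)
    random.shuffle(ids)
    n_train = int(train_prob * len(ids))
    n_val = int(val_prob * len(ids))
    return ids[:n_train], ids[n_train : n_train + n_val], ids[n_train + n_val :]

train, val, test = split_dataset(f"element-{i}" for i in range(20))
print(len(train), len(val), len(test))  # 14 3 3
```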