README.md

# Fetch the sources and install from inside the checkout: pip resolves
# `requirements.txt` and `.` relative to the current directory, so both
# commands must run from the cloned repository, not from its parent.
git clone https://github.com/FactoDeepLearning/DAN.git
cd DAN
pip install -r requirements.txt
pip install -e .
import cv2
from dan.predict import DAN

# Files describing the trained model: weights, parameters and charset.
model_path = 'model.pt'
params_path = 'parameters.yml'
charset_path = 'charset.pkl'

# Read the image, then convert it from BGR to RGB channel order.
bgr_image = cv2.imread(IMAGE_PATH)
image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

# Instantiate DAN with 'cpu' (presumably the device name) and load the
# files above in "eval" mode.
model = DAN('cpu')
model.load(model_path, params_path, charset_path, mode="eval")

# With confidences=True the call yields two values: the predicted text and
# its confidence scores.
text, confidence_scores = model.predict(image, confidences=True)
---
# Entity token map: each top-level key is an entity type, and its `start` /
# `end` values are the single characters that open and close that entity.
# Every pair uses the circled lowercase letter for `start` and the matching
# circled uppercase letter for `end`.
# NOTE(review): presumably this is the `tokens.yml` file handed to
# `teklia-dan dataset extract --tokens` — confirm.
INTITULE:
  start: ⓘ
  end: Ⓘ
DATE:
  start: ⓓ
  end: Ⓓ
COTE_SERIE:
  start: ⓢ
  end: Ⓢ
ANALYSE_COMPL.:
  start: ⓒ
  end: Ⓒ
PRECISIONS_SUR_COTE:
  start: ⓟ
  end: Ⓟ
COTE_ARTICLE:
  start: ⓐ
  end: Ⓐ
CLASSEMENT:
  start: ⓛ
  end: Ⓛ
# Extract from a single parent: `--parent` receives one ID, `--element-type`
# is `page` and `--output` is `data`. `--load-entities` is combined with
# `--tokens tokens.yml` (presumably the entity start/end token map — confirm
# against the CLI documentation).
teklia-dan dataset extract \
    --parent 665e84ea-bd97-4912-91b0-1f4a844287ff \
    --element-type page \
    --output data \
    --load-entities \
    --tokens tokens.yml
# Same extraction, but `--parent` receives three space-separated IDs in a
# single invocation instead of one.
teklia-dan dataset extract \
    --parent 2275529a-1ec5-40ce-a516-42ea7ada858c af9b38b5-5d95-417d-87ec-730537cb1898 6ff44957-0e65-48c5-9d77-a178116405b2 \
    --element-type page \
    --output data \
    --load-entities \
    --tokens tokens.yml
# Extraction with `--use-existing-split`: one ID each is given to
# `--train-folder`, `--val-folder` and `--test-folder` instead of `--parent`.
# Every line except the last must end with a backslash; otherwise the shell
# ends the command there and tries to run the next line as a new command.
teklia-dan dataset extract \
    --use-existing-split \
    --train-folder 2275529a-1ec5-40ce-a516-42ea7ada858c \
    --val-folder af9b38b5-5d95-417d-87ec-730537cb1898 \
    --test-folder 6ff44957-0e65-48c5-9d77-a178116405b2 \
    --element-type page \
    --output data \
    --load-entities \
    --tokens tokens.yml
# Extraction where `--element-type` receives two values (`text_zone` and
# `annotation`) and `--parent-element-type` is `single_page`. No entity
# options (`--load-entities`, `--tokens`) are passed here.
teklia-dan dataset extract \
    --parent 48852284-fc02-41bb-9a42-4458e5a51615 \
    --element-type text_zone annotation \
    --parent-element-type single_page \
    --output data
# Run the `dataset format` subcommand on the dataset at `path/to/dataset`
# (a placeholder path) with `--image-format` set to `png`.
teklia-dan dataset format \
    --dataset path/to/dataset \
    --image-format png