diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index adc6833cc5a4c2898b26ce5b3ce776b93c7f7ad4..04d5b90e6db2a1835f01819cac9d22509169cb1d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -3,10 +3,6 @@ stages: - build - deploy -variables: - # Submodule clone - GIT_SUBMODULE_STRATEGY: recursive - lint: image: python:3.10 stage: test @@ -46,6 +42,14 @@ test: - apt-get update -q - apt-get install -q -y libgl1 + # Add netrc file + - | + echo " + machine gitlab.teklia.com + login gitlab-ci-token + password $CI_JOB_TOKEN + " > ~/.netrc + except: - schedules @@ -64,6 +68,15 @@ docker-build: except: - schedules + before_script: + # Add netrc file + - | + echo " + machine gitlab.teklia.com + login gitlab-ci-token + password $CI_JOB_TOKEN + " > ~/.netrc + script: - ci/build.sh @@ -75,7 +88,15 @@ docker-build: - public before_script: - - pip install -e .[docs] + # Add netrc file + - | + echo " + machine gitlab.teklia.com + login gitlab-ci-token + password $CI_JOB_TOKEN + " > ~/.netrc + + - pip install --index-url https://gitlab.teklia.com/api/v4/projects/210/packages/pypi/simple -e .[docs] script: - mkdocs build --strict --verbose diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 7d6f8a61515402d96d9d86de1546165d8d529a87..0000000000000000000000000000000000000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "nerval"] - path = nerval - url = ../../ner/nerval.git diff --git a/Dockerfile b/Dockerfile index 7d50ce7bc4a70b549a6830ebeb334681e8e1d6a7..4a42644db1fa2feb9bd50aea3c3972db47b0911a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,12 +7,10 @@ RUN apt-get -y update && \ WORKDIR /src -# Copy submodule data -COPY nerval nerval - # Copy DAN data COPY dan dan COPY requirements.txt *-requirements.txt setup.py VERSION README.md ./ -# Install DAN as a package -RUN pip install . --no-cache-dir +# Install DAN as a package with GitLab package registry +RUN --mount=type=secret,id=netrc,target=/root/.netrc \ + pip install . 
--no-cache-dir --index-url https://gitlab.teklia.com/api/v4/projects/210/packages/pypi/simple diff --git a/README.md b/README.md index f3526541b6e47690b0d4d5cf1b5e941206ef0a7c..fe3a6e147baa70e2a6aea8db95cc0aacc9e25cff 100644 --- a/README.md +++ b/README.md @@ -10,9 +10,20 @@ This is an open-source project, licensed using [the MIT license](https://opensou For development and tests purpose it may be useful to install the project as a editable package with pip. -- Use a virtualenv (e.g. with virtualenvwrapper `mkvirtualenv -a . dan`) -- Initialize the [`Nerval`](https://gitlab.teklia.com/ner/nerval) submodule (e.g. `git submodule update --init --recursive`) -- Install `dan` as a package (e.g. `pip install -e .`) +This package is based on a GitLab package registry containing all the nerval source code. +You need [a personal access token](https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html) and access to the [nerval repository](https://gitlab.teklia.com/ner/nerval) in order to install this module. You will need to add the below to your `~/.netrc` file: + +```shell +machine gitlab.teklia.com +login __token__ +password <YOUR_PERSONAL_TOKEN> +``` + +Then you can install the package as an editable package with pip: + +```shell +pip3 install --index-url https://gitlab.teklia.com/api/v4/projects/210/packages/pypi/simple -e . +``` ### Linter diff --git a/ci/build.sh b/ci/build.sh index f29f50f27b88056216e6f880bb713012fc4e9956..65956bf7bdc0c82360a6917f35b1d41504ae6c6a 100755 --- a/ci/build.sh +++ b/ci/build.sh @@ -17,7 +17,7 @@ fi IMAGE_TAG="$CI_REGISTRY_IMAGE:$VERSION" cd $CI_PROJECT_DIR -docker build -f Dockerfile . -t "$IMAGE_TAG" +docker build -f Dockerfile . 
-t "$IMAGE_TAG" --secret id=netrc,src=$HOME/.netrc # Publish the image on the main branch or on a tag if [ "$CI_COMMIT_REF_NAME" = "main" -o -n "$CI_COMMIT_TAG" ]; then diff --git a/dan/ocr/evaluate.py b/dan/ocr/evaluate.py index f8f1fc743803967384f217b2c8f7f35e48525ca0..76ebd42ec872fd3b3acbe8aca9707dbf5ca7f1c1 100644 --- a/dan/ocr/evaluate.py +++ b/dan/ocr/evaluate.py @@ -15,6 +15,9 @@ import numpy as np import torch import torch.multiprocessing as mp from edlib import align, getNiceAlignment +from nerval.evaluate import evaluate +from nerval.parse import parse_bio +from nerval.utils import print_results from prettytable import MARKDOWN, PrettyTable from dan.bio import convert @@ -22,9 +25,6 @@ from dan.ocr.manager.metrics import Inference from dan.ocr.manager.training import Manager from dan.ocr.utils import add_metrics_table_row, create_metrics_table, update_config from dan.utils import parse_tokens, read_json -from nerval.evaluate import evaluate -from nerval.parse import parse_bio -from nerval.utils import print_results logger = logging.getLogger(__name__) diff --git a/docs/get_started/development.md b/docs/get_started/development.md index 7bf6e665ce9340cf0ceaec137ca62b54c7b2e6c8..ee7739d82da238ca8eb8971faf906cf3a01614cf 100644 --- a/docs/get_started/development.md +++ b/docs/get_started/development.md @@ -62,9 +62,11 @@ Add the `docs` extra when installing `teklia-dan`: ```shell # In a clone of the Git repository -pip install .[docs] +pip install --index-url https://gitlab.teklia.com/api/v4/projects/210/packages/pypi/simple .[docs] ``` +The `--index-url` argument is required to find the `nerval` package. + Build the documentation using `mkdocs serve -v`. You can then write in [Markdown](https://www.markdownguide.org/) in the relevant `docs/*.md` files, and see live output on http://localhost:8000. 
### Linter diff --git a/docs/get_started/index.md b/docs/get_started/index.md index 90e88151d4bc0991179d4e8068660404c251b803..7d30a551a403c22cdcdce08459ebba69831cfa93 100644 --- a/docs/get_started/index.md +++ b/docs/get_started/index.md @@ -2,41 +2,43 @@ ## Installation -To use DAN in your own environment, you need to install it as a dependency or manually. +DAN is based on a GitLab package registry containing all the nerval source code. +You need [a personal access token](https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html) and access to the [nerval repository](https://gitlab.teklia.com/ner/nerval) in order to install this module. You will need to add the below to your `~/.netrc` file: + +```shell +machine gitlab.teklia.com +login __token__ +password <YOUR_PERSONAL_TOKEN> +``` + +If you install DAN as a dependency, the host must have access to this configuration file to be able to download the [nerval repository](https://gitlab.teklia.com/ner/nerval) package. ### As a dependency To install DAN as a dependency, you need to first add the following line to your `requirements.txt` file: ```shell -teklia-dan @ git+ssh://git@gitlab.teklia.com/atr/dan.git +--index-url https://gitlab.teklia.com/api/v4/projects/98/packages/pypi/simple --extra-index-url https://gitlab.teklia.com/api/v4/projects/210/packages/pypi/simple +teklia-dan ``` +The `--index-url` argument is required to find the `DAN` package, the `--extra-index-url` argument is needed to find the `nerval` dependency. 
+ Then you can install it via pip: ```shell pip install -r requirements.txt ``` -### Manually +### Directly -To install DAN manually, you need to first clone via: +To install DAN directly, you can install it via pip: ```shell -git clone git@gitlab.teklia.com:atr/dan.git +pip3 install --index-url https://gitlab.teklia.com/api/v4/projects/98/packages/pypi/simple --extra-index-url https://gitlab.teklia.com/api/v4/projects/210/packages/pypi/simple teklia-dan ``` -Then you can initialize the [`Nerval`](https://gitlab.teklia.com/ner/nerval) submodule: - -```shell -git submodule update --init --recursive -``` - -Then you can install it via pip: - -```shell -pip install . -``` +The `--index-url` argument is required to find the `DAN` package, the `--extra-index-url` argument is needed to find the `nerval` dependency. --- diff --git a/docs/usage/train/config.md b/docs/usage/train/config.md index 4408d3852c2336c900f8ba509d15bc43f89c6fd6..20d18fa83e31aa4efadb5cdf3ee5894462c0c5ab 100644 --- a/docs/usage/train/config.md +++ b/docs/usage/train/config.md @@ -242,9 +242,11 @@ To log your experiment on MLFlow, you need to: - install the extra requirements via ```shell -$ pip install .[mlflow] +$ pip install --index-url https://gitlab.teklia.com/api/v4/projects/210/packages/pypi/simple .[mlflow] ``` +The `--index-url` argument is required to find the `nerval` package. 
+ - update the following arguments: | Name | Description | Type | Default | diff --git a/nerval b/nerval deleted file mode 160000 index 525c1a9e6d5a33075669085148247e2604dd092f..0000000000000000000000000000000000000000 --- a/nerval +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 525c1a9e6d5a33075669085148247e2604dd092f diff --git a/pyproject.toml b/pyproject.toml index d7cd4179736d35b94bb4e03ba536af3bf3ea8877..1b15626128be292b23ea1290644355a8442a96fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ known-third-party = [ "cv2", "editdistance", "imageio", + "nerval", "numpy", "scipy", "tensorboard", diff --git a/requirements.txt b/requirements.txt index d065f01db77260ee824b2c39ea3e778332bb93a8..f596371610cf204d6ff7c39f8b6f5d127a873d51 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ --e ./nerval albumentations==1.3.1 arkindex-export==0.1.9 boto3==1.26.124 @@ -13,6 +12,7 @@ PyYAML==6.0 scipy==1.10.1 sentencepiece==0.1.99 teklia-line-image-extractor==0.2.8rc5 +teklia-nerval==0.3.1 tenacity==8.2.3 tensorboard==2.12.2 torch==2.0.0 diff --git a/tox.ini b/tox.ini index ffb845de95a7750ccdf7d1fe5e3ecfde4bc8cd22..3067b2942b06a3242a17c0f2c5ba93450a8add09 100644 --- a/tox.ini +++ b/tox.ini @@ -14,5 +14,8 @@ deps = commands = pytest {tty:--color=yes} {posargs} +setenv = + PIP_INDEX_URL=https://gitlab.teklia.com/api/v4/projects/210/packages/pypi/simple + [pytest] testpaths= tests