Commit 6544a3c7 authored by Eva Bardou, committed by Yoann Schneider

Add support for Python 3.12 + Finish the migration to `pyproject.toml`

parent f97736ab
1 merge request: !57 Add support for Python 3.12 + Finish the migration to `pyproject.toml`
Pipeline #203647 passed
@@ -10,7 +10,7 @@ cache:
linter:
stage: test
image: python:3.10
image: python:3.12-slim
cache:
paths:
@@ -26,12 +26,15 @@ linter:
before_script:
- pip install pre-commit
# Install git
- apt-get update -q -y && apt-get install -q -y --no-install-recommends git
script:
- pre-commit run -a
tests:
stage: test
image: python:3.10
image: python:3.12-slim
cache:
paths:
@@ -54,11 +57,11 @@ bump-python-deps:
- schedules
script:
- devops python-deps requirements.txt
- devops python-deps pyproject.toml
deploy-pypi:
stage: release
image: python:3.10
image: python:3.12
only:
- tags
......
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.8.3
hooks:
# Run the linter.
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
# Run the formatter.
- id: ruff-format
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v5.0.0
hooks:
- id: check-ast
- id: check-docstring-first
@@ -16,22 +25,19 @@ repos:
- id: name-tests-test
args: ['--django']
- id: check-json
- id: requirements-txt-fixer
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.1.6
hooks:
# Run the linter.
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
# Run the formatter.
- id: ruff-format
- id: check-toml
- repo: https://github.com/codespell-project/codespell
rev: v2.2.6
rev: v2.3.0
hooks:
- id: codespell
args: ['--write-changes']
exclude: '\.bio$'
- repo: meta
hooks:
- id: check-useless-excludes
\ No newline at end of file
- id: check-useless-excludes
- repo: https://gitlab.teklia.com/tools/pre-commit-hooks
rev: 0.1.0
hooks:
- id: long-test-files
args: ['1000']
files: '^tests\/(.*\/)?test_[^\/]*\.py$'
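
Note: the `files` pattern of the `long-test-files` hook only targets test modules under `tests/`. A quick, illustrative sanity check of that regex (not part of the repository):

import re

PATTERN = re.compile(r"^tests\/(.*\/)?test_[^\/]*\.py$")

# Matches test modules at any depth under tests/
assert PATTERN.match("tests/test_align.py")
assert PATTERN.match("tests/unit/test_parse.py")
# Does not match conftest.py or modules outside tests/
assert not PATTERN.match("tests/conftest.py")
assert not PATTERN.match("nerval/test_utils.py")
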
@@ -10,10 +10,10 @@ def threshold_float_type(arg):
"""Type function for argparse."""
try:
f = float(arg)
except ValueError:
raise argparse.ArgumentTypeError("Must be a floating point number.")
except ValueError as e:
raise argparse.ArgumentTypeError("Must be a floating point number.") from e
if f < 0 or f > 1:
raise argparse.ArgumentTypeError("Must be between 0 and 1.")
raise argparse.ArgumentTypeError("Must be between 0 and 1.") from None
return f
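
The added `from e` / `from None` clauses make exception chaining explicit (ruff's flake8-bugbear rule B904 flags a bare `raise` of a new exception inside an `except` block). A minimal, standalone illustration of the two forms (hypothetical names, not project code):

def parse_ratio(text: str) -> float:
    try:
        return float(text)
    except ValueError as e:
        # Chaining: the original ValueError stays visible as __cause__
        raise RuntimeError(f"Not a number: {text!r}") from e

def parse_ratio_quiet(text: str) -> float:
    try:
        return float(text)
    except ValueError:
        # Suppressing: `from None` hides the implicit exception context
        raise RuntimeError(f"Not a number: {text!r}") from None
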
......
@@ -212,16 +212,14 @@ def get_labels_aligned(original: str, aligned: str, labels_original: list) -> li
last_label = NOT_ENTITY_TAG
# Inspecting aligned string
for i, char in enumerate(aligned):
# new_label = ""
for char in aligned:
# If original string has been fully processed, rest of labels are "O" ('-' characters at aligned end)
if index_original >= len(original):
new_label = NOT_ENTITY_TAG
# If current aligned char does not match current original char ('-' characters in aligned)
# Keep last_label and don't increment index_original
elif not char == original[index_original]:
elif char != original[index_original]:
new_label = (
last_label
if get_position_label(last_label) not in BEGINNING_POS
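
For context, the `aligned` string walked here is a gapped version of the input text, where '-' marks positions inserted by the sequence alignment. A rough sketch of how such a pair can be produced with edlib (one of the declared dependencies); exact usage inside nerval may differ:

import edlib

annotation = "Georges"
prediction = "Georges Washington"

result = edlib.align(annotation, prediction, task="path")
nice = edlib.getNiceAlignment(result, annotation, prediction)

# Both strings now have equal length; '-' fills the gaps introduced
# by the alignment, and labels are propagated across those positions.
print(nice["query_aligned"])
print(nice["target_aligned"])
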
@@ -321,7 +319,9 @@ def evaluate(annotation: dict, prediction: dict, threshold: int) -> dict:
# Compute scores
scores = compute_scores(
annotation["entity_count"], prediction["entity_count"], matches
annotation["entity_count"],
prediction["entity_count"],
matches,
)
return scores
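
For reference, `compute_scores` receives the per-type entity counts from both files plus the matches found above; entity-level precision, recall, and F1 are conventionally derived from those three quantities. A rough sketch of the arithmetic (illustrative only, not nerval's actual implementation):

def prf(annotated: int, predicted: int, matched: int) -> dict:
    # Guard against empty annotation or prediction sets
    precision = matched / predicted if predicted else 0.0
    recall = matched / annotated if annotated else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return {"P": precision, "R": recall, "F1": f1}
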
@@ -389,7 +389,7 @@ def run_multiple(file_csv: Path, folder: Path, threshold: int, verbose: bool):
if not (annot and predict):
raise Exception(
f"No file found for files {row[ANNO_COLUMN]}, {row[PRED_COLUMN]}"
f"No file found for files {row[ANNO_COLUMN]}, {row[PRED_COLUMN]}",
)
count += 1
......
@@ -16,8 +16,10 @@ def get_type_label(label: str) -> str:
"""
try:
tag = NOT_ENTITY_TAG if label == NOT_ENTITY_TAG else REGEX_LABEL.match(label)[1]
except TypeError:
raise (Exception(f"The label {label} is not valid in BIOES/BIOLU format."))
except TypeError as e:
raise (
Exception(f"The label {label} is not valid in BIOES/BIOLU format.")
) from e
return tag
@@ -33,8 +35,8 @@ def get_position_label(label: str) -> str:
if label == NOT_ENTITY_TAG
else re.match(r"([BIESLU])-(.*)$", label)[1]
)
except TypeError:
raise Exception(f"The label {label} is not valid in BIOES/BIOLU format.")
except TypeError as e:
raise Exception(f"The label {label} is not valid in BIOES/BIOLU format.") from e
return pos
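
Both helpers split a BIOES/BIOLU label into a position prefix (B, I, E, S, L, U) and an entity type; a label without a valid prefix makes `re.match` return `None`, hence the `TypeError` that gets re-raised as a clearer error. Roughly (illustrative; the project's `REGEX_LABEL` for the type part may be defined slightly differently):

import re

label = "B-PER"
position = re.match(r"([BIESLU])-(.*)$", label)[1]     # "B"
entity_type = re.match(r"([BIESLU])-(.*)$", label)[2]  # "PER"

# A malformed label such as "PER" does not match, re.match returns None,
# and subscripting None raises the TypeError handled above.
assert re.match(r"([BIESLU])-(.*)$", "PER") is None
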
@@ -46,8 +48,10 @@ def parse_line(index: int, line: str):
assert match_iob, f"Line {line} does not match IOB regex"
return match_iob.group(1, 2)
except AssertionError:
raise Exception(f"The file is not in BIO format: check line {index} ({line})")
except AssertionError as e:
raise Exception(
f"The file is not in BIO format: check line {index} ({line})"
) from e
def parse_bio(lines: list[str]) -> dict:
@@ -64,7 +68,7 @@ def parse_bio(lines: list[str]) -> dict:
if "§" in " ".join(lines):
raise (
Exception(
"§ found in input file. Since this character is used in a specific way during evaluation, please remove it from files."
"§ found in input file. Since this character is used in a specific way during evaluation, please remove it from files.",
)
)
@@ -154,7 +158,7 @@ def parse_bio(lines: list[str]) -> dict:
result["entity_count"] = entity_count
assert len(result["words"]) == len(
result["labels"]
result["labels"],
), f'Found {len(result["words"])} word(s) for {len(result["labels"])} label(s)'
for tag in result["entity_count"]:
if tag != ALL_ENTITIES:
......
@@ -6,27 +6,39 @@ build-backend = "setuptools.build_meta"
name = "teklia-nerval"
version = "0.3.3rc3"
description = "Tool to evaluate NER on noisy text."
dynamic = ["dependencies"]
dependencies = [
"editdistance==0.8.1",
"edlib==1.3.9.post1",
"prettytable==3.9.0",
]
authors = [
{ name = "Teklia", email = "contact@teklia.com" },
]
maintainers = [
{ name = "Teklia", email = "contact@teklia.com" },
]
readme = { file = "README.md", content-type = "text/markdown" }
requires-python = ">=3.10"
readme = { file = "README.md", content-type = "text/markdown" }
keywords = ["python"]
classifiers = [
# Specify the Python versions you support here.
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
]
[project.scripts]
nerval = "nerval.cli:main"
"nerval" = "nerval.cli:main"
[tool.setuptools]
packages = ["nerval"]
[tool.setuptools.dynamic]
dependencies = { file = ["requirements.txt"] }
[tool.ruff]
exclude = [".git", "__pycache__"]
target-version = "py312"
[tool.ruff.lint]
ignore = [
"E501",
# Conflicts with the formatter
@@ -42,26 +54,42 @@ select = [
"T1",
# Isort
"I",
# Pyupgrade
# Implicit Optional
"RUF013",
# Invalid pyproject.toml
"RUF200",
# pyupgrade
"UP",
# Pandas-vet
# pandas-vet
"PD",
# Flake8-comprehension
"C4",
# Flake8-builtins
# flake8-bugbear
"B",
# flake8-builtins
"A",
# flake8-commas
"COM",
# flake8-comprehension
"C4",
# flake8-import-conventions
"ICN",
# flake8-raise
"RSE",
# flake8-quotes
"Q",
# flake8-raise
"RSE",
# flake8-simplify
"SIM",
# flake8-unused-arguments
"ARG",
# flake8-use-pathlib
"PTH",
# flake8-pytest-style
"PT",
# flake8-use-pathlib
"PTH",
]
[tool.ruff.lint.per-file-ignores]
# Ignore `pytest-composite-assertion` rules of `flake8-pytest-style` linter for non-test files
"nerval/**/*.py" = ["PT018"]
[tool.ruff.lint.isort]
known-first-party = []
known-third-party = ["pytest", "setuptools", "editdistance", "edlib", "prettytable"]
editdistance==0.6.2
edlib==1.3.9
prettytable==3.9.0
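
The three pins above are the old `requirements.txt`, removed by this commit: dependencies are now declared statically under `[project].dependencies` in `pyproject.toml` (with updated versions), so the `dynamic = ["dependencies"]` / `[tool.setuptools.dynamic]` indirection is no longer needed and `pip install .` resolves everything from `pyproject.toml` alone. They can also be read programmatically with the standard library; a small illustration (requires Python 3.11+ for `tomllib`):

import tomllib
from pathlib import Path

pyproject = tomllib.loads(Path("pyproject.toml").read_text())
for requirement in pyproject["project"]["dependencies"]:
    print(requirement)
# editdistance==0.8.1
# edlib==1.3.9.post1
# prettytable==3.9.0
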
@@ -5,56 +5,56 @@ import pytest
FIXTURES = Path(__file__).parent / "fixtures"
@pytest.fixture()
@pytest.fixture
def fake_annot_bio():
return FIXTURES / "test_annot.bio"
@pytest.fixture()
@pytest.fixture
def fake_annot_with_empty_lines_bio():
return FIXTURES / "test_annot_with_empty_lines.bio"
@pytest.fixture()
@pytest.fixture
def fake_predict_bio():
return FIXTURES / "test_predict.bio"
@pytest.fixture()
@pytest.fixture
def empty_bio():
return FIXTURES / "test_empty.bio"
@pytest.fixture()
@pytest.fixture
def bad_bio():
return FIXTURES / "test_bad.bio"
@pytest.fixture()
@pytest.fixture
def bioeslu_bio():
return FIXTURES / "bioeslu.bio"
@pytest.fixture()
@pytest.fixture
def end_of_file_bio():
return FIXTURES / "end_of_file.bio"
@pytest.fixture()
@pytest.fixture
def nested_bio():
return FIXTURES / "test_nested.bio"
@pytest.fixture()
@pytest.fixture
def folder_bio():
return FIXTURES
@pytest.fixture()
@pytest.fixture
def csv_file_error():
return FIXTURES / "test_mapping_file_error.csv"
@pytest.fixture()
@pytest.fixture
def csv_file():
return FIXTURES / "test_mapping_file.csv"
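
The empty parentheses on `@pytest.fixture()` are dropped throughout: with no arguments the two spellings are equivalent, and ruff's flake8-pytest-style rule PT001 (part of the `PT` group selected above) flags the redundant parentheses by default in recent ruff versions. For example (illustrative, not a fixture from this repository):

import pytest

@pytest.fixture
def sample_bio(tmp_path):
    # Same behaviour as @pytest.fixture() with no arguments
    return tmp_path / "sample.bio"

def test_sample_bio(sample_bio):
    assert sample_bio.name == "sample.bio"
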
@@ -188,7 +188,8 @@ def test_parse_bio(test_input, expected):
def test_parse_bio_bad_input(bad_bio):
lines = bad_bio.read_text().strip().splitlines()
with pytest.raises(
Exception, match=re.escape("The file is not in BIO format: check line 1 (file)")
Exception,
match=re.escape("The file is not in BIO format: check line 1 (file)"),
):
evaluate.parse_bio(lines)
......
@@ -101,7 +101,8 @@ def test_run(annotation, prediction, expected):
def test_run_empty_bio(empty_bio):
with pytest.raises(
Exception, match="No content found in annotation or prediction files."
Exception,
match="No content found in annotation or prediction files.",
):
evaluate.run(empty_bio, empty_bio, 0.3, False)
@@ -116,13 +117,15 @@ def test_run_empty_entry():
def test_run_invalid_header(csv_file_error, folder_bio):
with pytest.raises(
Exception, match="Columns in the CSV mapping should be: Annotation,Prediction"
Exception,
match="Columns in the CSV mapping should be: Annotation,Prediction",
):
evaluate.run_multiple(csv_file_error, folder_bio, 0.3, False)
def test_run_multiple(csv_file, folder_bio):
with pytest.raises(
Exception, match="No file found for files demo_annot.bio, demo_predict.bio"
Exception,
match="No file found for files demo_annot.bio, demo_predict.bio",
):
evaluate.run_multiple(csv_file, folder_bio, 0.3, False)
@@ -6,7 +6,6 @@ testpaths = tests
addopts =
--cov-report=term-missing
[testenv]
commands =
pytest --cov=nerval {posargs}
@@ -16,4 +15,3 @@ deps =
pytest<8
pytest-lazy-fixture
pytest-cov
-rrequirements.txt