Skip to content
Snippets Groups Projects
Verified Commit d13301eb authored by Yoann Schneider's avatar Yoann Schneider :tennis:
Browse files

Setup ruff for linting/formatting

parent 0658f885
No related branches found
No related tags found
1 merge request!31Setup ruff for linting/formatting
Pipeline #143891 passed
[flake8]
max-line-length = 120
exclude=build,.cache,.eggs,.git,src/zeep,front
# Flake8 ignores multiple errors by default;
# the only interesting ignore is W503, which goes against PEP8.
# See https://lintlyci.github.io/Flake8Rules/rules/W503.html
ignore = E203,E501,W503
\ No newline at end of file
[settings]
# Compatible with black
multi_line_output = 3
include_trailing_comma = True
force_grid_wrap = 0
use_parentheses = True
line_length = 120
default_section=FIRSTPARTY
known_third_party = editdistance,edlib,pytest,setuptools,prettytable
repos: repos:
- repo: https://github.com/PyCQA/isort
rev: 5.12.0
hooks:
- id: isort
- repo: https://github.com/ambv/black
rev: 23.1.0
hooks:
- id: black
- repo: https://github.com/pycqa/flake8
rev: 6.0.0
hooks:
- id: flake8
additional_dependencies:
- 'flake8-coding==1.3.2'
- 'flake8-debugger==4.1.2'
- repo: https://github.com/pre-commit/pre-commit-hooks - repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0 rev: v4.5.0
hooks: hooks:
- id: check-ast - id: check-ast
- id: check-docstring-first - id: check-docstring-first
...@@ -26,17 +11,27 @@ repos: ...@@ -26,17 +11,27 @@ repos:
- id: trailing-whitespace - id: trailing-whitespace
- id: check-yaml - id: check-yaml
args: [--allow-multiple-documents] args: [--allow-multiple-documents]
- id: check-toml
- id: mixed-line-ending - id: mixed-line-ending
- id: name-tests-test - id: name-tests-test
args: ['--django'] args: ['--django']
- id: check-json - id: check-json
- id: requirements-txt-fixer - id: requirements-txt-fixer
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.1.6
hooks:
# Run the linter.
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
# Run the formatter.
- id: ruff-format
- repo: https://github.com/codespell-project/codespell - repo: https://github.com/codespell-project/codespell
rev: v2.2.2 rev: v2.2.6
hooks: hooks:
- id: codespell - id: codespell
args: ['--write-changes'] args: ['--write-changes']
exclude: '\.bio$' exclude: '\.bio$'
- repo: meta - repo: meta
hooks: hooks:
- id: check-useless-excludes - id: check-useless-excludes
\ No newline at end of file
# -*- coding: utf-8 -*-
import logging import logging
logging.basicConfig( logging.basicConfig(
......
# -*- coding: utf-8 -*-
import argparse import argparse
from pathlib import Path from pathlib import Path
...@@ -70,18 +69,18 @@ def main(): ...@@ -70,18 +69,18 @@ def main():
if args.annot: if args.annot:
if not args.predict: if not args.predict:
raise argparse.ArgumentTypeError( raise argparse.ArgumentTypeError(
"You need to specify the path to a predict file with -p" "You need to specify the path to a predict file with -p",
) )
run(args.annot, args.predict, args.threshold, args.verbose) run(args.annot, args.predict, args.threshold, args.verbose)
elif args.csv: elif args.csv:
if not args.folder: if not args.folder:
raise argparse.ArgumentTypeError( raise argparse.ArgumentTypeError(
"You need to specify the path to a folder of bio files with -f" "You need to specify the path to a folder of bio files with -f",
) )
run_multiple(args.csv, args.folder, args.threshold, args.verbose) run_multiple(args.csv, args.folder, args.threshold, args.verbose)
else: else:
raise argparse.ArgumentTypeError( raise argparse.ArgumentTypeError(
"You need to specify the argument of input file" "You need to specify the argument of input file",
) )
......
# -*- coding: utf-8 -*-
import logging import logging
import os import os
from csv import reader from csv import reader
...@@ -138,13 +136,19 @@ def compute_matches( ...@@ -138,13 +136,19 @@ def compute_matches(
): ):
if not found_aligned_end: if not found_aligned_end:
rest_predict, visited = look_for_further_entity_part( rest_predict, visited = look_for_further_entity_part(
i + 1, tag_ref, prediction, labels_predict i + 1,
tag_ref,
prediction,
labels_predict,
) )
current_compar += rest_predict current_compar += rest_predict
visited_predict += visited visited_predict += visited
rest_annot, visited = look_for_further_entity_part( rest_annot, visited = look_for_further_entity_part(
i + 1, tag_ref, annotation, labels_annot i + 1,
tag_ref,
annotation,
labels_annot,
) )
current_ref += rest_annot current_ref += rest_annot
visited_annot += visited visited_annot += visited
...@@ -221,7 +225,9 @@ def get_labels_aligned(original: str, aligned: str, labels_original: list) -> li ...@@ -221,7 +225,9 @@ def get_labels_aligned(original: str, aligned: str, labels_original: list) -> li
def compute_scores( def compute_scores(
annot_tags_count: dict, predict_tags_count: dict, matches: dict annot_tags_count: dict,
predict_tags_count: dict,
matches: dict,
) -> dict: ) -> dict:
"""Compute Precision, Recall and F1 score for all entity types found in annotation and prediction. """Compute Precision, Recall and F1 score for all entity types found in annotation and prediction.
...@@ -270,7 +276,9 @@ def evaluate(annotation: dict, prediction: dict, threshold: int) -> dict: ...@@ -270,7 +276,9 @@ def evaluate(annotation: dict, prediction: dict, threshold: int) -> dict:
# Align annotation and prediction # Align annotation and prediction
align_result = edlib.align(annotation["words"], prediction["words"], task="path") align_result = edlib.align(annotation["words"], prediction["words"], task="path")
nice_alignment = edlib.getNiceAlignment( nice_alignment = edlib.getNiceAlignment(
align_result, annotation["words"], prediction["words"] align_result,
annotation["words"],
prediction["words"],
) )
annot_aligned = nice_alignment["query_aligned"] annot_aligned = nice_alignment["query_aligned"]
...@@ -278,10 +286,14 @@ def evaluate(annotation: dict, prediction: dict, threshold: int) -> dict: ...@@ -278,10 +286,14 @@ def evaluate(annotation: dict, prediction: dict, threshold: int) -> dict:
# Align labels from string alignment # Align labels from string alignment
labels_annot_aligned = get_labels_aligned( labels_annot_aligned = get_labels_aligned(
annotation["words"], annot_aligned, annotation["labels"] annotation["words"],
annot_aligned,
annotation["labels"],
) )
labels_predict_aligned = get_labels_aligned( labels_predict_aligned = get_labels_aligned(
prediction["words"], predict_aligned, prediction["labels"] prediction["words"],
predict_aligned,
prediction["labels"],
) )
# Get nb match # Get nb match
...@@ -334,7 +346,7 @@ def run(annotation: Path, prediction: Path, threshold: int, verbose: bool) -> di ...@@ -334,7 +346,7 @@ def run(annotation: Path, prediction: Path, threshold: int, verbose: bool) -> di
def run_multiple(file_csv, folder, threshold, verbose): def run_multiple(file_csv, folder, threshold, verbose):
"""Run the program for multiple files (correlation indicated in the csv file)""" """Run the program for multiple files (correlation indicated in the csv file)"""
# Read the csv in a list # Read the csv in a list
with open(file_csv, "r") as read_obj: with open(file_csv) as read_obj:
csv_reader = reader(read_obj) csv_reader = reader(read_obj)
list_cor = list(csv_reader) list_cor = list(csv_reader)
...@@ -375,8 +387,8 @@ def run_multiple(file_csv, folder, threshold, verbose): ...@@ -375,8 +387,8 @@ def run_multiple(file_csv, folder, threshold, verbose):
round(precision / count, 3), round(precision / count, 3),
round(recall / count, 3), round(recall / count, 3),
round(f1 / count, 3), round(f1 / count, 3),
] ],
] ],
) )
print(table) print(table)
else: else:
......
# -*- coding: utf-8 -*-
import re import re
from typing import List from typing import List
...@@ -64,7 +63,7 @@ def parse_bio(lines: List[str]) -> dict: ...@@ -64,7 +63,7 @@ def parse_bio(lines: List[str]) -> dict:
if "§" in " ".join(lines): if "§" in " ".join(lines):
raise ( raise (
Exception( Exception(
"§ found in input file. Since this character is used in a specific way during evaluation, prease remove it from files." "§ found in input file. Since this character is used in a specific way during evaluation, please remove it from files."
) )
) )
...@@ -145,7 +144,7 @@ def parse_bio(lines: List[str]) -> dict: ...@@ -145,7 +144,7 @@ def parse_bio(lines: List[str]) -> dict:
result = None result = None
if words: if words:
result = dict() result = {}
result["words"] = " ".join(words) result["words"] = " ".join(words)
result["labels"] = labels result["labels"] = labels
result["entity_count"] = entity_count result["entity_count"] = entity_count
......
# -*- coding: utf-8 -*-
from prettytable import MARKDOWN, PrettyTable from prettytable import MARKDOWN, PrettyTable
...@@ -23,7 +22,7 @@ def print_results(scores: dict): ...@@ -23,7 +22,7 @@ def print_results(scores: dict):
rec, rec,
f1, f1,
scores[tag]["Support"], scores[tag]["Support"],
] ],
) )
table = PrettyTable() table = PrettyTable()
...@@ -44,7 +43,7 @@ def print_result_compact(scores: dict): ...@@ -44,7 +43,7 @@ def print_result_compact(scores: dict):
round(scores["All"]["R"], 3), round(scores["All"]["R"], 3),
round(scores["All"]["F1"], 3), round(scores["All"]["F1"], 3),
scores["All"]["Support"], scores["All"]["Support"],
] ],
] ]
table = PrettyTable() table = PrettyTable()
......
[tool.ruff]
exclude = [".git", "__pycache__"]
ignore = [
"E501",
# Conflicts with the formatter
"COM812"
]
select = [
# pycodestyle
"E",
"W",
# Pyflakes
"F",
# Flake8 Debugger
"T1",
# Isort
"I",
# Pyupgrade
"UP",
# Pandas-vet
"PD",
# Flake8-comprehension
"C4",
# Flake8-builtins
"A",
# flake8-commas
"COM",
# flake8-import-conventions
"ICN",
# flake8-raise
"RSE",
# flake8-quotes
"Q",
# flake8-unused-arguments
"ARG",
]
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*-
from pathlib import Path from pathlib import Path
from setuptools import find_packages, setup from setuptools import find_packages, setup
...@@ -22,7 +21,7 @@ def parse_requirements(): ...@@ -22,7 +21,7 @@ def parse_requirements():
path = Path(__file__).parent.resolve() / "requirements.txt" path = Path(__file__).parent.resolve() / "requirements.txt"
assert path.exists(), f"Missing requirements: {path}" assert path.exists(), f"Missing requirements: {path}"
return list( return list(
map(parse_requirements_line, map(str.strip, path.read_text().splitlines())) map(parse_requirements_line, map(str.strip, path.read_text().splitlines())),
) )
......
# -*- coding: utf-8 -*-
from pathlib import Path from pathlib import Path
import pytest import pytest
......
# -*- coding: utf-8 -*-
import edlib import edlib
import pytest import pytest
...@@ -9,7 +8,7 @@ import pytest ...@@ -9,7 +8,7 @@ import pytest
( (
"Gérard de Nerval was born in Paris in 1808 .", "Gérard de Nerval was born in Paris in 1808 .",
"G*rard de *N*erval bo*rn in Paris in 1833 *.", "G*rard de *N*erval bo*rn in Paris in 1833 *.",
) ),
], ],
) )
def test_align(query, target): def test_align(query, target):
......
# -*- coding: utf-8 -*-
import pytest import pytest
from nerval import evaluate from nerval import evaluate
...@@ -8,81 +7,81 @@ THRESHOLD = 0.30 ...@@ -8,81 +7,81 @@ THRESHOLD = 0.30
# fmt: off # fmt: off
fake_tags_aligned_nested_perfect = [ fake_tags_aligned_nested_perfect = [
'B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "B-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'B-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', "B-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'O', "O",
'B-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', "B-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC",
'O', "O",
'O' "O",
] ]
fake_tags_aligned_nested_false = [ fake_tags_aligned_nested_false = [
'B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "B-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'O', "O",
'B-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', "B-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC",
'O', "O",
'O' "O",
] ]
fake_predict_tags_aligned = [ fake_predict_tags_aligned = [
'B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "B-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'O', "O",
'O', 'O', 'O', 'O', "O", "O", "O", "O",
'O', 'O', 'O', 'O', 'O', "O", "O", "O", "O", "O",
'O', "O",
'O', 'O', "O", "O",
'O', "O",
'B-***', 'I-***', 'I-***', 'I-***', 'I-***', "B-***", "I-***", "I-***", "I-***", "I-***",
'O', "O",
'O', 'O', "O", "O",
'O', "O",
'B-DAT', 'I-DAT', 'I-DAT', 'I-DAT', "B-DAT", "I-DAT", "I-DAT", "I-DAT",
'O', "O",
'O', 'O' "O", "O",
] ]
# fmt: on # fmt: on
......
# -*- coding: utf-8 -*-
import pytest import pytest
from nerval import evaluate from nerval import evaluate
...@@ -11,7 +10,7 @@ from nerval import evaluate ...@@ -11,7 +10,7 @@ from nerval import evaluate
{"All": 3, "DAT": 1, "LOC": 1, "PER": 1}, {"All": 3, "DAT": 1, "LOC": 1, "PER": 1},
{"All": 3, "DAT": 1, "***": 1, "PER": 1}, {"All": 3, "DAT": 1, "***": 1, "PER": 1},
{"All": 1, "PER": 1, "LOC": 0, "DAT": 0}, {"All": 1, "PER": 1, "LOC": 0, "DAT": 0},
) ),
], ],
) )
def test_compute_scores(annot, predict, matches): def test_compute_scores(annot, predict, matches):
......
# -*- coding: utf-8 -*-
import pytest import pytest
from nerval import evaluate from nerval import evaluate
...@@ -11,88 +10,88 @@ fake_predict_aligned = "G*rard de *N*erval ----bo*rn in Paris in 1833 *." ...@@ -11,88 +10,88 @@ fake_predict_aligned = "G*rard de *N*erval ----bo*rn in Paris in 1833 *."
# fmt: off # fmt: off
fake_annot_tags_original = [ fake_annot_tags_original = [
'B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "B-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'O', "O",
'O', 'O', 'O', "O", "O", "O",
'O', "O",
'O', 'O', 'O', 'O', "O", "O", "O", "O",
'O', "O",
'O', 'O', "O", "O",
'O', "O",
'B-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', "B-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC",
'O', "O",
'O', 'O', "O", "O",
'O', "O",
'B-DAT', 'I-DAT', 'I-DAT', 'I-DAT', "B-DAT", "I-DAT", "I-DAT", "I-DAT",
'O', "O",
'O' "O",
] ]
fake_predict_tags_original = [ fake_predict_tags_original = [
'B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "B-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'O', "O",
'O', 'O', 'O', 'O', 'O', "O", "O", "O", "O", "O",
'O', "O",
'O', 'O', "O", "O",
'O', "O",
'***', '***', '***', '***', '***', "***", "***", "***", "***", "***",
'O', "O",
'O', 'O', "O", "O",
'O', "O",
'B-DAT', 'I-DAT', 'I-DAT', 'I-DAT', "B-DAT", "I-DAT", "I-DAT", "I-DAT",
'O', "O",
'O', 'O' "O", "O",
] ]
expected_annot_tags_aligned = [ expected_annot_tags_aligned = [
'B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "B-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'O', "O",
'O', 'O', 'O', "O", "O", "O",
'O', "O",
'O', 'O', 'O', 'O', 'O', "O", "O", "O", "O", "O",
'O', "O",
'O', 'O', "O", "O",
'O', "O",
'B-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', "B-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC",
'O', "O",
'O', 'O', "O", "O",
'O', "O",
'B-DAT', 'I-DAT', 'I-DAT', 'I-DAT', "B-DAT", "I-DAT", "I-DAT", "I-DAT",
'O', "O",
'O', 'O' "O", "O",
] ]
expected_predict_tags_aligned = [ expected_predict_tags_aligned = [
'B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "B-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'O', "O",
'O', 'O', 'O', 'O', "O", "O", "O", "O",
'O', 'O', 'O', 'O', 'O', "O", "O", "O", "O", "O",
'O', "O",
'O', 'O', "O", "O",
'O', "O",
'***', '***', '***', '***', '***', "***", "***", "***", "***", "***",
'O', "O",
'O', 'O', "O", "O",
'O', "O",
'B-DAT', 'I-DAT', 'I-DAT', 'I-DAT', "B-DAT", "I-DAT", "I-DAT", "I-DAT",
'O', "O",
'O', 'O' "O", "O",
] ]
# fmt: on # fmt: on
......
# -*- coding: utf-8 -*-
import pytest import pytest
from nerval import evaluate from nerval import evaluate
......
# -*- coding: utf-8 -*-
import pytest import pytest
from nerval import evaluate from nerval import evaluate
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment