Skip to content
Snippets Groups Projects
Verified Commit d13301eb authored by Yoann Schneider's avatar Yoann Schneider :tennis:
Browse files

Set up ruff for linting/formatting

parent 0658f885
No related branches found
No related tags found
1 merge request: !31 Set up ruff for linting/formatting
Pipeline #143891 passed
[flake8]
max-line-length = 120
exclude=build,.cache,.eggs,.git,src/zeep,front
# Flake8 ignores multiple errors by default;
# the only interesting ignore is W503, which goes against PEP8.
# See https://lintlyci.github.io/Flake8Rules/rules/W503.html
ignore = E203,E501,W503
\ No newline at end of file
[settings]
# Compatible with black
multi_line_output = 3
include_trailing_comma = True
force_grid_wrap = 0
use_parentheses = True
line_length = 120
default_section=FIRSTPARTY
known_third_party = editdistance,edlib,pytest,setuptools,prettytable
repos:
- repo: https://github.com/PyCQA/isort
rev: 5.12.0
hooks:
- id: isort
- repo: https://github.com/ambv/black
rev: 23.1.0
hooks:
- id: black
- repo: https://github.com/pycqa/flake8
rev: 6.0.0
hooks:
- id: flake8
additional_dependencies:
- 'flake8-coding==1.3.2'
- 'flake8-debugger==4.1.2'
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v4.5.0
hooks:
- id: check-ast
- id: check-docstring-first
......@@ -26,17 +11,27 @@ repos:
- id: trailing-whitespace
- id: check-yaml
args: [--allow-multiple-documents]
- id: check-toml
- id: mixed-line-ending
- id: name-tests-test
args: ['--django']
- id: check-json
- id: requirements-txt-fixer
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.1.6
hooks:
# Run the linter.
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
# Run the formatter.
- id: ruff-format
- repo: https://github.com/codespell-project/codespell
rev: v2.2.2
rev: v2.2.6
hooks:
- id: codespell
args: ['--write-changes']
exclude: '\.bio$'
- repo: meta
hooks:
- id: check-useless-excludes
- id: check-useless-excludes
\ No newline at end of file
# -*- coding: utf-8 -*-
import logging
logging.basicConfig(
......
# -*- coding: utf-8 -*-
import argparse
from pathlib import Path
......@@ -70,18 +69,18 @@ def main():
if args.annot:
if not args.predict:
raise argparse.ArgumentTypeError(
"You need to specify the path to a predict file with -p"
"You need to specify the path to a predict file with -p",
)
run(args.annot, args.predict, args.threshold, args.verbose)
elif args.csv:
if not args.folder:
raise argparse.ArgumentTypeError(
"You need to specify the path to a folder of bio files with -f"
"You need to specify the path to a folder of bio files with -f",
)
run_multiple(args.csv, args.folder, args.threshold, args.verbose)
else:
raise argparse.ArgumentTypeError(
"You need to specify the argument of input file"
"You need to specify the argument of input file",
)
......
# -*- coding: utf-8 -*-
import logging
import os
from csv import reader
......@@ -138,13 +136,19 @@ def compute_matches(
):
if not found_aligned_end:
rest_predict, visited = look_for_further_entity_part(
i + 1, tag_ref, prediction, labels_predict
i + 1,
tag_ref,
prediction,
labels_predict,
)
current_compar += rest_predict
visited_predict += visited
rest_annot, visited = look_for_further_entity_part(
i + 1, tag_ref, annotation, labels_annot
i + 1,
tag_ref,
annotation,
labels_annot,
)
current_ref += rest_annot
visited_annot += visited
......@@ -221,7 +225,9 @@ def get_labels_aligned(original: str, aligned: str, labels_original: list) -> li
def compute_scores(
annot_tags_count: dict, predict_tags_count: dict, matches: dict
annot_tags_count: dict,
predict_tags_count: dict,
matches: dict,
) -> dict:
"""Compute Precision, Recall and F1 score for all entity types found in annotation and prediction.
......@@ -270,7 +276,9 @@ def evaluate(annotation: dict, prediction: dict, threshold: int) -> dict:
# Align annotation and prediction
align_result = edlib.align(annotation["words"], prediction["words"], task="path")
nice_alignment = edlib.getNiceAlignment(
align_result, annotation["words"], prediction["words"]
align_result,
annotation["words"],
prediction["words"],
)
annot_aligned = nice_alignment["query_aligned"]
......@@ -278,10 +286,14 @@ def evaluate(annotation: dict, prediction: dict, threshold: int) -> dict:
# Align labels from string alignment
labels_annot_aligned = get_labels_aligned(
annotation["words"], annot_aligned, annotation["labels"]
annotation["words"],
annot_aligned,
annotation["labels"],
)
labels_predict_aligned = get_labels_aligned(
prediction["words"], predict_aligned, prediction["labels"]
prediction["words"],
predict_aligned,
prediction["labels"],
)
# Get nb match
......@@ -334,7 +346,7 @@ def run(annotation: Path, prediction: Path, threshold: int, verbose: bool) -> di
def run_multiple(file_csv, folder, threshold, verbose):
"""Run the program for multiple files (correlation indicated in the csv file)"""
# Read the csv in a list
with open(file_csv, "r") as read_obj:
with open(file_csv) as read_obj:
csv_reader = reader(read_obj)
list_cor = list(csv_reader)
......@@ -375,8 +387,8 @@ def run_multiple(file_csv, folder, threshold, verbose):
round(precision / count, 3),
round(recall / count, 3),
round(f1 / count, 3),
]
]
],
],
)
print(table)
else:
......
# -*- coding: utf-8 -*-
import re
from typing import List
......@@ -64,7 +63,7 @@ def parse_bio(lines: List[str]) -> dict:
if "§" in " ".join(lines):
raise (
Exception(
"§ found in input file. Since this character is used in a specific way during evaluation, prease remove it from files."
"§ found in input file. Since this character is used in a specific way during evaluation, please remove it from files."
)
)
......@@ -145,7 +144,7 @@ def parse_bio(lines: List[str]) -> dict:
result = None
if words:
result = dict()
result = {}
result["words"] = " ".join(words)
result["labels"] = labels
result["entity_count"] = entity_count
......
# -*- coding: utf-8 -*-
from prettytable import MARKDOWN, PrettyTable
......@@ -23,7 +22,7 @@ def print_results(scores: dict):
rec,
f1,
scores[tag]["Support"],
]
],
)
table = PrettyTable()
......@@ -44,7 +43,7 @@ def print_result_compact(scores: dict):
round(scores["All"]["R"], 3),
round(scores["All"]["F1"], 3),
scores["All"]["Support"],
]
],
]
table = PrettyTable()
......
# Ruff configuration: which paths are skipped and which lint rule families run.
[tool.ruff]
# Paths ruff does not inspect.
exclude = [".git", "__pycache__"]
# Individual rules switched off even though their family is enabled in `select`.
ignore = [
# Line too long
"E501",
# Conflicts with the formatter
"COM812"
]
# Rule families that are enabled.
select = [
# pycodestyle
"E",
"W",
# Pyflakes
"F",
# Flake8 Debugger
"T1",
# Isort
"I",
# Pyupgrade
"UP",
# Pandas-vet
"PD",
# Flake8-comprehension
"C4",
# Flake8-builtins
"A",
# flake8-commas
"COM",
# flake8-import-conventions
"ICN",
# flake8-raise
"RSE",
# flake8-quotes
"Q",
# flake8-unused-arguments
"ARG",
]
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from pathlib import Path
from setuptools import find_packages, setup
......@@ -22,7 +21,7 @@ def parse_requirements():
path = Path(__file__).parent.resolve() / "requirements.txt"
assert path.exists(), f"Missing requirements: {path}"
return list(
map(parse_requirements_line, map(str.strip, path.read_text().splitlines()))
map(parse_requirements_line, map(str.strip, path.read_text().splitlines())),
)
......
# -*- coding: utf-8 -*-
from pathlib import Path
import pytest
......
# -*- coding: utf-8 -*-
import edlib
import pytest
......@@ -9,7 +8,7 @@ import pytest
(
"Gérard de Nerval was born in Paris in 1808 .",
"G*rard de *N*erval bo*rn in Paris in 1833 *.",
)
),
],
)
def test_align(query, target):
......
# -*- coding: utf-8 -*-
import pytest
from nerval import evaluate
......@@ -8,81 +7,81 @@ THRESHOLD = 0.30
# fmt: off
fake_tags_aligned_nested_perfect = [
'B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER', 'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER',
'I-PER',
'B-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC',
'I-PER',
'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER',
'O',
'B-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC',
'O',
'O'
"B-PER", "I-PER", "I-PER", "I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER", "I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER",
"I-PER",
"B-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC",
"I-PER",
"I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER",
"O",
"B-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC",
"O",
"O",
]
fake_tags_aligned_nested_false = [
'B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER', 'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER',
'O',
'B-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC',
'O',
'O'
"B-PER", "I-PER", "I-PER", "I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER", "I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER",
"O",
"B-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC",
"O",
"O",
]
fake_predict_tags_aligned = [
'B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER',
'O',
'O', 'O', 'O', 'O',
'O', 'O', 'O', 'O', 'O',
'O',
'O', 'O',
'O',
'B-***', 'I-***', 'I-***', 'I-***', 'I-***',
'O',
'O', 'O',
'O',
'B-DAT', 'I-DAT', 'I-DAT', 'I-DAT',
'O',
'O', 'O'
"B-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
"O",
"O", "O", "O", "O",
"O", "O", "O", "O", "O",
"O",
"O", "O",
"O",
"B-***", "I-***", "I-***", "I-***", "I-***",
"O",
"O", "O",
"O",
"B-DAT", "I-DAT", "I-DAT", "I-DAT",
"O",
"O", "O",
]
# fmt: on
......
# -*- coding: utf-8 -*-
import pytest
from nerval import evaluate
......@@ -11,7 +10,7 @@ from nerval import evaluate
{"All": 3, "DAT": 1, "LOC": 1, "PER": 1},
{"All": 3, "DAT": 1, "***": 1, "PER": 1},
{"All": 1, "PER": 1, "LOC": 0, "DAT": 0},
)
),
],
)
def test_compute_scores(annot, predict, matches):
......
# -*- coding: utf-8 -*-
import pytest
from nerval import evaluate
......@@ -11,88 +10,88 @@ fake_predict_aligned = "G*rard de *N*erval ----bo*rn in Paris in 1833 *."
# fmt: off
fake_annot_tags_original = [
'B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER',
'O',
'O', 'O', 'O',
'O',
'O', 'O', 'O', 'O',
'O',
'O', 'O',
'O',
'B-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC',
'O',
'O', 'O',
'O',
'B-DAT', 'I-DAT', 'I-DAT', 'I-DAT',
'O',
'O'
"B-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
"O",
"O", "O", "O",
"O",
"O", "O", "O", "O",
"O",
"O", "O",
"O",
"B-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC",
"O",
"O", "O",
"O",
"B-DAT", "I-DAT", "I-DAT", "I-DAT",
"O",
"O",
]
fake_predict_tags_original = [
'B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER',
'O',
'O', 'O', 'O', 'O', 'O',
'O',
'O', 'O',
'O',
'***', '***', '***', '***', '***',
'O',
'O', 'O',
'O',
'B-DAT', 'I-DAT', 'I-DAT', 'I-DAT',
'O',
'O', 'O'
"B-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
"O",
"O", "O", "O", "O", "O",
"O",
"O", "O",
"O",
"***", "***", "***", "***", "***",
"O",
"O", "O",
"O",
"B-DAT", "I-DAT", "I-DAT", "I-DAT",
"O",
"O", "O",
]
expected_annot_tags_aligned = [
'B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER',
'O',
'O', 'O', 'O',
'O',
'O', 'O', 'O', 'O', 'O',
'O',
'O', 'O',
'O',
'B-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC',
'O',
'O', 'O',
'O',
'B-DAT', 'I-DAT', 'I-DAT', 'I-DAT',
'O',
'O', 'O'
"B-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
"O",
"O", "O", "O",
"O",
"O", "O", "O", "O", "O",
"O",
"O", "O",
"O",
"B-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC",
"O",
"O", "O",
"O",
"B-DAT", "I-DAT", "I-DAT", "I-DAT",
"O",
"O", "O",
]
expected_predict_tags_aligned = [
'B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER',
'I-PER',
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER',
'O',
'O', 'O', 'O', 'O',
'O', 'O', 'O', 'O', 'O',
'O',
'O', 'O',
'O',
'***', '***', '***', '***', '***',
'O',
'O', 'O',
'O',
'B-DAT', 'I-DAT', 'I-DAT', 'I-DAT',
'O',
'O', 'O'
"B-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER",
"I-PER",
"I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
"O",
"O", "O", "O", "O",
"O", "O", "O", "O", "O",
"O",
"O", "O",
"O",
"***", "***", "***", "***", "***",
"O",
"O", "O",
"O",
"B-DAT", "I-DAT", "I-DAT", "I-DAT",
"O",
"O", "O",
]
# fmt: on
......
# -*- coding: utf-8 -*-
import pytest
from nerval import evaluate
......
# -*- coding: utf-8 -*-
import pytest
from nerval import evaluate
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment