Skip to content
Snippets Groups Projects
Verified Commit d13301eb authored by Yoann Schneider's avatar Yoann Schneider :tennis:
Browse files

Setup ruff for linting/formatting

parent 0658f885
No related branches found
No related tags found
1 merge request!31Setup ruff for linting/formatting
Pipeline #143891 passed
[flake8]
max-line-length = 120
exclude=build,.cache,.eggs,.git,src/zeep,front
# Flake8 ignores multiple errors by default;
# the only interesting ignore is W503, which goes against PEP8.
# See https://lintlyci.github.io/Flake8Rules/rules/W503.html
ignore = E203,E501,W503
\ No newline at end of file
[settings]
# Compatible with black
multi_line_output = 3
include_trailing_comma = True
force_grid_wrap = 0
use_parentheses = True
line_length = 120
default_section=FIRSTPARTY
known_third_party = editdistance,edlib,pytest,setuptools,prettytable
repos: repos:
- repo: https://github.com/PyCQA/isort
rev: 5.12.0
hooks:
- id: isort
- repo: https://github.com/ambv/black
rev: 23.1.0
hooks:
- id: black
- repo: https://github.com/pycqa/flake8
rev: 6.0.0
hooks:
- id: flake8
additional_dependencies:
- 'flake8-coding==1.3.2'
- 'flake8-debugger==4.1.2'
- repo: https://github.com/pre-commit/pre-commit-hooks - repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0 rev: v4.5.0
hooks: hooks:
- id: check-ast - id: check-ast
- id: check-docstring-first - id: check-docstring-first
...@@ -26,17 +11,27 @@ repos: ...@@ -26,17 +11,27 @@ repos:
- id: trailing-whitespace - id: trailing-whitespace
- id: check-yaml - id: check-yaml
args: [--allow-multiple-documents] args: [--allow-multiple-documents]
- id: check-toml
- id: mixed-line-ending - id: mixed-line-ending
- id: name-tests-test - id: name-tests-test
args: ['--django'] args: ['--django']
- id: check-json - id: check-json
- id: requirements-txt-fixer - id: requirements-txt-fixer
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.1.6
hooks:
# Run the linter.
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
# Run the formatter.
- id: ruff-format
- repo: https://github.com/codespell-project/codespell - repo: https://github.com/codespell-project/codespell
rev: v2.2.2 rev: v2.2.6
hooks: hooks:
- id: codespell - id: codespell
args: ['--write-changes'] args: ['--write-changes']
exclude: '\.bio$' exclude: '\.bio$'
- repo: meta - repo: meta
hooks: hooks:
- id: check-useless-excludes - id: check-useless-excludes
\ No newline at end of file
# -*- coding: utf-8 -*-
import logging import logging
logging.basicConfig( logging.basicConfig(
......
# -*- coding: utf-8 -*-
import argparse import argparse
from pathlib import Path from pathlib import Path
...@@ -70,18 +69,18 @@ def main(): ...@@ -70,18 +69,18 @@ def main():
if args.annot: if args.annot:
if not args.predict: if not args.predict:
raise argparse.ArgumentTypeError( raise argparse.ArgumentTypeError(
"You need to specify the path to a predict file with -p" "You need to specify the path to a predict file with -p",
) )
run(args.annot, args.predict, args.threshold, args.verbose) run(args.annot, args.predict, args.threshold, args.verbose)
elif args.csv: elif args.csv:
if not args.folder: if not args.folder:
raise argparse.ArgumentTypeError( raise argparse.ArgumentTypeError(
"You need to specify the path to a folder of bio files with -f" "You need to specify the path to a folder of bio files with -f",
) )
run_multiple(args.csv, args.folder, args.threshold, args.verbose) run_multiple(args.csv, args.folder, args.threshold, args.verbose)
else: else:
raise argparse.ArgumentTypeError( raise argparse.ArgumentTypeError(
"You need to specify the argument of input file" "You need to specify the argument of input file",
) )
......
# -*- coding: utf-8 -*-
import logging import logging
import os import os
from csv import reader from csv import reader
...@@ -138,13 +136,19 @@ def compute_matches( ...@@ -138,13 +136,19 @@ def compute_matches(
): ):
if not found_aligned_end: if not found_aligned_end:
rest_predict, visited = look_for_further_entity_part( rest_predict, visited = look_for_further_entity_part(
i + 1, tag_ref, prediction, labels_predict i + 1,
tag_ref,
prediction,
labels_predict,
) )
current_compar += rest_predict current_compar += rest_predict
visited_predict += visited visited_predict += visited
rest_annot, visited = look_for_further_entity_part( rest_annot, visited = look_for_further_entity_part(
i + 1, tag_ref, annotation, labels_annot i + 1,
tag_ref,
annotation,
labels_annot,
) )
current_ref += rest_annot current_ref += rest_annot
visited_annot += visited visited_annot += visited
...@@ -221,7 +225,9 @@ def get_labels_aligned(original: str, aligned: str, labels_original: list) -> li ...@@ -221,7 +225,9 @@ def get_labels_aligned(original: str, aligned: str, labels_original: list) -> li
def compute_scores( def compute_scores(
annot_tags_count: dict, predict_tags_count: dict, matches: dict annot_tags_count: dict,
predict_tags_count: dict,
matches: dict,
) -> dict: ) -> dict:
"""Compute Precision, Recall and F1 score for all entity types found in annotation and prediction. """Compute Precision, Recall and F1 score for all entity types found in annotation and prediction.
...@@ -270,7 +276,9 @@ def evaluate(annotation: dict, prediction: dict, threshold: int) -> dict: ...@@ -270,7 +276,9 @@ def evaluate(annotation: dict, prediction: dict, threshold: int) -> dict:
# Align annotation and prediction # Align annotation and prediction
align_result = edlib.align(annotation["words"], prediction["words"], task="path") align_result = edlib.align(annotation["words"], prediction["words"], task="path")
nice_alignment = edlib.getNiceAlignment( nice_alignment = edlib.getNiceAlignment(
align_result, annotation["words"], prediction["words"] align_result,
annotation["words"],
prediction["words"],
) )
annot_aligned = nice_alignment["query_aligned"] annot_aligned = nice_alignment["query_aligned"]
...@@ -278,10 +286,14 @@ def evaluate(annotation: dict, prediction: dict, threshold: int) -> dict: ...@@ -278,10 +286,14 @@ def evaluate(annotation: dict, prediction: dict, threshold: int) -> dict:
# Align labels from string alignment # Align labels from string alignment
labels_annot_aligned = get_labels_aligned( labels_annot_aligned = get_labels_aligned(
annotation["words"], annot_aligned, annotation["labels"] annotation["words"],
annot_aligned,
annotation["labels"],
) )
labels_predict_aligned = get_labels_aligned( labels_predict_aligned = get_labels_aligned(
prediction["words"], predict_aligned, prediction["labels"] prediction["words"],
predict_aligned,
prediction["labels"],
) )
# Get nb match # Get nb match
...@@ -334,7 +346,7 @@ def run(annotation: Path, prediction: Path, threshold: int, verbose: bool) -> di ...@@ -334,7 +346,7 @@ def run(annotation: Path, prediction: Path, threshold: int, verbose: bool) -> di
def run_multiple(file_csv, folder, threshold, verbose): def run_multiple(file_csv, folder, threshold, verbose):
"""Run the program for multiple files (correlation indicated in the csv file)""" """Run the program for multiple files (correlation indicated in the csv file)"""
# Read the csv in a list # Read the csv in a list
with open(file_csv, "r") as read_obj: with open(file_csv) as read_obj:
csv_reader = reader(read_obj) csv_reader = reader(read_obj)
list_cor = list(csv_reader) list_cor = list(csv_reader)
...@@ -375,8 +387,8 @@ def run_multiple(file_csv, folder, threshold, verbose): ...@@ -375,8 +387,8 @@ def run_multiple(file_csv, folder, threshold, verbose):
round(precision / count, 3), round(precision / count, 3),
round(recall / count, 3), round(recall / count, 3),
round(f1 / count, 3), round(f1 / count, 3),
] ],
] ],
) )
print(table) print(table)
else: else:
......
# -*- coding: utf-8 -*-
import re import re
from typing import List from typing import List
...@@ -64,7 +63,7 @@ def parse_bio(lines: List[str]) -> dict: ...@@ -64,7 +63,7 @@ def parse_bio(lines: List[str]) -> dict:
if "§" in " ".join(lines): if "§" in " ".join(lines):
raise ( raise (
Exception( Exception(
"§ found in input file. Since this character is used in a specific way during evaluation, prease remove it from files." "§ found in input file. Since this character is used in a specific way during evaluation, please remove it from files."
) )
) )
...@@ -145,7 +144,7 @@ def parse_bio(lines: List[str]) -> dict: ...@@ -145,7 +144,7 @@ def parse_bio(lines: List[str]) -> dict:
result = None result = None
if words: if words:
result = dict() result = {}
result["words"] = " ".join(words) result["words"] = " ".join(words)
result["labels"] = labels result["labels"] = labels
result["entity_count"] = entity_count result["entity_count"] = entity_count
......
# -*- coding: utf-8 -*-
from prettytable import MARKDOWN, PrettyTable from prettytable import MARKDOWN, PrettyTable
...@@ -23,7 +22,7 @@ def print_results(scores: dict): ...@@ -23,7 +22,7 @@ def print_results(scores: dict):
rec, rec,
f1, f1,
scores[tag]["Support"], scores[tag]["Support"],
] ],
) )
table = PrettyTable() table = PrettyTable()
...@@ -44,7 +43,7 @@ def print_result_compact(scores: dict): ...@@ -44,7 +43,7 @@ def print_result_compact(scores: dict):
round(scores["All"]["R"], 3), round(scores["All"]["R"], 3),
round(scores["All"]["F1"], 3), round(scores["All"]["F1"], 3),
scores["All"]["Support"], scores["All"]["Support"],
] ],
] ]
table = PrettyTable() table = PrettyTable()
......
[tool.ruff]
exclude = [".git", "__pycache__"]
ignore = [
"E501",
# Conflicts with the formatter
"COM812"
]
select = [
# pycodestyle
"E",
"W",
# Pyflakes
"F",
# Flake8 Debugger
"T1",
# Isort
"I",
# Pyupgrade
"UP",
# Pandas-vet
"PD",
# Flake8-comprehension
"C4",
# Flake8-builtins
"A",
# flake8-commas
"COM",
# flake8-import-conventions
"ICN",
# flake8-raise
"RSE",
# flake8-quotes
"Q",
# flake8-unused-arguments
"ARG",
]
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*-
from pathlib import Path from pathlib import Path
from setuptools import find_packages, setup from setuptools import find_packages, setup
...@@ -22,7 +21,7 @@ def parse_requirements(): ...@@ -22,7 +21,7 @@ def parse_requirements():
path = Path(__file__).parent.resolve() / "requirements.txt" path = Path(__file__).parent.resolve() / "requirements.txt"
assert path.exists(), f"Missing requirements: {path}" assert path.exists(), f"Missing requirements: {path}"
return list( return list(
map(parse_requirements_line, map(str.strip, path.read_text().splitlines())) map(parse_requirements_line, map(str.strip, path.read_text().splitlines())),
) )
......
# -*- coding: utf-8 -*-
from pathlib import Path from pathlib import Path
import pytest import pytest
......
# -*- coding: utf-8 -*-
import edlib import edlib
import pytest import pytest
...@@ -9,7 +8,7 @@ import pytest ...@@ -9,7 +8,7 @@ import pytest
( (
"Gérard de Nerval was born in Paris in 1808 .", "Gérard de Nerval was born in Paris in 1808 .",
"G*rard de *N*erval bo*rn in Paris in 1833 *.", "G*rard de *N*erval bo*rn in Paris in 1833 *.",
) ),
], ],
) )
def test_align(query, target): def test_align(query, target):
......
# -*- coding: utf-8 -*-
import pytest import pytest
from nerval import evaluate from nerval import evaluate
...@@ -8,81 +7,81 @@ THRESHOLD = 0.30 ...@@ -8,81 +7,81 @@ THRESHOLD = 0.30
# fmt: off # fmt: off
fake_tags_aligned_nested_perfect = [ fake_tags_aligned_nested_perfect = [
'B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "B-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'B-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', "B-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'O', "O",
'B-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', "B-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC",
'O', "O",
'O' "O",
] ]
fake_tags_aligned_nested_false = [ fake_tags_aligned_nested_false = [
'B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "B-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'O', "O",
'B-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', "B-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC",
'O', "O",
'O' "O",
] ]
fake_predict_tags_aligned = [ fake_predict_tags_aligned = [
'B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "B-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'O', "O",
'O', 'O', 'O', 'O', "O", "O", "O", "O",
'O', 'O', 'O', 'O', 'O', "O", "O", "O", "O", "O",
'O', "O",
'O', 'O', "O", "O",
'O', "O",
'B-***', 'I-***', 'I-***', 'I-***', 'I-***', "B-***", "I-***", "I-***", "I-***", "I-***",
'O', "O",
'O', 'O', "O", "O",
'O', "O",
'B-DAT', 'I-DAT', 'I-DAT', 'I-DAT', "B-DAT", "I-DAT", "I-DAT", "I-DAT",
'O', "O",
'O', 'O' "O", "O",
] ]
# fmt: on # fmt: on
......
# -*- coding: utf-8 -*-
import pytest import pytest
from nerval import evaluate from nerval import evaluate
...@@ -11,7 +10,7 @@ from nerval import evaluate ...@@ -11,7 +10,7 @@ from nerval import evaluate
{"All": 3, "DAT": 1, "LOC": 1, "PER": 1}, {"All": 3, "DAT": 1, "LOC": 1, "PER": 1},
{"All": 3, "DAT": 1, "***": 1, "PER": 1}, {"All": 3, "DAT": 1, "***": 1, "PER": 1},
{"All": 1, "PER": 1, "LOC": 0, "DAT": 0}, {"All": 1, "PER": 1, "LOC": 0, "DAT": 0},
) ),
], ],
) )
def test_compute_scores(annot, predict, matches): def test_compute_scores(annot, predict, matches):
......
# -*- coding: utf-8 -*-
import pytest import pytest
from nerval import evaluate from nerval import evaluate
...@@ -11,88 +10,88 @@ fake_predict_aligned = "G*rard de *N*erval ----bo*rn in Paris in 1833 *." ...@@ -11,88 +10,88 @@ fake_predict_aligned = "G*rard de *N*erval ----bo*rn in Paris in 1833 *."
# fmt: off # fmt: off
fake_annot_tags_original = [ fake_annot_tags_original = [
'B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "B-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'O', "O",
'O', 'O', 'O', "O", "O", "O",
'O', "O",
'O', 'O', 'O', 'O', "O", "O", "O", "O",
'O', "O",
'O', 'O', "O", "O",
'O', "O",
'B-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', "B-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC",
'O', "O",
'O', 'O', "O", "O",
'O', "O",
'B-DAT', 'I-DAT', 'I-DAT', 'I-DAT', "B-DAT", "I-DAT", "I-DAT", "I-DAT",
'O', "O",
'O' "O",
] ]
fake_predict_tags_original = [ fake_predict_tags_original = [
'B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "B-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'O', "O",
'O', 'O', 'O', 'O', 'O', "O", "O", "O", "O", "O",
'O', "O",
'O', 'O', "O", "O",
'O', "O",
'***', '***', '***', '***', '***', "***", "***", "***", "***", "***",
'O', "O",
'O', 'O', "O", "O",
'O', "O",
'B-DAT', 'I-DAT', 'I-DAT', 'I-DAT', "B-DAT", "I-DAT", "I-DAT", "I-DAT",
'O', "O",
'O', 'O' "O", "O",
] ]
expected_annot_tags_aligned = [ expected_annot_tags_aligned = [
'B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "B-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'O', "O",
'O', 'O', 'O', "O", "O", "O",
'O', "O",
'O', 'O', 'O', 'O', 'O', "O", "O", "O", "O", "O",
'O', "O",
'O', 'O', "O", "O",
'O', "O",
'B-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', "B-LOC", "I-LOC", "I-LOC", "I-LOC", "I-LOC",
'O', "O",
'O', 'O', "O", "O",
'O', "O",
'B-DAT', 'I-DAT', 'I-DAT', 'I-DAT', "B-DAT", "I-DAT", "I-DAT", "I-DAT",
'O', "O",
'O', 'O' "O", "O",
] ]
expected_predict_tags_aligned = [ expected_predict_tags_aligned = [
'B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "B-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', "I-PER", "I-PER",
'I-PER', "I-PER",
'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER', "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER", "I-PER",
'O', "O",
'O', 'O', 'O', 'O', "O", "O", "O", "O",
'O', 'O', 'O', 'O', 'O', "O", "O", "O", "O", "O",
'O', "O",
'O', 'O', "O", "O",
'O', "O",
'***', '***', '***', '***', '***', "***", "***", "***", "***", "***",
'O', "O",
'O', 'O', "O", "O",
'O', "O",
'B-DAT', 'I-DAT', 'I-DAT', 'I-DAT', "B-DAT", "I-DAT", "I-DAT", "I-DAT",
'O', "O",
'O', 'O' "O", "O",
] ]
# fmt: on # fmt: on
......
# -*- coding: utf-8 -*-
import pytest import pytest
from nerval import evaluate from nerval import evaluate
......
# -*- coding: utf-8 -*-
import pytest import pytest
from nerval import evaluate from nerval import evaluate
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment