diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 51fc70b908332e03c8357bfc19df0d372f3359f7..dab142c89c067df7af00866d4ab34b0ceb12d5ac 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,12 +1,4 @@
 repos:
-  - repo: https://github.com/pycqa/flake8
-    rev: 3.9.2
-    hooks:
-      - id: flake8
-        additional_dependencies:
-          - 'flake8-copyright==0.2.2'
-          - 'flake8-debugger==3.1.0'
-          - 'flake8-quotes==3.3.2'
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
     rev: v0.3.7
diff --git a/arkindex/documents/api/ml.py b/arkindex/documents/api/ml.py
index 7e852fc139f2d1994194ea2d8922a54339086387..62fea8eafadde3a1533eaa67dff9d57804c4374f 100644
--- a/arkindex/documents/api/ml.py
+++ b/arkindex/documents/api/ml.py
@@ -440,7 +440,7 @@ class ManageClassificationsSelection(SelectionMixin, CorpusACLMixin, CreateAPIVi
         mode = serializer.validated_data["mode"]
         if mode == ClassificationMode.Create:
             return self.create(corpus, request, *args, **kwargs)
-        elif mode == ClassificationMode.Validate:
+        if mode == ClassificationMode.Validate:
             elements = self.get_selection(corpus.id)
             Classification.objects.filter(
                 element__in=elements,
diff --git a/arkindex/documents/dates.py b/arkindex/documents/dates.py
index 10643931a25ef85ddc16eacf9fbb9c262278d8ef..dc3012bef8a3276bb890e8cb6bb5607c5876e52d 100644
--- a/arkindex/documents/dates.py
+++ b/arkindex/documents/dates.py
@@ -45,7 +45,7 @@ class InterpretedDate(object):
         """
         if self.month and self.day:
             return DatePrecision.Day
-        elif self.month:
+        if self.month:
             return DatePrecision.Month
         return DatePrecision.Year
 
diff --git a/arkindex/documents/serializers/elements.py b/arkindex/documents/serializers/elements.py
index 7c7099022611581f7974ae4d89934bd158c6a8b8..d8ba836bed4af52fe35ef1d7c072fe6d0f890914 100644
--- a/arkindex/documents/serializers/elements.py
+++ b/arkindex/documents/serializers/elements.py
@@ -667,8 +667,7 @@ class ElementSerializer(ElementTinySerializer):
 
         validated_data.update(image=image, polygon=polygon)
 
-        instance = super().update(instance, validated_data)
-        return instance
+        return super().update(instance, validated_data)
 
 
 class ElementNeighborsSerializer(serializers.ModelSerializer):
diff --git a/arkindex/documents/serializers/entities.py b/arkindex/documents/serializers/entities.py
index 1f107ee06bb5b60bbaee32debebaca28c2ec0be6..335e63e4504687c55a21fd278e4e9c4f108107ec 100644
--- a/arkindex/documents/serializers/entities.py
+++ b/arkindex/documents/serializers/entities.py
@@ -32,8 +32,7 @@ class EntityTypeSerializer(serializers.ModelSerializer):
         corpus = data.get("corpus")
         if self.instance and corpus:
             raise ValidationError({"corpus": ["It is not possible to update an Entity Type's corpus."]})
-        data = super().to_internal_value(data)
-        return data
+        return super().to_internal_value(data)
 
     def validate(self, data):
         name = data.get("name") if "name" in data else self.instance.name
diff --git a/arkindex/documents/tasks.py b/arkindex/documents/tasks.py
index 2dd9855b9ba7fb4d9400db8973c18d6046354de1..1c42b0d360adf1e7e7f2c97f61f911629cfbedea 100644
--- a/arkindex/documents/tasks.py
+++ b/arkindex/documents/tasks.py
@@ -21,7 +21,7 @@ from arkindex.documents.models import (
     Transcription,
     TranscriptionEntity,
 )
-from arkindex.ponos.models import Task
+from arkindex.ponos.models import Artifact, Task
 from arkindex.process.models import Process, ProcessDatasetSet, ProcessElement, WorkerActivity, WorkerRun
 from arkindex.training.models import DatasetElement, DatasetSet
 from arkindex.users.models import User
@@ -41,6 +41,10 @@ def corpus_delete(corpus_id: str) -> None:
     corpus.top_level_type_id = None
     corpus.save(update_fields=["top_level_type_id"])
 
+    # Set process.element to NULL in all processes, to avoid IntegrityErrors when deleting elements, as
+    # elements are deleted before processes.
+    Process.objects.filter(corpus_id=corpus_id).exclude(element_id=None).update(element_id=None)
+
     # Delete all related objects, bypassing RESTRICTs deletion rules
     # and Django's way of loading everything into memory before deleting.
     querysets = [
@@ -67,6 +71,8 @@ def corpus_delete(corpus_id: str) -> None:
         Selection.objects.filter(element__corpus_id=corpus_id),
         corpus.memberships.all(),
         corpus.exports.all(),
+        # Delete task artifacts
+        Artifact.objects.filter(task__process__corpus_id=corpus_id),
         # ProcessDatasetSet M2M
         ProcessDatasetSet.objects.filter(set__dataset__corpus_id=corpus_id),
         ProcessDatasetSet.objects.filter(process__corpus_id=corpus_id),
diff --git a/arkindex/documents/tests/test_edit_elementpath.py b/arkindex/documents/tests/test_edit_elementpath.py
index 21455699fdda5f61a5825ea0652c32827f92db3a..ac0f945e98d688a1481e61d08693a88beb374a00 100644
--- a/arkindex/documents/tests/test_edit_elementpath.py
+++ b/arkindex/documents/tests/test_edit_elementpath.py
@@ -363,7 +363,7 @@ class TestEditElementPath(FixtureTestCase):
             path_id = elements["B"].paths.get().id
             if path1.id == path_id:
                 return str(path1.path[0])
-            elif path2.id == path_id:
+            if path2.id == path_id:
                 return str(path2.path[0])
             raise AssertionError("Unexpected top-level path ID")
 
diff --git a/arkindex/images/managers.py b/arkindex/images/managers.py
index d3d97692560fdda9ab318a43b21e20e53f387d0b..61ffb364bbc729af6b505c277a08e00f33564dd0 100644
--- a/arkindex/images/managers.py
+++ b/arkindex/images/managers.py
@@ -47,7 +47,7 @@ class ImageServerManager(models.Manager):
                 f'The URL "{url}" does not match any existing image server. '
                 "Please ask an instance administrator to register the IIIF server for this image."
             )
-        elif servers_count > 1:
+        if servers_count > 1:
             raise self.model.MultipleObjectsReturned(
                 f'The URL "{url}" matched multiple existing image servers'
             )
diff --git a/arkindex/images/models.py b/arkindex/images/models.py
index aecc25fca690b179e55696ab1f9775179b18d830..8b904acaf919c199d8ce862d12280b5da03dd7f4 100644
--- a/arkindex/images/models.py
+++ b/arkindex/images/models.py
@@ -108,7 +108,7 @@ class ImageServer(models.Model):
         src_url, dest_url = map(urllib.parse.urlsplit, (self.url, other.url))
         if src_url.scheme != dest_url.scheme:
             raise ValueError("Cannot merge into an image server of a different protocol")
-        elif src_url.netloc != dest_url.netloc:
+        if src_url.netloc != dest_url.netloc:
             raise ValueError("Cannot merge into an image server on a different domain")
 
         # Check paths
diff --git a/arkindex/images/serializers.py b/arkindex/images/serializers.py
index 1b0805674f253a082a593f45c4438c27bdbb117a..bef4751cb1b59b42e96c76741ea2ea831731d74a 100644
--- a/arkindex/images/serializers.py
+++ b/arkindex/images/serializers.py
@@ -68,7 +68,7 @@ class ImageSerializer(serializers.ModelSerializer):
         if not self.instance:
             # Force the Unchecked status when creating a new image
             return S3FileStatus.Unchecked
-        elif value == S3FileStatus.Checked:
+        if value == S3FileStatus.Checked:
             # Perform image validation if we are updating an existing image to Checked
             try:
                 if self.instance.server.s3_bucket and not self.instance.server.s3_read_only_bucket:
diff --git a/arkindex/process/api.py b/arkindex/process/api.py
index e6c154ea70b51eee6455c9749ce2bd5243a23205..f527305661111a9e60b334b52f2e63ccaed632c4 100644
--- a/arkindex/process/api.py
+++ b/arkindex/process/api.py
@@ -402,8 +402,9 @@ class ProcessDetails(ProcessACLMixin, ProcessQuerysetMixin, RetrieveUpdateDestro
 
 
 class ProcessRetry(ProcessACLMixin, ProcessQuerysetMixin, GenericAPIView):
-    """
-    Retry a process. Can only be used on processes with Error, Failed, Stopped or Completed states.\n\n
+    r"""
+    Retry a process. Can only be used on processes with Error, Failed, Stopped or Completed states.
+
     Requires an **admin** access to the process and **guest** access to the process' farm.
     """
     permission_classes = (IsVerified, )
@@ -427,9 +428,9 @@ class ProcessRetry(ProcessACLMixin, ProcessQuerysetMixin, GenericAPIView):
         # Allow 'retrying' a process that has no Ponos tasks (that has never been started)
         if len(process.tasks.all()) and state in (State.Unscheduled, State.Pending):
             raise ValidationError({"__all__": ["This process is already pending"]})
-        elif state == State.Running:
+        if state == State.Running:
             raise ValidationError({"__all__": ["This process is already running"]})
-        elif state == State.Stopping:
+        if state == State.Stopping:
             raise ValidationError({"__all__": ["This process is stopping"]})
 
     @extend_schema(
@@ -476,8 +477,9 @@ class FilesProcess(CreateAPIView):
     )
 )
 class CorpusProcess(SelectionMixin, CorpusACLMixin, CreateAPIView):
-    """
-    Create a distributed process from elements of an Arkindex corpus.\n\n
+    r"""
+    Create a distributed process from elements of an Arkindex corpus.
+
     Requires an **admin** access to the corpus.
     """
     permission_classes = (IsVerified, )
@@ -542,8 +544,9 @@ class CorpusProcess(SelectionMixin, CorpusACLMixin, CreateAPIView):
     ),
 )
 class StartProcess(CorpusACLMixin, CreateAPIView):
-    """
-    Start a process, used to build a Workflow with Workers.\n\n
+    r"""
+    Start a process, used to build a Workflow with Workers.
+
     Requires an **admin** access to the corpus of this process.
""" permission_classes = (IsVerified, ) @@ -1555,8 +1558,9 @@ class WorkerRunDetails(ProcessACLMixin, RetrieveUpdateDestroyAPIView): tags=["process"] )) class ListProcessElements(CorpusACLMixin, ListAPIView): - """ - List all elements for a process with workers.\n\n + r""" + List all elements for a process with workers. + Requires an **admin** access to the process corpus. """ pagination_class = CountCursorPagination @@ -1579,8 +1583,7 @@ class ListProcessElements(CorpusACLMixin, ListAPIView): def get_serializer_class(self): if self.with_image: return ProcessElementSerializer - else: - return ProcessElementLightSerializer + return ProcessElementLightSerializer def get_queryset(self): if not self.has_admin_access(self.process.corpus): @@ -1829,8 +1832,9 @@ class WorkerActivityBase(ListAPIView): ) ) class CorpusWorkersActivity(CorpusACLMixin, WorkerActivityBase): - """ - Retrieve corpus wise statistics about the activity of all its worker processes.\n + r""" + Retrieve corpus wise statistics about the activity of all its worker processes. + Requires a **guest** access. """ @@ -1846,8 +1850,9 @@ class CorpusWorkersActivity(CorpusACLMixin, WorkerActivityBase): ) ) class ProcessWorkersActivity(ProcessACLMixin, WorkerActivityBase): - """ - Retrieve process statistics about the activity of its workers.\n + r""" + Retrieve process statistics about the activity of its workers. + Requires a **guest** access. """ diff --git a/arkindex/process/models.py b/arkindex/process/models.py index 94d041202912e84b7f43ced2a9b347311fd3936d..8e8dff5625624eb63d18d28b6c3644ac7f529396 100644 --- a/arkindex/process/models.py +++ b/arkindex/process/models.py @@ -232,8 +232,7 @@ class Process(IndexableModel): """ if self.has_prefetched_tasks: return max((t.expiry for t in self.tasks.all()), default=None) - else: - return self.tasks.aggregate(models.Max("expiry"))["expiry__max"] + return self.tasks.aggregate(models.Max("expiry"))["expiry__max"] def get_last_run(self) -> int: """ diff --git a/arkindex/process/serializers/imports.py b/arkindex/process/serializers/imports.py index 2e01179db67c303ef789998bd1e7ece990400e3e..23e601754682282d108caba5b82ca3d5cea3180f 100644 --- a/arkindex/process/serializers/imports.py +++ b/arkindex/process/serializers/imports.py @@ -545,7 +545,7 @@ class ApplyProcessTemplateSerializer(ProcessACLMixin, serializers.Serializer): access_level = self.process_access_level(process) if not access_level: raise ValidationError(detail="Process with this ID does not exist.") - elif access_level < Role.Contributor.value: + if access_level < Role.Contributor.value: raise PermissionDenied(detail="You do not have a contributor access to this process.") errors = [] @@ -627,7 +627,7 @@ class CorpusProcessSerializer(serializers.Serializer): access_level = get_max_level(self.context["request"].user, corpus) if not access_level: raise ValidationError(["Corpus with this ID does not exist."]) - elif access_level < Role.Admin.value: + if access_level < Role.Admin.value: raise ValidationError(["You do not have an admin access to this corpus."]) return corpus diff --git a/arkindex/project/argparse.py b/arkindex/project/argparse.py index 7f0f9330c6149fb6eed315e4cfc71114ebb40612..bbb3dcf17ccbee051c11c0c33d2236d32bde414a 100644 --- a/arkindex/project/argparse.py +++ b/arkindex/project/argparse.py @@ -37,8 +37,7 @@ class ModelArgument(object): text_filter = {f"{self.text_search_field}__{self.text_search_lookup}": arg} if self.many: return qs.filter(**text_filter) - else: - return qs.get(**text_filter) + return 
 
 
 class CorpusArgument(ModelArgument):
diff --git a/arkindex/project/serializer_fields.py b/arkindex/project/serializer_fields.py
index 230bafd37351cccedda4d2f0c8298f0b3cc1f916..06245b86fc0ef12f757846b56b71927b684d87bd 100644
--- a/arkindex/project/serializer_fields.py
+++ b/arkindex/project/serializer_fields.py
@@ -304,4 +304,4 @@ class NullField(serializers.CharField):
     def to_internal_value(self, data):
         if data is not None:
             self.fail("invalid")
-        return None
+        return
diff --git a/arkindex/project/tests/test_config.py b/arkindex/project/tests/test_config.py
index 4a046d010a6a6bfb00dfdc75ed1834b50513f058..ec9eb25440b9849e783bcf195a093c414006cdcd 100644
--- a/arkindex/project/tests/test_config.py
+++ b/arkindex/project/tests/test_config.py
@@ -26,7 +26,7 @@ class TestConfig(TestCase):
             data = data.value
         if data is None:
             return self.represent_none(data)
-        elif isinstance(data, (bool, int, float, bytes, str)):
+        if isinstance(data, (bool, int, float, bytes, str)):
             return self.represent_data(data)
         return self.represent_str(str(data))
 
diff --git a/arkindex/sql_validation/corpus_delete.sql b/arkindex/sql_validation/corpus_delete.sql
index 2ebb6b6f75be165e6be9d6bac416a10ec36d74bc..51eae442a74c5aba4fc8c8f7c0e9238bbe0dda90 100644
--- a/arkindex/sql_validation/corpus_delete.sql
+++ b/arkindex/sql_validation/corpus_delete.sql
@@ -10,6 +10,11 @@ FROM "documents_corpus"
 WHERE "documents_corpus"."id" = '{corpus_id}'::uuid
 LIMIT 21;
 
+UPDATE "process_process"
+SET "element_id" = NULL
+WHERE ("process_process"."corpus_id" = '{corpus_id}'::uuid
+       AND NOT ("process_process"."element_id" IS NULL));
+
 SELECT "django_content_type"."id",
        "django_content_type"."app_label",
        "django_content_type"."model"
@@ -152,6 +157,15 @@ DELETE
 FROM "documents_corpusexport"
 WHERE "documents_corpusexport"."corpus_id" = '{corpus_id}'::uuid;
 
+DELETE
+FROM "ponos_artifact"
+WHERE "ponos_artifact"."id" IN
+  (SELECT U0."id"
+   FROM "ponos_artifact" U0
+   INNER JOIN "ponos_task" U1 ON (U0."task_id" = U1."id")
+   INNER JOIN "process_process" U2 ON (U1."process_id" = U2."id")
+   WHERE U2."corpus_id" = '{corpus_id}'::uuid);
+
 DELETE
 FROM "process_processdatasetset"
 WHERE "process_processdatasetset"."id" IN
diff --git a/arkindex/sql_validation/corpus_delete_top_level_type.sql b/arkindex/sql_validation/corpus_delete_top_level_type.sql
index 80ae4c77e85a44b11f33a3ef17803d55a9fc6d7b..0a552ec5f429f0caa12f106c13e223d7afbe11ab 100644
--- a/arkindex/sql_validation/corpus_delete_top_level_type.sql
+++ b/arkindex/sql_validation/corpus_delete_top_level_type.sql
@@ -14,6 +14,11 @@ UPDATE "documents_corpus"
 SET "top_level_type_id" = NULL
 WHERE "documents_corpus"."id" = '{corpus_id}'::uuid;
 
+UPDATE "process_process"
+SET "element_id" = NULL
+WHERE ("process_process"."corpus_id" = '{corpus_id}'::uuid
+       AND NOT ("process_process"."element_id" IS NULL));
+
 SELECT "django_content_type"."id",
        "django_content_type"."app_label",
        "django_content_type"."model"
@@ -156,6 +161,15 @@ DELETE
 FROM "documents_corpusexport"
 WHERE "documents_corpusexport"."corpus_id" = '{corpus_id}'::uuid;
 
+DELETE
+FROM "ponos_artifact"
+WHERE "ponos_artifact"."id" IN
+  (SELECT U0."id"
+   FROM "ponos_artifact" U0
+   INNER JOIN "ponos_task" U1 ON (U0."task_id" = U1."id")
+   INNER JOIN "process_process" U2 ON (U1."process_id" = U2."id")
+   WHERE U2."corpus_id" = '{corpus_id}'::uuid);
+
 DELETE
 FROM "process_processdatasetset"
 WHERE "process_processdatasetset"."id" IN
diff --git a/arkindex/training/admin.py b/arkindex/training/admin.py
index f8cc278929d0c29dd3f53a18fd10a91aab19d5b6..c3c6c7a8b5362278dfac8165e183eadc04a8938b 100644
--- a/arkindex/training/admin.py
+++ b/arkindex/training/admin.py
@@ -48,8 +48,7 @@ class DatasetAdmin(admin.ModelAdmin):
         self.readonly_fields = self.__class__.readonly_fields
         if obj is not None:
             self.readonly_fields += ("unique_elements",)
-        form = super().get_form(request, obj=None, **kwargs)
-        return form
+        return super().get_form(request, obj=None, **kwargs)
 
 
 admin.site.register(Model, ModelAdmin)
diff --git a/arkindex/training/api.py b/arkindex/training/api.py
index 66b6d00a377014677c08e54e7a3e8dcb5df39336..c7d7220ae96ece53f8fd0426cbd0f8d93f4396d6 100644
--- a/arkindex/training/api.py
+++ b/arkindex/training/api.py
@@ -417,8 +417,7 @@ class ModelsList(TrainingModelMixin, ListCreateAPIView):
                     "id": str(existing_model.id),
                     "name": "A model with this name already exists",
                 })
-            else:
-                raise PermissionDenied()
+            raise PermissionDenied()
 
         return serializer.save()
 
@@ -753,7 +752,7 @@ class DatasetSetBase():
             )
         if self.request.method == "DELETE" and not Corpus.objects.admin(self.request.user).filter(pk=dataset.corpus_id).exists():
             raise PermissionDenied(detail="You do not have admin access to this dataset.")
-        elif self.request.method != "DELETE" and not Corpus.objects.writable(self.request.user).filter(pk=dataset.corpus_id).exists():
+        if self.request.method != "DELETE" and not Corpus.objects.writable(self.request.user).filter(pk=dataset.corpus_id).exists():
             raise PermissionDenied(detail="You do not have contributor access to this dataset.")
         if dataset.state != DatasetState.Open:
             raise ValidationError(detail="You can only add or update sets from a dataset in an open state.")
diff --git a/arkindex/training/management/commands/migrate_workers.py b/arkindex/training/management/commands/migrate_workers.py
index 51e15165089905cc44b4063e851087dec1dfcbac..6e011fe5a60d43123819b71075eb2872f2fca0db 100644
--- a/arkindex/training/management/commands/migrate_workers.py
+++ b/arkindex/training/management/commands/migrate_workers.py
@@ -23,7 +23,7 @@ def ask(phrase):
         choice = input(phrase + " [y/n]: ")
         if choice.lower() == "y":
             return True
-        elif choice.lower() == "n":
+        if choice.lower() == "n":
            return False
 
 
@@ -33,7 +33,7 @@ def choose(instances, name_field="name", title="Pick one item", allow_skip=Fals
     nb = instances.count()
     if nb == 0:
         return
-    elif nb == 1:
+    if nb == 1:
         return instances.first()
 
     # Build internal representation
diff --git a/arkindex/users/api.py b/arkindex/users/api.py
index 5c4b9c40df27caa4a3d55ae485281feecf084c26..467fd2e7283111cbab05450426c502e5c3ad0ee7 100644
--- a/arkindex/users/api.py
+++ b/arkindex/users/api.py
@@ -261,12 +261,11 @@ class PasswordReset(CreateAPIView):
                 status=status.HTTP_201_CREATED,
                 headers=headers,
             )
-        else:
-            return Response(
-                form.errors.get_json_data(),
-                status=status.HTTP_400_BAD_REQUEST,
-                headers=self.default_response_headers
-            )
+        return Response(
+            form.errors.get_json_data(),
+            status=status.HTTP_400_BAD_REQUEST,
+            headers=self.default_response_headers
+        )
 
 
 @extend_schema_view(post=extend_schema(operation_id="PasswordResetConfirm", tags=["users"]))
diff --git a/ruff.toml b/ruff.toml
index d14e149d84c88b14c3ea363299e958e3e6adffed..6def6e28921fcec79c3c996c8e4ad680dcaf05b8 100644
--- a/ruff.toml
+++ b/ruff.toml
@@ -7,7 +7,7 @@ quote-style = "double"
 [lint]
 select = [
     # flake8-quotes
-    "Q0",
+    "Q",
     # pyflakes
     "F",
     # pycodestyle
@@ -17,8 +17,14 @@ select = [
     "S113",
     # isort
     "I",
+    # flake8-debugger
+    "T10",
+    # escape-sequence-in-docstring
+    "D301",
+    # flake8-return
"RET", ] -ignore = ["E501"] +ignore = ["E501", "RET502", "RET503"] [lint.isort] default-section = "first-party"