Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • arkindex/backend
1 result
Show changes
Commits on Source (15)
Showing
with 952 additions and 1339 deletions
...@@ -12,8 +12,10 @@ repos: ...@@ -12,8 +12,10 @@ repos:
- 'flake8-debugger==3.1.0' - 'flake8-debugger==3.1.0'
- 'flake8-quotes==3.3.2' - 'flake8-quotes==3.3.2'
- repo: https://github.com/astral-sh/ruff-pre-commit - repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.11 # Ruff version.
rev: v0.3.7
hooks: hooks:
# Run the linter.
- id: ruff - id: ruff
args: [--fix] args: [--fix]
- repo: https://github.com/pre-commit/pre-commit-hooks - repo: https://github.com/pre-commit/pre-commit-hooks
......
...@@ -54,7 +54,7 @@ release: ...@@ -54,7 +54,7 @@ release:
clean-docker: clean-docker:
$(eval containers:=$(shell docker ps -a -q)) $(eval containers:=$(shell docker ps -a -q))
@if [ -n "$(containers)" ]; then \ @if [ -n "$(containers)" ]; then \
echo "Cleaning up past containers\n" \ echo "Cleaning up past containers\n" ; \
docker rm -f $(containers) ; \ docker rm -f $(containers) ; \
fi fi
......
1.6.0-rc2 1.6.0
...@@ -11,8 +11,6 @@ from arkindex.documents.models import ( ...@@ -11,8 +11,6 @@ from arkindex.documents.models import (
Element, Element,
ElementType, ElementType,
Entity, Entity,
EntityLink,
EntityRole,
EntityType, EntityType,
MetaData, MetaData,
MLClass, MLClass,
...@@ -135,29 +133,15 @@ class EntityMetaForm(forms.ModelForm): ...@@ -135,29 +133,15 @@ class EntityMetaForm(forms.ModelForm):
metas = HStoreFormField() metas = HStoreFormField()
class EntityLinkInLine(admin.TabularInline):
model = EntityLink
fk_name = "parent"
raw_id_fields = ("child", )
class EntityAdmin(admin.ModelAdmin): class EntityAdmin(admin.ModelAdmin):
list_display = ("id", "name", "type") list_display = ("id", "name", "type")
list_filter = ["corpus", "type"] list_filter = ["corpus", "type"]
readonly_fields = ("id", ) readonly_fields = ("id", )
raw_id_fields = ("worker_version", "worker_run", ) raw_id_fields = ("worker_version", "worker_run", )
search_fields = ("name", ) search_fields = ("name", )
inlines = (EntityLinkInLine, )
form = EntityMetaForm form = EntityMetaForm
class EntityRoleAdmin(admin.ModelAdmin):
list_display = ("id", "corpus", "parent_name", "child_name")
list_filter = ("corpus", )
readonly_fields = ("id", )
ordering = ("corpus", "parent_name", "child_name")
class EntityTypeAdmin(admin.ModelAdmin): class EntityTypeAdmin(admin.ModelAdmin):
list_display = ("id", "corpus", "name", "color") list_display = ("id", "corpus", "name", "color")
list_filter = ("corpus", ) list_filter = ("corpus", )
...@@ -180,7 +164,6 @@ admin.site.register(Transcription, TranscriptionAdmin) ...@@ -180,7 +164,6 @@ admin.site.register(Transcription, TranscriptionAdmin)
admin.site.register(MLClass, MLClassAdmin) admin.site.register(MLClass, MLClassAdmin)
admin.site.register(MetaData, MetaDataAdmin) admin.site.register(MetaData, MetaDataAdmin)
admin.site.register(Entity, EntityAdmin) admin.site.register(Entity, EntityAdmin)
admin.site.register(EntityRole, EntityRoleAdmin)
admin.site.register(EntityType, EntityTypeAdmin) admin.site.register(EntityType, EntityTypeAdmin)
admin.site.register(AllowedMetaData, AllowedMetaDataAdmin) admin.site.register(AllowedMetaData, AllowedMetaDataAdmin)
admin.site.register(CorpusExport, CorpusExportAdmin) admin.site.register(CorpusExport, CorpusExportAdmin)
...@@ -3,32 +3,18 @@ from textwrap import dedent ...@@ -3,32 +3,18 @@ from textwrap import dedent
from uuid import UUID from uuid import UUID
from django.core.exceptions import ValidationError as DjangoValidationError from django.core.exceptions import ValidationError as DjangoValidationError
from django.db.models import Q
from django.shortcuts import get_object_or_404 from django.shortcuts import get_object_or_404
from drf_spectacular.utils import OpenApiExample, OpenApiParameter, OpenApiResponse, extend_schema, extend_schema_view from drf_spectacular.utils import OpenApiParameter, OpenApiResponse, extend_schema, extend_schema_view
from rest_framework import permissions, serializers, status from rest_framework import permissions, serializers, status
from rest_framework.exceptions import NotFound, PermissionDenied, ValidationError from rest_framework.exceptions import NotFound, PermissionDenied, ValidationError
from rest_framework.generics import CreateAPIView, ListAPIView, ListCreateAPIView, RetrieveUpdateDestroyAPIView from rest_framework.generics import CreateAPIView, ListAPIView, RetrieveUpdateDestroyAPIView
from rest_framework.response import Response from rest_framework.response import Response
from arkindex.documents.models import ( from arkindex.documents.models import Corpus, Element, Entity, EntityType, Transcription, TranscriptionEntity
Corpus,
Element,
Entity,
EntityLink,
EntityRole,
EntityType,
Transcription,
TranscriptionEntity,
)
from arkindex.documents.serializers.elements import ElementTinySerializer from arkindex.documents.serializers.elements import ElementTinySerializer
from arkindex.documents.serializers.entities import ( from arkindex.documents.serializers.entities import (
BaseEntitySerializer, BaseEntitySerializer,
CreateEntityRoleErrorResponseSerializer,
EntityCreateSerializer, EntityCreateSerializer,
EntityLinkCreateSerializer,
EntityLinkSerializer,
EntityRoleSerializer,
EntitySerializer, EntitySerializer,
EntityTypeCreateSerializer, EntityTypeCreateSerializer,
EntityTypeSerializer, EntityTypeSerializer,
...@@ -44,53 +30,6 @@ from arkindex.project.permissions import IsVerified, IsVerifiedOrReadOnly ...@@ -44,53 +30,6 @@ from arkindex.project.permissions import IsVerified, IsVerifiedOrReadOnly
from arkindex.users.models import Role from arkindex.users.models import Role
@extend_schema(tags=["entities"])
@extend_schema_view(
get=extend_schema(operation_id="ListCorpusRoles", description="List all roles of a corpus"),
post=extend_schema(
description="Create a new entity role",
responses={
200: EntityRoleSerializer,
400: CreateEntityRoleErrorResponseSerializer
},
examples=[OpenApiExample(
status_codes=["400"],
response_only=True,
name="role-exists",
value={"id": "55cd009d-cd4b-4ec2-a475-b060f98f9138", "corpus": ["Role already exists in this corpus"]},
description="Role already exists."
)]
)
)
class CorpusRoles(CorpusACLMixin, ListCreateAPIView):
"""
List all roles in a corpus
"""
permission_classes = (IsVerifiedOrReadOnly, )
serializer_class = EntityRoleSerializer
queryset = EntityRole.objects.none()
def get_queryset(self):
return EntityRole.objects \
.filter(corpus=self.get_corpus(self.kwargs["pk"])) \
.order_by("parent_name", "child_name")
def perform_create(self, serializer):
data = self.request.data
if EntityRole.objects.filter(
parent_name=data["parent_name"],
child_name=data["child_name"],
parent_type=data["parent_type_id"],
child_type=data["child_type_id"],
corpus_id=self.request.parser_context["kwargs"]["pk"]
).exists():
raise serializers.ValidationError({
"corpus": ["Role already exists in this corpus"],
"id": self.request.parser_context["kwargs"]["pk"]
})
super().perform_create(serializer)
@extend_schema(tags=["entities"]) @extend_schema(tags=["entities"])
@extend_schema_view( @extend_schema_view(
get=extend_schema(operation_id="ListCorpusEntityTypes", description="List all entity types in a corpus"), get=extend_schema(operation_id="ListCorpusEntityTypes", description="List all entity types in a corpus"),
...@@ -173,8 +112,6 @@ class EntityTypeUpdate(ACLMixin, RetrieveUpdateDestroyAPIView): ...@@ -173,8 +112,6 @@ class EntityTypeUpdate(ACLMixin, RetrieveUpdateDestroyAPIView):
def perform_destroy(self, instance): def perform_destroy(self, instance):
if instance.entities.exists(): if instance.entities.exists():
raise ValidationError({"detail": ["Some entities are using this entity type."]}) raise ValidationError({"detail": ["Some entities are using this entity type."]})
if EntityRole.objects.filter(Q(parent_type_id=instance.id) | Q(child_type_id=instance.id)).exists():
raise ValidationError({"detail": ["Some entity roles are using this entity type."]})
super().perform_destroy(instance) super().perform_destroy(instance)
...@@ -196,14 +133,6 @@ class EntityDetails(ACLMixin, RetrieveUpdateDestroyAPIView): ...@@ -196,14 +133,6 @@ class EntityDetails(ACLMixin, RetrieveUpdateDestroyAPIView):
.select_related("corpus", "type") \ .select_related("corpus", "type") \
.filter(corpus__in=Corpus.objects.readable(self.request.user)) \ .filter(corpus__in=Corpus.objects.readable(self.request.user)) \
.prefetch_related( .prefetch_related(
"parents__role__parent_type",
"parents__role__child_type",
"children__role__parent_type",
"children__role__child_type",
"parents__child__type",
"parents__parent__type",
"children__parent__type",
"children__child__type",
"corpus", "corpus",
) )
...@@ -307,15 +236,6 @@ class EntityCreate(CreateAPIView): ...@@ -307,15 +236,6 @@ class EntityCreate(CreateAPIView):
return Response(entity.data, status=status_code, headers=headers) return Response(entity.data, status=status_code, headers=headers)
@extend_schema_view(post=extend_schema(operation_id="CreateEntityLink", tags=["entities"]))
class EntityLinkCreate(CreateAPIView):
"""
Create a new link between two entities with a role
"""
permission_classes = (IsVerified, )
serializer_class = EntityLinkCreateSerializer
@extend_schema_view(post=extend_schema( @extend_schema_view(post=extend_schema(
operation_id="CreateTranscriptionEntity", operation_id="CreateTranscriptionEntity",
tags=["entities"], tags=["entities"],
...@@ -519,41 +439,6 @@ class CorpusEntities(CorpusACLMixin, ListAPIView): ...@@ -519,41 +439,6 @@ class CorpusEntities(CorpusACLMixin, ListAPIView):
return queryset return queryset
@extend_schema_view(get=extend_schema(operation_id="ListElementLinks", tags=["entities"]))
class ElementLinks(CorpusACLMixin, ListAPIView):
"""
List all links where parent and child are linked to the element.\n\n
Requires a **guest** access to the element corpus
"""
serializer_class = EntityLinkSerializer
def get_queryset(self):
try:
element = Element.objects.select_related("corpus").only("id", "corpus").get(id=self.kwargs["pk"])
except Element.DoesNotExist:
raise NotFound
if not self.has_read_access(element.corpus):
raise PermissionDenied(detail="You do not have access to this element.")
# Load entities linked by transcriptions
entities_tr = Entity.objects.filter(transcriptions__element_id=element.id).prefetch_related("transcriptions")
# Load entities linked by metadatas
entities_meta = Entity.objects.filter(metadatas__element_id=element.id).prefetch_related("metadatas")
# Now load all links belonging to those entities
# It's several times faster to combine the queries in the final one
# than combining them at the lower level (entities is slower than entities_tr + entities_meta)
# We need to support cross references between transcriptions & metadata entities
return EntityLink.objects.filter(
Q(parent__in=entities_tr, child__in=entities_tr)
| Q(parent__in=entities_tr, child__in=entities_meta)
| Q(parent__in=entities_meta, child__in=entities_tr)
| Q(parent__in=entities_meta, child__in=entities_meta)
).select_related("role", "child__type", "parent__type").order_by("parent__name")
@extend_schema_view( @extend_schema_view(
post=extend_schema( post=extend_schema(
operation_id="CreateTranscriptionEntities", operation_id="CreateTranscriptionEntities",
......
...@@ -38,8 +38,6 @@ EXPORT_QUERIES = [ ...@@ -38,8 +38,6 @@ EXPORT_QUERIES = [
"entity_type", "entity_type",
"entity", "entity",
"transcription_entity", "transcription_entity",
"entity_role",
"entity_link",
"metadata", "metadata",
"dataset", "dataset",
"dataset_element", "dataset_element",
......
SELECT link.id, link.parent_id, link.child_id, link.role_id
FROM documents_entitylink link
INNER JOIN documents_entityrole role ON (link.role_id = role.id)
WHERE role.corpus_id = '{corpus_id}'::uuid
SELECT id, parent_name, child_name, parent_type_id, child_type_id
FROM documents_entityrole
WHERE corpus_id = '{corpus_id}'::uuid
...@@ -26,13 +26,6 @@ CREATE INDEX transcription_entity_entity_id ON transcription_entity (entity_id); ...@@ -26,13 +26,6 @@ CREATE INDEX transcription_entity_entity_id ON transcription_entity (entity_id);
CREATE INDEX transcription_entity_worker_version_id ON transcription_entity (worker_version_id); CREATE INDEX transcription_entity_worker_version_id ON transcription_entity (worker_version_id);
CREATE INDEX transcription_entity_worker_run_id ON transcription_entity (worker_run_id); CREATE INDEX transcription_entity_worker_run_id ON transcription_entity (worker_run_id);
CREATE INDEX entity_link_parent_id ON entity_link (parent_id);
CREATE INDEX entity_link_child_id ON entity_link (child_id);
CREATE INDEX entity_link_role_id ON entity_link (role_id);
CREATE INDEX entity_role_parent_type_id ON entity_role (parent_type_id);
CREATE INDEX entity_role_child_type_id ON entity_role (child_type_id);
CREATE INDEX metadata_element_id ON metadata (element_id); CREATE INDEX metadata_element_id ON metadata (element_id);
CREATE INDEX metadata_entity_id ON metadata (entity_id); CREATE INDEX metadata_entity_id ON metadata (entity_id);
CREATE INDEX metadata_worker_version_id ON metadata (worker_version_id); CREATE INDEX metadata_worker_version_id ON metadata (worker_version_id);
......
PRAGMA foreign_keys = ON; PRAGMA foreign_keys = ON;
CREATE TABLE export_version AS SELECT 8 AS version; CREATE TABLE export_version AS SELECT 9 AS version;
CREATE TABLE image_server ( CREATE TABLE image_server (
id INTEGER NOT NULL, id INTEGER NOT NULL,
...@@ -168,29 +168,6 @@ CREATE TABLE transcription_entity ( ...@@ -168,29 +168,6 @@ CREATE TABLE transcription_entity (
CHECK (worker_run_id IS NULL OR worker_version_id IS NOT NULL) CHECK (worker_run_id IS NULL OR worker_version_id IS NOT NULL)
); );
CREATE TABLE entity_role (
id VARCHAR(37) NOT NULL,
parent_name VARCHAR(250) NOT NULL,
child_name VARCHAR(250) NOT NULL,
parent_type_id VARCHAR(37) NOT NULL,
child_type_id VARCHAR(37) NOT NULL,
PRIMARY KEY (id),
FOREIGN KEY (parent_type_id) REFERENCES entity_type (id) ON DELETE CASCADE,
FOREIGN KEY (child_type_id) REFERENCES entity_type (id) ON DELETE CASCADE,
UNIQUE (parent_name, child_name, parent_type_id, child_type_id)
);
CREATE TABLE entity_link (
id VARCHAR(37) NOT NULL,
parent_id VARCHAR(37) NOT NULL,
child_id VARCHAR(37) NOT NULL,
role_id VARCHAR(37) NOT NULL,
PRIMARY KEY (id),
FOREIGN KEY (parent_id) REFERENCES entity (id),
FOREIGN KEY (child_id) REFERENCES entity (id),
FOREIGN KEY (role_id) REFERENCES entity_role (id)
);
CREATE TABLE metadata ( CREATE TABLE metadata (
id VARCHAR(37) NOT NULL, id VARCHAR(37) NOT NULL,
element_id VARCHAR(37) NOT NULL, element_id VARCHAR(37) NOT NULL,
......
This diff is collapsed.
...@@ -6,12 +6,10 @@ from django.core.management.base import BaseCommand ...@@ -6,12 +6,10 @@ from django.core.management.base import BaseCommand
from django.db import transaction from django.db import transaction
from django.db.models import Q from django.db.models import Q
from django.db.utils import IntegrityError from django.db.utils import IntegrityError
from rest_framework.authtoken.models import Token
from arkindex.images.models import ImageServer from arkindex.images.models import ImageServer
from arkindex.ponos.models import Farm from arkindex.ponos.models import Farm
from arkindex.process.models import FeatureUsage, Repository, Worker, WorkerType, WorkerVersion, WorkerVersionState from arkindex.process.models import FeatureUsage, Repository, Worker, WorkerType, WorkerVersion, WorkerVersionState
from arkindex.users.models import User
# Constants used in architecture project # Constants used in architecture project
UPLOADS_IMAGE_SERVER_ID = 12345 UPLOADS_IMAGE_SERVER_ID = 12345
...@@ -30,7 +28,6 @@ IMPORT_WORKER_SLUG = "file_import" ...@@ -30,7 +28,6 @@ IMPORT_WORKER_SLUG = "file_import"
IMPORT_WORKER_REPO = "https://gitlab.teklia.com/arkindex/tasks" IMPORT_WORKER_REPO = "https://gitlab.teklia.com/arkindex/tasks"
IMPORT_WORKER_REVISION_MESSAGE = "File import worker bootstrap" IMPORT_WORKER_REVISION_MESSAGE = "File import worker bootstrap"
IMPORT_WORKER_REVISION_AUTHOR = "Dev Bootstrap" IMPORT_WORKER_REVISION_AUTHOR = "Dev Bootstrap"
ADMIN_API_TOKEN = "deadbeefTestToken"
class Command(BaseCommand): class Command(BaseCommand):
...@@ -48,15 +45,6 @@ class Command(BaseCommand): ...@@ -48,15 +45,6 @@ class Command(BaseCommand):
"""Helper to display error messages""" """Helper to display error messages"""
self.stdout.write(self.style.ERROR(f"{msg}")) self.stdout.write(self.style.ERROR(f"{msg}"))
def check_user(self, user):
"""Ensure a user is admin"""
if user.is_admin:
self.success(f"Admin user for legacy worker API tokens {user} is valid")
else:
user.is_admin = True
user.save()
self.warn(f"Updated user {user} to admin")
def create_image_server(self, id, url, bucket, region, display_name): def create_image_server(self, id, url, bucket, region, display_name):
try: try:
server = ImageServer.objects.get(Q(id=id) | Q(url=url)) server = ImageServer.objects.get(Q(id=id) | Q(url=url))
...@@ -129,29 +117,6 @@ class Command(BaseCommand): ...@@ -129,29 +117,6 @@ class Command(BaseCommand):
) )
self.success("Ponos farm created") self.success("Ponos farm created")
# An admin API user with a specific token
try:
token = Token.objects.get(key=ADMIN_API_TOKEN)
self.check_user(token.user)
except Token.DoesNotExist:
# Create a new internal user
user, _ = User.objects.get_or_create(
email="internal+bootstrap@teklia.com",
defaults={
"display_name": "Bootstrap Admin user",
"is_admin": True,
}
)
self.success("Created internal user")
self.check_user(user)
# Finally create a specific token for that user
if hasattr(user, "auth_token"):
# Support One-To-One relation
user.auth_token.delete()
Token.objects.create(key=ADMIN_API_TOKEN, user=user)
self.success(f"Created token {ADMIN_API_TOKEN}")
# an image server for local cantaloupe https://ark.localhost/iiif/2 # an image server for local cantaloupe https://ark.localhost/iiif/2
uploads_server = self.create_image_server(UPLOADS_IMAGE_SERVER_ID , UPLOADS_IMAGE_SERVER_URL, UPLOADS_IMAGE_SERVER_BUCKET , UPLOADS_IMAGE_SERVER_REGION , "Local IIIF server for user uploaded files through frontend") uploads_server = self.create_image_server(UPLOADS_IMAGE_SERVER_ID , UPLOADS_IMAGE_SERVER_URL, UPLOADS_IMAGE_SERVER_BUCKET , UPLOADS_IMAGE_SERVER_REGION , "Local IIIF server for user uploaded files through frontend")
if uploads_server is None: if uploads_server is None:
......
...@@ -49,7 +49,7 @@ class Command(BaseCommand): ...@@ -49,7 +49,7 @@ class Command(BaseCommand):
img5 = Image.objects.create(path="img5", width=1000, height=1000, server=imgsrv) img5 = Image.objects.create(path="img5", width=1000, height=1000, server=imgsrv)
img6 = Image.objects.create(path="img6", width=1000, height=1000, server=imgsrv) img6 = Image.objects.create(path="img6", width=1000, height=1000, server=imgsrv)
# Create an admin, an internal and a normal user # Create an admin and a normal user
superuser = User.objects.create_superuser("root@root.fr", "Pa$$w0rd", display_name="Admin") superuser = User.objects.create_superuser("root@root.fr", "Pa$$w0rd", display_name="Admin")
superuser.verified_email = True superuser.verified_email = True
superuser.save() superuser.save()
......
...@@ -14,10 +14,21 @@ from rq.utils import as_text ...@@ -14,10 +14,21 @@ from rq.utils import as_text
from arkindex.documents.models import CorpusExport, CorpusExportState, Element from arkindex.documents.models import CorpusExport, CorpusExportState, Element
from arkindex.images.models import Image, ImageServer from arkindex.images.models import Image, ImageServer
from arkindex.ponos.models import Artifact, Task from arkindex.ponos.models import Artifact, Task
from arkindex.process.models import DataFile, GitRef, GitRefType, Process, WorkerVersion, WorkerVersionState from arkindex.process.models import (
CorpusWorkerVersion,
DataFile,
GitRef,
GitRefType,
Process,
Worker,
WorkerActivity,
WorkerRun,
WorkerVersion,
WorkerVersionState,
)
from arkindex.project.aws import s3 from arkindex.project.aws import s3
from arkindex.project.rq_overrides import Job from arkindex.project.rq_overrides import Job
from arkindex.training.models import ModelVersion from arkindex.training.models import Model, ModelVersion
from redis.exceptions import ConnectionError from redis.exceptions import ConnectionError
# Ponos artifacts use the path: <task id>/<path> # Ponos artifacts use the path: <task id>/<path>
...@@ -32,6 +43,9 @@ class Command(BaseCommand): ...@@ -32,6 +43,9 @@ class Command(BaseCommand):
help = "Clean up old corpus exports, trashed DataFiles, expired processes and S3 buckets" help = "Clean up old corpus exports, trashed DataFiles, expired processes and S3 buckets"
def handle(self, *args, **options): def handle(self, *args, **options):
# Cleaning up workers could free some artifacts, so clean them before artifacts
self.cleanup_archived_workers()
self.cleanup_artifacts() self.cleanup_artifacts()
self.cleanup_expired_processes() self.cleanup_expired_processes()
...@@ -48,6 +62,8 @@ class Command(BaseCommand): ...@@ -48,6 +62,8 @@ class Command(BaseCommand):
self.cleanup_ponos_logs() self.cleanup_ponos_logs()
self.cleanup_archived_models()
self.cleanup_unlinked_model_versions() self.cleanup_unlinked_model_versions()
self.cleanup_rq_user_registries() self.cleanup_rq_user_registries()
...@@ -294,6 +310,71 @@ class Command(BaseCommand): ...@@ -294,6 +310,71 @@ class Command(BaseCommand):
self.stdout.write(self.style.SUCCESS("Successfully cleaned up orphaned Ponos logs.")) self.stdout.write(self.style.SUCCESS("Successfully cleaned up orphaned Ponos logs."))
def cleanup_archived_workers(self):
    """
    Remove Worker instances that have been archived for longer than the configured worker cleanup delay
    and that are not being used in any worker result.

    A worker is kept, and counted as skipped in the final summary, when either its worker versions
    or the worker runs built on its configurations are reported as in use.
    """
    self.stdout.write("Removing archived workers…")

    workers = Worker.objects.filter(archived__lte=timezone.now() - timedelta(days=settings.WORKER_CLEANUP_DELAY))
    skipped, deleted = 0, 0
    for worker in workers.iterator():
        # There are both foreign keys for worker versions and worker runs on worker results.
        # Some old results might only have a worker version ID, but when a worker run ID is set,
        # the worker version ID is deduced from it, so we only have to check on the version.
        if worker.versions.all().in_use():
            skipped += 1
            continue

        # Skip any workers whose WorkerConfigurations are in use.
        # This should never happen since we already filter on the WorkerVersions,
        # but that could lead to deleting worker results when we didn't want to.
        if WorkerRun.objects.filter(configuration__worker=worker).in_use():
            self.stdout.write(self.style.WARNING(
                f"Worker {worker.name} ({worker.id}) does not have any worker versions used by worker results, "
                "but some of its worker configurations are in use."
            ))
            # The summary below also covers configurations, so this worker has to be counted too;
            # otherwise the reported total would not match the number of workers left in place.
            skipped += 1
            continue

        self.stdout.write(f"Removing worker {worker.name} ({worker.id})")
        worker.delete()
        deleted += 1

    if skipped:
        self.stdout.write(f"Skipping {skipped} archived workers that have worker versions or configurations used in worker results.")
    self.stdout.write(self.style.SUCCESS(f"Successfully cleaned up {deleted} archived workers."))
def cleanup_archived_models(self):
    """
    Delete each Model whose archival date is at least MODEL_CLEANUP_DELAY days old,
    unless worker runs relying on one of its model versions are used in worker results.
    """
    self.stdout.write("Removing archived models…")

    cutoff = timezone.now() - timedelta(days=settings.MODEL_CLEANUP_DELAY)
    kept = removed = 0
    for model in Model.objects.filter(archived__lte=cutoff).iterator():
        runs = WorkerRun.objects.filter(model_version__model=model)
        if runs.in_use():
            kept += 1
            continue

        self.stdout.write(f"Removing model {model.name} ({model.id})")

        # WorkerActivities, then CorpusWorkerVersions, are removed explicitly before the model.
        # Their model version normally falls back to SET_NULL, but that can trip the unique
        # constraints when rows with a null model version already exist.
        for related in (WorkerActivity, CorpusWorkerVersion):
            related.objects.filter(model_version__model=model).delete()

        model.delete()
        removed += 1

    if kept:
        self.stdout.write(f"Skipping {kept} archived models that have model versions used in worker results.")
    self.stdout.write(self.style.SUCCESS(f"Successfully cleaned up {removed} archived models."))
def cleanup_unlinked_model_versions(self): def cleanup_unlinked_model_versions(self):
self.stdout.write("Removing orphaned model versions archives…") self.stdout.write("Removing orphaned model versions archives…")
bucket = s3.Bucket(settings.AWS_TRAINING_BUCKET) bucket = s3.Bucket(settings.AWS_TRAINING_BUCKET)
......
...@@ -18,8 +18,6 @@ from arkindex.documents.models import ( ...@@ -18,8 +18,6 @@ from arkindex.documents.models import (
ElementPath, ElementPath,
ElementType, ElementType,
Entity, Entity,
EntityLink,
EntityRole,
EntityType, EntityType,
MetaData, MetaData,
MLClass, MLClass,
...@@ -40,7 +38,7 @@ from arkindex.process.models import ( ...@@ -40,7 +38,7 @@ from arkindex.process.models import (
from arkindex.training.models import Dataset, DatasetElement, DatasetSet, Model from arkindex.training.models import Dataset, DatasetElement, DatasetSet, Model
from arkindex.users.models import Role, User from arkindex.users.models import Role, User
EXPORT_VERSION = 8 EXPORT_VERSION = 9
TABLE_NAMES = { TABLE_NAMES = {
"export_version", "export_version",
...@@ -52,8 +50,6 @@ TABLE_NAMES = { ...@@ -52,8 +50,6 @@ TABLE_NAMES = {
"element_path", "element_path",
"entity", "entity",
"entity_type", "entity_type",
"entity_role",
"entity_link",
"transcription", "transcription",
"transcription_entity", "transcription_entity",
"metadata", "metadata",
...@@ -132,8 +128,6 @@ SQL_TOP_LEVEL_PATH_QUERY = """ ...@@ -132,8 +128,6 @@ SQL_TOP_LEVEL_PATH_QUERY = """
SQL_ENTITY_QUERY = "SELECT * FROM entity" SQL_ENTITY_QUERY = "SELECT * FROM entity"
SQL_ENTITY_TYPE_QUERY = "SELECT * FROM entity_type" SQL_ENTITY_TYPE_QUERY = "SELECT * FROM entity_type"
SQL_ENTITY_ROLE_QUERY = "SELECT * FROM entity_role"
SQL_ENTITY_LINK_QUERY = "SELECT * FROM entity_link"
SQL_TRANSCRIPTION_QUERY = "SELECT * FROM transcription" SQL_TRANSCRIPTION_QUERY = "SELECT * FROM transcription"
SQL_TRANSCRIPTION_ENTITY_QUERY = "SELECT * FROM transcription_entity" SQL_TRANSCRIPTION_ENTITY_QUERY = "SELECT * FROM transcription_entity"
...@@ -249,24 +243,6 @@ class Command(BaseCommand): ...@@ -249,24 +243,6 @@ class Command(BaseCommand):
corpus=self.corpus corpus=self.corpus
)] )]
def convert_entity_roles(self, row):
return [EntityRole(
id=row["id"],
parent_name=row["parent_name"],
child_name=row["child_name"],
parent_type_id=row["parent_type_id"],
child_type_id=row["child_type_id"],
corpus=self.corpus
)]
def convert_entity_links(self, row):
return [EntityLink(
id=row["id"],
parent_id=row["parent_id"],
child_id=row["child_id"],
role_id=row["role_id"],
)]
def convert_transcriptions(self, row): def convert_transcriptions(self, row):
return [Transcription( return [Transcription(
id=row["id"], id=row["id"],
...@@ -597,11 +573,9 @@ class Command(BaseCommand): ...@@ -597,11 +573,9 @@ class Command(BaseCommand):
self.bulk_create_objects(ElementPath, self.convert_element_paths, SQL_ELEMENT_PATH_QUERY, ignore_conflicts=False) self.bulk_create_objects(ElementPath, self.convert_element_paths, SQL_ELEMENT_PATH_QUERY, ignore_conflicts=False)
self.bulk_create_objects(ElementPath, self.convert_top_level_paths, SQL_TOP_LEVEL_PATH_QUERY, ignore_conflicts=False) self.bulk_create_objects(ElementPath, self.convert_top_level_paths, SQL_TOP_LEVEL_PATH_QUERY, ignore_conflicts=False)
# Create entities, entity types, roles and links # Create entities and entity types
self.bulk_create_objects(EntityType, self.convert_entity_types, SQL_ENTITY_TYPE_QUERY) self.bulk_create_objects(EntityType, self.convert_entity_types, SQL_ENTITY_TYPE_QUERY)
self.bulk_create_objects(Entity, self.convert_entities, SQL_ENTITY_QUERY) self.bulk_create_objects(Entity, self.convert_entities, SQL_ENTITY_QUERY)
self.bulk_create_objects(EntityRole, self.convert_entity_roles, SQL_ENTITY_ROLE_QUERY)
self.bulk_create_objects(EntityLink, self.convert_entity_links, SQL_ENTITY_LINK_QUERY)
# Create transcriptions and transcription entities # Create transcriptions and transcription entities
self.bulk_create_objects(Transcription, self.convert_transcriptions, SQL_TRANSCRIPTION_QUERY) self.bulk_create_objects(Transcription, self.convert_transcriptions, SQL_TRANSCRIPTION_QUERY)
......
...@@ -135,24 +135,6 @@ class Command(BaseCommand): ...@@ -135,24 +135,6 @@ class Command(BaseCommand):
""") """)
self.stdout.write(f"Updated {cursor.rowcount} TranscriptionEntities.") self.stdout.write(f"Updated {cursor.rowcount} TranscriptionEntities.")
self.stdout.write("Updating child entity IDs on entity links…")
cursor.execute("""
UPDATE documents_entitylink
SET child_id = keep_id
FROM duplicated_entities
WHERE child_id = remove_id;
""")
self.stdout.write(f"Updated {cursor.rowcount} entity links.")
self.stdout.write("Updating parent entity IDs on entity links…")
cursor.execute("""
UPDATE documents_entitylink
SET parent_id = keep_id
FROM duplicated_entities
WHERE parent_id = remove_id;
""")
self.stdout.write(f"Updated {cursor.rowcount} entity links.")
self.stdout.write("Removing duplicate entities…") self.stdout.write("Removing duplicate entities…")
cursor.execute(""" cursor.execute("""
DELETE FROM documents_entity DELETE FROM documents_entity
......
# Generated by Django 4.1.7 on 2024-04-15 12:36
from django.db import migrations
class Migration(migrations.Migration):
    """
    Drop the EntityRole and EntityLink models from the documents app.

    The unique constraint and the three fields of EntityRole are removed first,
    then EntityLink is deleted, and finally EntityRole itself.
    """

    dependencies = [("documents", "0009_corpusexport_source")]

    operations = [
        migrations.AlterUniqueTogether(name="entityrole", unique_together=None),
        # Same fields, in the same order, as the generated migration listed them one by one
        *(
            migrations.RemoveField(model_name="entityrole", name=field_name)
            for field_name in ("child_type", "corpus", "parent_type")
        ),
        migrations.DeleteModel(name="EntityLink"),
        migrations.DeleteModel(name="EntityRole"),
    ]
...@@ -769,63 +769,6 @@ class Entity(models.Model): ...@@ -769,63 +769,6 @@ class Entity(models.Model):
return self.name return self.name
class EntityRole(models.Model):
"""
Role's type between a parent and a child
"""
parent_name = models.CharField(max_length=250)
child_name = models.CharField(max_length=250)
parent_type = models.ForeignKey(EntityType, related_name="parent_role", on_delete=models.DO_NOTHING)
child_type = models.ForeignKey(EntityType, related_name="child_role", on_delete=models.DO_NOTHING)
corpus = models.ForeignKey(Corpus, related_name="roles", on_delete=models.CASCADE)
class Meta:
unique_together = (
("parent_name", "child_name", "parent_type", "child_type", "corpus"),
)
def __str__(self):
return "{} -> {}".format(self.parent_name, self.child_name)
class EntityLink(models.Model):
    """
    Link between two entities with a role
    """

    id = models.UUIDField(default=uuid.uuid4, primary_key=True, editable=False)
    parent = models.ForeignKey(Entity, related_name="parents", on_delete=models.CASCADE)
    child = models.ForeignKey(Entity, related_name="children", on_delete=models.CASCADE)
    role = models.ForeignKey(EntityRole, related_name="links", on_delete=models.CASCADE)

    def clean(self):
        """
        Check that both entities match the types and the corpus declared by the role.

        The checks run in order (parent type, parent corpus, child type, child corpus)
        and stop at the first mismatch. Nothing is checked past the first relation
        that is not set: reporting missing values is left to field validation.

        Raises:
            ValidationError: when the parent or the child has a type or a corpus
                that differs from the one expected by the role.
        """
        # The raw `*_id` attributes are tested rather than the related objects:
        # reading an unset non-nullable foreign key through its descriptor raises
        # RelatedObjectDoesNotExist instead of returning None, and full_clean()
        # calls clean() even when field validation has already failed.
        if self.role_id is None:
            return

        if self.parent_id is None:
            return
        if self.parent.type_id != self.role.parent_type_id:
            raise ValidationError(
                f"Parent's type {self.parent.type_id} is different from the expected type {self.role.parent_type_id}"
            )
        if self.parent.corpus_id != self.role.corpus_id:
            raise ValidationError(
                f"Parent's corpus {self.parent.corpus_id} is different from the expected corpus {self.role.corpus_id}"
            )

        if self.child_id is None:
            return
        if self.child.type_id != self.role.child_type_id:
            raise ValidationError(
                f"Child's type {self.child.type_id} is different from the expected type {self.role.child_type_id}"
            )
        if self.child.corpus_id != self.role.corpus_id:
            raise ValidationError(
                f"Child's corpus {self.child.corpus_id} is different from the expected corpus {self.role.corpus_id}"
            )

    def save(self, *args, **kwargs):
        """Run the full model validation, then save the link."""
        self.full_clean()
        super().save(*args, **kwargs)
class TextOrientation(Enum): class TextOrientation(Enum):
HorizontalLeftToRight = "horizontal-lr" HorizontalLeftToRight = "horizontal-lr"
HorizontalRightToLeft = "horizontal-rl" HorizontalRightToLeft = "horizontal-rl"
......
...@@ -6,7 +6,7 @@ from drf_spectacular.utils import extend_schema_serializer ...@@ -6,7 +6,7 @@ from drf_spectacular.utils import extend_schema_serializer
from rest_framework import serializers from rest_framework import serializers
from rest_framework.exceptions import ValidationError from rest_framework.exceptions import ValidationError
from arkindex.documents.models import Corpus, Entity, EntityLink, EntityRole, EntityType, TranscriptionEntity from arkindex.documents.models import Corpus, Entity, EntityType, TranscriptionEntity
from arkindex.documents.serializers.light import CorpusLightSerializer, EntityTypeLightSerializer from arkindex.documents.serializers.light import CorpusLightSerializer, EntityTypeLightSerializer
from arkindex.documents.serializers.ml import WorkerRunSummarySerializer from arkindex.documents.serializers.ml import WorkerRunSummarySerializer
from arkindex.project.serializer_fields import ForbiddenField, WorkerRunIDField from arkindex.project.serializer_fields import ForbiddenField, WorkerRunIDField
...@@ -31,7 +31,7 @@ class EntityTypeSerializer(serializers.ModelSerializer): ...@@ -31,7 +31,7 @@ class EntityTypeSerializer(serializers.ModelSerializer):
# Get an error if a request tries to change an entity type's corpus # Get an error if a request tries to change an entity type's corpus
corpus = data.get("corpus") corpus = data.get("corpus")
if self.instance and corpus: if self.instance and corpus:
raise ValidationError({"corpus": ["It is not possible to update an Entity Type\'s corpus."]}) raise ValidationError({"corpus": ["It is not possible to update an Entity Type's corpus."]})
data = super().to_internal_value(data) data = super().to_internal_value(data)
return data return data
...@@ -86,79 +86,6 @@ class BaseEntitySerializer(serializers.ModelSerializer): ...@@ -86,79 +86,6 @@ class BaseEntitySerializer(serializers.ModelSerializer):
) )
class EntityRoleSerializer(serializers.ModelSerializer):
    """
    Serialize a role between two types of entity
    """

    # Both entity types are exposed as primary keys under a `*_type_id` name,
    # and mapped to the `parent_type` / `child_type` model fields.
    parent_type_id = serializers.PrimaryKeyRelatedField(
        queryset=EntityType.objects.all(),
        style={"base_template": "input.html"},
        source="parent_type",
    )
    child_type_id = serializers.PrimaryKeyRelatedField(
        queryset=EntityType.objects.all(),
        style={"base_template": "input.html"},
        source="child_type",
    )

    class Meta:
        model = EntityRole
        fields = (
            "id",
            "parent_name",
            "child_name",
            "parent_type_id",
            "child_type_id",
        )

    def validate(self, data):
        """
        Resolve the corpus from the URL and check that both types belong to it.

        The corpus ID is read from the `pk` URL keyword argument of the request
        found in the serializer context, and must match a corpus returned by
        `Corpus.objects.writable` for the requesting user. On success, the corpus
        is stored in the returned data under the `corpus` key.
        """
        # The corpus is never part of the payload and a request is mandatory.
        assert "corpus" not in data
        assert self.context.get("request") is not None

        request = self.context["request"]
        corpus_id = request.parser_context["kwargs"]["pk"]
        corpus = Corpus.objects.writable(request.user).filter(id=corpus_id).first()
        if corpus is None:
            raise serializers.ValidationError({
                "corpus": ["You do not have write access to this corpus"],
                "id": corpus_id,
            })

        # Gather the errors of both sides so that they are reported together,
        # each one under the name of the field exposed by the API.
        errors = defaultdict(list)
        for source in ("parent_type", "child_type"):
            entity_type = data.get(source)
            if entity_type.corpus_id != corpus.id:
                errors[f"{source}_id"].append(f"Type {entity_type.id} does not exist in corpus {corpus}.")
        if errors:
            raise ValidationError(errors)

        data["corpus"] = corpus
        return data
# Describes the `id` and `corpus` keys that may be found in an error response.
# Both keys are optional.
class CreateEntityRoleErrorResponseSerializer(serializers.Serializer):
    id = serializers.UUIDField(required=False, help_text="The corpus ID.")
    corpus = serializers.ListField(
        child=serializers.CharField(),
        required=False,
        help_text="Errors that occurred during corpus ID field validation.",
    )
class EntityLinkSerializer(serializers.ModelSerializer):
    """
    Serialize an entity link with its child, parent and role
    """

    # The three relations are rendered as nested objects rather than plain IDs.
    parent = BaseEntitySerializer()
    child = BaseEntitySerializer()
    role = EntityRoleSerializer()

    class Meta:
        model = EntityLink
        fields = (
            "id",
            "parent",
            "child",
            "role",
        )
@extend_schema_serializer( @extend_schema_serializer(
deprecate_fields=("worker_version_id") deprecate_fields=("worker_version_id")
) )
...@@ -167,8 +94,6 @@ class EntitySerializer(BaseEntitySerializer): ...@@ -167,8 +94,6 @@ class EntitySerializer(BaseEntitySerializer):
Serialize an entity with its metadata Serialize an entity with its metadata
""" """
corpus = CorpusLightSerializer(read_only=True) corpus = CorpusLightSerializer(read_only=True)
children = EntityLinkSerializer(many=True, read_only=True)
parents = EntityLinkSerializer(many=True, read_only=True)
# When updating an entity, the type can be set either by using its EntityType UUID, or its name # When updating an entity, the type can be set either by using its EntityType UUID, or its name
# (in which case the serializer checks that an EntityType with this name exists in the corpus) # (in which case the serializer checks that an EntityType with this name exists in the corpus)
type_id = serializers.PrimaryKeyRelatedField( type_id = serializers.PrimaryKeyRelatedField(
...@@ -183,14 +108,10 @@ class EntitySerializer(BaseEntitySerializer): ...@@ -183,14 +108,10 @@ class EntitySerializer(BaseEntitySerializer):
model = Entity model = Entity
fields = BaseEntitySerializer.Meta.fields + ( fields = BaseEntitySerializer.Meta.fields + (
"corpus", "corpus",
"children",
"parents",
"type_id" "type_id"
) )
read_only_fields = BaseEntitySerializer.Meta.read_only_fields = ( read_only_fields = BaseEntitySerializer.Meta.read_only_fields = (
"corpus", "corpus",
"children",
"parents",
) )
def validate(self, data): def validate(self, data):
...@@ -221,8 +142,6 @@ class EntityCreateSerializer(BaseEntitySerializer): ...@@ -221,8 +142,6 @@ class EntityCreateSerializer(BaseEntitySerializer):
style={"base_template": "input.html"}, style={"base_template": "input.html"},
) )
metas = serializers.HStoreField(child=serializers.CharField(), required=False) metas = serializers.HStoreField(child=serializers.CharField(), required=False)
children = EntityLinkSerializer(many=True, read_only=True)
parents = EntityLinkSerializer(many=True, read_only=True)
worker_version = serializers.UUIDField( worker_version = serializers.UUIDField(
allow_null=True, allow_null=True,
required=False, required=False,
...@@ -262,15 +181,11 @@ class EntityCreateSerializer(BaseEntitySerializer): ...@@ -262,15 +181,11 @@ class EntityCreateSerializer(BaseEntitySerializer):
"metas", "metas",
"validated", "validated",
"corpus", "corpus",
"parents",
"children",
"worker_version", "worker_version",
"worker_run_id" "worker_run_id"
) )
read_only_fields = ( read_only_fields = (
"id", "id",
"children",
"parents",
) )
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
...@@ -300,46 +215,6 @@ class EntityCreateSerializer(BaseEntitySerializer): ...@@ -300,46 +215,6 @@ class EntityCreateSerializer(BaseEntitySerializer):
return data return data
class EntityLinkCreateSerializer(EntityLinkSerializer):
    """
    Serialize an entity with a possible parents and children
    """

    # The relations are received as primary keys. Their querysets start empty
    # and are only filled in __init__ once the requesting user is known.
    parent = serializers.PrimaryKeyRelatedField(
        queryset=Entity.objects.none(),
        style={"base_template": "input.html"},
    )
    child = serializers.PrimaryKeyRelatedField(
        queryset=Entity.objects.none(),
        style={"base_template": "input.html"},
    )
    role = serializers.PrimaryKeyRelatedField(
        queryset=EntityRole.objects.none(),
        style={"base_template": "input.html"},
    )

    class Meta:
        model = EntityLink
        fields = EntityLinkSerializer.Meta.fields

    def __init__(self, *args, **kwargs):
        """
        Restrict the selectable entities and roles to the corpora returned by
        `Corpus.objects.writable` for the requesting user.
        """
        super().__init__(*args, **kwargs)

        request = self.context.get("request")
        if not request:
            # Do not raise Error in order to create OpenAPI schema
            return

        writable_corpora = Corpus.objects.writable(request.user)
        entities = Entity.objects.all().filter(corpus__in=writable_corpora)
        roles = EntityRole.objects.all().filter(corpus__in=writable_corpora)

        for field_name, queryset in (("parent", entities), ("child", entities), ("role", roles)):
            self.fields[field_name].queryset = queryset

    def validate(self, data):
        """Run the model validation on an unsaved link built from the validated data."""
        validated = super().validate(data)
        EntityLink(**validated).full_clean()
        return validated
class TranscriptionEntityCreateSerializer(serializers.ModelSerializer): class TranscriptionEntityCreateSerializer(serializers.ModelSerializer):
""" """
Serialise the link between an entity and a transcription Serialise the link between an entity and a transcription
......
...@@ -16,7 +16,6 @@ from arkindex.documents.models import ( ...@@ -16,7 +16,6 @@ from arkindex.documents.models import (
Corpus, Corpus,
Element, Element,
ElementPath, ElementPath,
EntityLink,
MetaData, MetaData,
Selection, Selection,
Transcription, Transcription,
...@@ -58,8 +57,6 @@ def corpus_delete(corpus_id: str) -> None: ...@@ -58,8 +57,6 @@ def corpus_delete(corpus_id: str) -> None:
WorkerActivity.objects.filter(process__corpus_id=corpus_id), WorkerActivity.objects.filter(process__corpus_id=corpus_id),
corpus.files.all(), corpus.files.all(),
MetaData.objects.filter(element__corpus_id=corpus_id), MetaData.objects.filter(element__corpus_id=corpus_id),
EntityLink.objects.filter(role__corpus_id=corpus_id),
corpus.roles.all(),
TranscriptionEntity.objects.filter(entity__corpus_id=corpus_id), TranscriptionEntity.objects.filter(entity__corpus_id=corpus_id),
TranscriptionEntity.objects.filter(transcription__element__corpus_id=corpus_id), TranscriptionEntity.objects.filter(transcription__element__corpus_id=corpus_id),
corpus.entities.all(), corpus.entities.all(),
......