Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • arkindex/backend
1 result
Show changes
Commits on Source (15)
Showing
with 952 additions and 1339 deletions
......@@ -12,8 +12,10 @@ repos:
- 'flake8-debugger==3.1.0'
- 'flake8-quotes==3.3.2'
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.11
# Ruff version.
rev: v0.3.7
hooks:
# Run the linter.
- id: ruff
args: [--fix]
- repo: https://github.com/pre-commit/pre-commit-hooks
......
......@@ -54,7 +54,7 @@ release:
# Force-remove every container reported by `docker ps -a -q`.
# Does nothing when that command returns no container ID.
clean-docker:
	$(eval containers:=$(shell docker ps -a -q))
	@if [ -n "$(containers)" ]; then \
		echo "Cleaning up past containers\n" ; \
		docker rm -f $(containers) ; \
	fi
......
1.6.0-rc2
1.6.0
......@@ -11,8 +11,6 @@ from arkindex.documents.models import (
Element,
ElementType,
Entity,
EntityLink,
EntityRole,
EntityType,
MetaData,
MLClass,
......@@ -135,29 +133,15 @@ class EntityMetaForm(forms.ModelForm):
metas = HStoreFormField()
class EntityLinkInLine(admin.TabularInline):
    # Tabular inline over EntityLink rows, attached to the edited object through the "parent" foreign key.
    # The "child" side is rendered as a raw ID input.
    raw_id_fields = ("child", )
    fk_name = "parent"
    model = EntityLink
class EntityAdmin(admin.ModelAdmin):
    # Admin options for the model registered with this class (Entity).

    # Edition form and inline links
    form = EntityMetaForm
    inlines = (EntityLinkInLine, )

    # Change list: columns, filters and search
    list_display = ("id", "name", "type")
    list_filter = ["corpus", "type"]
    search_fields = ("name", )

    # Detail page: the ID cannot be edited, worker relations use raw ID inputs
    readonly_fields = ("id", )
    raw_id_fields = ("worker_version", "worker_run", )
class EntityRoleAdmin(admin.ModelAdmin):
    # Admin options for the model registered with this class (EntityRole).

    # Rows are sorted by corpus, then by parent and child names
    ordering = ("corpus", "parent_name", "child_name")
    list_filter = ("corpus", )
    list_display = ("id", "corpus", "parent_name", "child_name")
    # The ID is displayed but cannot be edited
    readonly_fields = ("id", )
class EntityTypeAdmin(admin.ModelAdmin):
list_display = ("id", "corpus", "name", "color")
list_filter = ("corpus", )
......@@ -180,7 +164,6 @@ admin.site.register(Transcription, TranscriptionAdmin)
admin.site.register(MLClass, MLClassAdmin)
admin.site.register(MetaData, MetaDataAdmin)
admin.site.register(Entity, EntityAdmin)
admin.site.register(EntityRole, EntityRoleAdmin)
admin.site.register(EntityType, EntityTypeAdmin)
admin.site.register(AllowedMetaData, AllowedMetaDataAdmin)
admin.site.register(CorpusExport, CorpusExportAdmin)
......@@ -3,32 +3,18 @@ from textwrap import dedent
from uuid import UUID
from django.core.exceptions import ValidationError as DjangoValidationError
from django.db.models import Q
from django.shortcuts import get_object_or_404
from drf_spectacular.utils import OpenApiExample, OpenApiParameter, OpenApiResponse, extend_schema, extend_schema_view
from drf_spectacular.utils import OpenApiParameter, OpenApiResponse, extend_schema, extend_schema_view
from rest_framework import permissions, serializers, status
from rest_framework.exceptions import NotFound, PermissionDenied, ValidationError
from rest_framework.generics import CreateAPIView, ListAPIView, ListCreateAPIView, RetrieveUpdateDestroyAPIView
from rest_framework.generics import CreateAPIView, ListAPIView, RetrieveUpdateDestroyAPIView
from rest_framework.response import Response
from arkindex.documents.models import (
Corpus,
Element,
Entity,
EntityLink,
EntityRole,
EntityType,
Transcription,
TranscriptionEntity,
)
from arkindex.documents.models import Corpus, Element, Entity, EntityType, Transcription, TranscriptionEntity
from arkindex.documents.serializers.elements import ElementTinySerializer
from arkindex.documents.serializers.entities import (
BaseEntitySerializer,
CreateEntityRoleErrorResponseSerializer,
EntityCreateSerializer,
EntityLinkCreateSerializer,
EntityLinkSerializer,
EntityRoleSerializer,
EntitySerializer,
EntityTypeCreateSerializer,
EntityTypeSerializer,
......@@ -44,53 +30,6 @@ from arkindex.project.permissions import IsVerified, IsVerifiedOrReadOnly
from arkindex.users.models import Role
@extend_schema(tags=["entities"])
@extend_schema_view(
    get=extend_schema(operation_id="ListCorpusRoles", description="List all roles of a corpus"),
    post=extend_schema(
        description="Create a new entity role",
        responses={
            200: EntityRoleSerializer,
            400: CreateEntityRoleErrorResponseSerializer
        },
        examples=[OpenApiExample(
            status_codes=["400"],
            response_only=True,
            name="role-exists",
            value={"id": "55cd009d-cd4b-4ec2-a475-b060f98f9138", "corpus": ["Role already exists in this corpus"]},
            description="Role already exists."
        )]
    )
)
class CorpusRoles(CorpusACLMixin, ListCreateAPIView):
    """
    List all roles in a corpus
    """
    permission_classes = (IsVerifiedOrReadOnly, )
    serializer_class = EntityRoleSerializer
    # Placeholder queryset; the real one is built per request in get_queryset
    queryset = EntityRole.objects.none()

    def get_queryset(self):
        # The corpus is resolved from the URL "pk" through the mixin helper
        corpus = self.get_corpus(self.kwargs["pk"])
        return EntityRole.objects.filter(corpus=corpus).order_by("parent_name", "child_name")

    def perform_create(self, serializer):
        # Refuse to create a role identical to an existing one in the corpus found in the URL.
        data = self.request.data
        lookup = {
            "parent_name": data["parent_name"],
            "child_name": data["child_name"],
            "parent_type": data["parent_type_id"],
            "child_type": data["child_type_id"],
            "corpus_id": self.request.parser_context["kwargs"]["pk"],
        }
        if EntityRole.objects.filter(**lookup).exists():
            raise serializers.ValidationError({
                "corpus": ["Role already exists in this corpus"],
                "id": self.request.parser_context["kwargs"]["pk"]
            })
        super().perform_create(serializer)
@extend_schema(tags=["entities"])
@extend_schema_view(
get=extend_schema(operation_id="ListCorpusEntityTypes", description="List all entity types in a corpus"),
......@@ -173,8 +112,6 @@ class EntityTypeUpdate(ACLMixin, RetrieveUpdateDestroyAPIView):
def perform_destroy(self, instance):
    """
    Delete the entity type, unless an entity or an entity role still references it.

    Raises a ValidationError with a "detail" message in either case.
    """
    if instance.entities.exists():
        raise ValidationError({"detail": ["Some entities are using this entity type."]})

    # A role may reference the type on its parent side or on its child side
    used_as_parent = Q(parent_type_id=instance.id)
    used_as_child = Q(child_type_id=instance.id)
    if EntityRole.objects.filter(used_as_parent | used_as_child).exists():
        raise ValidationError({"detail": ["Some entity roles are using this entity type."]})

    super().perform_destroy(instance)
......@@ -196,14 +133,6 @@ class EntityDetails(ACLMixin, RetrieveUpdateDestroyAPIView):
.select_related("corpus", "type") \
.filter(corpus__in=Corpus.objects.readable(self.request.user)) \
.prefetch_related(
"parents__role__parent_type",
"parents__role__child_type",
"children__role__parent_type",
"children__role__child_type",
"parents__child__type",
"parents__parent__type",
"children__parent__type",
"children__child__type",
"corpus",
)
......@@ -307,15 +236,6 @@ class EntityCreate(CreateAPIView):
return Response(entity.data, status=status_code, headers=headers)
@extend_schema_view(
    post=extend_schema(operation_id="CreateEntityLink", tags=["entities"]),
)
class EntityLinkCreate(CreateAPIView):
    """
    Create a new link between two entities with a role
    """
    # Validation and creation are delegated to the serializer
    serializer_class = EntityLinkCreateSerializer
    permission_classes = (IsVerified, )
@extend_schema_view(post=extend_schema(
operation_id="CreateTranscriptionEntity",
tags=["entities"],
......@@ -519,41 +439,6 @@ class CorpusEntities(CorpusACLMixin, ListAPIView):
return queryset
@extend_schema_view(
    get=extend_schema(operation_id="ListElementLinks", tags=["entities"]),
)
class ElementLinks(CorpusACLMixin, ListAPIView):
    """
    List all links where parent and child are linked to the element.\n\n
    Requires a **guest** access to the element corpus
    """
    serializer_class = EntityLinkSerializer

    def get_queryset(self):
        # Only the element ID and its corpus are needed to check access rights
        element_lookup = Element.objects.select_related("corpus").only("id", "corpus")
        try:
            element = element_lookup.get(id=self.kwargs["pk"])
        except Element.DoesNotExist:
            raise NotFound

        if not self.has_read_access(element.corpus):
            raise PermissionDenied(detail="You do not have access to this element.")

        # Entities reached through the element's transcriptions
        entities_tr = Entity.objects.filter(transcriptions__element_id=element.id).prefetch_related("transcriptions")
        # Entities reached through the element's metadata
        entities_meta = Entity.objects.filter(metadatas__element_id=element.id).prefetch_related("metadatas")

        # Both ends of a link must belong to one of the two entity sets, in any combination,
        # so that cross references between transcription and metadata entities are supported.
        # The two sets are combined here, in the final query, rather than at the entity level:
        # the original authors measured this to be several times faster.
        both_ends_on_element = (
            Q(parent__in=entities_tr, child__in=entities_tr)
            | Q(parent__in=entities_tr, child__in=entities_meta)
            | Q(parent__in=entities_meta, child__in=entities_tr)
            | Q(parent__in=entities_meta, child__in=entities_meta)
        )
        links = EntityLink.objects.filter(both_ends_on_element)
        return links.select_related("role", "child__type", "parent__type").order_by("parent__name")
@extend_schema_view(
post=extend_schema(
operation_id="CreateTranscriptionEntities",
......
......@@ -38,8 +38,6 @@ EXPORT_QUERIES = [
"entity_type",
"entity",
"transcription_entity",
"entity_role",
"entity_link",
"metadata",
"dataset",
"dataset_element",
......
SELECT link.id, link.parent_id, link.child_id, link.role_id
FROM documents_entitylink link
INNER JOIN documents_entityrole role ON (link.role_id = role.id)
WHERE role.corpus_id = '{corpus_id}'::uuid
SELECT id, parent_name, child_name, parent_type_id, child_type_id
FROM documents_entityrole
WHERE corpus_id = '{corpus_id}'::uuid
......@@ -26,13 +26,6 @@ CREATE INDEX transcription_entity_entity_id ON transcription_entity (entity_id);
CREATE INDEX transcription_entity_worker_version_id ON transcription_entity (worker_version_id);
CREATE INDEX transcription_entity_worker_run_id ON transcription_entity (worker_run_id);
CREATE INDEX entity_link_parent_id ON entity_link (parent_id);
CREATE INDEX entity_link_child_id ON entity_link (child_id);
CREATE INDEX entity_link_role_id ON entity_link (role_id);
CREATE INDEX entity_role_parent_type_id ON entity_role (parent_type_id);
CREATE INDEX entity_role_child_type_id ON entity_role (child_type_id);
CREATE INDEX metadata_element_id ON metadata (element_id);
CREATE INDEX metadata_entity_id ON metadata (entity_id);
CREATE INDEX metadata_worker_version_id ON metadata (worker_version_id);
......
PRAGMA foreign_keys = ON;
CREATE TABLE export_version AS SELECT 8 AS version;
CREATE TABLE export_version AS SELECT 9 AS version;
CREATE TABLE image_server (
id INTEGER NOT NULL,
......@@ -168,29 +168,6 @@ CREATE TABLE transcription_entity (
CHECK (worker_run_id IS NULL OR worker_version_id IS NOT NULL)
);
CREATE TABLE entity_role (
id VARCHAR(37) NOT NULL,
parent_name VARCHAR(250) NOT NULL,
child_name VARCHAR(250) NOT NULL,
parent_type_id VARCHAR(37) NOT NULL,
child_type_id VARCHAR(37) NOT NULL,
PRIMARY KEY (id),
FOREIGN KEY (parent_type_id) REFERENCES entity_type (id) ON DELETE CASCADE,
FOREIGN KEY (child_type_id) REFERENCES entity_type (id) ON DELETE CASCADE,
UNIQUE (parent_name, child_name, parent_type_id, child_type_id)
);
CREATE TABLE entity_link (
id VARCHAR(37) NOT NULL,
parent_id VARCHAR(37) NOT NULL,
child_id VARCHAR(37) NOT NULL,
role_id VARCHAR(37) NOT NULL,
PRIMARY KEY (id),
FOREIGN KEY (parent_id) REFERENCES entity (id),
FOREIGN KEY (child_id) REFERENCES entity (id),
FOREIGN KEY (role_id) REFERENCES entity_role (id)
);
CREATE TABLE metadata (
id VARCHAR(37) NOT NULL,
element_id VARCHAR(37) NOT NULL,
......
This diff is collapsed.
......@@ -6,12 +6,10 @@ from django.core.management.base import BaseCommand
from django.db import transaction
from django.db.models import Q
from django.db.utils import IntegrityError
from rest_framework.authtoken.models import Token
from arkindex.images.models import ImageServer
from arkindex.ponos.models import Farm
from arkindex.process.models import FeatureUsage, Repository, Worker, WorkerType, WorkerVersion, WorkerVersionState
from arkindex.users.models import User
# Constants used in architecture project
UPLOADS_IMAGE_SERVER_ID = 12345
......@@ -30,7 +28,6 @@ IMPORT_WORKER_SLUG = "file_import"
IMPORT_WORKER_REPO = "https://gitlab.teklia.com/arkindex/tasks"
IMPORT_WORKER_REVISION_MESSAGE = "File import worker bootstrap"
IMPORT_WORKER_REVISION_AUTHOR = "Dev Bootstrap"
ADMIN_API_TOKEN = "deadbeefTestToken"
class Command(BaseCommand):
......@@ -48,15 +45,6 @@ class Command(BaseCommand):
"""Helper to display error messages"""
self.stdout.write(self.style.ERROR(f"{msg}"))
def check_user(self, user):
    """
    Make sure the given user has the admin flag.

    Users that are not admin yet are promoted and saved, and a warning is displayed;
    otherwise a success message is displayed and the user is left untouched.
    """
    if not user.is_admin:
        user.is_admin = True
        user.save()
        self.warn(f"Updated user {user} to admin")
        return

    self.success(f"Admin user for legacy worker API tokens {user} is valid")
def create_image_server(self, id, url, bucket, region, display_name):
try:
server = ImageServer.objects.get(Q(id=id) | Q(url=url))
......@@ -129,29 +117,6 @@ class Command(BaseCommand):
)
self.success("Ponos farm created")
# An admin API user with a specific token
try:
token = Token.objects.get(key=ADMIN_API_TOKEN)
self.check_user(token.user)
except Token.DoesNotExist:
# Create a new internal user
user, _ = User.objects.get_or_create(
email="internal+bootstrap@teklia.com",
defaults={
"display_name": "Bootstrap Admin user",
"is_admin": True,
}
)
self.success("Created internal user")
self.check_user(user)
# Finally create a specific token for that user
if hasattr(user, "auth_token"):
# Support One-To-One relation
user.auth_token.delete()
Token.objects.create(key=ADMIN_API_TOKEN, user=user)
self.success(f"Created token {ADMIN_API_TOKEN}")
# an image server for local cantaloupe https://ark.localhost/iiif/2
uploads_server = self.create_image_server(UPLOADS_IMAGE_SERVER_ID , UPLOADS_IMAGE_SERVER_URL, UPLOADS_IMAGE_SERVER_BUCKET , UPLOADS_IMAGE_SERVER_REGION , "Local IIIF server for user uploaded files through frontend")
if uploads_server is None:
......
......@@ -49,7 +49,7 @@ class Command(BaseCommand):
img5 = Image.objects.create(path="img5", width=1000, height=1000, server=imgsrv)
img6 = Image.objects.create(path="img6", width=1000, height=1000, server=imgsrv)
# Create an admin, an internal and a normal user
# Create an admin and a normal user
superuser = User.objects.create_superuser("root@root.fr", "Pa$$w0rd", display_name="Admin")
superuser.verified_email = True
superuser.save()
......
......@@ -14,10 +14,21 @@ from rq.utils import as_text
from arkindex.documents.models import CorpusExport, CorpusExportState, Element
from arkindex.images.models import Image, ImageServer
from arkindex.ponos.models import Artifact, Task
from arkindex.process.models import DataFile, GitRef, GitRefType, Process, WorkerVersion, WorkerVersionState
from arkindex.process.models import (
CorpusWorkerVersion,
DataFile,
GitRef,
GitRefType,
Process,
Worker,
WorkerActivity,
WorkerRun,
WorkerVersion,
WorkerVersionState,
)
from arkindex.project.aws import s3
from arkindex.project.rq_overrides import Job
from arkindex.training.models import ModelVersion
from arkindex.training.models import Model, ModelVersion
from redis.exceptions import ConnectionError
# Ponos artifacts use the path: <task id>/<path>
......@@ -32,6 +43,9 @@ class Command(BaseCommand):
help = "Clean up old corpus exports, trashed DataFiles, expired processes and S3 buckets"
def handle(self, *args, **options):
# Cleaning up workers could free some artifacts, so clean them before artifacts
self.cleanup_archived_workers()
self.cleanup_artifacts()
self.cleanup_expired_processes()
......@@ -48,6 +62,8 @@ class Command(BaseCommand):
self.cleanup_ponos_logs()
self.cleanup_archived_models()
self.cleanup_unlinked_model_versions()
self.cleanup_rq_user_registries()
......@@ -294,6 +310,71 @@ class Command(BaseCommand):
self.stdout.write(self.style.SUCCESS("Successfully cleaned up orphaned Ponos logs."))
def cleanup_archived_workers(self):
    """
    Remove Worker instances that have been archived for longer than the configured worker cleanup delay
    and that are not being used in any worker result.

    Workers that still have worker versions or worker configurations in use are kept,
    and reported in the "skipped" summary line.
    """
    self.stdout.write("Removing archived workers…")

    workers = Worker.objects.filter(archived__lte=timezone.now() - timedelta(days=settings.WORKER_CLEANUP_DELAY))
    skipped, deleted = 0, 0
    for worker in workers.iterator():
        # There are both foreign keys for worker versions and worker runs on worker results.
        # Some old results might only have a worker version ID, but when a worker run ID is set,
        # the worker version ID is deduced from it, so we only have to check on the version.
        if worker.versions.all().in_use():
            skipped += 1
            continue

        # Skip any workers whose WorkerConfigurations are in use.
        # This should never happen since we already filter on the WorkerVersions,
        # but that could lead to deleting worker results when we didn't want to.
        if WorkerRun.objects.filter(configuration__worker=worker).in_use():
            self.stdout.write(self.style.WARNING(
                f"Worker {worker.name} ({worker.id}) does not have any worker versions used by worker results, "
                "but some of its worker configurations are in use."
            ))
            # This worker is kept as well: count it, since the summary below
            # covers workers skipped because of their configurations too.
            skipped += 1
            continue

        self.stdout.write(f"Removing worker {worker.name} ({worker.id})")
        worker.delete()
        deleted += 1

    if skipped:
        self.stdout.write(f"Skipping {skipped} archived workers that have worker versions or configurations used in worker results.")
    self.stdout.write(self.style.SUCCESS(f"Successfully cleaned up {deleted} archived workers."))
def cleanup_archived_models(self):
    """
    Delete the Model instances archived for more than ``settings.MODEL_CLEANUP_DELAY`` days,
    keeping those that are still used in a worker result.
    """
    self.stdout.write("Removing archived models…")

    archived_before = timezone.now() - timedelta(days=settings.MODEL_CLEANUP_DELAY)
    deleted = skipped = 0
    for model in Model.objects.filter(archived__lte=archived_before).iterator():
        model_runs = WorkerRun.objects.filter(model_version__model=model)
        if model_runs.in_use():
            skipped += 1
            continue

        self.stdout.write(f"Removing model {model.name} ({model.id})")

        # WorkerActivities and CorpusWorkerVersions go first, in that order.
        # Their model version normally gets SET_NULL, but that can break unique constraints
        # when rows with a model version set to None already exist.
        for dependent in (WorkerActivity, CorpusWorkerVersion):
            dependent.objects.filter(model_version__model=model).delete()

        model.delete()
        deleted += 1

    if skipped:
        self.stdout.write(f"Skipping {skipped} archived models that have model versions used in worker results.")
    self.stdout.write(self.style.SUCCESS(f"Successfully cleaned up {deleted} archived models."))
def cleanup_unlinked_model_versions(self):
self.stdout.write("Removing orphaned model versions archives…")
bucket = s3.Bucket(settings.AWS_TRAINING_BUCKET)
......
......@@ -18,8 +18,6 @@ from arkindex.documents.models import (
ElementPath,
ElementType,
Entity,
EntityLink,
EntityRole,
EntityType,
MetaData,
MLClass,
......@@ -40,7 +38,7 @@ from arkindex.process.models import (
from arkindex.training.models import Dataset, DatasetElement, DatasetSet, Model
from arkindex.users.models import Role, User
EXPORT_VERSION = 8
EXPORT_VERSION = 9
TABLE_NAMES = {
"export_version",
......@@ -52,8 +50,6 @@ TABLE_NAMES = {
"element_path",
"entity",
"entity_type",
"entity_role",
"entity_link",
"transcription",
"transcription_entity",
"metadata",
......@@ -132,8 +128,6 @@ SQL_TOP_LEVEL_PATH_QUERY = """
SQL_ENTITY_QUERY = "SELECT * FROM entity"
SQL_ENTITY_TYPE_QUERY = "SELECT * FROM entity_type"
SQL_ENTITY_ROLE_QUERY = "SELECT * FROM entity_role"
SQL_ENTITY_LINK_QUERY = "SELECT * FROM entity_link"
SQL_TRANSCRIPTION_QUERY = "SELECT * FROM transcription"
SQL_TRANSCRIPTION_ENTITY_QUERY = "SELECT * FROM transcription_entity"
......@@ -249,24 +243,6 @@ class Command(BaseCommand):
corpus=self.corpus
)]
def convert_entity_roles(self, row):
    """
    Build an unsaved EntityRole from one exported row, attached to ``self.corpus``.

    The instance is returned in a single-item list.
    """
    copied_columns = ("id", "parent_name", "child_name", "parent_type_id", "child_type_id")
    role = EntityRole(
        **{column: row[column] for column in copied_columns},
        corpus=self.corpus,
    )
    return [role]
def convert_entity_links(self, row):
    """
    Build an unsaved EntityLink from one exported row.

    The instance is returned in a single-item list.
    """
    copied_columns = ("id", "parent_id", "child_id", "role_id")
    link = EntityLink(**{column: row[column] for column in copied_columns})
    return [link]
def convert_transcriptions(self, row):
return [Transcription(
id=row["id"],
......@@ -597,11 +573,9 @@ class Command(BaseCommand):
self.bulk_create_objects(ElementPath, self.convert_element_paths, SQL_ELEMENT_PATH_QUERY, ignore_conflicts=False)
self.bulk_create_objects(ElementPath, self.convert_top_level_paths, SQL_TOP_LEVEL_PATH_QUERY, ignore_conflicts=False)
# Create entities, entity types, roles and links
# Create entities and entity types
self.bulk_create_objects(EntityType, self.convert_entity_types, SQL_ENTITY_TYPE_QUERY)
self.bulk_create_objects(Entity, self.convert_entities, SQL_ENTITY_QUERY)
self.bulk_create_objects(EntityRole, self.convert_entity_roles, SQL_ENTITY_ROLE_QUERY)
self.bulk_create_objects(EntityLink, self.convert_entity_links, SQL_ENTITY_LINK_QUERY)
# Create transcriptions and transcription entities
self.bulk_create_objects(Transcription, self.convert_transcriptions, SQL_TRANSCRIPTION_QUERY)
......
......@@ -135,24 +135,6 @@ class Command(BaseCommand):
""")
self.stdout.write(f"Updated {cursor.rowcount} TranscriptionEntities.")
self.stdout.write("Updating child entity IDs on entity links…")
cursor.execute("""
UPDATE documents_entitylink
SET child_id = keep_id
FROM duplicated_entities
WHERE child_id = remove_id;
""")
self.stdout.write(f"Updated {cursor.rowcount} entity links.")
self.stdout.write("Updating parent entity IDs on entity links…")
cursor.execute("""
UPDATE documents_entitylink
SET parent_id = keep_id
FROM duplicated_entities
WHERE parent_id = remove_id;
""")
self.stdout.write(f"Updated {cursor.rowcount} entity links.")
self.stdout.write("Removing duplicate entities…")
cursor.execute("""
DELETE FROM documents_entity
......
# Generated by Django 4.1.7 on 2024-04-15 12:36
from django.db import migrations
class Migration(migrations.Migration):
    # Drops the EntityLink and EntityRole models from the "documents" app.

    dependencies = [
        ("documents", "0009_corpusexport_source"),
    ]

    operations = [
        # The unique constraint goes first, before the fields it is built on are removed
        migrations.AlterUniqueTogether(
            name="entityrole",
            unique_together=None,
        ),
        # Detach EntityRole from the models it points to
        migrations.RemoveField(
            model_name="entityrole",
            name="child_type",
        ),
        migrations.RemoveField(
            model_name="entityrole",
            name="corpus",
        ),
        migrations.RemoveField(
            model_name="entityrole",
            name="parent_type",
        ),
        # Links are removed before the roles
        migrations.DeleteModel(
            name="EntityLink",
        ),
        migrations.DeleteModel(
            name="EntityRole",
        ),
    ]
......@@ -769,63 +769,6 @@ class Entity(models.Model):
return self.name
class EntityRole(models.Model):
    """
    Role's type between a parent and a child
    """
    # Display names of each side of the role
    parent_name = models.CharField(max_length=250)
    child_name = models.CharField(max_length=250)
    # Entity types expected on each side; DO_NOTHING leaves deletions to the database
    parent_type = models.ForeignKey(EntityType, related_name="parent_role", on_delete=models.DO_NOTHING)
    child_type = models.ForeignKey(EntityType, related_name="child_role", on_delete=models.DO_NOTHING)
    # Roles are deleted along with their corpus
    corpus = models.ForeignKey(Corpus, related_name="roles", on_delete=models.CASCADE)

    class Meta:
        # A given combination of names and types can only exist once per corpus
        unique_together = (
            ("parent_name", "child_name", "parent_type", "child_type", "corpus"),
        )

    def __str__(self):
        names = (self.parent_name, self.child_name)
        return "{} -> {}".format(*names)
class EntityLink(models.Model):
    """
    Link between two entities with a role
    """
    id = models.UUIDField(default=uuid.uuid4, primary_key=True, editable=False)
    parent = models.ForeignKey(Entity, related_name="parents", on_delete=models.CASCADE)
    child = models.ForeignKey(Entity, related_name="children", on_delete=models.CASCADE)
    role = models.ForeignKey(EntityRole, related_name="links", on_delete=models.CASCADE)

    def clean(self):
        # Check that each side of the link matches the type and the corpus declared by the role.
        # Checks stop silently as soon as the role, the parent or the child is None.
        role = self.role
        if role is None:
            return

        parent = self.parent
        if parent is None:
            return
        if parent.type_id != role.parent_type_id:
            raise ValidationError("Parent's type {} is different from the expected type {}".format(
                parent.type_id,
                role.parent_type_id))
        if parent.corpus_id != role.corpus_id:
            raise ValidationError("Parent's corpus {} is different from the expected corpus {}".format(
                parent.corpus_id,
                role.corpus_id))

        child = self.child
        if child is None:
            return
        if child.type_id != role.child_type_id:
            raise ValidationError("Child's type {} is different from the expected type {}".format(
                child.type_id,
                role.child_type_id))
        if child.corpus_id != role.corpus_id:
            raise ValidationError("Child's corpus {} is different from the expected corpus {}".format(
                child.corpus_id,
                role.corpus_id))

    def save(self, *args, **kwargs):
        # Every save goes through the full model validation first, including clean()
        self.full_clean()
        super().save(*args, **kwargs)
class TextOrientation(Enum):
HorizontalLeftToRight = "horizontal-lr"
HorizontalRightToLeft = "horizontal-rl"
......
......@@ -6,7 +6,7 @@ from drf_spectacular.utils import extend_schema_serializer
from rest_framework import serializers
from rest_framework.exceptions import ValidationError
from arkindex.documents.models import Corpus, Entity, EntityLink, EntityRole, EntityType, TranscriptionEntity
from arkindex.documents.models import Corpus, Entity, EntityType, TranscriptionEntity
from arkindex.documents.serializers.light import CorpusLightSerializer, EntityTypeLightSerializer
from arkindex.documents.serializers.ml import WorkerRunSummarySerializer
from arkindex.project.serializer_fields import ForbiddenField, WorkerRunIDField
......@@ -31,7 +31,7 @@ class EntityTypeSerializer(serializers.ModelSerializer):
# Get an error if a request tries to change an entity type's corpus
corpus = data.get("corpus")
if self.instance and corpus:
raise ValidationError({"corpus": ["It is not possible to update an Entity Type\'s corpus."]})
raise ValidationError({"corpus": ["It is not possible to update an Entity Type's corpus."]})
data = super().to_internal_value(data)
return data
......@@ -86,79 +86,6 @@ class BaseEntitySerializer(serializers.ModelSerializer):
)
class EntityRoleSerializer(serializers.ModelSerializer):
    """
    Serialize a role between two types of entity
    """
    # Both types are exposed as IDs, under "*_type_id" names, but stored on the "*_type" relations
    parent_type_id = serializers.PrimaryKeyRelatedField(
        source="parent_type",
        queryset=EntityType.objects.all(),
        style={"base_template": "input.html"},
    )
    child_type_id = serializers.PrimaryKeyRelatedField(
        source="child_type",
        queryset=EntityType.objects.all(),
        style={"base_template": "input.html"},
    )

    class Meta:
        model = EntityRole
        fields = (
            "id",
            "parent_name",
            "child_name",
            "parent_type_id",
            "child_type_id"
        )

    def validate(self, data):
        # The corpus never comes from the payload: it is read from the URL of the request
        assert "corpus" not in data
        request = self.context.get("request")
        assert request is not None

        corpus_id = request.parser_context["kwargs"]["pk"]
        corpus = Corpus.objects.writable(request.user).filter(id=corpus_id).first()
        if corpus is None:
            raise serializers.ValidationError({
                "corpus": ["You do not have write access to this corpus"],
                "id": corpus_id,
            })

        # Both entity types must belong to that corpus; errors are reported per field
        errors = defaultdict(list)
        for field_name, source in (("parent_type_id", "parent_type"), ("child_type_id", "child_type")):
            entity_type = data.get(source)
            if entity_type.corpus_id != corpus.id:
                errors[field_name].append(f"Type {entity_type.id} does not exist in corpus {corpus}.")
        if errors:
            raise ValidationError(errors)

        data["corpus"] = corpus
        return data
class CreateEntityRoleErrorResponseSerializer(serializers.Serializer):
    # Describes an error payload made of an optional corpus ID and an optional list of corpus errors.
    id = serializers.UUIDField(
        required=False,
        help_text="The corpus ID.",
    )
    corpus = serializers.ListField(
        child=serializers.CharField(),
        required=False,
        help_text="Errors that occurred during corpus ID field validation.",
    )
class EntityLinkSerializer(serializers.ModelSerializer):
    """
    Serialize an entity link with its child, parent and role
    """
    # The three relations are nested as full objects
    parent = BaseEntitySerializer()
    child = BaseEntitySerializer()
    role = EntityRoleSerializer()

    class Meta:
        model = EntityLink
        fields = ("id", "parent", "child", "role")
@extend_schema_serializer(
deprecate_fields=("worker_version_id")
)
......@@ -167,8 +94,6 @@ class EntitySerializer(BaseEntitySerializer):
Serialize an entity with its metadata
"""
corpus = CorpusLightSerializer(read_only=True)
children = EntityLinkSerializer(many=True, read_only=True)
parents = EntityLinkSerializer(many=True, read_only=True)
# When updating an entity, the type can be set either by using its EntityType UUID, or its name
# (in which case the serializer checks that an EntityType with this name exists in the corpus)
type_id = serializers.PrimaryKeyRelatedField(
......@@ -183,14 +108,10 @@ class EntitySerializer(BaseEntitySerializer):
model = Entity
fields = BaseEntitySerializer.Meta.fields + (
"corpus",
"children",
"parents",
"type_id"
)
read_only_fields = BaseEntitySerializer.Meta.read_only_fields = (
"corpus",
"children",
"parents",
)
def validate(self, data):
......@@ -221,8 +142,6 @@ class EntityCreateSerializer(BaseEntitySerializer):
style={"base_template": "input.html"},
)
metas = serializers.HStoreField(child=serializers.CharField(), required=False)
children = EntityLinkSerializer(many=True, read_only=True)
parents = EntityLinkSerializer(many=True, read_only=True)
worker_version = serializers.UUIDField(
allow_null=True,
required=False,
......@@ -262,15 +181,11 @@ class EntityCreateSerializer(BaseEntitySerializer):
"metas",
"validated",
"corpus",
"parents",
"children",
"worker_version",
"worker_run_id"
)
read_only_fields = (
"id",
"children",
"parents",
)
def __init__(self, *args, **kwargs):
......@@ -300,46 +215,6 @@ class EntityCreateSerializer(BaseEntitySerializer):
return data
class EntityLinkCreateSerializer(EntityLinkSerializer):
    """
    Serialize an entity with a possible parents and children
    """
    # The querysets start empty and are filled per request in __init__
    parent = serializers.PrimaryKeyRelatedField(
        style={"base_template": "input.html"},
        queryset=Entity.objects.none(),
    )
    child = serializers.PrimaryKeyRelatedField(
        style={"base_template": "input.html"},
        queryset=Entity.objects.none(),
    )
    role = serializers.PrimaryKeyRelatedField(
        style={"base_template": "input.html"},
        queryset=EntityRole.objects.none(),
    )

    class Meta:
        model = EntityLink
        fields = EntityLinkSerializer.Meta.fields

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        request = self.context.get("request")
        if not request:
            # Do not raise Error in order to create OpenAPI schema
            return

        # Only entities and roles from corpora writable by the user can be linked
        corpora = Corpus.objects.writable(request.user)
        entities = Entity.objects.all().filter(corpus__in=corpora)
        roles = EntityRole.objects.all().filter(corpus__in=corpora)
        for field_name, queryset in (("parent", entities), ("child", entities), ("role", roles)):
            self.fields[field_name].queryset = queryset

    def validate(self, data):
        data = super().validate(data)
        # Run the model validation on a temporary, unsaved link
        EntityLink(**data).full_clean()
        return data
class TranscriptionEntityCreateSerializer(serializers.ModelSerializer):
"""
Serialise the link between an entity and a transcription
......
......@@ -16,7 +16,6 @@ from arkindex.documents.models import (
Corpus,
Element,
ElementPath,
EntityLink,
MetaData,
Selection,
Transcription,
......@@ -58,8 +57,6 @@ def corpus_delete(corpus_id: str) -> None:
WorkerActivity.objects.filter(process__corpus_id=corpus_id),
corpus.files.all(),
MetaData.objects.filter(element__corpus_id=corpus_id),
EntityLink.objects.filter(role__corpus_id=corpus_id),
corpus.roles.all(),
TranscriptionEntity.objects.filter(entity__corpus_id=corpus_id),
TranscriptionEntity.objects.filter(transcription__element__corpus_id=corpus_id),
corpus.entities.all(),
......