Skip to content
Snippets Groups Projects
Commit 971d2ccf authored by Yoann Schneider's avatar Yoann Schneider :tennis: Committed by Erwan Rouchet
Browse files

Support worker_run id on entities

parent c38a65ad
No related branches found
No related tags found
1 merge request: !1699 "Support worker_run id on entities"
......@@ -143,7 +143,7 @@ class EntityAdmin(admin.ModelAdmin):
list_display = ('id', 'name', 'type')
list_filter = [('type', EnumFieldListFilter), 'corpus']
readonly_fields = ('id', )
raw_id_fields = ('worker_version', )
raw_id_fields = ('worker_version', 'worker_run', )
search_fields = ('name', )
inlines = (EntityLinkInLine, )
form = EntityMetaForm
......
......@@ -167,12 +167,14 @@ class EntityCreate(CreateAPIView):
corpus = serializer.validated_data['corpus']
metas = serializer.validated_data['metas'] if 'metas' in serializer.data else None
worker_version = serializer.validated_data['worker_version']
worker_run = serializer.validated_data['worker_run']
return Entity.objects.create(
name=name,
type=type,
corpus=corpus,
metas=metas,
worker_version=worker_version
worker_version=worker_version,
worker_run=worker_run
)
def create(self, request, *args, **kwargs):
......
# Generated by Django 4.0.2 on 2022-05-17 12:41
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    """
    Add an optional ``worker_run`` foreign key to the ``entity`` model, together
    with a check constraint that only allows a worker run to be set when a
    worker version is set as well.
    """

    # Must run after the migrations named below in the `dataimport` and `documents` apps.
    dependencies = [
        ('dataimport', '0048_workerrun_model_version'),
        ('documents', '0054_transcription_worker_run'),
    ]

    operations = [
        # Nullable foreign key to dataimport.WorkerRun. DO_NOTHING means Django performs
        # no action of its own on related entities when a worker run is deleted.
        migrations.AddField(
            model_name='entity',
            name='worker_run',
            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='entities', to='dataimport.workerrun'),
        ),
        # Either worker_run_id is NULL, or worker_version_id is NOT NULL:
        # a worker run may never be stored without a worker version.
        migrations.AddConstraint(
            model_name='entity',
            constraint=models.CheckConstraint(check=models.Q(('worker_run_id__isnull', True), ('worker_version_id__isnull', False), _connector='OR'), name='entity_worker_run_requires_worker_version'),
        ),
    ]
......@@ -474,9 +474,33 @@ class Entity(InterpretedDateMixin, models.Model):
null=True,
blank=True,
)
# Optional reference to a WorkerRun; nullable, so an entity may exist without any worker run.
worker_run = models.ForeignKey(
    'dataimport.WorkerRun',
    # WorkerRuns that are on processes that have not yet run can be deleted easily by the user, to allow them
    # to customize the worker runs before starting the process.
    # Since WorkerRuns are related to each worker result, deleting a worker run with models.CASCADE
    # could cause a dozen useless SQL queries to look for related classifications and other worker results
    # that could be related to it, and could fill up the RAM.
    # Using models.RESTRICT or PROTECT would also cause Django to check for related worker results in order
    # to raise a RestrictedError or ProtectedError, both of which would cause an HTTP 500.
    # Using models.DO_NOTHING lets PostgreSQL do the checks itself, which it always does anyway,
    # and crash with an IntegrityError, which also causes an HTTP 500.
    on_delete=models.DO_NOTHING,
    related_name='entities',
    null=True,
    blank=True,
)
class Meta:
    # Explicit plural label for the model, instead of an automatically derived one.
    verbose_name_plural = 'Entities'
    constraints = [
        # There can be a worker run ID only if there is a worker version ID,
        # but there can be a worker version ID without a worker run ID (backwards compatibility).
        # In other words, either the worker run ID is null, or the worker version ID is not null.
        models.CheckConstraint(
            check=Q(worker_run_id__isnull=True) | Q(worker_version_id__isnull=False),
            name='entity_worker_run_requires_worker_version',
        )
    ]
def __str__(self):
    """Return the entity's name as its string representation."""
    return self.name
......
from rest_framework import serializers
from arkindex.dataimport.models import WorkerVersion
from arkindex.dataimport.models import WorkerRun, WorkerVersion
from arkindex.documents.models import Corpus, Entity, EntityLink, EntityRole, EntityType, TranscriptionEntity
from arkindex.documents.serializers.light import CorpusLightSerializer, InterpretedDateSerializer
from arkindex.project.serializer_fields import EnumField
from arkindex.project.tools import WorkerRunOrVersionValidator
class BaseEntitySerializer(serializers.ModelSerializer):
......@@ -130,6 +131,12 @@ class EntityCreateSerializer(BaseEntitySerializer):
default=None,
style={'base_template': 'input.html'},
)
# Optional worker run, submitted by primary key under the `worker_run_id` key.
# `source='worker_run'` makes the resolved WorkerRun instance available as
# `worker_run` in the validated data; it defaults to None when omitted.
# The `input.html` base template renders a plain input in the browsable API
# rather than a select widget.
worker_run_id = serializers.PrimaryKeyRelatedField(
    queryset=WorkerRun.objects.all(),
    default=None,
    source='worker_run',
    style={'base_template': 'input.html'},
)
class Meta:
model = Entity
......@@ -142,13 +149,15 @@ class EntityCreateSerializer(BaseEntitySerializer):
'corpus',
'parents',
'children',
'worker_version'
'worker_version',
'worker_run_id'
)
read_only_fields = (
'id',
'children',
'parents',
)
validators = [WorkerRunOrVersionValidator(worker_version_field='worker_version')]
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
......@@ -158,6 +167,12 @@ class EntityCreateSerializer(BaseEntitySerializer):
corpora = Corpus.objects.writable(self.context['request'].user)
self.fields['corpus'].queryset = corpora
def validate(self, data):
    """
    Fill in the worker version from the worker run, when a run is provided.

    When ``data`` holds a truthy ``worker_run``, ``data['worker_version']`` is
    overwritten with a ``WorkerVersion`` instance built only from the run's
    ``version_id``. The same ``data`` mapping is returned, mutated in place.
    """
    run = data.get('worker_run')
    if not run:
        # No worker run supplied: leave the attributes untouched.
        return data

    # Build the instance from the foreign key value alone.
    data['worker_version'] = WorkerVersion(id=run.version_id)
    return data
class EntityLinkCreateSerializer(EntityLinkSerializer):
"""
......
......@@ -26,6 +26,7 @@ class TestEntitiesAPI(FixtureAPITestCase):
cls.private_corpus = Corpus.objects.create(name='private')
cls.worker_version_1 = WorkerVersion.objects.get(worker__slug='reco')
cls.worker_version_2 = WorkerVersion.objects.get(worker__slug='dla')
cls.worker_run = cls.worker_version_1.worker_runs.get()
cls.page = cls.corpus.elements.get(name='Volume 1, page 1r')
cls.element_type = cls.corpus.types.get(slug='text_line')
......@@ -92,8 +93,27 @@ class TestEntitiesAPI(FixtureAPITestCase):
'worker_version_id': str(self.worker_version_1.id)
}
def make_create_entity_request(self, name='entity', ent_type=EntityType.Person.value, **options):
    """
    Build the keyword arguments for a test-client POST on the entity creation endpoint.

    ``name`` and ``ent_type`` populate the request body, alongside this test
    case's corpus ID and a fixed ``metas`` mapping. Any extra keyword
    arguments are merged into the body and override the defaults.
    Returns a dict with the ``path``, ``data`` and ``format`` keys.
    """
    payload = {
        'name': name,
        'type': ent_type,
        'corpus': str(self.corpus.id),
        'metas': {
            'key': 'value',
            'other key': 'other value'
        },
        # Optional parameters
        **options,
    }
    return {
        'path': reverse('api:entity-create'),
        'data': payload,
        'format': 'json',
    }
def test_get_entity(self):
response = self.client.get(reverse('api:entity-details', kwargs={'pk': str(self.entity.id)}))
with self.assertNumQueries(3):
response = self.client.get(reverse('api:entity-details', kwargs={'pk': str(self.entity.id)}))
self.assertEqual(response.status_code, status.HTTP_200_OK)
data = response.json()
self.assertEqual(data['id'], str(self.entity.id))
......@@ -115,7 +135,8 @@ class TestEntitiesAPI(FixtureAPITestCase):
self.client.force_login(self.user)
self.element.corpus = self.private_corpus
self.element.save()
response = self.client.get(reverse('api:entity-elements', kwargs={'pk': str(self.entity.id)}))
with self.assertNumQueries(5):
response = self.client.get(reverse('api:entity-elements', kwargs={'pk': str(self.entity.id)}))
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertListEqual(response.json().get('results'), [])
......@@ -176,7 +197,8 @@ class TestEntitiesAPI(FixtureAPITestCase):
)
def test_get_role_in_corpus(self):
response = self.client.get(reverse('api:corpus-roles', kwargs={'pk': str(self.corpus.id)}))
with self.assertNumQueries(3):
response = self.client.get(reverse('api:corpus-roles', kwargs={'pk': str(self.corpus.id)}))
self.assertEqual(response.status_code, status.HTTP_200_OK)
data = response.json()
results = data['results']
......@@ -194,7 +216,8 @@ class TestEntitiesAPI(FixtureAPITestCase):
'child_type': EntityType.Location.value
}
self.client.force_login(self.user)
response = self.client.post(reverse('api:corpus-roles', kwargs={'pk': str(self.corpus.id)}), data=data)
with self.assertNumQueries(7):
response = self.client.post(reverse('api:corpus-roles', kwargs={'pk': str(self.corpus.id)}), data=data)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
data = response.json()
self.assertIn('id', data)
......@@ -212,7 +235,8 @@ class TestEntitiesAPI(FixtureAPITestCase):
'child_type': self.role.child_type.value,
}
self.client.force_login(self.user)
response = self.client.post(reverse('api:corpus-roles', kwargs={'pk': str(self.corpus.id)}), data=data)
with self.assertNumQueries(6):
response = self.client.post(reverse('api:corpus-roles', kwargs={'pk': str(self.corpus.id)}), data=data)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
data = response.json()
self.assertEqual(data, {
......@@ -227,7 +251,8 @@ class TestEntitiesAPI(FixtureAPITestCase):
'parent_type': EntityType.Organization.value,
'child_type': EntityType.Location.value
}
response = self.client.post(reverse('api:corpus-roles', kwargs={'pk': str(self.corpus.id)}), data=data)
with self.assertNumQueries(0):
response = self.client.post(reverse('api:corpus-roles', kwargs={'pk': str(self.corpus.id)}), data=data)
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
def test_create_role_requires_verified(self):
......@@ -241,7 +266,8 @@ class TestEntitiesAPI(FixtureAPITestCase):
'parent_type': EntityType.Organization.value,
'child_type': EntityType.Location.value
}
response = self.client.post(reverse('api:corpus-roles', kwargs={'pk': str(self.corpus.id)}), data=data)
with self.assertNumQueries(2):
response = self.client.post(reverse('api:corpus-roles', kwargs={'pk': str(self.corpus.id)}), data=data)
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
def test_create_role_no_corpus_rights(self):
......@@ -254,7 +280,8 @@ class TestEntitiesAPI(FixtureAPITestCase):
'parent_type': EntityType.Organization.value,
'child_type': EntityType.Location.value
}
response = self.client.post(reverse('api:corpus-roles', kwargs={'pk': str(private_corpus.id)}), data=data)
with self.assertNumQueries(5):
response = self.client.post(reverse('api:corpus-roles', kwargs={'pk': str(private_corpus.id)}), data=data)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
data = response.json()
self.assertEqual(data, {
......@@ -263,93 +290,89 @@ class TestEntitiesAPI(FixtureAPITestCase):
})
def test_create_entity_person(self):
data = {
'name': 'entity',
'type': EntityType.Person.value,
'corpus': str(self.corpus.id),
'metas': {
'key': 'value',
'other key': 'other value'
},
'worker_version': str(self.worker_version_1.id)
}
request = self.make_create_entity_request(ent_type=EntityType.Person.value)
self.client.force_login(self.user)
response = self.client.post(reverse('api:entity-create'), data=data, format='json')
with self.assertNumQueries(8):
response = self.client.post(**request)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
entity = Entity.objects.get(id=response.json()['id'])
self.assertEqual(entity.name, 'entity')
self.assertEqual(entity.raw_dates, None)
self.assertEqual(entity.worker_version, self.worker_version_1)
self.assertEqual(entity.worker_version, None)
self.assertEqual(entity.worker_run, None)
def test_create_entity_number(self):
data = {
'name': '300g',
'type': EntityType.Number.value,
'corpus': str(self.corpus.id),
'metas': {
'key': 'value',
'other key': 'other value'
},
'worker_version': str(self.worker_version_1.id)
}
request = self.make_create_entity_request(name='300g', ent_type=EntityType.Number.value)
self.client.force_login(self.user)
response = self.client.post(reverse('api:entity-create'), data=data, format='json')
with self.assertNumQueries(8):
response = self.client.post(**request)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
entity = Entity.objects.get(id=response.json()['id'])
self.assertEqual(entity.name, '300g')
self.assertEqual(entity.raw_dates, None)
self.assertEqual(entity.worker_version, self.worker_version_1)
self.assertEqual(entity.worker_version, None)
self.assertEqual(entity.worker_run, None)
def test_create_entity_date(self):
data = {
'name': '1789',
'type': EntityType.Date.value,
'corpus': str(self.corpus.id),
'metas': {
'key': 'value',
'other key': 'other value'
},
'worker_version': str(self.worker_version_1.id)
}
request = self.make_create_entity_request(name='1789', ent_type=EntityType.Date.value)
self.client.force_login(self.user)
response = self.client.post(reverse('api:entity-create'), data=data, format='json')
with self.assertNumQueries(8):
response = self.client.post(**request)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
entity = Entity.objects.get(id=response.json()['id'])
self.assertEqual(entity.name, '1789')
self.assertEqual(entity.raw_dates, entity.name)
self.assertEqual(entity.worker_version, self.worker_version_1)
self.assertEqual(entity.worker_version, None)
self.assertEqual(entity.worker_run, None)
def test_create_entity_requires_login(self):
data = {
'name': 'entity',
'type': EntityType.Person.value,
'corpus': str(self.corpus.id),
'metas': {
'key': 'value',
'other key': 'other value'
},
'worker_version': str(self.worker_version_1.id)
}
response = self.client.post(reverse('api:entity-create'), data=data, format='json')
request = self.make_create_entity_request(name='entity', ent_type=EntityType.Person.value)
with self.assertNumQueries(0):
response = self.client.post(**request)
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
def test_create_entity_with_worker_version(self):
data = {
'name': '1789',
'type': EntityType.Date.value,
'corpus': str(self.corpus.id),
'metas': {
'key': 'value',
'other key': 'other value'
},
'worker_version': str(self.worker_version_1.id)
}
request = self.make_create_entity_request(worker_version=str(self.worker_version_1.id))
self.client.force_login(self.user)
response = self.client.post(reverse('api:entity-create'), data=data, format='json')
with self.assertNumQueries(9):
response = self.client.post(**request)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
entity = Entity.objects.get(id=response.json()['id'])
self.assertEqual(entity.name, '1789')
self.assertEqual(entity.worker_version, self.worker_version_1)
self.assertEqual(entity.worker_run, None)
def test_create_entity_with_worker_run(self):
    # Sending only a worker run ID must store that run on the entity,
    # along with the worker version of that run.
    self.client.force_login(self.user)
    payload = self.make_create_entity_request(worker_run_id=str(self.worker_run.id))

    with self.assertNumQueries(9):
        response = self.client.post(**payload)
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    created = Entity.objects.get(id=response.json()['id'])
    self.assertEqual(created.worker_version, self.worker_run.version)
    self.assertEqual(created.worker_run, self.worker_run)
def test_create_entity_with_unknown_worker_run(self):
    # A freshly generated UUID is sent as the worker run ID; the API is expected
    # to reject it with a field-level error on `worker_run_id`.
    random_uuid = uuid.uuid4()
    self.client.force_login(self.user)
    payload = self.make_create_entity_request(worker_run_id=str(random_uuid))

    with self.assertNumQueries(6):
        response = self.client.post(**payload)
    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

    expected_errors = {'worker_run_id': [f'Invalid pk "{random_uuid}" - object does not exist.']}
    self.assertDictEqual(response.json(), expected_errors)
def test_create_entity_with_worker_run_and_worker_version(self):
    # Sending both a worker run ID and a worker version in the same request
    # must be rejected with a non-field error.
    self.client.force_login(self.user)
    payload = self.make_create_entity_request(
        worker_run_id=str(self.worker_run.id),
        worker_version=str(self.worker_version_1.id),
    )

    with self.assertNumQueries(7):
        response = self.client.post(**payload)
    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

    expected_errors = {'non_field_errors': ['Only one of `worker_version` and `worker_run_id` may be set.']}
    self.assertDictEqual(response.json(), expected_errors)
def test_create_link(self):
child = Entity.objects.create(
......@@ -364,7 +387,8 @@ class TestEntitiesAPI(FixtureAPITestCase):
'role': str(self.role.id)
}
self.client.force_login(self.user)
response = self.client.post(reverse('api:entity-link-create'), data=data, format='json')
with self.assertNumQueries(19):
response = self.client.post(reverse('api:entity-link-create'), data=data, format='json')
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
link = EntityLink.objects.get(id=response.json()['id'])
self.assertEqual(link.parent.id, self.entity.id)
......@@ -383,7 +407,8 @@ class TestEntitiesAPI(FixtureAPITestCase):
'child': str(child.id),
'role': str(self.role.id)
}
response = self.client.post(reverse('api:entity-create'), data=data, format='json')
with self.assertNumQueries(0):
response = self.client.post(reverse('api:entity-create'), data=data, format='json')
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
def test_create_link_error(self):
......@@ -399,7 +424,8 @@ class TestEntitiesAPI(FixtureAPITestCase):
'role': str(self.role.id)
}
self.client.force_login(self.user)
response = self.client.post(reverse('api:entity-link-create'), data=data, format='json')
with self.assertNumQueries(13):
response = self.client.post(reverse('api:entity-link-create'), data=data, format='json')
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
def test_create_transcription_entity(self):
......
......@@ -74,6 +74,7 @@ SELECT "documents_entitylink"."id",
T3."validated",
T3."moderator_id",
T3."worker_version_id",
T3."worker_run_id",
"documents_entity"."id",
"documents_entity"."name",
"documents_entity"."type",
......@@ -82,6 +83,7 @@ SELECT "documents_entitylink"."id",
"documents_entity"."validated",
"documents_entity"."moderator_id",
"documents_entity"."worker_version_id",
"documents_entity"."worker_run_id",
"documents_entityrole"."id",
"documents_entityrole"."parent_name",
"documents_entityrole"."child_name",
......
......@@ -111,7 +111,8 @@ SELECT ("documents_transcriptionentity"."transcription_id") AS "_prefetch_relate
"documents_entity"."metas",
"documents_entity"."validated",
"documents_entity"."moderator_id",
"documents_entity"."worker_version_id"
"documents_entity"."worker_version_id",
"documents_entity"."worker_run_id"
FROM "documents_entity"
INNER JOIN "documents_transcriptionentity" ON ("documents_entity"."id" = "documents_transcriptionentity"."entity_id")
WHERE "documents_transcriptionentity"."transcription_id" IN ('{transcription_id}'::uuid);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment