Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Project: arkindex/backend
Revisions compared: 1.1.2-beta1 and 1.1.2-rc2

Commits on Source (14)
Showing with 1153 additions and 576 deletions
@@ -468,12 +468,14 @@ class StartProcess(CorpusACLMixin, APIView):
data = serializer.validated_data
has_versions = process.versions.exists()
errors = defaultdict(list)
if not has_versions and data.get('worker_activity'):
errors['worker_activity'] = 'The process must have workers attached to handle their activity.'
if not has_versions and data.get('use_cache'):
errors['use_cache'] = 'The process must should have workers attached to use cached results.'
if not process.versions.exists():
if data.get('worker_activity'):
errors['worker_activity'] = 'The process must have workers attached to handle their activity.'
if data.get('use_cache'):
errors['use_cache'] = 'The process must have workers attached to use cached results.'
if data.get('use_gpu'):
errors['use_gpu'] = 'The process must have workers attached to use GPUs.'
if errors:
raise ValidationError(errors)
......
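For reference, a minimal sketch of how a client trips these new checks, mirroring the tests further down in this diff (here `client`, `reverse` and `process` are assumed to be a DRF test client, Django's URL resolver and a process without worker runs):

    # Hedged sketch: starting a workerless process with all three options
    # should now fail with one error message per option.
    response = client.post(
        reverse('api:process-start', kwargs={'pk': str(process.id)}),
        {'worker_activity': 'true', 'use_cache': 'true', 'use_gpu': 'true'},
    )
    assert response.status_code == 400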
# Generated by Django 3.2.6 on 2021-10-01 15:11
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('dataimport', '0037_workerversion_gpu_usage'),
]
operations = [
migrations.AddField(
model_name='dataimport',
name='use_gpu',
field=models.BooleanField(default=False, blank=True),
),
]
@@ -106,6 +106,8 @@ class DataImport(IndexableModel):
# Use elements cache executing the workflow
use_cache = models.BooleanField(default=False)
use_gpu = models.BooleanField(default=False, blank=True)
class Meta:
ordering = ['corpus', '-created']
@@ -315,12 +317,18 @@ class DataImport(IndexableModel):
# Generate a task for each WorkerRun on the DataImport
for worker_run in worker_runs:
task_name = f'{worker_run.version.slug}{task_suffix}'
tasks[task_name] = worker_run.build_task_recipe(
task_recipe = worker_run.build_task_recipe(
import_task_name,
elements_path,
chunk=chunk,
workflow_runs=worker_runs
workflow_runs=worker_runs,
)
if self.use_gpu:
if worker_run.version.gpu_usage in (WorkerVersionGPUUsage.Required, WorkerVersionGPUUsage.Supported):
task_recipe["requires_gpu"] = True
elif worker_run.version.gpu_usage == WorkerVersionGPUUsage.Required:
raise ValidationError(f"The task {task_name} requires a GPU and the `use_gpu` option is disabled.")
tasks[task_name] = task_recipe
# Build the workflow in db
recipe = settings.PONOS_RECIPE.copy()
@@ -346,7 +354,7 @@ class DataImport(IndexableModel):
return Workflow.objects.create(farm_id=farm_id, recipe=yaml.dump(recipe))
def start(self, use_cache=False, worker_activity=False, **kwargs):
def start(self, use_cache=False, worker_activity=False, use_gpu=False, **kwargs):
if self.mode != DataImportMode.Workers:
assert not use_cache and not worker_activity, 'Only worker processes can be started with cache or worker activities'
@@ -354,6 +362,8 @@
self.activity_state = ActivityState.Pending
if use_cache:
self.use_cache = True
if use_gpu:
self.use_gpu = True
self.workflow = self.build_workflow(**kwargs)
self.save()
......
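The GPU handling added to `build_workflow` boils down to a small decision rule. A standalone sketch, assuming only the enum values shown in this diff:

    # Hedged sketch of the requires_gpu decision above:
    # - with use_gpu, any worker that supports or requires a GPU gets one;
    # - without use_gpu, a worker that requires a GPU is a hard error.
    def requires_gpu(use_gpu, gpu_usage):
        if use_gpu:
            return gpu_usage in (WorkerVersionGPUUsage.Required, WorkerVersionGPUUsage.Supported)
        if gpu_usage == WorkerVersionGPUUsage.Required:
            raise ValidationError('This task requires a GPU and the `use_gpu` option is disabled.')
        return False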
@@ -76,6 +76,7 @@ class DataImportSerializer(DataImportLightSerializer):
'element_name_contains',
'load_children',
'use_cache',
'use_gpu',
)
read_only_fields = DataImportLightSerializer.Meta.read_only_fields + (
'files',
@@ -83,6 +84,7 @@ class DataImportSerializer(DataImportLightSerializer):
'element',
'folder_type',
'use_cache',
'use_gpu',
)
def __init__(self, *args, **kwargs):
@@ -225,6 +227,7 @@ class StartProcessSerializer(serializers.Serializer):
thumbnails = serializers.BooleanField(default=False)
farm = serializers.PrimaryKeyRelatedField(queryset=Farm.objects.all(), required=False, allow_null=True)
use_cache = serializers.BooleanField(default=False)
use_gpu = serializers.BooleanField(default=False, allow_null=True)
worker_activity = serializers.BooleanField(default=False)
......
@@ -9,7 +9,7 @@ class TestCacheWorkerVersions(FixtureTestCase):
@classmethod
def setUpTestData(cls):
super().setUpTestData()
cls.dla, cls.recognizer = WorkerVersion.objects.order_by('worker__slug')
cls.dla, cls.recognizer = WorkerVersion.objects.get(worker__slug='dla'), WorkerVersion.objects.get(worker__slug='reco')
def test_run(self):
self.corpus.worker_versions.add(self.dla)
......
@@ -748,6 +748,7 @@ class TestImports(FixtureAPITestCase):
'element_type': 'page',
'load_children': True,
'use_cache': True,
'use_gpu': False,
'activity_state': ActivityState.Pending.value,
},
format='json'
@@ -759,6 +760,7 @@
'element_type': 'page',
'load_children': True,
'use_cache': False,
'use_gpu': False,
'activity_state': ActivityState.Disabled.value,
'id': str(dataimport.id),
'corpus': str(self.corpus.id),
@@ -1181,7 +1183,7 @@ class TestImports(FixtureAPITestCase):
@patch('arkindex.project.triggers.dataimport_tasks.initialize_activity.delay')
def test_start_process_options_requires_workers(self, activities_delay_mock, worker_runs_mock):
"""
Cache and worker activity options can be trigerred hen starting a process
Cache and worker activity options can be triggered when starting a process
"""
process = self.corpus.imports.create(
creator=self.user,
@@ -1192,12 +1194,13 @@
with self.assertNumQueries(7):
response = self.client.post(
reverse('api:process-start', kwargs={'pk': str(process.id)}),
{'use_cache': 'true', 'worker_activity': 'true'}
{'use_cache': 'true', 'worker_activity': 'true', 'use_gpu': 'true'}
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertDictEqual(response.json(), {
'use_cache': 'The process must should have workers attached to use cached results.',
'worker_activity': 'The process must have workers attached to handle their activity.'
'use_cache': 'The process must have workers attached to use cached results.',
'worker_activity': 'The process must have workers attached to handle their activity.',
'use_gpu': 'The process must have workers attached to use GPUs.'
})
process.refresh_from_db()
self.assertFalse(process.use_cache)
@@ -1224,11 +1227,12 @@
with self.assertNumQueries(20):
response = self.client.post(
reverse('api:process-start', kwargs={'pk': str(process.id)}),
{'use_cache': 'true', 'worker_activity': 'true'}
{'use_cache': 'true', 'worker_activity': 'true', 'use_gpu': 'true'}
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
process.refresh_from_db()
self.assertTrue(process.use_cache)
self.assertTrue(process.use_gpu)
# The process activity state is set to Pending while activities are created in an async job
self.assertEqual(process.activity_state, ActivityState.Pending)
self.assertEqual(activities_delay_mock.call_count, 1)
......
@@ -351,6 +351,7 @@ class TestRepositories(FixtureTestCase):
'element_type': None,
'load_children': False,
'use_cache': False,
'use_gpu': False,
'activity_state': ActivityState.Disabled.value,
'element_name_contains': None,
'files': [],
@@ -393,6 +394,7 @@
'element_type': None,
'load_children': False,
'use_cache': False,
'use_gpu': False,
'activity_state': ActivityState.Disabled.value,
'element_name_contains': None,
'files': [],
......
@@ -45,6 +45,7 @@ class TestWorkersWorkerVersions(FixtureAPITestCase):
cls.worker_1 = Worker.objects.get(slug='reco')
cls.worker_2 = Worker.objects.get(slug='dla')
cls.worker_3 = Worker.objects.get(slug='worker-gpu')
cls.rev2 = Revision.objects.create(
hash='1234',
message='commit message',
@@ -70,7 +71,7 @@ class TestWorkersWorkerVersions(FixtureAPITestCase):
response = self.client.get(reverse('api:workers-list'))
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertDictEqual(response.json(), {
'count': 2,
'count': 3,
'next': None,
'number': 1,
'previous': None,
@@ -87,6 +88,13 @@
'name': 'Recognizer',
'slug': 'reco',
'type': 'recognizer',
},
{
'id': str(self.worker_3.id),
'repository_id': str(self.repo.id),
'name': 'Worker requiring a GPU',
'slug': 'worker-gpu',
'type': 'worker',
}
]
})
......
@@ -57,6 +57,8 @@ class TestWorkflows(FixtureAPITestCase):
cls.worker_1 = cls.version_1.worker
cls.version_2 = WorkerVersion.objects.get(worker__slug='dla')
cls.worker_2 = cls.version_2.worker
cls.version_3 = WorkerVersion.objects.get(worker__slug='worker-gpu')
cls.worker_3 = cls.version_3.worker
@override_settings(
ARKINDEX_TASKS_IMAGE='tasks:latest',
@@ -94,6 +96,7 @@
'element': None,
'load_children': False,
'use_cache': False,
'use_gpu': False,
'activity_state': ActivityState.Disabled.value,
'element_name_contains': None
})
@@ -147,6 +150,7 @@
'element_type': None,
'load_children': True,
'use_cache': True,
'use_gpu': False,
'activity_state': ActivityState.Disabled.value,
'element_name_contains': None
})
@@ -648,3 +652,45 @@ class TestWorkflows(FixtureAPITestCase):
}
},
})
def test_create_process_use_gpu_option(self):
"""
A process with the `use_gpu` parameter enables the `requires_gpu` attribute of tasks that need one
"""
dataimport_2 = self.corpus.imports.create(creator=self.user, mode=DataImportMode.Workers)
dataimport_2.worker_runs.create(
version=self.version_3,
parents=[],
)
dataimport_2.use_gpu = True
dataimport_2.save()
self.client.force_login(self.user)
with self.assertNumQueries(27):
response = self.client.post(reverse('api:process-start', kwargs={'pk': str(dataimport_2.id)}))
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(response.json()['use_gpu'], True)
dataimport_2.refresh_from_db()
self.assertEqual(dataimport_2.state, State.Unscheduled)
self.assertIsNotNone(dataimport_2.workflow)
self.assertDictEqual(yaml.safe_load(dataimport_2.workflow.recipe)['tasks'], {
'initialisation':
{
'command': 'python -m arkindex_tasks.init_elements '
f'{dataimport_2.id} --chunks-number 1',
'image': 'registry.gitlab.com/arkindex/tasks'
},
f'worker-gpu_{str(self.version_3.id)[0:6]}':
{
'command': None,
'image': f'my_repo.fake/workers/worker/worker-gpu:{self.version_3.id}',
'artifact': str(self.version_3.docker_image.id),
'parents': ['initialisation'],
'env': {
'TASK_ELEMENTS': '/data/initialisation/elements.json',
'WORKER_VERSION_ID': str(self.version_3.id),
'ARKINDEX_WORKER_RUN_ID': str(dataimport_2.worker_runs.get().id),
},
'requires_gpu': True,
},
})
@@ -786,13 +786,11 @@ class ElementChildren(ElementsListBase):
@extend_schema_view(
get=extend_schema(description="Retrieve a single element's informations and metadata"),
patch=extend_schema(description='Rename an element'),
put=extend_schema(description='Rename an element'),
delete=extend_schema(description='Delete an element'),
put=extend_schema(description="Edit an element's attributes. Requires a write access on the corpus."),
delete=extend_schema(description='Delete an element. Requires either an admin access on the corpus, '
'or a write access and to be the creator of this element.'),
)
class ElementRetrieve(ACLMixin, RetrieveUpdateDestroyAPIView):
"""
Retrieve a single element
"""
serializer_class = ElementSerializer
permission_classes = (IsVerifiedOrReadOnly, )
@@ -806,8 +804,8 @@ class ElementRetrieve(ACLMixin, RetrieveUpdateDestroyAPIView):
corpora = Corpus.objects.readable(self.request.user)
queryset = Element.objects.filter(corpus__in=corpora)
if self.request and self.request.method == 'DELETE':
# Only include corpus for ACL check and ID for deletion
return queryset.select_related('corpus').only('id', 'corpus')
# Only include corpus and creator for ACL check and ID for deletion
return queryset.select_related('corpus').only('id', 'creator_id', 'corpus')
return queryset \
.select_related(
@@ -817,15 +815,15 @@ class ElementRetrieve(ACLMixin, RetrieveUpdateDestroyAPIView):
'creator',
) \
.prefetch_related(Prefetch('classifications', queryset=classifications_queryset)) \
.annotate(metadata_count=Count('metadatas', filter=Q(metadatas__entity_id=None)))
.annotate(metadata_count=Count('metadatas'))
def check_object_permissions(self, request, obj):
super().check_object_permissions(request, obj)
if self.request.method in permissions.SAFE_METHODS:
if request.method in permissions.SAFE_METHODS:
return
role = Role.Contributor
if request.method == 'DELETE':
if request.method == 'DELETE' and obj.creator_id != request.user.id:
role = Role.Admin
if not self.has_access(obj.corpus, role.value):
access_repr = 'admin' if role == Role.Admin else 'write'
@@ -1282,10 +1280,11 @@ class ElementMetadata(ListCreateAPIView):
def get_queryset(self):
if self.request and self.request.method == 'GET':
element = get_object_or_404(Element, id=self.kwargs['pk'], corpus__in=Corpus.objects.readable(self.request.user))
return element.metadatas.filter(entity_id=None)
return element.metadatas.all().select_related('entity')
# We need to use the default database to avoid stale read
# when a metadata is created immediately after an element creation
# when a metadata is created immediately after an element creation.
# Note that this returns a queryset of elements and not metadata!
return Element.objects.using('default').filter(corpus__in=Corpus.objects.writable(self.request.user))
def get_serializer_context(self):
@@ -1293,9 +1292,7 @@ class ElementMetadata(ListCreateAPIView):
# Ignore this step when generating the schema with OpenAPI
if not self.request:
return context
if self.request.method == 'GET':
context['list'] = True
else:
if self.request.method != 'GET':
context['element'] = self.get_object()
return context
......
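With the `list` context flag removed, listing element metadata now serializes the entity as well. A hedged sketch of one item of the new `ListElementMetaData` response, matching the updated tests near the end of this diff:

    # One item of GET api:element-metadata after this change:
    {
        'id': '...',
        'name': 'folio',
        'type': 'text',
        'value': '123',
        'dates': [],
        'entity': None,  # full serialized entity when set, None otherwise
        'worker_version': None,
    }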
This diff is collapsed.
#!/usr/bin/env python3
from django.conf import settings
from django.core.management.base import BaseCommand
from django.db import transaction
from django.db.utils import IntegrityError
from rest_framework.authtoken.models import Token
from arkindex.images.models import ImageServer
from arkindex.users.models import User
from ponos.models import Farm
# Constants used in architecture project
IMAGE_SERVER_ID = 12345
IMAGE_SERVER_BUCKET = 'iiif'
IMAGE_SERVER_REGION = 'local'
PONOS_FARM_ID = "001e411a-1111-2222-3333-444455556666"
PONOS_FARM_SEED = "b12868101dab84984481741663d809d2393784894d6e807ceee0bd95051bf971"
INTERNAL_API_TOKEN = "deadbeefTestToken"
class Command(BaseCommand):
help = 'Build required Database model instances for local development.'
def success(self, msg):
"""Helper to display success messages"""
self.stdout.write(self.style.SUCCESS(f"{msg}"))
def warn(self, msg):
"""Helper to display warning messages"""
self.stdout.write(self.style.WARNING(f"{msg}"))
def fail(self, msg):
"""Helper to display error messages"""
self.stdout.write(self.style.ERROR(f"{msg}"))
def check_user(self, user):
"""Ensure a user is admin + internal"""
if user.is_internal and user.is_admin:
self.success(f"Internal user {user} is valid")
else:
user.is_internal = True
user.is_admin = True
user.save()
self.warn(f"Updated user {user} to internal+admin")
def handle(self, **options):
# Never allow running this script in production
if not settings.DEBUG:
self.fail("You cannot run this script in production.")
return
# a Ponos farm with specific ID & seed
try:
farm = Farm.objects.get(id=PONOS_FARM_ID)
if farm.seed == PONOS_FARM_SEED:
self.success("Ponos farm valid")
else:
farm.seed = PONOS_FARM_SEED
farm.save()
self.warn(f"Ponos farm {farm.name} updated")
except Farm.DoesNotExist:
Farm.objects.create(
id=PONOS_FARM_ID,
seed=PONOS_FARM_SEED,
)
self.success("Ponos farm created")
# an internal API user with a specific token
try:
token = Token.objects.get(key=INTERNAL_API_TOKEN)
self.check_user(token.user)
except Token.DoesNotExist:
# Create a new internal user
user, _ = User.objects.get_or_create(
email='internal+bootstrap@teklia.com',
defaults={
'display_name': 'Bootstrap Internal user',
'is_internal': True,
'is_admin': True,
}
)
self.success("Created internal user")
self.check_user(user)
# Finally create a specific token for that user
if hasattr(user, "auth_token"):
# Support One-To-One relation
user.auth_token.delete()
Token.objects.create(key=INTERNAL_API_TOKEN, user=user)
self.success(f"Created token {INTERNAL_API_TOKEN}")
# an image server for local cantaloupe https://ark.localhost/iiif/2
try:
server = ImageServer.objects.get(url='https://ark.localhost/iiif/2')
if server.id != IMAGE_SERVER_ID:
# Migrate existing images & server id in a single transaction
with transaction.atomic():
server.images.update(server_id=IMAGE_SERVER_ID)
ImageServer.objects.filter(id=server.id).update(id=IMAGE_SERVER_ID)
self.warn(f"Image server {server.id} updated to {IMAGE_SERVER_ID}")
# Update internal reference for updates below
server.id = IMAGE_SERVER_ID
# Update base settings
if server.s3_bucket != IMAGE_SERVER_BUCKET or server.s3_region != IMAGE_SERVER_REGION or not server.validated:
server.s3_bucket = IMAGE_SERVER_BUCKET
server.s3_region = IMAGE_SERVER_REGION
server.validated = True
server.save()
self.warn("Updated image server S3 settings")
else:
self.success(f"Image server {server.id} valid")
except ImageServer.DoesNotExist:
try:
server = ImageServer.objects.create(
id=IMAGE_SERVER_ID,
url='https://ark.localhost/iiif/2',
s3_bucket=IMAGE_SERVER_BUCKET,
s3_region=IMAGE_SERVER_REGION,
display_name='Development local server',
validated=True,
)
self.success("Image server created")
except IntegrityError as e:
self.fail(f"Failed to create image server: {e}")
return
# Check there is not already a local server with invalid path
# We'll merge its image into the new one
# This bad server may have been created by automatic IIIF server detection
try:
bad_server = ImageServer.objects.get(url='https://ark.localhost/iiif')
bad_server.merge_into(server)
self.warn(f"Merged images from {bad_server.id} into {server.id}")
bad_server.delete()
self.warn("Deleted old server")
except ImageServer.DoesNotExist:
pass
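This command runs through Django's management framework; a hedged usage sketch (the command name comes from the file name, which this diff does not show, so `bootstrap` is an assumption):

    # Hedged usage sketch; 'bootstrap' is an assumed command name.
    from django.core.management import call_command
    call_command('bootstrap')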
@@ -6,7 +6,13 @@ from django.contrib.gis.geos import LinearRing
from django.core.management.base import BaseCommand
from django.utils import timezone as DjangoTimeZone
from arkindex.dataimport.models import RepositoryType, WorkerVersion, WorkerVersionState, Workflow
from arkindex.dataimport.models import (
RepositoryType,
WorkerVersion,
WorkerVersionGPUUsage,
WorkerVersionState,
Workflow,
)
from arkindex.documents.models import Corpus, Element, MetaData, MetaType
from arkindex.images.models import Image, ImageServer
from arkindex.users.models import Group, Right, Role, User
@@ -125,6 +131,19 @@ class Command(BaseCommand):
docker_image=docker_image
)
WorkerVersion.objects.create(
worker=worker_repo.workers.create(
name='Worker requiring a GPU',
slug='worker-gpu',
type='worker',
),
revision=revision,
configuration={'test': 42},
state=WorkerVersionState.Available,
docker_image=docker_image,
gpu_usage=WorkerVersionGPUUsage.Required
)
# Create a IIIF repository
repo = creds.repos.create(
url='http://gitlab/repo',
......
@@ -43,19 +43,67 @@ class MetaDataSerializer(MetaDataLightSerializer):
"""
Serialises some Metadata for any Element
"""
entity = BaseEntitySerializer()
entity = BaseEntitySerializer(read_only=True)
class Meta:
model = MetaData
fields = MetaDataLightSerializer.Meta.fields + ('entity', )
class MetaDataBulkItemSerializer(MetaDataLightSerializer):
def metadata_number_validator(data, serializer):
# Those can be None if someone messed up their input; DRF will catch it itself afterwards, so we ignore None.
meta_type = data.get('type', serializer.instance.type if serializer.instance else None)
value = data.get('value', serializer.instance.value if serializer.instance else None)
if meta_type != MetaType.Numeric or value is None:
return
try:
# Python allows _ to separate numbers (123_456.789_101), but Postgres does not, so we forbid them.
assert '_' not in value
value = float(value)
assert not math.isinf(value)
assert not math.isnan(value)
except (AssertionError, TypeError, ValueError):
raise serializers.ValidationError({
'value': ['The value of a numeric metadata should be a valid floating-point number.']
})
def metadata_url_validator(data, serializer):
# Those can be None if someone messed up their input; DRF will catch it itself afterwards, so we ignore None.
meta_type = data.get('type', serializer.instance.type if serializer.instance else None)
value = data.get('value', serializer.instance.value if serializer.instance else None)
if meta_type != MetaType.URL or value is None:
return
try:
URLValidator(schemes=['http', 'https'])(value)
except DjangoValidationError:
raise serializers.ValidationError({
'value': ['The value of a URL metadata must be a valid HTTP or HTTPS URL.']
})
metadata_number_validator.requires_context = True
metadata_url_validator.requires_context = True
class MetaDataUpdateSerializer(MetaDataSerializer):
"""
Serialises some Metadata for any Element
Allow editing MetaData
"""
entity = serializers.PrimaryKeyRelatedField(
entity_id = serializers.PrimaryKeyRelatedField(
source='entity',
queryset=Entity.objects.none(),
write_only=True,
required=False,
allow_null=True,
style={'base_template': 'input.html'},
)
worker_version = serializers.PrimaryKeyRelatedField(
queryset=WorkerVersion.objects.all(),
required=False,
allow_null=True,
style={'base_template': 'input.html'},
@@ -63,58 +111,33 @@ class MetaDataBulkItemSerializer(MetaDataLightSerializer):
class Meta:
model = MetaData
fields = MetaDataLightSerializer.Meta.fields + ('entity', )
fields = MetaDataSerializer.Meta.fields + ('entity_id', 'worker_version', )
validators = [
metadata_number_validator,
metadata_url_validator,
]
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# We don't want to return the entity field when listing metadata
if self.context.get('list'):
self.fields.pop('entity')
return
corpus_id = None
# When used for CreateMetaData, the API adds the current element to the context
if self.context.get('element') is not None:
corpus_id = self.context['element'].corpus_id
# self.instance can be a queryset if this serializer is used for a list; just ignore anything that isn't a metadata
elif isinstance(self.instance, MetaData):
corpus_id = self.instance.element.corpus_id
if self.context.get('element'):
corpus = self.context['element'].corpus
elif self.instance:
corpus = self.instance.element.corpus
else:
return
self.fields['entity'].queryset = corpus.entities.all()
if corpus_id:
self.fields['entity_id'].queryset = Entity.objects.filter(corpus_id=corpus_id)
def validate(self, data):
data = super().validate(data)
# Those can be None if someone messed up their input; DRF will catch it itself afterwards, so we ignore None.
meta_type = data.get('type', self.instance.type if self.instance else None)
value = data.get('value', self.instance.value if self.instance else None)
if meta_type == MetaType.Numeric and value is not None:
try:
# Python allows _ to separate numbers (123_456.789_101), but Postgres does not, so we forbid them.
assert '_' not in value
value = float(value)
assert not math.isinf(value)
assert not math.isnan(value)
except (AssertionError, TypeError, ValueError):
raise serializers.ValidationError({
'value': ['The value of a numeric metadata should be a valid floating-point number.']
})
if meta_type == MetaType.URL and value is not None:
try:
URLValidator(schemes=['http', 'https'])(value)
except DjangoValidationError:
raise serializers.ValidationError({
'value': ['The value of a URL metadata must be a valid HTTP or HTTPS URL.']
})
return data
class MetaDataUpdateSerializer(MetaDataBulkItemSerializer):
class MetaDataBulkItemSerializer(MetaDataLightSerializer):
"""
Allow editing MetaData
Serialises some Metadata for any Element
"""
worker_version = serializers.PrimaryKeyRelatedField(
queryset=WorkerVersion.objects.all(),
entity_id = serializers.PrimaryKeyRelatedField(
source='entity',
queryset=Entity.objects.none(),
required=False,
allow_null=True,
style={'base_template': 'input.html'},
@@ -122,7 +145,16 @@ class MetaDataUpdateSerializer(MetaDataBulkItemSerializer):
class Meta:
model = MetaData
fields = MetaDataBulkItemSerializer.Meta.fields + ('worker_version', )
fields = MetaDataLightSerializer.Meta.fields + ('entity_id', )
validators = [
metadata_number_validator,
metadata_url_validator,
]
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
if self.context.get('element') is not None:
self.fields['entity_id'].queryset = Entity.objects.filter(corpus_id=self.context['element'].corpus_id)
class MetaDataBulkSerializer(serializers.Serializer):
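Both serializers now share their checks through DRF's context-aware function validators. A minimal standalone sketch of the pattern, with an illustrative validator name that is not part of this diff:

    from rest_framework import serializers

    def value_present_validator(data, serializer):
        # Fall back to the existing instance on partial updates (PATCH)
        value = data.get('value', serializer.instance.value if serializer.instance else None)
        if value is None:
            raise serializers.ValidationError({'value': ['A value is required.']})

    # requires_context = True makes DRF pass the serializer itself
    # as the second argument to the validator.
    value_present_validator.requires_context = True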
@@ -587,7 +619,7 @@ class ElementCreateSerializer(ElementLightSerializer):
self.fields['corpus'].queryset = corpora.prefetch_related('types')
# Avoid a stale read issue when creating a child on a freshly created parent
self.fields['parent'].queryset = Element.objects.filter(corpus__in=corpora).using('default')
self.fields['parent'].queryset = Element.objects.filter(corpus__in=corpora).select_related('image').using('default')
def validate(self, data):
errors = defaultdict(list)
@@ -604,6 +636,9 @@ class ElementCreateSerializer(ElementLightSerializer):
})
parent = data.get('parent')
image = data.get('image')
polygon = data.get('polygon')
if parent:
if parent.corpus_id != data['corpus'].id:
errors['corpus'].append(
@@ -617,7 +652,14 @@ class ElementCreateSerializer(ElementLightSerializer):
data.setdefault('rotation_angle', 0)
data.setdefault('mirrored', False)
image = data.get('image')
if polygon and not image:
# Deduce the image from the parent if there is one
if parent is not None and parent.image_id:
image = parent.image
data['image'] = image
else:
errors['polygon'].append('An image or a parent with an image is required to create an element with a polygon.')
if image and (image.width == 0 or image.height == 0):
# Element creation with images with a width or height equal to zero
# will lead to many errors everywhere as this would create impossible polygons
......
@@ -2,7 +2,7 @@ import bleach
from django.db.models import Max
from drf_spectacular.utils import extend_schema_field
from rest_framework import serializers
from rest_framework.exceptions import APIException, ValidationError
from rest_framework.exceptions import ValidationError
from arkindex.documents.dates import DateType
from arkindex.documents.models import AllowedMetaData, Corpus, Element, ElementType, MetaData, MetaType
@@ -102,11 +102,6 @@ class CorpusLightSerializer(serializers.ModelSerializer):
)
class DuplicatedMetadata(APIException):
status_code = 200
default_detail = 'Metadata exists.'
class MetaDataLightSerializer(serializers.ModelSerializer):
"""
Serialize a metadata without its entity
@@ -145,14 +140,36 @@ class MetaDataLightSerializer(serializers.ModelSerializer):
def create(self, validated_data):
user = self.context['request'].user
element = self.context['element']
if not (user.is_admin or user.is_internal):
self.check_allowed(corpus=element.corpus, **validated_data)
metadata = element.metadatas.using('default').filter(name=validated_data['name'])
if metadata.exists():
if metadata.filter(value=validated_data['value'], type=validated_data['type']).exists():
# Raise a 200_OK if a metadata with same type, name and value exists
raise DuplicatedMetadata
validated_data['index'] = metadata.aggregate(Max('index'))['index__max'] + 1
max_index = metadata.aggregate(Max('index'))['index__max']
# max_index will be None if there are no metadata.
# If there are any metadata, check that we are not creating a duplicate
if max_index is not None:
try:
existing_metadata = metadata.filter(value=validated_data['value'], type=validated_data['type']).get()
except MetaData.DoesNotExist:
# No existing metadata found: carry on!
pass
except MetaData.MultipleObjectsReturned:
# In theory, we should have a unique constraint on element_id/name/type/value, but that isn't possible
# because the values can get very long and exceed the size of a B-Tree index tuple (8/3 kibibytes).
# The APIs have unicity checks in place so this should never happen, but should is not enough.
# If there are any duplicate metadatas, we just let the exception escalate to Sentry so that we can troubleshoot.
raise
else:
# Send the existing ID in an HTTP 400 error
raise ValidationError({
'detail': ['This metadata already exists.'],
'id': str(existing_metadata.id),
})
# Increment the metadata index
validated_data['index'] = max_index + 1
return element.metadatas.create(**validated_data)
......
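From the client's perspective, creating a duplicate no longer answers with a misleading HTTP 200. A hedged sketch of the new failure mode, mirroring test_create_duplicated_metadata below (`client` and `element` are assumed to be a DRF test client and an existing element):

    # Posting the same (name, type, value) twice now yields HTTP 400 with
    # {'detail': ['This metadata already exists.'], 'id': '<existing metadata UUID>'}
    response = client.post(
        reverse('api:element-metadata', kwargs={'pk': str(element.id)}),
        data={'type': 'date', 'name': 'edition', 'value': '1986-june'},
    )
    assert response.status_code == 400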
@@ -87,7 +87,6 @@ class TestMoveElement(FixtureTestCase):
self.assertEqual(len(source_paths), 1)
self.assertEqual(list(source_paths.values('path')), [{'path': [self.destination.id]}])
# Assert children were also moved
self.assertEqual(list(children_paths.values('path')), [
{'path': [self.destination.id, self.source_with_children.id]},
......
@@ -27,6 +27,23 @@ class TestCreateElements(FixtureAPITestCase):
)
cls.worker_version = WorkerVersion.objects.get(worker__slug='dla')
# The image is always the same in API responses
cls.image_response = {
'id': str(cls.image.id),
's3_url': None,
'width': 42,
'height': 42,
'path': 'kingdom/far/away',
'url': 'http://server/kingdom/far/away',
'status': 'checked',
'server': {
'display_name': 'Test Server',
'max_height': None,
'max_width': None,
'url': 'http://server',
}
}
def make_create_request(self, name='default', corpus=None, elt_type='volume', **options):
request = {
'path': reverse('api:elements-create'),
@@ -57,13 +74,12 @@ class TestCreateElements(FixtureAPITestCase):
data = response.json()
self.assertIn('id', data)
volume = Element.objects.get(id=data['id'])
# Assert the fully serialized response matches the created volume
self.assertDictEqual(
data,
{
'id': str(volume.id),
'name': volume.name,
'type': volume.type.slug,
'name': volume.name,
'thumbnail_put_url': None,
'thumbnail_url': volume.thumbnail.s3_url,
'worker_version': None,
@@ -99,8 +115,6 @@
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
data = response.json()
page = self.corpus.elements.get(id=data['id'])
# Assert the fully serialized response matches the created page (exclude image zone)
del data['zone']
self.assertDictEqual(
data,
{
@@ -109,6 +123,7 @@
'type': page.type.slug,
'thumbnail_put_url': None,
'thumbnail_url': None,
'created': page.created.isoformat().replace('+00:00', 'Z'),
'creator': 'Test user',
'rights': ['read', 'write', 'admin'],
'worker_version': None,
@@ -118,15 +133,47 @@
'name': page.corpus.name,
'public': page.corpus.public
},
'zone': {
'id': str(page.id),
'image': self.image_response,
'polygon': [[0, 0], [0, 42], [42, 42], [42, 0], [0, 0]],
'url': page.iiif_url,
},
'rotation_angle': 0,
'mirrored': False,
'created': page.created.isoformat().replace('+00:00', 'Z'),
}
)
self.assertEqual(page.name, 'The castle of my dreams')
self.assertEqual(page.type, self.page_type)
self.assertEqual(page.creator, self.user)
def test_create_inherit_parent_image(self):
"""
Creating an element with a parent, a polygon and no image should inherit the parent's image
"""
polygon = [[10, 10], [10, 40], [40, 40], [40, 10], [10, 10]]
self.vol.image = self.image
self.vol.polygon = polygon
self.vol.save()
self.client.force_login(self.user)
request = self.make_create_request(
parent=str(self.vol.id),
elt_type='page',
name='The castle of my dreams',
polygon=polygon,
)
with self.assertNumQueries(16):
response = self.client.post(**request)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
data = response.json()
page = self.corpus.elements.get(id=data['id'])
self.assertEqual(response.json()['zone'], {
'id': str(page.id),
'image': self.image_response,
'polygon': polygon,
'url': page.iiif_url,
})
def test_create_volume_act(self):
# Create a volume child of type Act
self.client.force_login(self.user)
@@ -178,6 +225,20 @@
'worker_version': ['Only an internal user can create an element with a worker version.'],
})
def test_create_element_polygon_no_image(self):
self.client.force_login(self.user)
request = self.make_create_request(
elt_type='page',
name='The castle of my dreams again',
polygon=[[10, 10], [10, 40], [40, 40], [40, 10], [10, 10]],
)
with self.assertNumQueries(6):
response = self.client.post(**request)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertDictEqual(response.json(), {
'polygon': ['An image or a parent with an image is required to create an element with a polygon.'],
})
def test_create_element_polygon_negative(self):
self.client.force_login(self.user)
request = self.make_create_request(
@@ -210,8 +271,6 @@
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
data = response.json()
page = self.corpus.elements.get(id=data['id'])
# Assert the fully serialized response matches the created element (exclude zone image field)
del data['zone']['image']
self.assertDictEqual(
data,
{
@@ -232,7 +291,8 @@
'zone': {
'id': str(page.id),
'polygon': polygon,
'url': str(page.iiif_url)
'image': self.image_response,
'url': page.iiif_url,
},
'rotation_angle': 0,
'mirrored': False,
......
@@ -48,7 +48,7 @@ class TestDestroyElements(FixtureAPITestCase):
type=self.volume_type,
name='Castle story'
)
self.assertEqual(self.corpus.elements.filter(id=castle_story.id).exists(), True)
self.assertTrue(self.corpus.elements.filter(id=castle_story.id).exists())
with self.assertNumQueries(7):
response = self.client.delete(reverse('api:element-retrieve', kwargs={'pk': str(castle_story.id)}))
self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT)
@@ -63,6 +63,51 @@
'description': 'Element deletion',
})
@patch('arkindex.project.triggers.documents_tasks.element_trash.delay')
def test_element_destroy_creator(self, delay_mock):
"""
An element's creator can delete the element if they have write access
"""
self.private_corpus.memberships.create(user=self.user, level=Role.Contributor.value)
self.client.force_login(self.user)
castle_story = self.private_corpus.elements.create(
type=self.volume_type,
name='Castle story',
creator=self.user,
)
with self.assertNumQueries(6):
response = self.client.delete(reverse('api:element-retrieve', kwargs={'pk': str(castle_story.id)}))
self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT)
self.assertEqual(delay_mock.call_count, 1)
args, kwargs = delay_mock.call_args
self.assertEqual(len(args), 0)
self.assertCountEqual(list(kwargs.pop('queryset')), list(self.private_corpus.elements.filter(id=castle_story.id)))
self.assertDictEqual(kwargs, {
'delete_children': True,
'user_id': self.user.id,
'description': 'Element deletion',
})
def test_element_destroy_creator_acl(self):
"""
An element's creator cannot delete without write access
"""
self.private_corpus.memberships.create(user=self.user, level=Role.Guest.value)
self.client.force_login(self.user)
castle_story = self.private_corpus.elements.create(
type=self.volume_type,
name='Castle story',
creator=self.user,
)
with self.assertNumQueries(5):
response = self.client.delete(reverse('api:element-retrieve', kwargs={'pk': str(castle_story.id)}))
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
self.assertDictEqual(
response.json(),
{'detail': 'You do not have write access to this element.'}
)
@patch('arkindex.project.triggers.documents_tasks.element_trash.delay')
def test_non_empty_element(self, delay_mock):
"""
......
@@ -2,7 +2,7 @@ from django.urls import reverse
from rest_framework import status
from arkindex.dataimport.models import WorkerVersion
from arkindex.documents.models import AllowedMetaData, Corpus, EntityType, MetaType
from arkindex.documents.models import AllowedMetaData, Corpus, EntityType, MetaData, MetaType
from arkindex.project.tests import FixtureAPITestCase
from arkindex.users.models import Role, User
@@ -41,11 +41,12 @@ class TestMetaData(FixtureAPITestCase):
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertListEqual(response.json(), [
{
'dates': [],
'id': str(self.metadata.id),
'name': 'folio',
'type': 'text',
'value': '123',
'dates': [],
'entity': None,
'worker_version': None
},
])
@@ -57,11 +58,12 @@
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertListEqual(response.json(), [
{
'dates': [],
'id': str(self.metadata.id),
'name': 'folio',
'type': 'text',
'value': '123',
'dates': [],
'entity': None,
'worker_version': None
},
])
@@ -77,7 +79,7 @@
name='Marc',
worker_version=self.worker_version,
)
self.vol.metadatas.create(
entity_meta = self.vol.metadatas.create(
name='meta',
type=MetaType.Text,
value='text_metadata',
@@ -87,13 +89,31 @@
with self.assertNumQueries(6):
response = self.client.get(reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)}))
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertListEqual(response.json(), [
self.assertCountEqual(response.json(), [
{
'dates': [],
'id': str(self.metadata.id),
'name': 'folio',
'type': 'text',
'value': '123',
'entity': None,
'worker_version': None,
},
{
'id': str(entity_meta.id),
'name': 'meta',
'type': 'text',
'value': 'text_metadata',
'dates': [],
'entity': {
'dates': [],
'id': str(entity.id),
'metas': None,
'name': 'Marc',
'type': 'person',
'validated': False,
'worker_version_id': str(self.worker_version.id),
},
'worker_version': None,
},
])
@@ -112,6 +132,7 @@
'name': 'folio',
'type': 'text',
'value': '123',
'entity': None,
'worker_version': str(self.worker_version.id),
},
])
@@ -233,20 +254,44 @@
def test_create_duplicated_metadata(self):
self.client.force_login(self.user)
response = self.client.post(
reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)}),
data={'type': 'date', 'name': 'edition', 'value': '1986-june'}
)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
response = self.client.post(
reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)}),
data={'type': 'date', 'name': 'edition', 'value': '1986-june'}
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
with self.assertNumQueries(9):
response = self.client.post(
reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)}),
data={'type': 'date', 'name': 'edition', 'value': '1986-june'}
)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
metadata_id = response.json()['id']
with self.assertNumQueries(7):
response = self.client.post(
reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)}),
data={'type': 'date', 'name': 'edition', 'value': '1986-june'}
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertEqual(response.json(), {
'detail': ['This metadata already exists.'],
'id': metadata_id,
})
metadata = self.vol.metadatas.filter(name='edition')
self.assertEqual(metadata.count(), 1)
def test_create_duplicated_metadata_exception(self):
"""
The DB cannot have a unique constraint in place for element/name/type/value, and the APIs are supposed to deal
with that by checking themselves before inserting. If we still find duplicate metadata in the database, the API
should fail to allow us to notice the issue and troubleshoot it.
"""
self.vol.metadatas.create(name='test', type=MetaType.Text, index=0, value='And again')
self.vol.metadatas.create(name='test', type=MetaType.Text, index=1, value='And again')
self.client.force_login(self.superuser)
with self.assertNumQueries(5):
with self.assertRaises(MetaData.MultipleObjectsReturned):
self.client.post(
reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)}),
data={'name': 'test', 'type': 'text', 'value': 'And again'},
)
def test_delete_metadata_verified(self):
response = self.client.delete(reverse('api:metadata-edit', kwargs={'pk': str(self.metadata.id)}))
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
@@ -459,7 +504,7 @@
)
response = self.client.post(
reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)}),
data={'type': 'location', 'name': 'location', 'value': 'Texas', 'entity': entity.id}
data={'type': 'location', 'name': 'location', 'value': 'Texas', 'entity_id': entity.id}
)
self.assertEqual(response.status_code, status.HTTP_201_CREATED, response.json())
self.assertEqual(self.vol.metadatas.get(name='location').entity, entity)
@@ -474,7 +519,7 @@
metadata = self.vol.metadatas.create(type=MetaType.Location, name='location', value='Texas')
response = self.client.patch(
reverse('api:metadata-edit', kwargs={'pk': str(metadata.id)}),
data={'entity': entity.id}
data={'entity_id': entity.id}
)
self.assertEqual(response.status_code, status.HTTP_200_OK, response.json())
metadata.refresh_from_db()
@@ -490,7 +535,7 @@
metadata = self.vol.metadatas.create(type=MetaType.Location, name='location', value='Texas', entity=entity)
response = self.client.patch(
reverse('api:metadata-edit', kwargs={'pk': str(metadata.id)}),
data={'entity': None},
data={'entity_id': None},
format='json'
)
self.assertEqual(response.status_code, status.HTTP_200_OK, response.json())
@@ -506,7 +551,7 @@
)
response = self.client.post(
reverse('api:element-metadata', kwargs={'pk': str(self.vol.id)}),
data={'type': 'location', 'name': 'location', 'value': 'Texas', 'entity': entity.id}
data={'type': 'location', 'name': 'location', 'value': 'Texas', 'entity_id': entity.id}
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
@@ -520,7 +565,7 @@
metadata = self.vol.metadatas.create(type=MetaType.Location, name='location', value='Texas')
response = self.client.patch(
reverse('api:metadata-edit', kwargs={'pk': str(metadata.id)}),
data={'entity': entity.id}
data={'entity_id': entity.id}
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
metadata.refresh_from_db()
@@ -703,7 +748,7 @@
data={'metadata_list': [
{'type': 'location', 'name': 'location', 'value': 'Texas'},
{'type': 'date', 'name': 'date', 'value': '1885'},
{'type': 'numeric', 'name': 'numeric', 'value': '42', 'entity': str(entity.id)}
{'type': 'numeric', 'name': 'numeric', 'value': '42', 'entity_id': str(entity.id)}
]},
format='json'
)
@@ -725,14 +770,14 @@
'name': 'location',
'type': 'location',
'value': 'Texas',
'entity': None,
'entity_id': None,
'dates': []
}, {
'id': str(md2.id),
'name': 'date',
'type': 'date',
'value': '1885',
'entity': None,
'entity_id': None,
'dates': [{
'day': None,
'month': None,
@@ -744,7 +789,7 @@
'name': 'numeric',
'type': 'numeric',
'value': '42',
'entity': str(entity.id),
'entity_id': str(entity.id),
'dates': []
}
],
@@ -818,15 +863,15 @@
'name': 'certainly not allowed',
'type': 'text',
'value': 'bla bla bla',
'entity': None,
'dates': []
'dates': [],
'entity_id': None,
}, {
'id': str(md2.id),
'name': 'not allowed',
'type': 'location',
'value': 'boo',
'entity': None,
'dates': []
'dates': [],
'entity_id': None,
},
],
'worker_version': None
......