Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • arkindex/backend
1 result
Show changes
Commits on Source (8)
......@@ -2,7 +2,15 @@ from django.contrib import admin
from django.db.models import Max
from enumfields.admin import EnumFieldListFilter
from arkindex.dataimport.models import DataFile, DataImport, Repository, Revision, Worker, WorkerVersion
from arkindex.dataimport.models import (
DataFile,
DataImport,
Repository,
Revision,
Worker,
WorkerConfiguration,
WorkerVersion,
)
from arkindex.users.admin import GroupMembershipInline, UserMembershipInline
......@@ -85,11 +93,16 @@ class WorkerVersionInline(admin.StackedInline):
return super().get_queryset(*args, **kwargs).prefetch_related('worker', 'revision')
class WorkerConfigurationInline(admin.StackedInline):
    # Stacked inline exposing WorkerConfiguration rows; it is listed in
    # WorkerAdmin.inlines so configurations show up on the worker admin page.
    model = WorkerConfiguration
    # The hash is recomputed from `configuration` by a pre_save signal
    # (update_configuration_hash), so it is displayed but never edited by hand.
    readonly_fields = ('configuration_hash', )
class WorkerAdmin(admin.ModelAdmin):
    # Admin page for workers, with their versions, memberships and
    # configurations editable inline.
    list_display = ('id', 'name', 'slug', 'type', 'repository')
    # `fields` (not `field`) is the ModelAdmin option controlling which fields
    # the change form shows; the previous `field` attribute was silently ignored.
    fields = ('id', 'name', 'slug', 'type', 'repository')
    # `id` is listed in `fields`, so it has to be read-only to be displayed.
    readonly_fields = ('id', )
    inlines = [WorkerVersionInline, UserMembershipInline, GroupMembershipInline, WorkerConfigurationInline]
class WorkerVersionAdmin(admin.ModelAdmin):
......@@ -99,9 +112,16 @@ class WorkerVersionAdmin(admin.ModelAdmin):
readonly_fields = ('id', )
class WorkerConfigurationAdmin(admin.ModelAdmin):
    # Standalone admin page for worker configurations.
    list_display = ('id', 'name', 'worker')
    # Allow narrowing the change list down to a single worker.
    list_filter = ('worker', )
    # `configuration_hash` is derived from `configuration` by a pre_save
    # signal (update_configuration_hash), so it is shown read-only here.
    readonly_fields = ('id', 'configuration_hash')
# Bind every dataimport model to its ModelAdmin on the default admin site.
admin.site.register(DataImport, admin_class=DataImportAdmin)
admin.site.register(DataFile, admin_class=DataFileAdmin)
admin.site.register(Revision, admin_class=RevisionAdmin)
admin.site.register(Repository, admin_class=RepositoryAdmin)
admin.site.register(Worker, admin_class=WorkerAdmin)
admin.site.register(WorkerVersion, admin_class=WorkerVersionAdmin)
admin.site.register(WorkerConfiguration, admin_class=WorkerConfigurationAdmin)
......@@ -44,6 +44,7 @@ from arkindex.dataimport.models import (
Worker,
WorkerActivity,
WorkerActivityState,
WorkerConfiguration,
WorkerRun,
WorkerVersion,
)
......@@ -65,11 +66,13 @@ from arkindex.dataimport.serializers.imports import (
from arkindex.dataimport.serializers.workers import (
RepositorySerializer,
WorkerActivitySerializer,
WorkerConfigurationSerializer,
WorkerSerializer,
WorkerStatisticsSerializer,
WorkerVersionEditSerializer,
WorkerVersionSerializer,
)
from arkindex.dataimport.utils import hash_object
from arkindex.documents.models import Corpus, Element
from arkindex.project.fields import ArrayRemove
from arkindex.project.mixins import (
......@@ -985,6 +988,65 @@ class WorkerRetrieve(WorkerACLMixin, RetrieveAPIView):
return worker
@extend_schema(tags=['repos'])
@extend_schema_view(
    get=extend_schema(
        description=(
            'List configurations for a given worker ID.\n\n'
            'Requires a **read** access to the worker or its repository.'
        ),
    ),
    post=extend_schema(
        description=(
            'Create a configuration for a given worker ID, name and JSON configuration.\n\n'
            'Requires an **admin** access to the worker or its repository.'
        )
    ),
)
class WorkerConfigurationList(WorkerACLMixin, ListCreateAPIView):
    # Lists and creates the configurations of the worker whose ID is in the URL.
    # The API descriptions live in the schema decorators above; plain comments
    # are used here so the view's runtime description is left untouched.
    permission_classes = (IsVerified, )
    serializer_class = WorkerConfigurationSerializer
    # Placeholder queryset; the real one is built per request in get_queryset().
    queryset = WorkerConfiguration.objects.none()

    def get_object(self):
        """Return the worker designated by the URL `pk`, or raise a 404."""
        workers = Worker.objects.select_related('repository')
        return get_object_or_404(workers, pk=self.kwargs['pk'])

    def get_queryset(self):
        """Return the worker's configurations ordered by name.

        Raises PermissionDenied when the requester has no read access to the worker.
        """
        worker = self.get_object()
        if self.has_read_access(worker):
            return worker.configurations.order_by('name')
        raise PermissionDenied(detail='You do not have a guest access to this worker.')

    def create(self, request, *args, **kwargs):
        """Create a configuration on the worker and return it with a 201 status.

        Raises PermissionDenied without admin access to the worker, and
        ValidationError when the payload is invalid or when the worker already
        has a configuration with the same name or the same content.
        """
        worker = self.get_object()
        if not self.has_admin_access(worker):
            raise PermissionDenied(detail='You do not have an admin access to this worker.')

        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        payload = serializer.validated_data

        # Check unique constraints: both the name and the configuration
        # content (compared through its hash) must be new for this worker.
        name = payload['name']
        configuration = payload.get('configuration', {})
        same_content = Q(configuration_hash=hash_object(configuration))
        duplicates = worker.configurations.filter(same_content | Q(name=name))
        if duplicates.exists():
            raise ValidationError({'__all__': 'A worker configuration with this name or this configuration already exists for this worker'})

        created = WorkerConfiguration.objects.create(
            worker=worker,
            name=name,
            configuration=configuration,
        )
        body = WorkerConfigurationSerializer(created).data
        return Response(body, status=status.HTTP_201_CREATED)
@extend_schema(tags=['imports'])
@extend_schema_view(
get=extend_schema(description=(
......
# Generated by Django 3.2.5 on 2021-11-04 09:42
import uuid
import django.db.models.deletion
from django.db import migrations, models
import arkindex.project.fields
class Migration(migrations.Migration):
    # Introduces the WorkerConfiguration model and points worker runs and
    # worker activities at it; the JSON previously stored on
    # WorkerRun.configuration is kept under the name `old_configuration`.

    dependencies = [
        ('dataimport', '0038_dataimport_use_gpu'),
    ]

    operations = [
        # Create the table first; its `worker` foreign key is added further down.
        migrations.CreateModel(
            name='WorkerConfiguration',
            fields=[
                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
                ('created', models.DateTimeField(auto_now_add=True)),
                ('updated', models.DateTimeField(auto_now=True)),
                ('name', models.CharField(max_length=250)),
                ('configuration', models.JSONField(blank=True, default=dict)),
                ('configuration_hash', arkindex.project.fields.MD5HashField(max_length=32)),
            ],
        ),
        # The check constraint references the `configuration` column, so it is
        # dropped before the rename and re-created afterwards under a new name.
        migrations.RemoveConstraint(
            model_name='workerrun',
            name='worker_run_configuration_objects',
        ),
        migrations.RenameField(
            model_name='workerrun',
            old_name='configuration',
            new_name='old_configuration',
        ),
        migrations.AddConstraint(
            model_name='workerrun',
            constraint=models.CheckConstraint(check=models.Q(('old_configuration__typeof', 'object')), name='worker_run_old_configuration_objects'),
        ),
        # Deleting a worker deletes its configurations (CASCADE).
        migrations.AddField(
            model_name='workerconfiguration',
            name='worker',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='configurations', to='dataimport.worker'),
        ),
        # Optional links: deleting a configuration only nulls the reference (SET_NULL).
        migrations.AddField(
            model_name='workeractivity',
            name='configuration',
            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='worker_activities', to='dataimport.workerconfiguration'),
        ),
        # The `configuration` name is free again on WorkerRun after the rename above.
        migrations.AddField(
            model_name='workerrun',
            name='configuration',
            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='worker_runs', to='dataimport.workerconfiguration'),
        ),
        # Only JSON objects are accepted as a configuration.
        migrations.AddConstraint(
            model_name='workerconfiguration',
            constraint=models.CheckConstraint(check=models.Q(('configuration__typeof', 'object')), name='worker_configuration_configuration_objects'),
        ),
        # For a given worker, both the name and the configuration hash are unique.
        migrations.AlterUniqueTogether(
            name='workerconfiguration',
            unique_together={('worker', 'name'), ('worker', 'configuration_hash')},
        ),
    ]
......@@ -18,7 +18,7 @@ from arkindex.dataimport.providers import get_provider, git_providers
from arkindex.dataimport.utils import get_default_farm_id
from arkindex.documents.models import ClassificationState, Element
from arkindex.project.aws import S3FileMixin, S3FileStatus
from arkindex.project.fields import ArrayField
from arkindex.project.fields import ArrayField, MD5HashField
from arkindex.project.models import IndexableModel
from ponos.models import Artifact, State, Workflow
......@@ -601,19 +601,49 @@ class WorkerVersion(models.Model):
return self.configuration['docker'].get('command')
class WorkerConfiguration(IndexableModel):
    # A named JSON configuration attached to a worker.
    name = models.CharField(max_length=250)
    # Free-form JSON; the check constraint below restricts it to JSON objects.
    configuration = models.JSONField(default=dict, blank=True)
    # MD5 of the configuration, refreshed by the pre_save signal
    # update_configuration_hash; it backs the per-worker uniqueness below.
    configuration_hash = MD5HashField()
    # Deleting the worker deletes its configurations.
    worker = models.ForeignKey(
        Worker,
        on_delete=models.CASCADE,
        related_name='configurations',
    )

    class Meta:
        # A worker cannot hold two configurations with the same content
        # (same hash) nor two configurations with the same name.
        unique_together = (
            ('worker', 'configuration_hash'),
            ('worker', 'name')
        )
        constraints = [
            # Reject any JSON value that is not an object (list, string, number...).
            models.CheckConstraint(
                check=models.Q(configuration__typeof='object'),
                name='worker_configuration_configuration_objects',
            )
        ]
class WorkerRun(models.Model):
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
dataimport = models.ForeignKey('dataimport.DataImport', on_delete=models.CASCADE, related_name='worker_runs')
version = models.ForeignKey('dataimport.WorkerVersion', on_delete=models.CASCADE, related_name='worker_runs')
parents = ArrayField(models.UUIDField())
configuration = models.JSONField(default=dict)
old_configuration = models.JSONField(default=dict)
configuration = models.ForeignKey(
WorkerConfiguration,
related_name='worker_runs',
on_delete=models.SET_NULL,
null=True,
blank=True,
)
class Meta:
unique_together = (('version', 'dataimport'),)
constraints = [
models.CheckConstraint(
check=models.Q(configuration__typeof='object'),
name='worker_run_configuration_objects',
check=models.Q(old_configuration__typeof='object'),
name='worker_run_old_configuration_objects',
)
]
......@@ -692,6 +722,13 @@ class WorkerActivity(IndexableModel):
null=True,
blank=True,
)
configuration = models.ForeignKey(
WorkerConfiguration,
related_name='worker_activities',
on_delete=models.SET_NULL,
null=True,
blank=True,
)
# Specific WorkerActivity manager
objects = ActivityManager()
......
......@@ -323,7 +323,7 @@ class WorkerRunSerializer(serializers.ModelSerializer):
# Serialize worker with its basic informations
worker = WorkerLightSerializer(source='version.worker', read_only=True)
# A DictField will require valid dicts, but without a child= argument, it will allow any value
configuration = serializers.DictField(allow_empty=True, default={})
configuration = serializers.DictField(source='old_configuration', allow_empty=True, default={})
class Meta:
model = WorkerRun
......
......@@ -12,6 +12,7 @@ from arkindex.dataimport.models import (
Worker,
WorkerActivity,
WorkerActivityState,
WorkerConfiguration,
WorkerVersion,
WorkerVersionGPUUsage,
WorkerVersionState,
......@@ -186,3 +187,13 @@ class WorkerStatisticsSerializer(serializers.Serializer):
started = serializers.IntegerField(read_only=True)
processed = serializers.IntegerField(read_only=True)
error = serializers.IntegerField(read_only=True)
class WorkerConfigurationSerializer(serializers.ModelSerializer):
    # Serializes a worker configuration as its ID, name and JSON configuration.
    # Plain comments are used instead of a docstring so that the generated
    # API schema description of this serializer is left untouched.

    # The model only accepts JSON objects (database check constraint). The
    # field auto-generated from the model would let any JSON value through
    # validation, so a DictField is declared to reject non-objects with a
    # regular validation error. Without a child= argument, the dict values
    # themselves are not validated. The default mirrors the model's `dict`.
    configuration = serializers.DictField(allow_empty=True, default=dict)

    class Meta:
        model = WorkerConfiguration
        fields = (
            'id',
            'name',
            'configuration',
        )
......@@ -2,7 +2,8 @@ from django.db.models.signals import pre_save
from django.dispatch import receiver
from rest_framework.exceptions import ValidationError
from arkindex.dataimport.models import WorkerRun
from arkindex.dataimport.models import WorkerConfiguration, WorkerRun
from arkindex.dataimport.utils import hash_object
def _list_ancestors(graph, parents):
......@@ -39,3 +40,8 @@ def check_parents(sender, instance, **kwargs):
ancestors = _list_ancestors(graph, instance.parents)
if instance.id in ancestors:
raise ValidationError(f"Can't add or update WorkerRun {instance.id} because parents field isn't properly defined. It would create a cycle.")
@receiver(pre_save, sender=WorkerConfiguration)
def update_configuration_hash(sender, instance, **kwargs):
    """Recompute the instance's configuration hash before it is saved."""
    fresh_hash = hash_object(instance.configuration)
    instance.configuration_hash = fresh_hash
......@@ -437,7 +437,7 @@ class TestWorkerRuns(FixtureAPITestCase):
def test_update_run_configuration(self):
self.client.force_login(self.user)
self.assertDictEqual(self.run_1.configuration, {})
self.assertDictEqual(self.run_1.old_configuration, {})
with self.assertNumQueries(8):
response = self.client.patch(
reverse('api:worker-run-details', kwargs={'pk': self.run_1.id}),
......@@ -453,7 +453,7 @@ class TestWorkerRuns(FixtureAPITestCase):
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.run_1.refresh_from_db()
self.assertDictEqual(self.run_1.configuration, {
self.assertDictEqual(self.run_1.old_configuration, {
'a': 'b',
'c': {
'd': 42
......
......@@ -959,3 +959,195 @@ class TestWorkersWorkerVersions(FixtureAPITestCase):
data={'revision': str(self.rev2.id), 'configuration': {"test": "test2"}, 'gpu_usage': 'not_supported'}, format='json'
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
def test_configurations_list_requires_login(self):
    """Listing configurations without being logged in is forbidden."""
    url = reverse('api:worker-configurations', kwargs={'pk': str(self.worker_1.id)})
    response = self.client.get(url)
    self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
def test_configurations_list_requires_verified(self):
    """Listing configurations with an unverified email is forbidden."""
    self.user.verified_email = False
    self.user.save()
    self.client.force_login(self.user)
    url = reverse('api:worker-configurations', kwargs={'pk': str(self.worker_1.id)})
    response = self.client.get(url)
    self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
def test_configurations_list_no_rights(self):
    """Listing is denied once the user's memberships on the repository are removed."""
    self.worker_1.repository.memberships.filter(user=self.user).delete()
    self.client.force_login(self.user)
    url = reverse('api:worker-configurations', kwargs={'pk': str(self.worker_1.id)})
    with self.assertNumQueries(7):
        response = self.client.get(url)
    self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
    expected = {'detail': 'You do not have a guest access to this worker.'}
    self.assertEqual(response.json(), expected)
def test_configurations_list(self):
    """
    A guest access on the worker's repository is enough to list its configurations
    """
    repo2 = Repository.objects.create(
        url='http://gitlab/repo2',
        type=RepositoryType.Worker,
        hook_token='hook-token2',
        credentials=self.creds,
        provider_name='GitLabProvider'
    )
    worker_2 = repo2.workers.create(name='Worker 2', slug='worker_2', type='classifier')
    config_1 = worker_2.configurations.create(name='config_1', configuration={'key': 'value'})
    config_2 = worker_2.configurations.create(name='config_2')
    repo2.memberships.create(user=self.user, level=Role.Guest.value)
    self.client.force_login(self.user)

    url = reverse('api:worker-configurations', kwargs={'pk': str(worker_2.id)})
    with self.assertNumQueries(9):
        response = self.client.get(url)
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    # Results are expected in name order; a configuration created without
    # content is serialized as an empty object.
    expected_results = [
        {
            'id': str(config_1.id),
            'name': 'config_1',
            'configuration': {'key': 'value'}
        },
        {
            'id': str(config_2.id),
            'name': 'config_2',
            'configuration': {}
        },
    ]
    self.assertListEqual(response.json()['results'], expected_results)
def test_configurations_create_requires_login(self):
    """Creating a configuration without being logged in is forbidden."""
    url = reverse('api:worker-configurations', kwargs={'pk': str(self.worker_1.id)})
    response = self.client.post(url)
    self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
def test_configurations_create_requires_verified(self):
    """Creating a configuration with an unverified email is forbidden."""
    self.user.verified_email = False
    self.user.save()
    self.client.force_login(self.user)
    url = reverse('api:worker-configurations', kwargs={'pk': str(self.worker_1.id)})
    response = self.client.post(url)
    self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
def test_configurations_create_no_rights(self):
    """Creation is denied once the user's memberships on the repository are removed."""
    self.worker_1.repository.memberships.filter(user=self.user).delete()
    self.client.force_login(self.user)
    url = reverse('api:worker-configurations', kwargs={'pk': str(self.worker_1.id)})
    with self.assertNumQueries(7):
        response = self.client.post(url)
    self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
    expected = {'detail': 'You do not have an admin access to this worker.'}
    self.assertEqual(response.json(), expected)
def test_configuration_create_empty(self):
    """Posting without any data is rejected because the name is required."""
    self.client.force_login(self.user)
    url = reverse('api:worker-configurations', kwargs={'pk': str(self.worker_1.id)})
    response = self.client.post(url)
    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
    expected = {
        'name': ['This field is required.']
    }
    self.assertDictEqual(response.json(), expected)
def test_configurations_create_name_already_exists(self):
    """Reusing the name of an existing configuration of the worker is rejected."""
    name = 'config-name'
    repo2 = Repository.objects.create(
        url='http://gitlab/repo2',
        type=RepositoryType.Worker,
        hook_token='hook-token2',
        credentials=self.creds,
        provider_name='GitLabProvider'
    )
    worker_2 = repo2.workers.create(name='Worker 2', slug='worker_2', type='classifier')
    worker_2.configurations.create(name=name, configuration={'key': 'value'})
    repo2.memberships.create(user=self.user, level=Role.Admin.value)
    self.client.force_login(self.user)

    # Same name as the existing configuration, but a different content.
    url = reverse('api:worker-configurations', kwargs={'pk': str(worker_2.id)})
    payload = {'name': name, 'configuration': {'key': 'value', 'cahuete': 'bidule'}}
    with self.assertNumQueries(8):
        response = self.client.post(url, data=payload, format='json')
    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

    expected = {'__all__': 'A worker configuration with this name or this configuration already exists for this worker'}
    self.assertDictEqual(response.json(), expected)
def test_configurations_create_configuration_already_exists(self):
    """Reusing the content of an existing configuration of the worker is rejected."""
    config = {'key': 'value'}
    repo2 = Repository.objects.create(
        url='http://gitlab/repo2',
        type=RepositoryType.Worker,
        hook_token='hook-token2',
        credentials=self.creds,
        provider_name='GitLabProvider'
    )
    worker_2 = repo2.workers.create(name='Worker 2', slug='worker_2', type='classifier')
    worker_2.configurations.create(name='config-name', configuration=config)
    repo2.memberships.create(user=self.user, level=Role.Admin.value)
    self.client.force_login(self.user)

    # New name, but the exact same content as the existing configuration.
    url = reverse('api:worker-configurations', kwargs={'pk': str(worker_2.id)})
    payload = {'name': 'New configuration', 'configuration': config}
    with self.assertNumQueries(8):
        response = self.client.post(url, data=payload, format='json')
    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)

    expected = {'__all__': 'A worker configuration with this name or this configuration already exists for this worker'}
    self.assertDictEqual(response.json(), expected)
def test_configurations_create(self):
    """
    An admin access on the worker's repository allows creating a worker configuration
    """
    name = 'New configuration'
    config = {'key': 'value', 'cahuete': 'bidule'}
    repo2 = Repository.objects.create(
        url='http://gitlab/repo2',
        type=RepositoryType.Worker,
        hook_token='hook-token2',
        credentials=self.creds,
        provider_name='GitLabProvider'
    )
    worker_2 = repo2.workers.create(name='Worker 2', slug='worker_2', type='classifier')
    worker_2.configurations.create(name='config_1', configuration={'key': 'value'})
    repo2.memberships.create(user=self.user, level=Role.Admin.value)
    self.client.force_login(self.user)

    url = reverse('api:worker-configurations', kwargs={'pk': str(worker_2.id)})
    payload = {'name': name, 'configuration': config}
    with self.assertNumQueries(9):
        response = self.client.post(url, data=payload, format='json')
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    # The response must describe the configuration that was actually stored.
    configuration = worker_2.configurations.get(name=name)
    expected = {
        'id': str(configuration.id),
        'name': name,
        'configuration': config
    }
    self.assertDictEqual(response.json(), expected)
def test_configurations_create_no_config(self):
    """Omitting the configuration creates one holding an empty object."""
    name = 'New configuration'
    repo2 = Repository.objects.create(
        url='http://gitlab/repo2',
        type=RepositoryType.Worker,
        hook_token='hook-token2',
        credentials=self.creds,
        provider_name='GitLabProvider'
    )
    worker_2 = repo2.workers.create(name='Worker 2', slug='worker_2', type='classifier')
    worker_2.configurations.create(name='config_1', configuration={'key': 'value'})
    repo2.memberships.create(user=self.user, level=Role.Admin.value)
    self.client.force_login(self.user)

    url = reverse('api:worker-configurations', kwargs={'pk': str(worker_2.id)})
    payload = {'name': name}
    with self.assertNumQueries(9):
        response = self.client.post(url, data=payload, format='json')
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    configuration = worker_2.configurations.get(name=name)
    expected = {
        'id': str(configuration.id),
        'name': name,
        'configuration': {}
    }
    self.assertDictEqual(response.json(), expected)
import json
from hashlib import md5
from django.conf import settings
__default_farm_id = None
......@@ -23,3 +26,8 @@ def get_default_farm_id():
farm = Farm.objects.first() or Farm.objects.create(name="Default farm")
__default_farm_id = farm.id
return __default_farm_id
def hash_object(object):
    """Return the hexadecimal MD5 digest of a JSON-serializable object.

    The object is dumped to JSON with sorted keys, so two dicts holding the
    same items in a different order produce the same digest.
    """
    serialized = json.dumps(object, sort_keys=True)
    digest = md5(serialized.encode('utf-8'))
    return digest.hexdigest()
......@@ -149,6 +149,88 @@ class TestEditTranscription(FixtureAPITestCase):
'worker_version_id': None,
})
def test_transcription_patch_orientation(self):
    """
    A PATCH carrying only the text orientation updates it and leaves the text untouched
    """
    self.client.force_login(self.user)
    manual_tr_id = self.manual_transcription.id
    url = reverse('api:transcription-edit', kwargs={'pk': manual_tr_id})
    payload = {
        'orientation': 'vertical-rl'
    }
    response = self.client.patch(url, format='json', data=payload)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.manual_transcription.refresh_from_db()
    expected = {
        'id': str(manual_tr_id),
        'confidence': None,
        'score': None,
        'text': 'A manual transcription',
        'orientation': 'vertical-rl',
        'worker_version_id': None,
    }
    self.assertDictEqual(response.json(), expected)
def test_transcription_patch_invalid_orientation(self):
    """
    A PATCH with an unknown text orientation value is rejected
    """
    self.client.force_login(self.user)
    manual_tr_id = self.manual_transcription.id
    url = reverse('api:transcription-edit', kwargs={'pk': manual_tr_id})
    payload = {
        'orientation': 'wobbly'
    }
    response = self.client.patch(url, format='json', data=payload)
    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
    expected = {'orientation': ['Value is not of type TextOrientation']}
    self.assertEqual(response.json(), expected)
def test_transcription_edit_orientation(self):
    """
    A PUT on the transcription edit endpoint can set the text orientation along with the text
    """
    self.client.force_login(self.user)
    manual_tr_id = self.manual_transcription.id
    url = reverse('api:transcription-edit', kwargs={'pk': manual_tr_id})
    payload = {
        'text': 'a knight was living lonely',
        'orientation': 'vertical-rl',
    }
    response = self.client.put(url, format='json', data=payload)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.manual_transcription.refresh_from_db()
    expected = {
        'id': str(manual_tr_id),
        'confidence': None,
        'score': None,
        'text': 'a knight was living lonely',
        'orientation': 'vertical-rl',
        'worker_version_id': None,
    }
    self.assertDictEqual(response.json(), expected)
def test_transcription_edit_invalid_orientation(self):
    """
    A PUT with an unknown text orientation value is rejected
    """
    self.client.force_login(self.user)
    manual_tr_id = self.manual_transcription.id
    url = reverse('api:transcription-edit', kwargs={'pk': manual_tr_id})
    payload = {
        'text': 'a knight was living lonely',
        'orientation': 'wobbly',
    }
    response = self.client.put(url, format='json', data=payload)
    self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
    expected = {'orientation': ['Value is not of type TextOrientation']}
    self.assertEqual(response.json(), expected)
def test_transcription_patch_write_right(self):
"""
A write right is required to patch a manual transcription
......
......@@ -22,6 +22,7 @@ from arkindex.dataimport.api import (
RevisionRetrieve,
StartProcess,
UpdateWorkerActivity,
WorkerConfigurationList,
WorkerList,
WorkerRetrieve,
WorkerRunDetails,
......@@ -205,6 +206,7 @@ api = [
# Workers
path('workers/', WorkerList.as_view(), name='workers-list'),
path('workers/<uuid:pk>/', WorkerRetrieve.as_view(), name='worker-retrieve'),
path('workers/<uuid:pk>/configurations/', WorkerConfigurationList.as_view(), name='worker-configurations'),
path('workers/<uuid:pk>/versions/', WorkerVersionList.as_view(), name='worker-versions'),
path('workers/versions/<uuid:pk>/', WorkerVersionRetrieve.as_view(), name='version-retrieve'),
path('workers/versions/<uuid:pk>/activity/', UpdateWorkerActivity.as_view(), name='update-worker-activity'),
......
......@@ -119,6 +119,9 @@ class WorkerACLMixin(ACLMixin):
def has_execution_access(self, worker):
    """Check access to the worker using the Contributor role level."""
    required_level = Role.Contributor.value
    return self.has_worker_access(worker, required_level)
def has_admin_access(self, worker):
    """Check access to the worker using the Admin role level."""
    required_level = Role.Admin.value
    return self.has_worker_access(worker, required_level)
class CorpusACLMixin(ACLMixin):
......