diff --git a/arkindex/process/api.py b/arkindex/process/api.py index ccfd14072626d75965f38dd3b0d19bf3a557802e..2ed36862423534ec4faab55c745365f5950ca6cf 100644 --- a/arkindex/process/api.py +++ b/arkindex/process/api.py @@ -20,6 +20,7 @@ from drf_spectacular.utils import ( extend_schema_view, ) from rest_framework import permissions, status +from rest_framework.authentication import SessionAuthentication, TokenAuthentication from rest_framework.exceptions import NotFound, PermissionDenied, ValidationError from rest_framework.generics import ( CreateAPIView, @@ -79,6 +80,7 @@ from arkindex.process.serializers.ingest import BucketSerializer, S3ImportSerial from arkindex.process.serializers.training import StartTrainingSerializer from arkindex.process.serializers.worker_runs import WorkerRunEditSerializer, WorkerRunSerializer from arkindex.process.serializers.workers import ( + DockerWorkerVersionSerializer, RepositorySerializer, WorkerActivitySerializer, WorkerConfigurationExistsErrorSerializer, @@ -2072,3 +2074,32 @@ class SelectProcessFailures(ProcessACLMixin, CreateAPIView): ), ignore_conflicts=True, ) + + +@extend_schema_view( + post=extend_schema( + responses={201: WorkerVersionSerializer}, + tags=['process'], + ) +) +class CreateDockerWorkerVersion(CreateAPIView): + """ + Create a worker version tied to a remote docker image. + The worker version is directly tagged as available. + + Cannot be used with Ponos task or agent authentication. Requires the `CreateDockerWorkerVersion` scope. + """ + permission_classes = (IsVerified, ) + serializer_class = DockerWorkerVersionSerializer + # Only user accounts can call that endpoint + authentication_classes = (SessionAuthentication, TokenAuthentication) + scopes = (Scope.CreateDockerWorkerVersion,) + + def create(self, request, *args, **kwargs): + serializer = self.get_serializer(data=request.data) + serializer.is_valid(raise_exception=True) + self.perform_create(serializer) + return Response( + status=status.HTTP_201_CREATED, + data=WorkerVersionSerializer(serializer.instance, context={'request': self.request}).data, + ) diff --git a/arkindex/process/migrations/0008_update_workerversion_available_requires_docker_image.py b/arkindex/process/migrations/0008_update_workerversion_available_requires_docker_image.py new file mode 100644 index 0000000000000000000000000000000000000000..652d712c3de5a00f825803ec7ff41a6978f4e28b --- /dev/null +++ b/arkindex/process/migrations/0008_update_workerversion_available_requires_docker_image.py @@ -0,0 +1,23 @@ +# Generated by Django 4.1.7 on 2023-07-03 14:47 + +from django.db import migrations, models + +import arkindex.process.models + + +class Migration(migrations.Migration): + + dependencies = [ + ('process', '0007_index_cleanup'), + ] + + operations = [ + migrations.RemoveConstraint( + model_name='workerversion', + name='workerversion_available_requires_docker_image', + ), + migrations.AddConstraint( + model_name='workerversion', + constraint=models.CheckConstraint(check=models.Q(models.Q(('state', arkindex.process.models.WorkerVersionState['Available']), _negated=True), models.Q(('docker_image_id', None), ('docker_image_iid', None), _negated=True), _connector='OR'), name='workerversion_available_requires_docker_image'), + ), + ] diff --git a/arkindex/process/models.py b/arkindex/process/models.py index 47e9755a51bf3f2f3afa3e118143c10dcf5d2c7f..6cee2bd401a9191f585ae4f4bb1394df6e6f88ce 100644 --- a/arkindex/process/models.py +++ b/arkindex/process/models.py @@ -1043,8 +1043,9 @@ class WorkerVersion(models.Model): class Meta: unique_together = (('worker', 'revision'),) constraints = [ + # Available worker versions must either have a docker_image_id or docker_image_iid set models.CheckConstraint( - check=~models.Q(state=WorkerVersionState.Available, docker_image_id=None), + check=~models.Q(state=WorkerVersionState.Available) | ~Q(docker_image_id=None, docker_image_iid=None), name='workerversion_available_requires_docker_image', ) ] diff --git a/arkindex/process/serializers/workers.py b/arkindex/process/serializers/workers.py index 0b4be829c0e09f4250cd07cc49519c9db0bc30f5..ffaf5256e606c95ef0ef12e142adea1190e9dd1b 100644 --- a/arkindex/process/serializers/workers.py +++ b/arkindex/process/serializers/workers.py @@ -1,8 +1,11 @@ +import base64 import urllib +import uuid from collections import defaultdict from enum import Enum from textwrap import dedent +from django.db import transaction from django.db.models import Q from drf_spectacular.utils import extend_schema_field from rest_framework import serializers @@ -11,6 +14,7 @@ from rest_framework.exceptions import ValidationError from arkindex.ponos.models import Task from arkindex.ponos.utils import get_process_from_task_auth from arkindex.process.models import ( + GitRef, Process, Repository, Revision, @@ -23,9 +27,10 @@ from arkindex.process.models import ( WorkerVersionGPUUsage, WorkerVersionState, ) -from arkindex.process.serializers.git import RevisionWithRefsSerializer +from arkindex.process.serializers.git import GitRefSerializer, RevisionWithRefsSerializer from arkindex.process.utils import hash_object from arkindex.project.serializer_fields import EnumField +from arkindex.users.models import Role class WorkerLightSerializer(serializers.ModelSerializer): @@ -443,3 +448,117 @@ class WorkerStatisticsSerializer(serializers.Serializer): Format: `[DD ]HH:MM:ss[.uuuuuu]` '''), ) + + +class DockerWorkerVersionSerializer(serializers.ModelSerializer): + + repository_url = serializers.CharField( + max_length=100, + help_text="URL of the worker's repository. If the repository does not exist, it will be created." + ) + worker_slug = serializers.CharField( + max_length=100, + help_text=dedent(''' + The slug/name of the worker to which a new version will be published. + If such a worker does not exist, it will be created. + '''), + ) + revision_hash = serializers.CharField(max_length=50) + revision_message = serializers.CharField(required=False, default="created from docker image") + revision_author = serializers.CharField(max_length=50, required=False, default="default") + revision_references = GitRefSerializer( + many=True, + required=False, + help_text="Optional list of types and names to create GitRefs", + ) + # Docker image IID is mandatory on this endpoint + docker_image_iid = serializers.CharField(max_length=80) + gpu_usage = EnumField(WorkerVersionGPUUsage, required=False, default=WorkerVersionGPUUsage.Disabled) + model_usage = serializers.BooleanField(default=False) + + class Meta: + model = WorkerVersion + fields = ( + 'configuration', + 'docker_image_iid', + 'gpu_usage', + 'model_usage', + # Related fields + 'repository_url', + 'worker_slug', + 'revision_hash', + 'revision_message', + 'revision_author', + 'revision_references', + ) + + @transaction.atomic + def create(self, validated_data): + """ + Retrieve or create the entire git stack in order to build a + worker version corresponding to a given revision on the repository. + """ + # Retrieve or create the Git repository + repository, created_repo = Repository.objects.get_or_create( + url=validated_data['repository_url'], + defaults={ + # Generate a default hook token (DB constraint) even if no webhook is created + 'hook_token': base64.b64encode(uuid.uuid4().bytes).decode('utf-8') + }, + ) + # Grant an admin access to the repository in case it got created + if created_repo: + repository.memberships.create(user=self.context['request'].user, level=Role.Admin.value) + + # Retrieve or create the revision (multiple workers may reference a same revision) + revision, _ = Revision.objects.get_or_create( + repo=repository, + hash=validated_data['revision_hash'], + defaults={ + 'message': validated_data['revision_message'], + 'author': validated_data['revision_author'], + }, + ) + + # Add Git references to the created revision + git_refs = validated_data.get('revision_references', []) + GitRef.objects.bulk_create( + ( + GitRef(revision=revision, repository=repository, type=ref['type'], name=ref['name']) + for ref in git_refs + ), + ignore_conflicts=True, + ) + + # Use a specific worker type in case a worker must be created + worker_type, _ = WorkerType.objects.get_or_create(slug='docker', defaults={'display_name': 'Docker'}) + + # Retrieve or create the worker + worker, _ = repository.workers.get_or_create( + slug=validated_data['worker_slug'], + repository=repository, + defaults={ + 'name': validated_data['worker_slug'], + 'type': worker_type, + }, + ) + + # Finally, create the worker version and mark it as available + version, created = WorkerVersion.objects.get_or_create( + revision=revision, + worker=worker, + defaults={ + 'configuration': validated_data['configuration'], + 'docker_image_iid': validated_data['docker_image_iid'], + 'gpu_usage': validated_data['gpu_usage'], + 'model_usage': validated_data['model_usage'], + 'state': WorkerVersionState.Available, + }, + ) + # Abort in case a version already exists with that revision and worker + if not created: + raise ValidationError({ + '__all__': ['A worker version already exists on this revision and this worker slug.'] + }) + + return version diff --git a/arkindex/process/tests/test_docker_worker_version.py b/arkindex/process/tests/test_docker_worker_version.py new file mode 100644 index 0000000000000000000000000000000000000000..2fba8ab09c5ff28acd54b716fe9dade208e7b0ef --- /dev/null +++ b/arkindex/process/tests/test_docker_worker_version.py @@ -0,0 +1,344 @@ +from django.urls import reverse +from rest_framework import status + +from arkindex.process.models import GitRefType, ProcessMode, Repository, Worker, WorkerType, WorkerVersionGPUUsage +from arkindex.project.tests import FixtureAPITestCase +from arkindex.users.models import Role, Scope + + +class TestDockerWorkerVersion(FixtureAPITestCase): + """ + Test endpoint to create a worker version from a docker image + """ + + @classmethod + def setUpTestData(cls): + super().setUpTestData() + cls.creds = cls.user.credentials.get() + cls.repo = cls.creds.repos.get(url='http://my_repo.fake/workers/worker') + cls.rev = cls.repo.revisions.get() + cls.worker = Worker.objects.get(slug='reco') + cls.version = cls.worker.versions.get() + cls.user.user_scopes.create(scope=Scope.CreateDockerWorkerVersion) + cls.worker_type = WorkerType.objects.create(slug='docker', display_name='Docker') + + def test_create_requires_login(self): + response = self.client.post(reverse('api:version-from-docker')) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + self.assertDictEqual(response.json(), {'detail': 'Authentication credentials were not provided.'}) + + def test_create_deny_ponos_auth(self): + """ + Ponos task authentication is disabled on that endpoint + """ + process = self.rev.processes.create(mode=ProcessMode.Repository, creator=self.user) + process.start() + task = process.tasks.get() + response = self.client.post( + reverse('api:version-from-docker'), + HTTP_AUTHORIZATION=f'Ponos {task.token}', + ) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + self.assertDictEqual(response.json(), {'detail': 'Authentication credentials were not provided.'}) + + def test_create_requires_verified(self): + self.user.verified_email = False + self.user.save() + self.client.force_login(self.user) + response = self.client.post(reverse('api:version-from-docker')) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + self.assertDictEqual(response.json(), {'detail': 'You do not have permission to perform this action.'}) + + def test_create_requires_scope(self): + """ + A specific user scope is required to create a worker version from a docker image + """ + self.user.user_scopes.get(scope=Scope.CreateDockerWorkerVersion).delete() + self.client.force_login(self.user) + with self.assertNumQueries(3): + response = self.client.post(reverse('api:version-from-docker')) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + + self.assertDictEqual(response.json(), { + 'detail': 'You do not have permission to perform this action.', + 'missing_scopes': [Scope.CreateDockerWorkerVersion.value], + }) + + def test_create_admin_no_scope(self): + """ + User scope is not required for instance admins + """ + self.assertEqual(self.superuser.user_scopes.count(), 0) + self.client.force_login(self.superuser) + response = self.client.post( + reverse('api:version-from-docker'), + data={ + 'configuration': {'test': 'A'}, + 'docker_image_iid': 'a_docker_image', + 'repository_url': self.repo.url, + 'revision_hash': 'new_revision_hash', + 'worker_slug': self.worker.slug, + }, + format='json', + ) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + + def test_create_required_fields(self): + self.client.force_login(self.user) + with self.assertNumQueries(3): + response = self.client.post(reverse('api:version-from-docker')) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual( + response.json(), + { + 'configuration': ['This field is required.'], + 'docker_image_iid': ['This field is required.'], + 'repository_url': ['This field is required.'], + 'revision_hash': ['This field is required.'], + 'worker_slug': ['This field is required.'], + }, + ) + + def test_create_fields_validation(self): + self.client.force_login(self.user) + with self.assertNumQueries(3): + response = self.client.post( + reverse('api:version-from-docker'), + data={ + 'configuration': 'str', + 'docker_image_iid': [], + 'repository_url': [], + 'revision_hash': '', + 'worker_slug': [], + 'revision_message': [], + 'revision_author': [], + 'revision_references': [{'a': 133}], + 'gpu_usage': 'AAAAAAAA', + 'model_usage': [], + }, + format='json', + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), { + 'docker_image_iid': ['Not a valid string.'], + 'gpu_usage': ['Value is not of type WorkerVersionGPUUsage'], + 'model_usage': ['Must be a valid boolean.'], + 'repository_url': ['Not a valid string.'], + 'revision_author': ['Not a valid string.'], + 'revision_hash': ['This field may not be blank.'], + 'revision_message': ['Not a valid string.'], + 'revision_references': [{ + 'name': ['This field is required.'], + 'type': ['This field is required.'] + }], + 'worker_slug': ['Not a valid string.'], + }) + + def test_create_duplicated(self): + """ + No worker version can be created with an existing revision hash and worker slug + """ + self.client.force_login(self.user) + with self.assertNumQueries(12): + response = self.client.post( + reverse('api:version-from-docker'), + data={ + 'configuration': {}, + 'docker_image_iid': 'some_docker_image', + 'repository_url': self.repo.url, + 'revision_hash': self.version.revision.hash, + 'worker_slug': self.version.worker.slug, + }, + format='json', + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual(response.json(), { + '__all__': ['A worker version already exists on this revision and this worker slug.'] + }) + + def test_create_default_message(self): + """ + A new version can be published with only the required fields + """ + self.client.force_login(self.user) + with self.assertNumQueries(18): + response = self.client.post( + reverse('api:version-from-docker'), + data={ + 'configuration': {'test': 'A'}, + 'docker_image_iid': 'a_docker_image', + 'repository_url': self.repo.url, + 'revision_hash': 'new_revision_hash', + 'worker_slug': self.worker.slug, + }, + format='json', + ) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + new_revision = self.repo.revisions.get(hash='new_revision_hash') + new_version = new_revision.versions.get() + self.assertEqual(new_version.worker, self.worker) + self.assertDictEqual(response.json(), { + 'id': str(new_version.id), + 'configuration': {'test': 'A'}, + 'docker_image': None, + 'docker_image_iid': 'a_docker_image', + 'docker_image_name': new_version.docker_image_name, + 'gpu_usage': WorkerVersionGPUUsage.Disabled.value, + 'model_usage': False, + 'revision': { + 'id': str(new_revision.id), + 'author': 'default', + 'commit_url': 'http://my_repo.fake/workers/worker/commit/new_revision_hash', + 'created': new_revision.created.isoformat().replace('+00:00', 'Z'), + 'hash': 'new_revision_hash', + 'message': 'created from docker image', + 'refs': [] + }, + 'state': 'available', + 'worker': { + 'id': str(self.worker.id), + 'name': 'Recognizer', + 'slug': 'reco', + 'type': 'recognizer' + } + }) + + def test_create(self): + """ + A new version can be published with the optional values + """ + self.client.force_login(self.user) + with self.assertNumQueries(19): + response = self.client.post( + reverse('api:version-from-docker'), + data={ + 'configuration': {'test': 'A'}, + 'docker_image_iid': 'a_docker_image', + 'repository_url': self.repo.url, + 'revision_hash': 'new_revision_hash', + 'worker_slug': self.worker.slug, + 'revision_message': 'Bruce was very clever', + 'revision_author': 'Iwan Roberts', + 'revision_references': [ + {'type': 'branch', 'name': 'master'}, + {'type': 'tag', 'name': '2.0'}, + ], + 'gpu_usage': WorkerVersionGPUUsage.Required.value, + 'model_usage': True, + }, + format='json', + ) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + new_revision = self.repo.revisions.get(hash='new_revision_hash') + refs = list(new_revision.refs.all()) + self.assertDictEqual( + {ref.type: ref.name for ref in refs}, + {GitRefType.Branch: 'master', GitRefType.Tag: '2.0'}, + ) + new_version = new_revision.versions.get() + self.assertDictEqual(response.json(), { + 'id': str(new_version.id), + 'configuration': {'test': 'A'}, + 'docker_image': None, + 'docker_image_iid': 'a_docker_image', + 'docker_image_name': new_version.docker_image_name, + 'gpu_usage': WorkerVersionGPUUsage.Required.value, + 'model_usage': True, + 'revision': { + 'id': str(new_revision.id), + 'author': 'Iwan Roberts', + 'commit_url': 'http://my_repo.fake/workers/worker/commit/new_revision_hash', + 'created': new_revision.created.isoformat().replace('+00:00', 'Z'), + 'hash': 'new_revision_hash', + 'message': 'Bruce was very clever', + 'refs': [ + {'id': str(ref.id), 'name': ref.name, 'type': ref.type.value} + for ref in refs + ], + }, + 'state': 'available', + 'worker': { + 'id': str(self.worker.id), + 'name': 'Recognizer', + 'slug': 'reco', + 'type': 'recognizer' + } + }) + # Existing repository memberships are not updated + self.assertListEqual(list(self.repo.memberships.values_list("level", "level")), []) + + def test_create_git_stack(self): + """ + All git references can be created including repository, revision, gitrefs and worker + including the default worker type + """ + self.worker_type.delete() + self.client.force_login(self.user) + with self.assertNumQueries(29): + response = self.client.post( + reverse('api:version-from-docker'), + data={ + 'configuration': {'key': 'value'}, + 'docker_image_iid': 'docker_image_42', + 'repository_url': 'https://gitlab.test.arkindex.org/project/', + 'revision_hash': 'deadbeef', + 'worker_slug': 'new_gen_classifier', + 'revision_references': [ + {'type': 'branch', 'name': 'master'}, + ], + }, + format='json', + ) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + new_repo = Repository.objects.get(url='https://gitlab.test.arkindex.org/project/') + new_revision = new_repo.revisions.get(hash='deadbeef') + new_ref = new_revision.refs.get() + new_version = new_revision.versions.get() + new_worker_type = new_version.worker.type + self.assertDictEqual(response.json(), { + 'id': str(new_version.id), + 'configuration': {'key': 'value'}, + 'docker_image': None, + 'docker_image_iid': 'docker_image_42', + 'docker_image_name': new_version.docker_image_name, + 'gpu_usage': WorkerVersionGPUUsage.Disabled.value, + 'model_usage': False, + 'revision': { + 'id': str(new_revision.id), + 'author': 'default', + 'commit_url': 'https://gitlab.test.arkindex.org/project/commit/deadbeef', + 'created': new_revision.created.isoformat().replace('+00:00', 'Z'), + 'hash': 'deadbeef', + 'message': 'created from docker image', + 'refs': [{'id': str(new_ref.id), 'name': new_ref.name, 'type': new_ref.type.value}], + }, + 'state': 'available', + 'worker': { + 'id': str(new_version.worker.id), + 'name': 'new_gen_classifier', + 'slug': 'new_gen_classifier', + 'type': str(new_worker_type), + } + }) + # Check attributes directly from the DB + self.assertEqual(new_repo.url, 'https://gitlab.test.arkindex.org/project/') + self.assertEqual(new_revision.author, 'default') + self.assertEqual(new_revision.message, 'created from docker image') + self.assertEqual(new_revision.repo, new_repo) + self.assertEqual(new_ref.name, 'master') + self.assertEqual(new_ref.repository, new_repo) + self.assertEqual(new_version.worker.name, 'new_gen_classifier') + self.assertEqual(new_version.worker.slug, 'new_gen_classifier') + self.assertEqual(new_version.worker.public, False) + self.assertEqual(new_version.worker.repository, new_repo) + self.assertEqual(new_version.configuration, {'key': 'value'}) + self.assertEqual(new_version.docker_image_id, None) + self.assertEqual(new_version.docker_image_iid, 'docker_image_42') + self.assertEqual(new_version.gpu_usage, WorkerVersionGPUUsage.Disabled) + self.assertEqual(new_version.model_usage, False) + self.assertEqual(new_worker_type.slug, 'docker') + self.assertEqual(new_worker_type.display_name, 'Docker') + # User is granted an admin role on the repository + self.assertListEqual(list(new_repo.memberships.values_list("user", "level")), [ + (self.user.id, Role.Admin.value) + ]) diff --git a/arkindex/project/api_v1.py b/arkindex/project/api_v1.py index a2d9e341b31b7214e5323c6aea42325121ad22ba..91a4ff189a5c7123ee1aee4abb5bd19155d6c98f 100644 --- a/arkindex/project/api_v1.py +++ b/arkindex/project/api_v1.py @@ -83,6 +83,7 @@ from arkindex.process.api import ( CorpusWorkersActivity, CorpusWorkerVersionList, CorpusWorkflow, + CreateDockerWorkerVersion, CreateProcessTemplate, DataFileCreate, DataFileList, @@ -264,6 +265,7 @@ api = [ path('workers/<uuid:pk>/configurations/', WorkerConfigurationList.as_view(), name='worker-configurations'), path('workers/<uuid:pk>/versions/', WorkerVersionList.as_view(), name='worker-versions'), path('workers/versions/<uuid:pk>/', WorkerVersionRetrieve.as_view(), name='version-retrieve'), + path('workers/versions/docker/', CreateDockerWorkerVersion.as_view(), name='version-from-docker'), path('workers/versions/<uuid:pk>/activity/', UpdateWorkerActivity.as_view(), name='update-worker-activity'), path('workers/configurations/<uuid:pk>/', WorkerConfigurationRetrieve.as_view(), name='configuration-retrieve'), diff --git a/arkindex/users/models.py b/arkindex/users/models.py index aab8e73c14ad8257e6a6cad824a50e980942ba14..7b020c114c2b79dd18882c931868c913bf7dedd1 100644 --- a/arkindex/users/models.py +++ b/arkindex/users/models.py @@ -256,6 +256,11 @@ class Scope(Enum): Allows access to S3 ingestion features. """ + CreateDockerWorkerVersion = 'create_docker_worker_version' + """ + Allows creating a worker version and its Git stack from a docker image + """ + class UserScope(models.Model): user = models.ForeignKey('users.User', related_name='user_scopes', on_delete=models.CASCADE)