diff --git a/arkindex/documents/api/elements.py b/arkindex/documents/api/elements.py
index fe38b288b363e1fb2f00debf5209cb9fbef16d2c..e789d173a13c381369f082125fb72907f47d719d 100644
--- a/arkindex/documents/api/elements.py
+++ b/arkindex/documents/api/elements.py
@@ -31,7 +31,7 @@ from rest_framework.generics import (
 from rest_framework.mixins import DestroyModelMixin
 from rest_framework.response import Response
 
-from arkindex.dataimport.models import WorkerVersion
+from arkindex.dataimport.models import WorkerActivity, WorkerActivityState, WorkerVersion
 from arkindex.documents.models import (
     AllowedMetaData,
     Classification,
@@ -54,6 +54,7 @@ from arkindex.documents.serializers.elements import (
     ElementSlimSerializer,
     ElementTypeSerializer,
     MetaDataUpdateSerializer,
+    WorkerStatisticsSerializer,
 )
 from arkindex.documents.serializers.light import CorpusAllowedMetaDataSerializer, ElementTypeLightSerializer
 from arkindex.documents.serializers.ml import ElementTranscriptionSerializer
@@ -1410,3 +1411,44 @@ class WorkerResultsDestroy(CorpusACLMixin, DestroyAPIView):
         )
 
         return Response(status=status.HTTP_204_NO_CONTENT)
+
+
+@extend_schema_view(
+    get=extend_schema(
+        operation_id='RetrieveWorkersActivity',
+        tags=['elements']
+    )
+)
+class WorkersActivity(CorpusACLMixin, ListAPIView):
+    """
+    Retrieve corpus-wise statistics about the activity of each worker version.
+
+    One entry is returned per worker version that has activities on elements
+    of the corpus, holding the number of activities in each state.
+    Admin access to the corpus is required.
+    """
+    permission_classes = (IsVerified, )
+    serializer_class = WorkerStatisticsSerializer
+    pagination_class = None
+    queryset = WorkerActivity.objects.none()
+
+    def list(self, request, *args, **kwargs):
+        # Statistics are restricted to administrators of the corpus
+        corpus = self.get_corpus(self.kwargs['corpus'], role=Role.Admin)
+
+        # Retrieve the distribution of activities on this corpus grouped by worker version
+        stats = WorkerActivity.objects \
+            .filter(element_id__in=corpus.elements.values('id')) \
+            .values('worker_version_id') \
+            .annotate(
+                # One conditional count per state, named after the state value
+                **{
+                    state.value: Count('id', filter=Q(state=state.value))
+                    for state in WorkerActivityState
+                }
+            )
+
+        return Response(
+            status=status.HTTP_200_OK,
+            data=WorkerStatisticsSerializer(stats, many=True).data
+        )
diff --git a/arkindex/documents/serializers/elements.py b/arkindex/documents/serializers/elements.py
index 1acbaafbecfe8908a45288eb815cd08e46a61377..1cddedda6033656c4a9184d9f0f9b2d6cf5c5b2a 100644
--- a/arkindex/documents/serializers/elements.py
+++ b/arkindex/documents/serializers/elements.py
@@ -669,3 +669,15 @@ class ElementBulkSerializer(serializers.Serializer):
             element['type'] = type_ids[element['type']]
 
         return data
+
+
+class WorkerStatisticsSerializer(serializers.Serializer):
+    """
+    Serialize activity statistics of a worker version:
+    the number of worker activities in each state
+    """
+    worker_version_id = serializers.UUIDField(read_only=True)
+    queued = serializers.IntegerField(read_only=True)
+    started = serializers.IntegerField(read_only=True)
+    processed = serializers.IntegerField(read_only=True)
+    error = serializers.IntegerField(read_only=True)
diff --git a/arkindex/documents/tests/test_workers_activity.py b/arkindex/documents/tests/test_workers_activity.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3b61e053eba16ede895fc45272941f9b7a58fb6
--- /dev/null
+++ b/arkindex/documents/tests/test_workers_activity.py
@@ -0,0 +1,124 @@
+import itertools
+import uuid
+
+from django.urls import reverse
+from rest_framework import status
+
+from arkindex.dataimport.models import WorkerActivity, WorkerActivityState, WorkerVersion
+from arkindex.documents.models import Corpus
+from arkindex.project.tests import FixtureAPITestCase
+from arkindex.users.models import Role, User
+
+
+class TestWorkersActivity(FixtureAPITestCase):
+
+    @classmethod
+    def setUpTestData(cls):
+        super().setUpTestData()
+        cls.version_1 = WorkerVersion.objects.get(worker__slug='reco')
+        cls.version_2 = WorkerVersion.objects.get(worker__slug='dla')
+        cls.private_corpus = Corpus.objects.create(name='private', public=False)
+        cls.elts_count = cls.corpus.elements.count()
+        # Generate worker activities
+        WorkerActivity.objects.bulk_create([
+            *(
+                WorkerActivity(
+                    element_id=elt.id,
+                    state=state,
+                    worker_version_id=cls.version_1.id
+                ) for elt, state in zip(cls.corpus.elements.all(), itertools.cycle(WorkerActivityState))
+            ), *(
+                WorkerActivity(
+                    element_id=elt.id,
+                    state=WorkerActivityState.Processed.value,
+                    worker_version_id=cls.version_2.id
+                ) for elt in cls.corpus.elements.all()
+            )
+        ])
+
+    def test_version_stats_requires_login(self):
+        with self.assertNumQueries(0):
+            response = self.client.get(
+                reverse('api:workers-activity', kwargs={'corpus': str(self.corpus.id)})
+            )
+        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
+        self.assertDictEqual(response.json(), {'detail': 'Authentication credentials were not provided.'})
+
+    def test_version_private_corpus(self):
+        self.client.force_login(self.user)
+        with self.assertNumQueries(6):
+            response = self.client.get(
+                reverse('api:workers-activity', kwargs={'corpus': str(self.private_corpus.id)})
+            )
+        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
+        self.assertDictEqual(response.json(), {'detail': 'You do not have admin access to this corpus.'})
+
+    def test_version_stats_requires_admin(self):
+        user = User.objects.create_user('user42@test.test', 'abcd')
+        self.corpus.memberships.create(user=user, level=Role.Contributor.value)
+        self.client.force_login(user)
+        with self.assertNumQueries(5):
+            response = self.client.get(
+                reverse('api:workers-activity', kwargs={'corpus': str(self.corpus.id)})
+            )
+        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
+        self.assertDictEqual(response.json(), {'detail': 'You do not have admin access to this corpus.'})
+
+    def test_versions_unexisting_corpus(self):
+        self.client.force_login(self.user)
+        with self.assertNumQueries(3):
+            response = self.client.get(
+                reverse('api:workers-activity', kwargs={'corpus': str(uuid.uuid4())})
+            )
+        self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
+
+    def test_versions_empty(self):
+        """
+        Handle a version that has absolutely no results on the corpus
+        """
+        user = User.objects.create_user('user42@test.test', 'abcd')
+        self.private_corpus.memberships.create(user=user, level=Role.Admin.value)
+        self.client.force_login(user)
+        with self.assertNumQueries(6):
+            response = self.client.get(
+                reverse('api:workers-activity', kwargs={'corpus': str(self.private_corpus.id)})
+            )
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        self.assertListEqual(response.json(), [])
+
+    def test_workers_activity_distributed_states(self):
+        error, processed, queued, started = [
+            WorkerActivity.objects.filter(
+                element__corpus_id=self.corpus.id,
+                worker_version_id=self.version_1.id,
+                state=state
+            ).count()
+            for state in [
+                WorkerActivityState.Error,
+                WorkerActivityState.Processed,
+                WorkerActivityState.Queued,
+                WorkerActivityState.Started
+            ]
+        ]
+        assert error > 0 and processed > 0 and queued > 0 and started > 0
+        self.client.force_login(self.user)
+        with self.assertNumQueries(7):
+            response = self.client.get(
+                reverse('api:workers-activity', kwargs={'corpus': str(self.corpus.id)})
+            )
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        self.assertCountEqual(response.json(), [
+            {
+                'worker_version_id': str(self.version_1.id),
+                'queued': queued,
+                'started': started,
+                'processed': processed,
+                'error': error,
+            }, {
+                'worker_version_id': str(self.version_2.id),
+                'queued': 0,
+                'started': 0,
+                'processed': self.corpus.elements.count(),
+                'error': 0,
+            }
+        ])
diff --git a/arkindex/project/api_v1.py b/arkindex/project/api_v1.py
index 7a1fdc735ed90070dedf3e1d8362c15e69b06382..2f14c77b1d37b8ab460421ca1be09528418f69b8 100644
--- a/arkindex/project/api_v1.py
+++ b/arkindex/project/api_v1.py
@@ -49,6 +49,7 @@ from arkindex.documents.api.elements import (
     ManageSelection,
     MetadataEdit,
     WorkerResultsDestroy,
+    WorkersActivity,
 )
 from arkindex.documents.api.entities import (
     CorpusRoles,
@@ -137,6 +138,7 @@ api = [
     path('corpus/<uuid:pk>/selection/', CorpusSelectionDestroy.as_view(), name='corpus-delete-selection'),
     path('corpus/<uuid:pk>/search/', CorpusSearch.as_view(), name='corpus-search'),
     path('corpus/<uuid:corpus>/workerversion/<uuid:version>/results/', WorkerResultsDestroy.as_view(), name='worker-delete-results'),
+    path('corpus/<uuid:corpus>/workers-activity/', WorkersActivity.as_view(), name='workers-activity'),
 
     # Moderation
     path('classifications/', ClassificationCreate.as_view(), name='classification-create'),