Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Target project: arkindex/backend
Commits on Source (43)
Showing 675 additions and 532 deletions
......@@ -11,7 +11,7 @@ include:
# For jobs that run backend scripts directly
.backend-setup:
-  image: registry.gitlab.com/arkindex/backend/base:rq
+  image: registry.gitlab.com/arkindex/backend/base:django-3.1.4
cache:
paths:
......
-FROM registry.gitlab.com/arkindex/backend/base:rq as build
+FROM registry.gitlab.com/arkindex/backend/base:django-3.1.4 as build
RUN mkdir build
ADD . build
RUN cd build && python3 setup.py sdist
-FROM registry.gitlab.com/arkindex/backend/base:rq
+FROM registry.gitlab.com/arkindex/backend/base:django-3.1.4
ARG COMMON_BRANCH=master
ARG COMMON_ID=9855787
ARG PONOS_BRANCH=master
......
-0.15.0-rc1
+0.15.0
......@@ -54,7 +54,6 @@ from arkindex.project.openapi import AutoSchema
from arkindex.project.permissions import IsVerified
from arkindex.users.models import OAuthCredentials, User
from arkindex_common.enums import DataImportMode
-from arkindex_common.ml_tool import MLToolType
from ponos.models import STATES_ORDERING, State
logger = logging.getLogger(__name__)
......@@ -206,7 +205,7 @@ class DataImportDetails(CorpusACLMixin, RetrieveUpdateDestroyAPIView):
return DataImport.objects.filter(
Q(corpus__isnull=True)
| Q(corpus__in=Corpus.objects.readable(self.request.user))
-        )
+        ).select_related('corpus', 'workflow').prefetch_related('workflow__tasks').annotate(last_run=Max('workflow__tasks__run'))
def get_object(self):
if not hasattr(self, 'dataimport'):
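For context on what the added chain buys, here is a minimal sketch (model and field names come from the hunk above; this is illustrative, not the exact view code):

    from django.db.models import Max
    from arkindex.dataimport.models import DataImport

    imports = (
        DataImport.objects
        .select_related('corpus', 'workflow')            # FK rows fetched via JOIN in the same query
        .prefetch_related('workflow__tasks')             # all tasks in one extra batched query
        .annotate(last_run=Max('workflow__tasks__run'))  # computed in SQL, no per-row Python
    )
    # Without this, serializing N imports costs O(N) extra queries for corpus,
    # workflow and tasks, which the assertNumQueries additions below lock in.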
......@@ -689,7 +688,7 @@ class WorkerList(ListCreateAPIView):
slug=serializer.validated_data['slug'],
defaults={
'name': serializer.validated_data['name'],
-                'type': MLToolType(serializer.validated_data['type']),
+                'type': serializer.validated_data['type'],
}
)
......
from django.core.management.base import BaseCommand, CommandError
from arkindex.dataimport.models import DataImport, DataImportMode
from arkindex.documents.models import ElementType
from arkindex.project.argparse import CorpusArgument, DataImportArgument, ElementArgument, UserArgument
class Command(BaseCommand):
help = 'Apply a template to build a set of DataImport'
def add_arguments(self, parser):
parser.add_argument(
'dataimport',
help='DataImport to use as a template for workers',
type=DataImportArgument()
)
parser.add_argument(
'--corpus',
required=True,
help='Corpus ID or name to create DataImports on',
type=CorpusArgument()
)
parser.add_argument(
'--element',
dest='elements',
nargs='+',
required=True,
            help='Top level element IDs to build workflows: each element will get its own DataImport',
type=ElementArgument(),
)
parser.add_argument(
'--children-type',
type=str,
required=True,
help='Element type slug to use to build the new dataimport',
)
parser.add_argument(
'--chunks',
type=int,
default=1,
help='Number of chunks to build for the dataimport',
)
parser.add_argument(
'--creator',
required=False,
type=UserArgument(),
help='Creator of the new dataimport',
)
def handle(self, dataimport, corpus, elements, children_type, *args, **options):
# Check template (must be in worker mode and have some workers)
if dataimport.mode != DataImportMode.Workers:
raise CommandError("Only workers dataimports are supported")
if not dataimport.worker_runs.exists():
raise CommandError("This dataimport has no worker runs defined")
# Check new dataimport related models (corpus, elements, type)
for element in elements:
if element.corpus != corpus:
raise CommandError(f"Element {element.id} is not in corpus {corpus}")
try:
element_type = corpus.types.get(slug=children_type)
except ElementType.DoesNotExist:
raise CommandError(f"Element type {children_type} is not in corpus {corpus}")
# Now build a clone for each top level element
for element in elements:
            self.clone(dataimport, element, element_type, options['creator'], options['chunks'])
    def clone(self, source, element, element_type, creator=None, chunks=1):
"""
        Clone a dataimport configuration onto new elements
"""
# Build a dataimport that will load all specified children
# elements from that top level element
di = DataImport.objects.create(
mode=DataImportMode.Workers,
corpus=element.corpus,
creator=creator,
element=element,
load_children=True,
element_type=element_type,
)
print(f'Created DataImport {di.id}')
# Build linear worker runs
runs = {}
for wr in source.worker_runs.all():
runs[wr.id] = di.worker_runs.create(
version=wr.version,
parents=[]
)
# Set links
for wr in source.worker_runs.filter(parents__len__gt=0):
runs[wr.id].parents = [
runs[parent].id
for parent in wr.parents
]
runs[wr.id].save()
# Build and start workflow
        di.start(chunks=chunks)
print(f'Started DataImport {di.id}')
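The diff does not show the command's filename; assuming it is registered as, say, dataimport/management/commands/build_dataimports.py (hypothetical name), a typical invocation would look like:

    python manage.py build_dataimports <template-dataimport-id> \
        --corpus <corpus-id-or-name> \
        --element <element-id-1> <element-id-2> \
        --children-type page \
        --chunks 4 \
        --creator <user>

Each listed element then gets its own DataImport cloned from the template's worker runs, built and started immediately.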
......@@ -5,7 +5,6 @@ import uuid
from django.core.management.base import BaseCommand
from arkindex.dataimport.models import Repository, RepositoryType, Revision, Worker, WorkerVersion
-from arkindex_common.ml_tool import MLToolType
logging.basicConfig(
level=logging.INFO,
......@@ -49,7 +48,7 @@ class Command(BaseCommand):
worker, _ = Worker.objects.get_or_create(
name=name,
slug=slug,
-            type=MLToolType.Classifier,
+            type="classifier",
repository=repo
)
......
-# Generated by Django 3.1.3 on 2020-11-24 13:30
+# Generated by Django 3.1.4 on 2020-12-04 08:57
from django.db import migrations, models
......@@ -6,17 +6,13 @@ from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
-        ('users', '0007_user_display_name'),
+        ('dataimport', '0023_workerversion_constraint'),
]
operations = [
migrations.AlterField(
-            model_name='membership',
-            name='level',
-            field=models.PositiveIntegerField(help_text='User privilege level.'),
-        ),
-        migrations.AlterUniqueTogether(
-            name='membership',
-            unique_together={('user', 'group')},
+            model_name='worker',
+            name='type',
+            field=models.CharField(max_length=50),
),
]
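The migration now mirrors the model edit below: the enum-backed Worker.type column becomes a plain varchar. If EnumField was already persisting the enum's .value string (which a bare AlterField implies), existing rows survive unchanged. A hedged post-migration sanity check; the slug set is an assumption, since only 'classifier' and 'recognizer' appear anywhere in this diff:

    from arkindex.dataimport.models import Worker

    KNOWN_TYPES = {'classifier', 'recognizer'}  # assumed list; extend with the real worker type slugs
    unknown = Worker.objects.exclude(type__in=KNOWN_TYPES).values_list('type', flat=True).distinct()
    assert not unknown, f"Unexpected worker types after migration: {list(unknown)}"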
......@@ -15,7 +15,6 @@ from arkindex.project.aws import S3FileMixin, S3FileStatus
from arkindex.project.fields import ArrayField
from arkindex.project.models import IndexableModel
from arkindex_common.enums import DataImportMode
-from arkindex_common.ml_tool import MLToolType
from ponos.models import Artifact, State, Workflow
......@@ -195,7 +194,7 @@ class DataImport(IndexableModel):
}
# Generate a task for each WorkerRun on the DataImport
-        for worker_run in self.worker_runs.all():
+        for worker_run in self.worker_runs.using('default').all():
task_name = f'{worker_run.version.slug}{task_suffix}'
tasks[task_name] = worker_run.build_task_recipe(import_task_name, elements_path, suffix=task_suffix)
......@@ -389,7 +388,7 @@ class Worker(models.Model):
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
name = models.CharField(max_length=100)
slug = models.CharField(max_length=100)
-    type = EnumField(MLToolType, max_length=50)
+    type = models.CharField(max_length=50)
repository = models.ForeignKey('dataimport.Repository', on_delete=models.CASCADE, related_name='workers')
class Meta:
......
......@@ -128,9 +128,7 @@ class DataImportFromFilesSerializer(serializers.Serializer):
def validate(self, data):
if data['mode'] == DataImportMode.PDF:
-            if len(data['files']) > 1:
-                self.fail('unique_pdf')
-            if data['files'][0].content_type != 'application/pdf':
+            if not all(f.content_type == 'application/pdf' for f in data['files']):
self.fail('pdf_only')
elif data['mode'] == DataImportMode.Images:
......
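The behavioural change here: the one-PDF limit ('unique_pdf') is dropped, and the content-type check now covers every uploaded file rather than only the first, which is why test_from_files_pdf_single is deleted further down. The new rule as a standalone predicate (illustrative sketch, not the serializer code):

    def pdf_only(files):
        # Every file must be a PDF; any number of files is now accepted.
        return all(f.content_type == 'application/pdf' for f in files)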
......@@ -6,15 +6,12 @@ from rest_framework.exceptions import ValidationError
from arkindex.dataimport.models import Repository, RepositoryType, Worker, WorkerVersion, WorkerVersionState
from arkindex.dataimport.serializers.git import RevisionWithRefsSerializer
-from arkindex.project.serializer_fields import EnumField
-from arkindex_common.ml_tool import MLToolType
class WorkerSerializer(serializers.ModelSerializer):
"""
Serialize a repository worker
"""
-    type = EnumField(MLToolType)
class Meta:
model = Worker
fields = (
......
......@@ -24,7 +24,9 @@ def check_parents(sender, instance, **kwargs):
linked to the same DataImport as the to-be-saved WorkerRun instance.
It will also check that adding or updating WorkerRun parents will not create cycles in the tree.
"""
-    parents = WorkerRun.objects.filter(id__in=instance.parents, dataimport=instance.dataimport).exclude(id=instance.id).values_list('id', flat=True)
+    parents = WorkerRun.objects.using('default') \
+        .filter(id__in=instance.parents, dataimport=instance.dataimport) \
+        .exclude(id=instance.id).values_list('id', flat=True)
if set(parents) != set(instance.parents):
raise ValidationError(f"Can't add or update WorkerRun {instance.id} because parents field isn't properly defined. It can be either because"
" one or several UUIDs don't refer to existing WorkerRuns or either because listed WorkerRuns doesn't belong to the"
......@@ -32,7 +34,7 @@ def check_parents(sender, instance, **kwargs):
graph = {
wr['id']: wr['parents']
-        for wr in WorkerRun.objects.filter(dataimport=instance.dataimport).values('id', 'parents')
+        for wr in WorkerRun.objects.using('default').filter(dataimport=instance.dataimport).values('id', 'parents')
}
ancestors = _list_ancestors(graph, instance.parents)
if instance.id in ancestors:
......
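Two things change in these hunks. The queries are pinned to the 'default' database alias, presumably so the signal reads freshly written WorkerRun rows even when another connection is configured (the routing rationale is an assumption, not stated in the diff). The cycle check itself builds an id -> parents adjacency map and rejects the save when the instance appears among its own ancestors. _list_ancestors is imported but not shown; a plausible shape for it:

    def _list_ancestors(graph, ids):
        # Iterative DFS over the id -> parents mapping: collect every id reachable
        # by repeatedly following parents, guarding against revisits.
        seen = set()
        stack = list(ids)
        while stack:
            node = stack.pop()
            if node in seen:
                continue
            seen.add(node)
            stack.extend(graph.get(node, []))
        return seen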
......@@ -2,7 +2,6 @@ from django.core.management import call_command
from arkindex.dataimport.models import Repository, RepositoryType, Revision, Worker, WorkerVersion, WorkerVersionState
from arkindex.project.tests import FixtureTestCase
-from arkindex_common.ml_tool import MLToolType
class TestFakeWorker(FixtureTestCase):
......@@ -31,7 +30,7 @@ class TestFakeWorker(FixtureTestCase):
self.assertEqual(worker.name, name)
self.assertEqual(worker.slug, slug)
-        self.assertEqual(worker.type, MLToolType.Classifier)
+        self.assertEqual(worker.type, "classifier")
self.assertIsNotNone(revision.hash)
self.assertEqual(revision.message, "Fake revision")
......@@ -100,7 +99,7 @@ class TestFakeWorker(FixtureTestCase):
worker = Worker.objects.create(
name=name,
slug=slug,
-            type=MLToolType.Classifier,
+            type="classifier",
repository=repo
)
......
......@@ -5,7 +5,7 @@ import yaml
from django.urls import reverse
from rest_framework import status
-from arkindex.dataimport.models import DataFile, DataImport, RepositoryType
+from arkindex.dataimport.models import DataImport, RepositoryType
from arkindex.documents.models import Corpus, ElementType
from arkindex.project.tests import FixtureAPITestCase
from arkindex.users.models import User
......@@ -329,14 +329,16 @@ class TestImports(FixtureAPITestCase):
self.dataimport.start()
self.dataimport.workflow.tasks.all().delete()
self.client.force_login(self.user)
-        response = self.client.get(reverse('api:import-details', kwargs={'pk': self.dataimport.id}))
+        with self.assertNumQueries(7):
+            response = self.client.get(reverse('api:import-details', kwargs={'pk': self.dataimport.id}))
self.assertEqual(response.status_code, status.HTTP_200_OK)
data = response.json()
self.assertEqual(data['state'], State.Unscheduled.value)
def test_details(self):
self.client.force_login(self.user)
-        response = self.client.get(reverse('api:import-details', kwargs={'pk': self.dataimport.id}))
+        with self.assertNumQueries(5):
+            response = self.client.get(reverse('api:import-details', kwargs={'pk': self.dataimport.id}))
self.assertEqual(response.status_code, status.HTTP_200_OK)
data = response.json()
self.assertEqual(data['id'], str(self.dataimport.id))
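A note on these assertions: assertNumQueries fails the test when the wrapped block issues a different number of SQL queries than declared. The two figures pin down the effect of the new select_related/prefetch_related/annotate chain in DataImportDetails, so a future N+1 regression breaks the test instead of silently slowing the endpoint.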
......@@ -730,18 +732,6 @@ class TestImports(FixtureAPITestCase):
}, format='json')
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
-    def test_from_files_pdf_single(self):
-        self.client.force_login(self.user)
-        pdf2 = DataFile.objects.create(
-            name='test2.pdf', size=1337, content_type='application/pdf', corpus=self.corpus)
-        response = self.client.post(reverse('api:import-from-files'), {
-            'files': [str(self.pdf_df.id), str(pdf2.id)],
-            'folder_type': 'volume',
-            'element_type': 'page',
-            'mode': 'pdf',
-        }, format='json')
-        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
def test_from_files_images_wrong_type(self):
self.client.force_login(self.user)
response = self.client.post(reverse('api:import-from-files'), {
......@@ -863,7 +853,7 @@ class TestImports(FixtureAPITestCase):
run_mock = MagicMock()
run_mock.version.slug = 'my_worker'
run_mock.build_task_recipe.return_value = {'image': ''}
-        worker_runs_mock.all.return_value = [run_mock]
+        worker_runs_mock.using('default').all.return_value = [run_mock]
self.client.force_login(self.user)
response = self.client.post(
......@@ -872,7 +862,6 @@ class TestImports(FixtureAPITestCase):
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
dataimport.refresh_from_db()
-        self.maxDiff = None
recipe_dump = yaml.safe_load(dataimport.workflow.recipe)
self.assertCountEqual(
recipe_dump['tasks'].keys(),
......
......@@ -57,7 +57,7 @@ class TestRepositories(FixtureTestCase):
{
'id': str(w.id),
'name': w.name,
-                'type': w.type.value,
+                'type': w.type,
'slug': w.slug
} for w in self.worker_repo.workers.all()
]
......
......@@ -4,7 +4,6 @@ from arkindex.dataimport.models import RepositoryType, Worker, WorkerRun, Worker
from arkindex.dataimport.signals import _list_ancestors
from arkindex.project.tests import FixtureAPITestCase
from arkindex_common.enums import DataImportMode
-from arkindex_common.ml_tool import MLToolType
class TestSignals(FixtureAPITestCase):
......@@ -28,7 +27,7 @@ class TestSignals(FixtureAPITestCase):
repository=cls.repo,
name='Worker 1',
slug='worker_1',
-            type=MLToolType.Classifier
+            type='classifier'
)
cls.version_1 = WorkerVersion.objects.create(
worker=cls.worker_1,
......
......@@ -47,7 +47,10 @@ class TestTranskribusImport(FixtureAPITestCase):
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertEqual(response.json(), {"__all__": ["You have not register your transkribus email"]})
-    def test_arkindex_has_not_access(self):
+    @patch("transkribus.TranskribusAPI.list_user_collection")
+    def test_arkindex_has_not_access(self, mock_transkribus):
+        # Not a mistake: the Transkribus client raises `Exception` directly when it runs out of retries
+        mock_transkribus.side_effect = Exception("401 Unauthorized")
self.client.force_login(self.user)
response = self.client.post(reverse("api:import-transkribus"), {
"collection_id": "12345",
......
......@@ -5,7 +5,6 @@ from rest_framework import status
from arkindex.dataimport.models import Repository, RepositoryType, Revision, Worker, WorkerVersion, WorkerVersionState
from arkindex.project.tests import FixtureAPITestCase
-from arkindex_common.ml_tool import MLToolType
from ponos.models import Workflow
RECIPE = '''
......@@ -81,7 +80,7 @@ class TestWorkersWorkerVersions(FixtureAPITestCase):
repository=repo2,
name='Worker 2',
slug='worker_2',
-            type=MLToolType.Classifier
+            type='classifier'
)
response = self.client.get(reverse('api:repository-workers', kwargs={'pk': str(repo2.id)}))
......@@ -100,7 +99,7 @@ class TestWorkersWorkerVersions(FixtureAPITestCase):
'id': str(self.worker_1.id),
'name': self.worker_1.name,
'slug': self.worker_1.slug,
-            'type': self.worker_1.type.value
+            'type': self.worker_1.type
})
def test_workers_retrieve_required_login(self):
......@@ -135,7 +134,7 @@ class TestWorkersWorkerVersions(FixtureAPITestCase):
self.assertNotEqual(data['id'], str(self.worker_1.id))
self.assertEqual(data['name'], 'Worker post')
self.assertEqual(data['slug'], 'worker_post')
-        self.assertEqual(data['type'], MLToolType.Classifier.value)
+        self.assertEqual(data['type'], 'classifier')
def test_workers_post_return_existing_worker(self):
self.client.force_login(self.user)
......@@ -149,7 +148,7 @@ class TestWorkersWorkerVersions(FixtureAPITestCase):
self.assertEqual(data['id'], str(self.worker_1.id))
self.assertEqual(data['name'], 'Recognizer')
self.assertEqual(data['slug'], 'reco')
-        self.assertEqual(data['type'], MLToolType.Recognizer.value)
+        self.assertEqual(data['type'], 'recognizer')
def test_workers_post_empty(self):
self.client.force_login(self.user)
......@@ -187,7 +186,7 @@ class TestWorkersWorkerVersions(FixtureAPITestCase):
repository=self.repo,
name='Worker 2',
slug='worker_2',
-            type=MLToolType.Classifier
+            type='classifier'
)
version_2 = WorkerVersion.objects.create(
worker=worker_2,
......
......@@ -340,8 +340,7 @@ class ElementsListMixin(object):
return best_classifications
def get_prefetch(self):
-        prefetch = {'corpus', 'zone__image__server', 'type'}
+        prefetch = {'zone__image__server'}
with_best_classes = self.clean_params.get('with_best_classes')
if with_best_classes and with_best_classes.lower() not in ('false', '0'):
prefetch.add(best_classifications_prefetch)
......@@ -356,6 +355,7 @@ class ElementsListMixin(object):
queryset = queryset \
.filter(**self.get_filters()) \
.prefetch_related(*self.get_prefetch()) \
+            .select_related('type', 'corpus', 'zone') \
.order_by(*self.get_order_by())
class_filters = self.get_classifications_filters()
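The split follows the usual Django rule of thumb: select_related for to-one relations resolved with a SQL JOIN, prefetch_related for relations fetched in a separate batched query, which is also where Prefetch objects such as best_classifications_prefetch must live. A sketch of the combined queryset (illustrative):

    qs = (
        Element.objects
        .select_related('type', 'corpus', 'zone')    # to-one relations, one JOINed query
        .prefetch_related('zone__image__server')     # separate batched query, alongside the
                                                     # conditional classifications Prefetch
    )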
......@@ -620,6 +620,12 @@ class ElementRetrieve(RetrieveUpdateDestroyAPIView):
context['element'] = self.get_object()
return context
+    def delete(self, request, *args, **kwargs):
+        self.check_object_permissions(self.request, self.get_object())
+        queryset = Element.objects.filter(id=self.kwargs['pk'])
+        element_trash(queryset, user_id=self.request.user.id)
+        return Response(status=status.HTTP_204_NO_CONTENT)
class ElementNeighbors(ListAPIView):
"""
......@@ -729,7 +735,7 @@ class ManageSelection(SelectionMixin, ListAPIView):
raise ValidationError(['Selection is not available on this instance.'])
def get_queryset(self):
-        filtered_queryset = self.get_selection().order_by('corpus', 'type', 'name')
+        filtered_queryset = self.get_selection().select_related('type', 'corpus', 'zone', 'zone__image', 'zone__image__server').order_by('corpus', 'type__slug', 'name', 'id')
with_best_classes = self.request.query_params.get('with_best_classes')
if with_best_classes and with_best_classes.lower() not in ('false', '0'):
......@@ -1168,3 +1174,27 @@ class ElementBulkCreate(CreateAPIView):
])
return [{'id': element_data['element'].id} for element_data in elements]
+class CorpusDeleteSelection(CorpusACLMixin, SelectionMixin, DestroyAPIView):
+    """
+    Delete all selected elements on a corpus
+    """
+    serializer_class = CorpusSerializer
+    permission_classes = (IsVerified, )
+    openapi_overrides = {
+        'operationId': 'DeleteCorpusSelection',
+        'tags': ['elements']
+    }
+
+    def delete(self, request, *args, **kwargs):
+        corpus = self.get_corpus(self.kwargs['pk'], right=Right.Admin)
+        selected_elements = self.get_selection(corpus_id=corpus.id)
+        if not selected_elements.exists():
+            raise NotFound
+        for batch in range(0, selected_elements.count(), 50):
+            queryset = Element.objects.filter(id__in=list(selected_elements[batch:batch + 50].values_list('id', flat=True)))
+            element_trash(queryset, user_id=self.request.user.id)
+        return Response(status=status.HTTP_204_NO_CONTENT)
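One caveat on the loop above: count() is evaluated once, but each selected_elements[batch:batch + 50] slice re-runs the query, so if element_trash removes rows from the selection as it proceeds, the moving window can skip elements. A sketch that avoids this by freezing the ids first (illustrative, not the view code):

    def trash_selection(selected_elements, user_id, batch_size=50):
        # Materialize ids up front so deletions cannot shift later slices.
        ids = list(selected_elements.values_list('id', flat=True))
        for start in range(0, len(ids), batch_size):
            batch = Element.objects.filter(id__in=ids[start:start + batch_size])
            element_trash(batch, user_id=user_id)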
+import logging
from uuid import UUID
from django.conf import settings
......@@ -41,6 +42,8 @@ from arkindex.project.mixins import CorpusACLMixin
from arkindex.project.permissions import IsVerified
from arkindex.project.triggers import reindex_start
+logger = logging.getLogger(__name__)
class CorpusRoles(CorpusACLMixin, ListCreateAPIView):
"""
......@@ -104,10 +107,12 @@ class EntityDetails(RetrieveUpdateDestroyAPIView):
        # Try to delete indexed entity if possible
        try:
            es_entity = ESEntity.get(id=instance.id.hex)
-            es_entity.delete()
        except NotFoundError:
            pass
+        except Exception as e:
+            logger.error(f"Failed to delete ES index entity {instance.id}: {e}")
+        else:
+            es_entity.delete()
        instance.delete()
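The resulting control flow: NotFoundError (the entity was never indexed) is silently ignored, any other Elasticsearch failure is logged rather than propagated, and the else branch deletes the index entry only when the lookup succeeded; in every case the database removal in instance.delete() still runs.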
......
This diff is collapsed.