
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Target project: arkindex/backend
Commits on Source (120)
Showing 258 additions and 82 deletions
......@@ -11,7 +11,7 @@ include:
# For jobs that run backend scripts directly
.backend-setup:
image: registry.gitlab.com/arkindex/backend/base:django-4.0.2
image: registry.gitlab.com/arkindex/backend/base:django-4.0.4
cache:
paths:
......
repos:
- repo: https://github.com/asottile/seed-isort-config
rev: v2.2.0
hooks:
- id: seed-isort-config
- repo: https://github.com/pre-commit/mirrors-isort
rev: v4.3.21
rev: v5.10.1
hooks:
- id: isort
- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.3
rev: 3.9.2
hooks:
- id: flake8
additional_dependencies:
- 'flake8-copyright==0.2.2'
- 'flake8-debugger==3.1.0'
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.1.0
rev: v4.1.0
hooks:
- id: check-ast
- id: check-docstring-first
......@@ -31,6 +27,14 @@ repos:
args: ['--django']
- id: check-json
- id: requirements-txt-fixer
- id: end-of-file-fixer
- id: mixed-line-ending
- repo: https://github.com/codespell-project/codespell
rev: v2.1.0
hooks:
- id: codespell
args: ['--write-changes']
exclude: '^arkindex\/(documents\/export|sql_validation)\/.*\.sql'
- repo: meta
hooks:
- id: check-useless-excludes
FROM registry.gitlab.com/arkindex/backend/base:django-4.0.2 as build
FROM registry.gitlab.com/arkindex/backend/base:django-4.0.4 as build
RUN mkdir build
ADD . build
RUN cd build && python3 setup.py sdist
FROM registry.gitlab.com/arkindex/backend/base:django-4.0.2
FROM registry.gitlab.com/arkindex/backend/base:django-4.0.4
ARG PONOS_BRANCH=master
ARG PONOS_ID=10017043
ARG TRANSKRIBUS_BRANCH=master
......
......@@ -61,7 +61,7 @@ RUN python -m nuitka \
arkindex/manage.py
# Start over from a clean setup
FROM registry.gitlab.com/arkindex/backend/base:django-4.0.2 as build
FROM registry.gitlab.com/arkindex/backend/base:django-4.0.4 as build
# Import files from compilation
RUN mkdir /usr/share/arkindex
......
......@@ -65,7 +65,7 @@ gitlab:
### Local image server
Arkindex splits up image URLs in their image server and the image path. For example, a IIIF server at `http://iiif.irht.cnrs.fr/iiif/` and an image at `/Paris/JJ042/1.jpg` would be represented as an ImageServer instance holding one Image. Since Arkindex has a local IIIF server for image uploads and thumbnails, a special instance of ImageServer is required to point to this local server. In local developement, this server should be available at `https://ark.localhost/iiif`. You will therefore need to create an ImageServer via the Django admin or the Django shell with this URL. To set the local server ID, you can add a custom setting in `arkindex/config.yml`:
Arkindex splits up image URLs in their image server and the image path. For example, a IIIF server at `http://iiif.irht.cnrs.fr/iiif/` and an image at `/Paris/JJ042/1.jpg` would be represented as an ImageServer instance holding one Image. Since Arkindex has a local IIIF server for image uploads and thumbnails, a special instance of ImageServer is required to point to this local server. In local development, this server should be available at `https://ark.localhost/iiif`. You will therefore need to create an ImageServer via the Django admin or the Django shell with this URL. To set the local server ID, you can add a custom setting in `arkindex/config.yml`:
```yaml
local_imageserver_id: 999
```
......
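For reference, a minimal sketch of creating that local ImageServer from the Django shell. It assumes the ImageServer model lives in `arkindex.images.models` and that a URL plus a display name are enough to create one; the field names are illustrative, not confirmed by this diff.

```python
# Hypothetical sketch: register the local IIIF server used for uploads and
# thumbnails, then reuse its ID as local_imageserver_id in arkindex/config.yml.
# The 'display_name' field is an assumption about the model's schema.
from arkindex.images.models import ImageServer

server, _ = ImageServer.objects.get_or_create(
    url='https://ark.localhost/iiif',
    defaults={'display_name': 'Local IIIF server'},
)
print(server.id)  # the value to set as local_imageserver_id
```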
1.2.0
1.2.3
......@@ -47,7 +47,7 @@ class DataImportAdmin(admin.ModelAdmin):
def get_queryset(self, *args, **kwargs):
return super().get_queryset(*args, **kwargs) \
.prefetch_related('workflow__tasks') \
.prefetch_related('workflow__tasks', 'corpus') \
.annotate(last_run=Max('workflow__tasks__run'))
......@@ -110,6 +110,7 @@ class WorkerVersionAdmin(admin.ModelAdmin):
list_filter = ('worker', )
field = ('id', 'worker', 'revision', 'configuration')
readonly_fields = ('id', )
raw_id_fields = ('docker_image', 'revision')
class WorkerConfigurationAdmin(admin.ModelAdmin):
......
......@@ -7,7 +7,7 @@ from django.conf import settings
from django.core.mail import send_mail
from django.db import transaction
from django.db.models import CharField, Count, F, Max, Q, Value
from django.db.models.functions import Cast, Concat, Greatest, Now, NullIf
from django.db.models.functions import Cast, Coalesce, Concat, Greatest, Now, NullIf
from django.db.models.query import Prefetch
from django.shortcuts import get_object_or_404
from django.template.loader import render_to_string
......@@ -134,6 +134,11 @@ logger = logging.getLogger(__name__)
description='Filter imports by beginning of UUID',
required=False,
),
OpenApiParameter(
'name',
description='Filter imports whose name contains the given string (case insensitive)',
required=False,
),
OpenApiParameter(
'with_workflow',
type=bool,
......@@ -195,9 +200,10 @@ class DataImportsList(ProcessACLMixin, ListAPIView):
qs = self.readable_processes \
.filter(filters) \
.prefetch_related('workflow__tasks')
# Order workflow by date of last updated task in workflow
# Order workflow by completion date when available, or by date of last updated task in workflow
qs = qs.annotate(
last_date=Greatest(Max('workflow__tasks__updated'), 'updated'),
date_order=Coalesce('workflow__finished', 'last_date'),
last_run=Max('workflow__tasks__run')
)
......@@ -220,7 +226,7 @@ class DataImportsList(ProcessACLMixin, ListAPIView):
state_query |= Q(workflow__tasks__isnull=True)
qs = qs.filter(state_query).exclude(id__in=excluded_imports.values('id')).distinct()
return qs.order_by('-last_date')
return qs.order_by('-date_order')
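Taken together with the annotation above, this sorts processes by workflow completion date when one exists, falling back to the date of the latest activity. A minimal standalone sketch of the same pattern, using the field names from this diff:

```python
# Illustrative sketch: finished workflows sort by their completion date,
# unfinished ones by their most recent task update (or the process update).
from django.db.models import Max
from django.db.models.functions import Coalesce, Greatest

from arkindex.dataimport.models import DataImport

qs = DataImport.objects.annotate(
    last_date=Greatest(Max('workflow__tasks__updated'), 'updated'),
    # Coalesce returns its first non-null argument: workflow.finished when
    # the workflow is complete, last_date otherwise.
    date_order=Coalesce('workflow__finished', 'last_date'),
).order_by('-date_order')
```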
@extend_schema(tags=['imports'])
......@@ -425,7 +431,7 @@ class CorpusWorkflow(SelectionMixin, CorpusACLMixin, CreateAPIView):
use_cache=use_cache,
)
# Ensure process elements do exists
# Ensure process elements do exist
if selection:
elements = self.get_selection(corpus.id)
if not elements.exists():
......@@ -433,10 +439,6 @@ class CorpusWorkflow(SelectionMixin, CorpusACLMixin, CreateAPIView):
'__all__': ['No element match those filters.']
})
process.elements.set(elements)
elif not process.list_elements().exists():
raise ValidationError({
'__all__': ['No element match those filters.']
})
return Response(
status=status.HTTP_201_CREATED,
......@@ -495,6 +497,8 @@ class StartProcess(CorpusACLMixin, APIView):
errors['use_cache'] = 'The process must have workers attached to use cached results.'
if data.get('use_gpu'):
errors['use_gpu'] = 'The process must have workers attached to use GPUs.'
if not data.get('thumbnails'):
errors['__all__'] = ['The process must either use thumbnail generation or have worker runs.']
if errors:
raise ValidationError(errors)
......@@ -888,7 +892,7 @@ class WorkerVersionList(WorkerACLMixin, ListCreateAPIView):
filters = Q()
if self.simple_mode:
# Limit output to versions with tags or master/main branchs
# Limit output to versions with tags or master/main branches
filters = Q(revision__refs__type=GitRefType.Tag) | Q(revision__refs__type=GitRefType.Branch, revision__refs__name__in=["master", "main"])
return worker.versions \
......@@ -975,7 +979,7 @@ class WorkerVersionRetrieve(RetrieveUpdateAPIView):
def check_object_permissions(self, request, instance):
"""
Allow any user to retrieve a worker version without authentication
This behavior allows to retrieve a version information on public ressources
This behavior allows retrieving version information on public resources
Only internal users are allowed to update a worker version
"""
......@@ -1316,7 +1320,7 @@ class ListProcessElements(CustomPaginationViewMixin, CorpusACLMixin, ListAPIView
raise ValidationError({'__all__': [str(e)]})
if not self.with_image:
return queryset.values('id', 'type__slug', 'name')
return queryset.values('id', 'type_id', 'name')
return queryset.annotate(
# Build the image URL by concatenating the server's URL to the image's path
......@@ -1333,7 +1337,7 @@ class ListProcessElements(CustomPaginationViewMixin, CorpusACLMixin, ListAPIView
)
).values(
'id',
'type__slug',
'type_id',
'name',
'image_id',
'image__width',
......@@ -1437,6 +1441,14 @@ class UpdateWorkerActivity(GenericAPIView):
}
)
# Update other activities
if state == WorkerActivityState.Error.value:
WorkerActivity.objects.filter(
process_id=process_id,
element_id=element_id,
state=WorkerActivityState.Queued,
).update(state=WorkerActivityState.Error)
return Response(serializer.data)
......
......@@ -73,7 +73,7 @@ class Command(BaseCommand):
'AWS_ENDPOINT': settings.AWS_ENDPOINT,
'AWS_REGION': settings.AWS_REGION
}
# Assert s3 informations are passed to tasks
# Assert S3 information is passed to tasks
assert env_vars['AWS_ACCESS_KEY'] and env_vars['AWS_SECRET_KEY'], (
'S3 environment variables could not be found\n'
'Please define AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY before starting import'
......
......@@ -43,7 +43,14 @@ class CorpusWorkerVersionManager(models.Manager):
"""
Rebuild the corpus worker versions cache from all ML results.
"""
from arkindex.documents.models import Element, Transcription, Entity, TranscriptionEntity, Classification, MetaData
from arkindex.documents.models import (
Classification,
Element,
Entity,
MetaData,
Transcription,
TranscriptionEntity,
)
querysets = [
Element.objects.exclude(worker_version_id=None).values_list('corpus_id', 'worker_version_id'),
......
......@@ -138,10 +138,6 @@ class DataImport(IndexableModel):
filters['name__contains'] = self.name_contains
if self.element_type:
filters['type_id'] = self.element_type_id
else:
# Limit the scope of types available to merge
# This prevent memory from exploding when no type is selected
filters['type__corpus_id'] = self.corpus_id
return filters
......@@ -164,7 +160,7 @@ class DataImport(IndexableModel):
version_id=run.version_id,
parents=run.parents,
configuration_id=run.configuration_id)
# Save the correspondance between this process' worker_run and the new one
# Save the correspondence between this process' worker_run and the new one
new_runs[run.id] = new_run
# Remap parent ids correctly
......@@ -206,8 +202,11 @@ class DataImport(IndexableModel):
# Load the full corpus only when elements have not been populated before
if elements is None:
# Handle all elements of the process corpus
# Handle all elements of the process' corpus
elements = Element.objects.filter(corpus=self.corpus_id)
if not self.load_children:
# Use top-level elements only
elements = elements.filter(paths__path=[])
# Filter elements depending on process properties
return elements.filter(**self._get_filters())
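The load_children flag leans on Arkindex's ElementPath representation, where a top-level element has a single empty path (the test fixtures later in this diff create them with `paths.create(path=[])`). A short sketch of the two scopes, assuming an existing corpus:

```python
# Sketch of the element scopes selected above, for some existing corpus.
from arkindex.documents.models import Corpus, Element

corpus = Corpus.objects.first()  # any corpus, for illustration
# load_children=False: top-level elements only, i.e. those with an empty path
top_level = Element.objects.filter(corpus=corpus, paths__path=[])
# load_children=True: every element of the corpus
everything = Element.objects.filter(corpus=corpus)
```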
......@@ -279,7 +278,7 @@ class DataImport(IndexableModel):
}
}
# Import entities directy after import step
# Import entities directly after import step
if self.build_entities:
tasks['build_entities'] = {
'image': settings.ARKINDEX_TASKS_IMAGE,
......@@ -617,6 +616,12 @@ class WorkerConfiguration(IndexableModel):
related_name='configurations',
)
def __str__(self):
return self.name
def __repr__(self):
return f'<WorkerConfiguration {self.name!r} ({self.id})>'
class Meta:
unique_together = (
('worker', 'configuration_hash'),
......@@ -696,7 +701,7 @@ class WorkerActivityState(Enum):
class WorkerActivity(IndexableModel):
"""
Many-to-many relationship between Element and WorkerVersion
Used to track the activity of a worker version among mutliple elements
Used to track the activity of a worker version among multiple elements
"""
# Using a UUID helps to execute raw SQL INSERTs
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
......
......@@ -106,7 +106,7 @@ class GitProvider(ABC):
For a worker repository, start a single process to build the new workers
For other repositories, start a process for each corpus depending on the repository
"""
from arkindex.dataimport.models import DataImport, RepositoryType, DataImportMode
from arkindex.dataimport.models import DataImport, DataImportMode, RepositoryType
mode = DataImportMode.Repository
user = rev.repo.credentials.user
refs = ", ".join(rev.refs.values_list('name', flat=True))
......@@ -211,7 +211,7 @@ class GitLabProvider(GitProvider):
raise ValidationError("A repository with this URL already exists")
# Determine the user's access level on this project
# When it is inherited from the group and not overriden in the project, project_access is not defined.
# When it is inherited from the group and not overridden in the project, project_access is not defined.
# When the project isn't in a group, or the user is added to a specific project in a group,
# group_access is not defined. project_access overrides group_access.
access_level = 0
......@@ -233,7 +233,7 @@ class GitLabProvider(GitProvider):
provider_name=self.__class__.__name__,
)
# Create a webhook using informations from the HTTP request
# Create a webhook using information from the HTTP request
self.create_hook(repo, project_id=int(id), request=request)
return repo
......@@ -279,7 +279,7 @@ class GitLabProvider(GitProvider):
raise APIException("Error while creating GitLab hook: {}".format(str(e)))
def create_revision(self, repo, sha, save=True):
from arkindex.dataimport.models import Revision, GitRefType
from arkindex.dataimport.models import GitRefType, Revision
project = self._get_project_from_repo(repo)
commit = project.commits.get(sha)
......
......@@ -135,10 +135,11 @@ class DataImportSerializer(DataImportLightSerializer):
class DataImportListSerializer(DataImportLightSerializer):
created = serializers.DateTimeField(read_only=True)
updated = serializers.DateTimeField(source='last_date', read_only=True)
finished = serializers.DateTimeField(source='workflow.finished', read_only=True, default=None)
class Meta(DataImportLightSerializer.Meta):
fields = DataImportLightSerializer.Meta.fields + ('created', 'updated')
read_only_fields = DataImportLightSerializer.Meta.read_only_fields + ('created', 'updated')
fields = DataImportLightSerializer.Meta.fields + ('created', 'updated', 'finished')
read_only_fields = DataImportLightSerializer.Meta.read_only_fields + ('created', 'updated', 'finished')
class DataImportFromFilesSerializer(serializers.Serializer):
......@@ -386,7 +387,7 @@ class WorkerRunSerializer(serializers.ModelSerializer):
"""
parents = serializers.ListField(child=serializers.UUIDField())
worker_version_id = serializers.UUIDField(source='version_id')
# Serialize worker with its basic informations
# Serialize worker with its basic information
worker = WorkerLightSerializer(source='version.worker', read_only=True)
configuration_id = serializers.PrimaryKeyRelatedField(queryset=WorkerConfiguration.objects.all(), required=False, allow_null=True)
......@@ -454,20 +455,18 @@ class ImportTranskribusSerializer(serializers.Serializer):
class CreateImportTranskribusErrorResponseSerializer(serializers.Serializer):
collection_id = serializers.CharField(required=False, help_text="Errors that occured during collection ID field validation.")
collection_id = serializers.CharField(required=False, help_text="Errors that occurred during collection ID field validation.")
class ProcessElementLightSerializer(serializers.ModelSerializer):
"""
Serialises an Element, using an optimized query for ListProcessElements
"""
type = serializers.CharField(source='type__slug')
class Meta:
model = Element
fields = (
'id',
'type',
'type_id',
'name',
)
read_only_fields = fields
......
import urllib
from collections import defaultdict
from enum import Enum
from drf_spectacular.utils import extend_schema_field
from rest_framework import serializers
......@@ -42,18 +44,75 @@ class WorkerSerializer(WorkerLightSerializer):
fields = WorkerLightSerializer.Meta.fields + ('repository_id', )
class UserConfigurationFieldType(Enum):
Int = 'int'
Float = 'float'
String = 'string'
Enum = 'enum'
Boolean = 'bool'
class UserConfigurationFieldSerializer(serializers.Serializer):
title = serializers.CharField()
type = EnumField(UserConfigurationFieldType)
required = serializers.BooleanField(default=False)
choices = serializers.ListField(required=False, allow_empty=False, allow_null=True)
def to_internal_value(self, data):
errors = defaultdict(list)
allowed_fields = ['title', 'type', 'required', 'default', 'choices']
data_types = {
UserConfigurationFieldType.Int: serializers.IntegerField,
UserConfigurationFieldType.Float: serializers.FloatField,
UserConfigurationFieldType.String: serializers.CharField,
UserConfigurationFieldType.Enum: serializers.ChoiceField,
UserConfigurationFieldType.Boolean: serializers.BooleanField
}
for field in data:
if field not in allowed_fields:
errors[field].append(
'Configurable properties can only be defined using the following keys: title, type, required, default, choices.'
)
default_value = data.get('default')
data = super().to_internal_value(data)
field_type = data.get('type')
choices = data.get('choices')
if choices:
if field_type != UserConfigurationFieldType.Enum:
errors['choices'].append('The "choices" field can only be set for an "enum" type property.')
# For an enum type property, the default value cannot be type-checked; ensure it is one of the choices instead
if default_value and default_value not in choices:
errors['default'].append(f'{default_value} is not an available choice.')
elif default_value:
try:
data_type = data_types[field_type]
data_type().to_internal_value(default_value)
except ValidationError:
errors['default'].append(f'Default value is not of type {field_type.value}.')
except KeyError:
errors['default'].append(f'Cannot check type: {field_type.value}.')
if errors:
raise ValidationError(errors)
return data
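A usage sketch of this field serializer, wired up the same way as validate_configuration below; the payload itself is hypothetical:

```python
# Hypothetical user_configuration payload: an enum property with a valid
# default, plus a float property.
from rest_framework import serializers

user_configuration = {
    'mode': {
        'title': 'Mode',
        'type': 'enum',
        'choices': ['fast', 'accurate'],
        'default': 'fast',
    },
    'threshold': {'title': 'Threshold', 'type': 'float', 'default': 0.75},
}
field = serializers.DictField(child=UserConfigurationFieldSerializer())
field.to_internal_value(user_configuration)  # raises ValidationError if invalid

# A 'choices' key on a non-enum property, or a default that does not match
# the declared type, would be reported under 'choices' or 'default'.
```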
class WorkerVersionSerializer(serializers.ModelSerializer):
"""
Serialize a worker version
"""
# State defaults to created when instanciating a WorkerVersion
# State defaults to created when instantiating a WorkerVersion
state = EnumField(WorkerVersionState, required=False)
worker = WorkerLightSerializer(read_only=True)
revision = serializers.UUIDField()
element_count = serializers.IntegerField(read_only=True)
gpu_usage = EnumField(WorkerVersionGPUUsage, required=False, default=WorkerVersionGPUUsage.Disabled)
# Serialize worker with its basic informations
# Serialize worker with its basic information
class Meta:
model = WorkerVersion
fields = (
......@@ -86,6 +145,20 @@ class WorkerVersionSerializer(serializers.ModelSerializer):
except Revision.DoesNotExist:
raise ValidationError({'revision': ['Revision with this ID does not exist.']})
def validate_configuration(self, configuration):
errors = defaultdict(list)
user_configuration = configuration.get('user_configuration')
if not user_configuration:
return configuration
field = serializers.DictField(child=UserConfigurationFieldSerializer())
try:
field.to_internal_value(user_configuration)
except ValidationError as e:
errors['user_configuration'].append(e.detail)
if errors:
raise ValidationError(errors)
return configuration
def validate(self, data):
# Assert that a version set to available has a docker image
state = data.get('state') or self.instance and self.instance.state
......
......@@ -63,9 +63,11 @@ class TestImports(FixtureAPITestCase):
content_type='application/json',
)
cls.page_type = ElementType.objects.get(corpus=cls.corpus, slug='page')
cls.recognizer = WorkerVersion.objects.get(worker__slug='reco')
cls.version_gpu = WorkerVersion.objects.get(worker__slug='worker-gpu')
def setUp(self):
super().setUp()
# Create multiple processes the user can access
self.user2 = User.objects.create_user('user2@test.test', display_name='Process creator')
# Guest access (a user owns a process on a corpus they are no longer a member of)
......@@ -100,7 +102,12 @@ class TestImports(FixtureAPITestCase):
'workflow': process.workflow and f'http://testserver/ponos/v1/workflow/{process.workflow.id}/',
'activity_state': process.activity_state.value,
'created': process.created.isoformat().replace('+00:00', 'Z'),
'updated': updated.isoformat().replace('+00:00', 'Z')
'updated': updated.isoformat().replace('+00:00', 'Z'),
'finished': (
process.workflow.finished.isoformat().replace('+00:00', 'Z')
if process.workflow and process.workflow.finished
else None
),
}
def build_task(self, workflow_id, run, state, depth=1):
......@@ -428,7 +435,7 @@ class TestImports(FixtureAPITestCase):
def test_delete_running(self):
"""
It is not possible to delete a runnin import
It is not possible to delete a running import
"""
self.client.force_login(self.user)
self.elts_process.start()
......@@ -865,7 +872,7 @@ class TestImports(FixtureAPITestCase):
self.elts_process.start()
self.elts_process.workflow.tasks.all().update(state=State.Error)
self.assertEqual(self.elts_process.state, State.Error)
with self.assertNumQueries(15):
with self.assertNumQueries(16):
response = self.client.post(reverse('api:import-retry', kwargs={'pk': self.elts_process.id}))
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.elts_process.refresh_from_db()
......@@ -1113,16 +1120,32 @@ class TestImports(FixtureAPITestCase):
{'__all__': ['Only a DataImport with Workers mode and not already launched can be started later on']}
)
def test_start_process_empty(self):
dataimport2 = self.corpus.imports.create(creator=self.user, mode=DataImportMode.Workers)
self.assertIsNone(dataimport2.workflow)
self.client.force_login(self.user)
with self.assertNumQueries(7):
response = self.client.post(
reverse('api:process-start', kwargs={'pk': str(dataimport2.id)})
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertEqual(
response.json(),
{'__all__': ['The process must either use thumbnail generation or have worker runs.']},
)
def test_start_process(self):
"""
A user can start a process with no parameters.
Defaut chunks, thumbnails and farm are used. Nor cache or workers activity is set.
Default chunks, thumbnails and farm are used. Neither cache nor worker activity is set.
"""
dataimport2 = self.corpus.imports.create(creator=self.user, mode=DataImportMode.Workers)
dataimport2.worker_runs.create(version=self.recognizer, parents=[], configuration=None)
self.assertIsNone(dataimport2.workflow)
self.client.force_login(self.user)
with self.assertNumQueries(22):
with self.assertNumQueries(27):
response = self.client.post(
reverse('api:process-start', kwargs={'pk': str(dataimport2.id)})
)
......@@ -1133,8 +1156,11 @@ class TestImports(FixtureAPITestCase):
self.assertIsNotNone(dataimport2.workflow)
# Ensure default parameters are used
self.assertEqual(dataimport2.workflow.farm_id, get_default_farm_id())
self.assertEqual(dataimport2.workflow.tasks.count(), 1)
self.assertIn('--chunks-number 1', dataimport2.workflow.tasks.get().command)
self.assertEqual(dataimport2.workflow.tasks.count(), 2)
task1, task2 = dataimport2.workflow.tasks.order_by('slug')
self.assertEqual(task1.slug, 'initialisation')
self.assertEqual(task2.slug, f'reco_{str(self.recognizer.id)[:6]}')
self.assertIn('--chunks-number 1', task1.command)
def test_start_process_select_farm_id(self):
"""
......@@ -1144,8 +1170,9 @@ class TestImports(FixtureAPITestCase):
barley_farm = Farm.objects.create(name='Barley farm')
self.assertNotEqual(get_default_farm_id(), barley_farm.id)
workers_process = self.corpus.imports.create(creator=self.user, mode=DataImportMode.Workers)
workers_process.worker_runs.create(version=self.recognizer, parents=[], configuration=None)
self.client.force_login(self.user)
with self.assertNumQueries(22):
with self.assertNumQueries(27):
response = self.client.post(
reverse('api:process-start', kwargs={'pk': str(workers_process.id)}),
{'farm': str(barley_farm.id)}
......@@ -1237,7 +1264,7 @@ class TestImports(FixtureAPITestCase):
@patch('arkindex.project.triggers.dataimport_tasks.initialize_activity.delay')
def test_start_process_options_requires_workers(self, activities_delay_mock, worker_runs_mock):
"""
Cache and worker activity options can be trigerred when starting a process
Cache and worker activity options can be triggered when starting a process
"""
process = self.corpus.imports.create(
creator=self.user,
......@@ -1252,6 +1279,7 @@ class TestImports(FixtureAPITestCase):
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertDictEqual(response.json(), {
'__all__': ['The process must either use thumbnail generation or have worker runs.'],
'use_cache': 'The process must have workers attached to use cached results.',
'worker_activity': 'The process must have workers attached to handle their activity.',
'use_gpu': 'The process must have workers attached to use GPUs.'
......
......@@ -4,7 +4,7 @@ from django.urls import reverse
from rest_framework import status
from arkindex.dataimport.models import DataImport, DataImportMode
from arkindex.documents.models import Corpus, Element
from arkindex.documents.models import Corpus, Element, ElementPath
from arkindex.images.models import Image
from arkindex.project.tests import FixtureAPITestCase
from arkindex.users.models import User
......@@ -51,7 +51,7 @@ class TestProcessElements(FixtureAPITestCase):
display_name='Line',
)
# Create 2 volumes
# Create 2 folders
cls.folder_1 = Element.objects.create(
corpus=cls.private_corpus,
type=cls.folder_type,
......@@ -60,8 +60,11 @@ class TestProcessElements(FixtureAPITestCase):
cls.folder_2 = Element.objects.create(
corpus=cls.private_corpus,
type=cls.folder_type,
name="Mongolfiere",
name="Montgolfière",
)
# Create top-level paths for the folders
cls.folder_1.paths.create(path=[])
cls.folder_2.paths.create(path=[])
# Create 2 pages in each folder and 1 common page
cls.page_1 = Element.objects.create(
......@@ -71,7 +74,7 @@ class TestProcessElements(FixtureAPITestCase):
)
cls.page_2 = Element.objects.create(
corpus=cls.private_corpus,
name="Mongolfiere 1",
name="Montgolfière 1",
type=cls.page_type
)
cls.page_3 = Element.objects.create(
......@@ -81,7 +84,7 @@ class TestProcessElements(FixtureAPITestCase):
)
cls.page_4 = Element.objects.create(
corpus=cls.private_corpus,
name="Mongolfiere 2",
name="Montgolfière 2",
type=cls.page_type,
rotation_angle=180,
)
......@@ -211,6 +214,7 @@ class TestProcessElements(FixtureAPITestCase):
def test_filter_name(self):
self.dataimport.name_contains = "rhum"
self.dataimport.load_children = True
self.dataimport.save()
elements = [self.folder_1, self.page_1, self.page_5]
......@@ -224,7 +228,7 @@ class TestProcessElements(FixtureAPITestCase):
self.assertCountEqual(data["results"], [
{
'id': str(element.id),
'type': element.type.slug,
'type_id': str(element.type_id),
'name': element.name
}
for element in elements
......@@ -247,7 +251,7 @@ class TestProcessElements(FixtureAPITestCase):
self.assertCountEqual(data["results"], [
{
'id': str(element.id),
'type': element.type.slug,
'type_id': str(element.type_id),
'name': element.name
}
for element in elements
......@@ -259,7 +263,12 @@ class TestProcessElements(FixtureAPITestCase):
elements = [self.folder_1, self.folder_2]
self.client.force_login(self.superuser)
with self.assertNumQueries(7):
with self.assertExactQueries('process_elements_filter_type.sql', skip=1, params={
'user_id': self.superuser.id,
'dataimport_id': str(self.dataimport.id),
'corpus_id': str(self.private_corpus.id),
'type_id': str(self.folder_type.id),
}):
response = self.client.get(reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id}))
self.assertEqual(response.status_code, status.HTTP_200_OK)
data = response.json()
......@@ -268,7 +277,7 @@ class TestProcessElements(FixtureAPITestCase):
self.assertCountEqual(data["results"], [
{
'id': str(element.id),
'type': element.type.slug,
'type_id': str(element.type_id),
'name': element.name
}
for element in elements
......@@ -291,7 +300,7 @@ class TestProcessElements(FixtureAPITestCase):
self.assertCountEqual(data["results"], [
{
'id': str(element.id),
'type': element.type.slug,
'type_id': str(element.type_id),
'name': element.name
}
for element in elements
......@@ -311,7 +320,7 @@ class TestProcessElements(FixtureAPITestCase):
self.assertCountEqual(data["results"], [
{
'id': str(self.page_1.id),
'type': self.page_1.type.slug,
'type_id': str(self.page_1.type_id),
'name': self.page_1.name
}
])
......@@ -329,12 +338,12 @@ class TestProcessElements(FixtureAPITestCase):
self.assertCountEqual(data["results"], [
{
'id': str(self.page_1.id),
'type': self.page_1.type.slug,
'type_id': str(self.page_1.type_id),
'name': self.page_1.name
},
{
'id': str(self.folder_2.id),
'type': self.folder_2.type.slug,
'type_id': str(self.folder_2.type_id),
'name': self.folder_2.name
}
])
......@@ -355,7 +364,7 @@ class TestProcessElements(FixtureAPITestCase):
self.assertCountEqual(data["results"], [
{
'id': str(element.id),
'type': element.type.slug,
'type_id': str(element.type_id),
'name': element.name
}
for element in elements
......@@ -377,7 +386,7 @@ class TestProcessElements(FixtureAPITestCase):
self.assertCountEqual(data["results"], [
{
'id': str(element.id),
'type': element.type.slug,
'type_id': str(element.type_id),
'name': element.name
}
for element in elements
......@@ -399,7 +408,7 @@ class TestProcessElements(FixtureAPITestCase):
self.assertCountEqual(data["results"], [
{
'id': str(element.id),
'type': element.type.slug,
'type_id': str(element.type_id),
'name': element.name
}
for element in elements
......@@ -421,7 +430,7 @@ class TestProcessElements(FixtureAPITestCase):
self.assertCountEqual(data["results"], [
{
'id': str(element.id),
'type': element.type.slug,
'type_id': str(element.type_id),
'name': element.name
}
for element in elements
......@@ -442,7 +451,7 @@ class TestProcessElements(FixtureAPITestCase):
self.assertCountEqual(data["results"], [
{
'id': str(element.id),
'type': element.type.slug,
'type_id': str(element.type_id),
'name': element.name
}
for element in elements
......@@ -477,7 +486,7 @@ class TestProcessElements(FixtureAPITestCase):
self.assertCountEqual(data["results"], [
{
'id': str(element.id),
'type': element.type.slug,
'type_id': str(element.type_id),
'name': element.name
}
for element in elements
......@@ -530,6 +539,8 @@ class TestProcessElements(FixtureAPITestCase):
The elements count can be retrieved with the with_count parameter
"""
self.client.force_login(self.superuser)
self.dataimport.load_children = True
self.dataimport.save()
with self.assertNumQueries(7):
response = self.client.get(
reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id}),
......@@ -554,12 +565,17 @@ class TestProcessElements(FixtureAPITestCase):
"""
Page size may be changed for cursor pagination
"""
Element.objects.bulk_create([
elements = [
Element(
corpus=self.private_corpus,
name='Similar name',
type=self.page_type
) for i in range(51)
]
Element.objects.bulk_create(elements)
ElementPath.objects.bulk_create([
ElementPath(element=element, path=[], ordering=0)
for element in elements
])
self.dataimport.name_contains = 'Similar'
self.dataimport.save()
......@@ -579,7 +595,13 @@ class TestProcessElements(FixtureAPITestCase):
The with_image parameter includes zone and image information
"""
self.client.force_login(self.superuser)
with self.assertNumQueries(6):
self.dataimport.load_children = True
self.dataimport.save()
with self.assertExactQueries('process_elements_with_image.sql', skip=1, params={
'user_id': self.superuser.id,
'dataimport_id': str(self.dataimport.id),
'corpus_id': str(self.private_corpus.id),
}):
response = self.client.get(
reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id}),
{'with_image': True}
......@@ -590,7 +612,7 @@ class TestProcessElements(FixtureAPITestCase):
{
'id': str(element.id),
'name': element.name,
'type': element.type.slug,
'type_id': str(element.type_id),
'image_id': str(element.image_id) if element.image_id else None,
'image_width': element.image.width if element.image else None,
'image_height': element.image.height if element.image else None,
......@@ -601,3 +623,26 @@ class TestProcessElements(FixtureAPITestCase):
}
for element in self.private_corpus.elements.all()
])
def test_corpus_top_level(self):
self.client.force_login(self.superuser)
with self.assertExactQueries('process_elements_top_level.sql', skip=1, params={
'user_id': self.superuser.id,
'dataimport_id': str(self.dataimport.id),
'corpus_id': str(self.private_corpus.id),
}):
response = self.client.get(
reverse('api:process-elements-list', kwargs={'pk': self.dataimport.id}),
)
self.assertCountEqual(response.json()['results'], [
{
'id': str(self.folder_1.id),
'name': 'Baba au rhum',
'type_id': str(self.folder_1.type_id),
},
{
'id': str(self.folder_2.id),
'name': 'Montgolfière',
'type_id': str(self.folder_2.type_id),
}
])
......@@ -20,6 +20,7 @@ Please, try again with another push or contact your system administrator.
--
Arkindex
"""
......
......@@ -414,7 +414,7 @@ class TestRepositories(FixtureTestCase):
self.assertFalse(import_corpus.public)
self.assertEqual(import_corpus.name, 'IIIF import from http://gitlab/repo')
self.assertEqual(import_corpus.types.count(), 11)
self.assertEqual(import_corpus.types.count(), 6)
# User is granted an admin access to both the repository and the corpus
corpus_right = import_corpus.memberships.get(user=self.user)
......
......@@ -254,7 +254,7 @@ class TestTemplates(FixtureAPITestCase):
def test_apply_process_template(self):
self.client.force_login(self.user)
with self.assertNumQueries(16):
with self.assertNumQueries(17):
response = self.client.post(
reverse('api:apply-process-template', kwargs={'pk': str(self.template.id)}),
data=json.dumps({"process_id": str(self.dataimport.id)}),
......
......@@ -20,6 +20,7 @@ class TestDataImportUtils(TestCase):
cls.barley_farm = Farm.objects.create(name='Barley farm')
def setUp(self):
super().setUp()
# Force-clean the module's cached default farm global variable
from arkindex.dataimport import utils
setattr(utils, '__default_farm_id', None)
......