Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • arkindex/backend
1 result
Show changes
Commits on Source (27)
Showing
with 523 additions and 440 deletions
1.3.2
1.3.4-rc1
......@@ -4,7 +4,7 @@ from enumfields.admin import EnumFieldListFilter
from arkindex.dataimport.models import (
DataFile,
DataImport,
Process,
Repository,
Revision,
Worker,
......@@ -16,24 +16,24 @@ from arkindex.users.admin import GroupMembershipInline, UserMembershipInline
class DataFileInline(admin.StackedInline):
model = DataImport.files.through
model = Process.files.through
extra = 0
class WorkerRunInline(admin.StackedInline):
model = DataImport.versions.through
model = Process.versions.through
raw_id_fields = ('version', )
extra = 0
class ElementInline(admin.StackedInline):
model = DataImport.elements.through
model = Process.elements.through
raw_id_fields = ('element', )
readonly_fields = ('element', )
extra = 0
class DataImportAdmin(admin.ModelAdmin):
class ProcessAdmin(admin.ModelAdmin):
list_display = ('id', 'creator', 'corpus', 'state', 'mode')
list_filter = [('mode', EnumFieldListFilter), ]
fieldsets = (
......@@ -126,7 +126,7 @@ class WorkerConfigurationAdmin(admin.ModelAdmin):
readonly_fields = ('id', 'configuration_hash')
admin.site.register(DataImport, DataImportAdmin)
admin.site.register(Process, ProcessAdmin)
admin.site.register(DataFile, DataFileAdmin)
admin.site.register(Revision, RevisionAdmin)
admin.site.register(Repository, RepositoryAdmin)
......
This diff is collapsed.
from django.core.management.base import BaseCommand, CommandError
from arkindex.dataimport.models import DataImport, DataImportMode
from arkindex.dataimport.models import Process, ProcessMode
from arkindex.documents.models import ElementType
from arkindex.project.argparse import CorpusArgument, DataImportArgument, ElementArgument, UserArgument
from arkindex.project.argparse import CorpusArgument, ElementArgument, ProcessArgument, UserArgument
class Command(BaseCommand):
help = 'Apply a template to build a set of DataImport'
help = 'Apply a template to build a set of Processes'
def add_arguments(self, parser):
parser.add_argument(
'dataimport',
help='DataImport to use as a template for workers',
type=DataImportArgument()
'process',
help='Process to use as a template for workers',
type=ProcessArgument()
)
parser.add_argument(
'--corpus',
required=True,
help='Corpus ID or name to create DataImports on',
help='Corpus ID or name to create Processes on',
type=CorpusArgument()
)
parser.add_argument(
......@@ -25,36 +25,36 @@ class Command(BaseCommand):
dest='elements',
nargs='+',
required=True,
help='Top level element ids to build workflows: each element will get its dataimport',
help='Top level element ids to build workflows: each element will get its process',
type=ElementArgument(),
)
parser.add_argument(
'--children-type',
type=str,
required=True,
help='Element type slug to use to build the new dataimport',
help='Element type slug to use to build the new process',
)
parser.add_argument(
'--chunks',
type=int,
default=1,
help='Number of chunks to build for the dataimport',
help='Number of chunks to build for the process',
)
parser.add_argument(
'--creator',
required=True,
type=UserArgument(),
help='Creator of the new dataimport',
help='Creator of the new process',
)
def handle(self, dataimport, corpus, elements, children_type, *args, **options):
def handle(self, process, corpus, elements, children_type, *args, **options):
# Check template (must be in worker mode and have some workers)
if dataimport.mode != DataImportMode.Workers:
raise CommandError("Only workers dataimports are supported")
if not dataimport.worker_runs.exists():
raise CommandError("This dataimport has no worker runs defined")
if process.mode != ProcessMode.Workers:
raise CommandError("Only workers processes are supported")
if not process.worker_runs.exists():
raise CommandError("This process has no worker runs defined")
# Check new dataimport related models (corpus, elements, type)
# Check new process related models (corpus, elements, type)
for element in elements:
if element.corpus != corpus:
raise CommandError(f"Element {element.id} is not in corpus {corpus}")
......@@ -66,20 +66,20 @@ class Command(BaseCommand):
# Now build a clone for each top level element
for element in elements:
self.clone(dataimport, element, element_type, options['creator'], options['chunks'])
self.clone(process, element, element_type, options['creator'], options['chunks'])
def clone(self, source, element, element_type, creator, chunks):
"""
Clone a dataimport configuration, on new elements
Clone a process configuration, on new elements
"""
# Name is using the source's name, along with the element's name
name = f"Run {source.name} on {element.name}" if source.name else element.name
# Build a dataimport that will load all specified children
# Build a process that will load all specified children
# elements from that top level element
di = DataImport.objects.create(
mode=DataImportMode.Workers,
p = Process.objects.create(
mode=ProcessMode.Workers,
name=name,
corpus=element.corpus,
creator=creator,
......@@ -88,12 +88,12 @@ class Command(BaseCommand):
element_type=element_type,
)
print(f'Created DataImport {di.id}')
print(f'Created Process {p.id}')
# Build linear worker runs
runs = {}
for wr in source.worker_runs.all():
runs[wr.id] = di.worker_runs.create(
runs[wr.id] = p.worker_runs.create(
version=wr.version,
parents=[]
)
......@@ -107,5 +107,5 @@ class Command(BaseCommand):
runs[wr.id].save()
# Build and start workflow
di.start(chunks=chunks)
print(f'Started DataImport {di.id}')
p.start(chunks=chunks)
print(f'Started Process {p.id}')
......@@ -47,7 +47,7 @@ class Migration(migrations.Migration):
('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
('created', models.DateTimeField(auto_now_add=True)),
('updated', models.DateTimeField(auto_now=True)),
('mode', enumfields.fields.EnumField(enum=arkindex.dataimport.models.DataImportMode, max_length=30)),
('mode', enumfields.fields.EnumField(enum=arkindex.dataimport.models.ProcessMode, max_length=30)),
('payload', django.contrib.postgres.fields.jsonb.JSONField(blank=True, null=True)),
],
options={
......
......@@ -3,20 +3,18 @@
from django.db import migrations
from enumfields import Enum
from arkindex.dataimport.models import DataImportMode
class OldImportModes(Enum):
Images = 'images'
PDF = 'pdf'
Files = 'files'
def update_import_mode(apps, schema_editor):
def update_process_mode(apps, schema_editor):
DataImport = apps.get_model('dataimport', 'DataImport')
DataImport.mode.field.enum = OldImportModes
updated_imports = DataImport.objects.filter(mode__in=OldImportModes)
DataImport.mode.field.enum = DataImportMode
updated_imports.update(mode=DataImportMode.Files)
updated_processes = DataImport.objects.filter(mode__in=OldImportModes)
updated_processes.update(mode=OldImportModes.Files)
class Migration(migrations.Migration):
......@@ -27,7 +25,7 @@ class Migration(migrations.Migration):
operations = [
migrations.RunPython(
update_import_mode,
update_process_mode,
reverse_code=migrations.RunPython.noop,
elidable=True
)
......
# Generated by Django 4.0.4 on 2022-08-31 09:14
import django.core.validators
import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('training', '0004_modelversion_archive_hash'),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
('ponos', '0032_stringify_json'),
('documents', '0058_remove_corpus_repository'),
('dataimport', '0058_merge_file_imports'),
]
operations = [
migrations.RenameModel(
old_name='DataImport',
new_name='Process',
),
migrations.RenameModel(
old_name='DataImportElement',
new_name='ProcessElement',
),
migrations.RenameField(
model_name='processelement',
old_name='dataimport',
new_name='process',
),
migrations.RenameField(
model_name='workerrun',
old_name='dataimport',
new_name='process',
),
migrations.AlterField(
model_name='process',
name='corpus',
field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, related_name='processes', to='documents.corpus'),
),
migrations.AlterField(
model_name='process',
name='creator',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='processes', to=settings.AUTH_USER_MODEL),
),
migrations.AlterField(
model_name='process',
name='elements',
field=models.ManyToManyField(related_name='processes', through='dataimport.ProcessElement', to='documents.element'),
),
migrations.AlterField(
model_name='process',
name='files',
field=models.ManyToManyField(related_name='processes', to='dataimport.datafile'),
),
migrations.AlterField(
model_name='process',
name='revision',
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='processes', to='dataimport.revision'),
),
migrations.AlterField(
model_name='process',
name='versions',
field=models.ManyToManyField(related_name='processes', through='dataimport.WorkerRun', to='dataimport.workerversion'),
),
migrations.AlterField(
model_name='processelement',
name='element',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='process_elements', to='documents.element'),
),
migrations.AlterField(
model_name='processelement',
name='process',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='process_elements', to='dataimport.process'),
),
migrations.AlterModelOptions(
name='process',
options={'ordering': ['corpus', '-created'], 'verbose_name_plural': 'processes'},
),
]
......@@ -37,31 +37,31 @@ class ActivityState(Enum):
Error = 'error'
class DataImportMode(Enum):
Files = "files"
class ProcessMode(Enum):
Files = 'files'
Repository = 'repository'
IIIF = 'iiif'
Workers = 'workers'
Transkribus = 'transkribus'
Template = 'template'
S3 = 's3'
Training = "training"
Training = 'training'
class DataImport(IndexableModel):
class Process(IndexableModel):
"""
A single import workflow
"""
name = models.CharField(null=True, blank=True, max_length=100)
creator = models.ForeignKey('users.User', on_delete=models.CASCADE, related_name='imports')
corpus = models.ForeignKey('documents.Corpus', on_delete=models.CASCADE, related_name='imports', null=True)
mode = EnumField(DataImportMode, max_length=30)
files = models.ManyToManyField('dataimport.DataFile', related_name='imports')
creator = models.ForeignKey('users.User', on_delete=models.CASCADE, related_name='processes')
corpus = models.ForeignKey('documents.Corpus', on_delete=models.CASCADE, related_name='processes', null=True)
mode = EnumField(ProcessMode, max_length=30)
files = models.ManyToManyField('dataimport.DataFile', related_name='processes')
revision = models.ForeignKey(
'dataimport.Revision', related_name='dataimports', on_delete=models.CASCADE, blank=True, null=True)
'dataimport.Revision', related_name='processes', on_delete=models.CASCADE, blank=True, null=True)
workflow = models.OneToOneField('ponos.Workflow', on_delete=models.SET_NULL, null=True, blank=True)
versions = models.ManyToManyField('dataimport.WorkerVersion', through='dataimport.WorkerRun', related_name='imports')
versions = models.ManyToManyField('dataimport.WorkerVersion', through='dataimport.WorkerRun', related_name='processes')
activity_state = EnumField(ActivityState, max_length=32, default=ActivityState.Disabled)
# Used to define the root element on which the workflow will run
......@@ -77,7 +77,7 @@ class DataImport(IndexableModel):
)
# Used to define the element type during non-folder elements creation for Images, PDF, IIIF and S3 imports,
# or to filter elements by type with Workers imports
# or to filter elements by type with Workers processes
element_type = models.ForeignKey(
'documents.ElementType',
on_delete=models.SET_NULL,
......@@ -86,38 +86,38 @@ class DataImport(IndexableModel):
related_name='element_imports',
)
# Used to filter out elements with a name that doesn't contain the substring, only on Workers imports
# Used to filter out elements with a name that doesn't contain the substring, only on Workers processes
name_contains = models.CharField(null=True, blank=True, max_length=250)
# Used to save a user's selection for Workers imports
# Used to save a user's selection for Workers processes
elements = models.ManyToManyField(
'documents.Element',
through='dataimport.DataImportElement',
related_name='imports',
through='dataimport.ProcessElement',
related_name='processes',
)
# Used to include sub-elements of the actual query in Workers imports
# Used to include sub-elements of the actual query in Workers processes
load_children = models.BooleanField(default=False)
# Used to define the collection ID and entities import for Transkribus import
collection_id = models.PositiveIntegerField(null=True, blank=True)
# Use elements cache in Workers imports
# Use elements cache in Workers processes
use_cache = models.BooleanField(default=False)
# Allows to use a GPU in Workers imports
# Allows to use a GPU in Workers processes
use_gpu = models.BooleanField(default=False, blank=True)
# Refers a workflow template
template = models.ForeignKey(
'dataimport.DataImport',
'dataimport.Process',
on_delete=models.SET_NULL,
null=True,
blank=True,
related_name='applications',
)
# S3 bucket name, and prefix within the bucket, for DataImportMode.S3
# S3 bucket name, and prefix within the bucket, for ProcessMode.S3
# Bucket names are between 3 and 63 characters long
bucket_name = models.CharField(
max_length=63,
......@@ -160,12 +160,13 @@ class DataImport(IndexableModel):
class Meta:
ordering = ['corpus', '-created']
verbose_name_plural = 'processes'
@property
def state(self):
if not self.workflow:
return State.Unscheduled
# This allows annotating a DataImport queryset with "last_run" and preventing duplicate SQL queries
# This allows annotating a Process queryset with "last_run" and preventing duplicate SQL queries
if hasattr(self, 'last_run'):
# last_run may be None when there is a workflow without any tasks
if self.last_run is None:
......@@ -200,7 +201,7 @@ class DataImport(IndexableModel):
for run in self.worker_runs.all():
# Create a new WorkerRun with same version, configuration and parents.
new_run = WorkerRun(
dataimport=new_process,
process=new_process,
version_id=run.version_id,
parents=run.parents,
configuration_id=run.configuration_id,
......@@ -219,7 +220,7 @@ class DataImport(IndexableModel):
"""
Return a queryset of elements involved in this process
"""
if self.mode != DataImportMode.Workers:
if self.mode != ProcessMode.Workers:
return Element.objects.none()
elements = None
......@@ -266,7 +267,7 @@ class DataImport(IndexableModel):
assert self.workflow is None, 'A workflow is already setup'
if self.mode == DataImportMode.Repository:
if self.mode == ProcessMode.Repository:
assert self.revision is not None, \
'A revision is required to create an import workflow from GitLab repository'
if not self.revision.repo.enabled:
......@@ -281,15 +282,15 @@ class DataImport(IndexableModel):
},
}
elif self.mode == DataImportMode.IIIF:
elif self.mode == ProcessMode.IIIF:
tasks = {
import_task_name: {
'image': settings.ARKINDEX_TASKS_IMAGE,
'command': f'python -m arkindex_tasks.import_iiif.dataimport {self.id}',
'command': f'python -m arkindex_tasks.import_iiif.process {self.id}',
},
}
elif self.mode == DataImportMode.Transkribus:
elif self.mode == ProcessMode.Transkribus:
import_task_name = 'import_arkindex'
tasks = {
'export_transkribus': {
......@@ -305,7 +306,7 @@ class DataImport(IndexableModel):
}
}
elif self.mode == DataImportMode.Files:
elif self.mode == ProcessMode.Files:
tasks = {
import_task_name: {
'image': settings.ARKINDEX_TASKS_IMAGE,
......@@ -313,7 +314,7 @@ class DataImport(IndexableModel):
},
}
elif self.mode == DataImportMode.S3:
elif self.mode == ProcessMode.S3:
assert self.bucket_name, 'Missing S3 bucket name'
assert self.folder_type is not None, 'Missing folder type'
assert self.element_type is not None, 'Missing element type'
......@@ -336,14 +337,14 @@ class DataImport(IndexableModel):
},
}
elif self.mode == DataImportMode.Training:
elif self.mode == ProcessMode.Training:
worker_version = kwargs.get('worker_version')
model_version = kwargs.get('model_version')
worker_configuration = kwargs.get('worker_configuration')
assert worker_version, 'A worker version is required to start the training'
worker_run = WorkerRun.objects.create(
dataimport_id=self.id,
process_id=self.id,
version_id=worker_version.id,
configuration_id=worker_configuration and worker_configuration.id,
parents=[],
......@@ -367,7 +368,7 @@ class DataImport(IndexableModel):
}
}
elif self.mode == DataImportMode.Workers:
elif self.mode == ProcessMode.Workers:
import_task_name = 'initialisation'
if chunks is not None:
assert chunks <= settings.MAX_CHUNKS, f'Import distribution is limited to {settings.MAX_CHUNKS} chunks'
......@@ -393,10 +394,10 @@ class DataImport(IndexableModel):
# Handle chunks and thumbnails generation on processes that list elements during the initial task
# The S3 import does not generate a list of elements yet
if self.mode in (
DataImportMode.Workers,
DataImportMode.Transkribus,
DataImportMode.IIIF,
DataImportMode.Files,
ProcessMode.Workers,
ProcessMode.Transkribus,
ProcessMode.IIIF,
ProcessMode.Files
):
elts_chunk_files = ['elements.json']
if ml_workflow_chunks > 1:
......@@ -422,7 +423,7 @@ class DataImport(IndexableModel):
'parents': [import_task_name],
}
# Generate a task for each WorkerRun on the DataImport
# Generate a task for each WorkerRun on the Process
for worker_run in worker_runs:
task_name = f'{worker_run.version.slug}{task_suffix}'
task_recipe = worker_run.build_task_recipe(
......@@ -447,13 +448,13 @@ class DataImport(IndexableModel):
recipe['env'].update({
'ARKINDEX_PROCESS_ID': str(self.id),
})
if self.mode == DataImportMode.Transkribus:
if self.mode == ProcessMode.Transkribus:
recipe['env']['TRANSKRIBUS_EMAIL'] = settings.TRANSKRIBUS_EMAIL
recipe['env']['TRANSKRIBUS_PASSWORD'] = settings.TRANSKRIBUS_PASSWORD
if settings.TRANSKRIBUS_WORKER_VERSION:
recipe['env']['TRANSKRIBUS_WORKER_VERSION'] = str(settings.TRANSKRIBUS_WORKER_VERSION)
elif self.mode == DataImportMode.S3:
elif self.mode == ProcessMode.S3:
recipe['env']['INGEST_S3_ACCESS_KEY'] = settings.INGEST_S3_ACCESS_KEY
recipe['env']['INGEST_S3_SECRET_KEY'] = settings.INGEST_S3_SECRET_KEY
if settings.INGEST_S3_ENDPOINT:
......@@ -461,7 +462,7 @@ class DataImport(IndexableModel):
if settings.INGEST_S3_REGION:
recipe['env']['INGEST_S3_REGION'] = settings.INGEST_S3_REGION
elif settings.IMPORTS_WORKER_VERSION and self.mode == DataImportMode.Files:
elif settings.IMPORTS_WORKER_VERSION and self.mode == ProcessMode.Files:
recipe['env']['WORKER_VERSION_ID'] = str(settings.IMPORTS_WORKER_VERSION)
if self.corpus:
......@@ -473,7 +474,7 @@ class DataImport(IndexableModel):
return Workflow.objects.create(farm_id=farm_id, recipe=yaml.dump(recipe))
def start(self, use_cache=False, worker_activity=False, use_gpu=False, **kwargs):
if self.mode != DataImportMode.Workers:
if self.mode != ProcessMode.Workers:
assert not use_cache and not worker_activity, 'Only worker processes can be started with cache or worker activities'
if worker_activity:
......@@ -492,14 +493,14 @@ class DataImport(IndexableModel):
from arkindex.project.triggers import initialize_activity
initialize_activity(self)
if self.mode == DataImportMode.Workers:
if self.mode == ProcessMode.Workers:
CorpusWorkerVersion.objects.bulk_create([
CorpusWorkerVersion(corpus_id=self.corpus_id, worker_version_id=worker_version_id)
for worker_version_id in self.worker_runs.values_list('version_id', flat=True)
], ignore_conflicts=True)
def retry(self):
if self.mode == DataImportMode.Repository and self.revision is not None and not self.revision.repo.enabled:
if self.mode == ProcessMode.Repository and self.revision is not None and not self.revision.repo.enabled:
raise ValidationError('Git repository does not have any valid credentials')
if self.workflow:
self.workflow.retry()
......@@ -512,20 +513,20 @@ class DataImport(IndexableModel):
self.save()
class DataImportElement(models.Model):
class ProcessElement(models.Model):
"""
Link between DataImports and Elements to allow workflows from selections.
Link between Processes and Elements to allow workflows from selections.
May be removed later once the ElementPath algorithms are optimized,
as a selection could then be turned into a set.
"""
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
dataimport = models.ForeignKey(DataImport, on_delete=models.CASCADE, related_name='dataimport_elements')
element = models.ForeignKey('documents.Element', on_delete=models.CASCADE, related_name='dataimport_elements')
process = models.ForeignKey(Process, on_delete=models.CASCADE, related_name='process_elements')
element = models.ForeignKey('documents.Element', on_delete=models.CASCADE, related_name='process_elements')
class Meta:
unique_together = (
('dataimport', 'element'),
('process', 'element'),
)
......@@ -766,7 +767,7 @@ class WorkerConfiguration(IndexableModel):
class WorkerRun(models.Model):
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
dataimport = models.ForeignKey('dataimport.DataImport', on_delete=models.CASCADE, related_name='worker_runs')
process = models.ForeignKey('dataimport.Process', on_delete=models.CASCADE, related_name='worker_runs')
version = models.ForeignKey('dataimport.WorkerVersion', on_delete=models.CASCADE, related_name='worker_runs')
model_version = models.ForeignKey('training.ModelVersion', on_delete=models.CASCADE, related_name='worker_runs', null=True)
parents = ArrayField(models.UUIDField())
......@@ -780,7 +781,7 @@ class WorkerRun(models.Model):
summary = models.TextField()
class Meta:
unique_together = (('version', 'dataimport'),)
unique_together = (('version', 'process'),)
def build_task_recipe(self, import_task_name, elements_path, chunk=None, workflow_runs=None):
'''
......@@ -879,7 +880,7 @@ class WorkerActivity(IndexableModel):
default=WorkerActivityState.Queued,
)
process = models.ForeignKey(
DataImport,
Process,
related_name='activities',
on_delete=models.SET_NULL,
null=True,
......
......@@ -6,7 +6,7 @@ from arkindex.project.mixins import CorpusACLMixin
class IsTaskAdmin(CorpusACLMixin, permissions.BasePermission):
"""
Permission to access a task with high privilege
Allowed for superuser, dataimport creators and users with an admin right on its associated corpus
Allowed for superuser, process creators and users with an admin right on its associated corpus
Allowed for agents too
"""
......@@ -25,8 +25,8 @@ class IsTaskAdmin(CorpusACLMixin, permissions.BasePermission):
or (hasattr(self.user, "is_agent") and self.user.is_agent)
or (
task.workflow.dataimport.corpus_id
and self.has_admin_access(task.workflow.dataimport.corpus)
task.workflow.process.corpus_id
and self.has_admin_access(task.workflow.process.corpus)
)
)
......
......@@ -11,9 +11,6 @@ from rest_framework.exceptions import APIException, AuthenticationFailed, NotAut
logger = logging.getLogger(__name__)
CONF_PATH = settings.WORKERS_CONF_PATH
CONF_VERSION = settings.WORKERS_CONF_VERSION
class GitProvider(ABC):
......@@ -95,18 +92,18 @@ class GitProvider(ABC):
"""
Create and start a process to build new worker(s) from a repository revision
"""
from arkindex.dataimport.models import DataImportMode
mode = DataImportMode.Repository
from arkindex.dataimport.models import ProcessMode
mode = ProcessMode.Repository
user = rev.repo.credentials.user
refs = ", ".join(rev.refs.values_list('name', flat=True))
if not refs:
refs = rev.hash
# Limit the name length to the max size of DataImport name field
# Limit the name length to the max size of Process name field
name = f"Import {refs} from {rev.repo.url}"[:100]
dataimport = rev.dataimports.create(creator=user, mode=mode, name=name)
dataimport.start(thumbnails=False)
return dataimport
process = rev.processes.create(creator=user, mode=mode, name=name)
process.start(thumbnails=False)
return process
class GitLabProvider(GitProvider):
......
......@@ -2,14 +2,7 @@ from django.conf import settings
from rest_framework import serializers
from rest_framework.exceptions import PermissionDenied, ValidationError
from arkindex.dataimport.models import (
ActivityState,
DataFile,
DataImport,
DataImportMode,
WorkerConfiguration,
WorkerRun,
)
from arkindex.dataimport.models import ActivityState, DataFile, Process, ProcessMode, WorkerConfiguration, WorkerRun
from arkindex.dataimport.serializers.git import RevisionSerializer
from arkindex.dataimport.serializers.workers import (
WorkerConfigurationSerializer,
......@@ -21,25 +14,26 @@ from arkindex.documents.serializers.elements import ElementSlimSerializer
from arkindex.project.mixins import ProcessACLMixin
from arkindex.project.serializer_fields import EnumField, LinearRingField
from arkindex.training.models import Model, ModelVersion
from arkindex.training.serializers import ModelVersionLightSerializer
from arkindex.users.models import Role
from arkindex.users.utils import get_max_level
from ponos.models import Farm, State
from transkribus import TranskribusAPI
class DataImportLightSerializer(serializers.ModelSerializer):
class ProcessLightSerializer(serializers.ModelSerializer):
"""
Serialize a data importing workflow
Serialize a process
"""
state = EnumField(State, read_only=True)
mode = EnumField(DataImportMode, read_only=True)
mode = EnumField(ProcessMode, read_only=True)
creator = serializers.HiddenField(default=serializers.CurrentUserDefault())
workflow = serializers.HyperlinkedRelatedField(read_only=True, view_name='ponos:workflow-details')
activity_state = EnumField(ActivityState, read_only=True)
class Meta:
model = DataImport
model = Process
fields = (
'name',
'id',
......@@ -53,20 +47,20 @@ class DataImportLightSerializer(serializers.ModelSerializer):
read_only_fields = ('id', 'state', 'mode', 'corpus', 'creator', 'workflow', 'activity_state')
class DataImportTrainingSerializer(DataImportLightSerializer):
class ProcessTrainingSerializer(ProcessLightSerializer):
model_id = serializers.PrimaryKeyRelatedField(read_only=True)
train_folder_id = serializers.PrimaryKeyRelatedField(read_only=True)
validation_folder_id = serializers.PrimaryKeyRelatedField(read_only=True)
test_folder_id = serializers.PrimaryKeyRelatedField(read_only=True)
class Meta(DataImportLightSerializer.Meta):
fields = DataImportLightSerializer.Meta.fields + (
class Meta(ProcessLightSerializer.Meta):
fields = ProcessLightSerializer.Meta.fields + (
'model_id',
'train_folder_id',
'validation_folder_id',
'test_folder_id',
)
read_only_fields = DataImportLightSerializer.Meta.read_only_fields + (
read_only_fields = ProcessLightSerializer.Meta.read_only_fields + (
'model_id',
'train_folder_id',
'validation_folder_id',
......@@ -74,9 +68,9 @@ class DataImportTrainingSerializer(DataImportLightSerializer):
)
class DataImportSerializer(DataImportTrainingSerializer):
class ProcessSerializer(ProcessTrainingSerializer):
"""
Serialize a data importing workflow with its settings
Serialize a process with its settings
"""
revision = RevisionSerializer(read_only=True)
element = ElementSlimSerializer(read_only=True)
......@@ -98,14 +92,14 @@ class DataImportSerializer(DataImportTrainingSerializer):
)
template_id = serializers.PrimaryKeyRelatedField(
queryset=DataImport.objects.none(),
queryset=Process.objects.none(),
allow_null=True,
source='template',
style={'base_template': 'input.html'}
)
class Meta(DataImportTrainingSerializer.Meta):
fields = DataImportTrainingSerializer.Meta.fields + (
class Meta(ProcessTrainingSerializer.Meta):
fields = ProcessTrainingSerializer.Meta.fields + (
'files',
'revision',
'element',
......@@ -118,7 +112,7 @@ class DataImportSerializer(DataImportTrainingSerializer):
'use_gpu',
'template_id',
)
read_only_fields = DataImportTrainingSerializer.Meta.read_only_fields + (
read_only_fields = ProcessTrainingSerializer.Meta.read_only_fields + (
'files',
'revision',
'element',
......@@ -130,47 +124,47 @@ class DataImportSerializer(DataImportTrainingSerializer):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
dataimport = self.context.get('dataimport')
if not dataimport or not dataimport.corpus:
process = self.context.get('process')
if not process or not process.corpus:
return
self.fields['element_type'].queryset = ElementType.objects.filter(corpus=dataimport.corpus)
self.fields['element_id'].queryset = dataimport.corpus.elements.all()
self.fields['element_type'].queryset = ElementType.objects.filter(corpus=process.corpus)
self.fields['element_id'].queryset = process.corpus.elements.all()
def validate(self, data):
data = super().validate(data)
# Editing a dataimport name only is always allowed
# Editing a process name only is always allowed
if set(data.keys()) == {'name'}:
return data
if not self.instance:
return
# Allow editing the element ID on file imports at any time
if self.instance.mode in (DataImportMode.Files, DataImportMode.IIIF) and set(data.keys()) == {'element'}:
# Allow editing the element ID on processes at any time
if self.instance.mode in (ProcessMode.Files, ProcessMode.IIIF) and set(data.keys()) == {'element'}:
return data
if self.instance.state == State.Running:
raise serializers.ValidationError({'__all__': ['Cannot edit a workflow while it is running']})
if self.instance.mode != DataImportMode.Workers:
raise serializers.ValidationError({'__all__': [f'Only processes of mode {DataImportMode.Workers} can be updated']})
if self.instance.mode != ProcessMode.Workers:
raise serializers.ValidationError({'__all__': [f'Only processes of mode {ProcessMode.Workers} can be updated']})
return data
class DataImportListSerializer(DataImportLightSerializer):
class ProcessListSerializer(ProcessLightSerializer):
created = serializers.DateTimeField(read_only=True)
updated = serializers.DateTimeField(source='last_date', read_only=True)
finished = serializers.DateTimeField(source='workflow.finished', read_only=True, default=None)
class Meta(DataImportLightSerializer.Meta):
fields = DataImportLightSerializer.Meta.fields + ('created', 'updated', 'finished')
read_only_fields = DataImportLightSerializer.Meta.read_only_fields + ('created', 'updated', 'finished')
class Meta(ProcessLightSerializer.Meta):
fields = ProcessLightSerializer.Meta.fields + ('created', 'updated', 'finished')
read_only_fields = ProcessLightSerializer.Meta.read_only_fields + ('created', 'updated', 'finished')
class DataImportFromFilesSerializer(serializers.Serializer):
class FilesProcessSerializer(serializers.Serializer):
mode = EnumField(DataImportMode, default=DataImportMode.Files)
mode = EnumField(ProcessMode, default=ProcessMode.Files)
files = serializers.PrimaryKeyRelatedField(queryset=DataFile.objects.all(), many=True)
folder_id = serializers.UUIDField(required=False, allow_null=True)
folder_type = serializers.SlugField(required=False, allow_null=True)
......@@ -178,7 +172,7 @@ class DataImportFromFilesSerializer(serializers.Serializer):
default_error_messages = {
'mode_not_allowed': 'This mode is not allowed when importing from files',
'files_required': 'At least one file is required to start an import',
'files_required': 'At least one file is required to start an import process',
'unique_corpus': 'Imports can only run on files from a single corpus',
'corpus_read_only': 'Cannot write in corpus',
'folder_not_found': 'Folder does not exist',
......@@ -192,7 +186,7 @@ class DataImportFromFilesSerializer(serializers.Serializer):
}
def validate_mode(self, mode):
if mode not in (DataImportMode.Files, DataImportMode.IIIF):
if mode not in (ProcessMode.Files, ProcessMode.IIIF):
self.fail('mode_not_allowed')
return mode
......@@ -219,11 +213,11 @@ class DataImportFromFilesSerializer(serializers.Serializer):
self.fail('folder_not_found')
def validate(self, data):
if data['mode'] == DataImportMode.Files:
if data['mode'] == ProcessMode.Files:
if not all(f.content_type == 'application/pdf' or f.content_type.startswith('image/') for f in data['files']):
self.fail('image_or_pdf')
elif data['mode'] == DataImportMode.IIIF:
elif data['mode'] == ProcessMode.IIIF:
if not set(f.content_type.split(';')[0] for f in data['files']) <= {'application/json', 'application/ld+json'}:
self.fail('iiif_only')
......@@ -248,7 +242,7 @@ class DataImportFromFilesSerializer(serializers.Serializer):
self.fail('type_not_folder', slug=folder_type_slug)
data['folder_type'] = folder_type
else:
if data['mode'] == DataImportMode.IIIF:
if data['mode'] == ProcessMode.IIIF:
# folder_type is required in IIIF
self.fail('iiif_folder_required')
if not folder_id:
......@@ -271,7 +265,7 @@ class CreateProcessTemplateSerializer(serializers.ModelSerializer):
name = serializers.CharField(required=False, max_length=100, default="")
class Meta:
model = DataImport
model = Process
fields = (
'id',
'name',
......@@ -281,7 +275,7 @@ class CreateProcessTemplateSerializer(serializers.ModelSerializer):
data = super().validate(data)
template = self.context['template']
data["revision"] = template.revision
data["mode"] = DataImportMode.Template
data["mode"] = ProcessMode.Template
data["creator"] = self.context['request'].user
data["corpus"] = template.corpus
......@@ -292,13 +286,13 @@ class CreateProcessTemplateSerializer(serializers.ModelSerializer):
def create(self, validated_data):
    """
    Create a new template process, then copy every worker run from the
    source template (resolved from the URL id into ``self.context``) onto
    the newly created process.

    :param validated_data: Serializer-validated attributes for the new process.
    :returns: The created ``Process`` instance, with worker runs copied.
    """
    process = super().create(validated_data)
    # Duplicate the template's worker runs so the new process starts with
    # the same workers, configurations and dependency graph.
    self.context['template'].copy_runs(process)
    return process
class ApplyProcessTemplateSerializer(ProcessACLMixin, serializers.Serializer):
process_id = serializers.PrimaryKeyRelatedField(queryset=DataImport.objects.all(), source='process', required=True)
process_id = serializers.PrimaryKeyRelatedField(queryset=Process.objects.all(), source='process', required=True)
def create(self, validated_data):
# The endpoint should copy all the worker runs content from the source template
......@@ -306,7 +300,7 @@ class ApplyProcessTemplateSerializer(ProcessACLMixin, serializers.Serializer):
template_process = self.context["template"]
target_process = validated_data["process"]
# If the target process, already has worker runs, these will be deleted before applying the template
WorkerRun.objects.filter(dataimport_id=target_process.id).delete()
WorkerRun.objects.filter(process_id=target_process.id).delete()
# Apply the template by copying all the worker runs on to the new process
template_process.copy_runs(target_process)
target_process.template_id = template_process.id
......@@ -319,7 +313,7 @@ class ApplyProcessTemplateSerializer(ProcessACLMixin, serializers.Serializer):
# Check Contributor access rights on process
access_level = self.process_access_level(process)
if not access_level:
raise ValidationError(detail='Dataimport with this ID does not exist.')
raise ValidationError(detail='Process with this ID does not exist.')
elif access_level < Role.Contributor.value:
raise PermissionDenied(detail='You do not have a contributor access to this process.')
return process
......@@ -409,18 +403,20 @@ class WorkerRunSerializer(serializers.ModelSerializer):
worker_version_id = serializers.UUIDField(source='version_id')
# Serialize worker with its basic information
worker = WorkerLightSerializer(source='version.worker', read_only=True)
model_version = ModelVersionLightSerializer(read_only=True)
configuration_id = serializers.PrimaryKeyRelatedField(queryset=WorkerConfiguration.objects.all(), required=False, allow_null=True)
class Meta:
    model = WorkerRun
    # `id`, `worker` and `process_id` are assigned server-side;
    # `model_version_id` is exposed read-only next to the nested
    # `model_version` representation.
    read_only_fields = ('id', 'worker', 'process_id', 'model_version_id')
    fields = (
        'id',
        'parents',
        'worker_version_id',
        'model_version_id',
        'process_id',
        'worker',
        'model_version',
        'configuration_id',
    )
......@@ -434,7 +430,7 @@ class WorkerRunEditSerializer(WorkerRunSerializer):
worker_version = WorkerVersionSerializer(read_only=True, source='version')
configuration = WorkerConfigurationSerializer(read_only=True)
process = DataImportTrainingSerializer(read_only=True, source='dataimport')
process = ProcessTrainingSerializer(read_only=True)
class Meta(WorkerRunSerializer.Meta):
fields = WorkerRunSerializer.Meta.fields + (
......
......@@ -2,7 +2,7 @@ from collections import defaultdict
from rest_framework import serializers
from arkindex.dataimport.models import DataImport, DataImportMode
from arkindex.dataimport.models import Process, ProcessMode
from arkindex.documents.models import Corpus, Element, ElementType
......@@ -42,10 +42,10 @@ class S3ImportSerializer(serializers.ModelSerializer):
write_only=True,
)
creator = serializers.HiddenField(default=serializers.CurrentUserDefault())
mode = serializers.HiddenField(default=DataImportMode.S3)
mode = serializers.HiddenField(default=ProcessMode.S3)
class Meta:
model = DataImport
model = Process
fields = (
'id',
'bucket_name',
......
from rest_framework import serializers
from rest_framework.exceptions import ValidationError
from arkindex.dataimport.models import (
DataImport,
DataImportMode,
WorkerConfiguration,
WorkerVersion,
WorkerVersionGPUUsage,
)
from arkindex.dataimport.models import Process, ProcessMode, WorkerConfiguration, WorkerVersion, WorkerVersionGPUUsage
from arkindex.documents.models import Corpus, Element
from arkindex.project.mixins import TrainingModelMixin, WorkerACLMixin
from arkindex.training.models import Model, ModelVersion
......@@ -87,7 +81,7 @@ class StartTrainingSerializer(serializers.ModelSerializer, WorkerACLMixin, Train
use_gpu = serializers.BooleanField(default=False, write_only=True)
class Meta:
model = DataImport
model = Process
fields = (
'id',
'name',
......@@ -170,7 +164,7 @@ class StartTrainingSerializer(serializers.ModelSerializer, WorkerACLMixin, Train
'train_folder': validated_data["train_folder_id"],
'validation_folder': validated_data.get("validation_folder_id"),
'test_folder': validated_data.get("test_folder_id"),
'mode': DataImportMode.Training,
'mode': ProcessMode.Training,
'creator': self._user,
})
......
......@@ -7,7 +7,7 @@ from rest_framework import serializers
from rest_framework.exceptions import ValidationError
from arkindex.dataimport.models import (
DataImport,
Process,
Repository,
Revision,
Worker,
......@@ -110,8 +110,8 @@ class UserConfigurationFieldSerializer(serializers.Serializer):
if subtype is not None:
if field_type != UserConfigurationFieldType.List:
errors['subtype'].append('The "subtype" field can only be set for a "list" type property.')
if subtype not in [UserConfigurationFieldType.Int, UserConfigurationFieldType.Float, UserConfigurationFieldType.String]:
errors['subtype'].append('Subtype can only be int, float or string.')
if subtype not in [UserConfigurationFieldType.Int, UserConfigurationFieldType.Float, UserConfigurationFieldType.String, UserConfigurationFieldType.Boolean]:
errors['subtype'].append('Subtype can only be int, float, bool or string.')
# Handle enums
if choices is not None:
if field_type != UserConfigurationFieldType.Enum:
......@@ -277,7 +277,7 @@ class WorkerActivitySerializer(serializers.ModelSerializer):
state = EnumField(WorkerActivityState)
element_id = serializers.UUIDField()
process_id = serializers.PrimaryKeyRelatedField(
queryset=DataImport.objects.all(),
queryset=Process.objects.all(),
# Avoid loading up to 1000 processes when opening this endpoint in a browser
style={'base_template': 'input.html'},
)
......
......@@ -22,25 +22,25 @@ def check_parents(sender, instance, **kwargs):
Since we can't properly validate parents field on WorkerRun by simply defining an Array of UUIDs,
we add a pre-save signal with constraints.
It will check that provided UUIDs refer to existing WorkerRun objects and that these objects are
linked to the same DataImport as the to-be-saved WorkerRun instance.
linked to the same Process as the to-be-saved WorkerRun instance.
It will also check that adding or updating WorkerRun parents will not create cycles in the tree.
"""
if not instance.parents:
return
# Use `dataimport_id` and not `dataimport` to avoid an extra SQL query that would use the replica
# Use `process_id` and not `process` to avoid an extra SQL query that would use the replica
parents = WorkerRun.objects.using('default') \
.filter(id__in=instance.parents, dataimport_id=instance.dataimport_id) \
.filter(id__in=instance.parents, process_id=instance.process_id) \
.exclude(id=instance.id).values_list('id', flat=True)
if set(parents) != set(instance.parents):
raise ValidationError(f"Can't add or update WorkerRun {instance.id} because parents field isn't properly defined. It can be either because"
" one or several UUIDs don't refer to existing WorkerRuns or either because listed WorkerRuns doesn't belong to the"
" same DataImport than this WorkerRun.")
" same Process than this WorkerRun.")
graph = {
wr['id']: wr['parents']
for wr in WorkerRun.objects.using('default').filter(dataimport_id=instance.dataimport_id).values('id', 'parents')
for wr in WorkerRun.objects.using('default').filter(process_id=instance.process_id).values('id', 'parents')
}
ancestors = _list_ancestors(graph, instance.parents)
if instance.id in ancestors:
......
......@@ -5,11 +5,11 @@ from django.db import transaction
from django_rq import job
from rq import Retry
from arkindex.dataimport.models import ActivityState, DataImport, WorkerActivity, WorkerActivityState
from arkindex.dataimport.models import ActivityState, Process, WorkerActivity, WorkerActivityState
@job('default', timeout=settings.RQ_TIMEOUTS['initialize_activity'], retry=Retry(max=4))
def initialize_activity(process: DataImport):
def initialize_activity(process: Process):
"""
List all worker versions used in a process and initialize their activity on processed elements.
4 retries allowed, for a total of 5 attempts, to try to mitigate some database errors from the large query.
......@@ -32,7 +32,7 @@ def initialize_activity(process: DataImport):
@job('default', timeout=settings.RQ_TIMEOUTS['process_delete'])
def process_delete(process: DataImport, delete_activity: Optional[WorkerActivityState] = None):
def process_delete(process: Process, delete_activity: Optional[WorkerActivityState] = None):
"""
Delete a process and removes its reference from related worker activities.
If `delete` is defined, activities of this state will be deleted.
......
......@@ -2,7 +2,7 @@ from django.test import override_settings
from django.urls import reverse
from rest_framework import status
from arkindex.dataimport.models import DataImport, DataImportMode, ImageServer
from arkindex.dataimport.models import ImageServer, Process, ProcessMode
from arkindex.documents.models import Corpus
from arkindex.project.tests import FixtureTestCase
from arkindex.users.models import Scope
......@@ -132,10 +132,10 @@ class TestCreateS3Import(FixtureTestCase):
})
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
data = response.json()
process = DataImport.objects.get(id=data['id'])
process = Process.objects.get(id=data['id'])
self.assertDictEqual(data, {'id': str(process.id)})
self.assertEqual(process.mode, DataImportMode.S3)
self.assertEqual(process.mode, ProcessMode.S3)
self.assertEqual(process.creator, self.user)
self.assertEqual(process.corpus_id, self.corpus.id)
self.assertEqual(process.element_id, element.id)
......@@ -181,10 +181,10 @@ class TestCreateS3Import(FixtureTestCase):
})
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
data = response.json()
process = DataImport.objects.get(id=data['id'])
process = Process.objects.get(id=data['id'])
self.assertDictEqual(data, {'id': str(process.id)})
self.assertEqual(process.mode, DataImportMode.S3)
self.assertEqual(process.mode, ProcessMode.S3)
self.assertEqual(process.creator, self.user)
self.assertEqual(process.corpus_id, self.corpus.id)
self.assertIsNone(process.element_id)
......
......@@ -6,8 +6,8 @@ from django.urls import reverse
from rest_framework import status
from arkindex.dataimport.models import (
DataImport,
DataImportMode,
Process,
ProcessMode,
WorkerConfiguration,
WorkerRun,
WorkerVersion,
......@@ -247,16 +247,16 @@ class TestCreateTrainingProcess(FixtureTestCase):
@override_settings(PONOS_RECIPE={})
def test_create_training(self):
self.client.force_login(self.user)
training_process = DataImport.objects.filter(name='Test training')
training_process = Process.objects.filter(name='Test training')
self.assertEqual(training_process.count(), 0)
with self.assertNumQueries(31):
with self.assertNumQueries(29):
response = self.client.post(reverse('api:process-training'), self.base_payload)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
self.assertEqual(training_process.count(), 1)
training_process = training_process.get()
self.assertEqual(response.json(), {
'id': str(training_process.id),
'mode': DataImportMode.Training.value,
'mode': ProcessMode.Training.value,
'activity_state': 'disabled',
'corpus': str(self.corpus.id),
'element': None,
......@@ -298,9 +298,9 @@ class TestCreateTrainingProcess(FixtureTestCase):
@override_settings(PONOS_RECIPE={})
def test_create_training_extra_parameters(self):
self.client.force_login(self.user)
training_process = DataImport.objects.filter(name='Test training')
training_process = Process.objects.filter(name='Test training')
self.assertEqual(training_process.count(), 0)
with self.assertNumQueries(36):
with self.assertNumQueries(34):
response = self.client.post(reverse('api:process-training'), {
**self.base_payload,
'validation_folder_id': str(self.validation_folder.id),
......@@ -314,7 +314,7 @@ class TestCreateTrainingProcess(FixtureTestCase):
training_process = training_process.get()
self.assertEqual(response.json(), {
'id': str(training_process.id),
'mode': DataImportMode.Training.value,
'mode': ProcessMode.Training.value,
'activity_state': 'disabled',
'corpus': str(self.corpus.id),
'element': None,
......
......@@ -8,7 +8,7 @@ from gitlab.exceptions import GitlabCreateError, GitlabGetError
from responses import matchers
from rest_framework.exceptions import APIException, AuthenticationFailed, NotAuthenticated, ValidationError
from arkindex.dataimport.models import DataImport, DataImportMode, GitRefType, Revision
from arkindex.dataimport.models import GitRefType, Process, ProcessMode, Revision
from arkindex.dataimport.providers import GitLabProvider
from arkindex.project.tests import FixtureTestCase
......@@ -510,12 +510,12 @@ class TestGitLabProvider(FixtureTestCase):
rev = self.repo.revisions.filter(hash=sha)
self.assertFalse(rev.exists())
repo_imports = DataImport.objects.filter(revision__repo_id=str(self.repo.id))
self.assertFalse(repo_imports.exists())
repo_processes = Process.objects.filter(revision__repo_id=str(self.repo.id))
self.assertFalse(repo_processes.exists())
GitLabProvider(credentials=self.creds).handle_webhook(self.repo, request_mock)
di = repo_imports.get()
self.assertEqual(di.mode, DataImportMode.Repository)
di = repo_processes.get()
self.assertEqual(di.mode, ProcessMode.Repository)
@override_settings(PUBLIC_HOSTNAME='https://arkindex.localhost')
def test_handle_webhook_duplicate_events(self):
......@@ -544,14 +544,14 @@ class TestGitLabProvider(FixtureTestCase):
rev = self.repo.revisions.filter(hash=sha)
self.assertTrue(rev.exists())
repo_imports = DataImport.objects.filter(revision__repo_id=str(self.repo.id))
self.assertFalse(repo_imports.exists())
repo_processes = Process.objects.filter(revision__repo_id=str(self.repo.id))
self.assertFalse(repo_processes.exists())
GitLabProvider(credentials=self.creds).handle_webhook(self.repo, request_mock)
# Checking that we didn't initiate revision creation
self.assertEqual(self.gl_mock().projects.get.call_count, 0)
self.assertEqual(self.gl_mock().projects.get().commits.get.call_count, 0)
self.assertFalse(repo_imports.exists())
self.assertFalse(repo_processes.exists())
@override_settings(PUBLIC_HOSTNAME='https://arkindex.localhost')
def test_handle_webhook_wrong_kind(self):
......@@ -561,7 +561,7 @@ class TestGitLabProvider(FixtureTestCase):
sha = '1337'
rev = self.repo.revisions.filter(hash=sha)
self.assertFalse(rev.exists())
repo_imports = DataImport.objects.filter(revision__repo_id=str(self.repo.id))
repo_processes = Process.objects.filter(revision__repo_id=str(self.repo.id))
glp = GitLabProvider(credentials=self.creds)
request_mock = MagicMock()
......@@ -585,14 +585,14 @@ class TestGitLabProvider(FixtureTestCase):
with self.assertRaises(ValidationError):
glp.handle_webhook(self.repo, request_mock)
self.assertFalse(rev.exists())
self.assertFalse(repo_imports.exists())
self.assertFalse(repo_processes.exists())
# Breaking change: a list!
request_mock.data = [request_mock.data]
with self.assertRaises(ValidationError):
glp.handle_webhook(self.repo, request_mock)
self.assertFalse(rev.exists())
self.assertFalse(repo_imports.exists())
self.assertFalse(repo_processes.exists())
@override_settings(PUBLIC_HOSTNAME='https://arkindex.localhost')
def test_handle_webhook_delete_branch(self):
......@@ -607,7 +607,7 @@ class TestGitLabProvider(FixtureTestCase):
)
rev.save()
self.assertTrue(self.repo.revisions.filter(hash='1').exists())
repo_imports = DataImport.objects.filter(revision__repo_id=str(self.repo.id))
repo_processes = Process.objects.filter(revision__repo_id=str(self.repo.id))
glp = GitLabProvider(credentials=self.creds)
glp.update_or_create_ref(self.repo, rev, 'test', GitRefType.Branch)
......@@ -626,7 +626,7 @@ class TestGitLabProvider(FixtureTestCase):
glp.handle_webhook(self.repo, request_mock)
self.assertTrue(self.repo.revisions.filter(hash='1').exists())
self.assertEqual(len(self.repo.refs.all()), 0)
self.assertFalse(repo_imports.exists())
self.assertFalse(repo_processes.exists())
@responses.activate
@override_settings(PUBLIC_HOSTNAME='https://arkindex.localhost', GITLAB_APP_ID='abcd', GITLAB_APP_SECRET='s3kr3t')
......@@ -702,7 +702,7 @@ class TestGitLabProvider(FixtureTestCase):
# Should only include data from fixtures at first
self.assertFalse(self.repo.revisions.exclude(id=self.rev.id).exists())
self.assertFalse(self.repo.refs.exists())
self.assertEqual(DataImport.objects.count(), 1)
self.assertEqual(Process.objects.count(), 1)
# Update references for this repo
provider = GitLabProvider(credentials=self.creds)
......@@ -734,14 +734,14 @@ class TestGitLabProvider(FixtureTestCase):
('A' * 250, GitRefType.Branch),
])
# We should now have 3 new dataimports for these refs
self.assertEqual(DataImport.objects.count(), 4)
self.assertListEqual(list(DataImport.objects.values_list('name', 'mode', 'revision__hash').order_by('revision__hash')), [
("Import v0.1 from http://gitlab/repo", DataImportMode.Repository, 'commit0'),
("Import master from http://gitlab/repo", DataImportMode.Repository, 'commit1'),
# Last import name has been limited to 100 chars due to DataImport model constraint
(f"Import {'A' * 93}", DataImportMode.Repository, 'commit2'),
("Process fixture", DataImportMode.Workers, None),
# We should now have 3 new processes for these refs
self.assertEqual(Process.objects.count(), 4)
self.assertListEqual(list(Process.objects.values_list('name', 'mode', 'revision__hash').order_by('revision__hash')), [
("Import v0.1 from http://gitlab/repo", ProcessMode.Repository, 'commit0'),
("Import master from http://gitlab/repo", ProcessMode.Repository, 'commit1'),
# Last process name has been limited to 100 chars due to Process model constraint
(f"Import {'A' * 93}", ProcessMode.Repository, 'commit2'),
("Process fixture", ProcessMode.Workers, None),
])
@responses.activate
......