Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Target project: arkindex/backend
Commits on Source (318)
Showing 764 additions and 407 deletions
......@@ -11,7 +11,7 @@ include:
# For jobs that run backend scripts directly
.backend-setup:
image: registry.gitlab.com/arkindex/backend/base:django-4.0.4
image: registry.gitlab.com/teklia/arkindex/backend/base:django-4.0.4
cache:
paths:
......@@ -19,10 +19,10 @@ include:
before_script:
# Custom line to install our own deps from Git using GitLab CI credentials
- "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.com/arkindex/ponos#egg=ponos-server"
- "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.com/arkindex/transkribus#egg=transkribus-client"
- "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.com/teklia/arkindex/ponos#egg=ponos-server"
- "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.com/teklia/arkindex/transkribus#egg=transkribus-client"
- pip install -r tests-requirements.txt codecov
- "echo 'database: {host: postgres, port: 5432}' > $CONFIG_PATH"
- "echo 'database: {host: postgres, port: 5432}\npublic_hostname: http://ci.arkindex.localhost' > $CONFIG_PATH"
# Those jobs require the base image; they might fail if the image is not up to date.
# Allow them to fail when building a new base image, to prevent them from blocking a new base image build
......@@ -51,7 +51,7 @@ backend-tests:
stage: test
services:
- name: postgis/postgis:12-3.1
- name: postgis/postgis:14-3.2
alias: postgres
artifacts:
......
FROM registry.gitlab.com/arkindex/backend/base:django-4.0.4 as build
FROM registry.gitlab.com/teklia/arkindex/backend/base:django-4.0.4 as build
RUN mkdir build
ADD . build
RUN cd build && python3 setup.py sdist
FROM registry.gitlab.com/arkindex/backend/base:django-4.0.4
FROM registry.gitlab.com/teklia/arkindex/backend/base:django-4.0.4
ARG PONOS_BRANCH=master
ARG PONOS_ID=10017043
ARG TRANSKRIBUS_BRANCH=master
......
......@@ -33,9 +33,8 @@ RUN \
mv /tmp/transkribus/transkribus /usr/share
# Build full requirements, removing relative or remote references to arkindex projects
# Special case for apistar, where we want to keep our own fork
# Special case for approximate requirements from ponos, where we want to keep the versions specified from this repo
RUN cat /tmp/ponos/requirements-server.txt /tmp/requirements-*arkindex.txt | sort | uniq | grep -v -E '^apistar|arkindex|^#|^Django~=|^boto3~=|^cryptography~=|^django-enumfields~=|^djangorestframework~=|^pyyaml~=|transkribus-client' > /requirements.txt
RUN cat /tmp/ponos/requirements-server.txt /tmp/requirements-*arkindex.txt | sort | uniq | grep -v -E '^arkindex|^#|^Django~=|^boto3~=|^cryptography~=|^django-enumfields~=|^djangorestframework~=|^pyyaml~=|transkribus-client' > /requirements.txt
# List all management commands
RUN find /usr/share/arkindex/*/management -name '*.py' -not -name '__init__.py' > /commands.txt
......@@ -61,7 +60,7 @@ RUN python -m nuitka \
arkindex/manage.py
# Start over from a clean setup
FROM registry.gitlab.com/arkindex/backend/base:django-4.0.4 as build
FROM registry.gitlab.com/teklia/arkindex/backend/base:django-4.0.4 as build
# Import files from compilation
RUN mkdir /usr/share/arkindex
......
ROOT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
PONOS_BRANCH=master
IMAGE_TAG=registry.gitlab.com/arkindex/backend
IMAGE_TAG=registry.gitlab.com/teklia/arkindex/backend
.PHONY: all release
......
Backend for Historical Manuscripts Indexing
===========================================
[![pipeline status](https://gitlab.com/arkindex/backend/badges/master/pipeline.svg)](https://gitlab.com/arkindex/backend/commits/master)
[![pipeline status](https://gitlab.com/teklia/arkindex/backend/badges/master/pipeline.svg)](https://gitlab.com/teklia/arkindex/backend/commits/master)
## Requirements
* Clone of the [architecture](https://gitlab.com/arkindex/architecture)
* Clone of the [architecture](https://gitlab.com/teklia/arkindex/architecture)
* Git
* Make
* Python 3.6+
......@@ -21,7 +21,7 @@ mkvirtualenv ark -a .
pip install -e .[test]
```
When the [architecture](https://gitlab.com/arkindex/architecture) is running locally to provide required services:
When the [architecture](https://gitlab.com/teklia/arkindex/architecture) is running locally to provide required services:
```
arkindex/manage.py migrate
......@@ -109,6 +109,7 @@ Aside from the usual Django commands, some custom commands are available via `ma
* `delete_corpus`: Delete a big corpus using a Ponos task;
* `reindex`: Reindex elements into Solr;
* `telegraf`: A special command with InfluxDB-compatible output for Grafana statistics.
* `move_lines_to_parents`: Move element children to their geographical parents.
See `manage.py <command> --help` to view more details about a specific command.
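
For example (a minimal sketch; the options of the newly added command are not part of this diff), the built-in help shows what it accepts:

```
arkindex/manage.py move_lines_to_parents --help
```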
......
1.2.3
1.3.4-rc1
......@@ -4,35 +4,36 @@ from enumfields.admin import EnumFieldListFilter
from arkindex.dataimport.models import (
DataFile,
DataImport,
Process,
Repository,
Revision,
Worker,
WorkerConfiguration,
WorkerType,
WorkerVersion,
)
from arkindex.users.admin import GroupMembershipInline, UserMembershipInline
class DataFileInline(admin.StackedInline):
model = DataImport.files.through
model = Process.files.through
extra = 0
class WorkerRunInline(admin.StackedInline):
model = DataImport.versions.through
model = Process.versions.through
raw_id_fields = ('version', )
extra = 0
class ElementInline(admin.StackedInline):
model = DataImport.elements.through
model = Process.elements.through
raw_id_fields = ('element', )
readonly_fields = ('element', )
extra = 0
class DataImportAdmin(admin.ModelAdmin):
class ProcessAdmin(admin.ModelAdmin):
list_display = ('id', 'creator', 'corpus', 'state', 'mode')
list_filter = [('mode', EnumFieldListFilter), ]
fieldsets = (
......@@ -72,8 +73,8 @@ class WorkerInline(admin.StackedInline):
class RepositoryAdmin(admin.ModelAdmin):
list_display = ('id', 'url', 'type')
fields = ('id', 'url', 'type', 'hook_token', 'provider_name', 'credentials')
list_display = ('id', 'url')
fields = ('id', 'url', 'hook_token', 'provider_name', 'credentials')
readonly_fields = ('id', )
inlines = [WorkerInline, UserMembershipInline, GroupMembershipInline]
......@@ -100,15 +101,21 @@ class WorkerConfigurationInline(admin.StackedInline):
class WorkerAdmin(admin.ModelAdmin):
list_display = ('id', 'name', 'slug', 'type', 'repository')
field = ('id', 'name', 'slug', 'type', 'repository')
fields = ('id', 'name', 'slug', 'type', 'repository', 'public')
readonly_fields = ('id', )
inlines = [WorkerVersionInline, UserMembershipInline, GroupMembershipInline, WorkerConfigurationInline]
class WorkerTypeAdmin(admin.ModelAdmin):
list_display = ('id', 'slug', 'display_name', 'created')
fields = ('id', 'slug', 'display_name')
readonly_fields = ('id', )
class WorkerVersionAdmin(admin.ModelAdmin):
list_display = ('id', 'worker', 'revision')
list_filter = ('worker', )
field = ('id', 'worker', 'revision', 'configuration')
fields = ('id', 'worker', 'revision', 'configuration', 'model_usage')
readonly_fields = ('id', )
raw_id_fields = ('docker_image', 'revision')
......@@ -119,10 +126,11 @@ class WorkerConfigurationAdmin(admin.ModelAdmin):
readonly_fields = ('id', 'configuration_hash')
admin.site.register(DataImport, DataImportAdmin)
admin.site.register(Process, ProcessAdmin)
admin.site.register(DataFile, DataFileAdmin)
admin.site.register(Revision, RevisionAdmin)
admin.site.register(Repository, RepositoryAdmin)
admin.site.register(Worker, WorkerAdmin)
admin.site.register(WorkerType, WorkerTypeAdmin)
admin.site.register(WorkerVersion, WorkerVersionAdmin)
admin.site.register(WorkerConfiguration, WorkerConfigurationAdmin)
This diff is collapsed.
from django.core.management.base import BaseCommand, CommandError
from arkindex.dataimport.models import DataImport, DataImportMode
from arkindex.dataimport.models import Process, ProcessMode
from arkindex.documents.models import ElementType
from arkindex.project.argparse import CorpusArgument, DataImportArgument, ElementArgument, UserArgument
from arkindex.project.argparse import CorpusArgument, ElementArgument, ProcessArgument, UserArgument
class Command(BaseCommand):
help = 'Apply a template to build a set of DataImport'
help = 'Apply a template to build a set of Processes'
def add_arguments(self, parser):
parser.add_argument(
'dataimport',
help='DataImport to use as a template for workers',
type=DataImportArgument()
'process',
help='Process to use as a template for workers',
type=ProcessArgument()
)
parser.add_argument(
'--corpus',
required=True,
help='Corpus ID or name to create DataImports on',
help='Corpus ID or name to create Processes on',
type=CorpusArgument()
)
parser.add_argument(
......@@ -25,36 +25,36 @@ class Command(BaseCommand):
dest='elements',
nargs='+',
required=True,
help='Top level element ids to build workflows: each element will get its dataimport',
help='Top level element ids to build workflows: each element will get its process',
type=ElementArgument(),
)
parser.add_argument(
'--children-type',
type=str,
required=True,
help='Element type slug to use to build the new dataimport',
help='Element type slug to use to build the new process',
)
parser.add_argument(
'--chunks',
type=int,
default=1,
help='Number of chunks to build for the dataimport',
help='Number of chunks to build for the process',
)
parser.add_argument(
'--creator',
required=True,
type=UserArgument(),
help='Creator of the new dataimport',
help='Creator of the new process',
)
def handle(self, dataimport, corpus, elements, children_type, *args, **options):
def handle(self, process, corpus, elements, children_type, *args, **options):
# Check template (must be in worker mode and have some workers)
if dataimport.mode != DataImportMode.Workers:
raise CommandError("Only workers dataimports are supported")
if not dataimport.worker_runs.exists():
raise CommandError("This dataimport has no worker runs defined")
if process.mode != ProcessMode.Workers:
raise CommandError("Only workers processes are supported")
if not process.worker_runs.exists():
raise CommandError("This process has no worker runs defined")
# Check new dataimport related models (corpus, elements, type)
# Check new process related models (corpus, elements, type)
for element in elements:
if element.corpus != corpus:
raise CommandError(f"Element {element.id} is not in corpus {corpus}")
......@@ -66,20 +66,20 @@ class Command(BaseCommand):
# Now build a clone for each top level element
for element in elements:
self.clone(dataimport, element, element_type, options['creator'], options['chunks'])
self.clone(process, element, element_type, options['creator'], options['chunks'])
def clone(self, source, element, element_type, creator, chunks):
"""
Clone a dataimport configuration, on new elements
Clone a process configuration, on new elements
"""
# Name is using the source's name, along with the element's name
name = f"Run {source.name} on {element.name}" if source.name else element.name
# Build a dataimport that will load all specified children
# Build a process that will load all specified children
# elements from that top level element
di = DataImport.objects.create(
mode=DataImportMode.Workers,
p = Process.objects.create(
mode=ProcessMode.Workers,
name=name,
corpus=element.corpus,
creator=creator,
......@@ -88,12 +88,12 @@ class Command(BaseCommand):
element_type=element_type,
)
print(f'Created DataImport {di.id}')
print(f'Created Process {p.id}')
# Build linear worker runs
runs = {}
for wr in source.worker_runs.all():
runs[wr.id] = di.worker_runs.create(
runs[wr.id] = p.worker_runs.create(
version=wr.version,
parents=[]
)
......@@ -107,5 +107,5 @@ class Command(BaseCommand):
runs[wr.id].save()
# Build and start workflow
di.start(chunks=chunks)
print(f'Started DataImport {di.id}')
p.start(chunks=chunks)
print(f'Started Process {p.id}')
......@@ -3,7 +3,7 @@ import uuid
from django.core.management.base import BaseCommand, CommandError
from arkindex.dataimport.models import Repository, RepositoryType, Revision, Worker, WorkerVersion
from arkindex.dataimport.models import Repository, Revision, Worker, WorkerType, WorkerVersion
class Command(BaseCommand):
......@@ -35,16 +35,20 @@ class Command(BaseCommand):
repo, _ = Repository.objects.get_or_create(
url=url,
type=RepositoryType.Worker,
defaults={
"hook_token": str(uuid.uuid4()),
}
)
worker_type, _ = WorkerType.objects.get_or_create(
slug="classifier",
defaults={'display_name': "classifier"}
)
worker, _ = Worker.objects.get_or_create(
name=name,
slug=slug,
type="classifier",
type=worker_type,
repository=repo
)
......
#!/usr/bin/env python3
import yaml
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from arkindex.dataimport.models import DataImport, DataImportMode
from arkindex.dataimport.utils import get_default_farm_id
from arkindex.project.argparse import CorpusArgument
from arkindex.users.models import User
from ponos.models import Workflow
IMPORT_NAME = 'import-s3'
MAX_DEPTH_WARN = 3
class Command(BaseCommand):
help = 'Start a S3 import distributed among multiple tasks'
def add_arguments(self, parser):
parser.add_argument(
'--source-bucket',
required=True,
help='AWS S3 import source bucket name',
)
parser.add_argument(
'--corpus',
required=True,
help='Corpus ID or name to import Volumes to',
type=CorpusArgument()
)
parser.add_argument(
'--bucket-prefix',
required=True,
help='Bucket subfolder to limit volumes import',
)
parser.add_argument(
'--max-folder-depth',
type=int,
required=True,
help='Recursion level for subfolders exploration',
)
parser.add_argument(
'--nb-chunks',
type=int,
required=True,
help='''
Number of tasks used for volumes import. A configuration file is written for each
task at /data/<IMAGE_NAME>/chunk_<WORKER_NUMBER>.yml, starting with chunk_1.yml
''',
)
parser.add_argument(
'--dest-bucket',
help='Arkindex S3 bucket to copy images to',
)
parser.add_argument(
'--image-server',
help='Use image server as dest server. No copy is needed',
)
parser.add_argument(
'--iiif-cache-bucket',
help='Image server cache bucket to populate with pre-computed images',
)
def handle(self, *args, **options):
# Parser exclusive
dest_bucket_mode = bool(options.get('dest_bucket'))
image_server_mode = bool(options.get('image_server'))
if not dest_bucket_mode ^ image_server_mode:
raise CommandError('Exactly one of the arguments --dest-bucket, --image-server is required')
env_vars = {
'AWS_ACCESS_KEY': settings.AWS_ACCESS_KEY,
'AWS_SECRET_KEY': settings.AWS_SECRET_KEY,
'AWS_ENDPOINT': settings.AWS_ENDPOINT,
'AWS_REGION': settings.AWS_REGION
}
# Assert S3 information is passed to tasks
assert env_vars['AWS_ACCESS_KEY'] and env_vars['AWS_SECRET_KEY'], (
'S3 environment variables could not be found\n'
'Please define AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY before starting import'
)
# Warn for high recursion level
depth = options['max_folder_depth']
if depth > MAX_DEPTH_WARN:
self.stderr.write(self.style.WARNING(
f'Maximum folder depth set to {depth}. A high value can considerably slow tasks import distribution'
))
iiif_cache_bucket = options.get('iiif_cache_bucket')
import_command = (
"python3 -m arkindex_tasks.import_s3 "
"--source-bucket {source_bucket} "
"--corpus-id {corpus.id} "
"--bucket-prefix '{bucket_prefix}' "
"--max-folder-depth {max_folder_depth} "
"--nb-chunks {nb_chunks} "
).format(**options)
if iiif_cache_bucket:
import_command += "--iiif-cache-bucket {} ".format(iiif_cache_bucket)
if dest_bucket_mode:
import_command += "--dest-bucket {}".format(options['dest_bucket'])
else:
import_command += "--image-server {}".format(options['image_server'])
tasks_config = {
IMPORT_NAME: {
'image': settings.ARKINDEX_TASKS_IMAGE,
'command': import_command
}
}
for n in range(1, options['nb_chunks'] + 1):
chunk_name = 'import_chunk_{}'.format(n)
tasks_config[chunk_name] = {
'parents': [IMPORT_NAME],
'image': settings.ARKINDEX_TASKS_IMAGE,
'command': 'python3 -m arkindex_tasks.import_s3.volumes_import /data/{}/chunk_{}.yml'
.format(IMPORT_NAME, n)
}
# Add automatic thumbnails generation
tasks_config['thumbnails_chunk_{}'.format(n)] = {
'parents': [chunk_name],
'image': settings.ARKINDEX_TASKS_IMAGE,
'command': 'python3 -m arkindex_tasks.generate_thumbnails /data/{}/elements.json'.format(chunk_name),
}
if not iiif_cache_bucket:
continue
tasks_config['cache_builder_{}'.format(n)] = {
'parents': [chunk_name],
'image': settings.ARKINDEX_TASKS_IMAGE,
'command': 'python3 -m arkindex_tasks.import_s3.build_cantaloupe_cache /data/{}/s3_elements.json'
.format(chunk_name)
}
recipe = settings.PONOS_RECIPE.copy()
recipe['tasks'] = tasks_config
recipe.setdefault('env', {}).update(env_vars)
workflow = Workflow.objects.create(farm_id=get_default_farm_id(), recipe=yaml.dump(recipe))
self.stdout.write('Created Workflow with id {}'.format(workflow.id))
admin = User.objects.filter(is_admin=True).first()
assert admin is not None, 'No admin user has been found to create a Dataimport'
dataimport = DataImport.objects.create(
workflow_id=workflow.id,
mode=DataImportMode.Images,
corpus_id=options['corpus'].id,
creator_id=admin.id
)
self.stdout.write(self.style.SUCCESS(
'Linked Workflow to DataImport {0} using user {1.email} ({1.id})'
.format(dataimport.id, admin)
))
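
A usage sketch for the new command (its real name comes from its module file name, which this view does not show, so `import_s3` and all option values below are illustrative; exactly one of `--dest-bucket` or `--image-server` must be supplied):

```
arkindex/manage.py import_s3 \
    --source-bucket source-scans \
    --corpus "My corpus" \
    --bucket-prefix volumes/ \
    --max-folder-depth 2 \
    --nb-chunks 4 \
    --dest-bucket arkindex-images
```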
......@@ -47,7 +47,7 @@ class Migration(migrations.Migration):
('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
('created', models.DateTimeField(auto_now_add=True)),
('updated', models.DateTimeField(auto_now=True)),
('mode', enumfields.fields.EnumField(enum=arkindex.dataimport.models.DataImportMode, max_length=30)),
('mode', enumfields.fields.EnumField(enum=arkindex.dataimport.models.ProcessMode, max_length=30)),
('payload', django.contrib.postgres.fields.jsonb.JSONField(blank=True, null=True)),
],
options={
......
# Generated by Django 3.1 on 2020-09-09 15:26
import enumfields.fields
from django.db import migrations
import arkindex.dataimport.models
from django.db import migrations, models
WORKER_REPO_PREFIX = 'https://gitlab.com/teklia/workers/'
......@@ -13,7 +10,7 @@ def update_repository_types(apps, schema_editor):
# Defaults the repository to 'worker' type except if defined below
Repository.objects \
.filter(url__startswith=WORKER_REPO_PREFIX) \
.update(type=arkindex.dataimport.models.RepositoryType.Worker)
.update(type="iiif")
class Migration(migrations.Migration):
......@@ -26,9 +23,8 @@ class Migration(migrations.Migration):
migrations.AddField(
model_name='repository',
name='type',
field=enumfields.fields.EnumField(
default=arkindex.dataimport.models.RepositoryType.IIIF,
enum=arkindex.dataimport.models.RepositoryType,
field=models.CharField(
default="iiif",
max_length=10),
preserve_default=False,
),
......
# Generated by Django 4.0.2 on 2022-04-07 11:43
import uuid
import django.db.models.deletion
import django.utils.timezone
from django.db import migrations, models
def update_worker_types(apps, schema_editor):
Worker = apps.get_model('dataimport', 'Worker')
WorkerType = apps.get_model('dataimport', 'WorkerType')
# Get list of current worker types
current_types = Worker.objects.values('type').distinct()
created_types = WorkerType.objects.bulk_create(
[WorkerType(slug=type_slug['type'], display_name=type_slug['type'].capitalize()) for type_slug in current_types]
)
for worker_type in created_types:
Worker.objects.filter(type=worker_type.slug).update(type_fk=worker_type.id)
def retrieve_worker_type_slugs(apps, schema_editor):
Worker = apps.get_model('dataimport', 'Worker')
for worker in Worker.objects.all():
worker.type = worker.type_fk.slug
worker.save()
class Migration(migrations.Migration):
dependencies = [
('dataimport', '0045_remove_dataimport_best_class'),
]
operations = [
migrations.CreateModel(
name='WorkerType',
fields=[
('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
('slug', models.CharField(max_length=100, unique=True)),
('display_name', models.CharField(max_length=100)),
('created', models.DateTimeField(auto_now_add=True, default=django.utils.timezone.now)),
('updated', models.DateTimeField(auto_now=True))
],
),
migrations.AddField(
model_name='worker',
name='type_fk',
field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.PROTECT, related_name='type', to='dataimport.workertype'),
),
migrations.AlterField(
model_name="worker",
name="type",
field=models.CharField(max_length=50, null=True),
),
migrations.RunPython(
update_worker_types,
reverse_code=retrieve_worker_type_slugs
),
migrations.RemoveField(
model_name='worker',
name='type',
),
migrations.RenameField(
model_name='worker',
old_name='type_fk',
new_name='type',
),
migrations.AlterField(
model_name="worker",
name="type",
field=models.ForeignKey(on_delete=django.db.models.deletion.PROTECT, related_name='type', to='dataimport.workertype'),
),
migrations.AlterField(
model_name="workertype",
name="created",
field=models.DateTimeField(auto_now_add=True)
)
]
# Generated by Django 4.0.2 on 2022-05-03 15:12
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('dataimport', '0046_workertype_alter_worker_type'),
]
operations = [
migrations.AddField(
model_name='workerversion',
name='model_usage',
field=models.BooleanField(default=False),
),
]
# Generated by Django 4.0.2 on 2022-05-03 16:14
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('training', '0004_modelversion_archive_hash'),
('dataimport', '0047_workerversion_model_usage'),
]
operations = [
migrations.AddField(
model_name='workerrun',
name='model_version',
field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, related_name='worker_runs', to='training.modelversion'),
),
]
# Generated by Django 4.0.4 on 2022-06-14 13:52
import django.core.validators
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('dataimport', '0048_workerrun_model_version'),
]
operations = [
migrations.AddField(
model_name='dataimport',
name='bucket_name',
field=models.CharField(blank=True, max_length=63, null=True, validators=[django.core.validators.MinLengthValidator(3)]),
),
migrations.AddField(
model_name='dataimport',
name='prefix',
field=models.CharField(blank=True, max_length=1024, null=True),
),
]
# Generated by Django 4.0.2 on 2022-06-13 12:40
from django.db import migrations, models
from django.db.models.query import Prefetch
def generate_worker_run_summaries(apps, schema_editor):
WorkerRun = apps.get_model('dataimport', 'WorkerRun')
GitRef = apps.get_model('dataimport', 'GitRef')
runs = WorkerRun.objects.select_related(
'version__worker',
'version__revision',
'model_version__model',
'configuration'
).only(
# Required to make joins and for the bulk update
'id',
'version_id',
'version__worker__name',
# Required for the prefetch_related. Using `revision_id` instead of `revision__id` causes Django to select the entire revision anyway
'version__revision__id',
'model_version_id',
'model_version__model__name',
'configuration__name'
).prefetch_related(
Prefetch(
'version__revision__refs',
# revision_id is required to make the link between refs and revisions
queryset=GitRef.objects.only('revision_id', 'name')
)
)
for run in runs:
summary_text = f"Worker {run.version.worker.name} @ "
git_ref_names = run.version.revision.refs.values_list('name', flat=True)
if len(git_ref_names) > 0:
summary_text += ", ".join(git_ref_names)
else:
summary_text += str(run.version.id)[0:6]
if run.model_version:
summary_text += f" with model {run.model_version.model.name} @ {str(run.model_version.id)[0:6]}"
if run.configuration:
summary_text += f" using configuration '{run.configuration.name}'"
run.summary = summary_text
WorkerRun.objects.bulk_update(runs, ['summary'])
class Migration(migrations.Migration):
dependencies = [
('dataimport', '0049_dataimport_s3'),
]
operations = [
migrations.AddField(
model_name='workerrun',
name='summary',
field=models.TextField(null=True),
),
migrations.RunPython(
generate_worker_run_summaries,
reverse_code=migrations.RunPython.noop
),
migrations.AlterField(
model_name='workerrun',
name='summary',
field=models.TextField(),
),
]
# Generated by Django 4.0.5 on 2022-07-12 13:10
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('dataimport', '0050_workerrun_summary'),
]
operations = [
migrations.AddField(
model_name='workerconfiguration',
name='archived',
field=models.BooleanField(default=False),
),
]
# Generated by Django 4.1a1 on 2022-07-07 14:14
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("documents", "0057_entity_list_index"),
("dataimport", "0051_workerconfiguration_archived"),
]
operations = [
migrations.AddField(
model_name="dataimport",
name="test_folder",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="test_folder_processes",
to="documents.element",
),
),
migrations.AddField(
model_name="dataimport",
name="train_folder",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="train_folder_processes",
to="documents.element",
),
),
migrations.AddField(
model_name='dataimport',
name='validation_folder',
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name='validation_folder_processes',
to='documents.element'
),
),
]