Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • arkindex/backend
1 result
Show changes
Commits on Source (76)
Showing
with 1775 additions and 1404 deletions
......@@ -11,7 +11,7 @@ include:
# For jobs that run backend scripts directly
.backend-setup:
image: registry.gitlab.com/teklia/arkindex/backend/base:lxml-4.9.2
image: registry.gitlab.teklia.com/arkindex/backend/base:gitlab-teklia
cache:
paths:
......@@ -19,8 +19,8 @@ include:
before_script:
# Custom line to install our own deps from Git using GitLab CI credentials
- "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.com/teklia/arkindex/transkribus#egg=transkribus-client"
- "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.com/teklia/arkindex/license#egg=teklia-license"
- "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.teklia.com/arkindex/transkribus#egg=transkribus-client"
- "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.teklia.com/arkindex/license#egg=teklia-license"
- pip install -r tests-requirements.txt
- "echo 'database: {host: postgres, port: 5432}\npublic_hostname: http://ci.arkindex.localhost' > $CONFIG_PATH"
......@@ -245,7 +245,7 @@ sentry-release:
release-notes:
stage: deploy
image: registry.gitlab.com/teklia/devops:latest
image: registry.gitlab.teklia.com/infra/devops:latest
rules:
- if: '$CI_COMMIT_TAG && $CI_COMMIT_TAG !~ /^base-.*/'
......@@ -257,7 +257,7 @@ release-notes:
bump-python-deps:
stage: deploy
image: registry.gitlab.com/teklia/devops:latest
image: registry.gitlab.teklia.com/infra/devops:latest
only:
- schedules
......
FROM registry.gitlab.com/teklia/arkindex/backend/base:lxml-4.9.2 as build
FROM registry.gitlab.teklia.com/arkindex/backend/base:gitlab-teklia as build
RUN mkdir build
ADD . build
RUN cd build && python3 setup.py sdist
FROM registry.gitlab.com/teklia/arkindex/backend/base:lxml-4.9.2
FROM registry.gitlab.teklia.com/arkindex/backend/base:gitlab-teklia
ARG TRANSKRIBUS_BRANCH=master
ARG TRANSKRIBUS_ID=11180199
ARG TRANSKRIBUS_ID=63
ARG LICENSE_BRANCH=master
ARG LICENSE_ID=45943500
ARG GITLAB_TOKEN="gaFM7LRa9zy9QMowcUhx"
ARG LICENSE_ID=37
# Auth token expires on 01/07/2024
ARG GITLAB_TOKEN="glpat-3sBZPFgkZbqJxfSqjcAa"
# Install transkribus-client from private repo
RUN \
mkdir /tmp/transkribus && \
wget --header "PRIVATE-TOKEN: $GITLAB_TOKEN" https://gitlab.com/api/v4/projects/$TRANSKRIBUS_ID/repository/archive.tar.gz?sha=$TRANSKRIBUS_BRANCH -O /tmp/transkribus/archive.tar.gz && \
wget --header "PRIVATE-TOKEN: $GITLAB_TOKEN" https://gitlab.teklia.com/api/v4/projects/$TRANSKRIBUS_ID/repository/archive.tar.gz?sha=$TRANSKRIBUS_BRANCH -O /tmp/transkribus/archive.tar.gz && \
tar --strip-components=1 -xvf /tmp/transkribus/archive.tar.gz -C /tmp/transkribus && \
cd /tmp/transkribus && pip install --disable-pip-version-check --no-cache-dir --quiet . && \
rm -rf /tmp/transkribus
......@@ -22,7 +24,7 @@ RUN \
# Install teklia-license from private repo
RUN \
mkdir /tmp/teklia-license && \
wget --header "PRIVATE-TOKEN: $GITLAB_TOKEN" https://gitlab.com/api/v4/projects/$LICENSE_ID/repository/archive.tar.gz?sha=$LICENSE_BRANCH -O /tmp/teklia-license.tar.gz && \
wget --header "PRIVATE-TOKEN: $GITLAB_TOKEN" https://gitlab.teklia.com/api/v4/projects/$LICENSE_ID/repository/archive.tar.gz?sha=$LICENSE_BRANCH -O /tmp/teklia-license.tar.gz && \
tar --strip-components=1 -xvf /tmp/teklia-license.tar.gz -C /tmp/teklia-license && \
cd /tmp/teklia-license && pip install --disable-pip-version-check --no-cache-dir --quiet . && \
rm -rf /tmp/teklia-license
......
FROM python:3.10-slim AS compilation
FROM python:3.10-slim-bookworm AS compilation
RUN apt-get update && apt-get install --no-install-recommends -y build-essential wget
RUN pip install nuitka
ARG TRANSKRIBUS_BRANCH=master
ARG TRANSKRIBUS_ID=11180199
ARG TRANSKRIBUS_ID=63
ARG LICENSE_BRANCH=master
ARG LICENSE_ID=45943500
ARG GITLAB_TOKEN="gaFM7LRa9zy9QMowcUhx"
ARG LICENSE_ID=37
# Auth token expires on 01/07/2024
ARG GITLAB_TOKEN="glpat-3sBZPFgkZbqJxfSqjcAa"
# We build in /usr/share because Django will try to load some files relative to that path
# once executed in the binary (management commands, ...)
......@@ -22,14 +24,14 @@ ADD requirements.txt /tmp/requirements-arkindex.txt
# Install transkribus-client from private repo
RUN \
mkdir /tmp/transkribus && \
wget --header "PRIVATE-TOKEN: $GITLAB_TOKEN" https://gitlab.com/api/v4/projects/$TRANSKRIBUS_ID/repository/archive.tar.gz?sha=$TRANSKRIBUS_BRANCH -O /tmp/transkribus.tar.gz && \
wget --header "PRIVATE-TOKEN: $GITLAB_TOKEN" https://gitlab.teklia.com/api/v4/projects/$TRANSKRIBUS_ID/repository/archive.tar.gz?sha=$TRANSKRIBUS_BRANCH -O /tmp/transkribus.tar.gz && \
tar --strip-components=1 -xvf /tmp/transkribus.tar.gz -C /tmp/transkribus && \
mv /tmp/transkribus/transkribus /usr/share
# Install teklia-license from private repo
RUN \
mkdir /tmp/teklia-license && \
wget --header "PRIVATE-TOKEN: $GITLAB_TOKEN" https://gitlab.com/api/v4/projects/$LICENSE_ID/repository/archive.tar.gz?sha=$LICENSE_BRANCH -O /tmp/teklia-license.tar.gz && \
wget --header "PRIVATE-TOKEN: $GITLAB_TOKEN" https://gitlab.teklia.com/api/v4/projects/$LICENSE_ID/repository/archive.tar.gz?sha=$LICENSE_BRANCH -O /tmp/teklia-license.tar.gz && \
tar --strip-components=1 -xvf /tmp/teklia-license.tar.gz -C /tmp/teklia-license && \
mv /tmp/teklia-license/teklia_license /usr/share && \
cp /tmp/teklia-license/requirements.txt /tmp/requirements-license-arkindex.txt
......@@ -61,7 +63,7 @@ RUN python -m nuitka \
arkindex/manage.py
# Start over from a clean setup
FROM registry.gitlab.com/teklia/arkindex/backend/base:lxml-4.9.2 as build
FROM registry.gitlab.teklia.com/arkindex/backend/base:gitlab-teklia as build
# Import files from compilation
RUN mkdir /usr/share/arkindex
......
ROOT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
IMAGE_TAG=registry.gitlab.com/teklia/arkindex/backend
IMAGE_TAG=registry.gitlab.teklia.com/arkindex/backend
.PHONY: all release
......@@ -32,7 +32,7 @@ test-fixtures:
test-fixtures-run:
arkindex/manage.py migrate
arkindex/manage.py build_fixtures
arkindex/manage.py dumpdata --indent 4 process documents images users auth ponos > arkindex/documents/fixtures/data.json
arkindex/manage.py dumpdata --indent 4 process documents images users auth ponos training > arkindex/documents/fixtures/data.json
test-fixtures-restore:
# This first renaming ensures that arkindex_tmp_fixtures exists; we don't want to drop arkindex_dev without a backup
......
Backend for Historical Manuscripts Indexing
===========================================
[![pipeline status](https://gitlab.com/teklia/arkindex/backend/badges/master/pipeline.svg)](https://gitlab.com/teklia/arkindex/backend/commits/master)
[![pipeline status](https://gitlab.teklia.com/arkindex/backend/badges/master/pipeline.svg)](https://gitlab.teklia.com/arkindex/backend/commits/master)
## Requirements
* Clone of the [architecture](https://gitlab.com/teklia/arkindex/architecture)
* Clone of the [architecture](https://gitlab.teklia.com/arkindex/architecture)
* Git
* Make
* Python 3.6+
......@@ -15,13 +15,13 @@ Backend for Historical Manuscripts Indexing
## Dev Setup
```
git clone git@gitlab.com:arkindex/backend.git
git clone git@gitlab.teklia.com:arkindex/backend.git
cd backend
mkvirtualenv ark -a .
pip install -e .[test]
```
When the [architecture](https://gitlab.com/teklia/arkindex/architecture) is running locally to provide required services:
When the [architecture](https://gitlab.teklia.com/arkindex/architecture) is running locally to provide required services:
```
arkindex/manage.py migrate
......@@ -47,7 +47,7 @@ The line that sets the PDF policy is `<policy domain="coder" rights="none" patte
Arkindex uses OAuth to let a user connect their GitLab account(s) and register Git repositories. In local development, you will need to register Arkindex as a GitLab OAuth application for it to work.
Go to GitLab's [Applications settings](https://gitlab.com/profile/applications) and create a new application with the `api` scope and add the following callback URIs:
Go to GitLab's [Applications settings](https://gitlab.teklia.com/profile/applications) and create a new application with the `api` scope and add the following callback URIs:
```
http://127.0.0.1:8000/api/v1/oauth/providers/gitlab/callback/
......
1.4.2
1.5.0
......@@ -68,7 +68,7 @@ from arkindex.documents.serializers.light import CorpusAllowedMetaDataSerializer
from arkindex.documents.serializers.ml import ElementTranscriptionSerializer
from arkindex.images.models import Image
from arkindex.ponos.utils import is_admin_or_ponos_task
from arkindex.process.models import WorkerRun, WorkerVersion
from arkindex.process.models import WorkerConfiguration, WorkerRun, WorkerVersion
from arkindex.project.fields import Unnest
from arkindex.project.mixins import ACLMixin, CorpusACLMixin, SelectionMixin
from arkindex.project.openapi import UUID_OR_FALSE, AutoSchema
......@@ -82,6 +82,7 @@ from arkindex.project.triggers import (
selection_worker_results_delete,
worker_results_delete,
)
from arkindex.training.models import ModelVersion
from arkindex.users.models import Role
from arkindex.users.utils import filter_rights
......@@ -1165,14 +1166,14 @@ class ElementChildren(ElementsListBase):
# Let `self.get_filters` handle filtering optimisations
if self.clean_params.get('order', 'position').lower() == 'position':
# This condition is necessary because when ordering by position ('paths__ordering')
# we run into this bug https://gitlab.com/teklia/arkindex/backend/-/issues/769 and unless
# we run into this bug https://gitlab.teklia.com/arkindex/backend/-/issues/769 and unless
# the ordering is also used in the .distinct(), this leads to some results being
# "cut off" from the response. For example, if you have 5 distinct elements, but two
# of these elements have multiple paths with different orderings within the same parent,
# the DISTINCT will work as expected during the COUNT query, and find 5 elements;
# however when next the elements are selected, the path ordering is used in the
# SELECT query which triggers the "duplicating" bug
# (https://gitlab.com/teklia/arkindex/backend/-/issues/76). This SELECT would return 7 elements
# (https://gitlab.teklia.com/arkindex/backend/-/issues/76). This SELECT would return 7 elements
# (2 duplicates) but the previous result from the COUNT would mean that only 5 of these
# 7 elements would get returned, and some elements would therefore (randomly) be missing
# from the results. Adding the 'paths__ordering' to the .distinct() here forces the
......@@ -1263,14 +1264,6 @@ class ElementRetrieve(ACLMixin, RetrieveUpdateDestroyAPIView):
get=extend_schema(
operation_id='ListElementNeighbors',
tags=['elements'],
parameters=[
OpenApiParameter(
'n',
type={'type': 'integer', 'minimum': 1, 'maximum': 10},
description='Number of neighbors to retrieve around the element',
required=False,
)
],
)
)
class ElementNeighbors(ACLMixin, ListAPIView):
......@@ -1284,16 +1277,9 @@ class ElementNeighbors(ACLMixin, ListAPIView):
queryset = Element.objects.none()
def get_queryset(self):
n = self.request.query_params.get('n', 1)
try:
n = int(n)
except (TypeError, ValueError):
raise ValidationError({'n': 'Should be an integer between 1 and 10'})
if not 1 <= n <= 10:
raise ValidationError({'n': 'Should be an integer between 1 and 10'})
element = get_object_or_404(
Element.objects.select_related('corpus'),
Element.objects.select_related('corpus').only('id', 'corpus__public'),
id=self.kwargs['pk']
)
......@@ -1301,7 +1287,7 @@ class ElementNeighbors(ACLMixin, ListAPIView):
if not self.has_access(element.corpus, Role.Guest.value):
raise PermissionDenied(detail='You do not have a read access to this element.')
return Element.objects.get_neighbors(element, n)
return Element.objects.get_neighbors(element)
@extend_schema(tags=['elements'], request=None)
......@@ -2194,6 +2180,21 @@ class CorpusSelectionDestroy(CorpusACLMixin, SelectionMixin, DestroyAPIView):
description='Only delete Worker Results on selected elements in this corpus. '
'Cannot be used together with `element_id`.',
),
OpenApiParameter(
'model_version_id',
type=UUID,
required=False,
description='Only delete Worker Results produced by a specific model version.',
),
OpenApiParameter(
'configuration_id',
type=UUID_OR_FALSE,
required=False,
description=dedent("""
Only delete Worker Results produced by a specific worker configuration.
If set to false, only delete results that use no specific configuration.
""")
),
],
tags=['ml'],
)
......@@ -2246,6 +2247,38 @@ class WorkerResultsDestroy(CorpusACLMixin, DestroyAPIView):
except WorkerVersion.DoesNotExist:
errors['worker_version_id'].append('This worker version does not exist.')
model_version = None
if 'model_version_id' in self.request.query_params:
try:
model_version_id = UUID(self.request.query_params['model_version_id'])
except (TypeError, ValueError):
errors['model_version_id'].append('Invalid UUID.')
else:
try:
model_version = ModelVersion.objects.get(id=model_version_id)
except ModelVersion.DoesNotExist:
errors['model_version_id'].append('This model version does not exist.')
configuration = None
if 'configuration_id' in self.request.query_params:
conf_id = self.request.query_params['configuration_id']
if conf_id.lower() in ('false', '0'):
configuration = False
else:
try:
conf_id = UUID(conf_id)
except (TypeError, ValueError):
errors['configuration_id'].append(
'Invalid UUID. You can set "false" to exclude results with a configuration.'
)
else:
try:
configuration = WorkerConfiguration.objects.get(id=conf_id)
except WorkerConfiguration.DoesNotExist:
errors['configuration_id'].append(
'This worker configuration does not exist.'
)
if errors:
raise ValidationError(errors)
......@@ -2253,6 +2286,8 @@ class WorkerResultsDestroy(CorpusACLMixin, DestroyAPIView):
selection_worker_results_delete(
corpus=corpus,
version=worker_version,
model_version=model_version,
configuration=configuration,
user_id=self.request.user.id,
)
else:
......@@ -2260,6 +2295,8 @@ class WorkerResultsDestroy(CorpusACLMixin, DestroyAPIView):
corpus_id=corpus.id,
version=worker_version,
element_id=element_id,
model_version=model_version,
configuration=configuration,
user_id=self.request.user.id,
)
......
......@@ -70,6 +70,35 @@ def delete_element(element_id: UUID) -> None:
""", {'id': element_id})
logger.info(f"Deleted {cursor.rowcount} usage from process as element")
# Set folders references on training processes to None
cursor.execute("""
UPDATE process_process
SET train_folder_id = NULL
WHERE train_folder_id = %(id)s
OR train_folder_id IN (
SELECT element_id FROM documents_elementpath WHERE path && ARRAY[%(id)s]
)
""", {'id': element_id})
logger.info(f"Deleted {cursor.rowcount} usage from process as train folder")
cursor.execute("""
UPDATE process_process
SET validation_folder_id = NULL
WHERE validation_folder_id = %(id)s
OR validation_folder_id IN (
SELECT element_id FROM documents_elementpath WHERE path && ARRAY[%(id)s]
)
""", {'id': element_id})
logger.info(f"Deleted {cursor.rowcount} usage from process as validation folder")
cursor.execute("""
UPDATE process_process
SET test_folder_id = NULL
WHERE test_folder_id = %(id)s
OR test_folder_id IN (
SELECT element_id FROM documents_elementpath WHERE path && ARRAY[%(id)s]
)
""", {'id': element_id})
logger.info(f"Deleted {cursor.rowcount} usage from process as test folder")
# Remove user selections
cursor.execute("""
DELETE FROM documents_selection selection
......
PRAGMA foreign_keys = ON;
CREATE TABLE export_version AS SELECT 6 AS version;
CREATE TABLE export_version AS SELECT 7 AS version;
CREATE TABLE image_server (
id VARCHAR(37) NOT NULL,
id INTEGER NOT NULL,
display_name VARCHAR(250) NOT NULL,
url TEXT NOT NULL,
max_width INTEGER,
......@@ -18,7 +18,7 @@ CREATE TABLE image (
url TEXT NOT NULL,
width INTEGER NOT NULL,
height INTEGER NOT NULL,
server_id VARCHAR(37) NOT NULL,
server_id INTEGER NOT NULL,
PRIMARY KEY (id),
FOREIGN KEY (server_id) REFERENCES image_server (id) ON DELETE CASCADE,
UNIQUE (url),
......@@ -146,7 +146,7 @@ CREATE TABLE entity_type (
);
CREATE TABLE transcription_entity (
id VARCHAR(37) NOT NULL,
id INTEGER NOT NULL,
transcription_id VARCHAR(37) NOT NULL,
entity_id VARCHAR(37) NOT NULL,
offset INTEGER NOT NULL,
......
This diff is collapsed.
......@@ -29,7 +29,7 @@ PONOS_FARM_NAME = 'Bootstrap farm'
PONOS_FARM_SEED = "b12868101dab84984481741663d809d2393784894d6e807ceee0bd95051bf971"
IMPORT_WORKER_VERSION_ID = 'f2bb8dd7-55e9-49ae-9bd9-b1d2e5d491b9'
IMPORT_WORKER_SLUG = 'file_import'
IMPORT_WORKER_REPO = 'https://gitlab.com/teklia/arkindex/tasks'
IMPORT_WORKER_REPO = 'https://gitlab.teklia.com/arkindex/tasks'
IMPORT_WORKER_REVISION_MESSAGE = 'File import worker bootstrap'
IMPORT_WORKER_REVISION_AUTHOR = 'Dev Bootstrap'
ADMIN_API_TOKEN = "deadbeefTestToken"
......
......@@ -17,16 +17,11 @@ from arkindex.process.models import (
WorkerVersion,
WorkerVersionGPUUsage,
WorkerVersionState,
Workflow,
)
from arkindex.project.tools import fake_now
from arkindex.users.models import Group, Right, Role, User
def fake_now():
    """Return one fixed, timezone-aware timestamp.

    The value is always 2020-02-02 01:23:45.678 UTC, so every call yields
    an equal datetime.

    Returns:
        datetime: the constant UTC timestamp described above.
    """
    # Use the same creation date for all created objects
    return datetime(2020, 2, 2, 1, 23, 45, 678000, tzinfo=timezone.utc)
def square_polygon(x1, x2):
    """Build a closed square ring from two scalar coordinates.

    The ring visits (x1, x1), (x2, x1), (x2, x2), (x1, x2) and then
    returns to (x1, x1), so the first and last points are identical.

    Args:
        x1: coordinate used for both axes of the first corner.
        x2: coordinate used for both axes of the opposite corner.

    Returns:
        LinearRing: the five-point ring described above.
    """
    return LinearRing((x1, x1), (x2, x1), (x2, x2), (x1, x2), (x1, x1))
......@@ -65,11 +60,11 @@ class Command(BaseCommand):
group = Group.objects.create(name="User group", public=False)
group.add_member(user=user, level=Role.Admin.value)
group.add_member(
user=User.objects.create(email='user2@user.fr', display_name='Test user write'),
user=User.objects.create(email='user2@user.fr', display_name='Test user write', verified_email=True),
level=Role.Contributor.value
)
group.add_member(
user=User.objects.create(email='user3@user.fr', display_name='Test user read'),
user=User.objects.create(email='user3@user.fr', display_name='Test user read', verified_email=True),
level=Role.Guest.value
)
......@@ -109,15 +104,26 @@ class Command(BaseCommand):
)
# Create worker types
dla_worker_type = WorkerType.objects.create(slug="dla")
recognizer_worker_type = WorkerType.objects.create(slug="recognizer")
gpu_worker_type = WorkerType.objects.create(slug="worker")
dla_worker_type = WorkerType.objects.create(slug="dla", display_name="Document Layout Analysis")
recognizer_worker_type = WorkerType.objects.create(slug="recognizer", display_name="Recognizer")
gpu_worker_type = WorkerType.objects.create(slug="worker", display_name="Worker requiring a GPU")
import_worker_type = WorkerType.objects.create(slug="import", display_name="Import")
# Create a fake docker build with a docker image task
farm = Farm.objects.create(name="Wheat farm")
workflow = Workflow.objects.create(farm=farm)
build_task = workflow.tasks.create(run=0, depth=0, slug='docker_build', state=State.Completed)
build_process = Process.objects.create(
farm=farm,
creator=superuser,
mode=ProcessMode.Repository,
)
build_task = build_process.tasks.create(
run=0,
depth=0,
slug='docker_build',
state=State.Completed,
# Use an expiry very far away so that task is never expired
expiry=datetime(2100, 12, 31, 23, 59, 59, 999999, timezone.utc),
)
docker_image = build_task.artifacts.create(size=42_000, path='/path/to/docker_build')
# Create some workers for the repository with their available version
......@@ -273,6 +279,10 @@ class Command(BaseCommand):
display_name='Word',
)
# Create 2 datasets
corpus.datasets.create(name='First Dataset', description='dataset number one', creator=user)
corpus.datasets.create(name='Second Dataset', description='dataset number two', creator=user)
# Create 2 volumes
vol1 = Element.objects.create(
corpus=corpus,
......
......@@ -41,7 +41,7 @@ from arkindex.process.models import (
from arkindex.training.models import Model
from arkindex.users.models import Role, User
EXPORT_VERSION = 6
EXPORT_VERSION = 7
TABLE_NAMES = {
'export_version',
......
......@@ -90,7 +90,7 @@ class Command(BaseCommand):
ROW_NUMBER() OVER (
PARTITION BY parent_id
ORDER BY ordering, name, element_id
) AS ordering
) - 1 AS ordering
FROM (
SELECT
DISTINCT ON (element_id, path[array_length(path, 1)])
......
This diff is collapsed.
# Generated by Django 2.2.9 on 2020-02-18 11:18
from django.db import migrations, models
class Migration(migrations.Migration):
    """Redefine the `name` field of the `mlclass` model as CharField(max_length=1024)."""

    # Must be applied after the initial `documents` migration.
    dependencies = [
        ('documents', '0002_initial'),
    ]

    operations = [
        # Alter the existing column in place; no data migration is performed here.
        migrations.AlterField(
            model_name='mlclass',
            name='name',
            field=models.CharField(max_length=1024),
        ),
    ]
This diff is collapsed.