Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • arkindex/backend
1 result
Show changes
Commits on Source (18)
Showing
with 1407 additions and 1221 deletions
......@@ -18,8 +18,8 @@ include:
- .cache/pip
before_script:
- pip install -r tests-requirements.txt
- "echo 'database: {host: postgres, port: 5432}\npublic_hostname: http://ci.arkindex.localhost' > $CONFIG_PATH"
- "echo database: {host: postgres, port: 5432} > $CONFIG_PATH"
- pip install -e .[test]
# These jobs require the base image; they might fail if the image is not up to date.
# Allow them to fail while a new base image is being built, so that they do not block that build.
......@@ -58,7 +58,7 @@ backend-tests:
- test-report.xml
script:
- python3 setup.py test
- arkindex/manage.py test
backend-lint:
image: python:3.10
......@@ -91,7 +91,6 @@ backend-migrations:
alias: postgres
script:
- pip install -e .
- arkindex/manage.py makemigrations --check --noinput --dry-run -v 3
backend-openapi:
......
......@@ -13,13 +13,10 @@ clean:
find . -name '*.pyc' -exec rm {} \;
build:
CI_PROJECT_DIR=$(ROOT_DIR) CI_REGISTRY_IMAGE=$(IMAGE_TAG) $(ROOT_DIR)/ci/build.sh Dockerfile
binary:
CI_PROJECT_DIR=$(ROOT_DIR) CI_REGISTRY_IMAGE=$(IMAGE_TAG) $(ROOT_DIR)/ci/build.sh Dockerfile.binary -binary
CI_PROJECT_DIR=$(ROOT_DIR) CI_REGISTRY_IMAGE=$(IMAGE_TAG) $(ROOT_DIR)/ci/build.sh
worker:
arkindex/manage.py rqworker -v 2 default high
arkindex/manage.py rqworker -v 2 default high tasks
test-fixtures:
$(eval export PGPASSWORD=devdata)
......
......@@ -160,6 +160,11 @@ We use [rq](https://python-rq.org/), integrated via [django-rq](https://pypi.org
To run them, use `make worker` to start an RQ worker. You will need to have Redis running; `make slim` or `make` in the architecture will provide it. `make` in the architecture also provides an RQ worker running in Docker from a binary build.
Process tasks are run in RQ by default (Community Edition). To actually run a process with worker activities, two RQ workers must be running at the same time — one on the `default` and `high` queues and one on the `tasks` queue — so that the initialisation task can wait for the worker activity task to finish:
```sh
$ manage.py rqworker -v 3 default high & manage.py rqworker -v 3 tasks
```
## Metrics
The application serves metrics for Prometheus under the `/metrics` prefix.
A specific port can be used by setting the `PROMETHEUS_METRICS_PORT` environment variable, thus separating the application from the metrics API.
1.6.0-alpha3
1.5.4
......@@ -1303,17 +1303,16 @@ class ElementNeighbors(ACLMixin, ListAPIView):
queryset = Element.objects.none()
def get_queryset(self):
element = get_object_or_404(
# Include the attributes required for ACL checks and the API response
Element.objects.select_related("corpus", "type").only("id", "name", "type__slug", "corpus__public"),
Element
.objects
.filter(corpus__in=Corpus.objects.readable(self.request.user))
.select_related("corpus", "type")
.only("id", "name", "type__slug", "corpus__public"),
id=self.kwargs["pk"]
)
# Check access permission
if not self.has_access(element.corpus, Role.Guest.value):
raise PermissionDenied(detail="You do not have a read access to this element.")
return Element.objects.get_neighbors(element)
......@@ -2193,6 +2192,14 @@ class CorpusSelectionDestroy(CorpusACLMixin, SelectionMixin, DestroyAPIView):
delete=extend_schema(
operation_id="DestroyWorkerResults",
parameters=[
OpenApiParameter(
"worker_run_id",
type=UUID,
required=False,
description="Only delete Worker Results produced by a specific worker run. "
"If this parameter is set, any `worker_version_id`, `model_version_id` "
"or `configuration_id` parameters will be ignored.",
),
OpenApiParameter(
"worker_version_id",
type=UUID,
......@@ -2233,38 +2240,30 @@ class CorpusSelectionDestroy(CorpusACLMixin, SelectionMixin, DestroyAPIView):
)
class WorkerResultsDestroy(CorpusACLMixin, DestroyAPIView):
"""
Delete all Worker Results from all WorkerVersions or a specific one
on a Corpus or under a specified parent element (parent element included)
Delete all Worker Results, or Worker Results produced by specific WorkerRuns or WorkerVersions
(the results to delete can also be filtered by ModelVersion and Configuration)
on a Corpus, the selection, or under a specified parent element (parent element included)
"""
permission_classes = (IsVerified, )
# https://github.com/tfranzel/drf-spectacular/issues/308
@extend_schema(responses={204: None})
def delete(self, request, *args, **kwargs):
corpus = self.get_corpus(self.kwargs["corpus"], role=Role.Admin)
def results_filters(self):
errors = defaultdict(list)
use_selection = self.request.query_params.get("use_selection", "false").lower() not in ("false", "0")
if use_selection:
# Only check for selected elements if the selection feature is enabled
if settings.ARKINDEX_FEATURES["selection"]:
if "element_id" in self.request.query_params:
errors["use_selection"].append("use_selection and element_id cannot be used simultaneously.")
if not self.request.user.selected_elements.filter(corpus=corpus).exists():
errors["use_selection"].append("No elements of the specified corpus have been selected.")
else:
errors["use_selection"].append("Selection is not available on this instance.")
element_id = None
if "element_id" in self.request.query_params:
if "worker_run_id" in self.request.query_params:
try:
element_id = UUID(self.request.query_params["element_id"])
worker_run_id = UUID(self.request.query_params["worker_run_id"])
except (TypeError, ValueError):
errors["element_id"].append("Invalid UUID.")
raise ValidationError({"worker_run_id": ["Invalid UUID."]})
else:
if not corpus.elements.filter(id=element_id).exists():
errors["element_id"].append("This element does not exist in the specified corpus.")
try:
worker_run = WorkerRun.objects.get(id=worker_run_id)
except WorkerRun.DoesNotExist:
raise ValidationError({"worker_run_id": ["This worker run does not exist."]})
# Ignore the other parameters when a worker run ID is set
return {
"worker_run": worker_run,
}
worker_version = None
if "worker_version_id" in self.request.query_params:
......@@ -2314,22 +2313,60 @@ class WorkerResultsDestroy(CorpusACLMixin, DestroyAPIView):
if errors:
raise ValidationError(errors)
return {
"version": worker_version,
"model_version": model_version,
"configuration": configuration
}
# https://github.com/tfranzel/drf-spectacular/issues/308
@extend_schema(responses={204: None})
def delete(self, request, *args, **kwargs):
corpus = self.get_corpus(self.kwargs["corpus"], role=Role.Admin)
errors = defaultdict(list)
use_selection = self.request.query_params.get("use_selection", "false").lower() not in ("false", "0")
if use_selection:
# Only check for selected elements if the selection feature is enabled
if settings.ARKINDEX_FEATURES["selection"]:
if "element_id" in self.request.query_params:
errors["use_selection"].append("use_selection and element_id cannot be used simultaneously.")
if not self.request.user.selected_elements.filter(corpus=corpus).exists():
errors["use_selection"].append("No elements of the specified corpus have been selected.")
else:
errors["use_selection"].append("Selection is not available on this instance.")
element_id = None
if "element_id" in self.request.query_params:
try:
element_id = UUID(self.request.query_params["element_id"])
except (TypeError, ValueError):
errors["element_id"].append("Invalid UUID.")
else:
if not corpus.elements.filter(id=element_id).exists():
errors["element_id"].append("This element does not exist in the specified corpus.")
try:
filters = self.results_filters()
except ValidationError as errs:
errors = errors | errs.detail
if errors:
raise ValidationError(errors)
if use_selection:
selection_worker_results_delete(
corpus=corpus,
version=worker_version,
model_version=model_version,
configuration=configuration,
user_id=self.request.user.id,
**filters
)
else:
worker_results_delete(
corpus_id=corpus.id,
version=worker_version,
element_id=element_id,
model_version=model_version,
configuration=configuration,
user_id=self.request.user.id,
**filters
)
return Response(status=status.HTTP_204_NO_CONTENT)
......
This diff is collapsed.
......@@ -104,6 +104,7 @@ def element_trash(queryset: ElementQuerySet, delete_children: bool) -> None:
@job("high", timeout=settings.RQ_TIMEOUTS["worker_results_delete"])
def selection_worker_results_delete(
corpus_id: str,
worker_run_id: Optional[str] = None,
model_version_id: Optional[str] = None,
configuration_id: Optional[str | Literal[False]] = None,
version_id: Optional[str] = None,
......@@ -123,6 +124,7 @@ def selection_worker_results_delete(
worker_results_delete(
corpus_id=corpus_id,
element_id=element_id,
worker_run_id=worker_run_id,
version_id=version_id,
model_version_id=model_version_id,
configuration_id=configuration_id,
......@@ -132,6 +134,7 @@ def selection_worker_results_delete(
@job("high", timeout=settings.RQ_TIMEOUTS["worker_results_delete"])
def worker_results_delete(
corpus_id: str,
worker_run_id: Optional[str] = None,
version_id: Optional[str] = None,
element_id: Optional[str] = None,
model_version_id: Optional[str] = None,
......@@ -142,6 +145,8 @@ def worker_results_delete(
whole corpus, under a specified parent element (parent element included), or on a single element.
Results can be filtered depending on a specific model version and a specific or unset configuration.
"""
assert (not worker_run_id or not version_id), "The worker_run_id and version_id parameters are mutually exclusive."
elements = Element.objects.filter(corpus_id=corpus_id)
classifications = Classification.objects.filter(element__corpus_id=corpus_id)
transcriptions = Transcription.objects.filter(element__corpus_id=corpus_id)
......@@ -155,8 +160,19 @@ def worker_results_delete(
metadata = MetaData.objects.filter(element__corpus_id=corpus_id)
worker_activities = WorkerActivity.objects.filter(element__corpus_id=corpus_id)
# When a worker run ID is defined, filter by that worker run ID
if worker_run_id:
elements = elements.filter(worker_run_id=worker_run_id)
classifications = classifications.filter(worker_run_id=worker_run_id)
transcriptions = transcriptions.filter(worker_run_id=worker_run_id)
transcription_entities = transcription_entities.filter(transcription__worker_run_id=worker_run_id)
worker_transcription_entities = worker_transcription_entities.filter(worker_run_id=worker_run_id)
metadata = metadata.filter(worker_run_id=worker_run_id)
# There is no worker_run_id on Worker Activities so the best thing we can do is delete the worker activities
# attached to the elements produced with that worker run, and they are already being deleted by elements.trash()
worker_activities = worker_activities.none()
# When a version ID is defined, filter by the exact version ID
if version_id:
elif version_id:
elements = elements.filter(worker_version_id=version_id)
classifications = classifications.filter(worker_version_id=version_id)
transcriptions = transcriptions.filter(worker_version_id=version_id)
......@@ -164,9 +180,12 @@ def worker_results_delete(
worker_transcription_entities = worker_transcription_entities.filter(worker_version_id=version_id)
metadata = metadata.filter(worker_version_id=version_id)
worker_activities = worker_activities.filter(worker_version_id=version_id)
# Otherwise, select everything that has any worker version ID.
# Otherwise, select everything that has any worker version ID. (When something has been created
# by a worker run, it always has a worker version; however we have things that were created with
# a worker version but without a worker run.)
# We use worker_version_id != None and not worker_version_id__isnull=False,
# because isnull would cause an unnecessary LEFT JOIN query.
# No extra filtering is needed on worker activities, since worker versions cannot be null there.
else:
elements = elements.exclude(worker_version_id=None)
classifications = classifications.exclude(worker_version_id=None)
......@@ -174,7 +193,6 @@ def worker_results_delete(
transcription_entities = transcription_entities.exclude(transcription__worker_version_id=None)
worker_transcription_entities = worker_transcription_entities.exclude(worker_version_id=None)
metadata = metadata.exclude(worker_version_id=None)
worker_activities = worker_activities.exclude(worker_version_id=None)
if element_id:
# include_children causes a deletion *only* on the element's descendants.
......@@ -202,17 +220,16 @@ def worker_results_delete(
metadata = metadata.filter(element_id=element_id)
worker_activities = worker_activities.filter(element_id=element_id)
if model_version_id:
if not worker_run_id and model_version_id:
elements = elements.filter(worker_run__model_version_id=model_version_id)
classifications = classifications.filter(worker_run__model_version_id=model_version_id)
transcriptions = transcriptions.filter(worker_run__model_version_id=model_version_id)
transcription_entities = transcription_entities.filter(transcription__worker_run__model_version_id=model_version_id)
worker_transcription_entities = worker_transcription_entities.filter(worker_run__model_version_id=model_version_id)
metadata = metadata.filter(worker_run__model_version_id=model_version_id)
# Activities are not linked to a worker run and cannot be filtered by model version
worker_activities = worker_activities.none()
worker_activities = worker_activities.filter(model_version_id=model_version_id)
if configuration_id is not None:
if not worker_run_id and configuration_id is not None:
if configuration_id is False:
# Only delete results generated on a worker run with no configuration
elements = elements.filter(worker_run__configuration_id=None)
......@@ -247,6 +264,7 @@ def worker_results_delete(
# we were supposed to delete worker results on.
worker_results_delete(
corpus_id=corpus_id,
worker_run_id=worker_run_id,
version_id=version_id,
element_id=element_id,
model_version_id=model_version_id,
......
......@@ -3,7 +3,7 @@ from django.db.models.signals import pre_delete
from arkindex.documents.models import Corpus, Element, EntityType, MetaType, Transcription
from arkindex.documents.tasks import corpus_delete
from arkindex.ponos.models import Farm, State, Task
from arkindex.process.models import CorpusWorkerVersion, ProcessMode, Repository, WorkerVersion
from arkindex.process.models import CorpusWorkerVersion, ProcessDataset, ProcessMode, Repository, WorkerVersion
from arkindex.project.tests import FixtureTestCase, force_constraints_immediate
from arkindex.training.models import Dataset
......@@ -118,13 +118,14 @@ class TestDeleteCorpus(FixtureTestCase):
cls.dataset2 = Dataset.objects.create(name="Dead Sea Scrolls", description="How to trigger a Third Impact", creator=cls.user, corpus=cls.corpus2)
# Process on cls.corpus and with a dataset from cls.corpus
dataset_process1 = cls.corpus.processes.create(creator=cls.user, mode=ProcessMode.Dataset)
dataset_process1.datasets.set([dataset1])
ProcessDataset.objects.create(process=dataset_process1, dataset=dataset1, sets=dataset1.sets)
# Process on cls.corpus with a dataset from another corpus
dataset_process2 = cls.corpus.processes.create(creator=cls.user, mode=ProcessMode.Dataset)
dataset_process2.datasets.set([dataset1, cls.dataset2])
ProcessDataset.objects.create(process=dataset_process2, dataset=dataset1, sets=dataset1.sets)
ProcessDataset.objects.create(process=dataset_process2, dataset=cls.dataset2, sets=cls.dataset2.sets)
# Process on another corpus with a dataset from another corpus and none from cls.corpus
cls.dataset_process2 = cls.corpus2.processes.create(creator=cls.user, mode=ProcessMode.Dataset)
cls.dataset_process2.datasets.set([cls.dataset2])
cls.dataset_process3 = cls.corpus2.processes.create(creator=cls.user, mode=ProcessMode.Dataset)
ProcessDataset.objects.create(process=cls.dataset_process3, dataset=cls.dataset2, sets=cls.dataset2.sets)
cls.rev = cls.repo.revisions.create(
hash="42",
......@@ -200,14 +201,14 @@ class TestDeleteCorpus(FixtureTestCase):
self.df.refresh_from_db()
self.vol.refresh_from_db()
self.page.refresh_from_db()
self.dataset_process2.refresh_from_db()
self.dataset_process3.refresh_from_db()
self.assertTrue(self.repo.revisions.filter(id=self.rev.id).exists())
self.assertEqual(self.process.revision, self.rev)
self.assertEqual(self.process.files.get(), self.df)
self.assertTrue(Element.objects.get_descending(self.vol.id).filter(id=self.page.id).exists())
self.assertTrue(self.corpus2.datasets.filter(id=self.dataset2.id).exists())
self.assertTrue(self.corpus2.processes.filter(id=self.dataset_process2.id).exists())
self.assertTrue(self.corpus2.processes.filter(id=self.dataset_process3.id).exists())
md = self.vol.metadatas.get()
self.assertEqual(md.name, "meta")
......
from unittest.mock import patch
from django.test import override_settings
from django.urls import reverse
......@@ -163,7 +164,8 @@ class TestClasses(FixtureAPITestCase):
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertEqual(response.json(), {"search": ["There cannot be more than 3 unique search terms."]})
def test_corpus_classes_corpus_rights(self):
@patch("arkindex.project.mixins.has_access", return_value=False)
def test_corpus_classes_corpus_rights(self, has_access_mock):
self.client.force_login(self.user)
private_corpus = Corpus.objects.create(name="private")
response = self.client.post(reverse("api:corpus-classes", kwargs={"pk": private_corpus.pk}), {})
......@@ -233,7 +235,8 @@ class TestClasses(FixtureAPITestCase):
response = self.client.put(reverse("api:ml-class-retrieve", kwargs={"corpus": self.corpus.id, "mlclass": self.text.id}), {"name": "new name"})
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
def test_update_requires_contributor(self):
@patch("arkindex.project.mixins.has_access", return_value=False)
def test_update_requires_contributor(self, has_access_mock):
self.user.rights.update(level=Role.Guest.value)
self.client.force_login(self.user)
response = self.client.put(reverse("api:ml-class-retrieve", kwargs={"corpus": self.corpus.id, "mlclass": self.text.id}), {"name": "new name"})
......@@ -262,7 +265,8 @@ class TestClasses(FixtureAPITestCase):
response = self.client.patch(reverse("api:ml-class-retrieve", kwargs={"corpus": self.corpus.id, "mlclass": self.text.id}), {"name": "new name"})
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
def test_partial_update_requires_contributor(self):
@patch("arkindex.project.mixins.has_access", return_value=False)
def test_partial_update_requires_contributor(self, has_access_mock):
self.user.rights.update(level=Role.Guest.value)
self.client.force_login(self.user)
response = self.client.patch(reverse("api:ml-class-retrieve", kwargs={"corpus": self.corpus.id, "mlclass": self.text.id}), {"name": "new name"})
......@@ -295,7 +299,8 @@ class TestClasses(FixtureAPITestCase):
response = self.client.delete(reverse("api:ml-class-retrieve", kwargs={"corpus": self.corpus.id, "mlclass": self.text.id}))
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
def test_destroy_requires_contributor(self):
@patch("arkindex.project.mixins.has_access", return_value=False)
def test_destroy_requires_contributor(self, has_access_mock):
self.user.rights.update(level=Role.Guest.value)
self.client.force_login(self.user)
response = self.client.delete(reverse("api:ml-class-retrieve", kwargs={"corpus": self.corpus.id, "mlclass": self.text.id}))
......
This diff is collapsed.