diff --git a/VERSION b/VERSION index bd8bf882d06184bb54615a59477e3c5e35f522fc..943f9cbc4ec7ee28e6febd1d4451df4c39bcb193 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.7.0 +1.7.1 diff --git a/arkindex/documents/indexer.py b/arkindex/documents/indexer.py index d90d4f9a478a2894ff0453f71e60dcbe15476997..3ef0188ad3be6b5fc6abd41d36efd969832cbf44 100644 --- a/arkindex/documents/indexer.py +++ b/arkindex/documents/indexer.py @@ -123,6 +123,7 @@ class Indexer: # Classification fields {"name": "classification_id", "indexed": False, "required": False, "type": "uuid"}, {"name": "classification_name", "indexed": True, "required": False, "type": "full_string"}, + {"name": "classification_text", "indexed": True, "required": False, "type": "string"}, {"name": "classification_confidence", "indexed": True, "required": False, "type": "pfloat"}, {"name": "classification_worker", "indexed": True, "required": False, "type": "full_string"}, # Metadata fields @@ -269,6 +270,7 @@ class Indexer: "id": str(self.build_solr_id(element, classification)), "classification_id": str(classification.id), "classification_name": classification.ml_class.name, + "classification_text": classification.ml_class.name, "classification_confidence": classification.confidence, "classification_worker": self.hash_worker(classification.worker_run) }) for classification in element.classifications.all() diff --git a/arkindex/documents/serializers/search.py b/arkindex/documents/serializers/search.py index 799ade083c065add32fa87094555bedd67b837e4..7a5d499f63b6dc59d838ffa9ddcbae3df9cef957 100644 --- a/arkindex/documents/serializers/search.py +++ b/arkindex/documents/serializers/search.py @@ -32,6 +32,7 @@ class SolrDocumentSerializer(serializers.Serializer): classification_id = serializers.UUIDField(allow_null=True) classification_name = serializers.CharField(allow_null=True) + classification_text = serializers.CharField(allow_null=True) classification_confidence = serializers.FloatField(min_value=0, max_value=1, 
allow_null=True) classification_worker = serializers.CharField(allow_null=True) @@ -102,9 +103,10 @@ class CorpusSearchQuerySerializer(serializers.Serializer): ("element", "element"), ("transcription", "transcription"), ("metadata", "metadata"), - ("entity", "entity") + ("entity", "entity"), + ("classification", "classification") ], - default={"element", "transcription", "metadata", "entity"}, + default={"element", "transcription", "metadata", "entity", "classification"}, help_text="List of sources to be searched on.", ) diff --git a/arkindex/documents/tests/commands/test_reindex.py b/arkindex/documents/tests/commands/test_reindex.py index 9446a4dcb3521209a2d46345fc2e163e91e94fe8..a3ce57de6ee76cb65d070b018070d7b0c552d7d4 100644 --- a/arkindex/documents/tests/commands/test_reindex.py +++ b/arkindex/documents/tests/commands/test_reindex.py @@ -251,6 +251,7 @@ class TestReindexCommand(FixtureTestCase): "parent_type": self.page.type.display_name, "classification_id": str(cl_1.id), "classification_name": cl_1.ml_class.name, + "classification_text": cl_1.ml_class.name, "classification_confidence": cl_1.confidence, "classification_worker": self.worker.name, }, @@ -265,6 +266,7 @@ class TestReindexCommand(FixtureTestCase): "parent_type": self.page.type.display_name, "classification_id": str(cl_2.id), "classification_name": cl_2.ml_class.name, + "classification_text": cl_2.ml_class.name, "classification_confidence": cl_2.confidence, "classification_worker": self.worker.name, } diff --git a/arkindex/documents/tests/test_indexer.py b/arkindex/documents/tests/test_indexer.py index 3a25eadf5aad8e8a77364c8cf3bc3bfc8ada7b0f..8fd06cb9e49d9d95511ea12ed86d563e807b899c 100644 --- a/arkindex/documents/tests/test_indexer.py +++ b/arkindex/documents/tests/test_indexer.py @@ -181,6 +181,7 @@ class TestIndexerCommand(FixtureTestCase): "id": str(indexer.build_solr_id(self.page, cl_1)), "classification_id": str(cl_1.id), "classification_name": cl_1.ml_class.name, + "classification_text": 
cl_1.ml_class.name, "classification_confidence": cl_1.confidence, "classification_worker": self.worker.name, }, @@ -189,6 +190,7 @@ class TestIndexerCommand(FixtureTestCase): "id": str(indexer.build_solr_id(self.page, cl_2)), "classification_id": str(cl_2.id), "classification_name": cl_2.ml_class.name, + "classification_text": cl_2.ml_class.name, "classification_confidence": cl_2.confidence, "classification_worker": self.worker.name, } diff --git a/arkindex/documents/tests/test_search_api.py b/arkindex/documents/tests/test_search_api.py index 9aacaf4d34af09a2817daf16e51ba0868f37113c..cd08ee1708c183892d8125edcb197b4cccafae86 100644 --- a/arkindex/documents/tests/test_search_api.py +++ b/arkindex/documents/tests/test_search_api.py @@ -121,11 +121,8 @@ class TestSearchApi(FixtureAPITestCase): @override_settings(ARKINDEX_FEATURES={"search": True}) @patch("arkindex.documents.api.search.solr") def test_search(self, mock_solr): + self.maxDiff = None collection_name = f"project-{self.corpus.id}" - possible_queries = [ - '(element_text:("I search" OR "Found") OR transcription_text:("I search" OR "Found")) AND (metadata_name:"folio" AND entity_type:"person")', - '(transcription_text:("I search" OR "Found") OR element_text:("I search" OR "Found")) AND (metadata_name:"folio" AND entity_type:"person")' - ] docs = [{ "id": "document_id", "parent_id": "parent_id", @@ -142,6 +139,7 @@ class TestSearchApi(FixtureAPITestCase): "transcription_worker": "1234567890_A worker", "classification_id": "classification_id", "classification_name": "my class", + "classification_text": "my class", "classification_confidence": 0.1, "classification_worker": "1234567890_A worker", "metadata_id": "metadata_id", @@ -157,11 +155,12 @@ class TestSearchApi(FixtureAPITestCase): # Mock SolrClient mock_solr.collections.exists.return_value = True - solr_response = self.build_solr_response(docs=docs, query=possible_queries[0]) + query = '(element_text:("I search" OR "Found") OR transcription_text:("I search" 
OR "Found") OR classification_text:("I search" OR "Found")) AND (metadata_name:"folio" AND entity_type:"person")' + solr_response = self.build_solr_response(docs=docs, query=query) mock_solr.query.return_value = solr_response payload = { - "sources[]": ["element", "transcription"], + "sources[]": ["element", "transcription", "classification"], "metadata_name": "folio", "entity_type": "person", "query": '"I search" OR "Found"', @@ -182,7 +181,11 @@ class TestSearchApi(FixtureAPITestCase): self.assertEqual(index_name, collection_name) (index_name, args), kwargs = mock_solr.query.call_args self.assertEqual(index_name, collection_name) - self.assertIn(args.pop("q"), possible_queries) + # The order in which the sources appear in the query is random, so we just check that all the sources are there + q = args.pop("q") + self.assertIn('element_text:("I search" OR "Found")', q) + self.assertIn('transcription_text:("I search" OR "Found")', q) + self.assertIn('classification_text:("I search" OR "Found")', q) self.assertDictEqual(args, { "start": 0, "rows": 20, diff --git a/arkindex/ponos/migrations/0014_task_task_finished_requires_final_state.py b/arkindex/ponos/migrations/0014_task_task_finished_requires_final_state.py new file mode 100644 index 0000000000000000000000000000000000000000..13dbe15627c24fa28c4abc8c337d8960c4c3fe18 --- /dev/null +++ b/arkindex/ponos/migrations/0014_task_task_finished_requires_final_state.py @@ -0,0 +1,43 @@ +# Generated by Django 5.0.8 on 2025-01-07 10:11 + +from django.db import migrations, models + +from arkindex.ponos.models import State + +# Copy the FINAL_STATES here so that if we ever change them, +# Django will detect it and require a new migration +FINAL_STATES = ( + State.Completed, + State.Failed, + State.Error, + State.Stopped, + State.Cancelled, +) + +def clear_unexpected_finish_dates(apps, schema_editor): + Task = apps.get_model("ponos", "Task") + 
Task.objects.exclude(state__in=FINAL_STATES).exclude(finished=None).update(finished=None) + + +class Migration(migrations.Migration): + + dependencies = [ + ("ponos", "0013_task_ttl"), + ("process", "0046_workerrun_ttl"), + ] + + operations = [ + migrations.RunPython( + clear_unexpected_finish_dates, + reverse_code=migrations.RunPython.noop, + elidable=True, + ), + migrations.AddConstraint( + model_name="task", + constraint=models.CheckConstraint( + check=models.Q(finished=None) | models.Q(state__in=FINAL_STATES), + name="task_finished_requires_final_state", + violation_error_message="Only tasks in a final state can have a finish date set.", + ), + ), + ] diff --git a/arkindex/ponos/models.py b/arkindex/ponos/models.py index c9d3e109f8818d65ee4ed5e03059cf7f51c7a4b4..e77e10df10c44acead36174613d930f84b16ae54 100644 --- a/arkindex/ponos/models.py +++ b/arkindex/ponos/models.py @@ -394,6 +394,11 @@ class Task(models.Model): name="task_finished_after_started", violation_error_message="The task finish date must not be earlier than the task start date.", ), + models.CheckConstraint( + check=Q(finished=None) | Q(state__in=FINAL_STATES), + name="task_finished_requires_final_state", + violation_error_message="Only tasks in a final state can have a finish date set.", + ), ] def __str__(self) -> str: diff --git a/arkindex/ponos/tests/tasks/test_partial_update.py b/arkindex/ponos/tests/tasks/test_partial_update.py index e43cc217ae4d2690c4862846aa66ae0ed79842ed..f4c8adbe30a6260adb86d1c18ff89f9943af60e6 100644 --- a/arkindex/ponos/tests/tasks/test_partial_update.py +++ b/arkindex/ponos/tests/tasks/test_partial_update.py @@ -83,6 +83,7 @@ class TestTaskPartialUpdate(FixtureAPITestCase): for (state_from, state_to) in self.docker_task_transitions: with self.subTest(state_from=state_from, state_to=state_to): self.task1.state = state_from + self.task1.finished = None self.task1.save() resp = self.client.patch( reverse("api:task-details", args=[self.task1.id]), @@ -114,6 +115,7 @@ 
class TestTaskPartialUpdate(FixtureAPITestCase): for (state_from, state_to) in self.slurm_task_transitions: with self.subTest(state_from=state_from, state_to=state_to): self.task1.state = state_from + self.task1.finished = None self.task1.save() resp = self.client.patch( reverse("api:task-details", args=[self.task1.id]), diff --git a/arkindex/ponos/tests/tasks/test_update.py b/arkindex/ponos/tests/tasks/test_update.py index 97029d9324ec065e5005327b9975a48cf7aabc22..762a7fd31cac76e51dfad964cc3aee684902708c 100644 --- a/arkindex/ponos/tests/tasks/test_update.py +++ b/arkindex/ponos/tests/tasks/test_update.py @@ -501,6 +501,7 @@ class TestTaskUpdate(FixtureAPITestCase): for (state_from, state_to) in self.docker_task_transitions: with self.subTest(state_from=state_from, state_to=state_to): self.task1.state = state_from + self.task1.finished = None self.task1.save() resp = self.client.put( reverse("api:task-details", args=[self.task1.id]), @@ -532,6 +533,7 @@ class TestTaskUpdate(FixtureAPITestCase): for (state_from, state_to) in self.slurm_task_transitions: with self.subTest(state_from=state_from, state_to=state_to): self.task1.state = state_from + self.task1.finished = None self.task1.save() resp = self.client.put( reverse("api:task-details", args=[self.task1.id]), diff --git a/arkindex/process/api.py b/arkindex/process/api.py index 8348ff930641d50335a665336eb1f0b95502208f..b1dfb348b851fa2ca1a69f39f43a48305a137ab7 100644 --- a/arkindex/process/api.py +++ b/arkindex/process/api.py @@ -922,7 +922,7 @@ class WorkerTypesList(ListAPIView): "mode", type=str, default="complete", - description=("If set to `simple`, exclude versions with no tag") + description="When this is set to `simple`, only the worker versions with a tag set, or on the `main` or `master` branches, will be returned." 
) ], ), @@ -967,8 +967,8 @@ class WorkerVersionList(ListCreateAPIView): def get_queryset(self): filters = Q() if self.simple_mode: - # Limit output to versions with tags - filters = Q(tag__isnull=False) + # Limit output to versions with tags or from the main/master branch + filters = Q(tag__isnull=False) | Q(branch__in=["main", "master"]) return self.worker.versions \ .using("default") \ .filter(filters) \ diff --git a/arkindex/process/management/commands/update_system_workers.py b/arkindex/process/management/commands/update_system_workers.py index 49949726e778305b7f38c53beaa7520df947f495..911350f92333dcb50e13e0faddd76292a60adc39 100644 --- a/arkindex/process/management/commands/update_system_workers.py +++ b/arkindex/process/management/commands/update_system_workers.py @@ -1,11 +1,18 @@ +from collections import defaultdict + +import requests +import yaml from django.conf import settings from django.core.management.base import BaseCommand, CommandError from django.db import transaction from django.db.models import Max -from teklia_toolbox.config import ConfigParser +from teklia_toolbox.config import ConfigParser, ConfigurationError from arkindex.process.models import ArkindexFeature, FeatureUsage, Worker, WorkerType, WorkerVersion, WorkerVersionState +REQUEST_TIMEOUT = (30, 60) +WORKER_YAML_VERSION = 2 + def parse_config(): parser = ConfigParser() @@ -15,16 +22,33 @@ def parse_config(): features_parser = parser.add_subparser("features") for feature in ArkindexFeature: feature_parser = features_parser.add_subparser(feature.value, allow_extra_keys=False, default={}) - feature_parser.add_option("image", type=str) + feature_parser.add_option("image", type=str, default=None) feature_parser.add_option("command", type=str, default=None) - - return parser.parse(settings.BASE_DIR / "system_workers.yml") + teklia_worker_parser = feature_parser.add_subparser("teklia_worker", allow_extra_keys=False, default=None) + teklia_worker_parser.add_option("name", type=str, 
default=None) + teklia_worker_parser.add_option("version", type=str, default=None) + teklia_worker_parser.add_option("slug", type=str, default=None) + + parsed = parser.parse(settings.BASE_DIR / "system_workers.yml") + + errors = defaultdict(list) + for feature, config in parsed["features"].items(): + if config["image"] and config["teklia_worker"]: + errors[feature].append("Exactly one of image/command or teklia_parser must be set") + continue + if (config["command"] and config["image"] is None): + errors[feature].append("command argument must be set with the image argument") + if (subparser := config["teklia_worker"]) and (None in subparser.values()): + errors[feature].append("teklia_parser configuration must define a name, a version and a slug") + if errors: + raise ConfigurationError(errors) + return parsed class Command(BaseCommand): help = "Update the workers used to provide Arkindex features to the versions compatible with this release." - def get_system_worker(self, feature: ArkindexFeature) -> Worker: + def get_system_worker(self, feature: ArkindexFeature, repo = None) -> Worker: """ Update or create a `system` worker for this feature. Creates the `system` worker type if it does not exist. @@ -48,6 +72,7 @@ class Command(BaseCommand): worker, created = Worker.objects.get_or_create( type=worker_type, slug=feature.value, + repository_url=repo, defaults={ "name": feature.name, "public": True, @@ -58,7 +83,7 @@ class Command(BaseCommand): self.stdout.write(f"Created new {worker.name} system worker") else: self.stdout.write(f"Using existing system worker {worker.name}") - self.update_existing_worker(worker) + self. 
update_existing_worker(worker) return worker @@ -83,7 +108,7 @@ class Command(BaseCommand): else: self.stdout.write("Worker is up to date") - def update_or_create_version(self, worker: Worker, feature: ArkindexFeature, docker_image: str, docker_command: str = None) -> None: + def update_or_create_version(self, worker: Worker, feature: ArkindexFeature, docker_image: str, docker_command: str = None, configuration: dict = {}) -> None: """ On a specified worker, assigns an existing version to a feature or creates a new one. Expects that no version is already assigned to this feature on any worker. @@ -97,11 +122,9 @@ class Command(BaseCommand): # Sorting by state means we prefer `available` versions first, then `created`, then `error`. worker_version = worker.versions.filter( docker_image_iid=docker_image, - # We ignore existing versions with attributes that could interfere with the features, - # like blocking a start or retry, or wasting resources. - configuration__user_configuration__isnull=True, gpu_usage=FeatureUsage.Disabled, model_usage=FeatureUsage.Disabled, + configuration=configuration, **docker_command_filter ).order_by("state", "-updated").first() @@ -130,11 +153,11 @@ class Command(BaseCommand): feature=feature, state=WorkerVersionState.Available, version=max_version + 1, - configuration={"docker": {"command": docker_command}} if docker_command else {} + configuration=configuration, ) self.stdout.write(self.style.SUCCESS(f"Using new worker version {worker_version.id}")) - def check_existing_version(self, worker_version: WorkerVersion, feature: ArkindexFeature, docker_image: str, docker_command: str = None) -> None: + def check_existing_version(self, worker_version: WorkerVersion, feature: ArkindexFeature, *, docker_image: str, docker_command: str = None, configuration: dict = {}, repo: str = None) -> None: self.stdout.write(f"Current worker version: {worker_version.id} ({worker_version.docker_image_iid})") valid = True @@ -158,8 +181,10 @@ class 
Command(BaseCommand): self.stdout.write(self.style.WARNING("This version uses a custom Docker command which could interfere with the feature.")) valid = False - if worker_version.required_user_configuration_fields: - self.stdout.write(self.style.WARNING("This version requires a custom worker configuration which could interfere with the feature.")) + if worker_version.configuration != configuration: + self.stdout.write(self.style.WARNING( + "This version uses a custom configuration which could interfere with the feature." + )) valid = False if valid: @@ -176,26 +201,64 @@ class Command(BaseCommand): worker = worker_version.worker if worker.archived is not None: # except if it is archived, since the new version would be invalid - worker = self.get_system_worker(feature) + worker = self.get_system_worker(feature, repo) else: self.update_existing_worker(worker_version.worker) - self.update_or_create_version(worker, feature, docker_image, docker_command) + self.update_or_create_version(worker, feature, docker_image, docker_command, configuration) @transaction.atomic - def update_feature(self, feature: ArkindexFeature, config: dict): - self.stdout.write(f" {feature.name} ".center(80, "─")) - self.stdout.write(f"Using {config['image']} to provide {feature.name}") + def update_feature(self, feature: ArkindexFeature, *, image, command, configuration = {}, repo = None): + self.stdout.write(f"Using {image} to provide {feature.name}") try: worker_version = WorkerVersion.objects.get_by_feature(feature) - self.check_existing_version(worker_version, feature, config["image"], config["command"]) + self.check_existing_version( + worker_version, + feature, + docker_image=image, + docker_command=command, + configuration=configuration + ) except WorkerVersion.DoesNotExist: - worker = self.get_system_worker(feature) - self.update_or_create_version(worker, feature, config["image"], config["command"]) + worker = self.get_system_worker(feature, repo) + self.update_or_create_version(worker, 
feature, image, command, configuration) + + def update_feature_from_worker(self, feature, *, name, version, slug): + """ + Update a feature from a worker repository hosted on https://gitlab.teklia.com/ + """ + repo = f"https://gitlab.teklia.com/{name}" + self.stdout.write(f"Configuring feature {feature} from {repo}") + + # Retrieve the .arkindex.yml file with no auth + url = f"{repo}/-/raw/{version}/.arkindex.yml" + with requests.get(url, timeout=REQUEST_TIMEOUT) as resp: + try: + resp.raise_for_status() + except requests.exceptions.HTTPError as e: + raise CommandError(f"Error retrieving configuration at {url}: {e.response.status_code}.") + data = yaml.safe_load(resp.content) + if not isinstance(data, dict) or data.get("version", 0) < WORKER_YAML_VERSION or not data.get("workers"): + raise CommandError(f"Error retrieving configuration at {url}: invalid YAML configuration.") + # Look for the worker matching feature's slug + worker_conf = next((worker for worker in data["workers"] if worker["slug"] == slug), None) + if worker_conf is None: + raise CommandError(f"No worker with slug {slug} in .arkindex.yml at {url}.") + image = f"registry.gitlab.teklia.com/{name}:{version}" + command = worker_conf.get("docker", {}).get("command", None) + self.update_feature(feature, image=image, command=command, configuration=worker_conf, repo=repo) def handle(self, *args, **options): config = parse_config() for feature_value, feature_config in config["features"].items(): feature = ArkindexFeature(feature_value) - self.update_feature(feature, feature_config) + self.stdout.write(f" {feature.name} ".center(80, "─")) + worker = feature_config.pop("teklia_worker", None) + if worker is not None: + self.update_feature_from_worker(feature, **worker) + else: + configuration = {} + if (command := feature_config["command"]): + configuration["docker"] = {"command": command} + self.update_feature(feature, **feature_config, configuration=configuration) diff --git a/arkindex/process/models.py 
b/arkindex/process/models.py index b618b6c530dc3ee5f8435a8a31dcc6fe9b9930f9..4b0ac2d2aecc3638fe008dbe0903fe13aeb960db 100644 --- a/arkindex/process/models.py +++ b/arkindex/process/models.py @@ -70,6 +70,8 @@ class ProcessMode(Enum): class ExportFormat(Enum): PDF = "pdf" PageXML = "page_xml" + DOCX = "docx" + CSV = "csv" class Process(IndexableModel): @@ -703,6 +705,8 @@ class ArkindexFeature(Enum): # FEATURE_FORMAT_MAP dictionary which maps export formats to arkindex features ExportPDF = "pdf_export" ExportPageXML = "pagexml_export" + ExportDOCX = "docx_export" + ExportCSV = "csv_export" class WorkerVersion(models.Model): @@ -1222,5 +1226,7 @@ class CorpusWorkerVersion(models.Model): # A mapping of export formats with their corresponding arkindex feature FEATURE_FORMAT_MAP = { ExportFormat.PDF: ArkindexFeature.ExportPDF, - ExportFormat.PageXML: ArkindexFeature.ExportPageXML + ExportFormat.PageXML: ArkindexFeature.ExportPageXML, + ExportFormat.DOCX: ArkindexFeature.ExportDOCX, + ExportFormat.CSV: ArkindexFeature.ExportCSV } diff --git a/arkindex/process/tests/commands/test_update_system_workers.py b/arkindex/process/tests/commands/test_update_system_workers.py index 30736c395469d251ef5a4457ec545d29d7600336..351326b4cdf031700c64eea77f55ccf5d4d7fbfb 100644 --- a/arkindex/process/tests/commands/test_update_system_workers.py +++ b/arkindex/process/tests/commands/test_update_system_workers.py @@ -2,13 +2,16 @@ from io import StringIO from textwrap import dedent from unittest.mock import patch +import responses from django.core.management import CommandError, call_command from django.utils import timezone from arkindex.process.models import ArkindexFeature, FeatureUsage, Worker, WorkerType, WorkerVersion, WorkerVersionState from arkindex.project.tests import ArkindexTestCase -MOCK_CONFIG = { + +def mock_config(): + return { "features": { "file_import": {"image": "registry.example.com/file-import:latest", "command": None}, "init_elements": @@ -19,6 +22,19 @@ MOCK_CONFIG = 
{ } } +def mock_teklia_worker_config(): + return { + "features": { + "file_import": { + "teklia_worker": { + "name": "repository/import/file", + "version": "0.1.0", + "slug": "file-import", + } + } + } + } + class TestUpdateSystemWorkers(ArkindexTestCase): @@ -41,8 +57,8 @@ class TestUpdateSystemWorkers(ArkindexTestCase): Assert that a WorkerVersion has the expected attributes to provide a given feature """ self.assertEqual(worker_version.feature, feature) - self.assertEqual(worker_version.docker_image_iid, MOCK_CONFIG["features"][feature.value]["image"]) - self.assertEqual(worker_version.docker_command, MOCK_CONFIG["features"][feature.value]["command"]) + self.assertEqual(worker_version.docker_image_iid, mock_config()["features"][feature.value]["image"]) + self.assertEqual(worker_version.docker_command, mock_config()["features"][feature.value]["command"]) self.assertEqual(worker_version.state, WorkerVersionState.Available) self.assertEqual(worker_version.required_user_configuration_fields, set()) self.assertEqual(worker_version.gpu_usage, FeatureUsage.Disabled) @@ -51,7 +67,7 @@ class TestUpdateSystemWorkers(ArkindexTestCase): self.assertIsNone(worker_version.worker.archived) self.assertTrue(worker_version.worker.public) - @patch("arkindex.process.management.commands.update_system_workers.parse_config", lambda: MOCK_CONFIG) + @patch("arkindex.process.management.commands.update_system_workers.parse_config", mock_config) def test_from_scratch(self): output = self.update_system_workers() @@ -122,7 +138,7 @@ class TestUpdateSystemWorkers(ArkindexTestCase): """ ).strip()) - @patch("arkindex.process.management.commands.update_system_workers.parse_config", lambda: MOCK_CONFIG) + @patch("arkindex.process.management.commands.update_system_workers.parse_config", mock_config) def test_existing_worker_type(self): worker_type = WorkerType.objects.create(slug="system", display_name="Système") @@ -189,7 +205,7 @@ class TestUpdateSystemWorkers(ArkindexTestCase): """ ).strip()) - 
@patch("arkindex.process.management.commands.update_system_workers.parse_config", lambda: MOCK_CONFIG) + @patch("arkindex.process.management.commands.update_system_workers.parse_config", mock_config) def test_existing_system_workers(self): worker_type = WorkerType.objects.create(slug="system", display_name="Système") file_import_worker = Worker.objects.create( @@ -283,7 +299,7 @@ class TestUpdateSystemWorkers(ArkindexTestCase): """ ).strip()) - @patch("arkindex.process.management.commands.update_system_workers.parse_config", lambda: MOCK_CONFIG) + @patch("arkindex.process.management.commands.update_system_workers.parse_config", mock_config) def test_worker_slug_conflict(self): worker_type = WorkerType.objects.create(slug="systemnt", display_name="Systemn't") @@ -311,7 +327,7 @@ class TestUpdateSystemWorkers(ArkindexTestCase): """ ).strip()) - @patch("arkindex.process.management.commands.update_system_workers.parse_config", lambda: MOCK_CONFIG) + @patch("arkindex.process.management.commands.update_system_workers.parse_config", mock_config) def test_update_existing_system_workers(self): worker_type = WorkerType.objects.create(slug="system", display_name="Système") file_import_worker = Worker.objects.create( @@ -413,7 +429,7 @@ class TestUpdateSystemWorkers(ArkindexTestCase): """ ).strip()) - @patch("arkindex.process.management.commands.update_system_workers.parse_config", lambda: MOCK_CONFIG) + @patch("arkindex.process.management.commands.update_system_workers.parse_config", mock_config) def test_assigns_existing_compatible_versions(self): """ The command should assign the feature to an existing WorkerVersion compatible with it, @@ -576,7 +592,7 @@ class TestUpdateSystemWorkers(ArkindexTestCase): pagexml_export_version.refresh_from_db() self.check_feature_version(pagexml_export_version, ArkindexFeature.ExportPageXML) - @patch("arkindex.process.management.commands.update_system_workers.parse_config", lambda: MOCK_CONFIG) + 
@patch("arkindex.process.management.commands.update_system_workers.parse_config", mock_config) def test_unassigns_incompatible_versions(self): worker_type = WorkerType.objects.create(slug="systemnt", display_name="Systemn't") file_import_worker = Worker.objects.create( @@ -671,7 +687,7 @@ class TestUpdateSystemWorkers(ArkindexTestCase): Current worker version: {init_elements_version.id} (registry.gitlab.teklia.com/callico/callico:latest) This version has an invalid Docker image. This version uses a custom Docker command which could interfere with the feature. - This version requires a custom worker configuration which could interfere with the feature. + This version uses a custom configuration which could interfere with the feature. Unassigning feature from the current version Worker is up to date Creating new worker version @@ -698,7 +714,7 @@ class TestUpdateSystemWorkers(ArkindexTestCase): """ ).strip()) - @patch("arkindex.process.management.commands.update_system_workers.parse_config", lambda: MOCK_CONFIG) + @patch("arkindex.process.management.commands.update_system_workers.parse_config", mock_config) def test_noop(self): worker_type = WorkerType.objects.create(slug="system", display_name="Système") worker = Worker.objects.create(type=worker_type, slug="worker", name="Worker", public=True) @@ -790,3 +806,199 @@ class TestUpdateSystemWorkers(ArkindexTestCase): self.check_feature_version(pdf_export_version, ArkindexFeature.ExportPDF) pagexml_export_version.refresh_from_db() self.check_feature_version(pagexml_export_version, ArkindexFeature.ExportPageXML) + + @responses.activate + @patch("arkindex.process.management.commands.update_system_workers.parse_config", mock_teklia_worker_config) + def test_teklia_worker_invalid_slug(self): + responses.add( + responses.GET, + "https://gitlab.teklia.com/repository/import/file/-/raw/0.1.0/.arkindex.yml", + json={"version": 2, "workers": [{"slug": "no"}]}, + ) + with self.assertRaisesMessage(CommandError, ( + "No worker with 
slug file-import in .arkindex.yml at " + "https://gitlab.teklia.com/repository/import/file/-/raw/0.1.0/.arkindex.yml." + )): + self.update_system_workers() + + @responses.activate + @patch("arkindex.process.management.commands.update_system_workers.parse_config", mock_teklia_worker_config) + def test_teklia_worker_invalid_yaml(self): + responses.add( + responses.GET, + "https://gitlab.teklia.com/repository/import/file/-/raw/0.1.0/.arkindex.yml", + json={"version": 42} + ) + with self.assertRaisesMessage(CommandError, ( + "Error retrieving configuration at " + "https://gitlab.teklia.com/repository/import/file/-/raw/0.1.0/.arkindex.yml: " + "invalid YAML configuration" + )): + self.update_system_workers() + + @responses.activate + @patch("arkindex.process.management.commands.update_system_workers.parse_config", mock_teklia_worker_config) + def test_teklia_worker_from_scratch(self): + responses.add( + responses.GET, + "https://gitlab.teklia.com/repository/import/file/-/raw/0.1.0/.arkindex.yml", + json={ + "version": 2, + "workers": [{ + "slug": "file-import", + "name": "File Import", + "type": "import_type", + "description": "test", + }], + }, + ) + + with self.assertRaises(WorkerVersion.DoesNotExist): + WorkerVersion.objects.get_by_feature(ArkindexFeature.FileImport) + + output = self.update_system_workers() + worker_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.FileImport) + self.assertEqual(output, dedent( + f""" + ────────────────────────────────── FileImport ────────────────────────────────── + Configuring feature Fileimport from https://gitlab.teklia.com/repository/import/file + Using registry.gitlab.teklia.com/repository/import/file:0.1.0 to provide FileImport + Created new System worker type ({worker_version.worker.type_id}) + Created new FileImport system worker + Creating new worker version + Using new worker version {worker_version.id} + """ + ).strip()) + self.assertEqual(worker_version.configuration, { + "description": "test", + "name": 
"File Import", + "slug": "file-import", + "type": "import_type", + }) + self.assertEqual(worker_version.docker_image_iid, "registry.gitlab.teklia.com/repository/import/file:0.1.0") + self.assertEqual(worker_version.feature, ArkindexFeature.FileImport) + self.assertEqual(worker_version.state, WorkerVersionState.Available) + self.assertEqual(worker_version.version, 1) + self.assertEqual(worker_version.docker_image_iid, "registry.gitlab.teklia.com/repository/import/file:0.1.0") + self.assertEqual(worker_version.worker.name, "FileImport") + self.assertEqual(worker_version.worker.repository_url, "https://gitlab.teklia.com/repository/import/file") + + # Retrying does not override the version + self.assertEqual(self.update_system_workers(), dedent( + f""" + ────────────────────────────────── FileImport ────────────────────────────────── + Configuring feature Fileimport from https://gitlab.teklia.com/repository/import/file + Using registry.gitlab.teklia.com/repository/import/file:0.1.0 to provide FileImport + Current worker version: {worker_version.id} (registry.gitlab.teklia.com/repository/import/file:0.1.0) + Worker is up to date + Worker version for FileImport is up to date + """ + ).strip()) + + @responses.activate + @patch("arkindex.process.management.commands.update_system_workers.parse_config", mock_teklia_worker_config) + def test_teklia_worker_unassign(self): + worker_type = WorkerType.objects.create(slug="system", display_name="Système") + file_import_worker = Worker.objects.create( + type=worker_type, + slug="file_import", + name="Sir File-a-Lot", + public=True, + ) + version = file_import_worker.versions.create( + version=1, + docker_image_iid="test", + state=WorkerVersionState.Available, + feature=ArkindexFeature.FileImport, + ) + responses.add( + responses.GET, + "https://gitlab.teklia.com/repository/import/file/-/raw/0.1.0/.arkindex.yml", + json={ + "version": 2, + "workers": [{ + "slug": "file-import", + "name": "File Import", + "type": "import_type", +
"description": "test", + }], + }, + ) + + output = self.update_system_workers() + worker_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.FileImport) + self.assertEqual(output, dedent( + f""" + ────────────────────────────────── FileImport ────────────────────────────────── + Configuring feature Fileimport from https://gitlab.teklia.com/repository/import/file + Using registry.gitlab.teklia.com/repository/import/file:0.1.0 to provide FileImport + Current worker version: {version.id} (test) + This version has an invalid Docker image. + This version uses a custom configuration which could interfere with the feature. + Unassigning feature from the current version + Worker is up to date + Creating new worker version + Using new worker version {worker_version.id} + """ + ).strip()) + self.assertEqual(worker_version.configuration, { + "description": "test", + "name": "File Import", + "slug": "file-import", + "type": "import_type", + }) + self.assertEqual(worker_version.docker_image_iid, "registry.gitlab.teklia.com/repository/import/file:0.1.0") + self.assertEqual(worker_version.feature, ArkindexFeature.FileImport) + self.assertEqual(worker_version.state, WorkerVersionState.Available) + self.assertEqual(worker_version.version, 2) + self.assertEqual(worker_version.worker.name, "Sir File-a-Lot") + self.assertEqual(worker_version.worker.repository_url, None) + + @responses.activate + @patch("arkindex.process.management.commands.update_system_workers.parse_config", mock_teklia_worker_config) + def test_teklia_worker_existing(self): + worker_type = WorkerType.objects.create(slug="system", display_name="Système") + worker_conf = { + "slug": "file-import", + "name": "File Import", + "type": "import_type", + "description": "test", + } + file_import_worker = Worker.objects.create( + type=worker_type, + slug="file_import", + name="Sir File-a-Lot", + public=True, + ) + version = file_import_worker.versions.create( + version=1, + 
docker_image_iid="registry.gitlab.teklia.com/repository/import/file:0.1.0", + state=WorkerVersionState.Available, + feature=ArkindexFeature.FileImport, + configuration=worker_conf, + ) + responses.add( + responses.GET, + "https://gitlab.teklia.com/repository/import/file/-/raw/0.1.0/.arkindex.yml", + json={"version": 2, "workers": [worker_conf]}, + ) + + output = self.update_system_workers() + self.assertEqual(WorkerVersion.objects.count(), 1) + worker_version = WorkerVersion.objects.get_by_feature(ArkindexFeature.FileImport) + self.assertEqual(output, dedent( + f""" + ────────────────────────────────── FileImport ────────────────────────────────── + Configuring feature Fileimport from https://gitlab.teklia.com/repository/import/file + Using registry.gitlab.teklia.com/repository/import/file:0.1.0 to provide FileImport + Current worker version: {version.id} (registry.gitlab.teklia.com/repository/import/file:0.1.0) + Worker is up to date + Worker version for FileImport is up to date + """ + ).strip()) + self.assertEqual(worker_version.docker_image_iid, "registry.gitlab.teklia.com/repository/import/file:0.1.0") + self.assertEqual(worker_version.feature, ArkindexFeature.FileImport) + self.assertEqual(worker_version.state, WorkerVersionState.Available) + self.assertEqual(worker_version.version, 1) + self.assertEqual(worker_version.worker.name, "Sir File-a-Lot") + self.assertEqual(worker_version.worker.repository_url, None) diff --git a/arkindex/process/tests/worker_versions/test_list.py b/arkindex/process/tests/worker_versions/test_list.py index e670faefdcf44893aad992bec24fd3fddf05998a..4fc3d5800b9d252f60aec39e4a43a3dedf6a1f02 100644 --- a/arkindex/process/tests/worker_versions/test_list.py +++ b/arkindex/process/tests/worker_versions/test_list.py @@ -19,6 +19,16 @@ class TestWorkerVersionList(FixtureAPITestCase): cls.version_1 = cls.worker_reco.versions.get() cls.version_2 = cls.worker_dla.versions.get() + cls.version_main_branch = cls.worker_reco.versions.create( + 
configuration={}, + branch="main", + revision_url="https://gitlab.com/NERV/eva/commit/12" + ) + cls.version_other_branch = cls.worker_reco.versions.create( + configuration={}, + branch="eva-00", + revision_url="https://gitlab.com/NERV/eva/commit/000002" + ) farm = Farm.objects.first() process = cls.corpus.processes.create( @@ -52,7 +62,7 @@ class TestWorkerVersionList(FixtureAPITestCase): self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertDictEqual(response.json(), { - "count": 2, + "count": 4, "next": None, "number": 1, "previous": None, @@ -79,6 +89,50 @@ class TestWorkerVersionList(FixtureAPITestCase): "revision_url": "https://gitlab.com/NERV/eva/commit/63e377e7f88c743d8428fc4e4eaedfc1c9356754", "created": "2050-09-09T09:09:09.090909Z", }, + { + "id": str(self.version_other_branch.id), + "configuration": {}, + "docker_image_iid": None, + "state": "created", + "gpu_usage": FeatureUsage.Disabled.value, + "model_usage": FeatureUsage.Disabled.value, + "worker": { + "id": str(self.worker_reco.id), + "name": self.worker_reco.name, + "type": self.worker_reco.type.slug, + "slug": self.worker_reco.slug, + "description": self.worker_reco.description, + "archived": bool(self.worker_reco.archived), + "repository_url": self.worker_reco.repository_url, + }, + "version": None, + "tag": None, + "branch": "eva-00", + "revision_url": "https://gitlab.com/NERV/eva/commit/000002", + "created": self.version_other_branch.created.isoformat().replace("+00:00", "Z"), + }, + { + "id": str(self.version_main_branch.id), + "configuration": {}, + "docker_image_iid": None, + "state": "created", + "gpu_usage": FeatureUsage.Disabled.value, + "model_usage": FeatureUsage.Disabled.value, + "worker": { + "id": str(self.worker_reco.id), + "name": self.worker_reco.name, + "type": self.worker_reco.type.slug, + "slug": self.worker_reco.slug, + "description": self.worker_reco.description, + "archived": bool(self.worker_reco.archived), + "repository_url": 
self.worker_reco.repository_url, + }, + "version": None, + "tag": None, + "branch": "main", + "revision_url": "https://gitlab.com/NERV/eva/commit/12", + "created": self.version_main_branch.created.isoformat().replace("+00:00", "Z"), + }, { "id": str(self.version_1.id), "configuration": {"test": 42}, @@ -139,8 +193,75 @@ class TestWorkerVersionList(FixtureAPITestCase): self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() - self.assertEqual(data["count"], 1) - self.assertEqual(data["results"][0]["id"], str(self.worker_reco.versions.first().id)) + self.assertEqual(data["count"], 3) + self.assertListEqual(data["results"], [ + { + "id": str(self.version_other_branch.id), + "configuration": {}, + "docker_image_iid": None, + "state": "created", + "gpu_usage": FeatureUsage.Disabled.value, + "model_usage": FeatureUsage.Disabled.value, + "worker": { + "id": str(self.worker_reco.id), + "name": self.worker_reco.name, + "type": self.worker_reco.type.slug, + "slug": self.worker_reco.slug, + "description": self.worker_reco.description, + "archived": bool(self.worker_reco.archived), + "repository_url": self.worker_reco.repository_url, + }, + "version": None, + "tag": None, + "branch": "eva-00", + "revision_url": "https://gitlab.com/NERV/eva/commit/000002", + "created": self.version_other_branch.created.isoformat().replace("+00:00", "Z"), + }, + { + "id": str(self.version_main_branch.id), + "configuration": {}, + "docker_image_iid": None, + "state": "created", + "gpu_usage": FeatureUsage.Disabled.value, + "model_usage": FeatureUsage.Disabled.value, + "worker": { + "id": str(self.worker_reco.id), + "name": self.worker_reco.name, + "type": self.worker_reco.type.slug, + "slug": self.worker_reco.slug, + "description": self.worker_reco.description, + "archived": bool(self.worker_reco.archived), + "repository_url": self.worker_reco.repository_url, + }, + "version": None, + "tag": None, + "branch": "main", + "revision_url": 
"https://gitlab.com/NERV/eva/commit/12", + "created": self.version_main_branch.created.isoformat().replace("+00:00", "Z"), + }, + { + "id": str(self.version_1.id), + "configuration": {"test": 42}, + "docker_image_iid": self.version_1.docker_image_iid, + "state": "available", + "gpu_usage": FeatureUsage.Disabled.value, + "model_usage": FeatureUsage.Disabled.value, + "worker": { + "id": str(self.worker_reco.id), + "name": self.worker_reco.name, + "type": self.worker_reco.type.slug, + "slug": self.worker_reco.slug, + "description": self.worker_reco.description, + "archived": bool(self.worker_reco.archived), + "repository_url": self.worker_reco.repository_url, + }, + "version": 1, + "tag": None, + "branch": None, + "revision_url": None, + "created": self.version_1.created.isoformat().replace("+00:00", "Z"), + } + ]) def test_list_filter_worker(self): self.client.force_login(self.user) @@ -167,7 +288,8 @@ class TestWorkerVersionList(FixtureAPITestCase): def test_list_simple_mode(self): """ - With the mode attribute set to simple, worker versions with no tag are excluded + With the mode attribute set to simple, worker versions with no tag or not from the + master / main branch are excluded """ WorkerVersion.objects.bulk_create([ WorkerVersion( @@ -186,10 +308,14 @@ class TestWorkerVersionList(FixtureAPITestCase): ) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() - self.assertEqual(data["count"], 4) + self.assertEqual(data["count"], 6) self.assertCountEqual( [version["tag"] for version in data["results"]], - ["v0.1", "test", None, None], + ["v0.1", "test", None, None, None, None], + ) + self.assertCountEqual( + [version["branch"] for version in data["results"]], + [None, None, None, None, "main", "eva-00"], ) # Simple mode filters out versions without a tag @@ -201,8 +327,12 @@ class TestWorkerVersionList(FixtureAPITestCase): ) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() - 
self.assertEqual(data["count"], 2) + self.assertEqual(data["count"], 3) self.assertCountEqual( [version["tag"] for version in data["results"]], - ["v0.1", "test"], + ["v0.1", "test", None], + ) + self.assertCountEqual( + [version["branch"] for version in data["results"]], + [None, None, "main"], ) diff --git a/arkindex/project/config.py b/arkindex/project/config.py index edcb4e3a4f736c881cd14a091047bf0707502569..b97f561fc4b63e234fb82432f7d4847c544aa4e8 100644 --- a/arkindex/project/config.py +++ b/arkindex/project/config.py @@ -112,6 +112,7 @@ def get_settings_parser(base_dir): email_parser.add_option("port", type=int) email_parser.add_option("user", type=str) email_parser.add_option("password", type=str) + email_parser.add_option("from_address", type=str, default=None) email_parser.add_option("error_report_recipients", type=str, many=True, default=[]) export_parser = parser.add_subparser("export", default={}) diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py index d3173a6245291737df01f330537e19716a2d166f..e7daebda121af5e1cfdf1659ee4fe726cf6c9805 100644 --- a/arkindex/project/settings.py +++ b/arkindex/project/settings.py @@ -480,7 +480,7 @@ if conf["email"]: EMAIL_HOST = conf["email"]["host"] EMAIL_PORT = conf["email"]["port"] EMAIL_HOST_USER = conf["email"]["user"] - DEFAULT_FROM_EMAIL = SERVER_EMAIL = EMAIL_HOST_USER + DEFAULT_FROM_EMAIL = SERVER_EMAIL = conf["email"]["from_address"] or EMAIL_HOST_USER EMAIL_HOST_PASSWORD = conf["email"]["password"] EMAIL_USE_TLS = True else: diff --git a/arkindex/project/tests/config_samples/override.yaml b/arkindex/project/tests/config_samples/override.yaml index 1de12bd3e8d5ab4153378b78a765ef46c79f682b..fed278c1c26d9b17837037b68b9b6922cffe7745 100644 --- a/arkindex/project/tests/config_samples/override.yaml +++ b/arkindex/project/tests/config_samples/override.yaml @@ -39,6 +39,7 @@ database: email: error_report_recipients: - noreply@nasa.gov + from_address: None host: smtp.wanadoo.fr password: hunter2 
port: 25 diff --git a/arkindex/system_workers.yml b/arkindex/system_workers.yml index 3a94f9635b51bc788ab053da20308d28fb3191ba..ce6d9d21ca643017ef72b77ddd96f3a153055aac 100644 --- a/arkindex/system_workers.yml +++ b/arkindex/system_workers.yml @@ -1,7 +1,7 @@ # When releasing Arkindex, check that the Docker images set here are up to date, # then update the `version` to the current Arkindex version as set in the `VERSION` file # to confirm that the images have been manually checked. -version: 1.7.0 +version: 1.7.1 features: file_import: @@ -10,10 +10,24 @@ features: image: registry.gitlab.teklia.com/arkindex/workers/init-elements:0.1.1 command: worker-init-elements s3_ingest: - image: registry.gitlab.teklia.com/arkindex/workers/import/s3:0.1.0 + image: registry.gitlab.teklia.com/arkindex/workers/import/s3:0.2.0rc3 pdf_export: - image: registry.gitlab.teklia.com/arkindex/workers/export:0.2.0 - command: worker-export-pdf + teklia_worker: + name: arkindex/workers/export + version: 0.2.1 + slug: pdf-export pagexml_export: - image: registry.gitlab.teklia.com/arkindex/workers/export:0.2.0 - command: worker-export-pagexml + teklia_worker: + name: arkindex/workers/export + version: 0.2.1 + slug: pagexml-export + docx_export: + teklia_worker: + name: arkindex/workers/export + version: 0.2.1 + slug: docx-export + csv_export: + teklia_worker: + name: arkindex/workers/export + version: 0.2.1 + slug: csv-export diff --git a/arkindex/templates/index.html b/arkindex/templates/index.html index bd9d0f593c527fc98b18a73cef9c94b4e61274dd..daedb0d6068d9d1f75a2e8a2462b07add2ce991e 100644 --- a/arkindex/templates/index.html +++ b/arkindex/templates/index.html @@ -15,7 +15,7 @@ <meta name="banner_message" content="{{ frontend_banner.message }}"> <meta name="banner_style" content="{{ frontend_banner.style.value }}"> {% endif %} - <title>ArkIndex {{ frontend_version }}</title> + <title>Arkindex {{ frontend_version }}</title> <link href="{{ cdn_assets_url }}/{{ frontend_version }}/arkindex-{{ 
frontend_version }}.css" rel="stylesheet"> </head> <body>