diff --git a/.gitignore b/.gitignore index 71d78337c7ffc621211cc066c1afc5c4197226cd..da5d6529fa7d2567f5fcb91ac3a958e36319c6f7 100644 --- a/.gitignore +++ b/.gitignore @@ -11,10 +11,10 @@ media workers .vscode local_settings.py -arkindex/iiif-users/ .coverage htmlcov ponos openapi/*.yml !openapi/paths.yml *.key +arkindex/config.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8d345cf2207f994ad42167c05e0d92593ff97e58..a50f2e98976c008b5f9e6b00006a87db8b037c0a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -16,6 +16,7 @@ stages: - "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.com/arkindex/common#egg=arkindex-common" - "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.com/arkindex/ponos#egg=ponos-server" - pip install -r tests-requirements.txt codecov + - "echo 'database: {host: postgres, port: 5432}' > $CONFIG_PATH" variables: # For the postgres image @@ -24,8 +25,7 @@ stages: POSTGRES_PASSWORD: devdata # For the backend - DB_HOST: postgres - DB_PORT: 5432 + CONFIG_PATH: "$CI_PROJECT_DIR/config.yml" # Pip cache PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip" diff --git a/arkindex/dataimport/management/commands/import_s3.py b/arkindex/dataimport/management/commands/import_s3.py index 730d07e55cb4df725920a07e24b4bec52121a39a..d3b8e18ecebb29d92de2560a903a68443c18c9ab 100644 --- a/arkindex/dataimport/management/commands/import_s3.py +++ b/arkindex/dataimport/management/commands/import_s3.py @@ -141,7 +141,7 @@ class Command(BaseCommand): recipe = settings.PONOS_RECIPE.copy() recipe['tasks'] = tasks_config - recipe['env'].update(env_vars) + recipe.setdefault('env', {}).update(env_vars) workflow = Workflow.objects.create(recipe=yaml.dump(recipe)) logger.info('Created Workflow with id {}'.format(workflow.id)) diff --git a/arkindex/dataimport/models.py b/arkindex/dataimport/models.py index 921ef5e4ce61523f81c654adde21576f391c6f5f..d62a63c017fdc4558c11dd36d4b8623f66f8724b 100644 --- a/arkindex/dataimport/models.py +++ b/arkindex/dataimport/models.py @@ -68,25 +68,7 @@ class DataImport(IndexableModel): raise ValidationError('Git repository does not have any valid credentials') if self.mode == DataImportMode.Repository: - tasks = { - import_task_name: { - 'image': settings.ARKINDEX_APP_IMAGE, - 'command': 'manage.py import {}'.format(self.id), - 'env': { - 'ARKINDEX_ENV': 'prod', - 'DB_HOST': settings.DATABASES['default']['HOST'], - 'DB_PORT': settings.DATABASES['default']['PORT'], - 'DB_USER': settings.DATABASES['default']['USER'], - 'DB_PASSWORD': settings.DATABASES['default']['PASSWORD'], - 'DB_NAME': settings.DATABASES['default']['NAME'], - 'LOCAL_IMAGESERVER_ID': settings.LOCAL_IMAGESERVER_ID, - 'REDIS_HOST': settings.REDIS_HOST, - # Some empty folder to bypass the system check - 'ML_CLASSIFIERS_DIR': '/data/current', - }, - 'tags': ['agent:git'], - }, - } + raise NotImplementedError elif self.mode == DataImportMode.Elements: assert self.payload, \ diff --git a/arkindex/dataimport/tests/test_git_flow.py b/arkindex/dataimport/tests/test_git_flow.py index ed47ce68c323277c5c001f58680001321a5548ec..afebc16edd633c7e9e901b126a0a624227a0a313 100644 --- a/arkindex/dataimport/tests/test_git_flow.py +++ b/arkindex/dataimport/tests/test_git_flow.py @@ -5,7 +5,6 @@ from arkindex.documents.models import Element from arkindex.dataimport.models import DataImport from arkindex.dataimport.filetypes import IIIFFileType from arkindex.dataimport.git import GitFlow, SimpleDiff, DiffType -from ponos.models import State import os.path import tempfile import shutil @@ -199,116 
+198,6 @@ class TestGitFlow(FixtureTestCase): self.assertEqual(flow.repo.iter_commits.call_count, 1) self.assertEqual(flow.repo.git.ls_files.call_count, 1) - def test_diff_revision(self): - """ - Test GitFlow performs a diff when there is an existing revision - """ - new_rev = self.repo.revisions.create(hash='1337', message='a', ref='master', author='me') - self.dataimport.start() - self.dataimport.workflow.tasks.all().update(state=State.Completed) - self.assertEqual(self.dataimport.state, State.Completed) - self.dataimport.id = None - self.dataimport.revision = new_rev - - diff1, diff2 = MagicMock(), MagicMock() - diff1.change_type, diff2.change_type = 'M', 'D' - diff1.a_path, diff1.b_path = 'path1a', 'path1b' - diff2.a_path, diff2.b_path = 'path2a', 'path2b' - - commit1, commit2 = MagicMock(), MagicMock() - commit1.hexsha = '42' - commit2.hexsha = '1337' - commit1.diff.return_value = [diff1, diff2] - - repo = MagicMock() - repo.commit.return_value = commit2 - repo.iter_commits.return_value = [commit1, commit2] - - flow = GitFlow(self.dataimport, self.working_dir) - flow.repo = repo - diffs = flow.diff() - - self.assertListEqual(diffs, [ - SimpleDiff(DiffType.Modification, 'path1a', 'path1b'), - SimpleDiff(DiffType.Deletion, 'path2a', 'path2b'), - ]) - - self.assertEqual(commit1.diff.call_count, 1) - self.assertEqual(commit1.diff.call_args, call(commit2)) - self.assertEqual(repo.commit.call_count, 1) - self.assertEqual(repo.commit.call_args, call('1337')) - self.assertEqual(repo.iter_commits.call_count, 1) - self.assertEqual(repo.iter_commits.call_args, call('1337')) - - def test_diff_check_workflows(self): - """ - Test GitFlow performs a diff with existing revisions with successful imports - - Example with commits "42 -> 1337 -> cafe": - Importing rev 42: None -> 42, completed - Importing rev 1337: 42 -> 1337, failed - Expected diff when importing rev cafe: 42 -> cafe - """ - # Setup Revisions - rev_1337 = self.repo.revisions.create(hash='1337', message='a', ref='master', author='me') - rev_cafe = self.repo.revisions.create(hash='cafe', message='tasty', ref='master', author='me') - - # Setup workflows - self.dataimport.start() - self.dataimport.workflow.tasks.all().update(state=State.Completed) - self.assertEqual(self.dataimport.state, State.Completed) - import1337 = DataImport( - corpus=self.corpus, - creator=self.user, - revision=rev_1337, - payload={ - "repo_id": str(self.repo.id), - "sha": rev_1337.hash, - }, - mode=DataImportMode.Repository, - ) - import1337.start() - import1337.workflow.tasks.all().update(state=State.Failed) - self.assertEqual(import1337.state, State.Failed) - - # Setup Git objects - diff1 = MagicMock(change_type='M', a_path='path1a', b_path='path1b') - diff2 = MagicMock(change_type='D', a_path='path2a', b_path='path2b') - - commit_42, commit_1337, commit_cafe = \ - MagicMock(hexsha='42'), MagicMock(hexsha='1337'), MagicMock(hexsha='cafe') - commit_42.diff.return_value = [diff1, diff2] - - repo = MagicMock() - repo.commit.return_value = commit_cafe - repo.iter_commits.return_value = [commit_42, commit_1337, commit_cafe] - - # Run GitFlow's diff for cafe - import_cafe = DataImport( - corpus=self.corpus, - creator=self.user, - revision=rev_cafe, - payload={ - "repo_id": str(self.repo.id), - "sha": rev_cafe.hash, - }, - mode=DataImportMode.Repository, - ) - flow = GitFlow(import_cafe, self.working_dir) - flow.repo = repo - diffs = flow.diff() - - self.assertEqual(repo.commit.call_count, 1) - self.assertEqual(repo.commit.call_args, call('cafe')) - 
self.assertEqual(repo.iter_commits.call_count, 1) - self.assertEqual(repo.iter_commits.call_args, call('cafe')) - self.assertEqual(commit_42.diff.call_count, 1) - self.assertEqual(commit_42.diff.call_args, call(commit_cafe)) - self.assertListEqual(diffs, [ - SimpleDiff(DiffType.Modification, 'path1a', 'path1b'), - SimpleDiff(DiffType.Deletion, 'path2a', 'path2b'), - ]) - @patch('arkindex.dataimport.git.FileType.get') def test_dispatch(self, filetype_mock): """ diff --git a/arkindex/dataimport/tests/test_gitlab_provider.py b/arkindex/dataimport/tests/test_gitlab_provider.py index 294b30536a548cf7e32c1d00b8968907d645cdc9..bfc3ea3ac5136173dd5828b23cd183f6abee34ae 100644 --- a/arkindex/dataimport/tests/test_gitlab_provider.py +++ b/arkindex/dataimport/tests/test_gitlab_provider.py @@ -1,8 +1,6 @@ from unittest.mock import patch, MagicMock from rest_framework.exceptions import APIException, NotAuthenticated, AuthenticationFailed, ValidationError from gitlab.exceptions import GitlabGetError, GitlabCreateError -from ponos.models import State -from arkindex_common.enums import DataImportMode from arkindex.project.tests import FixtureTestCase from arkindex.dataimport.providers import GitLabProvider from arkindex.dataimport.models import DataImport @@ -293,36 +291,6 @@ class TestGitLabProvider(FixtureTestCase): self.assertEqual(self.gl_mock().projects.get.call_count, 1) self.assertEqual(self.gl_mock().projects.get().commits.list.call_count, 1) - def test_handle_webhook(self): - """ - Test GitLabProvider correctly handles GitLab webhook push events - """ - request_mock = MagicMock() - request_mock.META = { - 'HTTP_X_GITLAB_EVENT': 'Push Hook', - 'HTTP_X_GITLAB_TOKEN': 'hook-token', - } - request_mock.data = { - 'object_kind': 'push', - 'ref': 'refs/heads/master', - 'checkout_sha': '1337', - 'commits': [ - { - 'message': 'commit message', - 'author': { - 'name': 'bob', - } - } - ] - } - - repo_imports = DataImport.objects.filter(payload__repo_id=str(self.repo.id)) - self.assertFalse(repo_imports.exists()) - GitLabProvider(url='http://aaa', credentials=self.creds).handle_webhook(self.repo, request_mock) - di = repo_imports.get() - self.assertEqual(di.mode, DataImportMode.Repository) - self.assertEqual(di.workflow.state, State.Unscheduled) - def test_handle_webhook_missing_headers(self): """ Test GitLabProvider checks HTTP headers on webhooks diff --git a/arkindex/dataimport/tests/test_imports.py b/arkindex/dataimport/tests/test_imports.py index 23c66e4d8f3c22e0a752a4aae4c5b1385439613b..9a8ef1fef347213562cb3199428a2573b899532c 100644 --- a/arkindex/dataimport/tests/test_imports.py +++ b/arkindex/dataimport/tests/test_imports.py @@ -221,19 +221,6 @@ class TestImports(FixtureAPITestCase): self.assertEqual(self.dataimport.state, State.Unscheduled) self.assertIsNotNone(self.dataimport.workflow) - def test_retry_repo_disabled(self): - self.client.force_login(self.user) - self.dataimport.mode = DataImportMode.Repository - self.dataimport.revision = self.rev - self.dataimport.save() - self.dataimport.start() - self.dataimport.workflow.tasks.all().update(state=State.Error) - self.assertEqual(self.dataimport.state, State.Error) - self.creds.delete() - response = self.client.post(reverse('api:import-retry', kwargs={'pk': self.dataimport.id})) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.creds.save() - def test_from_files_requires_login(self): response = self.client.post(reverse('api:import-from-files'), { 'files': [str(self.img_df.id)], diff --git 
a/arkindex/dataimport/tests/test_repos.py b/arkindex/dataimport/tests/test_repos.py index a7e18847289e3971cd628bccc2f405b73f507cb1..bde23ad21d9f5947772042b8d09b1e4478096234 100644 --- a/arkindex/dataimport/tests/test_repos.py +++ b/arkindex/dataimport/tests/test_repos.py @@ -1,4 +1,3 @@ -from unittest.mock import patch from django.urls import reverse from rest_framework import status from rest_framework.exceptions import ValidationError @@ -48,24 +47,6 @@ class TestRepositories(FixtureTestCase): self.assertEqual(Workflow.objects.count(), 0) - @patch('arkindex.dataimport.providers.GitLabProvider.get_or_create_latest_revision') - def test_start(self, gitlab_rev_mock): - gitlab_rev_mock.return_value = self.rev, False - self.client.force_login(self.superuser) - self.assertEqual(Workflow.objects.count(), 0) - - resp = self.client.get(reverse('api:repository-import', kwargs={'pk': str(self.repo.id)})) - self.assertEqual(resp.status_code, status.HTTP_200_OK) - data = resp.json() - - di = DataImport.objects.get(id=data['import_id']) - self.assertEqual(di.corpus, self.corpus) - self.assertEqual(di.mode, DataImportMode.Repository) - self.assertEqual(di.creator, self.user) - self.assertEqual(di.revision, self.rev) - - self.assertEqual(Workflow.objects.count(), 1) - def test_start_no_credentials(self): """ Test the repository start endpoint fails without credentials diff --git a/arkindex/project/checks.py b/arkindex/project/checks.py index afaec3ee4f5b3340cdecd014df09521ef59b6f7f..52511a76e0f6dd1e3d655203c7634479c6d4e772 100644 --- a/arkindex/project/checks.py +++ b/arkindex/project/checks.py @@ -35,22 +35,6 @@ def api_urls_check(*args, **kwargs): ] -@register() -@only_runserver -def ml_tools_check(*args, **kwargs): - """ - Check that the ML classifiers defined in settings actually exist - """ - from django.conf import settings - if not os.path.isdir(settings.ML_CLASSIFIERS_DIR): - return [Error( - 'ML tools directory does not exist', - hint='settings.ML_CLASSIFIERS_DIR = "{}"'.format(settings.ML_CLASSIFIERS_DIR), - id='arkindex.E008', - )] - return [] - - @register() @only_runserver def local_imageserver_check(*args, **kwargs): @@ -87,7 +71,6 @@ def docker_images_check(*args, **kwargs): return [] images = ( - (settings.ARKINDEX_APP_IMAGE, 'ARKINDEX_APP_IMAGE'), (settings.ARKINDEX_TASKS_IMAGE, 'ARKINDEX_TASKS_IMAGE'), ) for image_tag, setting_name in images: @@ -112,6 +95,23 @@ def docker_images_check(*args, **kwargs): return errors +@register() +@only_runserver +def ponos_key_check(*args, **kwargs): + """ + Warn about a missing Ponos private key that would prevent any Ponos agent from authenticating + """ + from django.conf import settings + if not os.path.exists(settings.PONOS_PRIVATE_KEY): + return [Warning( + f'Ponos private key at {settings.PONOS_PRIVATE_KEY} not found. 
' + 'Agents will be unable to connect to this server.', + hint=f'`ponos.private_key` in {settings.CONFIG_PATH}', + id='arkindex.W007', + )] + return [] + + + @register() def ponos_recipe_check(*args, **kwargs): """ @@ -120,10 +120,6 @@ from django.conf import settings errors = [] - if settings.PONOS_RECIPE is None: - # In a Ponos task - return [] - recipe = settings.PONOS_RECIPE.copy() # Add a dummy task because Ponos wants at least one task recipe['tasks'] = {'task1': {'image': 'hello-world'}} @@ -136,6 +132,15 @@ id='arkindex.E007', )) + for variable in ('ARKINDEX_API_URL', 'ARKINDEX_API_TOKEN', 'ARKINDEX_API_CSRF_COOKIE'): + if variable not in recipe.get('env', {}): + errors.append(Warning( + f'The {variable} environment variable should be defined ' + 'to allow API client autoconfiguration in Ponos tasks', + hint=f'`ponos.default_env` in {settings.CONFIG_PATH}', + id='arkindex.W006', + )) + return errors @@ -192,7 +197,6 @@ def s3_check(*args, **kwargs): aws_settings = { 'AWS_ACCESS_KEY': 'AWS access key ID', 'AWS_SECRET_KEY': 'AWS secret key', - 'AWS_ENDPOINT': 'AWS endpoint', 'AWS_THUMBNAIL_BUCKET': 'S3 thumbnails bucket name', 'AWS_STAGING_BUCKET': 'S3 staging bucket name', } @@ -205,6 +209,7 @@ hint='settings.{} = {}'.format(name, repr(value)), id='arkindex.E011', )) + return errors diff --git a/arkindex/project/config.py b/arkindex/project/config.py new file mode 100644 index 0000000000000000000000000000000000000000..c3412da32aaa5ccdae176b5c6b4d45da21d43fc6 --- /dev/null +++ b/arkindex/project/config.py @@ -0,0 +1,252 @@ +from collections import namedtuple +from collections.abc import Mapping +from enum import Enum +from pathlib import Path +import json +import os +import sys +import yaml + +Option = namedtuple('Option', ['type', 'default']) + +UNSET = object() +""" +Used as a default value in `ConfigParser.add_option(default=UNSET)` +because default=None implies that the option is optional +""" + + +def _all_checks(): + """ + Whether to check for path existence; disabled when running unit tests or other dev-related operations. + This is the same as settings.ALL_CHECKS, but since the configuration is accessed before settings + are initialized, it has to be copied here. + This is a function rather than a module-level constant, to make mocking in unit tests much simpler.
+ """ + return os.environ.get('ALL_CHECKS') == 'true' or 'runserver' in sys.argv + + +def file_path(data): + path = Path(data).resolve() + if _all_checks(): + assert path.exists(), f'{path} does not exist' + assert path.is_file(), f'{path} is not a file' + return path + + +def dir_path(data): + path = Path(data).resolve() + if _all_checks(): + assert path.exists(), f'{path} does not exist' + assert path.is_dir(), f'{path} is not a directory' + return path + + +class ConfigurationError(ValueError): + + def __init__(self, errors, *args, **kwargs): + super().__init__(*args, **kwargs) + self.errors = errors + + def __str__(self): + return json.dumps(self.errors) + + def __repr__(self): + return '{}({!s})'.format(self.__class__.__name__, self) + + +class ConfigParser(object): + + def __init__(self): + self.options = {} + + def add_option(self, name, *, type=str, many=False, default=UNSET): + assert name not in self.options, f'{name} is an already defined option' + assert callable(type), 'Option type must be callable' + if many: + self.options[name] = Option(lambda data: list(map(type, data)), default) + else: + self.options[name] = Option(type, default) + + def add_subparser(self, *args, **kwargs): + """ + Add a parser as a new option to this parser, + to allow finer control over nested configuration options. + """ + parser = ConfigParser() + self.add_option(*args, **kwargs, type=parser.parse_data) + return parser + + def parse_data(self, data): + """ + Parse configuration data from a dict. + Will raise ConfigurationError if any error is detected. + """ + if not isinstance(data, Mapping): + raise ConfigurationError('Parser data must be a mapping') + + parsed, errors = {}, {} + for name, option in self.options.items(): + if name in data: + value = data[name] + elif option.default is UNSET: + errors[name] = 'This option is required' + continue + elif option.default is None: + parsed[name] = None + continue + else: + value = option.default + + try: + parsed[name] = option.type(value) + except ConfigurationError as e: + # Allow nested error dicts for nicer error messages with add_subparser + errors[name] = e.errors + except Exception as e: + errors[name] = str(e) + + if errors: + raise ConfigurationError(errors) + return parsed + + def parse(self, path, exist_ok=False): + if not path.is_file() and exist_ok: + # Act like the file is empty + return self.parse_data({}) + with open(path) as f: + return self.parse_data(yaml.safe_load(f)) + + +class CacheType(Enum): + Redis = 'redis' + Memcached = 'memcached' + Filesystem = 'filesystem' + Memory = 'memory' + Dummy = 'dummy' + + +class CookieSameSiteOption(Enum): + Lax = 'lax' + Strict = 'strict' + # Cannot redefine Python's None! + None_ = 'none' + + +def get_settings_parser(base_dir): + parser = ConfigParser() + parser.add_option('arkindex_env', type=str, default='dev') + parser.add_option('internal_group_id', type=int, default=2) + parser.add_option('local_imageserver_id', type=int, default=1) + parser.add_option('ml_classifiers_dir', type=dir_path, default=(base_dir / '../../ml-classifiers').resolve()) + parser.add_option('allowed_hosts', type=str, many=True, default=[]) + + # SECURITY WARNING: keep the secret key used in production secret!
+ parser.add_option('secret_key', type=str, default='jf0w^y&ml(caax8f&a1mub)(js9(l5mhbbhosz3gi+m01ex+lo') + parser.add_option('jwt_signing_key', type=str, default=None) + + database_parser = parser.add_subparser('database', default={}) + database_parser.add_option('name', type=str, default='arkindex_dev') + database_parser.add_option('host', type=str, default='localhost') + database_parser.add_option('port', type=int, default=9100) + database_parser.add_option('user', type=str, default='devuser') + database_parser.add_option('password', type=str, default='devdata') + + email_parser = parser.add_subparser('email', default=None) + email_parser.add_option('host', type=str) + email_parser.add_option('port', type=int) + email_parser.add_option('user', type=str) + email_parser.add_option('password', type=str) + email_parser.add_option('error_report_recipients', type=str, many=True, default=[]) + + static_parser = parser.add_subparser('static', default={}) + static_parser.add_option('root_path', type=dir_path, default=None) + static_parser.add_option('cdn_assets_url', type=str, default=None) + static_parser.add_option('mirador_url', type=str, default=None) + static_parser.add_option('universal_viewer_url', type=str, default=None) + static_parser.add_option('frontend_version', type=str, default=None) + + elasticsearch_parser = parser.add_subparser('elasticsearch', default={}) + elasticsearch_parser.add_option('hosts', type=str, many=True, default=['localhost']) + + influxdb_parser = parser.add_subparser('influxdb', default={}) + influxdb_parser.add_option('api_url', type=str, default='http://localhost:8086/') + + gitlab_parser = parser.add_subparser('gitlab', default={}) + gitlab_parser.add_option('app_id', type=str, default=None) + gitlab_parser.add_option('app_secret', type=str, default=None) + + redis_parser = parser.add_subparser('redis', default={}) + redis_parser.add_option('host', type=str, default='localhost') + redis_parser.add_option('capacity', type=int, default=1000) + + csrf_parser = parser.add_subparser('csrf', default={}) + csrf_parser.add_option('cookie_name', type=str, default='arkindex.csrf') + csrf_parser.add_option('cookie_domain', type=str, default=None) + csrf_parser.add_option('cookie_samesite', type=CookieSameSiteOption, default=CookieSameSiteOption.Lax) + csrf_parser.add_option('trusted_origins', type=str, many=True, default=[]) + + session_parser = parser.add_subparser('session', default={}) + session_parser.add_option('cookie_name', type=str, default='arkindex.auth') + session_parser.add_option('cookie_domain', type=str, default=None) + session_parser.add_option('cookie_samesite', type=CookieSameSiteOption, default=CookieSameSiteOption.Lax) + + cors_parser = parser.add_subparser('cors', default={}) + cors_parser.add_option('origin_whitelist', type=str, many=True, default=[ + 'universalviewer.io', # TODO: Remove this one? 
+ 'localhost:8080', + '127.0.0.1:8080', + ]) + cors_parser.add_option('suffixes', type=str, many=True, default=[]) + + ponos_parser = parser.add_subparser('ponos', default={}) + # Do not use file_path here to allow the backend to start without a Ponos key + ponos_parser.add_option('private_key', type=Path, default=(base_dir / 'ponos.key').resolve()) + ponos_parser.add_option('default_env', type=dict, default={}) + + docker_parser = parser.add_subparser('docker', default={}) + docker_parser.add_option('tasks_image', type=str, default='registry.gitlab.com/arkindex/tasks') + + sentry_parser = parser.add_subparser('sentry', default={}) + sentry_parser.add_option('dsn', type=str, default=None) + sentry_parser.add_option('frontend_dsn', type=str, default=None) + + cache_parser = ConfigParser() + cache_parser.add_option('type', type=CacheType, default=None) + cache_parser.add_option('url', type=str, default=None) + cache_parser.add_option('path', type=dir_path, default=None) + + def cache_validator(value): + data = cache_parser.parse_data(value) + cache_type = data['type'] + if cache_type == CacheType.Filesystem and not data.get('path'): + raise ConfigurationError({'path': 'cache.path is required for a filesystem cache'}) + if cache_type in (CacheType.Redis, CacheType.Memcached) and not data.get('url'): + raise ConfigurationError({'url': f'cache.url is required for a {cache_type.name} cache'}) + return data + + parser.add_option('cache', default={}, type=cache_validator) + + s3_parser = ConfigParser() + s3_parser.add_option('access_key_id', type=str, default=None) + s3_parser.add_option('secret_access_key', type=str, default=None) + s3_parser.add_option('endpoint', type=str, default=None) + s3_parser.add_option('region', type=str, default=None) + s3_parser.add_option('thumbnails_bucket', type=str, default='thumbnails') + s3_parser.add_option('staging_bucket', type=str, default='staging') + s3_parser.add_option('ponos_logs_bucket', type=str, default='ponos-logs') + s3_parser.add_option('ponos_artifacts_bucket', type=str, default='ponos-artifacts') + + def s3_validator(value): + data = s3_parser.parse_data(value) + if not data.get('access_key_id') and not data.get('secret_access_key'): + # No configuration specified; + # just ignore and let the system checks warn about this without preventing startup + return data + if not data.get('endpoint') and not data.get('region'): + raise ConfigurationError('One of `s3.endpoint` or `s3.region` is required') + return data + + parser.add_option('s3', type=s3_validator, default={}) + + return parser diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py index 5da5a3a16b69513d6086e8a295c4fb3a6ce7cb15..74438d5cafa08c6579e37a729f825a57ccc3556b 100644 --- a/arkindex/project/settings.py +++ b/arkindex/project/settings.py @@ -10,74 +10,62 @@ For the full list of settings and their values, see https://docs.djangoproject.com/en/1.11/ref/settings/ """ -import logging import os import sys import warnings -from datetime import timedelta from corsheaders.defaults import default_headers +from datetime import timedelta +from pathlib import Path +from arkindex.project.config import get_settings_parser, CacheType +# Build paths inside the project like this: BASE_DIR / ...
+BASE_DIR = Path(__file__).resolve().parent.parent -def env2list(env_name, separator=',', default=[]): - ''' - Load env variable as a list - ''' - value = os.environ.get(env_name) - return value and value.split(separator) or default - - -# Database -DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.postgresql', - 'NAME': os.environ.get('DB_NAME', 'arkindex_dev'), - 'USER': os.environ.get('DB_USER', 'devuser'), - 'PASSWORD': os.environ.get('DB_PASSWORD', 'devdata'), - 'HOST': os.environ.get('DB_HOST', 'localhost'), - 'PORT': os.environ.get('DB_PORT', 9100), - } -} - -# Admins in charge -ADMINS = [('', address) for address in env2list('ADMIN_EMAIL')] +# Used for special cases during configuration parsing and settings loading +TEST_ENV = 'test' in sys.argv -# Build paths inside the project like this: os.path.join(BASE_DIR, ...) -BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -ML_CLASSIFIERS_DIR = os.environ.get('ML_CLASSIFIERS_DIR', os.path.join(BASE_DIR, '../../ml-classifiers')) +CONFIG_PATH = Path(os.environ.get('CONFIG_PATH', BASE_DIR / 'config.yml')) +parser = get_settings_parser(BASE_DIR) +conf = parser.parse(CONFIG_PATH, exist_ok=True) # Read Version either from Docker static file or local file _version = '/etc/arkindex.version' \ if os.path.exists('/etc/arkindex.version') \ - else os.path.join(os.path.dirname(BASE_DIR), 'VERSION') + else BASE_DIR.parent / 'VERSION' with open(_version) as f: VERSION = f.read().strip() -# By default the frontend version is the same as the backend -FRONTEND_VERSION = os.environ.get('FRONTEND_VERSION', VERSION) +ARKINDEX_ENV = conf['arkindex_env'] +ML_CLASSIFIERS_DIR = conf['ml_classifiers_dir'] +SECRET_KEY = conf['secret_key'] +LOCAL_IMAGESERVER_ID = conf['local_imageserver_id'] -# Local IIIF server -LOCAL_IMAGESERVER_ID = int(os.environ.get('LOCAL_IMAGESERVER_ID', 1)) +ALLOWED_HOSTS = conf['allowed_hosts'] +# Docker container name resolution +ALLOWED_HOSTS += ['127.0.0.1', 'localhost', 'backend', 'ark-backend'] -# SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = os.environ.get('SECRET_KEY', 'jf0w^y&ml(caax8f&a1mub)(js9(l5mhbbhosz3gi+m01ex+lo') +# Database +DATABASES = { + 'default': { + 'ENGINE': 'django.db.backends.postgresql', + 'NAME': conf['database']['name'], + 'USER': conf['database']['user'], + 'PASSWORD': conf['database']['password'], + 'HOST': conf['database']['host'], + 'PORT': conf['database']['port'], + } +} # SECURITY WARNING: don't run with debug turned on in production! 
-ARKINDEX_ENV = os.environ.get('ARKINDEX_ENV', 'dev') DEBUG = ARKINDEX_ENV == 'dev' # Run all system checks when starting the server ALL_CHECKS = os.environ.get('ALL_CHECKS') == 'true' or 'runserver' in sys.argv -ALLOWED_HOSTS = env2list('ALLOWED_HOSTS') - -# Docker container name resolution -ALLOWED_HOSTS += ['127.0.0.1', 'localhost', 'backend', 'ark-backend'] - # Required for django-debug-toolbar INTERNAL_IPS = ['127.0.0.1', '127.0.1.1'] # Application definition - INSTALLED_APPS = [ 'django.contrib.admin', 'django.contrib.auth', @@ -175,9 +163,11 @@ USE_TZ = True # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/1.11/howto/static-files/ STATIC_URL = '/static/' -STATIC_ROOT = os.environ.get('STATIC_ROOT') +STATIC_ROOT = conf['static']['root_path'] STATICFILES_DIRS = [] +# By default the frontend version is the same as the backend +FRONTEND_VERSION = conf['static']['frontend_version'] or VERSION # API REST_FRAMEWORK = { @@ -198,15 +188,13 @@ SIMPLE_JWT = { 'USER_ID_CLAIM': 'agent_id', 'ROTATE_REFRESH_TOKENS': True, 'ACCESS_TOKEN_LIFETIME': timedelta(hours=6), - 'SIGNING_KEY': os.environ.get('SIGNING_KEY', SECRET_KEY), + 'SIGNING_KEY': conf['jwt_signing_key'] or SECRET_KEY, } SEARCH_FILTER_MAX_TERMS = 10 # Elastic search config -ELASTIC_SEARCH_HOSTS = [ - os.environ.get('ES_HOST', 'localhost'), -] +ELASTIC_SEARCH_HOSTS = conf['elasticsearch']['hosts'] # The Scroll API is required to go over 10K results ES_RESULTS_LIMIT = 10000 # ES defaults to three items returned in a nested query if the inner_hits size is not defined @@ -214,11 +202,8 @@ ES_INNER_RESULTS_LIMIT = 6 # Maximum length for query strings—very long queries can cause timeouts ES_QUERY_STRING_MAX_LENGTH = 1000 -# Silent logger for elasticsearch -logging.getLogger('elasticsearch').setLevel(logging.WARNING) - # InfluxDB API root -INFLUXDB_API_URL = os.environ.get('INFLUXDB_API_URL', 'http://localhost:8086/') +INFLUXDB_API_URL = conf['influxdb']['api_url'] # Use SSL proxy SECURE_PROXY_SSL_HEADER = ('HTTP_X_FORWARDED_PROTO', 'https') @@ -237,56 +222,56 @@ IIIF_DOWNLOAD_TIMEOUT = (30, 60) # check_images sample size when checking all servers CHECK_IMAGES_SAMPLE_SIZE = 20 -TRANSCRIPTIONS_IMPORT_QUEUE_SIZE = 25000 -TRANSCRIPTIONS_IMPORT_CHUNK_SIZE = 10000 - # GitLab OAuth -GITLAB_APP_ID = os.environ.get('GITLAB_APP_ID') -GITLAB_APP_SECRET = os.environ.get('GITLAB_APP_SECRET') +GITLAB_APP_ID = conf['gitlab']['app_id'] +GITLAB_APP_SECRET = conf['gitlab']['app_secret'] + +if conf['cache']['type'] is None: + conf['cache']['type'] = CacheType.Dummy if DEBUG else CacheType.Memory -if os.environ.get('REDIS_CACHE_LOCATION'): +if conf['cache']['type'] == CacheType.Redis: CACHES = { 'default': { 'BACKEND': 'redis_cache.RedisCache', - 'LOCATION': os.environ.get('REDIS_CACHE_LOCATION'), + 'LOCATION': conf['cache']['url'], } } -# Cache into memcached -elif os.environ.get('MEMCACHED_HOST'): +elif conf['cache']['type'] == CacheType.Memcached: CACHES = { 'default': { 'BACKEND': 'django.core.cache.backends.memcached.MemcachedCache', - 'LOCATION': os.environ['MEMCACHED_HOST'], + 'LOCATION': conf['cache']['url'], } } -elif os.environ.get('CACHE_DIR'): +elif conf['cache']['type'] == CacheType.Filesystem: CACHES = { 'default': { 'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache', - 'LOCATION': os.environ['CACHE_DIR'], + 'LOCATION': str(conf['cache']['path']), } } -else: - # On dev, use a dummy cache - # On prod, use at least a local memory cache - _cache = 'django.core.cache.backends.dummy.DummyCache' if DEBUG else
'django.core.cache.backends.locmem.LocMemCache' +elif conf['cache']['type'] == CacheType.Memory: CACHES = { 'default': { - 'BACKEND': _cache + 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache' + } + } +elif conf['cache']['type'] == CacheType.Dummy: + CACHES = { + 'default': { + 'BACKEND': 'django.core.cache.backends.dummy.DummyCache' } } # Django Channels layer using Redis -REDIS_HOST = os.environ.get('REDIS_HOST', 'localhost') -REDIS_CAPACITY = int(os.environ.get('REDIS_CAPACITY', 1000)) CHANNEL_LAYERS = { "default": { "BACKEND": "channels_redis.core.RedisChannelLayer", "CONFIG": { "hosts": [ - (REDIS_HOST, 6379) + (conf['redis']['host'], 6379) ], - "capacity": REDIS_CAPACITY, + "capacity": conf['redis']['capacity'], }, }, } @@ -329,6 +314,9 @@ LOGGING = { 'handlers': ['console'], 'level': 'INFO', }, + 'elasticsearch': { + 'level': 'WARNING', + }, 'elasticsearch.trace': { 'handlers': ['console_debug'], 'level': 'DEBUG', @@ -345,52 +333,34 @@ LOGGING = { }, } -# Sentry Error reporting -SENTRY_DSN = os.environ.get('SENTRY_DSN') -FRONTEND_SENTRY_DSN = os.environ.get('FRONTEND_SENTRY_DSN') - # Email EMAIL_SUBJECT_PREFIX = '[Arkindex {}] '.format(ARKINDEX_ENV) -if os.environ.get('EMAIL_HOST'): +if conf['email']: + ADMINS = [('', address) for address in conf['email']['error_report_recipients']] EMAIL_BACKEND = 'django.core.mail.backends.smtp.EmailBackend' - EMAIL_HOST = os.environ.get('EMAIL_HOST') - EMAIL_PORT = os.environ.get('EMAIL_PORT') - EMAIL_HOST_USER = os.environ.get('EMAIL_HOST_USER') + EMAIL_HOST = conf['email']['host'] + EMAIL_PORT = conf['email']['port'] + EMAIL_HOST_USER = conf['email']['user'] DEFAULT_FROM_EMAIL = SERVER_EMAIL = EMAIL_HOST_USER - EMAIL_HOST_PASSWORD = os.environ.get('EMAIL_HOST_PASSWORD') + EMAIL_HOST_PASSWORD = conf['email']['password'] EMAIL_USE_TLS = True else: EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend' # Cookies -def samesite(name, default): - env = '{}_COOKIE_SAMESITE'.format(name) - value = os.environ.get(env, default).lower() - if value == 'none': - value = None - assert value in ('lax', 'strict', None), 'Invalid {} value {}'.format(env, value) - return value - - -CSRF_COOKIE_NAME = os.environ.get('CSRF_COOKIE_NAME', 'arkindex.csrf') -CSRF_TRUSTED_ORIGINS = env2list('CSRF_TRUSTED_ORIGINS') -CSRF_COOKIE_DOMAIN = os.environ.get('COOKIE_DOMAIN') -CSRF_COOKIE_SAMESITE = samesite('CSRF', 'lax') - -SESSION_COOKIE_NAME = os.environ.get('SESSION_COOKIE_NAME', 'arkindex.auth') -SESSION_COOKIE_DOMAIN = os.environ.get('COOKIE_DOMAIN') -SESSION_COOKIE_SAMESITE = samesite('SESSION', 'lax') +CSRF_COOKIE_NAME = conf['csrf']['cookie_name'] +CSRF_COOKIE_DOMAIN = conf['csrf']['cookie_domain'] +CSRF_COOKIE_SAMESITE = conf['csrf']['cookie_samesite'].value +CSRF_TRUSTED_ORIGINS = conf['csrf']['trusted_origins'] + +SESSION_COOKIE_NAME = conf['session']['cookie_name'] +SESSION_COOKIE_DOMAIN = conf['session']['cookie_domain'] +SESSION_COOKIE_SAMESITE = conf['session']['cookie_samesite'].value # Required for authentication over websockets SESSION_COOKIE_HTTPONLY = False -CORS_ORIGIN_WHITELIST = env2list('CORS_ORIGIN_WHITELIST', default=[ - 'universalviewer.io', - 'localhost:8080', - '127.0.0.1:8080', - 'localhost:5000', - '127.0.0.1:5000', -]) +CORS_ORIGIN_WHITELIST = conf['cors']['origin_whitelist'] CORS_ALLOW_CREDENTIALS = True CORS_ALLOW_HEADERS = default_headers + ( 'cache-control', # Allow the frontend to prevent caching some API requests @@ -398,67 +368,68 @@ CORS_ALLOW_HEADERS = default_headers + ( CORS_URLS_REGEX = r'^/(api|ponos)/.*$' # 
Support CORS suffixes -cors_suffixes = env2list('CORS_SUFFIXES') -if cors_suffixes: +if conf['cors']['suffixes']: CORS_ORIGIN_REGEX_WHITELIST = [ r"^https://.+{}".format(suffix) - for suffix in cors_suffixes + for suffix in conf['cors']['suffixes'] ] # Amazon S3 -PONOS_AWS_ACCESS_KEY = AWS_ACCESS_KEY = os.environ.get('AWS_ACCESS_KEY_ID') -PONOS_AWS_SECRET_KEY = AWS_SECRET_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY') -PONOS_AWS_ENDPOINT = AWS_ENDPOINT = os.environ.get('AWS_ENDPOINT') -PONOS_AWS_REGION = AWS_REGION = os.environ.get('AWS_REGION') -PONOS_S3_LOGS_BUCKET = os.environ.get('PONOS_S3_LOGS_BUCKET', 'ponos-logs') -PONOS_S3_ARTIFACTS_BUCKET = os.environ.get('PONOS_S3_ARTIFACTS_BUCKET', 'ponos-artifacts') -AWS_THUMBNAIL_BUCKET = os.environ.get('AWS_THUMBNAIL_BUCKET', 'thumbnails') -AWS_STAGING_BUCKET = os.environ.get('AWS_STAGING_BUCKET', 'staging') +PONOS_AWS_ACCESS_KEY = AWS_ACCESS_KEY = conf['s3']['access_key_id'] +PONOS_AWS_SECRET_KEY = AWS_SECRET_KEY = conf['s3']['secret_access_key'] +PONOS_AWS_ENDPOINT = AWS_ENDPOINT = conf['s3']['endpoint'] +PONOS_AWS_REGION = AWS_REGION = conf['s3']['region'] +PONOS_S3_LOGS_BUCKET = conf['s3']['ponos_logs_bucket'] +PONOS_S3_ARTIFACTS_BUCKET = conf['s3']['ponos_artifacts_bucket'] +AWS_THUMBNAIL_BUCKET = conf['s3']['thumbnails_bucket'] +AWS_STAGING_BUCKET = conf['s3']['staging_bucket'] # Ponos integration -if os.environ.get('PONOS_TASK'): - # In a ponos docker task - PONOS_RECIPE = None - ML_CLASSIFIERS_DIR = '/arkindex/classifiers' +_ponos_env = { + 'ARKINDEX_API_CSRF_COOKIE': CSRF_COOKIE_NAME +} +if DEBUG: + # In dev, include overridable API info + _ponos_env.update({ + 'ARKINDEX_API_URL': 'http://localhost:8000/api/v1/', + 'ARKINDEX_API_TOKEN': 'deadbeefTestToken', + }) +_ponos_env.update(conf['ponos']['default_env']) +PONOS_RECIPE = { + 'env': _ponos_env, +} +PONOS_PRIVATE_KEY = conf['ponos']['private_key'] -else: - # As scheduler or dev - PONOS_RECIPE = { - 'env': { - 'ARKINDEX_API_URL': os.environ.get('ARKINDEX_API_URL', 'http://localhost:8000/api/v1/'), - 'ARKINDEX_API_TOKEN': os.environ.get('ARKINDEX_API_TOKEN', 'deadbeefTestToken'), - 'ARKINDEX_API_CSRF_COOKIE': CSRF_COOKIE_NAME, - }, - } - PONOS_PRIVATE_KEY = os.environ.get('PONOS_PRIVATE_KEY_PATH', os.path.join(BASE_DIR, 'ponos.key')) +# Docker images used by our ponos workflow +ARKINDEX_TASKS_IMAGE = conf['docker']['tasks_image'] -if 'test' in sys.argv: +# User groups with special permissions +INTERNAL_GROUP_ID = conf['internal_group_id'] + +# CDN Assets URL to use for arkindex remote CSS/JS/Images assets +CDN_ASSETS_URL = conf['static']['cdn_assets_url'] +if CDN_ASSETS_URL is not None: + CDN_ASSETS_URL = CDN_ASSETS_URL.rstrip('/') + STATIC_URL = f"{CDN_ASSETS_URL}/{VERSION}/static/" + +MIRADOR_URL = conf['static']['mirador_url'] +UNIVERSAL_VIEWER_URL = conf['static']['universal_viewer_url'] + +SENTRY_DSN = conf['sentry']['dsn'] +FRONTEND_SENTRY_DSN = conf['sentry']['frontend_dsn'] + +if TEST_ENV: # Overrides for unit tests AWS_ACCESS_KEY = 'test' AWS_SECRET_KEY = 'test' AWS_ENDPOINT = 'http://s3' PONOS_PRIVATE_KEY = None + LOCAL_IMAGESERVER_ID = 1 # Turn Django's UnorderedObjectListWarning into exceptions warnings.filterwarnings('error', category=RuntimeWarning, module='django.core.paginator') warnings.filterwarnings('error', category=RuntimeWarning, module='rest_framework.pagination') -# Docker images used by our ponos workflow -ARKINDEX_APP_IMAGE = os.environ.get('ARKINDEX_APP_IMAGE', 'registry.gitlab.com/arkindex/backend') -ARKINDEX_TASKS_IMAGE = 
os.environ.get('ARKINDEX_TASKS_IMAGE', 'registry.gitlab.com/arkindex/tasks') - -# User groups with special permissions -INTERNAL_GROUP_ID = int(os.environ.get('INTERNAL_GROUP_ID', 2)) - -# CDN Assets URL to use for arkindex remote CSS/JS/Images assets -CDN_ASSETS_URL = os.environ.get('CDN_ASSETS_URL') -if CDN_ASSETS_URL is not None: - CDN_ASSETS_URL = CDN_ASSETS_URL.rstrip('/') - STATIC_URL = f"{CDN_ASSETS_URL}/{VERSION}/static/" - -MIRADOR_URL = os.environ.get('MIRADOR_URL') -UNIVERSAL_VIEWER_URL = os.environ.get('UNIVERSAL_VIEWER_URL') - # Optional unit tests runner with code coverage try: import django_nose # noqa @@ -472,7 +443,7 @@ except ImportError: pass # Local settings -if 'test' not in sys.argv: +if DEBUG and not TEST_ENV: try: from .local_settings import * # noqa except ImportError: diff --git a/arkindex/project/tests/config_samples/defaults.yaml b/arkindex/project/tests/config_samples/defaults.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2730b5ddf753b376a937a0f4e8cce749c73f8fab --- /dev/null +++ b/arkindex/project/tests/config_samples/defaults.yaml @@ -0,0 +1,67 @@ +allowed_hosts: [] +arkindex_env: dev +cache: + path: null + type: null + url: null +cors: + origin_whitelist: + - universalviewer.io + - localhost:8080 + - 127.0.0.1:8080 + suffixes: [] +csrf: + cookie_domain: null + cookie_name: arkindex.csrf + cookie_samesite: lax + trusted_origins: [] +database: + host: localhost + name: arkindex_dev + password: devdata + port: 9100 + user: devuser +docker: + tasks_image: registry.gitlab.com/arkindex/tasks +elasticsearch: + hosts: + - localhost +email: null +gitlab: + app_id: null + app_secret: null +influxdb: + api_url: http://localhost:8086/ +internal_group_id: 2 +jwt_signing_key: null +local_imageserver_id: 1 +ml_classifiers_dir: /somewhere/ml-classifiers +ponos: + default_env: {} + private_key: /somewhere/backend/arkindex/ponos.key +redis: + capacity: 1000 + host: localhost +s3: + access_key_id: null + endpoint: null + ponos_artifacts_bucket: ponos-artifacts + ponos_logs_bucket: ponos-logs + region: null + secret_access_key: null + staging_bucket: staging + thumbnails_bucket: thumbnails +secret_key: jf0w^y&ml(caax8f&a1mub)(js9(l5mhbbhosz3gi+m01ex+lo +sentry: + dsn: null + frontend_dsn: null +session: + cookie_domain: null + cookie_name: arkindex.auth + cookie_samesite: lax +static: + cdn_assets_url: null + frontend_version: null + mirador_url: null + root_path: null + universal_viewer_url: null diff --git a/arkindex/project/tests/config_samples/errors.yaml b/arkindex/project/tests/config_samples/errors.yaml new file mode 100644 index 0000000000000000000000000000000000000000..962d579177010937688847f5dc87ff8f83ca5757 --- /dev/null +++ b/arkindex/project/tests/config_samples/errors.yaml @@ -0,0 +1,57 @@ +allowed_hosts: all of them +arkindex_env: off +cache: + type: redis +cors: + origin_whitelist: france + suffixes: 1 +csrf: + cookie_domain: null + cookie_name: null + cookie_samesite: relax + trusted_origins: 12.5 +database: + host: null + password: hunter2 + port: rotterdam + user: bob +docker: + tasks_image: + here: have a dict +elasticsearch: + hosts: ghosts +email: + host: 123 +gitlab: + app_id: yes + app_secret: [] +influxdb: + api_url: no +internal_group_id: 2 +jwt_signing_key: null +local_imageserver_id: 1 +ml_classifiers_dir: /aaaaa +ponos: + default_env: {} + private_key: /dev/zero +redis: + capacity: over nine thousand + host: radish +s3: + endpoint: null + ponos_artifacts_bucket: {} + ponos_logs_bucket: null + region: null + 
secret_access_key: null + staging_bucket: null + thumbnails_bucket: 1234 +secret_key: false +session: + cookie_domain: -1 + cookie_name: .inf + cookie_samesite: foo +static: + cdn_assets_url: 1 + mirador_url: 2 + root_path: /aaaaa + universal_viewer_url: .nan diff --git a/arkindex/project/tests/config_samples/expected_errors.yaml b/arkindex/project/tests/config_samples/expected_errors.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb59d9e79db598aa37c8a128dc357c17d919a5fa --- /dev/null +++ b/arkindex/project/tests/config_samples/expected_errors.yaml @@ -0,0 +1,21 @@ +cache: + url: cache.url is required for a Redis cache +cors: + suffixes: "'int' object is not iterable" +csrf: + cookie_samesite: "'relax' is not a valid CookieSameSiteOption" + trusted_origins: "'float' object is not iterable" +database: + port: "invalid literal for int() with base 10: 'rotterdam'" +email: + password: This option is required + port: This option is required + user: This option is required +ml_classifiers_dir: /aaaaa does not exist +redis: + capacity: "invalid literal for int() with base 10: 'over nine thousand'" +session: + cookie_samesite: "'foo' is not a valid CookieSameSiteOption" +static: + root_path: /aaaaa does not exist + diff --git a/arkindex/project/tests/config_samples/override.yaml b/arkindex/project/tests/config_samples/override.yaml new file mode 100644 index 0000000000000000000000000000000000000000..62d38b08adfbe05a245bbd2faa741a5800116d25 --- /dev/null +++ b/arkindex/project/tests/config_samples/override.yaml @@ -0,0 +1,76 @@ +allowed_hosts: +- something.com +arkindex_env: prod +cache: + path: / + type: filesystem + url: http://aaa +cors: + origin_whitelist: + - localtoast:1337 + suffixes: + - a + - b +csrf: + cookie_domain: something.com + cookie_name: csrfcookie + cookie_samesite: strict + trusted_origins: + - trust-no-one +database: + host: dbhost + name: arkindex_db + password: hunter2 + port: 9123 + user: littlebobbytables +docker: + tasks_image: registry.gitlab.com/arkindex/stonks +elasticsearch: + hosts: + - google +email: + error_report_recipients: + - noreply@nasa.gov + host: smtp.wanadoo.fr + password: hunter2 + port: 25 + user: teklia@wanadoo.fr +gitlab: + app_id: a + app_secret: b +influxdb: + api_url: http://graph/ +internal_group_id: 4 +jwt_signing_key: deadbeef +local_imageserver_id: 45 +ml_classifiers_dir: /tmp +ponos: + default_env: + A: B + private_key: /a/b/c +redis: + capacity: 9001 + host: radish +s3: + access_key_id: abcd + endpoint: http://somewhere + ponos_artifacts_bucket: zstandardland + ponos_logs_bucket: plaintexttown + region: middle-earth-1 + secret_access_key: hunter2 + staging_bucket: dropboxbutworse + thumbnails_bucket: toenails +secret_key: abcdef +sentry: + dsn: https://nowhere + frontend_dsn: https://nowhere/frontend +session: + cookie_domain: cookie-dolmen + cookie_name: stonehenge + cookie_samesite: none +static: + cdn_assets_url: http://cdn.teklia.horse/ + frontend_version: 1.2.3-alpha4 + mirador_url: gopher://mirador/ + root_path: / + universal_viewer_url: gopher://uv/ diff --git a/arkindex/project/tests/test_checks.py b/arkindex/project/tests/test_checks.py index f01cada5fc50c51e5e33ddc87b1a598f6a31f63e..8e529e1b92a68f0698b0e63934cc84322fede3c8 100644 --- a/arkindex/project/tests/test_checks.py +++ b/arkindex/project/tests/test_checks.py @@ -3,6 +3,7 @@ from django.test import TestCase, override_settings from django.conf import settings from django.urls import path from django.core.checks import Error, Warning +from pathlib 
import Path from subprocess import CalledProcessError import subprocess @@ -35,30 +36,6 @@ class ChecksTestCase(TestCase): ] ) - @patch('arkindex.project.checks.os.path.isdir') - def test_ml_tools_check(self, isdir_mock): - """ - Test the ML tools existence checks - """ - from arkindex.project.checks import ml_tools_check - - isdir_mock.return_value = True - self.assertListEqual(ml_tools_check(), []) - - isdir_mock.return_value = False - - with self.settings(ML_CLASSIFIERS_DIR='oops'): - self.assertListEqual( - ml_tools_check(), - [ - Error( - 'ML tools directory does not exist', - hint='settings.ML_CLASSIFIERS_DIR = "oops"', - id='arkindex.E008', - ), - ], - ) - def test_local_imageserver_check(self): """ Test the local imageserver existence check @@ -85,19 +62,12 @@ class ChecksTestCase(TestCase): @patch('arkindex.project.checks.subprocess.run') @override_settings( - ARKINDEX_APP_IMAGE='nope', ARKINDEX_TASKS_IMAGE='nuh', ) def test_docker_images_check(self, run_mock): from arkindex.project.checks import docker_images_check expected_calls = [ - call( - ['docker', 'image', 'inspect', 'nope'], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - check=True, - ), call( ['docker', 'image', 'inspect', 'nuh'], stdout=subprocess.PIPE, @@ -106,20 +76,7 @@ class ChecksTestCase(TestCase): ), ] - run_mock.side_effect = [CalledProcessError(1, ''), None, None] - self.assertListEqual(docker_images_check(), [ - Error( - 'Docker image with tag "nope" was not found.', - hint='settings.ARKINDEX_APP_IMAGE = "nope"', - id='arkindex.E006', - ) - ]) - - self.assertEqual(run_mock.call_count, 2) - self.assertEqual(run_mock.call_args_list, expected_calls) - - run_mock.reset_mock() - run_mock.side_effect = [None, CalledProcessError(1, ''), CalledProcessError(1, '')] + run_mock.side_effect = CalledProcessError(1, '') self.assertListEqual(docker_images_check(), [ Error( 'Docker image with tag "nuh" was not found.', @@ -128,7 +85,7 @@ class ChecksTestCase(TestCase): ) ]) - self.assertEqual(run_mock.call_count, 2) + self.assertEqual(run_mock.call_count, 1) self.assertEqual(run_mock.call_args_list, expected_calls) @patch('arkindex.project.checks.subprocess.run') @@ -142,14 +99,8 @@ class ChecksTestCase(TestCase): with self.settings(ARKINDEX_APP_IMAGE='nope', ARKINDEX_TASKS_IMAGE='nuh'): self.assertListEqual(docker_images_check(), []) - self.assertEqual(run_mock.call_count, 2) + self.assertEqual(run_mock.call_count, 1) self.assertEqual(run_mock.call_args_list, [ - call( - ['docker', 'image', 'inspect', 'nope'], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - check=True, - ), call( ['docker', 'image', 'inspect', 'nuh'], stdout=subprocess.PIPE, @@ -158,6 +109,7 @@ class ChecksTestCase(TestCase): ), ]) + @override_settings() @patch('arkindex.project.checks.parse_recipe') def test_ponos_recipe_check(self, parse_mock): from arkindex.project.checks import ponos_recipe_check @@ -170,6 +122,30 @@ class ChecksTestCase(TestCase): id='arkindex.E007', )]) + settings.CONFIG_PATH = Path('/somewhere/config.yml') + del settings.PONOS_RECIPE['env'] + parse_mock.side_effect = None + self.assertListEqual(ponos_recipe_check(), [ + Warning( + 'The ARKINDEX_API_URL environment variable should be defined ' + 'to allow API client autoconfiguration in Ponos tasks', + hint='`ponos.default_env` in /somewhere/config.yml', + id='arkindex.W006', + ), + Warning( + 'The ARKINDEX_API_TOKEN environment variable should be defined ' + 'to allow API client autoconfiguration in Ponos tasks', + hint='`ponos.default_env` in /somewhere/config.yml', 
+ id='arkindex.W006', + ), + Warning( + 'The ARKINDEX_API_CSRF_COOKIE environment variable should be defined ' + 'to allow API client autoconfiguration in Ponos tasks', + hint='`ponos.default_env` in /somewhere/config.yml', + id='arkindex.W006', + ), + ]) + @override_settings() def test_internal_group_check(self): from arkindex.project.checks import internal_group_check @@ -223,7 +199,6 @@ class ChecksTestCase(TestCase): del settings.AWS_ACCESS_KEY del settings.AWS_SECRET_KEY - del settings.AWS_ENDPOINT del settings.AWS_THUMBNAIL_BUCKET del settings.AWS_STAGING_BUCKET self.assertCountEqual(s3_check(), [ @@ -237,11 +212,6 @@ class ChecksTestCase(TestCase): hint='settings.AWS_SECRET_KEY = None', id='arkindex.E011', ), - Error( - 'AWS endpoint is missing; all S3-related features will fail.', - hint='settings.AWS_ENDPOINT = None', - id='arkindex.E011', - ), Error( 'S3 thumbnails bucket name is missing; all S3-related features will fail.', hint='settings.AWS_THUMBNAIL_BUCKET = None', @@ -256,7 +226,6 @@ class ChecksTestCase(TestCase): settings.AWS_ACCESS_KEY = 'key' settings.AWS_SECRET_KEY = 's3kr3t' - settings.AWS_ENDPOINT = 'http://somewhere' settings.AWS_THUMBNAIL_BUCKET = 'Thumbs.db' settings.AWS_STAGING_BUCKET = 'buckette' self.assertListEqual(s3_check(), []) diff --git a/arkindex/project/tests/test_config.py b/arkindex/project/tests/test_config.py new file mode 100644 index 0000000000000000000000000000000000000000..e44e962b129ddfeeff5959adfbfc2a4c38f78bea --- /dev/null +++ b/arkindex/project/tests/test_config.py @@ -0,0 +1,143 @@ +from unittest import TestCase +from unittest.mock import patch +from enum import Enum +from io import StringIO +from pathlib import Path +from arkindex.project.config import dir_path, file_path, ConfigurationError, ConfigParser, get_settings_parser +import yaml +import tempfile + +SAMPLES = Path(__file__).resolve().parent / 'config_samples' + + +class TestConfig(TestCase): + + @patch('arkindex.project.config._all_checks') + def test_file_path(self, all_checks_mock): + all_checks_mock.return_value = True + + with self.assertRaisesRegex(AssertionError, ' does not exist'): + file_path('/aaaaaaa') + + with tempfile.NamedTemporaryFile() as f: + parent_path = Path(f.name).parent + with self.assertRaisesRegex(AssertionError, ' is not a file'): + file_path(parent_path) + + self.assertEqual(file_path(f.name), Path(f.name)) + + # Existence checks should be ignored without all_checks + all_checks_mock.return_value = False + self.assertEqual(file_path(parent_path), parent_path) + self.assertEqual(file_path('/aaaaaaa'), Path('/aaaaaaa')) + + @patch('arkindex.project.config._all_checks') + def test_dir_path(self, all_checks_mock): + all_checks_mock.return_value = True + with tempfile.TemporaryDirectory() as d: + self.assertEqual(dir_path(d), Path(d)) + + with self.assertRaisesRegex(AssertionError, ' does not exist'): + dir_path('/aaaaaaa') + + with tempfile.NamedTemporaryFile() as f: + with self.assertRaisesRegex(AssertionError, ' is not a directory'): + dir_path(f.name) + + # Existence checks should be ignored without all_checks + all_checks_mock.return_value = False + self.assertEqual(dir_path(f.name), Path(f.name)) + self.assertEqual(dir_path('/aaaaaaa'), Path('/aaaaaaa')) + + def test_configuration_error(self): + error = ConfigurationError({'a': 'b'}) + self.assertDictEqual(error.errors, {'a': 'b'}) + self.assertEqual(str(error), '{"a": "b"}') + self.assertEqual(repr(error), 'ConfigurationError({"a": "b"})') + + def test_add_option(self): + parser = ConfigParser() + 
parser.add_option('test', type=int) + with self.assertRaisesRegex(AssertionError, 'test is an already defined option'): + parser.add_option('test') + with self.assertRaisesRegex(AssertionError, 'Option type must be callable'): + parser.add_option('toast', type=...) + + def test_parse_not_found(self): + parser = ConfigParser() + parser.add_option('something', default='thing') + with self.assertRaises(FileNotFoundError): + parser.parse(Path('/aaaaaaa')) + self.assertDictEqual( + parser.parse(Path('/aaaaaaa'), exist_ok=True), + {'something': 'thing'}, + ) + + def _dump_settings(self, data): + """ + Dump settings as a YAML string, but turn non-primitive YAML types into their string representation. + """ + stream = StringIO() + dumper = yaml.SafeDumper(stream) + + def str_representer(self, data): + if isinstance(data, Enum): + data = data.value + else: + data = str(data) + return self.represent_str(data) + + dumper.add_representer(None, str_representer) + dumper.ignore_aliases = lambda *args: True + + try: + dumper.open() + dumper.represent(data) + dumper.close() + finally: + dumper.dispose() + return stream.getvalue() + + # Ignore non-existent paths + @patch('arkindex.project.config.dir_path', new=Path) + @patch('arkindex.project.config.file_path', new=Path) + def test_settings_defaults(self): + parser = get_settings_parser(Path('/somewhere/backend/arkindex')) + self.assertIsInstance(parser, ConfigParser) + data = parser.parse_data({}) + + with (SAMPLES / 'defaults.yaml').open() as f: + expected = f.read() + + actual = self._dump_settings(data) + + self.maxDiff = None + self.assertEqual(expected, actual) + + @patch('arkindex.project.config.dir_path', new=Path) + @patch('arkindex.project.config.file_path', new=Path) + def test_settings_override(self): + parser = get_settings_parser(Path('/somewhere/backend/arkindex')) + self.assertIsInstance(parser, ConfigParser) + data = parser.parse(SAMPLES / 'override.yaml') + + with (SAMPLES / 'override.yaml').open() as f: + expected = f.read() + + actual = self._dump_settings(data) + + self.maxDiff = None + self.assertEqual(expected, actual) + + @patch('arkindex.project.config._all_checks') + def test_settings_errors(self, all_checks_mock): + all_checks_mock.return_value = True + parser = get_settings_parser(Path('/somewhere/backend/arkindex')) + self.assertIsInstance(parser, ConfigParser) + with self.assertRaises(ConfigurationError) as e: + parser.parse(SAMPLES / 'errors.yaml') + + with (SAMPLES / 'expected_errors.yaml').open() as f: + expected_errors = yaml.safe_load(f.read()) + self.maxDiff = None + self.assertDictEqual(expected_errors, e.exception.errors) diff --git a/ci/static-collect.sh b/ci/static-collect.sh index aa1e28588a7ad2234fa68e61a60b3686ceabc658..989ac6442b5338cfe0391ad1540adb25f9cd8996 100755 --- a/ci/static-collect.sh +++ b/ci/static-collect.sh @@ -1,4 +1,5 @@ #!/bin/sh mkdir -p static pip install -e . -PONOS_DATA_DIR=/tmp STATIC_ROOT=$(pwd)/static arkindex/manage.py collectstatic +echo "static: {root_path: '$(pwd)/static'}" > "$CONFIG_PATH" +arkindex/manage.py collectstatic --noinput
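
For reference, the one-line configuration written by the CI job and by ci/static-collect.sh above is the same structure a deployment would spell out in config.yml; any option left out falls back to the parser defaults listed in arkindex/project/tests/config_samples/defaults.yaml. A minimal sketch of the file CI generates:

# Hypothetical config.yml for a deployment where only the database differs from the defaults.
database:
  host: postgres
  port: 5432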
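
The new ConfigParser can also be exercised on its own; the sketch below uses only the API added in arkindex/project/config.py, but the 'host' and 'database' options it declares are illustrative rather than the real Arkindex settings (those come from get_settings_parser). The 'rotterdam' value mirrors the errors.yaml test sample.

# Minimal sketch of the ConfigParser API introduced in this patch.
from arkindex.project.config import ConfigParser, ConfigurationError

parser = ConfigParser()
parser.add_option('host', type=str, default='localhost')
database = parser.add_subparser('database', default={})
database.add_option('port', type=int, default=5432)

# Values are coerced by each option's type; missing options fall back to their defaults.
print(parser.parse_data({'database': {'port': '9100'}}))
# {'host': 'localhost', 'database': {'port': 9100}}

# Errors are aggregated per option into a nested dict instead of failing on the first one.
try:
    parser.parse_data({'database': {'port': 'rotterdam'}})
except ConfigurationError as e:
    print(e.errors)
    # {'database': {'port': "invalid literal for int() with base 10: 'rotterdam'"}}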