Commit 3930813b authored by Erwan Rouchet, committed by Bastien Abadie

Use a YAML file for settings

Can't have failing tests if you don't have tests

Allow missing config file

Settings cleanup

Remove ml_tools_check

Update Ponos recipe check

Unit testing

Fix lint

Allow starting without a Ponos private key

Remove maxDiff

Fix tests
parent 23a4d244
Showing 797 additions and 428 deletions
......@@ -11,10 +11,10 @@ media
workers
.vscode
local_settings.py
arkindex/iiif-users/
.coverage
htmlcov
ponos
openapi/*.yml
!openapi/paths.yml
*.key
arkindex/config.yml
......@@ -16,6 +16,7 @@ stages:
- "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.com/arkindex/common#egg=arkindex-common"
- "pip install -e git+https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.com/arkindex/ponos#egg=ponos-server"
- pip install -r tests-requirements.txt codecov
- "echo 'database: {host: postgres, port: 5432}' > $CONFIG_PATH"
variables:
# For the postgres image
......@@ -24,8 +25,7 @@ stages:
POSTGRES_PASSWORD: devdata
# For the backend
DB_HOST: postgres
DB_PORT: 5432
CONFIG_PATH: "$CI_PROJECT_DIR/config.yml"
# Pip cache
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
......
......@@ -141,7 +141,7 @@ class Command(BaseCommand):
recipe = settings.PONOS_RECIPE.copy()
recipe['tasks'] = tasks_config
recipe['env'].update(env_vars)
recipe.setdefault('env', {}).update(env_vars)
workflow = Workflow.objects.create(recipe=yaml.dump(recipe))
logger.info('Created Workflow with id {}'.format(workflow.id))
......
......@@ -68,25 +68,7 @@ class DataImport(IndexableModel):
raise ValidationError('Git repository does not have any valid credentials')
if self.mode == DataImportMode.Repository:
tasks = {
import_task_name: {
'image': settings.ARKINDEX_APP_IMAGE,
'command': 'manage.py import {}'.format(self.id),
'env': {
'ARKINDEX_ENV': 'prod',
'DB_HOST': settings.DATABASES['default']['HOST'],
'DB_PORT': settings.DATABASES['default']['PORT'],
'DB_USER': settings.DATABASES['default']['USER'],
'DB_PASSWORD': settings.DATABASES['default']['PASSWORD'],
'DB_NAME': settings.DATABASES['default']['NAME'],
'LOCAL_IMAGESERVER_ID': settings.LOCAL_IMAGESERVER_ID,
'REDIS_HOST': settings.REDIS_HOST,
# Some empty folder to bypass the system check
'ML_CLASSIFIERS_DIR': '/data/current',
},
'tags': ['agent:git'],
},
}
raise NotImplementedError
elif self.mode == DataImportMode.Elements:
assert self.payload, \
......
......@@ -5,7 +5,6 @@ from arkindex.documents.models import Element
from arkindex.dataimport.models import DataImport
from arkindex.dataimport.filetypes import IIIFFileType
from arkindex.dataimport.git import GitFlow, SimpleDiff, DiffType
from ponos.models import State
import os.path
import tempfile
import shutil
......@@ -199,116 +198,6 @@ class TestGitFlow(FixtureTestCase):
self.assertEqual(flow.repo.iter_commits.call_count, 1)
self.assertEqual(flow.repo.git.ls_files.call_count, 1)
def test_diff_revision(self):
"""
Test GitFlow performs a diff when there is an existing revision
"""
new_rev = self.repo.revisions.create(hash='1337', message='a', ref='master', author='me')
self.dataimport.start()
self.dataimport.workflow.tasks.all().update(state=State.Completed)
self.assertEqual(self.dataimport.state, State.Completed)
self.dataimport.id = None
self.dataimport.revision = new_rev
diff1, diff2 = MagicMock(), MagicMock()
diff1.change_type, diff2.change_type = 'M', 'D'
diff1.a_path, diff1.b_path = 'path1a', 'path1b'
diff2.a_path, diff2.b_path = 'path2a', 'path2b'
commit1, commit2 = MagicMock(), MagicMock()
commit1.hexsha = '42'
commit2.hexsha = '1337'
commit1.diff.return_value = [diff1, diff2]
repo = MagicMock()
repo.commit.return_value = commit2
repo.iter_commits.return_value = [commit1, commit2]
flow = GitFlow(self.dataimport, self.working_dir)
flow.repo = repo
diffs = flow.diff()
self.assertListEqual(diffs, [
SimpleDiff(DiffType.Modification, 'path1a', 'path1b'),
SimpleDiff(DiffType.Deletion, 'path2a', 'path2b'),
])
self.assertEqual(commit1.diff.call_count, 1)
self.assertEqual(commit1.diff.call_args, call(commit2))
self.assertEqual(repo.commit.call_count, 1)
self.assertEqual(repo.commit.call_args, call('1337'))
self.assertEqual(repo.iter_commits.call_count, 1)
self.assertEqual(repo.iter_commits.call_args, call('1337'))
def test_diff_check_workflows(self):
"""
Test GitFlow performs a diff with existing revisions with successful imports
Example with commits "42 -> 1337 -> cafe":
Importing rev 42: None -> 42, completed
Importing rev 1337: 42 -> 1337, failed
Expected diff when importing rev cafe: 42 -> cafe
"""
# Setup Revisions
rev_1337 = self.repo.revisions.create(hash='1337', message='a', ref='master', author='me')
rev_cafe = self.repo.revisions.create(hash='cafe', message='tasty', ref='master', author='me')
# Setup workflows
self.dataimport.start()
self.dataimport.workflow.tasks.all().update(state=State.Completed)
self.assertEqual(self.dataimport.state, State.Completed)
import1337 = DataImport(
corpus=self.corpus,
creator=self.user,
revision=rev_1337,
payload={
"repo_id": str(self.repo.id),
"sha": rev_1337.hash,
},
mode=DataImportMode.Repository,
)
import1337.start()
import1337.workflow.tasks.all().update(state=State.Failed)
self.assertEqual(import1337.state, State.Failed)
# Setup Git objects
diff1 = MagicMock(change_type='M', a_path='path1a', b_path='path1b')
diff2 = MagicMock(change_type='D', a_path='path2a', b_path='path2b')
commit_42, commit_1337, commit_cafe = \
MagicMock(hexsha='42'), MagicMock(hexsha='1337'), MagicMock(hexsha='cafe')
commit_42.diff.return_value = [diff1, diff2]
repo = MagicMock()
repo.commit.return_value = commit_cafe
repo.iter_commits.return_value = [commit_42, commit_1337, commit_cafe]
# Run GitFlow's diff for cafe
import_cafe = DataImport(
corpus=self.corpus,
creator=self.user,
revision=rev_cafe,
payload={
"repo_id": str(self.repo.id),
"sha": rev_cafe.hash,
},
mode=DataImportMode.Repository,
)
flow = GitFlow(import_cafe, self.working_dir)
flow.repo = repo
diffs = flow.diff()
self.assertEqual(repo.commit.call_count, 1)
self.assertEqual(repo.commit.call_args, call('cafe'))
self.assertEqual(repo.iter_commits.call_count, 1)
self.assertEqual(repo.iter_commits.call_args, call('cafe'))
self.assertEqual(commit_42.diff.call_count, 1)
self.assertEqual(commit_42.diff.call_args, call(commit_cafe))
self.assertListEqual(diffs, [
SimpleDiff(DiffType.Modification, 'path1a', 'path1b'),
SimpleDiff(DiffType.Deletion, 'path2a', 'path2b'),
])
@patch('arkindex.dataimport.git.FileType.get')
def test_dispatch(self, filetype_mock):
"""
......
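The two diff tests above only describe GitFlow's base-commit selection through mocks and docstrings; the implementation itself is not part of this commit. Below is a minimal, hedged reconstruction of the behaviour those assertions imply, using plain data structures instead of the real DiffType enum and Django models; every name in it is illustrative.

from collections import namedtuple

SimpleDiff = namedtuple('SimpleDiff', ['type', 'a_path', 'b_path'])
# GitPython change_type letters, mapped to stand-ins for the DiffType members used above
DIFF_TYPES = {'A': 'addition', 'M': 'modification', 'D': 'deletion', 'R': 'rename'}

def diff_from_last_successful_import(repo, new_sha, completed_hashes):
    """
    Diff the new revision against an ancestor whose import workflow completed;
    `completed_hashes` stands in for the database lookup on DataImport states.
    """
    new_commit = repo.commit(new_sha)
    for commit in repo.iter_commits(new_sha):
        if commit.hexsha != new_sha and commit.hexsha in completed_hashes:
            return [
                SimpleDiff(DIFF_TYPES[d.change_type], d.a_path, d.b_path)
                for d in commit.diff(new_commit)
            ]
    # No successful import yet: the ls_files assertions earlier in this file
    # suggest the flow falls back to treating every tracked file as an addition.
    return [SimpleDiff('addition', path, path) for path in repo.git.ls_files().splitlines()]

Run against the mocks above, this picks commit_42 as the base when the rev 1337 import failed and returns the same SimpleDiff list the tests expect.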
from unittest.mock import patch, MagicMock
from rest_framework.exceptions import APIException, NotAuthenticated, AuthenticationFailed, ValidationError
from gitlab.exceptions import GitlabGetError, GitlabCreateError
from ponos.models import State
from arkindex_common.enums import DataImportMode
from arkindex.project.tests import FixtureTestCase
from arkindex.dataimport.providers import GitLabProvider
from arkindex.dataimport.models import DataImport
......@@ -293,36 +291,6 @@ class TestGitLabProvider(FixtureTestCase):
self.assertEqual(self.gl_mock().projects.get.call_count, 1)
self.assertEqual(self.gl_mock().projects.get().commits.list.call_count, 1)
def test_handle_webhook(self):
"""
Test GitLabProvider correctly handles GitLab webhook push events
"""
request_mock = MagicMock()
request_mock.META = {
'HTTP_X_GITLAB_EVENT': 'Push Hook',
'HTTP_X_GITLAB_TOKEN': 'hook-token',
}
request_mock.data = {
'object_kind': 'push',
'ref': 'refs/heads/master',
'checkout_sha': '1337',
'commits': [
{
'message': 'commit message',
'author': {
'name': 'bob',
}
}
]
}
repo_imports = DataImport.objects.filter(payload__repo_id=str(self.repo.id))
self.assertFalse(repo_imports.exists())
GitLabProvider(url='http://aaa', credentials=self.creds).handle_webhook(self.repo, request_mock)
di = repo_imports.get()
self.assertEqual(di.mode, DataImportMode.Repository)
self.assertEqual(di.workflow.state, State.Unscheduled)
def test_handle_webhook_missing_headers(self):
"""
Test GitLabProvider checks HTTP headers on webhooks
......
......@@ -221,19 +221,6 @@ class TestImports(FixtureAPITestCase):
self.assertEqual(self.dataimport.state, State.Unscheduled)
self.assertIsNotNone(self.dataimport.workflow)
def test_retry_repo_disabled(self):
self.client.force_login(self.user)
self.dataimport.mode = DataImportMode.Repository
self.dataimport.revision = self.rev
self.dataimport.save()
self.dataimport.start()
self.dataimport.workflow.tasks.all().update(state=State.Error)
self.assertEqual(self.dataimport.state, State.Error)
self.creds.delete()
response = self.client.post(reverse('api:import-retry', kwargs={'pk': self.dataimport.id}))
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.creds.save()
def test_from_files_requires_login(self):
response = self.client.post(reverse('api:import-from-files'), {
'files': [str(self.img_df.id)],
......
from unittest.mock import patch
from django.urls import reverse
from rest_framework import status
from rest_framework.exceptions import ValidationError
......@@ -48,24 +47,6 @@ class TestRepositories(FixtureTestCase):
self.assertEqual(Workflow.objects.count(), 0)
@patch('arkindex.dataimport.providers.GitLabProvider.get_or_create_latest_revision')
def test_start(self, gitlab_rev_mock):
gitlab_rev_mock.return_value = self.rev, False
self.client.force_login(self.superuser)
self.assertEqual(Workflow.objects.count(), 0)
resp = self.client.get(reverse('api:repository-import', kwargs={'pk': str(self.repo.id)}))
self.assertEqual(resp.status_code, status.HTTP_200_OK)
data = resp.json()
di = DataImport.objects.get(id=data['import_id'])
self.assertEqual(di.corpus, self.corpus)
self.assertEqual(di.mode, DataImportMode.Repository)
self.assertEqual(di.creator, self.user)
self.assertEqual(di.revision, self.rev)
self.assertEqual(Workflow.objects.count(), 1)
def test_start_no_credentials(self):
"""
Test the repository start endpoint fails without credentials
......
......@@ -35,22 +35,6 @@ def api_urls_check(*args, **kwargs):
]
@register()
@only_runserver
def ml_tools_check(*args, **kwargs):
"""
Check that the ML classifiers defined in settings actually exist
"""
from django.conf import settings
if not os.path.isdir(settings.ML_CLASSIFIERS_DIR):
return [Error(
'ML tools directory does not exist',
hint='settings.ML_CLASSIFIERS_DIR = "{}"'.format(settings.ML_CLASSIFIERS_DIR),
id='arkindex.E008',
)]
return []
@register()
@only_runserver
def local_imageserver_check(*args, **kwargs):
......@@ -87,7 +71,6 @@ def docker_images_check(*args, **kwargs):
return []
images = (
(settings.ARKINDEX_APP_IMAGE, 'ARKINDEX_APP_IMAGE'),
(settings.ARKINDEX_TASKS_IMAGE, 'ARKINDEX_TASKS_IMAGE'),
)
for image_tag, setting_name in images:
......@@ -112,6 +95,23 @@ def docker_images_check(*args, **kwargs):
return errors
@register()
@only_runserver
def ponos_key_check(*args, **kwargs):
"""
Warn about a missing Ponos private key that would prevent any Ponos agent from authenticating
"""
from django.conf import settings
if not os.path.exists(settings.PONOS_PRIVATE_KEY):
return [Warning(
f'Ponos private key at {settings.PONOS_PRIVATE_KEY} not found. '
'Agents will be unable to connect to this server.',
hint=f'`ponos.private_key` in {settings.CONFIG_PATH}',
id='arkindex.W007',
)]
return []
@register()
def ponos_recipe_check(*args, **kwargs):
"""
......@@ -120,10 +120,6 @@ def ponos_recipe_check(*args, **kwargs):
from django.conf import settings
errors = []
if settings.PONOS_RECIPE is None:
# In a Ponos task
return []
recipe = settings.PONOS_RECIPE.copy()
# Add a dummy task because Ponos wants at least one task
recipe['tasks'] = {'task1': {'image': 'hello-world'}}
......@@ -136,6 +132,15 @@ def ponos_recipe_check(*args, **kwargs):
id='arkindex.E007',
))
for variable in ('ARKINDEX_API_URL', 'ARKINDEX_API_TOKEN', 'ARKINDEX_API_CSRF_COOKIE'):
if variable not in recipe.get('env', {}):
errors.append(Warning(
f'The {variable} environment variable should be defined '
'to allow API client autoconfiguration in Ponos tasks',
hint=f'`ponos.default_env` in {settings.CONFIG_PATH}',
id='arkindex.W006',
))
return errors
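Both new checks above point at keys of the YAML configuration file: arkindex.W007 at `ponos.private_key` and arkindex.W006 at `ponos.default_env`. A short sketch of a `ponos` section that would satisfy them, run through the settings parser added in this commit; the paths and token values are placeholders, not real credentials.

from pathlib import Path
from arkindex.project.config import get_settings_parser

parser = get_settings_parser(Path('/somewhere/backend/arkindex'))
conf = parser.parse_data({
    'ponos': {
        # arkindex.W007 is silenced once this file actually exists on disk
        'private_key': '/somewhere/backend/arkindex/ponos.key',
        # arkindex.W006 is emitted for each of these variables when missing
        'default_env': {
            'ARKINDEX_API_URL': 'http://localhost:8000/api/v1/',
            'ARKINDEX_API_TOKEN': 'deadbeefTestToken',
            'ARKINDEX_API_CSRF_COOKIE': 'arkindex.csrf',
        },
    },
})
# settings.py (later in this diff) merges default_env into PONOS_RECIPE['env'],
# which is the recipe that ponos_recipe_check inspects.
recipe_env = {'ARKINDEX_API_CSRF_COOKIE': 'arkindex.csrf'}
recipe_env.update(conf['ponos']['default_env'])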
......@@ -192,7 +197,6 @@ def s3_check(*args, **kwargs):
aws_settings = {
'AWS_ACCESS_KEY': 'AWS access key ID',
'AWS_SECRET_KEY': 'AWS secret key',
'AWS_ENDPOINT': 'AWS endpoint',
'AWS_THUMBNAIL_BUCKET': 'S3 thumbnails bucket name',
'AWS_STAGING_BUCKET': 'S3 staging bucket name',
}
......@@ -205,6 +209,7 @@ def s3_check(*args, **kwargs):
hint='settings.{} = {}'.format(name, repr(value)),
id='arkindex.E011',
))
return errors
......
from collections import namedtuple
from collections.abc import Mapping
from enum import Enum
from pathlib import Path
import json
import os
import sys
import yaml
Option = namedtuple('Option', ['type', 'default'])
UNSET = object()
"""
Used as a default value in `ConfigParser.add_option(default=UNSET)`
because default=None implies that the option is optional
"""
def _all_checks():
"""
Prevents checking for path existence when running unit tests or other dev-related operations.
This is the same as settings.ALL_CHECKS, but since the configuration is accessed before settings
are initialized, it has to be copied here.
This is implemented as a function, rather than a module-level constant, to make mocking in unit tests much simpler.
"""
return os.environ.get('ALL_CHECKS') == 'true' or 'runserver' in sys.argv
def file_path(data):
path = Path(data).resolve()
if _all_checks():
assert path.exists(), f'{path} does not exist'
assert path.is_file(), f'{path} is not a file'
return path
def dir_path(data):
path = Path(data).resolve()
if _all_checks():
assert path.exists(), f'{path} does not exist'
assert path.is_dir(), f'{path} is not a directory'
return path
class ConfigurationError(ValueError):
def __init__(self, errors, *args, **kwargs):
super().__init__(*args, **kwargs)
self.errors = errors
def __str__(self):
return json.dumps(self.errors)
def __repr__(self):
return '{}({!s})'.format(self.__class__.__name__, self)
class ConfigParser(object):
def __init__(self):
self.options = {}
def add_option(self, name, *, type=str, many=False, default=UNSET):
assert name not in self.options, f'{name} is an already defined option'
assert callable(type), 'Option type must be callable'
if many:
self.options[name] = Option(lambda data: list(map(type, data)), default)
else:
self.options[name] = Option(type, default)
def add_subparser(self, *args, **kwargs):
"""
Add a parser as a new option to this parser,
to allow finer control over nested configuration options.
"""
parser = ConfigParser()
self.add_option(*args, **kwargs, type=parser.parse_data)
return parser
def parse_data(self, data):
"""
Parse configuration data from a dict.
Will raise ConfigurationError if any error is detected.
"""
if not isinstance(data, Mapping):
raise ConfigurationError('Parser data must be a mapping')
parsed, errors = {}, {}
for name, option in self.options.items():
if name in data:
value = data[name]
elif option.default is UNSET:
errors[name] = 'This option is required'
continue
elif option.default is None:
parsed[name] = None
continue
else:
value = option.default
try:
parsed[name] = option.type(value)
except ConfigurationError as e:
# Allow nested error dicts for nicer error messages with add_subparser
errors[name] = e.errors
except Exception as e:
errors[name] = str(e)
if errors:
raise ConfigurationError(errors)
return parsed
def parse(self, path, exist_ok=False):
if not path.is_file() and exist_ok:
# Act like the file is empty
return self.parse_data({})
with open(path) as f:
return self.parse_data(yaml.safe_load(f))
class CacheType(Enum):
Redis = 'redis'
Memcached = 'memcached'
Filesystem = 'filesystem'
Memory = 'memory'
Dummy = 'dummy'
class CookieSameSiteOption(Enum):
Lax = 'lax'
Strict = 'strict'
# Cannot redefine Python's None!
None_ = 'none'
def get_settings_parser(base_dir):
parser = ConfigParser()
parser.add_option('arkindex_env', type=str, default='dev')
parser.add_option('internal_group_id', type=int, default=2)
parser.add_option('local_imageserver_id', type=int, default=1)
parser.add_option('ml_classifiers_dir', type=dir_path, default=(base_dir / '../../ml-classifiers').resolve())
parser.add_option('allowed_hosts', type=str, many=True, default=[])
# SECURITY WARNING: keep the secret key used in production secret!
parser.add_option('secret_key', type=str, default='jf0w^y&ml(caax8f&a1mub)(js9(l5mhbbhosz3gi+m01ex+lo')
parser.add_option('jwt_signing_key', type=str, default=None)
database_parser = parser.add_subparser('database', default={})
database_parser.add_option('name', type=str, default='arkindex_dev')
database_parser.add_option('host', type=str, default='localhost')
database_parser.add_option('port', type=int, default=9100)
database_parser.add_option('user', type=str, default='devuser')
database_parser.add_option('password', type=str, default='devdata')
email_parser = parser.add_subparser('email', default=None)
email_parser.add_option('host', type=str)
email_parser.add_option('port', type=int)
email_parser.add_option('user', type=str)
email_parser.add_option('password', type=str)
email_parser.add_option('error_report_recipients', type=str, many=True, default=[])
static_parser = parser.add_subparser('static', default={})
static_parser.add_option('root_path', type=dir_path, default=None)
static_parser.add_option('cdn_assets_url', type=str, default=None)
static_parser.add_option('mirador_url', type=str, default=None)
static_parser.add_option('universal_viewer_url', type=str, default=None)
static_parser.add_option('frontend_version', type=str, default=None)
elasticsearch_parser = parser.add_subparser('elasticsearch', default={})
elasticsearch_parser.add_option('hosts', type=str, many=True, default=['localhost'])
influxdb_parser = parser.add_subparser('influxdb', default={})
influxdb_parser.add_option('api_url', type=str, default='http://localhost:8086/')
gitlab_parser = parser.add_subparser('gitlab', default={})
gitlab_parser.add_option('app_id', type=str, default=None)
gitlab_parser.add_option('app_secret', type=str, default=None)
redis_parser = parser.add_subparser('redis', default={})
redis_parser.add_option('host', type=str, default='localhost')
redis_parser.add_option('capacity', type=int, default=1000)
csrf_parser = parser.add_subparser('csrf', default={})
csrf_parser.add_option('cookie_name', type=str, default='arkindex.csrf')
csrf_parser.add_option('cookie_domain', type=str, default=None)
csrf_parser.add_option('cookie_samesite', type=CookieSameSiteOption, default=CookieSameSiteOption.Lax)
csrf_parser.add_option('trusted_origins', type=str, many=True, default=[])
session_parser = parser.add_subparser('session', default={})
session_parser.add_option('cookie_name', type=str, default='arkindex.auth')
session_parser.add_option('cookie_domain', type=str, default=None)
session_parser.add_option('cookie_samesite', type=CookieSameSiteOption, default=CookieSameSiteOption.Lax)
cors_parser = parser.add_subparser('cors', default={})
cors_parser.add_option('origin_whitelist', type=str, many=True, default=[
'universalviewer.io', # TODO: Remove this one?
'localhost:8080',
'127.0.0.1:8080',
])
cors_parser.add_option('suffixes', type=str, many=True, default=[])
ponos_parser = parser.add_subparser('ponos', default={})
# Do not use file_path here to allow the backend to start without a Ponos key
ponos_parser.add_option('private_key', type=Path, default=(base_dir / 'ponos.key').resolve())
ponos_parser.add_option('default_env', type=dict, default={})
docker_parser = parser.add_subparser('docker', default={})
docker_parser.add_option('tasks_image', type=str, default='registry.gitlab.com/arkindex/tasks')
sentry_parser = parser.add_subparser('sentry', default={})
sentry_parser.add_option('dsn', type=str, default=None)
sentry_parser.add_option('frontend_dsn', type=str, default=None)
cache_parser = ConfigParser()
cache_parser.add_option('type', type=CacheType, default=None)
cache_parser.add_option('url', type=str, default=None)
cache_parser.add_option('path', type=dir_path, default=None)
def cache_validator(value):
data = cache_parser.parse_data(value)
cache_type = data['type']
if cache_type == CacheType.Filesystem and not data.get('path'):
raise ConfigurationError({'path': 'cache.path is required for a filesystem cache'})
if cache_type in (CacheType.Redis, CacheType.Memcached) and not data.get('url'):
raise ConfigurationError({'url': f'cache.url is required for a {cache_type.name} cache'})
return data
parser.add_option('cache', default={}, type=cache_validator)
s3_parser = ConfigParser()
s3_parser.add_option('access_key_id', type=str, default=None)
s3_parser.add_option('secret_access_key', type=str, default=None)
s3_parser.add_option('endpoint', type=str, default=None)
s3_parser.add_option('region', type=str, default=None)
s3_parser.add_option('thumbnails_bucket', type=str, default='thumbnails')
s3_parser.add_option('staging_bucket', type=str, default='staging')
s3_parser.add_option('ponos_logs_bucket', type=str, default='ponos-logs')
s3_parser.add_option('ponos_artifacts_bucket', type=str, default='ponos-artifacts')
def s3_validator(value):
data = s3_parser.parse_data(value)
if not data.get('access_key_id') and not data.get('secret_access_key'):
# No configuration specified;
# just ignore and let the system checks warn about this without preventing startup
return data
if not data.get('endpoint') and not data.get('region'):
raise ConfigurationError('One of `s3.endpoint` or `s3.region` is required')
return data
parser.add_option('s3', type=s3_validator, default={})
return parser
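The parser above is generic, so here is a self-contained sketch of how its pieces behave, with illustrative option names unrelated to Arkindex: a required option (default left as UNSET), defaulted and nullable options, a nested section through add_subparser, and the aggregated ConfigurationError raised by parse_data.

from arkindex.project.config import ConfigParser, ConfigurationError

parser = ConfigParser()
parser.add_option('name')                           # no default: required
parser.add_option('retries', type=int, default=3)   # optional, falls back to 3
parser.add_option('token', default=None)            # optional, None when absent
db = parser.add_subparser('database', default={})
db.add_option('host', default='localhost')
db.add_option('port', type=int, default=5432)

print(parser.parse_data({'name': 'demo'}))
# {'name': 'demo', 'retries': 3, 'token': None,
#  'database': {'host': 'localhost', 'port': 5432}}

try:
    parser.parse_data({'retries': 'lots', 'database': {'port': 'abc'}})
except ConfigurationError as e:
    print(e.errors)
    # {'name': 'This option is required',
    #  'retries': "invalid literal for int() with base 10: 'lots'",
    #  'database': {'port': "invalid literal for int() with base 10: 'abc'"}}

# parse(path, exist_ok=True) treats a missing file as an empty mapping,
# which is how settings.py below tolerates a missing config.yml.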
......@@ -10,74 +10,62 @@ For the full list of settings and their values, see
https://docs.djangoproject.com/en/1.11/ref/settings/
"""
import logging
import os
import sys
import warnings
from datetime import timedelta
from corsheaders.defaults import default_headers
from datetime import timedelta
from pathlib import Path
from arkindex.project.config import get_settings_parser, CacheType
# Build paths inside the project like this: BASE_DIR / ...
BASE_DIR = Path(__file__).resolve().parent.parent
def env2list(env_name, separator=',', default=[]):
'''
Load env variable as a list
'''
value = os.environ.get(env_name)
return value and value.split(separator) or default
# Database
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.postgresql',
'NAME': os.environ.get('DB_NAME', 'arkindex_dev'),
'USER': os.environ.get('DB_USER', 'devuser'),
'PASSWORD': os.environ.get('DB_PASSWORD', 'devdata'),
'HOST': os.environ.get('DB_HOST', 'localhost'),
'PORT': os.environ.get('DB_PORT', 9100),
}
}
# Admins in charge
ADMINS = [('', address) for address in env2list('ADMIN_EMAIL')]
# Used for special cases during configuration parsing and settings loading
TEST_ENV = 'test' in sys.argv
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
ML_CLASSIFIERS_DIR = os.environ.get('ML_CLASSIFIERS_DIR', os.path.join(BASE_DIR, '../../ml-classifiers'))
CONFIG_PATH = Path(os.environ.get('CONFIG_PATH', BASE_DIR / 'config.yml'))
parser = get_settings_parser(BASE_DIR)
conf = parser.parse(CONFIG_PATH, exist_ok=True)
# Read Version either from Docker static file or local file
_version = '/etc/arkindex.version' \
if os.path.exists('/etc/arkindex.version') \
else os.path.join(os.path.dirname(BASE_DIR), 'VERSION')
else BASE_DIR.parent / 'VERSION'
with open(_version) as f:
VERSION = f.read().strip()
# By default the frontend version is the same as the backend
FRONTEND_VERSION = os.environ.get('FRONTEND_VERSION', VERSION)
ARKINDEX_ENV = conf['arkindex_env']
ML_CLASSIFIERS_DIR = conf['ml_classifiers_dir']
SECRET_KEY = conf['secret_key']
LOCAL_IMAGESERVER_ID = conf['local_imageserver_id']
# Local IIIF server
LOCAL_IMAGESERVER_ID = int(os.environ.get('LOCAL_IMAGESERVER_ID', 1))
ALLOWED_HOSTS = conf['allowed_hosts']
# Docker container name resolution
ALLOWED_HOSTS += ['127.0.0.1', 'localhost', 'backend', 'ark-backend']
# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = os.environ.get('SECRET_KEY', 'jf0w^y&ml(caax8f&a1mub)(js9(l5mhbbhosz3gi+m01ex+lo')
# Database
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.postgresql',
'NAME': conf['database']['name'],
'USER': conf['database']['user'],
'PASSWORD': conf['database']['password'],
'HOST': conf['database']['host'],
'PORT': conf['database']['port'],
}
}
# SECURITY WARNING: don't run with debug turned on in production!
ARKINDEX_ENV = os.environ.get('ARKINDEX_ENV', 'dev')
DEBUG = ARKINDEX_ENV == 'dev'
# Run all system checks when starting the server
ALL_CHECKS = os.environ.get('ALL_CHECKS') == 'true' or 'runserver' in sys.argv
ALLOWED_HOSTS = env2list('ALLOWED_HOSTS')
# Docker container name resolution
ALLOWED_HOSTS += ['127.0.0.1', 'localhost', 'backend', 'ark-backend']
# Required for django-debug-toolbar
INTERNAL_IPS = ['127.0.0.1', '127.0.1.1']
# Application definition
INSTALLED_APPS = [
'django.contrib.admin',
'django.contrib.auth',
......@@ -175,9 +163,11 @@ USE_TZ = True
# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.11/howto/static-files/
STATIC_URL = '/static/'
STATIC_ROOT = os.environ.get('STATIC_ROOT')
STATIC_ROOT = conf['static']['root_path']
STATICFILES_DIRS = []
# By default the frontend version is the same as the backend
FRONTEND_VERSION = conf['static'].get('frontend_version', VERSION)
# API
REST_FRAMEWORK = {
......@@ -198,15 +188,13 @@ SIMPLE_JWT = {
'USER_ID_CLAIM': 'agent_id',
'ROTATE_REFRESH_TOKENS': True,
'ACCESS_TOKEN_LIFETIME': timedelta(hours=6),
'SIGNING_KEY': os.environ.get('SIGNING_KEY', SECRET_KEY),
'SIGNING_KEY': conf['jwt_signing_key'] or SECRET_KEY,
}
SEARCH_FILTER_MAX_TERMS = 10
# Elastic search config
ELASTIC_SEARCH_HOSTS = [
os.environ.get('ES_HOST', 'localhost'),
]
ELASTIC_SEARCH_HOSTS = conf['elasticsearch']['hosts']
# The Scroll API is required to go over 10K results
ES_RESULTS_LIMIT = 10000
# ES defaults to three items returned in a nested query if the inner_hits size is not defined
......@@ -214,11 +202,8 @@ ES_INNER_RESULTS_LIMIT = 6
# Maximum length for query strings—very long queries can cause timeouts
ES_QUERY_STRING_MAX_LENGTH = 1000
# Silent logger for elasticsearch
logging.getLogger('elasticsearch').setLevel(logging.WARNING)
# InfluxDB API root
INFLUXDB_API_URL = os.environ.get('INFLUXDB_API_URL', 'http://localhost:8086/')
INFLUXDB_API_URL = conf['influxdb']['api_url']
# Use SSL proxy
SECURE_PROXY_SSL_HEADER = ('HTTP_X_FORWARDED_PROTO', 'https')
......@@ -237,56 +222,56 @@ IIIF_DOWNLOAD_TIMEOUT = (30, 60)
# check_images sample size when checking all servers
CHECK_IMAGES_SAMPLE_SIZE = 20
TRANSCRIPTIONS_IMPORT_QUEUE_SIZE = 25000
TRANSCRIPTIONS_IMPORT_CHUNK_SIZE = 10000
# GitLab OAuth
GITLAB_APP_ID = os.environ.get('GITLAB_APP_ID')
GITLAB_APP_SECRET = os.environ.get('GITLAB_APP_SECRET')
GITLAB_APP_ID = conf['gitlab']['app_id']
GITLAB_APP_SECRET = conf['gitlab']['app_secret']
if conf['cache']['type'] is None:
conf['cache']['type'] = CacheType.Dummy if DEBUG else CacheType.Memory
if os.environ.get('REDIS_CACHE_LOCATION'):
if conf['cache']['type'] == CacheType.Redis:
CACHES = {
'default': {
'BACKEND': 'redis_cache.RedisCache',
'LOCATION': os.environ.get('REDIS_CACHE_LOCATION'),
'LOCATION': conf['cache']['url'],
}
}
# Cache into memcached
elif os.environ.get('MEMCACHED_HOST'):
elif conf['cache']['type'] == CacheType.Memcached:
CACHES = {
'default': {
'BACKEND': 'django.core.cache.backends.memcached.MemcachedCache',
'LOCATION': os.environ['MEMCACHED_HOST'],
'LOCATION': conf['cache']['url'],
}
}
elif os.environ.get('CACHE_DIR'):
elif conf['cache']['type'] == CacheType.Filesystem:
CACHES = {
'default': {
'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache',
'LOCATION': os.environ['CACHE_DIR'],
'LOCATION': str(conf['cache']['path']),
}
}
else:
# On dev, use a dummy cache
# On prod, use at least a local memory cache
_cache = 'django.core.cache.backends.dummy.DummyCache' if DEBUG else 'django.core.cache.backends.locmem.LocMemCache'
elif conf['cache']['type'] == CacheType.Memory:
CACHES = {
'default': {
'BACKEND': _cache
'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'
}
}
elif conf['cache']['type'] == CacheType.Dummy:
CACHES = {
'default': {
'BACKEND': 'django.core.cache.backends.dummy.DummyCache'
}
}
# Django Channels layer using Redis
REDIS_HOST = os.environ.get('REDIS_HOST', 'localhost')
REDIS_CAPACITY = int(os.environ.get('REDIS_CAPACITY', 1000))
CHANNEL_LAYERS = {
"default": {
"BACKEND": "channels_redis.core.RedisChannelLayer",
"CONFIG": {
"hosts": [
(REDIS_HOST, 6379)
(conf['redis']['host'], 6379)
],
"capacity": REDIS_CAPACITY,
"capacity": conf['redis']['capacity'],
},
},
}
......@@ -329,6 +314,9 @@ LOGGING = {
'handlers': ['console'],
'level': 'INFO',
},
'elasticsearch': {
'level': 'WARNING',
},
'elasticsearch.trace': {
'handlers': ['console_debug'],
'level': 'DEBUG',
......@@ -345,52 +333,34 @@ LOGGING = {
},
}
# Sentry Error reporting
SENTRY_DSN = os.environ.get('SENTRY_DSN')
FRONTEND_SENTRY_DSN = os.environ.get('FRONTEND_SENTRY_DSN')
# Email
EMAIL_SUBJECT_PREFIX = '[Arkindex {}] '.format(ARKINDEX_ENV)
if os.environ.get('EMAIL_HOST'):
if conf['email']:
ADMINS = [('', address) for address in conf['email']['error_report_recipients']]
EMAIL_BACKEND = 'django.core.mail.backends.smtp.EmailBackend'
EMAIL_HOST = os.environ.get('EMAIL_HOST')
EMAIL_PORT = os.environ.get('EMAIL_PORT')
EMAIL_HOST_USER = os.environ.get('EMAIL_HOST_USER')
EMAIL_HOST = conf['email']['host']
EMAIL_PORT = conf['email']['port']
EMAIL_HOST_USER = conf['email']['user']
DEFAULT_FROM_EMAIL = SERVER_EMAIL = EMAIL_HOST_USER
EMAIL_HOST_PASSWORD = os.environ.get('EMAIL_HOST_PASSWORD')
EMAIL_HOST_PASSWORD = conf['email']['password']
EMAIL_USE_TLS = True
else:
EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
# Cookies
def samesite(name, default):
env = '{}_COOKIE_SAMESITE'.format(name)
value = os.environ.get(env, default).lower()
if value == 'none':
value = None
assert value in ('lax', 'strict', None), 'Invalid {} value {}'.format(env, value)
return value
CSRF_COOKIE_NAME = os.environ.get('CSRF_COOKIE_NAME', 'arkindex.csrf')
CSRF_TRUSTED_ORIGINS = env2list('CSRF_TRUSTED_ORIGINS')
CSRF_COOKIE_DOMAIN = os.environ.get('COOKIE_DOMAIN')
CSRF_COOKIE_SAMESITE = samesite('CSRF', 'lax')
SESSION_COOKIE_NAME = os.environ.get('SESSION_COOKIE_NAME', 'arkindex.auth')
SESSION_COOKIE_DOMAIN = os.environ.get('COOKIE_DOMAIN')
SESSION_COOKIE_SAMESITE = samesite('SESSION', 'lax')
CSRF_COOKIE_NAME = conf['csrf']['cookie_name']
CSRF_COOKIE_DOMAIN = conf['csrf']['cookie_domain']
CSRF_COOKIE_SAMESITE = conf['csrf']['cookie_samesite'].value
CSRF_TRUSTED_ORIGINS = conf['csrf']['trusted_origins']
SESSION_COOKIE_NAME = conf['session']['cookie_name']
SESSION_COOKIE_DOMAIN = conf['session']['cookie_domain']
SESSION_COOKIE_SAMESITE = conf['session']['cookie_samesite'].value
# Required for authentication over websockets
SESSION_COOKIE_HTTPONLY = False
CORS_ORIGIN_WHITELIST = env2list('CORS_ORIGIN_WHITELIST', default=[
'universalviewer.io',
'localhost:8080',
'127.0.0.1:8080',
'localhost:5000',
'127.0.0.1:5000',
])
CORS_ORIGIN_WHITELIST = conf['cors']['origin_whitelist']
CORS_ALLOW_CREDENTIALS = True
CORS_ALLOW_HEADERS = default_headers + (
'cache-control', # Allow the frontend to prevent caching some API requests
......@@ -398,67 +368,68 @@ CORS_ALLOW_HEADERS = default_headers + (
CORS_URLS_REGEX = r'^/(api|ponos)/.*$'
# Support CORS suffixes
cors_suffixes = env2list('CORS_SUFFIXES')
if cors_suffixes:
if conf['cors']['suffixes']:
CORS_ORIGIN_REGEX_WHITELIST = [
r"^https://.+{}".format(suffix)
for suffix in cors_suffixes
for suffix in conf['cors']['suffixes']
]
# Amazon S3
PONOS_AWS_ACCESS_KEY = AWS_ACCESS_KEY = os.environ.get('AWS_ACCESS_KEY_ID')
PONOS_AWS_SECRET_KEY = AWS_SECRET_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY')
PONOS_AWS_ENDPOINT = AWS_ENDPOINT = os.environ.get('AWS_ENDPOINT')
PONOS_AWS_REGION = AWS_REGION = os.environ.get('AWS_REGION')
PONOS_S3_LOGS_BUCKET = os.environ.get('PONOS_S3_LOGS_BUCKET', 'ponos-logs')
PONOS_S3_ARTIFACTS_BUCKET = os.environ.get('PONOS_S3_ARTIFACTS_BUCKET', 'ponos-artifacts')
AWS_THUMBNAIL_BUCKET = os.environ.get('AWS_THUMBNAIL_BUCKET', 'thumbnails')
AWS_STAGING_BUCKET = os.environ.get('AWS_STAGING_BUCKET', 'staging')
PONOS_AWS_ACCESS_KEY = AWS_ACCESS_KEY = conf['s3']['access_key_id']
PONOS_AWS_SECRET_KEY = AWS_SECRET_KEY = conf['s3']['secret_access_key']
PONOS_AWS_ENDPOINT = AWS_ENDPOINT = conf['s3']['endpoint']
PONOS_AWS_REGION = AWS_REGION = conf['s3']['region']
PONOS_S3_LOGS_BUCKET = conf['s3']['ponos_logs_bucket']
PONOS_S3_ARTIFACTS_BUCKET = conf['s3']['ponos_artifacts_bucket']
AWS_THUMBNAIL_BUCKET = conf['s3']['thumbnails_bucket']
AWS_STAGING_BUCKET = conf['s3']['staging_bucket']
# Ponos integration
if os.environ.get('PONOS_TASK'):
# In a ponos docker task
PONOS_RECIPE = None
ML_CLASSIFIERS_DIR = '/arkindex/classifiers'
_ponos_env = {
'ARKINDEX_API_CSRF_COOKIE': CSRF_COOKIE_NAME
}
if DEBUG:
# In dev, include overridable API info
_ponos_env.update({
'ARKINDEX_API_URL': 'http://localhost:8000/api/v1/',
'ARKINDEX_API_TOKEN': 'deadbeefTestToken',
})
_ponos_env.update(conf['ponos']['default_env'])
PONOS_RECIPE = {
'env': _ponos_env,
}
PONOS_PRIVATE_KEY = conf['ponos']['private_key']
else:
# As scheduler or dev
PONOS_RECIPE = {
'env': {
'ARKINDEX_API_URL': os.environ.get('ARKINDEX_API_URL', 'http://localhost:8000/api/v1/'),
'ARKINDEX_API_TOKEN': os.environ.get('ARKINDEX_API_TOKEN', 'deadbeefTestToken'),
'ARKINDEX_API_CSRF_COOKIE': CSRF_COOKIE_NAME,
},
}
PONOS_PRIVATE_KEY = os.environ.get('PONOS_PRIVATE_KEY_PATH', os.path.join(BASE_DIR, 'ponos.key'))
# Docker images used by our ponos workflow
ARKINDEX_TASKS_IMAGE = conf['docker']['tasks_image']
if 'test' in sys.argv:
# User groups with special permissions
INTERNAL_GROUP_ID = conf['internal_group_id']
# CDN Assets URL to use for arkindex remote CSS/JS/Images assets
CDN_ASSETS_URL = conf['static']['cdn_assets_url']
if CDN_ASSETS_URL is not None:
CDN_ASSETS_URL = CDN_ASSETS_URL.rstrip('/')
STATIC_URL = f"{CDN_ASSETS_URL}/{VERSION}/static/"
MIRADOR_URL = conf['static']['mirador_url']
UNIVERSAL_VIEWER_URL = conf['static']['universal_viewer_url']
SENTRY_DSN = conf['sentry']['dsn']
FRONTEND_SENTRY_DSN = conf['sentry']['frontend_dsn']
if TEST_ENV:
# Overrides for unit tests
AWS_ACCESS_KEY = 'test'
AWS_SECRET_KEY = 'test'
AWS_ENDPOINT = 'http://s3'
PONOS_PRIVATE_KEY = None
LOCAL_IMAGESERVER_ID = 1
# Turn Django's UnorderedObjectListWarning into exceptions
warnings.filterwarnings('error', category=RuntimeWarning, module='django.core.paginator')
warnings.filterwarnings('error', category=RuntimeWarning, module='rest_framework.pagination')
# Docker images used by our ponos workflow
ARKINDEX_APP_IMAGE = os.environ.get('ARKINDEX_APP_IMAGE', 'registry.gitlab.com/arkindex/backend')
ARKINDEX_TASKS_IMAGE = os.environ.get('ARKINDEX_TASKS_IMAGE', 'registry.gitlab.com/arkindex/tasks')
# User groups with special permissions
INTERNAL_GROUP_ID = int(os.environ.get('INTERNAL_GROUP_ID', 2))
# CDN Assets URL to use for arkindex remote CSS/JS/Images assets
CDN_ASSETS_URL = os.environ.get('CDN_ASSETS_URL')
if CDN_ASSETS_URL is not None:
CDN_ASSETS_URL = CDN_ASSETS_URL.rstrip('/')
STATIC_URL = f"{CDN_ASSETS_URL}/{VERSION}/static/"
MIRADOR_URL = os.environ.get('MIRADOR_URL')
UNIVERSAL_VIEWER_URL = os.environ.get('UNIVERSAL_VIEWER_URL')
# Optional unit tests runner with code coverage
try:
import django_nose # noqa
......@@ -472,7 +443,7 @@ except ImportError:
pass
# Local settings
if 'test' not in sys.argv:
if DEBUG and not TEST_ENV:
try:
from .local_settings import * # noqa
except ImportError:
......
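Before the configuration samples, a non-exhaustive summary of how the environment variables removed from settings.py above map onto keys of the new YAML file; it is written as a Python dict only to keep every example in one language, and dotted names denote nesting.

ENV_TO_YAML = {
    'ARKINDEX_ENV': 'arkindex_env',
    'ALLOWED_HOSTS': 'allowed_hosts',
    'SECRET_KEY': 'secret_key',
    'SIGNING_KEY': 'jwt_signing_key',
    'DB_HOST': 'database.host',
    'DB_PORT': 'database.port',
    'DB_NAME': 'database.name',
    'DB_USER': 'database.user',
    'DB_PASSWORD': 'database.password',
    'ES_HOST': 'elasticsearch.hosts',
    'INFLUXDB_API_URL': 'influxdb.api_url',
    'GITLAB_APP_ID': 'gitlab.app_id',
    'GITLAB_APP_SECRET': 'gitlab.app_secret',
    'REDIS_HOST': 'redis.host',
    'REDIS_CAPACITY': 'redis.capacity',
    'EMAIL_HOST': 'email.host',
    'AWS_ACCESS_KEY_ID': 's3.access_key_id',
    'AWS_SECRET_ACCESS_KEY': 's3.secret_access_key',
    'AWS_ENDPOINT': 's3.endpoint',
    'AWS_THUMBNAIL_BUCKET': 's3.thumbnails_bucket',
    'AWS_STAGING_BUCKET': 's3.staging_bucket',
    'PONOS_PRIVATE_KEY_PATH': 'ponos.private_key',
    'ARKINDEX_TASKS_IMAGE': 'docker.tasks_image',
    'STATIC_ROOT': 'static.root_path',
    'CDN_ASSETS_URL': 'static.cdn_assets_url',
    'MIRADOR_URL': 'static.mirador_url',
    'UNIVERSAL_VIEWER_URL': 'static.universal_viewer_url',
    'SENTRY_DSN': 'sentry.dsn',
    'FRONTEND_SENTRY_DSN': 'sentry.frontend_dsn',
    'INTERNAL_GROUP_ID': 'internal_group_id',
    'LOCAL_IMAGESERVER_ID': 'local_imageserver_id',
    'ML_CLASSIFIERS_DIR': 'ml_classifiers_dir',
}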
allowed_hosts: []
arkindex_env: dev
cache:
path: null
type: null
url: null
cors:
origin_whitelist:
- universalviewer.io
- localhost:8080
- 127.0.0.1:8080
suffixes: []
csrf:
cookie_domain: null
cookie_name: arkindex.csrf
cookie_samesite: lax
trusted_origins: []
database:
host: localhost
name: arkindex_dev
password: devdata
port: 9100
user: devuser
docker:
tasks_image: registry.gitlab.com/arkindex/tasks
elasticsearch:
hosts:
- localhost
email: null
gitlab:
app_id: null
app_secret: null
influxdb:
api_url: http://localhost:8086/
internal_group_id: 2
jwt_signing_key: null
local_imageserver_id: 1
ml_classifiers_dir: /somewhere/ml-classifiers
ponos:
default_env: {}
private_key: /somewhere/backend/arkindex/ponos.key
redis:
capacity: 1000
host: localhost
s3:
access_key_id: null
endpoint: null
ponos_artifacts_bucket: ponos-artifacts
ponos_logs_bucket: ponos-logs
region: null
secret_access_key: null
staging_bucket: staging
thumbnails_bucket: thumbnails
secret_key: jf0w^y&ml(caax8f&a1mub)(js9(l5mhbbhosz3gi+m01ex+lo
sentry:
dsn: null
frontend_dsn: null
session:
cookie_domain: null
cookie_name: arkindex.auth
cookie_samesite: lax
static:
cdn_assets_url: null
frontend_version: null
mirador_url: null
root_path: null
universal_viewer_url: null
allowed_hosts: all of them
arkindex_env: off
cache:
type: redis
cors:
origin_whitelist: france
suffixes: 1
csrf:
cookie_domain: null
cookie_name: null
cookie_samesite: relax
trusted_origins: 12.5
database:
host: null
password: hunter2
port: rotterdam
user: bob
docker:
tasks_image:
here: have a dict
elasticsearch:
hosts: ghosts
email:
host: 123
gitlab:
app_id: yes
app_secret: []
influxdb:
api_url: no
internal_group_id: 2
jwt_signing_key: null
local_imageserver_id: 1
ml_classifiers_dir: /aaaaa
ponos:
default_env: {}
private_key: /dev/zero
redis:
capacity: over nine thousand
host: radish
s3:
endpoint: null
ponos_artifacts_bucket: {}
ponos_logs_bucket: null
region: null
secret_access_key: null
staging_bucket: null
thumbnails_bucket: 1234
secret_key: false
session:
cookie_domain: -1
cookie_name: .inf
cookie_samesite: foo
static:
cdn_assets_url: 1
mirador_url: 2
root_path: /aaaaa
universal_viewer_url: .nan
cache:
url: cache.url is required for a Redis cache
cors:
suffixes: "'int' object is not iterable"
csrf:
cookie_samesite: "'relax' is not a valid CookieSameSiteOption"
trusted_origins: "'float' object is not iterable"
database:
port: "invalid literal for int() with base 10: 'rotterdam'"
email:
password: This option is required
port: This option is required
user: This option is required
ml_classifiers_dir: /aaaaa does not exist
redis:
capacity: "invalid literal for int() with base 10: 'over nine thousand'"
session:
cookie_samesite: "'foo' is not a valid CookieSameSiteOption"
static:
root_path: /aaaaa does not exist
allowed_hosts:
- something.com
arkindex_env: prod
cache:
path: /
type: filesystem
url: http://aaa
cors:
origin_whitelist:
- localtoast:1337
suffixes:
- a
- b
csrf:
cookie_domain: something.com
cookie_name: csrfcookie
cookie_samesite: strict
trusted_origins:
- trust-no-one
database:
host: dbhost
name: arkindex_db
password: hunter2
port: 9123
user: littlebobbytables
docker:
tasks_image: registry.gitlab.com/arkindex/stonks
elasticsearch:
hosts:
- google
email:
error_report_recipients:
- noreply@nasa.gov
host: smtp.wanadoo.fr
password: hunter2
port: 25
user: teklia@wanadoo.fr
gitlab:
app_id: a
app_secret: b
influxdb:
api_url: http://graph/
internal_group_id: 4
jwt_signing_key: deadbeef
local_imageserver_id: 45
ml_classifiers_dir: /tmp
ponos:
default_env:
A: B
private_key: /a/b/c
redis:
capacity: 9001
host: radish
s3:
access_key_id: abcd
endpoint: http://somewhere
ponos_artifacts_bucket: zstandardland
ponos_logs_bucket: plaintexttown
region: middle-earth-1
secret_access_key: hunter2
staging_bucket: dropboxbutworse
thumbnails_bucket: toenails
secret_key: abcdef
sentry:
dsn: https://nowhere
frontend_dsn: https://nowhere/frontend
session:
cookie_domain: cookie-dolmen
cookie_name: stonehenge
cookie_samesite: none
static:
cdn_assets_url: http://cdn.teklia.horse/
frontend_version: 1.2.3-alpha4
mirador_url: gopher://mirador/
root_path: /
universal_viewer_url: gopher://uv/
......@@ -3,6 +3,7 @@ from django.test import TestCase, override_settings
from django.conf import settings
from django.urls import path
from django.core.checks import Error, Warning
from pathlib import Path
from subprocess import CalledProcessError
import subprocess
......@@ -35,30 +36,6 @@ class ChecksTestCase(TestCase):
]
)
@patch('arkindex.project.checks.os.path.isdir')
def test_ml_tools_check(self, isdir_mock):
"""
Test the ML tools existence checks
"""
from arkindex.project.checks import ml_tools_check
isdir_mock.return_value = True
self.assertListEqual(ml_tools_check(), [])
isdir_mock.return_value = False
with self.settings(ML_CLASSIFIERS_DIR='oops'):
self.assertListEqual(
ml_tools_check(),
[
Error(
'ML tools directory does not exist',
hint='settings.ML_CLASSIFIERS_DIR = "oops"',
id='arkindex.E008',
),
],
)
def test_local_imageserver_check(self):
"""
Test the local imageserver existence check
......@@ -85,19 +62,12 @@ class ChecksTestCase(TestCase):
@patch('arkindex.project.checks.subprocess.run')
@override_settings(
ARKINDEX_APP_IMAGE='nope',
ARKINDEX_TASKS_IMAGE='nuh',
)
def test_docker_images_check(self, run_mock):
from arkindex.project.checks import docker_images_check
expected_calls = [
call(
['docker', 'image', 'inspect', 'nope'],
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
check=True,
),
call(
['docker', 'image', 'inspect', 'nuh'],
stdout=subprocess.PIPE,
......@@ -106,20 +76,7 @@ class ChecksTestCase(TestCase):
),
]
run_mock.side_effect = [CalledProcessError(1, ''), None, None]
self.assertListEqual(docker_images_check(), [
Error(
'Docker image with tag "nope" was not found.',
hint='settings.ARKINDEX_APP_IMAGE = "nope"',
id='arkindex.E006',
)
])
self.assertEqual(run_mock.call_count, 2)
self.assertEqual(run_mock.call_args_list, expected_calls)
run_mock.reset_mock()
run_mock.side_effect = [None, CalledProcessError(1, ''), CalledProcessError(1, '')]
run_mock.side_effect = CalledProcessError(1, '')
self.assertListEqual(docker_images_check(), [
Error(
'Docker image with tag "nuh" was not found.',
......@@ -128,7 +85,7 @@ class ChecksTestCase(TestCase):
)
])
self.assertEqual(run_mock.call_count, 2)
self.assertEqual(run_mock.call_count, 1)
self.assertEqual(run_mock.call_args_list, expected_calls)
@patch('arkindex.project.checks.subprocess.run')
......@@ -142,14 +99,8 @@ class ChecksTestCase(TestCase):
with self.settings(ARKINDEX_APP_IMAGE='nope', ARKINDEX_TASKS_IMAGE='nuh'):
self.assertListEqual(docker_images_check(), [])
self.assertEqual(run_mock.call_count, 2)
self.assertEqual(run_mock.call_count, 1)
self.assertEqual(run_mock.call_args_list, [
call(
['docker', 'image', 'inspect', 'nope'],
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
check=True,
),
call(
['docker', 'image', 'inspect', 'nuh'],
stdout=subprocess.PIPE,
......@@ -158,6 +109,7 @@ class ChecksTestCase(TestCase):
),
])
@override_settings()
@patch('arkindex.project.checks.parse_recipe')
def test_ponos_recipe_check(self, parse_mock):
from arkindex.project.checks import ponos_recipe_check
......@@ -170,6 +122,30 @@ class ChecksTestCase(TestCase):
id='arkindex.E007',
)])
settings.CONFIG_PATH = Path('/somewhere/config.yml')
del settings.PONOS_RECIPE['env']
parse_mock.side_effect = None
self.assertListEqual(ponos_recipe_check(), [
Warning(
'The ARKINDEX_API_URL environment variable should be defined '
'to allow API client autoconfiguration in Ponos tasks',
hint='`ponos.default_env` in /somewhere/config.yml',
id='arkindex.W006',
),
Warning(
'The ARKINDEX_API_TOKEN environment variable should be defined '
'to allow API client autoconfiguration in Ponos tasks',
hint='`ponos.default_env` in /somewhere/config.yml',
id='arkindex.W006',
),
Warning(
'The ARKINDEX_API_CSRF_COOKIE environment variable should be defined '
'to allow API client autoconfiguration in Ponos tasks',
hint='`ponos.default_env` in /somewhere/config.yml',
id='arkindex.W006',
),
])
@override_settings()
def test_internal_group_check(self):
from arkindex.project.checks import internal_group_check
......@@ -223,7 +199,6 @@ class ChecksTestCase(TestCase):
del settings.AWS_ACCESS_KEY
del settings.AWS_SECRET_KEY
del settings.AWS_ENDPOINT
del settings.AWS_THUMBNAIL_BUCKET
del settings.AWS_STAGING_BUCKET
self.assertCountEqual(s3_check(), [
......@@ -237,11 +212,6 @@ class ChecksTestCase(TestCase):
hint='settings.AWS_SECRET_KEY = None',
id='arkindex.E011',
),
Error(
'AWS endpoint is missing; all S3-related features will fail.',
hint='settings.AWS_ENDPOINT = None',
id='arkindex.E011',
),
Error(
'S3 thumbnails bucket name is missing; all S3-related features will fail.',
hint='settings.AWS_THUMBNAIL_BUCKET = None',
......@@ -256,7 +226,6 @@ class ChecksTestCase(TestCase):
settings.AWS_ACCESS_KEY = 'key'
settings.AWS_SECRET_KEY = 's3kr3t'
settings.AWS_ENDPOINT = 'http://somewhere'
settings.AWS_THUMBNAIL_BUCKET = 'Thumbs.db'
settings.AWS_STAGING_BUCKET = 'buckette'
self.assertListEqual(s3_check(), [])
......
from unittest import TestCase
from unittest.mock import patch
from enum import Enum
from io import StringIO
from pathlib import Path
from arkindex.project.config import dir_path, file_path, ConfigurationError, ConfigParser, get_settings_parser
import yaml
import tempfile
SAMPLES = Path(__file__).resolve().parent / 'config_samples'
class TestConfig(TestCase):
@patch('arkindex.project.config._all_checks')
def test_file_path(self, all_checks_mock):
all_checks_mock.return_value = True
with self.assertRaisesRegex(AssertionError, ' does not exist'):
file_path('/aaaaaaa')
with tempfile.NamedTemporaryFile() as f:
parent_path = Path(f.name).parent
with self.assertRaisesRegex(AssertionError, ' is not a file'):
file_path(parent_path)
self.assertEqual(file_path(f.name), Path(f.name))
# Existence checks should be ignored without all_checks
all_checks_mock.return_value = False
self.assertEqual(file_path(parent_path), parent_path)
self.assertEqual(file_path('/aaaaaaa'), Path('/aaaaaaa'))
@patch('arkindex.project.config._all_checks')
def test_dir_path(self, all_checks_mock):
all_checks_mock.return_value = True
with tempfile.TemporaryDirectory() as d:
self.assertEqual(dir_path(d), Path(d))
with self.assertRaisesRegex(AssertionError, ' does not exist'):
dir_path('/aaaaaaa')
with tempfile.NamedTemporaryFile() as f:
with self.assertRaisesRegex(AssertionError, ' is not a directory'):
dir_path(f.name)
# Existence checks should be ignored without all_checks
all_checks_mock.return_value = False
self.assertEqual(dir_path(f.name), Path(f.name))
self.assertEqual(dir_path('/aaaaaaa'), Path('/aaaaaaa'))
def test_configuration_error(self):
error = ConfigurationError({'a': 'b'})
self.assertDictEqual(error.errors, {'a': 'b'})
self.assertEqual(str(error), '{"a": "b"}')
self.assertEqual(repr(error), 'ConfigurationError({"a": "b"})')
def test_add_option(self):
parser = ConfigParser()
parser.add_option('test', type=int)
with self.assertRaisesRegex(AssertionError, 'test is an already defined option'):
parser.add_option('test')
with self.assertRaisesRegex(AssertionError, 'Option type must be callable'):
parser.add_option('toast', type=...)
def test_parse_not_found(self):
parser = ConfigParser()
parser.add_option('something', default='thing')
with self.assertRaises(FileNotFoundError):
parser.parse(Path('/aaaaaaa'))
self.assertDictEqual(
parser.parse(Path('/aaaaaaa'), exist_ok=True),
{'something': 'thing'},
)
def _dump_settings(self, data):
"""
Dump settings as a YAML string, but turn non-primitive YAML types into their string representation.
"""
stream = StringIO()
dumper = yaml.SafeDumper(stream)
def str_representer(self, data):
if isinstance(data, Enum):
data = data.value
else:
data = str(data)
return self.represent_str(data)
dumper.add_representer(None, str_representer)
dumper.ignore_aliases = lambda *args: True
try:
dumper.open()
dumper.represent(data)
dumper.close()
finally:
dumper.dispose()
return stream.getvalue()
# Ignore non-existent paths
@patch('arkindex.project.config.dir_path', new=Path)
@patch('arkindex.project.config.file_path', new=Path)
def test_settings_defaults(self):
parser = get_settings_parser(Path('/somewhere/backend/arkindex'))
self.assertIsInstance(parser, ConfigParser)
data = parser.parse_data({})
with (SAMPLES / 'defaults.yaml').open() as f:
expected = f.read()
actual = self._dump_settings(data)
self.maxDiff = None
self.assertEqual(expected, actual)
@patch('arkindex.project.config.dir_path', new=Path)
@patch('arkindex.project.config.file_path', new=Path)
def test_settings_override(self):
parser = get_settings_parser(Path('/somewhere/backend/arkindex'))
self.assertIsInstance(parser, ConfigParser)
data = parser.parse(SAMPLES / 'override.yaml')
with (SAMPLES / 'override.yaml').open() as f:
expected = f.read()
actual = self._dump_settings(data)
self.maxDiff = None
self.assertEqual(expected, actual)
@patch('arkindex.project.config._all_checks')
def test_settings_errors(self, all_checks_mock):
all_checks_mock.return_value = True
parser = get_settings_parser(Path('/somewhere/backend/arkindex'))
self.assertIsInstance(parser, ConfigParser)
with self.assertRaises(ConfigurationError) as e:
parser.parse(SAMPLES / 'errors.yaml')
with (SAMPLES / 'expected_errors.yaml').open() as f:
expected_errors = yaml.safe_load(f.read())
self.maxDiff = None
self.assertDictEqual(expected_errors, e.exception.errors)
#!/bin/sh
mkdir -p static
pip install -e .
PONOS_DATA_DIR=/tmp STATIC_ROOT=$(pwd)/static arkindex/manage.py collectstatic
echo "static: {root_path: '$(pwd)/static'}" > "$CONFIG_PATH"
arkindex/manage.py collectstatic --noinput