From 75a039cee9c5714a41658bce1cb1a6a54d53e5a8 Mon Sep 17 00:00:00 2001
From: Erwan Rouchet <rouchet@teklia.com>
Date: Mon, 13 Jun 2022 15:01:57 +0000
Subject: [PATCH] Add ListBuckets endpoint

---
 arkindex/dataimport/api.py                   |  21 ++-
 arkindex/dataimport/serializers/ingest.py    |   6 +
 arkindex/dataimport/tests/test_ingest.py     |  71 ++++++++
 arkindex/images/tests/test_image.py          |  12 +-
 arkindex/project/api_v1.py                   |   4 +
 arkindex/project/aws.py                      |  58 +++++--
 arkindex/project/checks.py                   | 102 ++++++++++-
 arkindex/project/config.py                   |  47 +++--
 arkindex/project/settings.py                 |  11 +-
 .../tests/config_samples/defaults.yaml       |   5 +
 .../project/tests/config_samples/errors.yaml |   5 +
 .../tests/config_samples/override.yaml       |   5 +
 arkindex/project/tests/test_checks.py        | 164 ++++++++++++++++++
 arkindex/users/models.py                     |   5 +
 tests-requirements.txt                       |   1 +
 15 files changed, 472 insertions(+), 45 deletions(-)
 create mode 100644 arkindex/dataimport/serializers/ingest.py
 create mode 100644 arkindex/dataimport/tests/test_ingest.py

diff --git a/arkindex/dataimport/api.py b/arkindex/dataimport/api.py
index a5b9e55cfd..fc048aa245 100644
--- a/arkindex/dataimport/api.py
+++ b/arkindex/dataimport/api.py
@@ -70,6 +70,7 @@ from arkindex.dataimport.serializers.imports import (
     WorkerRunEditSerializer,
     WorkerRunSerializer,
 )
+from arkindex.dataimport.serializers.ingest import BucketSerializer
 from arkindex.dataimport.serializers.workers import (
     RepositorySerializer,
     WorkerActivitySerializer,
@@ -82,6 +83,7 @@ from arkindex.dataimport.serializers.workers import (
 )
 from arkindex.dataimport.utils import hash_object
 from arkindex.documents.models import Corpus, Element
+from arkindex.project.aws import get_ingest_resource
 from arkindex.project.fields import ArrayRemove
 from arkindex.project.mixins import (
     ConflictAPIException,
@@ -96,7 +98,7 @@ from arkindex.project.pagination import CustomCursorPagination
 from arkindex.project.permissions import IsInternal, IsVerified, IsVerifiedOrReadOnly
 from arkindex.project.tools import RTrimChr
 from arkindex.project.triggers import process_delete
-from arkindex.users.models import OAuthCredentials, Role
+from arkindex.users.models import OAuthCredentials, Role, Scope
 from arkindex.users.utils import get_max_level
 from ponos.models import STATES_ORDERING, State
 
@@ -1777,3 +1779,20 @@ class ClearProcess(ProcessACLMixin, DestroyAPIView):
         process = self.get_object()
         process.clear()
         return Response(status=status.HTTP_204_NO_CONTENT)
+
+
+@extend_schema(tags=['ingest'])
+class BucketList(ListAPIView):
+    """
+    List the S3 buckets that are available for ingestion.
+    """
+
+    permission_classes = (IsVerified, )
+    scopes = (Scope.S3Ingest, )
+    pagination_class = None
+    serializer_class = BucketSerializer
+
+    def get_queryset(self):
+        # Listing buckets does not use any pagination at all,
+        # so this results in a single call to the S3 API.
+        return list(get_ingest_resource().buckets.all())
diff --git a/arkindex/dataimport/serializers/ingest.py b/arkindex/dataimport/serializers/ingest.py
new file mode 100644
index 0000000000..eb06436236
--- /dev/null
+++ b/arkindex/dataimport/serializers/ingest.py
@@ -0,0 +1,6 @@
+from rest_framework import serializers
+
+
+class BucketSerializer(serializers.Serializer):
+
+    name = serializers.CharField()
diff --git a/arkindex/dataimport/tests/test_ingest.py b/arkindex/dataimport/tests/test_ingest.py
new file mode 100644
index 0000000000..2406ad7ce2
--- /dev/null
+++ b/arkindex/dataimport/tests/test_ingest.py
@@ -0,0 +1,71 @@
+import boto3
+from django.test import override_settings
+from django.urls import reverse
+from rest_framework import status
+
+from arkindex.project.tests import FixtureAPITestCase
+from arkindex.users.models import Scope
+from moto import mock_s3
+
+
+class TestIngest(FixtureAPITestCase):
+
+    def test_list_buckets_requires_login(self):
+        with self.assertNumQueries(0):
+            resp = self.client.get(reverse('api:bucket-list'))
+        self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
+
+    def test_list_buckets_requires_verified(self):
+        self.user.verified_email = False
+        self.user.save()
+        self.user.user_scopes.create(scope=Scope.S3Ingest)
+        self.client.force_login(self.user)
+
+        with self.assertNumQueries(2):
+            resp = self.client.get(reverse('api:bucket-list'))
+        self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
+
+    def test_list_buckets_requires_scope(self):
+        self.assertFalse(self.user.user_scopes.exists())
+        self.client.force_login(self.user)
+
+        with self.assertNumQueries(3):
+            resp = self.client.get(reverse('api:bucket-list'))
+        self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
+
+    @override_settings(
+        INGEST_S3_ACCESS_KEY=None,
+        INGEST_S3_SECRET_KEY=None,
+        INGEST_S3_ENDPOINT=None,
+        INGEST_S3_REGION=None,
+    )
+    def test_list_buckets_missing_settings(self):
+        self.user.user_scopes.create(scope=Scope.S3Ingest)
+        self.client.force_login(self.user)
+        with self.assertNumQueries(3), self.assertRaises(AssertionError):
+            self.client.get(reverse('api:bucket-list'))
+
+    @override_settings(
+        INGEST_S3_ACCESS_KEY='just-let-me-in',
+        INGEST_S3_SECRET_KEY='i-dunno',
+        INGEST_S3_ENDPOINT=None,
+        INGEST_S3_REGION='atlantis-1',
+    )
+    @mock_s3
+    def test_list_buckets(self):
+        self.user.user_scopes.create(scope=Scope.S3Ingest)
+        self.client.force_login(self.user)
+
+        # Create some buckets with moto to get some test data
+        s3 = boto3.resource('s3', region_name='us-east-1')
+        s3.create_bucket(Bucket='dank-memes')
+        s3.create_bucket(Bucket='cursed-emojis')
+
+        with self.assertNumQueries(3):
+            resp = self.client.get(reverse('api:bucket-list'))
+        self.assertEqual(resp.status_code, status.HTTP_200_OK)
+
+        self.assertListEqual(resp.json(), [
+            {'name': 'dank-memes'},
+            {'name': 'cursed-emojis'},
+        ])
diff --git a/arkindex/images/tests/test_image.py b/arkindex/images/tests/test_image.py
index e28c2d3a8e..f530d116a0 100644
--- a/arkindex/images/tests/test_image.py
+++ b/arkindex/images/tests/test_image.py
@@ -40,23 +40,25 @@ class TestImage(FixtureTestCase):
                 Params={'Bucket': 'iiif', 'Key': 'abcd'},
             ))
 
-    @override_settings(LOCAL_IMAGESERVER_ID=1, AWS_REGION=None)
-    @patch('arkindex.project.aws.session.client')
+    @override_settings(LOCAL_IMAGESERVER_ID=1, AWS_REGION='baba-au-rhum-1')
+    @patch('arkindex.project.aws.boto3.session.Session')
     @patch('arkindex.project.aws.Config')
-    def test_s3_get_url_different_region(self, config_mock, s3_client_mock):
+    def test_s3_get_url_different_region(self, config_mock, session_mock):
         img = ImageServer.objects.create(s3_region='middle-earth-1').images.create(path='abcd')
         config_mock.return_value = 'config'
         img.s3_object = MagicMock()
         img.s3_url
-        self.assertEqual(s3_client_mock.return_value.generate_presigned_url.call_count, 1)
+        self.assertEqual(session_mock.call_count, 1)
+        self.assertEqual(session_mock().client.call_count, 1)
         self.assertEqual(
-            s3_client_mock.call_args,
+            session_mock().client.call_args,
             call(
                 's3',
                 region_name='middle-earth-1',
                 config='config'
             )
         )
+        self.assertEqual(session_mock().client().generate_presigned_url.call_count, 1)
 
     @override_settings(LOCAL_IMAGESERVER_ID=1, AWS_REGION=None)
     @patch('arkindex.project.aws.s3.meta.client.generate_presigned_url')
diff --git a/arkindex/project/api_v1.py b/arkindex/project/api_v1.py
index 436214d3a2..7f89187bd7 100644
--- a/arkindex/project/api_v1.py
+++ b/arkindex/project/api_v1.py
@@ -4,6 +4,7 @@ from django.views.generic.base import RedirectView
 from arkindex.dataimport.api import (
     ApplyProcessTemplate,
     AvailableRepositoriesList,
+    BucketList,
     ClearProcess,
     CorpusWorkersActivity,
     CorpusWorkerVersionList,
@@ -248,6 +249,9 @@ api = [
     path('model/<uuid:pk>/versions/', ModelVersionsList.as_view(), name='model-versions'),
     path('modelversion/<uuid:pk>/download/', ModelVersionDownload.as_view(), name='model-version-download'),
 
+    # S3 ingest
+    path('ingest/buckets/', BucketList.as_view(), name='bucket-list'),
+
     # Image management
     path('image/', ImageCreate.as_view(), name='image-create'),
     path('image/iiif/url/', IIIFURLCreate.as_view(), name='iiif-url-create'),
diff --git a/arkindex/project/aws.py b/arkindex/project/aws.py
index c000dc519d..0e97f1325c 100644
--- a/arkindex/project/aws.py
+++ b/arkindex/project/aws.py
@@ -13,24 +13,42 @@ from tenacity import retry, retry_if_exception, stop_after_delay
 
 logger = logging.getLogger(__name__)
 
-session = boto3.session.Session(
-    aws_access_key_id=settings.AWS_ACCESS_KEY,
-    aws_secret_access_key=settings.AWS_SECRET_KEY,
-)
-
-config = Config(
-    region_name=settings.AWS_REGION,
-    signature_version='s3v4',
-    s3={
-        'addressing_style': 'auto' if settings.AWS_ENDPOINT else 'virtual',
-    }
-)
-
-s3 = session.resource(
-    's3',
-    endpoint_url=settings.AWS_ENDPOINT,
-    config=config,
-)
+def get_s3_resource(
+        access_key_id=settings.AWS_ACCESS_KEY,
+        secret_access_key=settings.AWS_SECRET_KEY,
+        endpoint=settings.AWS_ENDPOINT,
+        region=settings.AWS_REGION):
+    session = boto3.session.Session(
+        aws_access_key_id=access_key_id,
+        aws_secret_access_key=secret_access_key,
+    )
+
+    config = Config(
+        region_name=region,
+        signature_version='s3v4',
+        s3={
+            'addressing_style': 'auto' if endpoint else 'virtual',
+        }
+    )
+
+    return session.resource(
+        's3',
+        endpoint_url=endpoint,
+        config=config,
+    )
+
+
+s3 = get_s3_resource()
+
+
+def get_ingest_resource():
+    assert settings.INGEST_S3_ENDPOINT or settings.INGEST_S3_REGION, 'An endpoint or region is required'
+    return get_s3_resource(
+        access_key_id=settings.INGEST_S3_ACCESS_KEY,
+        secret_access_key=settings.INGEST_S3_SECRET_KEY,
+        endpoint=settings.INGEST_S3_ENDPOINT,
+        region=settings.INGEST_S3_REGION,
+    )
 
 
 def requires_s3_object(func):
@@ -90,6 +108,10 @@
     def s3_url(self) -> str:
         # Handle different regions signatures
         if self.s3_region != settings.AWS_REGION:
+            session = boto3.session.Session(
+                aws_access_key_id=settings.AWS_ACCESS_KEY,
+                aws_secret_access_key=settings.AWS_SECRET_KEY,
+            )
             client = session.client(
                 's3',
                 config=Config(signature_version='s3v4'),
diff --git a/arkindex/project/checks.py b/arkindex/project/checks.py
index bb6633b1d7..26ca08da48 100644
--- a/arkindex/project/checks.py
+++ b/arkindex/project/checks.py
@@ -1,4 +1,11 @@
-import os.path
+"""
+Arkindex-specific Django system checks.
+
+If you make any changes to these checks, you should consider updating the system
+checks documentation on the wiki:
+<https://wiki.vpn/en/arkindex/deploy/checks>
+"""
+import os
 import sys
 
 import yaml
@@ -177,3 +184,96 @@
             )
         ]
     return []
+
+
+@register()
+@only_runserver
+def ingest_check(*args, **kwargs):
+    from django.conf import settings
+    warnings = []
+
+    if settings.INGEST_S3_ACCESS_KEY is None:
+        warnings.append(Warning(
+            'INGEST_S3_ACCESS_KEY is not set. The S3 ingest feature will not work.',
+            id='arkindex.W010',
+        ))
+
+    if settings.INGEST_S3_SECRET_KEY is None:
+        warnings.append(Warning(
+            'INGEST_S3_SECRET_KEY is not set. The S3 ingest feature will not work.',
+            id='arkindex.W010',
+        ))
+
+    if settings.INGEST_S3_REGION is None and settings.INGEST_S3_ENDPOINT is None:
+        warnings.append(Warning(
+            'Neither INGEST_S3_REGION nor INGEST_S3_ENDPOINT is set. The S3 ingest feature will not work.',
+            id='arkindex.W010',
+        ))
+
+    return warnings
+
+
+@register()
+def botocore_config_check(*args, **kwargs):
+    """
+    When the S3 settings do not all define an access key ID, a secret access key,
+    and either an endpoint or a region, botocore may fall back to environment
+    variables or configuration files to fill in the blanks. This can confuse
+    developers, as some unit tests may fail or behave differently, and it can
+    cause data leaks in production when credentials are picked up unexpectedly.
+    """
+    from django.conf import settings
+
+    required_settings = [
+        settings.AWS_ACCESS_KEY,
+        settings.AWS_SECRET_KEY,
+        settings.AWS_ENDPOINT or settings.AWS_REGION,
+        settings.INGEST_S3_ACCESS_KEY,
+        settings.INGEST_S3_SECRET_KEY,
+        settings.INGEST_S3_ENDPOINT or settings.INGEST_S3_REGION,
+    ]
+    if all(setting is not None for setting in required_settings):
+        # All settings are set, no complaints
+        return []
+
+    forbidden_variables = {
+        'AWS_ACCESS_KEY_ID',
+        'AWS_CONFIG_FILE',
+        'AWS_CREDENTIAL_FILE',
+        'AWS_DEFAULT_PROFILE',
+        'AWS_DEFAULT_REGION',
+        'AWS_EXECUTION_ENV',
+        'AWS_PROFILE',
+        'AWS_REGION',
+        'AWS_SECRET_ACCESS_KEY',
+        'AWS_SECURITY_TOKEN',
+        'AWS_SESSION_TOKEN',
+        'BOTO_CONFIG',
+    }
+    forbidden_files = {
+        '~/.aws/config',
+        '~/.aws/credentials',
+    }
+    warnings = []
+
+    found_variables = forbidden_variables & set(os.environ.keys())
+    warnings.extend([
+        Warning(
+            f'The {variable} environment variable is set, and not all S3 settings are set.\n'
+            'This variable might override the Arkindex settings and cause unexpected behavior.',
+            id='arkindex.W011',
+        )
+        for variable in found_variables
+    ])
+
+    for path in forbidden_files:
+        if os.path.exists(os.path.expanduser(path)):
+            warnings.append(
+                Warning(
+                    f'The {path} file exists, and not all S3 settings are set.\n'
+                    'The settings in this file might override the Arkindex settings and cause unexpected behavior.',
+                    id='arkindex.W011',
+                )
+            )
+
+    return warnings
diff --git a/arkindex/project/config.py b/arkindex/project/config.py
index 4243317cdb..9decae996f 100644
--- a/arkindex/project/config.py
+++ b/arkindex/project/config.py
@@ -1,3 +1,10 @@
+"""
+Parser for the YAML configuration file.
+
+If you update any of the keys in the YAML file, you should consider
+updating the configuration documentation on the wiki:
+<https://wiki.vpn/en/arkindex/deploy/configuration>
+"""
 import uuid
 from enum import Enum
 from pathlib import Path
@@ -49,6 +56,28 @@ def public_hostname(value: Optional[str]):
     return value.rstrip('/')
 
 
+def add_s3_parser(parser, name, **kwargs):
+    s3_parser = ConfigParser()
+    s3_parser.add_option('access_key_id', type=str, default=None)
+    s3_parser.add_option('secret_access_key', type=str, default=None)
+    s3_parser.add_option('endpoint', type=str, default=None)
+    s3_parser.add_option('region', type=str, default=None)
+
+    def s3_validator(value):
+        data = s3_parser.parse_data(value)
+        if not data.get('access_key_id') and not data.get('secret_access_key'):
+            # No configuration specified;
+            # just ignore and let the system checks warn about this without preventing startup
+            return data
+        if not data.get('endpoint') and not data.get('region'):
+            raise ConfigurationError(f'One of `{name}.endpoint` or `{name}.region` is required')
+        return data
+
+    parser.add_option(name, type=s3_validator, default={}, **kwargs)
+
+    return s3_parser
+
+
 def get_settings_parser(base_dir):
     parser = ConfigParser()
     parser.add_option('arkindex_env', type=str, default='dev')
@@ -187,11 +216,7 @@ def get_settings_parser(base_dir):
 
     parser.add_option('cache', default={}, type=cache_validator)
 
-    s3_parser = ConfigParser()
-    s3_parser.add_option('access_key_id', type=str, default=None)
-    s3_parser.add_option('secret_access_key', type=str, default=None)
-    s3_parser.add_option('endpoint', type=str, default=None)
-    s3_parser.add_option('region', type=str, default=None)
+    s3_parser = add_s3_parser(parser, 's3')
     s3_parser.add_option('thumbnails_bucket', type=str, default='thumbnails')
     s3_parser.add_option('staging_bucket', type=str, default='staging')
     s3_parser.add_option('export_bucket', type=str, default='export')
@@ -199,16 +224,6 @@ def get_settings_parser(base_dir):
     s3_parser.add_option('ponos_logs_bucket', type=str, default='ponos-logs')
     s3_parser.add_option('ponos_artifacts_bucket', type=str, default='ponos-artifacts')
 
-    def s3_validator(value):
-        data = s3_parser.parse_data(value)
-        if not data.get('access_key_id') and not data.get('secret_access_key'):
-            # No configuration specified;
-            # just ignore and let the system checks warn about this without preventing startup
-            return data
-        if not data.get('endpoint') and not data.get('region'):
-            raise ConfigurationError('One of `s3.endpoint` or `s3.region` are required')
-        return data
-
-    parser.add_option('s3', type=s3_validator, default={})
+    add_s3_parser(parser, 'ingest')
 
     return parser
diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py
index 6b65c56673..85b35ea8fe 100644
--- a/arkindex/project/settings.py
+++ b/arkindex/project/settings.py
@@ -265,6 +265,7 @@ SPECTACULAR_SETTINGS = {
             'description': 'IIIF manifests, annotation lists and services',
         },
         {'name': 'imports'},
+        {'name': 'ingest'},
        {'name': 'images'},
        {'name': 'jobs'},
        {
@@ -281,10 +282,6 @@ SPECTACULAR_SETTINGS = {
         {'name': 'search'},
         {'name': 'transcriptions'},
         {'name': 'users'},
-        {
-            'name': 'management',
-            'description': 'Admin-only tools',
-        },
     ]
 }
 
@@ -489,11 +486,17 @@ PONOS_AWS_ENDPOINT = AWS_ENDPOINT = conf['s3']['endpoint']
 PONOS_AWS_REGION = AWS_REGION = conf['s3']['region']
 PONOS_S3_LOGS_BUCKET = conf['s3']['ponos_logs_bucket']
 PONOS_S3_ARTIFACTS_BUCKET = conf['s3']['ponos_artifacts_bucket']
+
 AWS_THUMBNAIL_BUCKET = conf['s3']['thumbnails_bucket']
 AWS_STAGING_BUCKET = conf['s3']['staging_bucket']
 AWS_EXPORT_BUCKET = conf['s3']['export_bucket']
 AWS_TRAINING_BUCKET = conf['s3']['training_bucket']
 
+INGEST_S3_ACCESS_KEY = conf['ingest']['access_key_id']
+INGEST_S3_SECRET_KEY = conf['ingest']['secret_access_key']
+INGEST_S3_ENDPOINT = conf['ingest']['endpoint']
+INGEST_S3_REGION = conf['ingest']['region']
+
 # Ponos integration
 _ponos_env = {
     'ARKINDEX_API_CSRF_COOKIE': CSRF_COOKIE_NAME
diff --git a/arkindex/project/tests/config_samples/defaults.yaml b/arkindex/project/tests/config_samples/defaults.yaml
index 00553e7d8d..f4bf3ab648 100644
--- a/arkindex/project/tests/config_samples/defaults.yaml
+++ b/arkindex/project/tests/config_samples/defaults.yaml
@@ -45,6 +45,11 @@ gitlab:
   imports_worker_version: null
 influxdb:
   api_url: http://localhost:8086/
+ingest:
+  access_key_id: null
+  endpoint: null
+  region: null
+  secret_access_key: null
 internal_group_id: 2
 job_timeouts:
   corpus_delete: 7200
diff --git a/arkindex/project/tests/config_samples/errors.yaml b/arkindex/project/tests/config_samples/errors.yaml
index 04a7fdce66..770a46bf76 100644
--- a/arkindex/project/tests/config_samples/errors.yaml
+++ b/arkindex/project/tests/config_samples/errors.yaml
@@ -30,6 +30,11 @@ gitlab:
 influxdb:
   api_url: no
 internal_group_id: 2
+ingest:
+  endpoint: https://ohno
+  access_key_id: a
+  region: nowhere
+  secret_access_key: null
 job_timeouts:
   corpus_delete: lol
   element_trash: no
diff --git a/arkindex/project/tests/config_samples/override.yaml b/arkindex/project/tests/config_samples/override.yaml
index f4253c6791..76a8e3f3f7 100644
--- a/arkindex/project/tests/config_samples/override.yaml
+++ b/arkindex/project/tests/config_samples/override.yaml
@@ -57,6 +57,11 @@ gitlab:
   imports_worker_version: aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa
 influxdb:
   api_url: http://graph/
+ingest:
+  access_key_id: abcd
+  endpoint: somewhere
+  region: middle-earth-1
+  secret_access_key: hunter2
 internal_group_id: 4
 job_timeouts:
   corpus_delete: 1
diff --git a/arkindex/project/tests/test_checks.py b/arkindex/project/tests/test_checks.py
index 489e1b49d5..ec16b64d96 100644
--- a/arkindex/project/tests/test_checks.py
+++ b/arkindex/project/tests/test_checks.py
@@ -1,3 +1,4 @@
+import os
 from pathlib import Path
 from unittest.mock import patch
 
@@ -198,3 +199,166 @@
             version_info_mock.minor = 9
 
         self.assertListEqual(python_version_check(), [])
+
+    @override_settings()
+    def test_ingest_check(self):
+        from arkindex.project.checks import ingest_check
+
+        settings.INGEST_S3_ACCESS_KEY = None
+        settings.INGEST_S3_SECRET_KEY = None
+        settings.INGEST_S3_ENDPOINT = None
+        settings.INGEST_S3_REGION = None
+        self.assertCountEqual(ingest_check(), [
+            Warning(
+                'INGEST_S3_ACCESS_KEY is not set. The S3 ingest feature will not work.',
+                id='arkindex.W010',
+            ),
+            Warning(
+                'INGEST_S3_SECRET_KEY is not set. The S3 ingest feature will not work.',
+                id='arkindex.W010',
+            ),
+            Warning(
+                'Neither INGEST_S3_REGION nor INGEST_S3_ENDPOINT is set. The S3 ingest feature will not work.',
+                id='arkindex.W010',
+            ),
+        ])
+
+        settings.INGEST_S3_ACCESS_KEY = 'key'
+        settings.INGEST_S3_SECRET_KEY = 's3kr3t'
+        settings.INGEST_S3_ENDPOINT = 'something'
+        self.assertListEqual(ingest_check(), [])
+
+        settings.INGEST_S3_ENDPOINT = None
+        settings.INGEST_S3_REGION = 'somewhere'
+        self.assertListEqual(ingest_check(), [])
+
+    @override_settings()
+    @patch('os.path.exists', return_value=True)
+    @patch.dict(os.environ, {
+        'AWS_ACCESS_KEY_ID': 'blah',
+        'AWS_CONFIG_FILE': 'blah',
+        'AWS_CREDENTIAL_FILE': 'blah',
+        'AWS_DEFAULT_PROFILE': 'blah',
+        'AWS_DEFAULT_REGION': 'blah',
+        'AWS_EXECUTION_ENV': 'blah',
+        'AWS_PROFILE': 'blah',
+        'AWS_REGION': 'blah',
+        'AWS_SECRET_ACCESS_KEY': 'blah',
+        'AWS_SECURITY_TOKEN': 'blah',
+        'AWS_SESSION_TOKEN': 'blah',
+        'BOTO_CONFIG': 'blah',
+    })
+    def test_botocore_config_check(self, exists_mock):
+        from arkindex.project.checks import botocore_config_check
+
+        expected_warnings = [
+            Warning(
+                'The AWS_ACCESS_KEY_ID environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The AWS_CONFIG_FILE environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The AWS_CREDENTIAL_FILE environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The AWS_DEFAULT_PROFILE environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The AWS_DEFAULT_REGION environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The AWS_EXECUTION_ENV environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The AWS_PROFILE environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The AWS_REGION environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The AWS_SECRET_ACCESS_KEY environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The AWS_SECURITY_TOKEN environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The AWS_SESSION_TOKEN environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The BOTO_CONFIG environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The ~/.aws/config file exists, and not all S3 settings are set.\n'
+                'The settings in this file might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The ~/.aws/credentials file exists, and not all S3 settings are set.\n'
+                'The settings in this file might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+        ]
+
+        settings.INGEST_S3_ACCESS_KEY = None
+        settings.INGEST_S3_SECRET_KEY = None
+        settings.INGEST_S3_ENDPOINT = None
+        settings.INGEST_S3_REGION = None
+        settings.AWS_ACCESS_KEY = None
+        settings.AWS_SECRET_KEY = None
+        settings.AWS_ENDPOINT = None
+        settings.AWS_REGION = None
+
+        self.assertCountEqual(botocore_config_check(), expected_warnings)
+
+        settings.INGEST_S3_ACCESS_KEY = 'something'
+        self.assertCountEqual(botocore_config_check(), expected_warnings)
+
+        settings.INGEST_S3_SECRET_KEY = 'something'
+        self.assertCountEqual(botocore_config_check(), expected_warnings)
+
+        settings.AWS_ACCESS_KEY = 'something'
+        self.assertCountEqual(botocore_config_check(), expected_warnings)
+
+        settings.AWS_SECRET_KEY = 'something'
+        self.assertCountEqual(botocore_config_check(), expected_warnings)
+
+        settings.AWS_ENDPOINT = 'something'
+        self.assertCountEqual(botocore_config_check(), expected_warnings)
+
+        settings.INGEST_S3_ENDPOINT = 'something'
+        # We have set all the required settings here, so this check is skipped.
+        self.assertCountEqual(botocore_config_check(), [])
+
+        # The check requires either the endpoint or the region
+        settings.AWS_ENDPOINT = None
+        settings.AWS_REGION = 'something'
+        self.assertCountEqual(botocore_config_check(), [])
+
+        settings.INGEST_S3_ENDPOINT = None
+        settings.INGEST_S3_REGION = 'something'
+        self.assertCountEqual(botocore_config_check(), [])
diff --git a/arkindex/users/models.py b/arkindex/users/models.py
index 629116fb39..e348f4e035 100644
--- a/arkindex/users/models.py
+++ b/arkindex/users/models.py
@@ -227,6 +227,11 @@ class Scope(Enum):
     This could allow someone to make Arkindex perform DoS attacks on other IIIF servers
     due to the image checks.
     """
+    S3Ingest = 's3_ingest'
+    """
+    Allows access to S3 ingestion features.
+    """
+
 
 class UserScope(models.Model):
     user = models.ForeignKey('users.User', related_name='user_scopes', on_delete=models.CASCADE)
diff --git a/tests-requirements.txt b/tests-requirements.txt
index 918c101ddb..4c9b106603 100644
--- a/tests-requirements.txt
+++ b/tests-requirements.txt
@@ -1,4 +1,5 @@
 coverage==6.3.2
 django-nose==1.4.7
+moto[s3]==3.1.12
 responses==0.20.0
 tripoli==2.0.0
-- 
GitLab
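Configuration note: the new `ingest` section in the YAML configuration file mirrors the existing `s3` section, and the validator added in arkindex/project/config.py accepts it only when, besides the credentials, either `endpoint` or `region` is set. A minimal deployment sketch, with placeholder values:

    ingest:
      access_key_id: ingest-key            # placeholder credentials
      secret_access_key: ingest-secret     # placeholder credentials
      endpoint: https://minio.example.com  # or set `region` instead of `endpoint`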
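Example call against the new endpoint, as a minimal sketch: the host, the /api/v1/ prefix and the token header scheme are illustrative assumptions, not part of this patch; only the ingest/buckets/ route, the s3_ingest scope requirement and the unpaginated response shape come from the code above.

    import requests

    ARKINDEX_URL = 'https://arkindex.example.com'  # hypothetical instance
    TOKEN = 'secret-api-token'  # hypothetical token of a verified user holding the s3_ingest scope

    resp = requests.get(
        f'{ARKINDEX_URL}/api/v1/ingest/buckets/',
        headers={'Authorization': f'Token {TOKEN}'},
    )
    resp.raise_for_status()

    # BucketList disables pagination, so the body is a plain JSON list
    # of objects shaped by BucketSerializer: [{"name": "..."}, ...]
    for bucket in resp.json():
        print(bucket['name'])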
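The fallback behavior that botocore_config_check guards against can be reproduced with boto3 alone; this sketch only illustrates the credential resolution chain that motivates the arkindex.W011 warnings:

    import boto3

    # With no explicit credentials, boto3 walks its resolution chain:
    # AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY, AWS_PROFILE, ~/.aws/credentials,
    # ~/.aws/config and so on, the same variables and files listed in the check.
    session = boto3.session.Session()

    # May return credentials even when no Arkindex S3 setting is configured,
    # which is exactly the surprise the check warns about.
    print(session.get_credentials())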