From 75a039cee9c5714a41658bce1cb1a6a54d53e5a8 Mon Sep 17 00:00:00 2001
From: Erwan Rouchet <rouchet@teklia.com>
Date: Mon, 13 Jun 2022 15:01:57 +0000
Subject: [PATCH] Add ListBuckets endpoint

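Adds a GET /api/v1/ingest/buckets/ endpoint listing the S3 buckets
available for ingestion. Access requires a verified user with the new
`s3_ingest` scope. Credentials are read from a new `ingest` section of
the YAML configuration, which mirrors the existing `s3` section and
requires either an endpoint or a region to be set.

Example configuration and response (illustrative values, taken from the
config samples and tests below):

    ingest:
      access_key_id: abcd
      endpoint: null
      region: middle-earth-1
      secret_access_key: hunter2

    GET /api/v1/ingest/buckets/
    [{"name": "dank-memes"}, {"name": "cursed-emojis"}]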
---
 arkindex/dataimport/api.py                    |  21 ++-
 arkindex/dataimport/serializers/ingest.py     |   6 +
 arkindex/dataimport/tests/test_ingest.py      |  71 ++++++++
 arkindex/images/tests/test_image.py           |  12 +-
 arkindex/project/api_v1.py                    |   4 +
 arkindex/project/aws.py                       |  58 +++++--
 arkindex/project/checks.py                    | 102 ++++++++++-
 arkindex/project/config.py                    |  47 +++--
 arkindex/project/settings.py                  |  11 +-
 .../tests/config_samples/defaults.yaml        |   5 +
 .../project/tests/config_samples/errors.yaml  |   5 +
 .../tests/config_samples/override.yaml        |   5 +
 arkindex/project/tests/test_checks.py         | 164 ++++++++++++++++++
 arkindex/users/models.py                      |   5 +
 tests-requirements.txt                        |   1 +
 15 files changed, 472 insertions(+), 45 deletions(-)
 create mode 100644 arkindex/dataimport/serializers/ingest.py
 create mode 100644 arkindex/dataimport/tests/test_ingest.py

diff --git a/arkindex/dataimport/api.py b/arkindex/dataimport/api.py
index a5b9e55cfd..fc048aa245 100644
--- a/arkindex/dataimport/api.py
+++ b/arkindex/dataimport/api.py
@@ -70,6 +70,7 @@ from arkindex.dataimport.serializers.imports import (
     WorkerRunEditSerializer,
     WorkerRunSerializer,
 )
+from arkindex.dataimport.serializers.ingest import BucketSerializer
 from arkindex.dataimport.serializers.workers import (
     RepositorySerializer,
     WorkerActivitySerializer,
@@ -82,6 +83,7 @@ from arkindex.dataimport.serializers.workers import (
 )
 from arkindex.dataimport.utils import hash_object
 from arkindex.documents.models import Corpus, Element
+from arkindex.project.aws import get_ingest_resource
 from arkindex.project.fields import ArrayRemove
 from arkindex.project.mixins import (
     ConflictAPIException,
@@ -96,7 +98,7 @@ from arkindex.project.pagination import CustomCursorPagination
 from arkindex.project.permissions import IsInternal, IsVerified, IsVerifiedOrReadOnly
 from arkindex.project.tools import RTrimChr
 from arkindex.project.triggers import process_delete
-from arkindex.users.models import OAuthCredentials, Role
+from arkindex.users.models import OAuthCredentials, Role, Scope
 from arkindex.users.utils import get_max_level
 from ponos.models import STATES_ORDERING, State
 
@@ -1777,3 +1779,20 @@ class ClearProcess(ProcessACLMixin, DestroyAPIView):
         process = self.get_object()
         process.clear()
         return Response(status=status.HTTP_204_NO_CONTENT)
+
+
+@extend_schema(tags=['ingest'])
+class BucketList(ListAPIView):
+    """
+    List the S3 buckets that are available for ingestion.
+    """
+
+    permission_classes = (IsVerified, )
+    scopes = (Scope.S3Ingest, )
+    pagination_class = None
+    serializer_class = BucketSerializer
+
+    def get_queryset(self):
+        # The S3 ListBuckets call is not paginated,
+        # so building this list performs a single API request.
+        return list(get_ingest_resource().buckets.all())
diff --git a/arkindex/dataimport/serializers/ingest.py b/arkindex/dataimport/serializers/ingest.py
new file mode 100644
index 0000000000..eb06436236
--- /dev/null
+++ b/arkindex/dataimport/serializers/ingest.py
@@ -0,0 +1,6 @@
+from rest_framework import serializers
+
+
+class BucketSerializer(serializers.Serializer):
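+    """Serializes an S3 bucket as returned by boto3, exposing only its name."""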
+
+    name = serializers.CharField()
diff --git a/arkindex/dataimport/tests/test_ingest.py b/arkindex/dataimport/tests/test_ingest.py
new file mode 100644
index 0000000000..2406ad7ce2
--- /dev/null
+++ b/arkindex/dataimport/tests/test_ingest.py
@@ -0,0 +1,71 @@
+import boto3
+from django.test import override_settings
+from django.urls import reverse
+from moto import mock_s3
+from rest_framework import status
+
+from arkindex.project.tests import FixtureAPITestCase
+from arkindex.users.models import Scope
+
+
+class TestIngest(FixtureAPITestCase):
+
+    def test_list_buckets_requires_login(self):
+        with self.assertNumQueries(0):
+            resp = self.client.get(reverse('api:bucket-list'))
+            self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
+
+    def test_list_buckets_requires_verified(self):
+        self.user.verified_email = False
+        self.user.save()
+        self.user.user_scopes.create(scope=Scope.S3Ingest)
+        self.client.force_login(self.user)
+
+        with self.assertNumQueries(2):
+            resp = self.client.get(reverse('api:bucket-list'))
+            self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
+
+    def test_list_buckets_requires_scope(self):
+        self.assertFalse(self.user.user_scopes.exists())
+        self.client.force_login(self.user)
+
+        with self.assertNumQueries(3):
+            resp = self.client.get(reverse('api:bucket-list'))
+            self.assertEqual(resp.status_code, status.HTTP_403_FORBIDDEN)
+
+    @override_settings(
+        INGEST_S3_ACCESS_KEY=None,
+        INGEST_S3_SECRET_KEY=None,
+        INGEST_S3_ENDPOINT=None,
+        INGEST_S3_REGION=None,
+    )
+    def test_list_buckets_missing_settings(self):
+        self.user.user_scopes.create(scope=Scope.S3Ingest)
+        self.client.force_login(self.user)
+        with self.assertNumQueries(3), self.assertRaises(AssertionError):
+            self.client.get(reverse('api:bucket-list'))
+
+    @override_settings(
+        INGEST_S3_ACCESS_KEY='just-let-me-in',
+        INGEST_S3_SECRET_KEY='i-dunno',
+        INGEST_S3_ENDPOINT=None,
+        INGEST_S3_REGION='atlantis-1',
+    )
+    @mock_s3
+    def test_list_buckets(self):
+        self.user.user_scopes.create(scope=Scope.S3Ingest)
+        self.client.force_login(self.user)
+
+        # Create some buckets with moto to get some test data
+        s3 = boto3.resource('s3', region_name='us-east-1')
+        s3.create_bucket(Bucket='dank-memes')
+        s3.create_bucket(Bucket='cursed-emojis')
+
+        with self.assertNumQueries(3):
+            resp = self.client.get(reverse('api:bucket-list'))
+            self.assertEqual(resp.status_code, status.HTTP_200_OK)
+
+        self.assertListEqual(resp.json(), [
+            {'name': 'dank-memes'},
+            {'name': 'cursed-emojis'},
+        ])
diff --git a/arkindex/images/tests/test_image.py b/arkindex/images/tests/test_image.py
index e28c2d3a8e..f530d116a0 100644
--- a/arkindex/images/tests/test_image.py
+++ b/arkindex/images/tests/test_image.py
@@ -40,23 +40,25 @@ class TestImage(FixtureTestCase):
             Params={'Bucket': 'iiif', 'Key': 'abcd'},
         ))
 
-    @override_settings(LOCAL_IMAGESERVER_ID=1, AWS_REGION=None)
-    @patch('arkindex.project.aws.session.client')
+    @override_settings(LOCAL_IMAGESERVER_ID=1, AWS_REGION='baba-au-rhum-1')
+    @patch('arkindex.project.aws.boto3.session.Session')
     @patch('arkindex.project.aws.Config')
-    def test_s3_get_url_different_region(self, config_mock, s3_client_mock):
+    def test_s3_get_url_different_region(self, config_mock, session_mock):
         img = ImageServer.objects.create(s3_region='middle-earth-1').images.create(path='abcd')
         config_mock.return_value = 'config'
         img.s3_object = MagicMock()
         img.s3_url
-        self.assertEqual(s3_client_mock.return_value.generate_presigned_url.call_count, 1)
+        self.assertEqual(session_mock.call_count, 1)
+        self.assertEqual(session_mock().client.call_count, 1)
         self.assertEqual(
-            s3_client_mock.call_args,
+            session_mock().client.call_args,
             call(
                 's3',
                 region_name='middle-earth-1',
                 config='config'
             )
         )
+        self.assertEqual(session_mock().client().generate_presigned_url.call_count, 1)
 
     @override_settings(LOCAL_IMAGESERVER_ID=1, AWS_REGION=None)
     @patch('arkindex.project.aws.s3.meta.client.generate_presigned_url')
diff --git a/arkindex/project/api_v1.py b/arkindex/project/api_v1.py
index 436214d3a2..7f89187bd7 100644
--- a/arkindex/project/api_v1.py
+++ b/arkindex/project/api_v1.py
@@ -4,6 +4,7 @@ from django.views.generic.base import RedirectView
 from arkindex.dataimport.api import (
     ApplyProcessTemplate,
     AvailableRepositoriesList,
+    BucketList,
     ClearProcess,
     CorpusWorkersActivity,
     CorpusWorkerVersionList,
@@ -248,6 +249,9 @@ api = [
     path('model/<uuid:pk>/versions/', ModelVersionsList.as_view(), name='model-versions'),
     path('modelversion/<uuid:pk>/download/', ModelVersionDownload.as_view(), name='model-version-download'),
 
+    # S3 ingest
+    path('ingest/buckets/', BucketList.as_view(), name='bucket-list'),
+
     # Image management
     path('image/', ImageCreate.as_view(), name='image-create'),
     path('image/iiif/url/', IIIFURLCreate.as_view(), name='iiif-url-create'),
diff --git a/arkindex/project/aws.py b/arkindex/project/aws.py
index c000dc519d..0e97f1325c 100644
--- a/arkindex/project/aws.py
+++ b/arkindex/project/aws.py
@@ -13,24 +13,42 @@ from tenacity import retry, retry_if_exception, stop_after_delay
 logger = logging.getLogger(__name__)
 
 
-session = boto3.session.Session(
-    aws_access_key_id=settings.AWS_ACCESS_KEY,
-    aws_secret_access_key=settings.AWS_SECRET_KEY,
-)
-
-config = Config(
-    region_name=settings.AWS_REGION,
-    signature_version='s3v4',
-    s3={
-        'addressing_style': 'auto' if settings.AWS_ENDPOINT else 'virtual',
-    }
-)
-
-s3 = session.resource(
-    's3',
-    endpoint_url=settings.AWS_ENDPOINT,
-    config=config,
-)
+def get_s3_resource(
+        access_key_id=settings.AWS_ACCESS_KEY,
+        secret_access_key=settings.AWS_SECRET_KEY,
+        endpoint=settings.AWS_ENDPOINT,
+        region=settings.AWS_REGION):
+    session = boto3.session.Session(
+        aws_access_key_id=access_key_id,
+        aws_secret_access_key=secret_access_key,
+    )
+
+    config = Config(
+        region_name=region,
+        signature_version='s3v4',
+        s3={
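+            # A custom endpoint implies an S3-compatible service, where
+            # virtual-hosted-style addressing may not work; let botocore decide.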
+            'addressing_style': 'auto' if endpoint else 'virtual',
+        }
+    )
+
+    return session.resource(
+        's3',
+        endpoint_url=endpoint,
+        config=config,
+    )
+
+
+s3 = get_s3_resource()
+
+
+def get_ingest_resource():
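+    # Build a separate boto3 resource from the INGEST_S3_* settings, so the
+    # ingest feature can target a different S3 service than the main storage.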
+    assert settings.INGEST_S3_ENDPOINT or settings.INGEST_S3_REGION, 'An endpoint or region is required'
+    return get_s3_resource(
+        access_key_id=settings.INGEST_S3_ACCESS_KEY,
+        secret_access_key=settings.INGEST_S3_SECRET_KEY,
+        endpoint=settings.INGEST_S3_ENDPOINT,
+        region=settings.INGEST_S3_REGION,
+    )
 
 
 def requires_s3_object(func):
@@ -90,6 +108,10 @@ class S3FileMixin(object):
     def s3_url(self) -> str:
         # Handle different regions signatures
         if self.s3_region != settings.AWS_REGION:
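+            # The module-level resource is bound to AWS_REGION, so build a
+            # dedicated session and client signed for the object's own region.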
+            session = boto3.session.Session(
+                aws_access_key_id=settings.AWS_ACCESS_KEY,
+                aws_secret_access_key=settings.AWS_SECRET_KEY,
+            )
             client = session.client(
                 's3',
                 config=Config(signature_version='s3v4'),
diff --git a/arkindex/project/checks.py b/arkindex/project/checks.py
index bb6633b1d7..26ca08da48 100644
--- a/arkindex/project/checks.py
+++ b/arkindex/project/checks.py
@@ -1,4 +1,11 @@
-import os.path
+"""
+Arkindex-specific Django system checks.
+
+If you make any changes to these checks, you should consider updating the system
+checks documentation on the wiki:
+<https://wiki.vpn/en/arkindex/deploy/checks>
+"""
+import os
 import sys
 
 import yaml
@@ -177,3 +184,96 @@ def python_version_check(*args, **kwargs):
             )
         ]
     return []
+
+
+@register()
+@only_runserver
+def ingest_check(*args, **kwargs):
+    from django.conf import settings
+    warnings = []
+
+    if settings.INGEST_S3_ACCESS_KEY is None:
+        warnings.append(Warning(
+            'INGEST_S3_ACCESS_KEY is not set. The S3 ingest feature will not work.',
+            id='arkindex.W010',
+        ))
+
+    if settings.INGEST_S3_SECRET_KEY is None:
+        warnings.append(Warning(
+            'INGEST_S3_SECRET_KEY is not set. The S3 ingest feature will not work.',
+            id='arkindex.W010',
+        ))
+
+    if settings.INGEST_S3_REGION is None and settings.INGEST_S3_ENDPOINT is None:
+        warnings.append(Warning(
+            'Neither INGEST_S3_REGION nor INGEST_S3_ENDPOINT is set. The S3 ingest feature will not work.',
+            id='arkindex.W010',
+        ))
+
+    return warnings
+
+
+@register()
+def botocore_config_check(*args, **kwargs):
+    """
+    When not all S3 settings have both an access key ID, secret access key
+    and either an endpoint or a region set, botocore might use some environment variables
+    or configuration files to fill in the blanks. This can cause confusion for developers
+    as some unit tests might fail or some other behaviors might be different, and can cause
+    data leaks in production where some credentials might be used unexpectedly.
+    """
+    from django.conf import settings
+
+    required_settings = [
+        settings.AWS_ACCESS_KEY,
+        settings.AWS_SECRET_KEY,
+        settings.AWS_ENDPOINT or settings.AWS_REGION,
+        settings.INGEST_S3_ACCESS_KEY,
+        settings.INGEST_S3_SECRET_KEY,
+        settings.INGEST_S3_ENDPOINT or settings.INGEST_S3_REGION,
+    ]
+    if all(setting is not None for setting in required_settings):
+        # All settings are set, no complaints
+        return []
+
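+    # Environment variables that botocore may read to fill in missing settings.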
+    forbidden_variables = {
+        'AWS_ACCESS_KEY_ID',
+        'AWS_CONFIG_FILE',
+        'AWS_CREDENTIAL_FILE',
+        'AWS_DEFAULT_PROFILE',
+        'AWS_DEFAULT_REGION',
+        'AWS_EXECUTION_ENV',
+        'AWS_PROFILE',
+        'AWS_REGION',
+        'AWS_SECRET_ACCESS_KEY',
+        'AWS_SECURITY_TOKEN',
+        'AWS_SESSION_TOKEN',
+        'BOTO_CONFIG',
+    }
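+    # Default locations of the AWS shared config and credentials files.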
+    forbidden_files = {
+        '~/.aws/config',
+        '~/.aws/credentials',
+    }
+    warnings = []
+
+    found_variables = forbidden_variables & set(os.environ.keys())
+    warnings.extend([
+        Warning(
+            f'The {variable} environment variable is set, and not all S3 settings are set.\n'
+            'This variable might override the Arkindex settings and cause unexpected behavior.',
+            id='arkindex.W011',
+        )
+        for variable in found_variables
+    ])
+
+    for path in forbidden_files:
+        if os.path.exists(os.path.expanduser(path)):
+            warnings.append(Warning(
+                f'The {path} file exists, and not all S3 settings are set.\n'
+                'The settings in this file might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ))
+
+    return warnings
diff --git a/arkindex/project/config.py b/arkindex/project/config.py
index 4243317cdb..9decae996f 100644
--- a/arkindex/project/config.py
+++ b/arkindex/project/config.py
@@ -1,3 +1,10 @@
+"""
+Parser for the YAML configuration file.
+
+If you update any of the keys in the YAML file, you should consider
+updating the configuration documentation on the wiki:
+<https://wiki.vpn/en/arkindex/deploy/configuration>
+"""
 import uuid
 from enum import Enum
 from pathlib import Path
@@ -49,6 +56,28 @@ def public_hostname(value: Optional[str]):
     return value.rstrip('/')
 
 
+def add_s3_parser(parser, name, **kwargs):
+    s3_parser = ConfigParser()
+    s3_parser.add_option('access_key_id', type=str, default=None)
+    s3_parser.add_option('secret_access_key', type=str, default=None)
+    s3_parser.add_option('endpoint', type=str, default=None)
+    s3_parser.add_option('region', type=str, default=None)
+
+    def s3_validator(value):
+        data = s3_parser.parse_data(value)
+        if not data.get('access_key_id') and not data.get('secret_access_key'):
+            # No configuration specified;
+            # just ignore and let the system checks warn about this without preventing startup
+            return data
+        if not data.get('endpoint') and not data.get('region'):
+            raise ConfigurationError(f'One of `{name}.endpoint` or `{name}.region` is required')
+        return data
+
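+    # Register the section on the main parser and return the sub-parser, so
+    # callers can add extra options (such as bucket names) before parsing.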
+    parser.add_option(name, type=s3_validator, default={}, **kwargs)
+
+    return s3_parser
+
+
 def get_settings_parser(base_dir):
     parser = ConfigParser()
     parser.add_option('arkindex_env', type=str, default='dev')
@@ -187,11 +216,7 @@ def get_settings_parser(base_dir):
 
     parser.add_option('cache', default={}, type=cache_validator)
 
-    s3_parser = ConfigParser()
-    s3_parser.add_option('access_key_id', type=str, default=None)
-    s3_parser.add_option('secret_access_key', type=str, default=None)
-    s3_parser.add_option('endpoint', type=str, default=None)
-    s3_parser.add_option('region', type=str, default=None)
+    s3_parser = add_s3_parser(parser, 's3')
     s3_parser.add_option('thumbnails_bucket', type=str, default='thumbnails')
     s3_parser.add_option('staging_bucket', type=str, default='staging')
     s3_parser.add_option('export_bucket', type=str, default='export')
@@ -199,16 +224,6 @@ def get_settings_parser(base_dir):
     s3_parser.add_option('ponos_logs_bucket', type=str, default='ponos-logs')
     s3_parser.add_option('ponos_artifacts_bucket', type=str, default='ponos-artifacts')
 
-    def s3_validator(value):
-        data = s3_parser.parse_data(value)
-        if not data.get('access_key_id') and not data.get('secret_access_key'):
-            # No configuration specified;
-            # just ignore and let the system checks warn about this without preventing startup
-            return data
-        if not data.get('endpoint') and not data.get('region'):
-            raise ConfigurationError('One of `s3.endpoint` or `s3.region` are required')
-        return data
-
-    parser.add_option('s3', type=s3_validator, default={})
+    add_s3_parser(parser, 'ingest')
 
     return parser
diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py
index 6b65c56673..85b35ea8fe 100644
--- a/arkindex/project/settings.py
+++ b/arkindex/project/settings.py
@@ -265,6 +265,7 @@ SPECTACULAR_SETTINGS = {
             'description': 'IIIF manifests, annotation lists and services',
         },
         {'name': 'imports'},
+        {'name': 'ingest'},
         {'name': 'images'},
         {'name': 'jobs'},
         {
@@ -281,10 +282,6 @@ SPECTACULAR_SETTINGS = {
         {'name': 'search'},
         {'name': 'transcriptions'},
         {'name': 'users'},
-        {
-            'name': 'management',
-            'description': 'Admin-only tools',
-        },
     ]
 }
 
@@ -489,11 +486,17 @@ PONOS_AWS_ENDPOINT = AWS_ENDPOINT = conf['s3']['endpoint']
 PONOS_AWS_REGION = AWS_REGION = conf['s3']['region']
 PONOS_S3_LOGS_BUCKET = conf['s3']['ponos_logs_bucket']
 PONOS_S3_ARTIFACTS_BUCKET = conf['s3']['ponos_artifacts_bucket']
+
 AWS_THUMBNAIL_BUCKET = conf['s3']['thumbnails_bucket']
 AWS_STAGING_BUCKET = conf['s3']['staging_bucket']
 AWS_EXPORT_BUCKET = conf['s3']['export_bucket']
 AWS_TRAINING_BUCKET = conf['s3']['training_bucket']
 
+INGEST_S3_ACCESS_KEY = conf['ingest']['access_key_id']
+INGEST_S3_SECRET_KEY = conf['ingest']['secret_access_key']
+INGEST_S3_ENDPOINT = conf['ingest']['endpoint']
+INGEST_S3_REGION = conf['ingest']['region']
+
 # Ponos integration
 _ponos_env = {
     'ARKINDEX_API_CSRF_COOKIE': CSRF_COOKIE_NAME
diff --git a/arkindex/project/tests/config_samples/defaults.yaml b/arkindex/project/tests/config_samples/defaults.yaml
index 00553e7d8d..f4bf3ab648 100644
--- a/arkindex/project/tests/config_samples/defaults.yaml
+++ b/arkindex/project/tests/config_samples/defaults.yaml
@@ -45,6 +45,11 @@ gitlab:
 imports_worker_version: null
 influxdb:
   api_url: http://localhost:8086/
+ingest:
+  access_key_id: null
+  endpoint: null
+  region: null
+  secret_access_key: null
 internal_group_id: 2
 job_timeouts:
   corpus_delete: 7200
diff --git a/arkindex/project/tests/config_samples/errors.yaml b/arkindex/project/tests/config_samples/errors.yaml
index 04a7fdce66..770a46bf76 100644
--- a/arkindex/project/tests/config_samples/errors.yaml
+++ b/arkindex/project/tests/config_samples/errors.yaml
@@ -30,6 +30,11 @@ gitlab:
 influxdb:
   api_url: no
 internal_group_id: 2
+ingest:
+  access_key_id: a
+  endpoint: https://ohno
+  region: nowhere
+  secret_access_key: null
 job_timeouts:
   corpus_delete: lol
   element_trash: no
diff --git a/arkindex/project/tests/config_samples/override.yaml b/arkindex/project/tests/config_samples/override.yaml
index f4253c6791..76a8e3f3f7 100644
--- a/arkindex/project/tests/config_samples/override.yaml
+++ b/arkindex/project/tests/config_samples/override.yaml
@@ -57,6 +57,11 @@ gitlab:
 imports_worker_version: aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa
 influxdb:
   api_url: http://graph/
+ingest:
+  access_key_id: abcd
+  endpoint: somewhere
+  region: middle-earth-1
+  secret_access_key: hunter2
 internal_group_id: 4
 job_timeouts:
   corpus_delete: 1
diff --git a/arkindex/project/tests/test_checks.py b/arkindex/project/tests/test_checks.py
index 489e1b49d5..ec16b64d96 100644
--- a/arkindex/project/tests/test_checks.py
+++ b/arkindex/project/tests/test_checks.py
@@ -1,3 +1,4 @@
+import os
 from pathlib import Path
 from unittest.mock import patch
 
@@ -198,3 +199,166 @@ class ChecksTestCase(TestCase):
 
         version_info_mock.minor = 9
         self.assertListEqual(python_version_check(), [])
+
+    @override_settings()
+    def test_ingest_check(self):
+        from arkindex.project.checks import ingest_check
+
+        settings.INGEST_S3_ACCESS_KEY = None
+        settings.INGEST_S3_SECRET_KEY = None
+        settings.INGEST_S3_ENDPOINT = None
+        settings.INGEST_S3_REGION = None
+        self.assertCountEqual(ingest_check(), [
+            Warning(
+                'INGEST_S3_ACCESS_KEY is not set. The S3 ingest feature will not work.',
+                id='arkindex.W010',
+            ),
+            Warning(
+                'INGEST_S3_SECRET_KEY is not set. The S3 ingest feature will not work.',
+                id='arkindex.W010',
+            ),
+            Warning(
+                'Neither INGEST_S3_REGION nor INGEST_S3_ENDPOINT is set. The S3 ingest feature will not work.',
+                id='arkindex.W010',
+            ),
+        ])
+
+        settings.INGEST_S3_ACCESS_KEY = 'key'
+        settings.INGEST_S3_SECRET_KEY = 's3kr3t'
+        settings.INGEST_S3_ENDPOINT = 'something'
+        self.assertListEqual(ingest_check(), [])
+
+        settings.INGEST_S3_ENDPOINT = None
+        settings.INGEST_S3_REGION = 'somewhere'
+        self.assertListEqual(ingest_check(), [])
+
+    @override_settings()
+    @patch('os.path.exists', return_value=True)
+    @patch.dict(os.environ, {
+        'AWS_ACCESS_KEY_ID': 'blah',
+        'AWS_CONFIG_FILE': 'blah',
+        'AWS_CREDENTIAL_FILE': 'blah',
+        'AWS_DEFAULT_PROFILE': 'blah',
+        'AWS_DEFAULT_REGION': 'blah',
+        'AWS_EXECUTION_ENV': 'blah',
+        'AWS_PROFILE': 'blah',
+        'AWS_REGION': 'blah',
+        'AWS_SECRET_ACCESS_KEY': 'blah',
+        'AWS_SECURITY_TOKEN': 'blah',
+        'AWS_SESSION_TOKEN': 'blah',
+        'BOTO_CONFIG': 'blah',
+    })
+    def test_botocore_config_check(self, exists_mock):
+        from arkindex.project.checks import botocore_config_check
+
+        expected_warnings = [
+            Warning(
+                'The AWS_ACCESS_KEY_ID environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The AWS_CONFIG_FILE environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The AWS_CREDENTIAL_FILE environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The AWS_DEFAULT_PROFILE environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The AWS_DEFAULT_REGION environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The AWS_EXECUTION_ENV environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The AWS_PROFILE environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The AWS_REGION environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The AWS_SECRET_ACCESS_KEY environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The AWS_SECURITY_TOKEN environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The AWS_SESSION_TOKEN environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The BOTO_CONFIG environment variable is set, and not all S3 settings are set.\n'
+                'This variable might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The ~/.aws/config file exists, and not all S3 settings are set.\n'
+                'The settings in this file might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+            Warning(
+                'The ~/.aws/credentials file exists, and not all S3 settings are set.\n'
+                'The settings in this file might override the Arkindex settings and cause unexpected behavior.',
+                id='arkindex.W011',
+            ),
+        ]
+
+        settings.INGEST_S3_ACCESS_KEY = None
+        settings.INGEST_S3_SECRET_KEY = None
+        settings.INGEST_S3_ENDPOINT = None
+        settings.INGEST_S3_REGION = None
+        settings.AWS_ACCESS_KEY = None
+        settings.AWS_SECRET_KEY = None
+        settings.AWS_ENDPOINT = None
+        settings.AWS_REGION = None
+
+        self.assertCountEqual(botocore_config_check(), expected_warnings)
+
+        settings.INGEST_S3_ACCESS_KEY = 'something'
+        self.assertCountEqual(botocore_config_check(), expected_warnings)
+
+        settings.INGEST_S3_SECRET_KEY = 'something'
+        self.assertCountEqual(botocore_config_check(), expected_warnings)
+
+        settings.AWS_ACCESS_KEY = 'something'
+        self.assertCountEqual(botocore_config_check(), expected_warnings)
+
+        settings.AWS_SECRET_KEY = 'something'
+        self.assertCountEqual(botocore_config_check(), expected_warnings)
+
+        settings.AWS_ENDPOINT = 'something'
+        self.assertCountEqual(botocore_config_check(), expected_warnings)
+
+        settings.INGEST_S3_ENDPOINT = 'something'
+        # We have set all the required settings here, so this check is skipped.
+        self.assertCountEqual(botocore_config_check(), [])
+
+        # The check requires either the endpoint or the region
+        settings.AWS_ENDPOINT = None
+        settings.AWS_REGION = 'something'
+        self.assertCountEqual(botocore_config_check(), [])
+
+        settings.INGEST_S3_ENDPOINT = None
+        settings.INGEST_S3_REGION = 'something'
+        self.assertCountEqual(botocore_config_check(), [])
diff --git a/arkindex/users/models.py b/arkindex/users/models.py
index 629116fb39..e348f4e035 100644
--- a/arkindex/users/models.py
+++ b/arkindex/users/models.py
@@ -227,6 +227,11 @@ class Scope(Enum):
     This could allow someone to make Arkindex perform DoS attacks on other IIIF servers due to the image checks.
     """
 
+    S3Ingest = 's3_ingest'
+    """
+    Allows access to S3 ingestion features.
+    """
+
 
 class UserScope(models.Model):
     user = models.ForeignKey('users.User', related_name='user_scopes', on_delete=models.CASCADE)
diff --git a/tests-requirements.txt b/tests-requirements.txt
index 918c101ddb..4c9b106603 100644
--- a/tests-requirements.txt
+++ b/tests-requirements.txt
@@ -1,4 +1,5 @@
 coverage==6.3.2
 django-nose==1.4.7
+moto[s3]==3.1.12
 responses==0.20.0
 tripoli==2.0.0
-- 
GitLab