diff --git a/arkindex/images/models.py b/arkindex/images/models.py index a6825414399ad31b889581815fb32e22ef5fe8d7..aecc25fca690b179e55696ab1f9775179b18d830 100644 --- a/arkindex/images/models.py +++ b/arkindex/images/models.py @@ -15,9 +15,10 @@ from django.utils.text import slugify from enumfields import EnumField from arkindex.images.managers import ImageServerManager -from arkindex.project.aws import S3FileMixin, S3FileStatus, should_verify_cert +from arkindex.project.aws import S3FileMixin, S3FileStatus from arkindex.project.fields import LStripTextField, MD5HashField, StripSlashURLField from arkindex.project.models import IndexableModel +from arkindex.project.tools import should_verify_cert logger = logging.getLogger(__name__) profile_uri_validator = URLValidator(schemes=["http", "https"], message="Invalid IIIF profile URI") diff --git a/arkindex/ponos/tasks.py b/arkindex/ponos/tasks.py index a46bc81ebdf3feb44480bfca3cc96b49c0851e3b..cd2a525f3e23062f4aaac1ec03667a24c1715575 100644 --- a/arkindex/ponos/tasks.py +++ b/arkindex/ponos/tasks.py @@ -18,6 +18,7 @@ import docker from arkindex.ponos.models import State, Task from arkindex.ponos.utils import decompress_zst_archive, extract_tar_archive, upload_artifact from arkindex.process.models import Process, WorkerActivityState +from arkindex.project.tools import should_verify_cert from docker.errors import APIError, ImageNotFound logger = logging.getLogger(__name__) @@ -101,7 +102,7 @@ def download_extra_files(task) -> None: logger.info(f"Downloading file {path_name} using url: {file_url}") # Download file using the provided url - with requests.get(file_url, stream=True, timeout=REQUEST_TIMEOUT) as resp: + with requests.get(file_url, stream=True, timeout=REQUEST_TIMEOUT, verify=should_verify_cert(file_url)) as resp: resp.raise_for_status() # Write file to a specific data directory diff --git a/arkindex/project/aws.py b/arkindex/project/aws.py index 
ef0edfb7251a1c988cd35ddf823522acb460821c..659f37326a0e132505e1fb4b63a7729e668ba328 100644 --- a/arkindex/project/aws.py +++ b/arkindex/project/aws.py @@ -1,7 +1,6 @@ import logging from functools import wraps from io import BytesIO -from urllib.parse import urlparse import boto3.session from botocore.config import Config @@ -11,19 +10,9 @@ from django.utils.functional import cached_property from enumfields import Enum from tenacity import retry, retry_if_exception, stop_after_delay -logger = logging.getLogger(__name__) - - -def should_verify_cert(url): - """ - Skip SSL certification validation when hitting a development instance - """ - # Special case when no url is provided - if url is None: - return True +from arkindex.project.tools import should_verify_cert - host = urlparse(url).netloc - return not host.endswith("ark.localhost") +logger = logging.getLogger(__name__) def get_s3_resource( diff --git a/arkindex/project/tests/test_aws.py b/arkindex/project/tests/test_tools.py similarity index 80% rename from arkindex/project/tests/test_aws.py rename to arkindex/project/tests/test_tools.py index 7aae4cb45d7bb9c3654230aa3fd8d6d8ceff86c8..89d313e74c45516be2237b7ea7024f4239fb3e15 100644 --- a/arkindex/project/tests/test_aws.py +++ b/arkindex/project/tests/test_tools.py @@ -1,9 +1,9 @@ from django.test import TestCase -from arkindex.project.aws import should_verify_cert # noqa +from arkindex.project.tools import should_verify_cert # noqa -class AWSTestCase(TestCase): +class ToolsTest(TestCase): def test_should_verify_cert(self): self.assertTrue(should_verify_cert("https://google.fr/whatever")) diff --git a/arkindex/project/tools.py b/arkindex/project/tools.py index 74bc0dd81f3629a722797d88824f3a0d7caa5695..ab448463f8b008537077603e579814aea024c477 100644 --- a/arkindex/project/tools.py +++ b/arkindex/project/tools.py @@ -1,11 +1,22 @@ from collections.abc import Iterable, Iterator, Sized from datetime import datetime, timezone +from urllib.parse import urlparse from 
def should_verify_cert(url):
    """
    Tell whether the SSL certificate must be validated when requesting a URL.

    Validation is skipped only when hitting a development instance, which is
    recognised by a host name ending with ``ark.localhost``.

    :param url: URL about to be requested, or None when no URL is provided.
    :returns: False when the URL targets a development instance, True otherwise.
    """
    # Special case when no url is provided
    if url is None:
        return True

    # Compare against `hostname` rather than `netloc`: `netloc` still carries the
    # optional credentials and port and keeps the original letter case, so a URL
    # such as https://ark.localhost:8443/ would not be detected as a development
    # instance. `hostname` is None when the URL has no network location at all.
    host = urlparse(url).hostname or ""
    return not host.endswith("ark.localhost")