From 3afbb5407787698f2cb9f400a913e95fef2e80a4 Mon Sep 17 00:00:00 2001
From: mlbonhomme <bonhomme@teklia.com>
Date: Fri, 29 Mar 2024 16:00:57 +0100
Subject: [PATCH] Apply review: move should_verify_cert to project.tools

---
 arkindex/images/models.py                        |  3 ++-
 arkindex/ponos/tasks.py                          |  3 ++-
 arkindex/project/aws.py                          | 15 ++-------------
 .../project/tests/{test_aws.py => test_tools.py} |  4 ++--
 arkindex/project/tools.py                        | 16 +++++++++++++++-
 5 files changed, 23 insertions(+), 18 deletions(-)
 rename arkindex/project/tests/{test_aws.py => test_tools.py} (80%)

diff --git a/arkindex/images/models.py b/arkindex/images/models.py
index a682541439..aecc25fca6 100644
--- a/arkindex/images/models.py
+++ b/arkindex/images/models.py
@@ -15,9 +15,10 @@ from django.utils.text import slugify
 from enumfields import EnumField
 
 from arkindex.images.managers import ImageServerManager
-from arkindex.project.aws import S3FileMixin, S3FileStatus, should_verify_cert
+from arkindex.project.aws import S3FileMixin, S3FileStatus
 from arkindex.project.fields import LStripTextField, MD5HashField, StripSlashURLField
 from arkindex.project.models import IndexableModel
+from arkindex.project.tools import should_verify_cert
 
 logger = logging.getLogger(__name__)
 profile_uri_validator = URLValidator(schemes=["http", "https"], message="Invalid IIIF profile URI")
diff --git a/arkindex/ponos/tasks.py b/arkindex/ponos/tasks.py
index a46bc81ebd..cd2a525f3e 100644
--- a/arkindex/ponos/tasks.py
+++ b/arkindex/ponos/tasks.py
@@ -18,6 +18,7 @@ import docker
 from arkindex.ponos.models import State, Task
 from arkindex.ponos.utils import decompress_zst_archive, extract_tar_archive, upload_artifact
 from arkindex.process.models import Process, WorkerActivityState
+from arkindex.project.tools import should_verify_cert
 from docker.errors import APIError, ImageNotFound
 
 logger = logging.getLogger(__name__)
@@ -101,7 +102,7 @@ def download_extra_files(task) -> None:
         logger.info(f"Downloading file {path_name} using url: {file_url}")
 
         # Download file using the provided url
-        with requests.get(file_url, stream=True, timeout=REQUEST_TIMEOUT) as resp:
+        with requests.get(file_url, stream=True, timeout=REQUEST_TIMEOUT, verify=should_verify_cert(file_url)) as resp:
             resp.raise_for_status()
 
             # Write file to a specific data directory
diff --git a/arkindex/project/aws.py b/arkindex/project/aws.py
index ef0edfb725..659f37326a 100644
--- a/arkindex/project/aws.py
+++ b/arkindex/project/aws.py
@@ -1,7 +1,6 @@
 import logging
 from functools import wraps
 from io import BytesIO
-from urllib.parse import urlparse
 
 import boto3.session
 from botocore.config import Config
@@ -11,19 +10,9 @@ from django.utils.functional import cached_property
 from enumfields import Enum
 from tenacity import retry, retry_if_exception, stop_after_delay
 
-logger = logging.getLogger(__name__)
-
-
-def should_verify_cert(url):
-    """
-    Skip SSL certification validation when hitting a development instance
-    """
-    # Special case when no url is provided
-    if url is None:
-        return True
+from arkindex.project.tools import should_verify_cert
 
-    host = urlparse(url).netloc
-    return not host.endswith("ark.localhost")
+logger = logging.getLogger(__name__)
 
 
 def get_s3_resource(
diff --git a/arkindex/project/tests/test_aws.py b/arkindex/project/tests/test_tools.py
similarity index 80%
rename from arkindex/project/tests/test_aws.py
rename to arkindex/project/tests/test_tools.py
index 7aae4cb45d..89d313e74c 100644
--- a/arkindex/project/tests/test_aws.py
+++ b/arkindex/project/tests/test_tools.py
@@ -1,9 +1,9 @@
 from django.test import TestCase
 
-from arkindex.project.aws import should_verify_cert  # noqa
+from arkindex.project.tools import should_verify_cert  # noqa
 
 
-class AWSTestCase(TestCase):
+class ToolsTest(TestCase):
 
     def test_should_verify_cert(self):
         self.assertTrue(should_verify_cert("https://google.fr/whatever"))
diff --git a/arkindex/project/tools.py b/arkindex/project/tools.py
index 74bc0dd81f..ab448463f8 100644
--- a/arkindex/project/tools.py
+++ b/arkindex/project/tools.py
@@ -1,11 +1,22 @@
 from collections.abc import Iterable, Iterator, Sized
 from datetime import datetime, timezone
+from urllib.parse import urlparse
 
 from django.db.models import Aggregate, CharField, Func
 from django.db.models.expressions import BaseExpression, OrderByList
 from django.urls import reverse
 
-from arkindex.documents.models import Element, ElementPath
+
+def should_verify_cert(url):
+    """
+    Return False (skip SSL certificate validation) only when the host of
+    ``url`` ends with ark.localhost (development instance), else True.
+    """
+    if url is None:
+        return True
+    # .hostname drops port/userinfo and is lowercased; None when no host
+    host = urlparse(url).hostname or ""
+    return not host.endswith("ark.localhost")
 
 
 def build_absolute_url(element, request, name, id_argument="pk", **kwargs):
@@ -26,6 +37,9 @@ def build_tree(tree, *, corpus, type):
 
     Returns a dict associating element names with created Elements.
     """
+    # Avoid circular import issue
+    from arkindex.documents.models import Element, ElementPath
+
     assert isinstance(tree, dict)
 
     def parse_value(val):
-- 
GitLab