Skip to content
Snippets Groups Projects
Commit 2bea7c06 authored by Bastien Abadie's avatar Bastien Abadie Committed by Erwan Rouchet
Browse files

Do not verify SSL certificates on ark.localhost domains

parent 545c27a0
No related branches found
No related tags found
1 merge request!375Do not verify SSL certificates on ark.localhost domains
Pipeline #163067 passed
......@@ -2,14 +2,22 @@
import logging
import pathlib
import urllib3
from arkindex import ArkindexClient, options_from_env
default_client = ArkindexClient(**options_from_env())
from arkindex_tasks.utils import should_verify_cert
# Configure root logging once for every arkindex_tasks module.
logging.basicConfig(
    format="[%(asctime)s] [%(levelname)s] %(message)s", level=logging.INFO
)

# Skip SSL verification in Arkindex API client for local development hosts
options = options_from_env()
verify = should_verify_cert(options.get("base_url"))
if not verify:
    # Silence the per-request InsecureRequestWarning urllib3 would
    # otherwise emit for every unverified HTTPS call.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    # logging.warn is a deprecated alias; logging.warning is the supported API.
    logging.warning("SSL certificate verification is disabled for Arkindex API calls")

# Shared API client used by all tasks in this package.
default_client = ArkindexClient(verify=verify, **options)

# Read shared VERSION and expose it to Python internals
__version__ = (pathlib.Path(__file__).parent / "VERSION").read_text().strip()
......
......@@ -13,6 +13,7 @@ from requests.exceptions import RequestException
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed
from arkindex_tasks import default_client
from arkindex_tasks.utils import should_verify_cert
logger = logging.getLogger(__name__)
......@@ -59,7 +60,7 @@ class ThumbnailGenerator(object):
# PIL.Image.open requires the seek(int) method that the urllib responses do not provide
# We therefore get the whole response content and put it in a BytesIO
resp = requests.get(url, timeout=(30, 60))
resp = requests.get(url, timeout=(30, 60), verify=should_verify_cert(url))
resp.raise_for_status()
return Image.open(BytesIO(resp.content))
......@@ -162,7 +163,7 @@ class ThumbnailGenerator(object):
b = BytesIO()
thumbnail.save(b, format="jpeg")
b.seek(0)
resp = requests.put(url, data=b)
resp = requests.put(url, data=b, verify=should_verify_cert(url))
resp.raise_for_status()
def run(self):
......
......@@ -20,6 +20,7 @@ from arkindex_tasks.base import WORKER_RUN_ID, ProcessTask, dump_json
from arkindex_tasks.import_files.image import check_image
from arkindex_tasks.import_files.pdf import extract_pdf_images, upload_pdf_text
from arkindex_tasks.import_files.transkribus import TranskribusImporter
from arkindex_tasks.utils import should_verify_cert
logger = logging.getLogger(__name__)
......@@ -153,7 +154,7 @@ class FileImport(ProcessTask):
)
def upload_image(self, path, url):
    """
    Upload a local image file to the given URL with an HTTP PUT.

    :param path: Filesystem path of the image file to send.
    :param url: Destination URL for the upload.
    :raises requests.HTTPError: When the server answers with an error status.
    """
    with open(path, "rb") as f:
        # Stream the open file handle as the request body.
        # SSL verification is skipped for ark.localhost development hosts.
        resp = requests.put(url, data=f, verify=should_verify_cert(url))
    resp.raise_for_status()
@retry(
......
......@@ -16,9 +16,10 @@ from apistar.exceptions import ErrorResponse
from lxml import etree
from PIL import Image, ImageOps
from arkindex_tasks import default_client
from arkindex_tasks.base import WORKER_RUN_ID, dump_json
from arkindex_tasks.import_files.pagexml import PageXmlParser
from arkindex_tasks.utils import default_client, retried_request
from arkindex_tasks.utils import retried_request
logger = logging.getLogger(__name__)
......
......@@ -7,10 +7,10 @@ from urllib.parse import urlparse
from apistar.exceptions import ErrorResponse
from arkindex_tasks import USER_AGENT
from arkindex_tasks import USER_AGENT, default_client
from arkindex_tasks.import_iiif.parser import IIIFParser
from arkindex_tasks.import_iiif.utils import retried_iiif_get
from arkindex_tasks.utils import default_client, retried_request
from arkindex_tasks.utils import retried_request
def main():
......
......@@ -2,6 +2,8 @@
import requests
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed
from arkindex_tasks.utils import should_verify_cert
from ..utils import HTTP_GET_RETRY_BACKOFF
......@@ -18,6 +20,8 @@ def retried_iiif_get(url, headers, timeout, **kwargs):
If the 2nd call still gives an exception, that exception is re-raised
and the caller should catch it
"""
with requests.get(url, headers=headers, timeout=timeout, **kwargs) as r:
with requests.get(
url, headers=headers, timeout=timeout, verify=should_verify_cert(url), **kwargs
) as r:
r.raise_for_status()
return r
......@@ -3,6 +3,7 @@
import os
import boto3.session
from arkindex_tasks.utils import should_verify_cert
def get_client_from_env():
......@@ -11,10 +12,16 @@ def get_client_from_env():
"aws_access_key_id": os.environ.get("INGEST_S3_ACCESS_KEY"),
"aws_secret_access_key": os.environ.get("INGEST_S3_SECRET_KEY"),
}
endpoint_url = os.environ.get("INGEST_S3_ENDPOINT")
resource_args = {
"endpoint_url": os.environ.get("INGEST_S3_ENDPOINT"),
"endpoint_url": endpoint_url,
"region_name": os.environ.get("INGEST_S3_REGION"),
}
# Do not verify SSL certs on local development setups
if endpoint_url:
resource_args["verify"] = should_verify_cert(endpoint_url)
if None in session_args.values():
raise Exception(
"At least INGEST_S3_ACCESS_KEY and INGEST_S3_SECRET_KEY environment variables must be set "
......
# -*- coding: utf-8 -*-
import json
import logging
from urllib.parse import urlparse
import requests
from apistar.exceptions import ErrorResponse
......@@ -14,8 +15,6 @@ from tenacity import (
wait_fixed,
)
from arkindex_tasks import default_client
# Time to wait before retrying the IIIF image information fetching
HTTP_GET_RETRY_BACKOFF = 10
......@@ -62,9 +61,21 @@ def retried_request(*args, **kwargs):
and the caller should catch it
Log messages are displayed before sleeping (when at least one exception occurred)
"""
from arkindex_tasks import default_client # noqa: avoid circular imports
return default_client.request(*args, **kwargs)
def should_verify_cert(url):
    """
    Tell whether SSL certificates should be verified for requests to a URL.

    Verification is skipped only for local development instances served on
    the ``ark.localhost`` domain or one of its subdomains; any other URL,
    as well as a missing or unparsable one, keeps verification enabled.

    :param url: Full URL of the endpoint being called, or None.
    :returns bool: False for ``ark.localhost`` hosts, True otherwise.
    """
    if not url:
        return True
    # `hostname` strips the port and userinfo and lowercases, unlike `netloc`,
    # so e.g. https://sub.ark.localhost:8000 is also recognised.
    host = urlparse(url).hostname
    # Match the domain exactly or as a parent domain; a bare endswith check
    # would also disable verification for unrelated hosts like bark.localhost.
    return not (host and (host == "ark.localhost" or host.endswith(".ark.localhost")))
@retry(
reraise=True,
retry=retry_if_exception_type(requests.RequestException),
......@@ -75,7 +86,7 @@ def download_file(url, path):
"""
Download a URL into a local path, retrying if necessary
"""
with requests.get(url, stream=True) as r:
with requests.get(url, stream=True, verify=should_verify_cert(url)) as r:
r.raise_for_status()
with path.open("wb") as f:
for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
......
arkindex-client==1.0.14
arkindex-client==1.0.15
boto3==1.26.113
ijson==3.1.4
natsort==8.3.1
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment