Skip to content
Snippets Groups Projects
Commit a65ee34a authored by ml bonhomme :bee:, committed by Erwan Rouchet
Browse files

Remove utils.py and use teklia_toolbox.requests

parent 455a8c24
No related branches found
No related tags found
1 merge request: !378 "Remove utils.py and use teklia_toolbox.requests"
Pipeline #165654 passed
Showing
with 17 additions and 160 deletions
......@@ -2,9 +2,10 @@
import logging
import pathlib
from teklia_toolbox.requests import should_verify_cert
import urllib3
from arkindex import ArkindexClient, options_from_env
from arkindex_tasks.utils import should_verify_cert
logging.basicConfig(
format="[%(asctime)s] [%(levelname)s] %(message)s", level=logging.INFO
......
......@@ -7,10 +7,10 @@ from pathlib import Path
import requests
from apistar.exceptions import ErrorResponse
from teklia_toolbox.requests import download_file
from arkindex_tasks import default_client
from arkindex_tasks.enums import ProcessMode
from arkindex_tasks.utils import download_file
logger = logging.getLogger(__name__)
......
......@@ -10,10 +10,10 @@ import requests
from apistar.exceptions import ErrorResponse
from PIL import Image
from requests.exceptions import RequestException
from teklia_toolbox.requests import should_verify_cert
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed
from arkindex_tasks import default_client
from arkindex_tasks.utils import should_verify_cert
logger = logging.getLogger(__name__)
......
......@@ -11,6 +11,7 @@ from zipfile import BadZipFile, ZipFile
import requests
from apistar.exceptions import ErrorResponse
from natsort import natsorted
from teklia_toolbox.requests import should_verify_cert
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed
from zstandard import ZstdDecompressor
......@@ -20,7 +21,6 @@ from arkindex_tasks.base import WORKER_RUN_ID, ProcessTask, dump_json
from arkindex_tasks.import_files.image import check_image
from arkindex_tasks.import_files.pdf import extract_pdf_images, upload_pdf_text
from arkindex_tasks.import_files.transkribus import TranskribusImporter
from arkindex_tasks.utils import should_verify_cert
logger = logging.getLogger(__name__)
......
......@@ -2,10 +2,10 @@
import logging
from shapely.geometry import LinearRing
from teklia_toolbox.requests import retried_request
from transkribus.pagexml import PageXmlPage
from arkindex_tasks.base import WORKER_RUN_ID
from arkindex_tasks.utils import retried_request
logger = logging.getLogger(__name__)
......
......@@ -15,11 +15,11 @@ import requests
from apistar.exceptions import ErrorResponse
from lxml import etree
from PIL import Image, ImageOps
from teklia_toolbox.requests import retried_request
from arkindex_tasks import default_client
from arkindex_tasks.base import WORKER_RUN_ID, dump_json
from arkindex_tasks.import_files.pagexml import PageXmlParser
from arkindex_tasks.utils import retried_request
logger = logging.getLogger(__name__)
......
......@@ -6,11 +6,11 @@ from io import BytesIO
from urllib.parse import urlparse
from apistar.exceptions import ErrorResponse
from teklia_toolbox.requests import retried_request
from arkindex_tasks import USER_AGENT, default_client
from arkindex_tasks.import_iiif.parser import IIIFParser
from arkindex_tasks.import_iiif.utils import retried_iiif_get
from arkindex_tasks.utils import retried_request
def main():
......
......@@ -7,12 +7,12 @@ from urllib.parse import urljoin, urlparse
import ijson
import requests
from apistar.exceptions import ErrorResponse
from teklia_toolbox.requests import retried_request
from arkindex_tasks import USER_AGENT
from arkindex_tasks.base import WORKER_RUN_ID
from arkindex_tasks.enums import MetaType
from arkindex_tasks.import_iiif.utils import retried_iiif_get
from arkindex_tasks.utils import retried_request
logger = logging.getLogger(__name__)
......
# -*- coding: utf-8 -*-
import requests
from teklia_toolbox.requests import HTTP_GET_RETRY_BACKOFF, should_verify_cert
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed
from arkindex_tasks.utils import should_verify_cert
from ..utils import HTTP_GET_RETRY_BACKOFF
@retry(
reraise=True,
......
......@@ -2,8 +2,9 @@
# -*- coding: utf-8 -*-
import os
from teklia_toolbox.requests import should_verify_cert
import boto3.session
from arkindex_tasks.utils import should_verify_cert
def get_client_from_env():
......
......@@ -9,13 +9,13 @@ from urllib.parse import quote_plus, urljoin
from apistar.exceptions import ErrorResponse
from pdf2image import convert_from_path
from teklia_toolbox.requests import retried_request
from arkindex_tasks import default_client
from arkindex_tasks.base import WORKER_RUN_ID
from arkindex_tasks.import_files.pdf import upload_pdf_text
from arkindex_tasks.import_s3.boto import get_client_from_env
from arkindex_tasks.import_s3.graph import PATH_DELIMITER, Node
from arkindex_tasks.utils import retried_request
from botocore.exceptions import ClientError
logger = logging.getLogger(__name__)
......
# -*- coding: utf-8 -*-
import json
import logging
from urllib.parse import urlparse
import requests
from apistar.exceptions import ErrorResponse
from tenacity import (
before_sleep_log,
retry,
retry_if_exception,
retry_if_exception_type,
stop_after_attempt,
wait_exponential,
wait_fixed,
)
# Time to wait before retrying the IIIF image information fetching.
# Also used in this module as the wait_fixed() delay of the download_file
# retry policy (presumably seconds, as tenacity's wait_fixed expects).
HTTP_GET_RETRY_BACKOFF = 10
# Module-level logger; handed to before_sleep_log() by the retried_request
# retry policy so each retry is announced before sleeping.
logger = logging.getLogger(__name__)
# chunk_size given to iter_content() when download_file streams a response body
DOWNLOAD_CHUNK_SIZE = 8192
def is_json_file(path):
    """
    Tell whether the file at ``path`` holds a parseable JSON document.

    The file is opened in binary mode and handed to ``json.load``, which
    detects the text encoding itself before parsing.

    :param path: Location of the file to check, in any form ``open`` accepts.
    :returns: True when the content parses as JSON, False otherwise.
    :raises OSError: When the file cannot be opened or read.
    """
    with open(path, "rb") as f:
        try:
            json.load(f)
        except ValueError:
            # Both json.JSONDecodeError (malformed document) and
            # UnicodeDecodeError (bytes that are not valid text, e.g. an
            # image) derive from ValueError. Either one means "not JSON".
            return False
    return True
def _is_500_error(exc):
    """
    Retry predicate: tell whether ``exc`` is an Arkindex API 50x error.

    Only ``ErrorResponse`` exceptions whose status code lies in the
    500-599 range qualify; any other exception yields False.
    """
    return isinstance(exc, ErrorResponse) and 500 <= exc.status_code < 600
@retry(
    stop=stop_after_attempt(5),
    wait=wait_exponential(multiplier=2, min=3),
    retry=retry_if_exception(_is_500_error),
    before_sleep=before_sleep_log(logger, logging.INFO),
    reraise=True,
)
def retried_request(*args, **kwargs):
    """
    Forward an Arkindex API request to the default client, retrying on 50x.

    All positional and keyword arguments are passed untouched to
    ``default_client.request`` and its result is returned as is.

    A call failing with a 50x error is attempted up to 5 times in total,
    sleeping 3, 4, 8 and then 16 seconds between attempts. When the last
    attempt still fails, the exception is re-raised for the caller to handle.
    Exceptions that are not 50x API errors are never retried.
    A log message is emitted before each sleep.
    """
    # Resolved at call time rather than at module import to dodge the
    # circular import between this module and the arkindex_tasks package
    from arkindex_tasks import default_client  # noqa: avoid circular imports

    send = default_client.request
    return send(*args, **kwargs)
def should_verify_cert(url):
    """
    Decide whether SSL certificates must be validated when requesting ``url``.

    Validation is skipped only for development instances, i.e. hosts whose
    name ends with ``ark.localhost``.

    :param url: URL about to be requested; may be empty or None.
    :returns: False for a development instance, True in every other case
        (including an empty/None URL or one without a host).
    """
    if not url:
        return True

    # ``netloc`` still carries the port (and credentials), so a development
    # instance served on e.g. ``ark.localhost:8000`` would not be recognised.
    # ``hostname`` strips both and is lower-cased; it is None when the URL
    # has no host at all.
    host = urlparse(url).hostname or ""
    return not host.endswith("ark.localhost")
@retry(
    retry=retry_if_exception_type(requests.RequestException),
    wait=wait_fixed(HTTP_GET_RETRY_BACKOFF),
    stop=stop_after_attempt(3),
    reraise=True,
)
def download_file(url, path):
    """
    Stream the content found at ``url`` into the local file ``path``.

    ``path`` must expose an ``open`` method (it is called as
    ``path.open("wb")``). The whole download is attempted up to 3 times when
    a ``requests.RequestException`` occurs, waiting ``HTTP_GET_RETRY_BACKOFF``
    between attempts; the last exception is re-raised.
    """
    verify = should_verify_cert(url)
    with requests.get(url, stream=True, verify=verify) as response:
        # Fail before touching the destination file on an HTTP error status
        response.raise_for_status()
        with path.open("wb") as target:
            chunks = response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE)
            # filter(None, ...) drops the empty chunks
            for data in filter(None, chunks):
                target.write(data)
arkindex-client==1.0.15
boto3==1.26.113
ijson==3.1.4
natsort==8.3.1
pdfminer.six==20221105
python-magic==0.4.27
Shapely==1.8.0
teklia-toolbox==0.1.3
teklia-toolbox==0.1.4rc3
tenacity==8.2.3
transkribus-client==0.3.4rc1
......@@ -26,7 +26,7 @@ SAMPLES = Path(__file__).absolute().parent / "samples"
new=0,
)
@patch(
"arkindex_tasks.utils.download_file.retry.wait.wait_fixed",
"teklia_toolbox.requests.download_file.retry.wait.wait_fixed",
new=0,
)
@patch(
......
......@@ -21,7 +21,7 @@ SAMPLES = Path(__file__).absolute().parent / "samples"
@patch("arkindex_tasks.import_iiif.process.IIIFParser")
@patch("arkindex_tasks.import_iiif.process.argparse.ArgumentParser")
@patch(
"arkindex_tasks.utils.download_file.retry.wait.wait_fixed",
"teklia_toolbox.requests.download_file.retry.wait.wait_fixed",
new=0,
)
@requests_mock.Mocker()
......
......@@ -12,7 +12,7 @@ import arkindex_tasks.base
SAMPLES = Path(__file__).absolute().parent / "samples"
@patch("arkindex_tasks.utils.download_file.retry.wait.wait_fixed", new=0)
@patch("teklia_toolbox.requests.download_file.retry.wait.wait_fixed", new=0)
class TestBase(TestCase):
def setUp(self):
self.tmp_dir = tempfile.mkdtemp()
......
# -*- coding: utf-8 -*-
from unittest import TestCase
from unittest.mock import patch
import requests_mock
from apistar.exceptions import ErrorResponse
from arkindex_tasks.utils import retried_request
@requests_mock.Mocker()
@patch("arkindex_tasks.utils.retried_request.retry.wait", 0)
class TestRetriedRequest(TestCase):
    """
    Check the retry policy of retried_request against mocked HTTP responses.

    The retry wait is patched to 0 so the tests do not sleep between attempts.
    """

    def test_retried_request_only_500(self, mock):
        # A 404 is not a 50x error: a single request must be sent
        url = "https://arkindex.teklia.com/api/v1/corpus/notfound/"
        mock.get(url, status_code=404)

        with self.assertRaises(ErrorResponse) as e:
            retried_request("RetrieveCorpus", id="notfound")

        self.assertEqual(e.exception.status_code, 404)
        sent = [(req.method, req.url) for req in mock.request_history]
        self.assertListEqual(sent, [("GET", url)])

    def test_retried_request_5_attempts(self, mock):
        # A persistent 500 is attempted 5 times before the error is re-raised
        url = "https://arkindex.teklia.com/api/v1/corpus/corpusid/"
        mock.get(url, status_code=500)

        with self.assertRaises(ErrorResponse) as e:
            retried_request("RetrieveCorpus", id="corpusid")

        self.assertEqual(e.exception.status_code, 500)
        sent = [(req.method, req.url) for req in mock.request_history]
        self.assertListEqual(sent, [("GET", url)] * 5)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment