Skip to content
Snippets Groups Projects
Commit a2c9dd76 authored by Yoann Schneider's avatar Yoann Schneider :tennis:
Browse files

Merge branch 'log-element-id-when-download-failed' into 'main'

Log element ID when download has failed

Closes #228

See merge request !320
parents afbe821f 35ac8365
No related branches found
No related tags found
1 merge request: !320 Log element ID when download has failed
......@@ -259,7 +259,7 @@ class ArkindexExtractor:
except Exception as e:
raise ImageDownloadError(
split=split, path=str(destination), url=download_url, exc=e
split=split, path=destination, url=download_url, exc=e
)
def format_text(self, text: str, charset: Optional[set] = None):
......
# -*- coding: utf-8 -*-
from pathlib import Path
class ProcessingError(Exception):
......@@ -26,13 +27,13 @@ class ImageDownloadError(Exception):
"""
def __init__(
self, split: str, path: str, url: str, exc: Exception, *args: object
self, split: str, path: Path, url: str, exc: Exception, *args: object
) -> None:
super().__init__(*args)
self.split: str = split
self.path: str = path
self.path: str = str(path)
self.url: str = url
self.message = str(exc)
self.message = f"{str(exc)} for element {path.stem}"
class NoTranscriptionError(ElementProcessingError):
......
......@@ -5,6 +5,7 @@ import logging
import pickle
import re
from operator import attrgetter, methodcaller
from pathlib import Path
from typing import NamedTuple
from unittest.mock import patch
......@@ -701,7 +702,7 @@ def test_download_image_error(iiif_url, caplog, capsys):
"split": "train",
"polygon": [],
"image_url": "deadbeef",
"destination": "/dev/null",
"destination": Path("/dev/null"),
}
# Make download_image crash
iiif_url.return_value = BoundingBox(0, 0, 0, 0), task["image_url"]
......@@ -723,7 +724,7 @@ def test_download_image_error(iiif_url, caplog, capsys):
extractor.tasks = [task]
# Add the key in data
extractor.data[task["split"]][task["destination"]] = "deadbeefdata"
extractor.data[task["split"]][str(task["destination"])] = "deadbeefdata"
extractor.download_images()
......@@ -738,7 +739,7 @@ def test_download_image_error(iiif_url, caplog, capsys):
# Check stdout
captured = capsys.readouterr()
assert captured.out == "deadbeef: Image URL must be HTTP(S)\n"
assert captured.out == "deadbeef: Image URL must be HTTP(S) for element null\n"
def test_download_image_error_try_max(responses, caplog):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment