Skip to content
Snippets Groups Projects
Commit 1ad17b1f authored by Manon Blanco's avatar Manon Blanco
Browse files

Merge branch 'image-download-output-none' into 'main'

New decorator to make sure output is defined when it should be

See merge request !455
parents c9e8d46f d4e941ce
No related branches found
No related tags found
1 merge request: !455 New decorator to make sure output is defined when it should be
......@@ -3,6 +3,7 @@
# -*- coding: utf-8 -*-
import functools
import json
import logging
import pickle
......@@ -39,6 +40,19 @@ IIIF_FULL_SIZE = "full"
logger = logging.getLogger(__name__)
def output_attr_required(func):
    """
    Make sure the `output` attribute is defined before running the decorated method.

    The decorated method is only called when `self.output` is not None.

    :param func: Method to guard. It is called with the instance as first argument,
        and this instance must expose an `output` attribute.
    :returns: The guarded method, keeping the metadata (name, docstring) of `func`.
    :raises AssertionError: When the method is called while `self.output` is None.
    """

    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        # Raise explicitly rather than through an `assert` statement: asserts are
        # removed when Python runs with `-O`, which would silently disable this check.
        if self.output is None:
            raise AssertionError("Define an output folder to download images.")
        return func(self, *args, **kwargs)

    return wrapper
class ImageDownloader:
"""
Download images from extracted data
......@@ -46,21 +60,22 @@ class ImageDownloader:
def __init__(
self,
output: Path,
output: Path | None = None,
max_width: int | None = None,
max_height: int | None = None,
image_extension: str = "",
unknown_token: str = "",
) -> None:
self.output = output
self.output: Path | None = output
self.max_width = max_width
self.max_height = max_height
self.image_extension = image_extension
self.data: Dict = defaultdict(dict)
self.unknown_token = unknown_token
self.max_width: int | None = max_width
self.max_height: int | None = max_height
self.image_extension: str = image_extension
self.data: dict[str, dict] = defaultdict(dict)
self.unknown_token: str = unknown_token
def load_split_data(self):
@output_attr_required
def load_split_data(self) -> None:
"""
Load the dataset stored in `split.json` and initializes the charset.
"""
......@@ -136,6 +151,7 @@ class ImageDownloader:
# Rotations are done using the lib
return IIIF_URL.format(image_url=image_url, bbox=get_bbox(polygon), size=size)
@output_attr_required
def build_tasks(self) -> List[Dict[str, str]]:
tasks = []
for split, items in self.split.items():
......@@ -264,6 +280,7 @@ class ImageDownloader:
logger.error(f"Failed to download {len(failed_downloads)} image(s).")
print(*list(map(": ".join, failed_downloads)), sep="\n")
@output_attr_required
def export(self) -> None:
"""
Writes a `labels.json` file containing a mapping of the images that have been correctly uploaded (identified by its path)
......@@ -281,6 +298,7 @@ class ImageDownloader:
pickle.dumps(sorted(list(self.charset)))
)
@output_attr_required
def run(self) -> None:
"""
Download the missing images from a `split.json` file and build a `labels.json` file containing
......@@ -288,7 +306,7 @@ class ImageDownloader:
to the ground-truth transcription (with NER tokens if needed).
"""
self.load_split_data()
tasks: List[Dict[str, str]] = self.build_tasks()
tasks: list[dict[str, str]] = self.build_tasks()
self.download_images(tasks)
self.export()
......
......@@ -11,7 +11,11 @@ from pathlib import Path
import pytest
from PIL import Image, ImageChops
from dan.datasets.download.images import IIIF_FULL_SIZE, ImageDownloader
from dan.datasets.download.images import (
IIIF_FULL_SIZE,
ImageDownloader,
output_attr_required,
)
from dan.datasets.download.utils import download_image
from dan.utils import parse_tokens
from line_image_extractor.image_utils import BoundingBox
......@@ -250,3 +254,23 @@ def test_download_image_error_try_max(responses, caplog):
# We should only have WARNING levels
assert set(level for _, level, _ in caplog.record_tuples) == {logging.WARNING}
def test_output_attr_required():
    """
    A method decorated with `output_attr_required` must fail while `output` is None
    and run normally once `output` is set.
    """

    class FakeDownloader:
        output: Path | None = None

        @output_attr_required
        def guarded_method(self, *args, **kwargs):
            return True

    instance = FakeDownloader()

    # No output folder yet: the decorator must refuse to run the method
    expected_error = "Define an output folder to download images."
    with pytest.raises(AssertionError, match=expected_error):
        instance.guarded_method()

    # With an output folder defined, the decorated method is executed
    instance.output = Path()
    assert instance.guarded_method()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment