# -*- coding: utf-8 -*-
import ast
import logging
import os
import tarfile
import tempfile
import time
from pathlib import Path
from urllib.parse import urljoin

import cv2
import imageio.v2 as iio
import zstandard as zstd

from worker_generic_training_dataset.exceptions import ImageDownloadError

logger = logging.getLogger(__name__)

MAX_RETRIES = 5


def bounding_box(polygon: list):
    """
    Returns a 4-tuple (x, y, width, height) for the bounding box of a Polygon (list of points)
    """
    all_x, all_y = zip(*polygon)
    x, y = min(all_x), min(all_y)
    width, height = max(all_x) - x, max(all_y) - y
    return int(x), int(y), int(width), int(height)


def build_image_url(element):
    # Crop the image to the element's bounding box using an IIIF Image API region request
    x, y, width, height = bounding_box(ast.literal_eval(element.polygon))
    return urljoin(
        element.image_url + "/", f"{x},{y},{width},{height}/full/0/default.jpg"
    )


def download_image(element, folder: Path):
    """
    Download the image to `folder / {element.image_id}.png`
    """
    tries = 1
    # Retry loop
    while True:
        if tries > MAX_RETRIES:
            raise ImageDownloadError(element.id, Exception("Maximum retries reached."))

        try:
            image = iio.imread(build_image_url(element))
            # imageio returns RGB data; OpenCV expects BGR when writing
            cv2.imwrite(
                str(folder / f"{element.image_id}.png"),
                cv2.cvtColor(image, cv2.COLOR_RGB2BGR),
            )
            break
        except TimeoutError:
            logger.warning("Timeout, retry in 1 second.")
            time.sleep(1)
            tries += 1
        except Exception as e:
            raise ImageDownloadError(element.id, e)


def create_tar_zstd_archive(folder_path: Path, destination: Path, chunk_size=1024):
    compressor = zstd.ZstdCompressor(level=3)

    # Create a temporary file to hold the intermediate, uncompressed tar archive
    tar_fd, path_to_tar_archive = tempfile.mkstemp(prefix="teklia-", suffix=".tar")
    os.close(tar_fd)

    # Create an uncompressed tar archive with all the needed files
    # The file hierarchy is kept in the archive.
    with tarfile.open(path_to_tar_archive, "w") as tar:
        for p in folder_path.glob("**/*"):
            x = p.relative_to(folder_path)
            tar.add(p, arcname=x, recursive=False)

    # Compress the archive chunk by chunk; each chunk is written as its own zstd frame
    with destination.open("wb") as archive_file:
        with open(path_to_tar_archive, "rb") as model_data:
            for model_chunk in iter(lambda: model_data.read(chunk_size), b""):
                compressed_chunk = compressor.compress(model_chunk)
                archive_file.write(compressed_chunk)

    # Remove the intermediate tar archive
    os.remove(path_to_tar_archive)
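

if __name__ == "__main__":
    # Hypothetical local sketch, not the worker's real entrypoint: it assumes a
    # folder of already-downloaded images and only shows how the archive helper
    # above might be exercised from the command line.
    import sys

    source_folder = Path(sys.argv[1])
    archive_destination = Path(sys.argv[2])
    create_tar_zstd_archive(source_folder, archive_destination)
    print(f"Wrote zstd-compressed tar archive to {archive_destination}")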