Verified commit 6f7e9b57, authored by Yoann Schneider

    moar tests

Parent: 95f6f614
Related merge request: !2 "Implement worker"
Pipeline: #81809 (failed)
@@ -8,7 +8,7 @@ from arkindex.mock import MockApiClient
 from arkindex_export import open_database
 from arkindex_worker.worker.base import BaseWorker

-DATA_PATH = Path(__file__).parent / "data"
+DATA_PATH: Path = Path(__file__).parent / "data"

 @pytest.fixture(autouse=True)
@@ -22,8 +22,6 @@ def setup_environment(responses, monkeypatch):
         "https://arkindex.teklia.com/api/v1/openapi/?format=openapi-json",
     )
     responses.add_passthru(schema_url)
-    # To allow image download
-    responses.add_passthru("https://europe-gamma.iiif.teklia.com/iiif/2")

     # Set schema url in environment
     os.environ["ARKINDEX_API_SCHEMA_URL"] = schema_url
@@ -35,5 +33,34 @@ def setup_environment(responses, monkeypatch):
 @pytest.fixture(scope="session", autouse=True)
-def arkindex_db():
+def arkindex_db() -> None:
     open_database(DATA_PATH / "arkindex_export.sqlite")
+
+
+@pytest.fixture
+def page_1_image() -> bytes:
+    with (DATA_PATH / "sample_image_1.jpg").open("rb") as image:
+        return image.read()
+
+
+@pytest.fixture
+def page_2_image() -> bytes:
+    with (DATA_PATH / "sample_image_2.jpg").open("rb") as image:
+        return image.read()
+
+
+@pytest.fixture
+def downloaded_images(responses, page_1_image, page_2_image) -> None:
+    # Mock image download call
+    # Page 1
+    responses.add(
+        responses.GET,
+        "https://europe-gamma.iiif.teklia.com/iiif/2/public%2Fiam%2Fg06-018m.png/0,0,2479,3542/full/0/default.jpg",
+        body=page_1_image,
+    )
+    # Page 2
+    responses.add(
+        responses.GET,
+        "https://europe-gamma.iiif.teklia.com/iiif/2/public%2Fiam%2Fc02-026.png/0,0,2479,3542/full/0/default.jpg",
+        body=page_2_image,
+    )
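The new `downloaded_images` fixture replaces the earlier `add_passthru` on the IIIF server: image downloads are now intercepted by the `responses` library and served from the two sample files, so the tests no longer reach the network. A minimal, standalone sketch of the same mocking pattern (the URL and payload below are placeholders, not the ones used by this test suite):

```python
import requests
import responses


@responses.activate
def test_mocked_image_download() -> None:
    # Register a fake IIIF crop; requests.get() is intercepted, no real HTTP call is made.
    fake_url = "https://iiif.example.com/iiif/2/image.png/0,0,10,10/full/0/default.jpg"
    responses.add(responses.GET, fake_url, body=b"fake-jpeg-bytes")

    assert requests.get(fake_url).content == b"fake-jpeg-bytes"
```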
 # -*- coding: utf-8 -*-
 from argparse import Namespace
+from uuid import UUID

 from arkindex_worker.cache import (
     CachedClassification,
@@ -13,14 +14,14 @@ from arkindex_worker.cache import (
 from worker_generic_training_dataset.worker import DatasetExtractor

-def test_process_split(tmp_path):
+def test_process_split(tmp_path, downloaded_images):
     # Parent is train folder
-    parent_id = "a0c4522d-2d80-4766-a01c-b9d686f41f6a"
+    parent_id: UUID = UUID("a0c4522d-2d80-4766-a01c-b9d686f41f6a")

     worker = DatasetExtractor()
-    # Mock important configuration steps
-    worker.args = Namespace(dev=False)
-    worker.initialize_database()
+    # Parse some arguments
+    worker.args = Namespace(dev=False, database=None)
+    worker.configure_cache()
     worker.cached_images = dict()

     # Where to save the downloaded images
@@ -28,44 +29,99 @@ def test_process_split(tmp_path):
     worker.process_split("train", parent_id)

+    # Should have created 20 elements in total
+    assert CachedElement.select().count() == 20
+
     # Should have created two pages under root folder
     assert (
         CachedElement.select().where(CachedElement.parent_id == parent_id).count() == 2
     )

+    first_page_id = UUID("e26e6803-18da-4768-be30-a0a68132107c")
+    second_page_id = UUID("c673bd94-96b1-4a2e-8662-a4d806940b5f")
+
     # Should have created 8 text_lines under first page
     assert (
-        CachedElement.select()
-        .where(CachedElement.parent_id == "e26e6803-18da-4768-be30-a0a68132107c")
-        .count()
+        CachedElement.select().where(CachedElement.parent_id == first_page_id).count()
         == 8
     )
-    # Should have created one classification linked to first page
+    # Should have created 9 text_lines under second page
     assert (
-        CachedClassification.select()
-        .where(CachedClassification.element == "e26e6803-18da-4768-be30-a0a68132107c")
-        .count()
-        == 1
+        CachedElement.select().where(CachedElement.parent_id == second_page_id).count()
+        == 9
     )
+
+    # Should have created one classification
+    assert CachedClassification.select().count() == 1
+    # Check classification
+    classif = CachedClassification.get_by_id("ea80bc43-6e96-45a1-b6d7-70f29b5871a6")
+    assert classif.element.id == first_page_id
+    assert classif.class_name == "Hello darkness"
+    assert classif.confidence == 1.0
+    assert classif.state == "validated"
+    assert classif.worker_run_id is None
+
     # Should have created two images
     assert CachedImage.select().count() == 2
+    first_image_id = UUID("80a84b30-1ae1-4c13-95d6-7d0d8ee16c51")
+    second_image_id = UUID("e3c755f2-0e1c-468e-ae4c-9206f0fd267a")
+    # Check images
+    for image_id, page_name in (
+        (first_image_id, "g06-018m"),
+        (second_image_id, "c02-026"),
+    ):
+        page_image = CachedImage.get_by_id(image_id)
+        assert page_image.width == 2479
+        assert page_image.height == 3542
+        assert (
+            page_image.url
+            == f"https://europe-gamma.iiif.teklia.com/iiif/2/public%2Fiam%2F{page_name}.png"
+        )
+
     assert sorted(tmp_path.rglob("*")) == [
-        tmp_path / "80a84b30-1ae1-4c13-95d6-7d0d8ee16c51.jpg",
-        tmp_path / "e3c755f2-0e1c-468e-ae4c-9206f0fd267a.jpg",
+        tmp_path / f"{first_image_id}.jpg",
+        tmp_path / f"{second_image_id}.jpg",
     ]

-    # Should have created a transcription linked to first line of first page
-    assert (
-        CachedTranscription.select()
-        .where(CachedTranscription.element == "6411b293-dee0-4002-ac82-ffc5cdcb499d")
-        .count()
-        == 1
-    )
+    # Should have created 17 transcriptions
+    assert CachedTranscription.select().count() == 17
+    # Check transcription of first line on first page
+    transcription: CachedTranscription = CachedTranscription.get_by_id(
+        "144974c3-2477-4101-a486-6c276b0070aa"
+    )
+    assert transcription.element.id == UUID("6411b293-dee0-4002-ac82-ffc5cdcb499d")
+    assert transcription.text == "When the sailing season was past , he sent Pearl"
+    assert transcription.confidence == 1.0
+    assert transcription.orientation == "horizontal-lr"
+    assert transcription.worker_version_id is None
+    assert transcription.worker_run_id is None

     # Should have created a transcription entity linked to transcription of first line of first page
+    # Checks on entity
     assert CachedEntity.select().count() == 1
-    assert CachedTranscriptionEntity.select().count() == 1
+    entity: CachedEntity = CachedEntity.get_by_id(
+        "e04b0323-0dda-4f76-b218-af40f7d40c84"
+    )
+    assert entity.name == "When the sailing season"
+    assert entity.type == "something something"
+    assert entity.metas == "{}"
+    assert entity.validated is False
+    assert entity.worker_run_id is None

-    assert CachedEntity.get_by_id("e04b0323-0dda-4f76-b218-af40f7d40c84")
+    # Checks on transcription entity
+    assert CachedTranscriptionEntity.select().count() == 1
+    tr_entity: CachedTranscriptionEntity = (
+        CachedTranscriptionEntity.select()
+        .where(
+            (
+                CachedTranscriptionEntity.transcription
+                == "144974c3-2477-4101-a486-6c276b0070aa"
+            )
+            & (CachedTranscriptionEntity.entity == entity)
+        )
+        .get()
+    )
+    assert tr_entity.offset == 0
+    assert tr_entity.length == 23
+    assert tr_entity.confidence == 1.0
+    assert tr_entity.worker_run_id is None
 # -*- coding: utf-8 -*-
+from uuid import UUID
+
 from arkindex_export import Classification
 from arkindex_export.models import (
     Element,
@@ -8,25 +10,24 @@ from arkindex_export.models import (
     Transcription,
     TranscriptionEntity,
 )
-from arkindex_worker.cache import CachedElement, CachedTranscription

-def retrieve_element(element_id: str):
+def retrieve_element(element_id: UUID) -> Element:
     return Element.get_by_id(element_id)


-def list_classifications(element_id: str):
-    return Classification.select().where(Classification.element_id == element_id)
+def list_classifications(element_id: UUID):
+    return Classification.select().where(Classification.element == element_id)


-def list_transcriptions(element: CachedElement):
-    return Transcription.select().where(Transcription.element_id == element.id)
+def list_transcriptions(element_id: UUID):
+    return Transcription.select().where(Transcription.element == element_id)


-def list_transcription_entities(transcription: CachedTranscription):
+def list_transcription_entities(transcription_id: UUID):
     return (
         TranscriptionEntity.select()
-        .where(TranscriptionEntity.transcription_id == transcription.id)
+        .where(TranscriptionEntity.transcription == transcription_id)
         .join(Entity, on=TranscriptionEntity.entity)
         .join(EntityType, on=Entity.type)
     )
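These helpers are thin peewee queries against the SQLite export opened with `arkindex_export.open_database`; after this change they take plain UUIDs instead of cached objects. A rough usage sketch, assuming the export file from the test data and a hypothetical element ID:

```python
from uuid import UUID

from arkindex_export import open_database

from worker_generic_training_dataset.db import (
    list_transcription_entities,
    list_transcriptions,
    retrieve_element,
)

# Hypothetical path and ID, for illustration only.
open_database("tests/data/arkindex_export.sqlite")
page = retrieve_element(UUID("00000000-0000-0000-0000-000000000000"))

for transcription in list_transcriptions(page.id):
    for tr_entity in list_transcription_entities(transcription.id):
        print(tr_entity.entity.name, tr_entity.offset, tr_entity.length)
```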
 # -*- coding: utf-8 -*-
-import ast
+import json
 import logging
+from logging import Logger
 from urllib.parse import urljoin

-logger = logging.getLogger(__name__)
+from arkindex_worker.image import BoundingBox, polygon_bounding_box

+logger: Logger = logging.getLogger(__name__)

-def bounding_box(polygon: list):
-    """
-    Returns a 4-tuple (x, y, width, height) for the bounding box of a Polygon (list of points)
-    """
-    all_x, all_y = zip(*polygon)
-    x, y = min(all_x), min(all_y)
-    width, height = max(all_x) - x, max(all_y) - y
-    return int(x), int(y), int(width), int(height)

-def build_image_url(element):
-    x, y, width, height = bounding_box(json.loads(element.polygon))
+def build_image_url(element) -> str:
+    bbox: BoundingBox = polygon_bounding_box(json.loads(element.polygon))
+    x: int
+    y: int
+    width: int
+    height: int
+    x, y, width, height = bbox
     return urljoin(
         element.image.url + "/", f"{x},{y},{width},{height}/full/0/default.jpg"
     )
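`build_image_url` now delegates the bounding-box computation to `arkindex_worker.image.polygon_bounding_box` and builds a IIIF Image API region request of the form `{x},{y},{width},{height}/full/0/default.jpg`. A small sketch with a stand-in element object (the `SimpleNamespace` stand-in and URL are made up for illustration; real callers pass `arkindex_export` Element rows):

```python
import json
from types import SimpleNamespace

from worker_generic_training_dataset.utils import build_image_url

# Stand-in carrying the two attributes the helper reads: polygon and image.url.
element = SimpleNamespace(
    polygon=json.dumps([[0, 0], [2479, 0], [2479, 3542], [0, 3542]]),
    image=SimpleNamespace(url="https://iiif.example.com/iiif/2/some-image.png"),
)

# -> https://iiif.example.com/iiif/2/some-image.png/0,0,2479,3542/full/0/default.jpg
print(build_image_url(element))
```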
@@ -2,7 +2,9 @@
 import logging
 import operator
 import tempfile
+from argparse import Namespace
 from pathlib import Path
+from tempfile import _TemporaryFileWrapper
 from typing import List, Optional
 from uuid import UUID
@@ -32,15 +34,15 @@ from worker_generic_training_dataset.db import (
 )
 from worker_generic_training_dataset.utils import build_image_url

-logger = logging.getLogger(__name__)
+logger: logging.Logger = logging.getLogger(__name__)

 BULK_BATCH_SIZE = 50
 DEFAULT_TRANSCRIPTION_ORIENTATION = "horizontal-lr"


 class DatasetExtractor(BaseWorker):
-    def configure(self):
-        self.args = self.parser.parse_args()
+    def configure(self) -> None:
+        self.args: Namespace = self.parser.parse_args()
         if self.is_read_only:
             super().configure_for_developers()
         else:
@@ -57,7 +59,7 @@ class DatasetExtractor(BaseWorker):
         self.download_latest_export()

         # Initialize db that will be written
-        self.initialize_database()
+        self.configure_cache()

         # CachedImage downloaded and created in DB
         self.cached_images = dict()
@@ -66,7 +68,7 @@ class DatasetExtractor(BaseWorker):
         self.image_folder = Path(tempfile.mkdtemp(suffix="-arkindex-data"))
         logger.info(f"Images will be saved at `{self.image_folder}`.")

-    def read_training_related_information(self):
+    def read_training_related_information(self) -> None:
         """
         Read from process information
         - train_folder_id
@@ -84,23 +86,26 @@ class DatasetExtractor(BaseWorker):
         self.validation_folder_id = UUID(val_folder_id)

         test_folder_id = self.config.get("test_folder_id")
-        self.testing_folder_id = UUID(test_folder_id) if test_folder_id else None
+        self.testing_folder_id: UUID | None = (
+            UUID(test_folder_id) if test_folder_id else None
+        )

-    def initialize_database(self):
+    def configure_cache(self) -> None:
         """
         Create an SQLite database compatible with base-worker cache and initialize it.
         """
-        database_path = self.work_dir / "db.sqlite"
+        self.use_cache = True
+        self.cache_path: Path = self.args.database or self.work_dir / "db.sqlite"
         # Remove previous execution result if present
-        database_path.unlink(missing_ok=True)
+        self.cache_path.unlink(missing_ok=True)

-        init_cache_db(database_path)
+        init_cache_db(self.cache_path)
         create_version_table()
         create_tables()

-    def download_latest_export(self):
+    def download_latest_export(self) -> None:
         """
         Download the latest export of the current corpus.
         Export must be in `"done"` state.
@@ -119,7 +124,7 @@ class DatasetExtractor(BaseWorker):
             raise e

         # Find the latest that is in "done" state
-        exports = sorted(
+        exports: List[dict] = sorted(
             list(filter(lambda exp: exp["state"] == "done", exports)),
             key=operator.itemgetter("updated"),
             reverse=True,
@@ -128,11 +133,11 @@ class DatasetExtractor(BaseWorker):
             len(exports) > 0
         ), f"No available exports found for the corpus {self.corpus_id}."

-        # Download latest it in a tmpfile
+        # Download latest export
         try:
-            export_id = exports[0]["id"]
+            export_id: str = exports[0]["id"]
             logger.info(f"Downloading export ({export_id})...")
-            self.export = self.api_client.request(
+            self.export: _TemporaryFileWrapper = self.api_client.request(
                 "DownloadExport",
                 id=export_id,
             )
@@ -146,7 +151,7 @@ class DatasetExtractor(BaseWorker):
     def insert_classifications(self, element: CachedElement) -> None:
         logger.info("Listing classifications")

-        classifications = [
+        classifications: list[CachedClassification] = [
             CachedClassification(
                 id=classification.id,
                 element=element,
@@ -168,7 +173,7 @@ class DatasetExtractor(BaseWorker):
         self, element: CachedElement
     ) -> List[CachedTranscription]:
         logger.info("Listing transcriptions")

-        transcriptions = [
+        transcriptions: list[CachedTranscription] = [
             CachedTranscription(
                 id=transcription.id,
                 element=element,
@@ -180,7 +185,7 @@ class DatasetExtractor(BaseWorker):
                 if transcription.worker_version
                 else None,
             )
-            for transcription in list_transcriptions(element)
+            for transcription in list_transcriptions(element.id)
         ]

         if transcriptions:
             logger.info(f"Inserting {len(transcriptions)} transcription(s)")
@@ -193,9 +198,10 @@ class DatasetExtractor(BaseWorker):
     def insert_entities(self, transcriptions: List[CachedTranscription]) -> None:
         logger.info("Listing entities")

-        extracted_entities = []
+        entities: List[CachedEntity] = []
+        transcription_entities: List[CachedTranscriptionEntity] = []
         for transcription in transcriptions:
-            for transcription_entity in list_transcription_entities(transcription):
+            for transcription_entity in list_transcription_entities(transcription.id):
                 entity = CachedEntity(
                     id=transcription_entity.entity.id,
                     type=transcription_entity.entity.type.name,
@@ -203,17 +209,15 @@ class DatasetExtractor(BaseWorker):
                     validated=transcription_entity.entity.validated,
                     metas=transcription_entity.entity.metas,
                 )
-                extracted_entities.append(
-                    (
-                        entity,
-                        CachedTranscriptionEntity(
-                            id=transcription_entity.id,
-                            transcription=transcription,
-                            entity=entity,
-                            offset=transcription_entity.offset,
-                            length=transcription_entity.length,
-                            confidence=transcription_entity.confidence,
-                        ),
-                    )
-                )
+                entities.append(entity)
+                transcription_entities.append(
+                    CachedTranscriptionEntity(
+                        id=transcription_entity.id,
+                        transcription=transcription,
+                        entity=entity,
+                        offset=transcription_entity.offset,
+                        length=transcription_entity.length,
+                        confidence=transcription_entity.confidence,
+                    )
+                )

         if entities:
@@ -236,17 +240,19 @@ class DatasetExtractor(BaseWorker):
                 batch_size=BULK_BATCH_SIZE,
             )

-    def insert_element(self, element: Element, parent_id: Optional[str] = None):
+    def insert_element(
+        self, element: Element, parent_id: Optional[UUID] = None
+    ) -> None:
         """
-        Insert the given element's children in the cache database.
-        Their image will also be saved to disk, if they weren't already.
+        Insert the given element in the cache database.
+        Its image will also be saved to disk, if it wasn't already.

         The insertion of an element includes:
         - its classifications
         - its transcriptions
         - its transcriptions' entities (both Entity and TranscriptionEntity)

-        :param element: Element to insert. All its children will be inserted as well.
+        :param element: Element to insert.
         :param parent_id: ID of the parent to use when creating the CachedElement. Do not specify for top-level elements.
         """
         logger.info(f"Processing element ({element.id})")
@@ -259,41 +265,45 @@ class DatasetExtractor(BaseWorker):
             # Insert image
             logger.info("Inserting image")
            # Store images in case some other elements use it as well
-            self.cached_images[element.image_id] = CachedImage.create(
-                id=element.image.id,
-                width=element.image.width,
-                height=element.image.height,
-                url=element.image.url,
-            )
+            with cache_database.atomic():
+                self.cached_images[element.image.id] = CachedImage.create(
+                    id=element.image.id,
+                    width=element.image.width,
+                    height=element.image.height,
+                    url=element.image.url,
+                )

         # Insert element
         logger.info("Inserting element")
-        cached_element = CachedElement.create(
-            id=element.id,
-            parent_id=parent_id,
-            type=element.type,
-            image=self.cached_images[element.image.id] if element.image else None,
-            polygon=element.polygon,
-            rotation_angle=element.rotation_angle,
-            mirrored=element.mirrored,
-            worker_version_id=element.worker_version.id
-            if element.worker_version
-            else None,
-            confidence=element.confidence,
-        )
+        with cache_database.atomic():
+            cached_element: CachedElement = CachedElement.create(
+                id=element.id,
+                parent_id=parent_id,
+                type=element.type,
+                image=self.cached_images[element.image.id] if element.image else None,
+                polygon=element.polygon,
+                rotation_angle=element.rotation_angle,
+                mirrored=element.mirrored,
+                worker_version_id=element.worker_version.id
+                if element.worker_version
+                else None,
+                confidence=element.confidence,
+            )

         # Insert classifications
         self.insert_classifications(cached_element)

         # Insert transcriptions
-        transcriptions = self.insert_transcriptions(cached_element)
+        transcriptions: List[CachedTranscription] = self.insert_transcriptions(
+            cached_element
+        )

         # Insert entities
         self.insert_entities(transcriptions)

-    def process_split(self, split_name: str, split_id: UUID):
+    def process_split(self, split_name: str, split_id: UUID) -> None:
         """
-        Insert all elements under the given parent folder.
+        Insert all elements under the given parent folder (all queries are recursive).
         - `page` elements are linked to this folder (via parent_id foreign key)
         - `page` element children are linked to their `page` parent (via parent_id foreign key)
         """
@@ -302,12 +312,12 @@ class DatasetExtractor(BaseWorker):
         )
         # Fill cache
         # Retrieve parent and create parent
-        parent = retrieve_element(split_id)
+        parent: Element = retrieve_element(split_id)
         self.insert_element(parent)

         # First list all pages
         pages = list_children(split_id).join(Image).where(Element.type == "page")
-        nb_pages = pages.count()
+        nb_pages: int = pages.count()
         for idx, page in enumerate(pages, start=1):
             logger.info(f"Processing `{split_name}` page ({idx}/{nb_pages})")
@@ -316,7 +326,7 @@ class DatasetExtractor(BaseWorker):
             # List children
             children = list_children(page.id)
-            nb_children = children.count()
+            nb_children: int = children.count()
             for child_idx, child in enumerate(children, start=1):
                 logger.info(f"Processing child ({child_idx}/{nb_children})")
                 # Insert child
@@ -336,7 +346,7 @@ class DatasetExtractor(BaseWorker):
             self.process_split(split_name, split_id)

         # TAR + ZSTD Image folder and store as task artifact
-        zstd_archive_path = self.work_dir / "arkindex_data.zstd"
+        zstd_archive_path: Path = self.work_dir / "arkindex_data.zstd"
         logger.info(f"Compressing the images to {zstd_archive_path}")
         create_tar_zst_archive(source=self.image_folder, destination=zstd_archive_path)
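After a full run, the worker leaves two task artifacts in its working directory: the cache database (`db.sqlite`, or the path given via the worker's `database` argument) and the `arkindex_data.zstd` image archive. A hedged sketch of how a downstream consumer could reopen that cache with `arkindex_worker.cache` and inspect what was extracted (the path is an assumption):

```python
from pathlib import Path

from arkindex_worker.cache import CachedElement, CachedTranscription, init_cache_db

# Hypothetical location of the database produced by DatasetExtractor.
init_cache_db(Path("work_dir/db.sqlite"))

print("elements:", CachedElement.select().count())
print("pages:", CachedElement.select().where(CachedElement.type == "page").count())
print("transcriptions:", CachedTranscription.select().count())
```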