Skip to content
Snippets Groups Projects
Verified Commit ac10b18e authored by Yoann Schneider :tennis:
Browse files

fix image caching

parent ec6c64fa
No related branches found
No related tags found
1 merge request: !2 "Implement worker"
Pipeline #81793 failed
This commit is part of merge request !2. Comments created here will be created in the context of that merge request.
......@@ -81,9 +81,11 @@ def retrieve_entities(transcription: CachedTranscription):
.join(Entity, on=TranscriptionEntity.entity)
.join(EntityType, on=Entity.type)
)
return zip(
*[
parse_entities(entity_data, transcription)
for entity_data in query.namedtuples()
]
)
data = [
parse_entities(entity_data, transcription)
for entity_data in query.namedtuples()
]
if not data:
return [], []
return zip(*data)
......@@ -33,7 +33,7 @@ def build_image_url(element: Element):
def download_image(element: Element, folder: Path):
"""
Download the image to `folder / {element.id}.jpg`
Download the image to `folder / {element.image.id}.jpg`
"""
tries = 1
# retry loop
......@@ -43,7 +43,7 @@ def download_image(element: Element, folder: Path):
try:
image = iio.imread(build_image_url(element))
cv2.imwrite(
str(folder / f"{element.id}.jpg"),
str(folder / f"{element.image.id}.jpg"),
cv2.cvtColor(image, cv2.COLOR_BGR2RGB),
)
break
......
......@@ -49,6 +49,9 @@ class DatasetExtractor(ElementsWorker):
# Initialize db that will be written
self.initialize_database()
# Cached Images downloaded and created in DB
self.cached_images = dict()
def initialize_database(self):
# Create db at
# - self.workdir / "db.sqlite" in Arkindex mode
......@@ -101,13 +104,14 @@ class DatasetExtractor(ElementsWorker):
def insert_element(self, element: Element, parent_id: str):
logger.info(f"Processing element ({element.id})")
if element.image:
if element.image and element.image.id not in self.cached_images:
# Download image
logger.info("Downloading image")
download_image(element, folder=IMAGE_FOLDER)
# Insert image
logger.info("Inserting image")
CachedImage.create(
# Store images in case some other elements use it as well
self.cached_images[element.image.id] = CachedImage.create(
id=element.image.id,
width=element.image.width,
height=element.image.height,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment