Skip to content
Snippets Groups Projects
Verified commit ac10b18e, authored by Yoann Schneider :tennis:
Browse files

fix image caching

parent ec6c64fa
No related branches found
No related tags found
1 merge request: !2 "Implement worker"
Pipeline #81793 failed
This commit is part of merge request !2. Comments created here will be created in the context of that merge request.
...@@ -81,9 +81,11 @@ def retrieve_entities(transcription: CachedTranscription): ...@@ -81,9 +81,11 @@ def retrieve_entities(transcription: CachedTranscription):
.join(Entity, on=TranscriptionEntity.entity) .join(Entity, on=TranscriptionEntity.entity)
.join(EntityType, on=Entity.type) .join(EntityType, on=Entity.type)
) )
return zip( data = [
*[ parse_entities(entity_data, transcription)
parse_entities(entity_data, transcription) for entity_data in query.namedtuples()
for entity_data in query.namedtuples() ]
] if not data:
) return [], []
return zip(*data)
...@@ -33,7 +33,7 @@ def build_image_url(element: Element): ...@@ -33,7 +33,7 @@ def build_image_url(element: Element):
def download_image(element: Element, folder: Path): def download_image(element: Element, folder: Path):
""" """
Download the image to `folder / {element.id}.jpg` Download the image to `folder / {element.image.id}.jpg`
""" """
tries = 1 tries = 1
# retry loop # retry loop
...@@ -43,7 +43,7 @@ def download_image(element: Element, folder: Path): ...@@ -43,7 +43,7 @@ def download_image(element: Element, folder: Path):
try: try:
image = iio.imread(build_image_url(element)) image = iio.imread(build_image_url(element))
cv2.imwrite( cv2.imwrite(
str(folder / f"{element.id}.jpg"), str(folder / f"{element.image.id}.jpg"),
cv2.cvtColor(image, cv2.COLOR_BGR2RGB), cv2.cvtColor(image, cv2.COLOR_BGR2RGB),
) )
break break
......
...@@ -49,6 +49,9 @@ class DatasetExtractor(ElementsWorker): ...@@ -49,6 +49,9 @@ class DatasetExtractor(ElementsWorker):
# Initialize db that will be written # Initialize db that will be written
self.initialize_database() self.initialize_database()
# Cached Images downloaded and created in DB
self.cached_images = dict()
def initialize_database(self): def initialize_database(self):
# Create db at # Create db at
# - self.workdir / "db.sqlite" in Arkindex mode # - self.workdir / "db.sqlite" in Arkindex mode
...@@ -101,13 +104,14 @@ class DatasetExtractor(ElementsWorker): ...@@ -101,13 +104,14 @@ class DatasetExtractor(ElementsWorker):
def insert_element(self, element: Element, parent_id: str): def insert_element(self, element: Element, parent_id: str):
logger.info(f"Processing element ({element.id})") logger.info(f"Processing element ({element.id})")
if element.image: if element.image and element.image.id not in self.cached_images:
# Download image # Download image
logger.info("Downloading image") logger.info("Downloading image")
download_image(element, folder=IMAGE_FOLDER) download_image(element, folder=IMAGE_FOLDER)
# Insert image # Insert image
logger.info("Inserting image") logger.info("Inserting image")
CachedImage.create( # Store images in case some other elements use it as well
self.cached_images[element.image.id] = CachedImage.create(
id=element.image.id, id=element.image.id,
width=element.image.width, width=element.image.width,
height=element.image.height, height=element.image.height,
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment