Skip to content
Snippets Groups Projects
Commit 95f6f614 authored by Manon Blanco, committed by Yoann Schneider
Browse files

Apply 5 suggestion(s) to 3 file(s)

parent 56b1ba7c
No related branches found
No related tags found
1 merge request: !2 "Implement worker"
Pipeline #81808 failed
...@@ -17,7 +17,7 @@ def bounding_box(polygon: list): ...@@ -17,7 +17,7 @@ def bounding_box(polygon: list):
def build_image_url(element): def build_image_url(element):
x, y, width, height = bounding_box(ast.literal_eval(element.polygon)) x, y, width, height = bounding_box(json.loads(element.polygon))
return urljoin( return urljoin(
element.image.url + "/", f"{x},{y},{width},{height}/full/0/default.jpg" element.image.url + "/", f"{x},{y},{width},{height}/full/0/default.jpg"
) )
...@@ -182,18 +182,16 @@ class DatasetExtractor(BaseWorker): ...@@ -182,18 +182,16 @@ class DatasetExtractor(BaseWorker):
) )
for transcription in list_transcriptions(element) for transcription in list_transcriptions(element)
] ]
if not transcriptions: if transcriptions:
return [] logger.info(f"Inserting {len(transcriptions)} transcription(s)")
with cache_database.atomic():
logger.info(f"Inserting {len(transcriptions)} transcription(s)") CachedTranscription.bulk_create(
with cache_database.atomic(): model_list=transcriptions,
CachedTranscription.bulk_create( batch_size=BULK_BATCH_SIZE,
model_list=transcriptions, )
batch_size=BULK_BATCH_SIZE,
)
return transcriptions return transcriptions
def insert_entities(self, transcriptions: List[CachedTranscription]): def insert_entities(self, transcriptions: List[CachedTranscription]) -> None:
logger.info("Listing entities") logger.info("Listing entities")
extracted_entities = [] extracted_entities = []
for transcription in transcriptions: for transcription in transcriptions:
...@@ -218,19 +216,14 @@ class DatasetExtractor(BaseWorker): ...@@ -218,19 +216,14 @@ class DatasetExtractor(BaseWorker):
), ),
) )
) )
if not extracted_entities: if entities:
# Early return if no entities found # First insert entities since they are foreign keys on transcription entities
return logger.info(f"Inserting {len(entities)} entities")
with cache_database.atomic():
entities, transcription_entities = zip(*extracted_entities) CachedEntity.bulk_create(
model_list=entities,
# First insert entities since they are foreign keys on transcription entities batch_size=BULK_BATCH_SIZE,
logger.info(f"Inserting {len(entities)} entities") )
with cache_database.atomic():
CachedEntity.bulk_create(
model_list=entities,
batch_size=BULK_BATCH_SIZE,
)
if transcription_entities: if transcription_entities:
# Insert transcription entities # Insert transcription entities
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment