Rework the worker due to `Dataset` API changes

Merged: Eva Bardou requested to merge `rework-worker` into `main`
@@ -161,7 +161,7 @@ class DatasetExtractor(DatasetWorker):
                 state=classification.state,
                 worker_run_id=get_object_id(classification.worker_run),
             )
-            for classification in list_classifications(element.id)
+            for classification in list_classifications(element.id).iterator()
         ]
         if classifications:
             logger.info(f"Inserting {len(classifications)} classification(s)")
@@ -185,7 +185,7 @@ class DatasetExtractor(DatasetWorker):
                 worker_version_id=get_object_id(transcription.worker_version),
                 worker_run_id=get_object_id(transcription.worker_run),
             )
-            for transcription in list_transcriptions(element.id)
+            for transcription in list_transcriptions(element.id).iterator()
         ]
         if transcriptions:
             logger.info(f"Inserting {len(transcriptions)} transcription(s)")
@@ -201,7 +201,9 @@ class DatasetExtractor(DatasetWorker):
         entities: List[CachedEntity] = []
         transcription_entities: List[CachedTranscriptionEntity] = []
         for transcription in transcriptions:
-            for transcription_entity in list_transcription_entities(transcription.id):
+            for transcription_entity in list_transcription_entities(
+                transcription.id
+            ).iterator():
                 entity = CachedEntity(
                     id=transcription_entity.entity.id,
                     type=transcription_entity.entity.type.name,
@@ -336,7 +338,7 @@ class DatasetExtractor(DatasetWorker):
         # List children
         children = list_children(element.id)
         nb_children: int = children.count()
-        for child_idx, child in enumerate(children, start=1):
+        for child_idx, child in enumerate(children.iterator(), start=1):
             logger.info(f"Processing child ({child_idx}/{nb_children})")
             # Insert child
             self.insert_element(child, parent_id=element.id)
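
Note: `count()` issues a separate `SELECT COUNT(*)`, so the progress total is known upfront while `.iterator()` still streams the children one by one. A short sketch under the same peewee assumption (illustrative model again, not the worker's):

```python
from peewee import CharField, Model, SqliteDatabase

db = SqliteDatabase(":memory:")


class CachedElement(Model):
    name = CharField()

    class Meta:
        database = db


db.create_tables([CachedElement])
for name in ("page_1", "page_2", "page_3"):
    CachedElement.create(name=name)

children = CachedElement.select()
# count() runs its own aggregate query; no rows are fetched for it.
nb_children = children.count()
# The rows themselves are then streamed exactly once.
for child_idx, child in enumerate(children.iterator(), start=1):
    print(f"Processing child ({child_idx}/{nb_children})")
```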
@@ -382,7 +384,8 @@ class DatasetExtractor(DatasetWorker):
         dataset_sets: list[Set] = list(self.list_sets())
         datasets_and_sets: list[tuple[Dataset, list[Set]]] = [
-            groupby(dataset_sets, attrgetter("dataset.id"))
+            (dataset, list(sets))
+            for dataset, sets in groupby(dataset_sets, attrgetter("dataset"))
         ]
         if not datasets_and_sets:
             logger.warning("No datasets to process, stopping.")
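
Note: `itertools.groupby` only merges consecutive items, so this relies on `list_sets()` yielding sets already ordered by dataset; keying on the whole `dataset` object (instead of `dataset.id`) also keeps the `Dataset` itself available for the result tuple. A self-contained sketch with stand-in objects, since the real `Dataset`/`Set` types come from the worker API:

```python
from itertools import groupby
from operator import attrgetter
from types import SimpleNamespace

# Stand-ins for the worker's Dataset and Set objects.
ds_a = SimpleNamespace(name="dataset_a")
ds_b = SimpleNamespace(name="dataset_b")
dataset_sets = [
    SimpleNamespace(dataset=ds_a, name="train"),
    SimpleNamespace(dataset=ds_a, name="dev"),
    SimpleNamespace(dataset=ds_b, name="train"),
]

# groupby only groups adjacent items, so dataset_sets must already be
# ordered by dataset; list(sets) materializes each group before groupby
# invalidates it by advancing to the next key.
datasets_and_sets = [
    (dataset, list(sets))
    for dataset, sets in groupby(dataset_sets, attrgetter("dataset"))
]
assert [(d.name, [s.name for s in ss]) for d, ss in datasets_and_sets] == [
    ("dataset_a", ["train", "dev"]),
    ("dataset_b", ["train"]),
]
```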