Skip to content
Snippets Groups Projects

Store Transcriptions in local cache

Merged Eva Bardou requested to merge cache-transcriptions into master
4 files
+ 85
- 41
Compare changes
  • Side-by-side
  • Inline
Files
4
+ 47
- 40
@@ -512,20 +512,25 @@ class ElementsWorker(BaseWorker):
self.report.add_transcription(element.id)
# Store transcription in local cache
try:
to_insert = [
CachedTranscription(
id=convert_str_uuid_to_hex(created["id"]),
element_id=convert_str_uuid_to_hex(element.id),
text=created["text"],
confidence=created["confidence"],
worker_version_id=convert_str_uuid_to_hex(self.worker_version_id),
if self.cache:
# Store transcription in local cache
try:
to_insert = [
CachedTranscription(
id=convert_str_uuid_to_hex(created["id"]),
element_id=convert_str_uuid_to_hex(element.id),
text=created["text"],
confidence=created["confidence"],
worker_version_id=convert_str_uuid_to_hex(
self.worker_version_id
),
)
]
self.cache.insert("transcriptions", to_insert)
except sqlite3.IntegrityError as e:
logger.warning(
f"Couldn't save created transcription in local cache: {e}"
)
]
self.cache.insert("transcriptions", to_insert)
except sqlite3.IntegrityError as e:
logger.warning(f"Couldn't save created transcription in local cache: {e}")
def create_classification(
self, element, ml_class, confidence, high_confidence=False
@@ -676,27 +681,30 @@ class ElementsWorker(BaseWorker):
},
)
created_ids = []
elements_to_insert = []
transcriptions_to_insert = []
parent_id_hex = convert_str_uuid_to_hex(element.id)
worker_version_id_hex = convert_str_uuid_to_hex(self.worker_version_id)
for index, annotation in enumerate(annotations):
transcription = transcriptions[index]
element_id_hex = convert_str_uuid_to_hex(annotation["id"])
for annotation in annotations:
if annotation["created"]:
logger.debug(
f"A sub_element of {element.id} with type {sub_element_type} was created during transcriptions bulk creation"
)
self.report.add_element(element.id, sub_element_type)
self.report.add_transcription(annotation["id"])
if annotation["id"] not in created_ids:
if self.cache:
# Store transcriptions and their associated element (if created) in local cache
created_ids = []
elements_to_insert = []
transcriptions_to_insert = []
parent_id_hex = convert_str_uuid_to_hex(element.id)
worker_version_id_hex = convert_str_uuid_to_hex(self.worker_version_id)
for index, annotation in enumerate(annotations):
transcription = transcriptions[index]
element_id_hex = convert_str_uuid_to_hex(annotation["id"])
if annotation["created"] and annotation["id"] not in created_ids:
# TODO: Retrieve real element_name through API
elements_to_insert.append(
CachedElement(
id=element_id_hex,
parent_id=parent_id_hex,
name="test",
type=sub_element_type,
polygon=json.dumps(transcription["polygon"]),
worker_version_id=worker_version_id_hex,
@@ -704,25 +712,24 @@ class ElementsWorker(BaseWorker):
)
created_ids.append(annotation["id"])
self.report.add_transcription(annotation["id"])
transcriptions_to_insert.append(
CachedTranscription(
# TODO: Retrieve real transcription_id through API
id=convert_str_uuid_to_hex(uuid.uuid4()),
element_id=element_id_hex,
text=transcription["text"],
confidence=transcription["score"],
worker_version_id=worker_version_id_hex,
transcriptions_to_insert.append(
CachedTranscription(
# TODO: Retrieve real transcription_id through API
id=convert_str_uuid_to_hex(uuid.uuid4()),
element_id=element_id_hex,
text=transcription["text"],
confidence=transcription["score"],
worker_version_id=worker_version_id_hex,
)
)
)
# Store transcriptions and their associated element (if created) in local cache
try:
self.cache.insert("elements", elements_to_insert)
self.cache.insert("transcriptions", transcriptions_to_insert)
except sqlite3.IntegrityError as e:
logger.warning(f"Couldn't save created transcriptions in local cache: {e}")
try:
self.cache.insert("elements", elements_to_insert)
self.cache.insert("transcriptions", transcriptions_to_insert)
except sqlite3.IntegrityError as e:
logger.warning(
f"Couldn't save created transcriptions in local cache: {e}"
)
return annotations
Loading