Skip to content
Snippets Groups Projects

Implement worker

Merged Yoann Schneider requested to merge implem into main
5 files
+ 310
13
Compare changes
  • Side-by-side
  • Inline
Files
5
+ 89
0
# -*- coding: utf-8 -*-
from typing import NamedTuple
from arkindex_export import Classification
from arkindex_export.models import (
Element,
Entity,
EntityType,
Transcription,
TranscriptionEntity,
)
from arkindex_worker.cache import (
CachedElement,
CachedEntity,
CachedTranscription,
CachedTranscriptionEntity,
)
# Orientation value assigned to every CachedTranscription built by
# parse_transcription().
DEFAULT_TRANSCRIPTION_ORIENTATION = "horizontal-lr"
def retrieve_element(element_id: str):
    """Fetch a single ``Element`` row from the export by its identifier.

    :param element_id: Identifier of the element to look up.
    :return: The result of ``Element.get_by_id(element_id)``.

    NOTE(review): presumably raises the model's "does not exist" error when
    no row matches (peewee ``get_by_id`` semantics) -- confirm against the
    ``arkindex_export`` models.
    """
    element = Element.get_by_id(element_id)
    return element
def list_classifications(element: Element):
    """Build the query selecting the classifications attached to an element.

    :param element: Element whose classifications are wanted.
    :return: The ``Classification`` select query filtered on ``element``.
        The query object itself is returned; it is not iterated here.
    """
    return Classification.select().where(Classification.element == element)
def parse_transcription(transcription: NamedTuple, element: CachedElement):
    """Convert an exported transcription into a ``CachedTranscription``.

    :param transcription: Source record; must expose ``id``, ``text``,
        ``confidence`` and ``worker_version`` attributes.
    :param element: Cached element the new transcription is attached to.
    :return: A ``CachedTranscription`` built from the source record, using
        ``DEFAULT_TRANSCRIPTION_ORIENTATION`` as its orientation.
    """
    # The worker version is optional: only dereference it when it is set.
    worker_version = transcription.worker_version
    if worker_version:
        worker_version_id = worker_version.id
    else:
        worker_version_id = None

    return CachedTranscription(
        id=transcription.id,
        element=element,
        text=transcription.text,
        confidence=transcription.confidence,
        orientation=DEFAULT_TRANSCRIPTION_ORIENTATION,
        worker_version_id=worker_version_id,
    )
def list_transcriptions(element: CachedElement):
    """List the transcriptions of an element as ``CachedTranscription`` objects.

    :param element: Cached element; its ``id`` is used to filter the
        ``Transcription`` rows.
    :return: A list with one ``parse_transcription`` result per matching row.
    """
    rows = Transcription.select().where(Transcription.element_id == element.id)
    cached_transcriptions = []
    for row in rows:
        cached_transcriptions.append(parse_transcription(row, element))
    return cached_transcriptions
def parse_entities(data: NamedTuple, transcription: CachedTranscription):
    """Build the cached entity and its transcription link from one query row.

    :param data: Row exposing ``entity_id``, ``type``, ``name``,
        ``validated``, ``metas``, ``transcription_entity_id``, ``offset``,
        ``length`` and ``confidence``.
    :param transcription: Cached transcription the entity was found in.
    :return: A ``(CachedEntity, CachedTranscriptionEntity)`` pair; the second
        item references both ``transcription`` and the first item.
    """
    cached_entity = CachedEntity(
        id=data.entity_id,
        type=data.type,
        name=data.name,
        validated=data.validated,
        metas=data.metas,
    )
    cached_link = CachedTranscriptionEntity(
        id=data.transcription_entity_id,
        transcription=transcription,
        entity=cached_entity,
        offset=data.offset,
        length=data.length,
        confidence=data.confidence,
    )
    return cached_entity, cached_link
def retrieve_entities(transcription: CachedTranscription):
    """Retrieve the entities linked to a transcription.

    Joins ``TranscriptionEntity`` with ``Entity`` and ``EntityType``, keeps
    the rows whose ``transcription_id`` matches ``transcription.id`` and
    converts each row with ``parse_entities``.

    :param transcription: Cached transcription whose entities are wanted.
    :return: A pair ``(entities, transcription_entities)`` of equally long
        tuples holding, respectively, the ``CachedEntity`` and the
        ``CachedTranscriptionEntity`` objects. Both tuples are empty when the
        transcription has no entity, so the result can always be unpacked
        into two names.
    """
    query = (
        TranscriptionEntity.select(
            TranscriptionEntity.id.alias("transcription_entity_id"),
            TranscriptionEntity.length.alias("length"),
            TranscriptionEntity.offset.alias("offset"),
            TranscriptionEntity.confidence.alias("confidence"),
            Entity.id.alias("entity_id"),
            EntityType.name.alias("type"),
            Entity.name,
            Entity.validated,
            Entity.metas,
        )
        .where(TranscriptionEntity.transcription_id == transcription.id)
        .join(Entity, on=TranscriptionEntity.entity)
        .join(EntityType, on=Entity.type)
    )
    parsed = [
        parse_entities(entity_data, transcription)
        for entity_data in query.namedtuples()
    ]
    if not parsed:
        # zip(*[]) yields nothing at all, which would make
        # `entities, links = retrieve_entities(...)` raise ValueError.
        return (), ()

    # Transpose [(entity, link), ...] into (entities, links).
    entities, transcription_entities = zip(*parsed)
    return entities, transcription_entities
Loading