# Methods added to ``ElementsWorker`` in arkindex_worker/worker.py.
#
# The enclosing class header and the rest of the module are outside this
# excerpt, so the methods are shown at method granularity; they belong inside
# the class body. They rely on these module-level imports:
#
#   from arkindex_common.enums import EntityType, TranscriptionType
#   from arkindex_worker.models import Element


def create_transcription(self, element, text, type, score):
    """
    Create a transcription on the given element through API

    :param element: ``Element`` receiving the transcription; its ``id`` is
        passed as the ``id`` of the ``CreateTranscription`` request.
    :param text: Non-empty ``str`` holding the transcribed text.
    :param type: Non-empty ``str`` that must be one of the values of
        ``TranscriptionType``.
    :param score: ``float`` in the inclusive range [0, 1].
    :raises AssertionError: If any argument fails the checks above.

    Nothing is returned: the API response is discarded and the transcription
    is recorded on ``self.report``.
    """
    assert element and isinstance(
        element, Element
    ), "element shouldn't be null and should be of type Element"
    assert type and isinstance(
        type, str
    ), "type shouldn't be null and should be of type str"
    assert (
        type in TranscriptionType._value2member_map_
    ), "type should be an allowed transcription type"
    assert text and isinstance(
        text, str
    ), "text shouldn't be null and should be of type str"
    # No truthiness test on score: 0.0 is falsy but is a valid lower bound.
    # isinstance() already rejects None.
    assert (
        isinstance(score, float) and 0 <= score <= 1
    ), "score shouldn't be null and should be a float in [0..1] range"

    self.api_client.request(
        "CreateTranscription",
        id=element.id,
        body={
            "text": text,
            "type": type,
            "worker_version": self.worker_version_id,
            "score": score,
        },
    )
    self.report.add_transcription(element.id, type)


def create_classification(self, element, ml_class, confidence, high_confidence):
    """
    Create a classification on the given element through API

    :param element: ``Element`` to classify; its ``id`` is sent as the
        ``element`` field of the ``CreateClassification`` request body.
    :param ml_class: Non-empty ``str`` naming the class.
    :param confidence: ``float`` in the inclusive range [0, 1].
    :param high_confidence: ``bool``; both ``True`` and ``False`` are accepted.
    :raises AssertionError: If any argument fails the checks above.

    Nothing is returned: the API response is discarded and the classification
    is recorded on ``self.report``.
    """
    assert element and isinstance(
        element, Element
    ), "element shouldn't be null and should be of type Element"
    assert ml_class and isinstance(
        ml_class, str
    ), "ml_class shouldn't be null and should be of type str"
    # No truthiness test on confidence: 0.0 is falsy but is a valid value.
    assert (
        isinstance(confidence, float) and 0 <= confidence <= 1
    ), "confidence shouldn't be null and should be a float in [0..1] range"
    # No truthiness test on high_confidence either, otherwise False could
    # never be submitted. isinstance() already rejects None.
    assert isinstance(
        high_confidence, bool
    ), "high_confidence shouldn't be null and should be of type bool"

    self.api_client.request(
        "CreateClassification",
        body={
            "element": element.id,
            "ml_class": ml_class,
            "worker_version": self.worker_version_id,
            "confidence": confidence,
            "high_confidence": high_confidence,
        },
    )
    self.report.add_classification(element.id, ml_class)


def create_entity(self, name, type, corpus, metas=None, validated=None):
    """
    Create an entity on the given corpus through API

    :param name: Non-empty ``str`` naming the entity.
    :param type: Non-empty ``str`` that must be one of the values of
        ``EntityType``.
    :param corpus: Non-empty ``str`` identifying the corpus.
    :param metas: Optional ``dict``; sent as-is (``None`` when omitted).
    :param validated: Optional ``bool``; sent as-is (``None`` when omitted).
    :raises AssertionError: If any argument fails the checks above.

    Nothing is returned: the API response is discarded.
    """
    assert name and isinstance(
        name, str
    ), "name shouldn't be null and should be of type str"
    assert type and isinstance(
        type, str
    ), "type shouldn't be null and should be of type str"
    assert (
        type in EntityType._value2member_map_
    ), "type should be an allowed entity type"
    assert corpus and isinstance(
        corpus, str
    ), "corpus shouldn't be null and should be of type str"
    # Compare against None rather than testing truthiness, so that falsy
    # values of the wrong type ([], "", 0) are still type-checked.
    if metas is not None:
        assert isinstance(metas, dict), "metas should be of type dict"
    if validated is not None:
        assert isinstance(validated, bool), "validated should be of type bool"

    self.api_client.request(
        "CreateEntity",
        body={
            "name": name,
            "type": type,
            "metas": metas,
            "validated": validated,
            "corpus": corpus,
            "worker_version": self.worker_version_id,
        },
    )
    # TODO: Report the entity once Reporter implements add_entity(). This
    # method has no ``element`` argument, so the call cannot reuse
    # ``element.id`` the way the other create_* methods do.