From 217e3de2cf5543607ac5fffb28e13541ca685482 Mon Sep 17 00:00:00 2001
From: Eva Bardou <ebardou@teklia.com>
Date: Mon, 26 Apr 2021 13:07:15 +0000
Subject: [PATCH] Support cache in create_entity

---
 arkindex_worker/cache.py                    | 21 ++++++++-
 arkindex_worker/worker/entity.py            | 21 ++++++++-
 tests/test_cache.py                         |  1 +
 tests/test_elements_worker/test_entities.py | 48 +++++++++++++++++++++
 4 files changed, 89 insertions(+), 2 deletions(-)

diff --git a/arkindex_worker/cache.py b/arkindex_worker/cache.py
index 6d7d60d6..4505f394 100644
--- a/arkindex_worker/cache.py
+++ b/arkindex_worker/cache.py
@@ -125,9 +125,28 @@ class CachedClassification(Model):
         table_name = "classifications"
 
 
+class CachedEntity(Model):
+    id = UUIDField(primary_key=True)
+    type = CharField(max_length=50)
+    name = TextField()
+    validated = BooleanField(default=False)
+    metas = JSONField(null=True)
+    worker_version_id = UUIDField()
+
+    class Meta:
+        database = db
+        table_name = "entities"
+
+
 # Add all the managed models in that list
 # It's used here, but also in unit tests
-MODELS = [CachedImage, CachedElement, CachedTranscription, CachedClassification]
+MODELS = [
+    CachedImage,
+    CachedElement,
+    CachedTranscription,
+    CachedClassification,
+    CachedEntity,
+]
 
 
 def init_cache_db(path):
diff --git a/arkindex_worker/worker/entity.py b/arkindex_worker/worker/entity.py
index 30c1637d..1689ca03 100644
--- a/arkindex_worker/worker/entity.py
+++ b/arkindex_worker/worker/entity.py
@@ -2,8 +2,10 @@
 import os
 from enum import Enum
 
+from peewee import IntegrityError
+
 from arkindex_worker import logger
-from arkindex_worker.cache import CachedElement
+from arkindex_worker.cache import CachedElement, CachedEntity
 from arkindex_worker.models import Element
 
 
@@ -61,4 +63,21 @@ class EntityMixin(object):
         )
         self.report.add_entity(element.id, entity["id"], type.value, name)
 
+        if self.use_cache:
+            # Store entity in local cache
+            try:
+                to_insert = [
+                    {
+                        "id": entity["id"],
+                        "type": type.value,
+                        "name": name,
+                        "validated": validated if validated is not None else False,
+                        "metas": metas,
+                        "worker_version_id": self.worker_version_id,
+                    }
+                ]
+                CachedEntity.insert_many(to_insert).execute()
+            except IntegrityError as e:
+                logger.warning(f"Couldn't save created entity in local cache: {e}")
+
         return entity["id"]
diff --git a/tests/test_cache.py b/tests/test_cache.py
index 6d2b147f..bc26c000 100644
--- a/tests/test_cache.py
+++ b/tests/test_cache.py
@@ -56,6 +56,7 @@ def test_create_tables(tmp_path):
 
     expected_schema = """CREATE TABLE "classifications" ("id" TEXT NOT NULL PRIMARY KEY, "element_id" TEXT NOT NULL, "class_name" TEXT NOT NULL, "confidence" REAL NOT NULL, "state" VARCHAR(10) NOT NULL, "worker_version_id" TEXT NOT NULL, FOREIGN KEY ("element_id") REFERENCES "elements" ("id"))
 CREATE TABLE "elements" ("id" TEXT NOT NULL PRIMARY KEY, "parent_id" TEXT, "type" VARCHAR(50) NOT NULL, "image_id" TEXT, "polygon" text, "initial" INTEGER NOT NULL, "worker_version_id" TEXT, FOREIGN KEY ("image_id") REFERENCES "images" ("id"))
+CREATE TABLE "entities" ("id" TEXT NOT NULL PRIMARY KEY, "type" VARCHAR(50) NOT NULL, "name" TEXT NOT NULL, "validated" INTEGER NOT NULL, "metas" text, "worker_version_id" TEXT NOT NULL)
 CREATE TABLE "images" ("id" TEXT NOT NULL PRIMARY KEY, "width" INTEGER NOT NULL, "height" INTEGER NOT NULL, "url" TEXT NOT NULL)
 CREATE TABLE "transcriptions" ("id" TEXT NOT NULL PRIMARY KEY, "element_id" TEXT NOT NULL, "text" TEXT NOT NULL, "confidence" REAL NOT NULL, "worker_version_id" TEXT NOT NULL, FOREIGN KEY ("element_id") REFERENCES "elements" ("id"))"""
 
diff --git a/tests/test_elements_worker/test_entities.py b/tests/test_elements_worker/test_entities.py
index 1fe38b36..b03edb0f 100644
--- a/tests/test_elements_worker/test_entities.py
+++ b/tests/test_elements_worker/test_entities.py
@@ -1,9 +1,11 @@
 # -*- coding: utf-8 -*-
 import json
+from uuid import UUID
 
 import pytest
 from apistar.exceptions import ErrorResponse
 
+from arkindex_worker.cache import CachedElement, CachedEntity
 from arkindex_worker.models import Element
 from arkindex_worker.worker import EntityType
 
@@ -210,3 +212,49 @@ def test_create_entity(responses, mock_elements_worker):
         "worker_version": "12341234-1234-1234-1234-123412341234",
     }
     assert entity_id == "12345678-1234-1234-1234-123456789123"
+
+
+def test_create_entity_with_cache(responses, mock_elements_worker_with_cache):
+    elt = CachedElement.create(id="12341234-1234-1234-1234-123412341234", type="thing")
+    responses.add(
+        responses.POST,
+        "http://testserver/api/v1/entity/",
+        status=200,
+        json={"id": "12345678-1234-1234-1234-123456789123"},
+    )
+
+    entity_id = mock_elements_worker_with_cache.create_entity(
+        element=elt,
+        name="Bob Bob",
+        type=EntityType.Person,
+        corpus="12341234-1234-1234-1234-123412341234",
+    )
+
+    assert len(responses.calls) == 3
+    assert [call.request.url for call in responses.calls] == [
+        "http://testserver/api/v1/user/",
+        "http://testserver/api/v1/workers/versions/12341234-1234-1234-1234-123412341234/",
+        "http://testserver/api/v1/entity/",
+    ]
+
+    assert json.loads(responses.calls[2].request.body) == {
+        "name": "Bob Bob",
+        "type": "person",
+        "metas": None,
+        "validated": None,
+        "corpus": "12341234-1234-1234-1234-123412341234",
+        "worker_version": "12341234-1234-1234-1234-123412341234",
+    }
+    assert entity_id == "12345678-1234-1234-1234-123456789123"
+
+    # Check that created entity was properly stored in SQLite cache
+    assert list(CachedEntity.select()) == [
+        CachedEntity(
+            id=UUID("12345678-1234-1234-1234-123456789123"),
+            type="person",
+            name="Bob Bob",
+            validated=False,
+            metas=None,
+            worker_version_id=UUID("12341234-1234-1234-1234-123412341234"),
+        )
+    ]
-- 
GitLab