From f8e167d4359f1b2734f3b0033040f2023139a9cf Mon Sep 17 00:00:00 2001 From: Eva Bardou <ebardou@teklia.com> Date: Tue, 23 Mar 2021 17:40:28 +0100 Subject: [PATCH] Fix existing tests --- arkindex_worker/worker.py | 87 ++++++++++++++++++--------------- tests/data/cache/lines.sqlite | Bin 12288 -> 20480 bytes tests/data/cache/tables.sqlite | Bin 12288 -> 20480 bytes tests/test_cache.py | 39 ++++++++++++++- 4 files changed, 85 insertions(+), 41 deletions(-) diff --git a/arkindex_worker/worker.py b/arkindex_worker/worker.py index 655464cf..e386f40f 100644 --- a/arkindex_worker/worker.py +++ b/arkindex_worker/worker.py @@ -512,20 +512,25 @@ class ElementsWorker(BaseWorker): self.report.add_transcription(element.id) - # Store transcription in local cache - try: - to_insert = [ - CachedTranscription( - id=convert_str_uuid_to_hex(created["id"]), - element_id=convert_str_uuid_to_hex(element.id), - text=created["text"], - confidence=created["confidence"], - worker_version_id=convert_str_uuid_to_hex(self.worker_version_id), + if self.cache: + # Store transcription in local cache + try: + to_insert = [ + CachedTranscription( + id=convert_str_uuid_to_hex(created["id"]), + element_id=convert_str_uuid_to_hex(element.id), + text=created["text"], + confidence=created["confidence"], + worker_version_id=convert_str_uuid_to_hex( + self.worker_version_id + ), + ) + ] + self.cache.insert("transcriptions", to_insert) + except sqlite3.IntegrityError as e: + logger.warning( + f"Couldn't save created transcription in local cache: {e}" ) - ] - self.cache.insert("transcriptions", to_insert) - except sqlite3.IntegrityError as e: - logger.warning(f"Couldn't save created transcription in local cache: {e}") def create_classification( self, element, ml_class, confidence, high_confidence=False @@ -676,27 +681,30 @@ class ElementsWorker(BaseWorker): }, ) - created_ids = [] - elements_to_insert = [] - transcriptions_to_insert = [] - parent_id_hex = convert_str_uuid_to_hex(element.id) - worker_version_id_hex = convert_str_uuid_to_hex(self.worker_version_id) - for index, annotation in enumerate(annotations): - transcription = transcriptions[index] - element_id_hex = convert_str_uuid_to_hex(annotation["id"]) + for annotation in annotations: if annotation["created"]: logger.debug( f"A sub_element of {element.id} with type {sub_element_type} was created during transcriptions bulk creation" ) self.report.add_element(element.id, sub_element_type) + self.report.add_transcription(annotation["id"]) - if annotation["id"] not in created_ids: + if self.cache: + # Store transcriptions and their associated element (if created) in local cache + created_ids = [] + elements_to_insert = [] + transcriptions_to_insert = [] + parent_id_hex = convert_str_uuid_to_hex(element.id) + worker_version_id_hex = convert_str_uuid_to_hex(self.worker_version_id) + for index, annotation in enumerate(annotations): + transcription = transcriptions[index] + element_id_hex = convert_str_uuid_to_hex(annotation["id"]) + if annotation["created"] and annotation["id"] not in created_ids: # TODO: Retrieve real element_name through API elements_to_insert.append( CachedElement( id=element_id_hex, parent_id=parent_id_hex, - name="test", type=sub_element_type, polygon=json.dumps(transcription["polygon"]), worker_version_id=worker_version_id_hex, @@ -704,25 +712,24 @@ class ElementsWorker(BaseWorker): ) created_ids.append(annotation["id"]) - self.report.add_transcription(annotation["id"]) - - transcriptions_to_insert.append( - CachedTranscription( - # TODO: Retrieve real transcription_id through API - id=convert_str_uuid_to_hex(uuid.uuid4()), - element_id=element_id_hex, - text=transcription["text"], - confidence=transcription["score"], - worker_version_id=worker_version_id_hex, + transcriptions_to_insert.append( + CachedTranscription( + # TODO: Retrieve real transcription_id through API + id=convert_str_uuid_to_hex(uuid.uuid4()), + element_id=element_id_hex, + text=transcription["text"], + confidence=transcription["score"], + worker_version_id=worker_version_id_hex, + ) ) - ) - # Store transcriptions and their associated element (if created) in local cache - try: - self.cache.insert("elements", elements_to_insert) - self.cache.insert("transcriptions", transcriptions_to_insert) - except sqlite3.IntegrityError as e: - logger.warning(f"Couldn't save created transcriptions in local cache: {e}") + try: + self.cache.insert("elements", elements_to_insert) + self.cache.insert("transcriptions", transcriptions_to_insert) + except sqlite3.IntegrityError as e: + logger.warning( + f"Couldn't save created transcriptions in local cache: {e}" + ) return annotations diff --git a/tests/data/cache/lines.sqlite b/tests/data/cache/lines.sqlite index ea881e4f1bb2143f560ca81ac6435842494c95f5..d7c476d4c629e98913b2c1f4edf4f66cf3f70fe5 100644 GIT binary patch delta 446 zcmZojXjs5FL0XW7fq{V)h+%+fqK>gB3xl5hO<w*V3@p4g41D+bkMq{>E!tSf!Rywf z%*-yXsma*FSdy5OlUh=gm{*)!lvz-cnV(mT%42a3a&-)GRS0o(@^Mu_k((^cui=xL zlbV~FR}!C@q7dd7<m}-Xq+x8Nso>`yqTm<m<D<i+00bqe6(tHGt`R^n6!GNzytK@e z)V$<W1)wcHlV|elxS=U=^AB?MboW#6c8%0P*rf?n@8%lh>gVhltN;}**2qlJ<kGZe zVH5Y)oP3sFLLSAn#qoxyeAdnP`4km-c!6QY#LvgT-^I_jSx|wApQDP2&DU3z-FNaq zeW}S8^tq8RtB6N_xk6%5szPOcslENJhwke{svb@bl$V+OL7s)9gb}Dy5~%Z`zWn45 V`T`)#Z|ISllasHAu6U7x006wphu8oB delta 56 zcmZozz}S#5L0XWBfq{V;h+%+nqK+|8P|yA*FaHk)CcgU&eE0c}Zx$5D=i7XrPf-y7 D9GnaW diff --git a/tests/data/cache/tables.sqlite b/tests/data/cache/tables.sqlite index efc107cd7506757f92df1ee011b670738228b8b2..f8027fdfbd148d0240047ea2facab93bb2ced474 100644 GIT binary patch delta 325 zcmZojXjs5FL0XWBfq{V)h+%+fqK>gB6N8@pO<w*V3@p4g41D+bkMq{>E!tSf!Rywf z%*-yXsma*FSdy5OlUh=gm{*)!lvz-cnV(mT%42a3a&-)GRS0o(@^Mu_k((^cui=xL zlbV~FR}!C@q7dd7<m}-Xq+x8Nso>`yqTm<m<D<i+00bqe6(tHGt`R^n6!GNzytK@e z)V$<W1)wcHlV|elxS=U=^AB?MboW#6c8%0P*rf?n@8%lh>gVhltN;}**2qlJ<kGZe fVH5Y)oP3sFLLSAn#qoxyeAdnP`4klw2`B&n<g#Fh delta 56 zcmZozz}S#5L0XWJfq{V;h+%+nqK+|8P|yA*FaHk)CcgU&eE0c}Zx$5D=i7XrPf-y7 D98nAe diff --git a/tests/test_cache.py b/tests/test_cache.py index fb3c28a1..c71f9332 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -6,7 +6,7 @@ from pathlib import Path import pytest -from arkindex_worker.cache import CachedElement, LocalDB +from arkindex_worker.cache import CachedElement, CachedTranscription, LocalDB from arkindex_worker.utils import convert_str_uuid_to_hex FIXTURES = Path(__file__).absolute().parent / "data/cache" @@ -30,6 +30,26 @@ ELEMENTS_TO_INSERT = [ ), ), ] +TRANSCRIPTIONS_TO_INSERT = [ + CachedTranscription( + id=convert_str_uuid_to_hex("11111111-1111-1111-1111-111111111111"), + element_id=convert_str_uuid_to_hex("11111111-1111-1111-1111-111111111111"), + text="Hello!", + confidence=0.42, + worker_version_id=convert_str_uuid_to_hex( + "56785678-5678-5678-5678-567856785678" + ), + ), + CachedTranscription( + id=convert_str_uuid_to_hex("22222222-2222-2222-2222-222222222222"), + element_id=convert_str_uuid_to_hex("22222222-2222-2222-2222-222222222222"), + text="How are you?", + confidence=0.42, + worker_version_id=convert_str_uuid_to_hex( + "56785678-5678-5678-5678-567856785678" + ), + ), +] def test_init_non_existent_path(): @@ -108,6 +128,10 @@ def test_insert_existing_lines(): cache.insert("elements", ELEMENTS_TO_INSERT) assert str(e.value) == "UNIQUE constraint failed: elements.id" + with pytest.raises(sqlite3.IntegrityError) as e: + cache.insert("transcriptions", TRANSCRIPTIONS_TO_INSERT) + assert str(e.value) == "UNIQUE constraint failed: transcriptions.id" + with open(db_path, "rb") as after_file: after = after_file.read() @@ -128,3 +152,16 @@ def test_insert(): ) assert [CachedElement(**dict(row)) for row in generated_rows] == ELEMENTS_TO_INSERT + + cache.insert("transcriptions", TRANSCRIPTIONS_TO_INSERT) + generated_rows = cache.cursor.execute("SELECT * FROM transcriptions").fetchall() + + expected_cache = LocalDB(f"{FIXTURES}/lines.sqlite") + assert ( + generated_rows + == expected_cache.cursor.execute("SELECT * FROM transcriptions").fetchall() + ) + + assert [ + CachedTranscription(**dict(row)) for row in generated_rows + ] == TRANSCRIPTIONS_TO_INSERT -- GitLab