Skip to content
Snippets Groups Projects
Commit 3e610b20 authored by Eva Bardou's avatar Eva Bardou
Browse files

Create missing tables in parents caches before merge

parent 8641248c
No related branches found
No related tags found
1 merge request: !91 "Create missing tables in parents caches before merge"
Pipeline #78460 passed
@@ -188,13 +188,19 @@ def merge_parents_cache(parent_ids, current_database, data_dir="/data", chunk=None):
     # Merge each table into the local database
     for idx, path in enumerate(paths):
+        with SqliteDatabase(path) as source:
+            with source.bind_ctx(MODELS):
+                source.create_tables(MODELS)
+
         logger.info(f"Merging parent db {path} into {current_database}")
         statements = [
             "PRAGMA page_size=80000;",
             "PRAGMA synchronous=OFF;",
             f"ATTACH DATABASE '{path}' AS source_{idx};",
+            f"REPLACE INTO images SELECT * FROM source_{idx}.images;",
             f"REPLACE INTO elements SELECT * FROM source_{idx}.elements;",
             f"REPLACE INTO transcriptions SELECT * FROM source_{idx}.transcriptions;",
+            f"REPLACE INTO classifications SELECT * FROM source_{idx}.classifications;",
         ]
         for statement in statements:
...
@@ -5,7 +5,9 @@ import pytest
 from arkindex_worker.cache import (
     MODELS,
+    CachedClassification,
     CachedElement,
+    CachedImage,
     CachedTranscription,
     merge_parents_cache,
 )
@@ -76,8 +78,10 @@ def test_merge_databases(
     # We always start with an empty database
     with mock_databases["target"]["db"].bind_ctx(MODELS):
+        assert CachedImage.select().count() == 0
         assert CachedElement.select().count() == 0
         assert CachedTranscription.select().count() == 0
+        assert CachedClassification.select().count() == 0

     # Merge all requested parents databases into our target
     merge_parents_cache(
@@ -88,8 +92,10 @@ def test_merge_databases(
     # The target now should have the expected elements and transcriptions
     with mock_databases["target"]["db"].bind_ctx(MODELS):
+        assert CachedImage.select().count() == 0
         assert CachedElement.select().count() == len(expected_elements)
         assert CachedTranscription.select().count() == len(expected_transcriptions)
+        assert CachedClassification.select().count() == 0
         assert [
             e.id for e in CachedElement.select().order_by("id")
         ] == expected_elements
@@ -124,8 +130,10 @@ def test_merge_chunk(mock_databases, tmpdir, monkeypatch):
     # The target should now have 3 elements and 0 transcription
     with mock_databases["target"]["db"].bind_ctx(MODELS):
+        assert CachedImage.select().count() == 0
         assert CachedElement.select().count() == 3
         assert CachedTranscription.select().count() == 0
+        assert CachedClassification.select().count() == 0
         assert [e.id for e in CachedElement.select().order_by("id")] == [
             UUID("42424242-4242-4242-4242-424242424242"),
             UUID("12341234-1234-1234-1234-123412341234"),
@@ -147,8 +155,10 @@ def test_merge_from_worker(
     )

     # At first we have no data in our main database
+    assert CachedImage.select().count() == 0
     assert CachedElement.select().count() == 0
     assert CachedTranscription.select().count() == 0
+    assert CachedClassification.select().count() == 0

     # Configure worker with a specific data directory
     monkeypatch.setenv("PONOS_DATA", str(tmpdir))
@@ -157,8 +167,10 @@ def test_merge_from_worker(
     mock_base_worker_with_cache.configure()

     # Then we have 2 elements and a transcription
+    assert CachedImage.select().count() == 0
     assert CachedElement.select().count() == 3
     assert CachedTranscription.select().count() == 1
+    assert CachedClassification.select().count() == 0
     assert [e.id for e in CachedElement.select().order_by("id")] == [
         UUID("12341234-1234-1234-1234-123412341234"),
         UUID("56785678-5678-5678-5678-567856785678"),
...
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment