Skip to content
Snippets Groups Projects

Merge parents caches into the current task one

Merged Eva Bardou requested to merge merge-parents-cache into master
All threads resolved!
4 files
+ 337
8
Compare changes
  • Side-by-side
  • Inline
Files
4
+ 60
1
# -*- coding: utf-8 -*-
import json
import logging
import os
import sqlite3
from peewee import (
BooleanField,
@@ -58,6 +60,11 @@ class CachedTranscription(Model):
table_name = "transcriptions"
# Every model managed by the cache database must be listed here.
# The list is used in this module, and also by the unit tests.
MODELS = [CachedElement, CachedTranscription]
def init_cache_db(path):
db.init(
path,
@@ -75,4 +82,56 @@ def create_tables():
"""
Creates the tables in the cache DB only if they do not already exist.
"""
db.create_tables([CachedElement, CachedTranscription])
db.create_tables(MODELS)
def merge_parents_cache(parent_ids, current_database, data_dir="/data", chunk=None):
    """
    Merge all the potential parent task's databases into the existing local one.

    For every parent ID, ``db.sqlite`` and, when ``chunk`` is given,
    ``db_{chunk}.sqlite`` are looked up in ``<data_dir>/<parent_id>/``.
    Each file that exists is attached to the current database, its
    ``elements`` and ``transcriptions`` rows are copied with ``REPLACE INTO``,
    and the file is detached again before the next one is processed.

    :param parent_ids: List of parent task IDs, used as directory names
        inside ``data_dir``.
    :param current_database: Path to the existing local SQLite database that
        receives the merged rows.
    :param data_dir: Directory containing one sub-directory per parent task.
    :param chunk: Optional chunk identifier; when not ``None``, the
        ``db_{chunk}.sqlite`` file of each parent is merged as well.
    :raises AssertionError: If ``parent_ids`` is not a list, ``data_dir`` is
        not a directory or ``current_database`` does not exist.
    """
    assert isinstance(parent_ids, list)
    assert os.path.isdir(data_dir)
    assert os.path.exists(current_database)

    # Handle possible chunk in parent task name
    # This is needed to support the init_elements databases
    filenames = ["db.sqlite"]
    if chunk is not None:
        filenames.append(f"db_{chunk}.sqlite")

    # Find all the paths for these databases, keeping only existing files
    candidates = (
        os.path.join(data_dir, parent, name)
        for parent in parent_ids
        for name in filenames
    )
    paths = [path for path in candidates if os.path.isfile(path)]

    if not paths:
        logger.info("No parents cache to use")
        return

    # Open a connection on current database; it is always closed on exit so
    # the file handle is not leaked, even when a statement fails
    connection = sqlite3.connect(current_database)
    try:
        cursor = connection.cursor()

        # Connection-level settings only need to be applied once.
        # NOTE(review): SQLite documents page sizes as powers of two up to
        # 65536, so 80000 is presumably ignored - confirm the intended value.
        cursor.execute("PRAGMA page_size=80000;")
        cursor.execute("PRAGMA synchronous=OFF;")

        # Merge each table into the local database
        for idx, path in enumerate(paths):
            logger.info(f"Merging parent db {path} into {current_database}")
            schema = f"source_{idx}"

            # Bind the path as a parameter so that quotes in it cannot break
            # the statement
            cursor.execute(f"ATTACH DATABASE ? AS {schema};", (path,))
            cursor.execute(
                f"REPLACE INTO elements SELECT * FROM {schema}.elements;"
            )
            cursor.execute(
                f"REPLACE INTO transcriptions SELECT * FROM {schema}.transcriptions;"
            )
            connection.commit()

            # Detach once committed: SQLite limits the number of databases
            # attached at the same time (10 by default)
            cursor.execute(f"DETACH DATABASE {schema};")
    finally:
        connection.close()
Loading