From e0135b0401348ad1753c32e1978d9e7aab5897d3 Mon Sep 17 00:00:00 2001
From: Erwan Rouchet <rouchet@teklia.com>
Date: Thu, 18 Jun 2020 12:05:40 +0000
Subject: [PATCH] Basic multi database support

---
 .../dataimport/migrations/0008_add_gitref.py  |  3 +-
 .../documents/migrations/0006_region_fk.py    |  2 ++
 arkindex/project/config.py                    |  7 +++++
 arkindex/project/routers.py                   | 27 ++++++++++++++++
 arkindex/project/settings.py                  | 31 ++++++++++++++-----
 .../tests/config_samples/defaults.yaml        |  1 +
 .../tests/config_samples/override.yaml        |  6 ++++
 arkindex/users/migrations/0001_initial.py     |  3 +-
 arkindex/users/migrations/0002_userscope.py   |  5 +--
 9 files changed, 73 insertions(+), 12 deletions(-)
 create mode 100644 arkindex/project/routers.py

diff --git a/arkindex/dataimport/migrations/0008_add_gitref.py b/arkindex/dataimport/migrations/0008_add_gitref.py
index 2a17edaf8e..1ab9b71b2f 100644
--- a/arkindex/dataimport/migrations/0008_add_gitref.py
+++ b/arkindex/dataimport/migrations/0008_add_gitref.py
@@ -17,8 +17,9 @@ def migrate_git_refs(apps, schema_editor):
     which case the GitRef object is linked to the latest version returned by the default
     DB order. These values will be updated later when the GitLab webhook will be triggered.
     """
+    db_alias = schema_editor.connection.alias
     Revision = apps.get_model('dataimport', 'Revision')
-    for rev in Revision.objects.filter(ref__isnull=False):
+    for rev in Revision.objects.using(db_alias).filter(ref__isnull=False):
         ref = rev.repo.refs.filter(name=rev.ref).first()
         if ref:
             ref.revision = rev
diff --git a/arkindex/documents/migrations/0006_region_fk.py b/arkindex/documents/migrations/0006_region_fk.py
index 01e538b298..fdb8f4e271 100644
--- a/arkindex/documents/migrations/0006_region_fk.py
+++ b/arkindex/documents/migrations/0006_region_fk.py
@@ -6,9 +6,11 @@ def check_m2m(apps, schema_editor):
     Ensure the RegionElement many-to-many relationship only has one
     element per region, making it possible to turn it into a foreign key.
     """
+    db_alias = schema_editor.connection.alias
     RegionElement = apps.get_model('documents', 'RegionElement')
     duplicates = RegionElement \
         .objects \
+        .using(db_alias) \
         .values('region_id') \
         .annotate(count=models.Count('id')) \
         .filter(count__gt=1)
diff --git a/arkindex/project/config.py b/arkindex/project/config.py
index 31439c5e80..74e6596029 100644
--- a/arkindex/project/config.py
+++ b/arkindex/project/config.py
@@ -38,6 +38,13 @@ def get_settings_parser(base_dir):
     database_parser.add_option('user', type=str, default='devuser')
     database_parser.add_option('password', type=str, default='devdata')
 
+    replica_parser = database_parser.add_subparser('replica', default=None)
+    replica_parser.add_option('name', type=str, default=None)
+    replica_parser.add_option('host', type=str, default=None)
+    replica_parser.add_option('port', type=int, default=None)
+    replica_parser.add_option('user', type=str, default=None)
+    replica_parser.add_option('password', type=str, default=None)
+
     email_parser = parser.add_subparser('email', default=None)
     email_parser.add_option('host', type=str)
     email_parser.add_option('port', type=int)
diff --git a/arkindex/project/routers.py b/arkindex/project/routers.py
new file mode 100644
index 0000000000..6caccbe1a6
--- /dev/null
+++ b/arkindex/project/routers.py
@@ -0,0 +1,27 @@
+class PrimaryReplicaRouter:
+    def db_for_read(self, model, **hints):
+        """
+        Route every read to the 'replica' alias, whatever the model or hints.
+        """
+        return 'replica'
+
+    def db_for_write(self, model, **hints):
+        """
+        Route every write to the 'default' (primary) alias.
+        """
+        return 'default'
+
+    def allow_relation(self, obj1, obj2, **hints):
+        """
+        Allow relations when both objects are on 'default' or 'replica'; else return None.
+        """
+        db_list = ('default', 'replica')
+        if obj1._state.db in db_list and obj2._state.db in db_list:
+            return True
+        return None
+
+    def allow_migrate(self, db, app_label, model_name=None, **hints):
+        """
+        Allow migrations only on the 'default' alias, never on the replica.
+        """
+        return db == 'default'
diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py
index 7a02f85bcc..a196372ca9 100644
--- a/arkindex/project/settings.py
+++ b/arkindex/project/settings.py
@@ -44,17 +44,32 @@ ALLOWED_HOSTS = conf['allowed_hosts']
 # Docker container name resolution
 ALLOWED_HOSTS += ['127.0.0.1', 'localhost', 'backend', 'ark-backend']
 
+
 # Database
-DATABASES = {
-    'default': {
+def _conf_to_django_db(config):
+    "Turn a YAML database config into a Django database config"
+    return {
         'ENGINE': 'django.db.backends.postgresql',
-        'NAME': conf['database']['name'],
-        'USER': conf['database']['user'],
-        'PASSWORD': conf['database']['password'],
-        'HOST': conf['database']['host'],
-        'PORT': conf['database']['port'],
+        'NAME': config['name'],
+        'USER': config['user'],
+        'PASSWORD': config['password'],
+        'HOST': config['host'],
+        'PORT': config['port'],
     }
-}
+
+
+_primary_db = _conf_to_django_db(conf['database'])
+DATABASES = {'default': _primary_db}
+
+if conf['database']['replica'] is not None:
+    # Replica options left unset (None) fall back to the primary's values.
+    # Explicit falsy values (e.g. an empty HOST or PASSWORD) are kept as given.
+    _replica_db = _conf_to_django_db(conf['database']['replica'])
+    for key, value in _primary_db.items():
+        if _replica_db.get(key) is None:
+            _replica_db[key] = value
+    DATABASES['replica'] = _replica_db
+    DATABASE_ROUTERS = ['arkindex.project.routers.PrimaryReplicaRouter']
 
 # SECURITY WARNING: don't run with debug turned on in production!
 DEBUG = ARKINDEX_ENV == 'dev'
diff --git a/arkindex/project/tests/config_samples/defaults.yaml b/arkindex/project/tests/config_samples/defaults.yaml
index 661df06079..ac3f994928 100644
--- a/arkindex/project/tests/config_samples/defaults.yaml
+++ b/arkindex/project/tests/config_samples/defaults.yaml
@@ -20,6 +20,7 @@ database:
   name: arkindex_dev
   password: devdata
   port: 9100
+  replica: null
   user: devuser
 docker:
   tasks_image: registry.gitlab.com/arkindex/tasks
diff --git a/arkindex/project/tests/config_samples/override.yaml b/arkindex/project/tests/config_samples/override.yaml
index acd4a45b4a..8885614b2a 100644
--- a/arkindex/project/tests/config_samples/override.yaml
+++ b/arkindex/project/tests/config_samples/override.yaml
@@ -22,6 +22,12 @@ database:
   name: arkindex_db
   password: hunter2
   port: 9123
+  replica:
+    host: ghost
+    name: arkindex_db2
+    password: password
+    port: 1
+    user: postgres
   user: littlebobbytables
 docker:
   tasks_image: registry.gitlab.com/arkindex/stonks
diff --git a/arkindex/users/migrations/0001_initial.py b/arkindex/users/migrations/0001_initial.py
index 51dbb2b9c6..12b9188053 100644
--- a/arkindex/users/migrations/0001_initial.py
+++ b/arkindex/users/migrations/0001_initial.py
@@ -14,8 +14,9 @@ def create_internal_group(apps, schema_editor):
 
     In the event of a migrations reset, this should be kept to make deployments simpler.
     """
+    db_alias = schema_editor.connection.alias
     Group = apps.get_model('auth', 'Group')
-    Group.objects.create(id=settings.INTERNAL_GROUP_ID, name='Internal')
+    Group.objects.using(db_alias).create(id=settings.INTERNAL_GROUP_ID, name='Internal')
 
 
 class Migration(migrations.Migration):
diff --git a/arkindex/users/migrations/0002_userscope.py b/arkindex/users/migrations/0002_userscope.py
index 646520fb76..09e37d18b2 100644
--- a/arkindex/users/migrations/0002_userscope.py
+++ b/arkindex/users/migrations/0002_userscope.py
@@ -8,11 +8,12 @@ import enumfields.fields
 
 
 def add_scopes(apps, schema_editor):
+    db_alias = schema_editor.connection.alias
     User = apps.get_model('users', 'User')
     UserScope = apps.get_model('users', 'UserScope')
-    UserScope.objects.bulk_create([
+    UserScope.objects.using(db_alias).bulk_create([
         UserScope(user=user, scope=scope)
-        for user in User.objects.all()
+        for user in User.objects.using(db_alias).all()
         for scope in (Scope.CreateIIIFImage, Scope.UploadS3Image)
     ])
 
-- 
GitLab