From e0135b0401348ad1753c32e1978d9e7aab5897d3 Mon Sep 17 00:00:00 2001 From: Erwan Rouchet <rouchet@teklia.com> Date: Thu, 18 Jun 2020 12:05:40 +0000 Subject: [PATCH] Basic multi database support --- .../dataimport/migrations/0008_add_gitref.py | 3 +- .../documents/migrations/0006_region_fk.py | 2 ++ arkindex/project/config.py | 7 +++++ arkindex/project/routers.py | 27 ++++++++++++++++ arkindex/project/settings.py | 31 ++++++++++++++----- .../tests/config_samples/defaults.yaml | 1 + .../tests/config_samples/override.yaml | 6 ++++ arkindex/users/migrations/0001_initial.py | 3 +- arkindex/users/migrations/0002_userscope.py | 5 +-- 9 files changed, 73 insertions(+), 12 deletions(-) create mode 100644 arkindex/project/routers.py diff --git a/arkindex/dataimport/migrations/0008_add_gitref.py b/arkindex/dataimport/migrations/0008_add_gitref.py index 2a17edaf8e..1ab9b71b2f 100644 --- a/arkindex/dataimport/migrations/0008_add_gitref.py +++ b/arkindex/dataimport/migrations/0008_add_gitref.py @@ -17,8 +17,9 @@ def migrate_git_refs(apps, schema_editor): which case the GitRef object is linked to the latest version returned by the default DB order. These values will be updated later when the GitLab webhook will be triggered. """ + db_alias = schema_editor.connection.alias Revision = apps.get_model('dataimport', 'Revision') - for rev in Revision.objects.filter(ref__isnull=False): + for rev in Revision.objects.using(db_alias).filter(ref__isnull=False): ref = rev.repo.refs.filter(name=rev.ref).first() if ref: ref.revision = rev diff --git a/arkindex/documents/migrations/0006_region_fk.py b/arkindex/documents/migrations/0006_region_fk.py index 01e538b298..fdb8f4e271 100644 --- a/arkindex/documents/migrations/0006_region_fk.py +++ b/arkindex/documents/migrations/0006_region_fk.py @@ -6,9 +6,11 @@ def check_m2m(apps, schema_editor): Ensure the RegionElement many-to-many relationship only has one element per region, making it possible to turn it into a foreign key. """ + db_alias = schema_editor.connection.alias RegionElement = apps.get_model('documents', 'RegionElement') duplicates = RegionElement \ .objects \ + .using(db_alias) \ .values('region_id') \ .annotate(count=models.Count('id')) \ .filter(count__gt=1) diff --git a/arkindex/project/config.py b/arkindex/project/config.py index 31439c5e80..74e6596029 100644 --- a/arkindex/project/config.py +++ b/arkindex/project/config.py @@ -38,6 +38,13 @@ def get_settings_parser(base_dir): database_parser.add_option('user', type=str, default='devuser') database_parser.add_option('password', type=str, default='devdata') + replica_parser = database_parser.add_subparser('replica', default=None) + replica_parser.add_option('name', type=str, default=None) + replica_parser.add_option('host', type=str, default=None) + replica_parser.add_option('port', type=int, default=None) + replica_parser.add_option('user', type=str, default=None) + replica_parser.add_option('password', type=str, default=None) + email_parser = parser.add_subparser('email', default=None) email_parser.add_option('host', type=str) email_parser.add_option('port', type=int) diff --git a/arkindex/project/routers.py b/arkindex/project/routers.py new file mode 100644 index 0000000000..6caccbe1a6 --- /dev/null +++ b/arkindex/project/routers.py @@ -0,0 +1,27 @@ +class PrimaryReplicaRouter: + def db_for_read(self, model, **hints): + """ + Reads go to the read-only replica. + """ + return 'replica' + + def db_for_write(self, model, **hints): + """ + Writes always go to primary. + """ + return 'default' + + def allow_relation(self, obj1, obj2, **hints): + """ + Relations between objects are allowed if both objects are in the primary/replica pool. + """ + db_list = ('default', 'replica') + if obj1._state.db in db_list and obj2._state.db in db_list: + return True + return None + + def allow_migrate(self, db, app_label, model_name=None, **hints): + """ + All migrations should only run on the primary. + """ + return db == 'default' diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py index 7a02f85bcc..a196372ca9 100644 --- a/arkindex/project/settings.py +++ b/arkindex/project/settings.py @@ -44,17 +44,32 @@ ALLOWED_HOSTS = conf['allowed_hosts'] # Docker container name resolution ALLOWED_HOSTS += ['127.0.0.1', 'localhost', 'backend', 'ark-backend'] + # Database -DATABASES = { - 'default': { +def _conf_to_django_db(config): + "Turn a YAML database config into a Django database config" + return { 'ENGINE': 'django.db.backends.postgresql', - 'NAME': conf['database']['name'], - 'USER': conf['database']['user'], - 'PASSWORD': conf['database']['password'], - 'HOST': conf['database']['host'], - 'PORT': conf['database']['port'], + 'NAME': config['name'], + 'USER': config['user'], + 'PASSWORD': config['password'], + 'HOST': config['host'], + 'PORT': config['port'], } -} + + +_primary_db = _conf_to_django_db(conf['database']) +DATABASES = {'default': _primary_db} + +if conf['database']['replica'] is not None: + # Make the replica's options default to the primary's + _replica_db = _conf_to_django_db(conf['database']['replica']) + for key, value in _primary_db.items(): + if not _replica_db.get(key): + _replica_db[key] = value + + DATABASES['replica'] = _replica_db + DATABASE_ROUTERS = ['arkindex.project.routers.PrimaryReplicaRouter'] # SECURITY WARNING: don't run with debug turned on in production! DEBUG = ARKINDEX_ENV == 'dev' diff --git a/arkindex/project/tests/config_samples/defaults.yaml b/arkindex/project/tests/config_samples/defaults.yaml index 661df06079..ac3f994928 100644 --- a/arkindex/project/tests/config_samples/defaults.yaml +++ b/arkindex/project/tests/config_samples/defaults.yaml @@ -20,6 +20,7 @@ database: name: arkindex_dev password: devdata port: 9100 + replica: null user: devuser docker: tasks_image: registry.gitlab.com/arkindex/tasks diff --git a/arkindex/project/tests/config_samples/override.yaml b/arkindex/project/tests/config_samples/override.yaml index acd4a45b4a..8885614b2a 100644 --- a/arkindex/project/tests/config_samples/override.yaml +++ b/arkindex/project/tests/config_samples/override.yaml @@ -22,6 +22,12 @@ database: name: arkindex_db password: hunter2 port: 9123 + replica: + host: ghost + name: arkindex_db2 + password: password + port: 1 + user: postgres user: littlebobbytables docker: tasks_image: registry.gitlab.com/arkindex/stonks diff --git a/arkindex/users/migrations/0001_initial.py b/arkindex/users/migrations/0001_initial.py index 51dbb2b9c6..12b9188053 100644 --- a/arkindex/users/migrations/0001_initial.py +++ b/arkindex/users/migrations/0001_initial.py @@ -14,8 +14,9 @@ def create_internal_group(apps, schema_editor): In the event of a migrations reset, this should be kept to make deployments simpler. """ + db_alias = schema_editor.connection.alias Group = apps.get_model('auth', 'Group') - Group.objects.create(id=settings.INTERNAL_GROUP_ID, name='Internal') + Group.objects.using(db_alias).create(id=settings.INTERNAL_GROUP_ID, name='Internal') class Migration(migrations.Migration): diff --git a/arkindex/users/migrations/0002_userscope.py b/arkindex/users/migrations/0002_userscope.py index 646520fb76..09e37d18b2 100644 --- a/arkindex/users/migrations/0002_userscope.py +++ b/arkindex/users/migrations/0002_userscope.py @@ -8,11 +8,12 @@ import enumfields.fields def add_scopes(apps, schema_editor): + db_alias = schema_editor.connection.alias User = apps.get_model('users', 'User') UserScope = apps.get_model('users', 'UserScope') - UserScope.objects.bulk_create([ + UserScope.objects.using(db_alias).bulk_create([ UserScope(user=user, scope=scope) - for user in User.objects.all() + for user in User.objects.using(db_alias).all() for scope in (Scope.CreateIIIFImage, Scope.UploadS3Image) ]) -- GitLab