From 525648fd5da5475dfd0ff7b271d223bd7c898bae Mon Sep 17 00:00:00 2001
From: Valentin Rigal <rigal@teklia.com>
Date: Wed, 27 Sep 2023 20:17:08 +0000
Subject: [PATCH] Serve metrics for Prometheus

---
 Dockerfile                                    |  8 +++----
 Dockerfile.binary                             |  4 ++--
 README.md                                     |  4 ++++
 .../documents/management/commands/gunicorn.py |  7 +++++--
 arkindex/metrics/__init__.py                  |  0
 arkindex/metrics/tests/__init__.py            |  0
 arkindex/metrics/tests/test_metrics_api.py    | 17 +++++++++++++++
 arkindex/metrics/urls.py                      |  7 +++++++
 arkindex/metrics/utils.py                     |  6 ++++++
 arkindex/metrics/views.py                     | 21 +++++++++++++++++++
 arkindex/project/config.py                    |  1 +
 arkindex/project/settings.py                  |  2 ++
 .../tests/config_samples/defaults.yaml        |  1 +
 .../project/tests/config_samples/errors.yaml  |  1 +
 .../tests/config_samples/override.yaml        |  1 +
 arkindex/project/urls.py                      |  2 ++
 16 files changed, 74 insertions(+), 8 deletions(-)
 create mode 100644 arkindex/metrics/__init__.py
 create mode 100644 arkindex/metrics/tests/__init__.py
 create mode 100644 arkindex/metrics/tests/test_metrics_api.py
 create mode 100644 arkindex/metrics/urls.py
 create mode 100644 arkindex/metrics/utils.py
 create mode 100644 arkindex/metrics/views.py

diff --git a/Dockerfile b/Dockerfile
index 28317a3598..5601f26760 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -42,10 +42,11 @@ RUN chown -R ark:teklia /backend_static
 # Copy Version file
 COPY VERSION /etc/arkindex.version
 
-HEALTHCHECK --start-period=1m --start-interval=1s --interval=1m --timeout=5s \
-  CMD wget --spider --quiet http://localhost/api/v1/public-key/ || exit 1
+# The check must target the port the server binds to ($PORT, expanded by the shell at run time)
+HEALTHCHECK --start-period=1m --interval=1m --timeout=5s \
+  CMD wget --spider --quiet http://localhost:$PORT/api/v1/public-key/ || exit 1
 
 # Run with Gunicorn
-ENV PORT 80
-EXPOSE 80
-CMD ["manage.py", "gunicorn", "--host=0.0.0.0"]
+ENV PORT 8000
+EXPOSE $PORT
+CMD manage.py gunicorn --host=0.0.0.0 --port $PORT
diff --git a/Dockerfile.binary b/Dockerfile.binary
index a4e90ae72e..5d13f2b757 100644
--- a/Dockerfile.binary
+++ b/Dockerfile.binary
@@ -93,5 +93,5 @@ HEALTHCHECK --start-period=1m --start-interval=1s --interval=1m --timeout=5s \
 
 # Run gunicorn server
 ENV PORT=80
-EXPOSE 80
-CMD ["arkindex", "gunicorn", "--host=0.0.0.0"]
+EXPOSE $PORT
+CMD arkindex gunicorn --host=0.0.0.0 --port $PORT
diff --git a/README.md b/README.md
index b21486dcd6..0f7632ab3b 100644
--- a/README.md
+++ b/README.md
@@ -181,3 +181,7 @@ We use [rq](https://python-rq.org/), integrated via [django-rq](https://pypi.org
 * Export a corpus to an SQLite database: `export_corpus`
 
 To run them, use `make worker` to start a RQ worker. You will need to have Redis running; `make slim` or `make` in the architecture will provide it. `make` in the architecture also provides a RQ worker running in Docker from a binary build.
+
+## Metrics
+The application serves metrics for Prometheus under the `/metrics` prefix.
+They are only served on a dedicated port, set with the `metrics_port` configuration option (3000 by default), thus separating the application from the metrics API.
diff --git a/arkindex/documents/management/commands/gunicorn.py b/arkindex/documents/management/commands/gunicorn.py
index 892441fc1f..accebace27 100644
--- a/arkindex/documents/management/commands/gunicorn.py
+++ b/arkindex/documents/management/commands/gunicorn.py
@@ -2,6 +2,7 @@ import multiprocessing
 import os
 import sys
 
+from django.conf import settings
 from django.core.management.base import BaseCommand, CommandError
 from django.core.wsgi import get_wsgi_application
 
@@ -19,7 +20,7 @@ class Command(BaseCommand):
         parser.add_argument(
             "--port",
             type=int,
-            help="Port to bind gunicorn",
+            help="Port to bind the Arkindex application",
             default=int(os.environ.get("PORT", 8000)),
         )
         parser.add_argument(
@@ -35,13 +36,17 @@ class Command(BaseCommand):
         except ImportError:
             raise CommandError("Gunicorn is not available")
 
+        # Validate with an explicit error rather than `assert`, which is stripped under `python -O`
+        if port == settings.PROMETHEUS_METRICS_PORT:
+            raise CommandError("Application and metrics should use different ports")
+
         # Calc max workers
         workers = (multiprocessing.cpu_count() * 2) + 1
         if max_workers > 0:
             workers = min(workers, max_workers)
 
         # Build bind string
-        bind = f"{host}:{port}"
+        bind = [f"{host}:{port}", f"{host}:{settings.PROMETHEUS_METRICS_PORT}"]
         self.stdout.write(f"Running server on {bind} with {workers} workers")
 
         # Do not send out CLI args to gunicorn as they are not compatible
diff --git a/arkindex/metrics/__init__.py b/arkindex/metrics/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/arkindex/metrics/tests/__init__.py b/arkindex/metrics/tests/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/arkindex/metrics/tests/test_metrics_api.py b/arkindex/metrics/tests/test_metrics_api.py
new file mode 100644
index 0000000000..ee5b51fba0
--- /dev/null
+++ b/arkindex/metrics/tests/test_metrics_api.py
@@ -0,0 +1,22 @@
+from django.test import override_settings
+from django.urls import reverse
+
+from arkindex.project.tests import FixtureAPITestCase
+
+
+class TestMetricsAPI(FixtureAPITestCase):
+    """Check that the metrics endpoint only answers on the configured metrics port."""
+
+    def test_metrics_base_wrong_port(self):
+        """Without a SERVER_PORT matching the metrics port, the endpoint returns a 404."""
+        response = self.client.get(reverse('metrics:base-metrics'))
+        self.assertEqual(response.status_code, 404)
+
+    # The port must be an integer: the view compares this setting against
+    # int(request.get_port()), so a string such as '42' would never match.
+    @override_settings(PROMETHEUS_METRICS_PORT=42, PUBLIC_HOSTNAME="hostname", ARKINDEX_ENV="test")
+    def test_metrics_base(self):
+        """A request on the metrics port returns the instance metric."""
+        response = self.client.get(reverse('metrics:base-metrics'), SERVER_PORT=42)
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.content, b'arkindex_instance{hostname="hostname", env="test"} 1')
diff --git a/arkindex/metrics/urls.py b/arkindex/metrics/urls.py
new file mode 100644
index 0000000000..27aa5dc8f6
--- /dev/null
+++ b/arkindex/metrics/urls.py
@@ -0,0 +1,7 @@
+from django.urls import path
+
+from arkindex.metrics.views import MetricsView
+
+metrics_urls = [  # Included by arkindex/project/urls.py under the `metrics/` prefix
+    path('', MetricsView.as_view(), name='base-metrics'),  # Reversed as `metrics:base-metrics`
+]
diff --git a/arkindex/metrics/utils.py b/arkindex/metrics/utils.py
new file mode 100644
index 0000000000..8c240723fe
--- /dev/null
+++ b/arkindex/metrics/utils.py
@@ -0,0 +1,24 @@
+def _escape_label_value(value):
+    """Escape backslashes, double quotes and line feeds in a label value."""
+    return str(value).replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
+
+
+def build_metric(label, attributes=None, value=1, timestamp=None):
+    """
+    Format a single sample line following the Prometheus text exposition format.
+
+    :param label: Name of the metric.
+    :param attributes: Optional mapping of label names to label values.
+    :param value: Value of the sample.
+    :param timestamp: Optional timestamp appended after the value.
+    :returns: A line such as `name{key="value"} 1`, without a trailing line feed.
+    """
+    # A None default avoids sharing one mutable dict between calls
+    if attributes is None:
+        attributes = {}
+    attrs_fmt = ', '.join(f'{k}="{_escape_label_value(v)}"' for k, v in attributes.items())
+    metric = f'{label}{{{attrs_fmt}}} {value}'
+    # Compare to None so that a timestamp of 0 is still written
+    if timestamp is not None:
+        metric = f'{metric} {timestamp}'
+    return metric
diff --git a/arkindex/metrics/views.py b/arkindex/metrics/views.py
new file mode 100644
index 0000000000..b2db53085c
--- /dev/null
+++ b/arkindex/metrics/views.py
@@ -0,0 +1,26 @@
+from django.conf import settings
+from django.http import Http404, HttpResponse
+from django.views import View
+
+from arkindex.metrics.utils import build_metric
+
+
+class MetricsView(View):
+    """
+    Expose the instance metric for Prometheus as plain text.
+    Requests received on any port other than PROMETHEUS_METRICS_PORT get a 404.
+    """
+
+    def get(self, request, *args, **kwargs):
+        metrics_port = settings.PROMETHEUS_METRICS_PORT
+        request_port = int(request.get_port())
+        if metrics_port != request_port:
+            raise Http404()
+
+        # Labels identifying this instance
+        instance_labels = {
+            'hostname': settings.PUBLIC_HOSTNAME,
+            'env': settings.ARKINDEX_ENV,
+        }
+        body = build_metric('arkindex_instance', instance_labels)
+        return HttpResponse(body, content_type="text/plain")
diff --git a/arkindex/project/config.py b/arkindex/project/config.py
index c98d3211eb..8cae2633c5 100644
--- a/arkindex/project/config.py
+++ b/arkindex/project/config.py
@@ -88,6 +88,7 @@ def get_settings_parser(base_dir):
     parser.add_option('robots_txt_disallow', type=str, many=True, default=[])
     parser.add_option('public_hostname', type=public_hostname)
     parser.add_option('worker_activity_timeout', type=int, default=3600)
+    parser.add_option('metrics_port', type=int, default=3000)
 
     # SECURITY WARNING: keep the secret key used in production secret!
     parser.add_option('secret_key', type=str, default='jf0w^y&ml(caax8f&a1mub)(js9(l5mhbbhosz3gi+m01ex+lo')
diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py
index 1107e6e4c6..002fe5cc36 100644
--- a/arkindex/project/settings.py
+++ b/arkindex/project/settings.py
@@ -54,6 +54,8 @@ WORKER_ACTIVITY_TIMEOUT = conf['worker_activity_timeout']
 
 PUBLIC_HOSTNAME = conf['public_hostname']
 
+PROMETHEUS_METRICS_PORT = conf['metrics_port']
+
 
 # Database
 def _conf_to_django_db(config):
diff --git a/arkindex/project/tests/config_samples/defaults.yaml b/arkindex/project/tests/config_samples/defaults.yaml
index 1f2531b69c..6ece4c85c6 100644
--- a/arkindex/project/tests/config_samples/defaults.yaml
+++ b/arkindex/project/tests/config_samples/defaults.yaml
@@ -67,6 +67,7 @@ license:
   key: null
   ping_frequency: 1800
 local_imageserver_id: 1
+metrics_port: 3000
 ponos:
   artifact_max_size: 5368709120
   default_env: {}
diff --git a/arkindex/project/tests/config_samples/errors.yaml b/arkindex/project/tests/config_samples/errors.yaml
index bee83a7c02..8917b84a1c 100644
--- a/arkindex/project/tests/config_samples/errors.yaml
+++ b/arkindex/project/tests/config_samples/errors.yaml
@@ -50,6 +50,7 @@ license:
   key: arkindex-test-deadbeef1234
   ping_frequency: plop
 local_imageserver_id: 1
+metrics_port: 12
 ponos:
   artifact_max_size: .nan
   default_env: {}
diff --git a/arkindex/project/tests/config_samples/override.yaml b/arkindex/project/tests/config_samples/override.yaml
index 11d8487bd7..0fbb32b3b5 100644
--- a/arkindex/project/tests/config_samples/override.yaml
+++ b/arkindex/project/tests/config_samples/override.yaml
@@ -81,6 +81,7 @@ license:
   key: arkindex-test-deadbeef1234
   ping_frequency: 120
 local_imageserver_id: 45
+metrics_port: 4242
 ponos:
   artifact_max_size: 12345678901234567890
   default_env:
diff --git a/arkindex/project/urls.py b/arkindex/project/urls.py
index ad6841f082..de393c87a5 100644
--- a/arkindex/project/urls.py
+++ b/arkindex/project/urls.py
@@ -3,6 +3,7 @@ from django.contrib import admin
 from django.contrib.staticfiles.urls import staticfiles_urlpatterns
 from django.urls import include, path, re_path
 
+from arkindex.metrics.urls import metrics_urls
 from arkindex.project.api_v1 import api
 from arkindex.project.views import CdnHome, FrontendView, OpenAPIDocsView, RobotsTxt
 
@@ -11,6 +12,7 @@ frontend_view = FrontendView if settings.CDN_ASSETS_URL is None else CdnHome
 
 urlpatterns = [
     path('api/v1/', include((api, 'api'), namespace='api')),
+    path('metrics/', include((metrics_urls, 'metrics'), namespace='metrics')),
     path('api-docs/', OpenAPIDocsView.as_view(), name='openapi-docs'),
     path('admin/', admin.site.urls),
     path('rq/', include('django_rq.urls')),
-- 
GitLab