Skip to content
Snippets Groups Projects
Commit f105d48c authored by Bastien Abadie, committed by Erwan Rouchet
Browse files

Extend metrics with tasks & agents

parent 3cd9a581
No related branches found
No related tags found
1 merge request: !2361 "Extend metrics with tasks & agents"
from datetime import datetime
from django.test import override_settings
from django.urls import reverse
from arkindex.ponos.models import Agent, AgentMode, Farm, State
from arkindex.process.models import Process, ProcessMode
from arkindex.project.tests import FixtureAPITestCase
......@@ -15,3 +19,40 @@ class TestMetricsAPI(FixtureAPITestCase):
response = self.client.get(reverse("metrics:base-metrics"), SERVER_PORT=42)
self.assertEqual(response.status_code, 200)
self.assertEqual(response.content, b'arkindex_instance{hostname="hostname", env="test"} 1')
@override_settings(PROMETHEUS_METRICS_PORT=42, PUBLIC_HOSTNAME="hostname", ARKINDEX_ENV="test")
def test_metrics_tasks(self):
    """Check the metrics output once tasks and an agent exist.

    The expected payload holds the instance metric, one ``arkindex_tasks``
    line per (state, farm) group for the Pending and Unscheduled tasks, and
    one ``arkindex_agent_ping`` line for the agent. The task set to Error
    has no line in the expected output.
    """
    # Local import: use Django's clock helper rather than a naive
    # datetime.now(), so the value stored in the DateTimeField and the
    # timestamp computed below do not depend on the machine's local timezone.
    from django.utils import timezone

    # Retrieve objects
    process = Process.objects.get(mode=ProcessMode.Workers)
    process.run()
    task1, task2, task3 = process.tasks.all().order_by("depth")
    farm = Farm.objects.first()
    agent = Agent.objects.create(
        mode=AgentMode.Docker,
        hostname="Demo Agent",
        farm=farm,
        last_ping=timezone.now(),
        cpu_cores=42,
        cpu_frequency=42e8,
        ram_total=42e3,
    )
    # The view is expected to expose the ping as an integer Unix timestamp
    last_ping = int(agent.last_ping.timestamp())

    # Put each task in a different state
    task1.state = State.Pending
    task1.save()
    task2.state = State.Error
    task2.save()
    task3.state = State.Unscheduled
    task3.save()

    response = self.client.get(reverse("metrics:base-metrics"), SERVER_PORT=42)
    self.assertEqual(response.status_code, 200)

    # One metric per line, no trailing newline
    metrics = "\n".join([
        'arkindex_instance{hostname="hostname", env="test"} 1',
        'arkindex_tasks{hostname="hostname", env="test", state="Pending", farm="Wheat farm"} 1',
        'arkindex_tasks{hostname="hostname", env="test", state="Unscheduled", farm="Wheat farm"} 1',
        'arkindex_agent_ping{hostname="hostname", env="test", agent_name="Demo Agent", farm="Wheat farm"} '
        + str(last_ping),
    ])
    self.assertEqual(response.content, metrics.encode("utf-8"))
def _render_attribute(key, value):
if value is None:
value = "null"
return f'{key}="{value}"'
def build_metric(label, attributes={}, value=1, timestamp=None):
attrs_fmt = ", ".join(["=".join((k, f'"{v}"')) for k, v in attributes.items()])
attrs_fmt = ", ".join([_render_attribute(k, v) for k, v in attributes.items()])
metric = f"{label}{{{attrs_fmt}}} {value}"
if timestamp:
metric = f"{metric} {timestamp}"
......
from django.conf import settings
from django.db.models import Count
from django.http import Http404, HttpResponse
from django.views import View
from arkindex.metrics.utils import build_metric
from arkindex.ponos.models import Agent, Farm, State, Task
class MetricsView(View):
    """Serve instance, task and agent metrics as a plain-text response.

    The view only answers requests received on the port configured in
    ``settings.PROMETHEUS_METRICS_PORT``; any other port gets a 404.
    """

    def get(self, request, *args, **kwargs):
        """Return one metric per line: instance, task counts, agent pings.

        Raises:
            Http404: when the request port differs from
                ``settings.PROMETHEUS_METRICS_PORT``.
        """
        if settings.PROMETHEUS_METRICS_PORT != int(request.get_port()):
            raise Http404()

        # Attributes attached to every metric
        common_attributes = {
            "hostname": settings.PUBLIC_HOSTNAME,
            "env": settings.ARKINDEX_ENV
        }

        # Count nb of tasks in pending or unscheduled states
        # grouped by farm
        tasks_count = (
            Task.objects
            .filter(state__in=(State.Pending, State.Unscheduled))
            .values("process__farm", "state")
            .annotate(nb=Count("id"))
        )

        # Load all ponos agents details
        agents = Agent.objects.all().values("hostname", "farm_id", "last_ping")

        # Load all farms to use in attributes of agents & tasks;
        # maps a farm ID to its name
        farms = dict(Farm.objects.all().values_list("id", "name"))

        metrics = [
            # Backend ping
            build_metric(
                "arkindex_instance",
                attributes=common_attributes
            ),
        ] + [
            # Pending or Unscheduled tasks count
            build_metric(
                "arkindex_tasks",
                # farms.get yields None when the farm ID is not in the mapping
                attributes={**common_attributes, "state": t["state"], "farm": farms.get(t["process__farm"])},
                value=t["nb"]
            )
            for t in tasks_count
        ] + [
            # Ponos agent last known ping, as an integer Unix timestamp
            build_metric(
                "arkindex_agent_ping",
                attributes={**common_attributes, "agent_name": a["hostname"], "farm": farms.get(a["farm_id"])},
                value=int(a["last_ping"].timestamp())
            )
            for a in agents
        ]

        # Render text response with all metrics
        return HttpResponse(
            "\n".join(metrics),
            content_type="text/plain"
        )
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment