From 2e3e9c884fc70e495a44e3bbe5ae870c12459423 Mon Sep 17 00:00:00 2001
From: ml bonhomme <bonhomme@teklia.com>
Date: Mon, 6 May 2024 09:35:21 +0000
Subject: [PATCH] Add 'mode' field on ponos agent model, and make hardware requirements nullable

---
Notes (placed after the "---" separator, so they are not part of the commit
message):
 * Adds the AgentMode enum ("docker" / "slurm") and an Agent.mode field that
   defaults to AgentMode.Docker.
 * Agent.cpu_cores, Agent.cpu_frequency and Agent.ram_total become nullable.
 * Adds the "slurm_or_hardware_requirements" check constraint: an agent must
   either be in Slurm mode or have all three hardware fields set.
 * Adds model tests for the default mode, the Slurm mode, an invalid mode
   value and the new check constraint.

 arkindex/ponos/migrations/0008_agent_mode.py |  41 +++++
 arkindex/ponos/models.py                     |  20 ++-
 arkindex/ponos/tests/test_models.py          | 161 ++++++++++++++++++-
 3 files changed, 218 insertions(+), 4 deletions(-)
 create mode 100644 arkindex/ponos/migrations/0008_agent_mode.py

diff --git a/arkindex/ponos/migrations/0008_agent_mode.py b/arkindex/ponos/migrations/0008_agent_mode.py
new file mode 100644
index 0000000000..f0e8f2ae83
--- /dev/null
+++ b/arkindex/ponos/migrations/0008_agent_mode.py
@@ -0,0 +1,41 @@
+# Generated by Django 4.1.7 on 2024-04-29 08:59
+
+import django.core.validators
+import enumfields.fields
+from django.db import migrations, models
+
+import arkindex.ponos.models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("ponos", "0007_remove_task_has_docker_socket"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="agent",
+            name="mode",
+            field=enumfields.fields.EnumField(default="docker", enum=arkindex.ponos.models.AgentMode, max_length=20),
+        ),
+        migrations.AlterField(
+            model_name="agent",
+            name="cpu_cores",
+            field=models.PositiveSmallIntegerField(blank=True, null=True, validators=[django.core.validators.MinValueValidator(1)]),
+        ),
+        migrations.AlterField(
+            model_name="agent",
+            name="cpu_frequency",
+            field=models.BigIntegerField(blank=True, null=True, validators=[django.core.validators.MinValueValidator(1)]),
+        ),
+        migrations.AlterField(
+            model_name="agent",
+            name="ram_total",
+            field=models.BigIntegerField(blank=True, null=True, validators=[django.core.validators.MinValueValidator(1)]),
+        ),
+        migrations.AddConstraint(
+            model_name="agent",
+            constraint=models.CheckConstraint(check=models.Q(("mode", arkindex.ponos.models.AgentMode["Slurm"]), models.Q(("cpu_cores__isnull", False), ("cpu_frequency__isnull", False), ("ram_total__isnull", False)), _connector="OR"), name="slurm_or_hardware_requirements"),
+        ),
+    ]
diff --git a/arkindex/ponos/models.py b/arkindex/ponos/models.py
index 2eaaee05e7..26e6186920 100644
--- a/arkindex/ponos/models.py
+++ b/arkindex/ponos/models.py
@@ -66,6 +66,11 @@ class Farm(models.Model):
         return True
 
 
+class AgentMode(Enum):
+    Docker = "docker"
+    Slurm = "slurm"
+
+
 class Agent(models.Model):
     """
     A remote host that can run tasks.
@@ -76,18 +81,27 @@ class Agent(models.Model):
     updated = models.DateTimeField(auto_now=True)
     farm = models.ForeignKey(Farm, on_delete=models.PROTECT)
     public_key = models.TextField()
+    mode = EnumField(AgentMode, default=AgentMode.Docker, max_length=20)
 
     hostname = models.SlugField(max_length=64, db_index=False)
-    cpu_cores = models.PositiveSmallIntegerField(validators=[MinValueValidator(1)])
-    cpu_frequency = models.BigIntegerField(validators=[MinValueValidator(1)])
+    cpu_cores = models.PositiveSmallIntegerField(null=True, blank=True, validators=[MinValueValidator(1)])
+    cpu_frequency = models.BigIntegerField(null=True, blank=True, validators=[MinValueValidator(1)])
     # Total amount of RAM on this agent in bytes
-    ram_total = models.BigIntegerField(validators=[MinValueValidator(1)])
+    ram_total = models.BigIntegerField(null=True, blank=True, validators=[MinValueValidator(1)])
     # Last minute average CPU load measure on this agent
     cpu_load = models.FloatField(null=True, blank=True)
     # Last RAM load measure expressed as a percentage (0 ≤ ram_load ≤ 1)
     ram_load = models.FloatField(null=True, blank=True)
     last_ping = models.DateTimeField(editable=False)
 
+    class Meta:
+        constraints = [
+            models.CheckConstraint(
+                check=Q(mode=AgentMode.Slurm) | Q(cpu_cores__isnull=False, cpu_frequency__isnull=False, ram_total__isnull=False),
+                name="slurm_or_hardware_requirements",
+            ),
+        ]
+
     def __str__(self) -> str:
         return self.hostname
 
diff --git a/arkindex/ponos/tests/test_models.py b/arkindex/ponos/tests/test_models.py
index f8e884911f..8492b07765 100644
--- a/arkindex/ponos/tests/test_models.py
+++ b/arkindex/ponos/tests/test_models.py
@@ -1,9 +1,12 @@
 from unittest.mock import patch
 
+from django.core.exceptions import ValidationError
+from django.db import transaction
 from django.db.models import prefetch_related_objects
+from django.db.utils import IntegrityError
 from django.utils import timezone
 
-from arkindex.ponos.models import FINAL_STATES, State
+from arkindex.ponos.models import FINAL_STATES, Agent, AgentMode, Farm, State
 from arkindex.process.models import ProcessMode
 from arkindex.project.tests import FixtureAPITestCase
 
@@ -12,6 +15,7 @@ class TestModels(FixtureAPITestCase):
     @classmethod
     def setUpTestData(cls):
         super().setUpTestData()
+        cls.farm = Farm.objects.create(name="Invisible corn farm")
         cls.process = cls.corpus.processes.create(
             creator=cls.user,
             mode=ProcessMode.Workers,
@@ -132,3 +136,158 @@ class TestModels(FixtureAPITestCase):
         self.assertEqual(self.process.get_state(0), State.Running)
         self.assertEqual(self.process.get_state(1), State.Unscheduled)
         self.assertEqual(self.process.get_state(-1), State.Unscheduled)
+
+    def test_agent_default_mode_docker(self):
+        Agent.objects.create(
+            hostname="agent_smith",
+            cpu_cores=2,
+            cpu_frequency=4.2e9,
+            public_key="",
+            farm=self.farm,
+            ram_total=2e9,
+            last_ping=timezone.now(),
+            ram_load=0.49,
+            cpu_load=0.99
+        )
+
+        test_agent=Agent.objects.get(hostname="agent_smith")
+        self.assertEqual(test_agent.mode, AgentMode.Docker)
+
+    def test_agent_slurm_mode(self):
+        Agent.objects.create(
+            hostname="agent_smith",
+            cpu_cores=2,
+            cpu_frequency=4.2e9,
+            public_key="",
+            farm=self.farm,
+            ram_total=2e9,
+            last_ping=timezone.now(),
+            ram_load=0.49,
+            cpu_load=0.99,
+            mode=AgentMode.Slurm.value
+        )
+
+        test_agent=Agent.objects.get(hostname="agent_smith")
+        self.assertEqual(test_agent.mode, AgentMode.Slurm)
+
+    def test_agent_invalid_mode(self):
+        with self.assertRaisesRegex(ValidationError, "bad_mode is not a valid value for enum"):
+            Agent.objects.create(
+                hostname="agent_smith",
+                cpu_cores=2,
+                cpu_frequency=4.2e9,
+                public_key="",
+                farm=self.farm,
+                ram_total=2e9,
+                last_ping=timezone.now(),
+                ram_load=0.49,
+                cpu_load=0.99,
+                mode="bad_mode"
+            )
+
+    def test_agent_mode_hardware_constraint(self):
+        """
+        When the agent's mode is not AgentMode.Slurm, the hardware requirement parameters
+        cannot be null. When the agent's mode is AgentMode.Slurm, they can be null or not.
+        """
+        cases = [
+            {
+                # no cpu_cores
+                "mode": AgentMode.Docker,
+                "params": {
+                    "hostname": "agent_smith",
+                    "cpu_frequency": 4.2e9,
+                    "public_key": "",
+                    "farm": self.farm,
+                    "ram_total": 2e9,
+                    "last_ping": timezone.now(),
+                    "ram_load": 0.49,
+                    "cpu_load": 0.99,
+                },
+                "failure": True
+            },
+            {
+                # no cpu_frequency
+                "mode": AgentMode.Docker,
+                "params": {
+                    "hostname": "agent_smith",
+                    "cpu_cores": 2,
+                    "public_key": "",
+                    "farm": self.farm,
+                    "ram_total": 2e9,
+                    "last_ping": timezone.now(),
+                    "ram_load": 0.49,
+                    "cpu_load": 0.99,
+                },
+                "failure": True
+            },
+            {
+                # no ram_total
+                "mode": AgentMode.Docker,
+                "params": {
+                    "hostname": "agent_smith",
+                    "cpu_cores": 2,
+                    "cpu_frequency": 4.2e9,
+                    "public_key": "",
+                    "farm": self.farm,
+                    "last_ping": timezone.now(),
+                    "ram_load": 0.49,
+                    "cpu_load": 0.99,
+                },
+                "failure": True
+            },
+            {
+                # hardware parameters set to None
+                "mode": AgentMode.Docker,
+                "params": {
+                    "hostname": "agent_smith",
+                    "cpu_cores": None,
+                    "cpu_frequency": None,
+                    "public_key": "",
+                    "farm": self.farm,
+                    "ram_total": None,
+                    "last_ping": timezone.now(),
+                    "ram_load": 0.49,
+                    "cpu_load": 0.99,
+                },
+                "failure": True
+            },
+            {
+                # slurm mode with hardware requirements
+                "mode": AgentMode.Slurm,
+                "params": {
+                    "hostname": "agent_smith",
+                    "cpu_cores": 2,
+                    "cpu_frequency": 4.2e9,
+                    "public_key": "",
+                    "farm": self.farm,
+                    "ram_total": 2e9,
+                    "last_ping": timezone.now(),
+                    "ram_load": 0.49,
+                    "cpu_load": 0.99,
+                },
+                "failure": False
+            },
+            {
+                # slurm mode without hardware requirements
+                "mode": AgentMode.Slurm,
+                "params": {
+                    "hostname": "agent_smith",
+                    "public_key": "",
+                    "farm": self.farm,
+                    "last_ping": timezone.now(),
+                    "ram_load": 0.49,
+                    "cpu_load": 0.99,
+                },
+                "failure": False
+            },
+        ]
+        for case in cases:
+            with self.subTest(case=case):
+                if case["failure"]:
+                    # Each create must be run in an atomic block to avoid TransactionManagementError
+                    with transaction.atomic():
+                        with self.assertRaisesRegex(IntegrityError, "slurm_or_hardware_requirements"):
+                            Agent.objects.create(**case["params"], mode=case["mode"])
+                else:
+                    Agent.objects.create(**case["params"], mode=case["mode"])
-- 
GitLab