Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • arkindex/backend
1 result
Show changes
Commits on Source (5)
1.6.0
1.6.1-beta1
import uuid
from textwrap import dedent
from django.db import transaction
from django.utils.functional import cached_property
......@@ -300,16 +301,22 @@ class CorpusMLClassPagination(PageNumberPagination):
@extend_schema_view(
get=extend_schema(
operation_id="ListCorpusMLClasses",
description=dedent("""
List available classes in a corpus.
Requires a **guest** access to the corpus.
"""),
),
post=extend_schema(
operation_id="CreateMLClass",
description="Create an ML class in a corpus",
description=dedent("""
Create an ML class in a corpus.
Requires an **admin** access to the corpus.
"""),
)
)
class CorpusMLClassList(CorpusACLMixin, ListCreateAPIView):
"""
List available classes in a corpus
"""
serializer_class = MLClassSerializer
pagination_class = CorpusMLClassPagination
# For OpenAPI type discovery: a corpus ID is in the path
......@@ -322,7 +329,7 @@ class CorpusMLClassList(CorpusACLMixin, ListCreateAPIView):
def corpus(self):
role = Role.Guest
if self.request.method == "POST":
role = Role.Contributor
role = Role.Admin
return self.get_corpus(self.kwargs["pk"], role=role)
def check_permissions(self, *args, **kwargs):
......@@ -357,10 +364,26 @@ class CorpusMLClassList(CorpusACLMixin, ListCreateAPIView):
@extend_schema(tags=["classifications"])
@extend_schema_view(
get=extend_schema(description="Retrieve a ML class."),
patch=extend_schema(description="Rename a ML class."),
put=extend_schema(description="Rename a ML class."),
delete=extend_schema(description="Delete a ML class if it is not used by any classification."),
get=extend_schema(description=dedent("""
Retrieve an ML class.
Requires a **guest** access to the corpus.
""")),
patch=extend_schema(description=dedent("""
Rename an ML class.
Requires an **admin** access to the corpus.
""")),
put=extend_schema(description=dedent("""
Rename an ML class.
Requires an **admin** access to the corpus.
""")),
delete=extend_schema(description=dedent("""
Delete an ML class if it is not used by any classification.
Requires an **admin** access to the corpus.
""")),
)
class MLClassRetrieve(CorpusACLMixin, RetrieveUpdateDestroyAPIView):
serializer_class = MLClassSerializer
......@@ -372,7 +395,7 @@ class MLClassRetrieve(CorpusACLMixin, RetrieveUpdateDestroyAPIView):
def corpus(self):
role = Role.Guest
if self.request and self.request.method != "GET":
role = Role.Contributor
role = Role.Admin
return self.get_corpus(self.kwargs["corpus"], role=role)
......
......@@ -49,7 +49,7 @@ class TestDeleteCorpus(FixtureTestCase):
)
element_process.elements.add(element)
worker_run = element_process.worker_runs.create(version=cls.worker_version, parents=[])
task_1, task_2, task_3 = Task.objects.bulk_create(
task_1, task_2, task_3, task_4 = Task.objects.bulk_create(
[
Task(
run=0,
......@@ -58,11 +58,15 @@ class TestDeleteCorpus(FixtureTestCase):
worker_run=worker_run,
slug=f"unscheduled task {i}",
state=State.Unscheduled,
) for i in range(1, 4)
) for i in range(0, 4)
]
)
task_1.parents.set([task_2])
task_3.parents.set([task_1, task_2])
task_3.slug += "_old1"
task_4.original_task_id = task_3.id
task_3.save()
task_4.save()
element.worker_run = worker_run
element.worker_version = cls.worker_version
element.save()
......
......@@ -236,12 +236,12 @@ class TestClasses(FixtureAPITestCase):
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
@patch("arkindex.project.mixins.has_access", return_value=False)
def test_update_requires_contributor(self, has_access_mock):
self.user.rights.update(level=Role.Guest.value)
def test_update_requires_admin(self, has_access_mock):
self.user.rights.update(level=Role.Contributor.value)
self.client.force_login(self.user)
response = self.client.put(reverse("api:ml-class-retrieve", kwargs={"corpus": self.corpus.id, "mlclass": self.text.id}), {"name": "new name"})
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
self.assertDictEqual(response.json(), {"detail": "You do not have contributor access to this corpus."})
self.assertDictEqual(response.json(), {"detail": "You do not have admin access to this corpus."})
def test_partial_update(self):
self.client.force_login(self.superuser)
......@@ -266,12 +266,12 @@ class TestClasses(FixtureAPITestCase):
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
@patch("arkindex.project.mixins.has_access", return_value=False)
def test_partial_update_requires_contributor(self, has_access_mock):
self.user.rights.update(level=Role.Guest.value)
def test_partial_update_requires_admin(self, has_access_mock):
self.user.rights.update(level=Role.Contributor.value)
self.client.force_login(self.user)
response = self.client.patch(reverse("api:ml-class-retrieve", kwargs={"corpus": self.corpus.id, "mlclass": self.text.id}), {"name": "new name"})
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
self.assertDictEqual(response.json(), {"detail": "You do not have contributor access to this corpus."})
self.assertDictEqual(response.json(), {"detail": "You do not have admin access to this corpus."})
def test_destroy(self):
self.client.force_login(self.superuser)
......@@ -300,12 +300,12 @@ class TestClasses(FixtureAPITestCase):
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
@patch("arkindex.project.mixins.has_access", return_value=False)
def test_destroy_requires_contributor(self, has_access_mock):
self.user.rights.update(level=Role.Guest.value)
def test_destroy_requires_admin(self, has_access_mock):
self.user.rights.update(level=Role.Contributor.value)
self.client.force_login(self.user)
response = self.client.delete(reverse("api:ml-class-retrieve", kwargs={"corpus": self.corpus.id, "mlclass": self.text.id}))
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
self.assertDictEqual(response.json(), {"detail": "You do not have contributor access to this corpus."})
self.assertDictEqual(response.json(), {"detail": "You do not have admin access to this corpus."})
def test_list_elements_db_queries(self):
with self.assertNumQueries(5):
......
......@@ -41,6 +41,7 @@ class TaskAdmin(admin.ModelAdmin):
"updated",
"container",
"shm_size",
"original_task",
)
fieldsets = (
(
......@@ -54,6 +55,7 @@ class TaskAdmin(admin.ModelAdmin):
"state",
"process",
"priority",
"original_task",
),
},
),
......
......@@ -223,12 +223,9 @@ class TaskRestart(ProcessACLMixin, CreateAPIView):
raise ValidationError(
detail="Task's state must be in a final state to be restarted."
)
# TODO Check the original_task_id field directly once it is implemented
# https://gitlab.teklia.com/arkindex/frontend/-/issues/1383
_, *suffix = task.slug.rsplit("_old", 1)
if suffix:
if task.restarts.exists():
raise ValidationError(
detail="This task has already been restarted"
detail="This task has already been restarted."
)
return task
......@@ -238,7 +235,10 @@ class TaskRestart(ProcessACLMixin, CreateAPIView):
parents = list(copy.parents.all())
# Rename the original task
basename, *_ = copy.slug.rsplit("_old", 1)
if copy.original_task_id:
basename, *_ = copy.slug.rsplit("_old", 1)
else:
basename = copy.slug
latest_task = Task.objects.filter(run=copy.run, slug__startswith=f"{basename}_old").order_by("-created").first()
if not latest_task:
# There is no previously restarted task: the original task will have the slug slug_old1
......@@ -251,6 +251,7 @@ class TaskRestart(ProcessACLMixin, CreateAPIView):
copy.save()
# Copy the original task
copy.original_task_id = copy.id
copy.id = uuid.uuid4()
copy.slug = basename
copy.state = State.Pending
......
# Generated by Django 4.1.7 on 2024-04-29 08:59
import django.core.validators
import enumfields.fields
from django.db import migrations, models
import arkindex.ponos.models
class Migration(migrations.Migration):
dependencies = [
("ponos", "0007_remove_task_has_docker_socket"),
]
operations = [
migrations.AddField(
model_name="agent",
name="mode",
field=enumfields.fields.EnumField(default="docker", enum=arkindex.ponos.models.AgentMode, max_length=20),
),
migrations.AlterField(
model_name="agent",
name="cpu_cores",
field=models.PositiveSmallIntegerField(blank=True, null=True, validators=[django.core.validators.MinValueValidator(1)]),
),
migrations.AlterField(
model_name="agent",
name="cpu_frequency",
field=models.BigIntegerField(blank=True, null=True, validators=[django.core.validators.MinValueValidator(1)]),
),
migrations.AlterField(
model_name="agent",
name="ram_total",
field=models.BigIntegerField(blank=True, null=True, validators=[django.core.validators.MinValueValidator(1)]),
),
migrations.AddConstraint(
model_name="agent",
constraint=models.CheckConstraint(check=models.Q(("mode", arkindex.ponos.models.AgentMode["Slurm"]), models.Q(("cpu_cores__isnull", False), ("cpu_frequency__isnull", False), ("ram_total__isnull", False)), _connector="OR"), name="slurm_or_hardware_requirements"),
),
]
# Generated by Django 4.1.7 on 2024-04-23 12:19
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("ponos", "0008_agent_mode"),
]
operations = [
migrations.AddField(
model_name="task",
name="original_task",
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name="restarts", to="ponos.task"),
),
]
......@@ -66,6 +66,11 @@ class Farm(models.Model):
return True
class AgentMode(Enum):
Docker = "docker"
Slurm = "slurm"
class Agent(models.Model):
"""
A remote host that can run tasks.
......@@ -76,18 +81,27 @@ class Agent(models.Model):
updated = models.DateTimeField(auto_now=True)
farm = models.ForeignKey(Farm, on_delete=models.PROTECT)
public_key = models.TextField()
mode = EnumField(AgentMode, default=AgentMode.Docker, max_length=20)
hostname = models.SlugField(max_length=64, db_index=False)
cpu_cores = models.PositiveSmallIntegerField(validators=[MinValueValidator(1)])
cpu_frequency = models.BigIntegerField(validators=[MinValueValidator(1)])
cpu_cores = models.PositiveSmallIntegerField(null=True, blank=True, validators=[MinValueValidator(1)])
cpu_frequency = models.BigIntegerField(null=True, blank=True, validators=[MinValueValidator(1)])
# Total amount of RAM on this agent in bytes
ram_total = models.BigIntegerField(validators=[MinValueValidator(1)])
ram_total = models.BigIntegerField(null=True, blank=True, validators=[MinValueValidator(1)])
# Last minute average CPU load measure on this agent
cpu_load = models.FloatField(null=True, blank=True)
# Last RAM load measure expressed as a percentage (0 ≤ ram_load ≤ 1)
ram_load = models.FloatField(null=True, blank=True)
last_ping = models.DateTimeField(editable=False)
class Meta:
constraints = [
models.CheckConstraint(
check=Q(mode=AgentMode.Slurm) | Q(cpu_cores__isnull=False, cpu_frequency__isnull=False, ram_total__isnull=False),
name="slurm_or_hardware_requirements",
),
]
def __str__(self) -> str:
return self.hostname
......@@ -313,12 +327,18 @@ class Task(models.Model):
related_name="children",
symmetrical=False,
)
container = models.CharField(
max_length=64,
null=True,
blank=True,
)
original_task = models.ForeignKey(
"self",
on_delete=models.SET_NULL,
null=True,
blank=True,
related_name="restarts"
)
created = models.DateTimeField(auto_now_add=True)
updated = models.DateTimeField(auto_now=True)
......
......@@ -89,6 +89,7 @@ class TaskSerializer(TaskLightSerializer):
"agent",
"gpu",
"extra_files",
"original_task_id"
)
read_only_fields = TaskLightSerializer.Meta.read_only_fields + (
"logs",
......@@ -96,6 +97,7 @@ class TaskSerializer(TaskLightSerializer):
"agent",
"gpu",
"extra_files",
"original_task_id"
)
@extend_schema_field(serializers.CharField())
......
......@@ -74,6 +74,7 @@ class TestAPI(FixtureAPITestCase):
"slug": "initialisation",
"state": "unscheduled",
"parents": [],
"original_task_id": None,
"logs": "Failed successfully",
"full_log": "http://somewhere",
"extra_files": {},
......@@ -157,6 +158,7 @@ class TestAPI(FixtureAPITestCase):
"slug": "initialisation",
"state": "unscheduled",
"parents": [],
"original_task_id": None,
"logs": "Failed successfully",
"full_log": "http://somewhere",
"extra_files": {},
......@@ -198,6 +200,7 @@ class TestAPI(FixtureAPITestCase):
"slug": "initialisation",
"state": "unscheduled",
"parents": [],
"original_task_id": None,
"logs": "Failed successfully",
"full_log": "http://somewhere",
"extra_files": {},
......@@ -586,14 +589,16 @@ class TestAPI(FixtureAPITestCase):
self.task1.slug = self.task1.slug + "_old1"
self.task1.state = State.Completed.value
self.task1.save()
with self.assertNumQueries(7):
self.task2.original_task_id = self.task1.id
self.task2.save()
with self.assertNumQueries(8):
response = self.client.post(
reverse("api:task-restart", kwargs={"pk": str(self.task1.id)})
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertListEqual(
response.json(),
["This task has already been restarted"],
["This task has already been restarted."],
)
@patch("arkindex.project.aws.s3")
......@@ -630,6 +635,7 @@ class TestAPI(FixtureAPITestCase):
mock_now.return_value = datetime.now(timezone.utc) + timedelta(minutes=1)
old_task_2 = self.process.tasks.create(run=self.task1.run, depth=1, slug=f"{task_2_slug}_old1")
old_task_2.state = State.Error.value
old_task_2.original_task_id = self.task1.id
old_task_2.save()
old_task_2.parents.add(self.task1)
self.task1.state = State.Completed.value
......@@ -638,7 +644,7 @@ class TestAPI(FixtureAPITestCase):
self.task2.save()
self.client.force_login(self.user)
with self.assertNumQueries(13):
with self.assertNumQueries(14):
with patch("django.utils.timezone.now") as mock_now:
mock_now.return_value = datetime.now(timezone.utc) + timedelta(minutes=2)
response = self.client.post(
......@@ -657,6 +663,7 @@ class TestAPI(FixtureAPITestCase):
"full_log": "http://somewhere",
"gpu": None,
"logs": "Task has been restarted",
"original_task_id": str(self.task2.id),
"parents": [str(self.task1.id)],
"run": 0,
"shm_size": None,
......@@ -704,7 +711,7 @@ class TestAPI(FixtureAPITestCase):
self.task2.save()
self.client.force_login(self.user)
with self.assertNumQueries(13):
with self.assertNumQueries(14):
response = self.client.post(
reverse("api:task-restart", kwargs={"pk": str(self.task2.id)})
)
......@@ -721,6 +728,7 @@ class TestAPI(FixtureAPITestCase):
"full_log": "http://somewhere",
"gpu": None,
"logs": "Task has been restarted",
"original_task_id": str(self.task2.id),
"parents": [str(self.task1.id)],
"run": 0,
"shm_size": None,
......
from unittest.mock import patch
from django.core.exceptions import ValidationError
from django.db import transaction
from django.db.models import prefetch_related_objects
from django.db.utils import IntegrityError
from django.utils import timezone
from arkindex.ponos.models import FINAL_STATES, State
from arkindex.ponos.models import FINAL_STATES, Agent, AgentMode, Farm, State
from arkindex.process.models import ProcessMode
from arkindex.project.tests import FixtureAPITestCase
......@@ -12,6 +15,7 @@ class TestModels(FixtureAPITestCase):
@classmethod
def setUpTestData(cls):
super().setUpTestData()
cls.farm = Farm.objects.create(name="Invisible corn farm")
cls.process = cls.corpus.processes.create(
creator=cls.user,
mode=ProcessMode.Workers,
......@@ -132,3 +136,158 @@ class TestModels(FixtureAPITestCase):
self.assertEqual(self.process.get_state(0), State.Running)
self.assertEqual(self.process.get_state(1), State.Unscheduled)
self.assertEqual(self.process.get_state(-1), State.Unscheduled)
def test_agent_default_mode_docker(self):
Agent.objects.create(
hostname="agent_smith",
cpu_cores=2,
cpu_frequency=4.2e9,
public_key="",
farm=self.farm,
ram_total=2e9,
last_ping=timezone.now(),
ram_load=0.49,
cpu_load=0.99
)
test_agent=Agent.objects.get(hostname="agent_smith")
self.assertEqual(test_agent.mode, AgentMode.Docker)
def test_agent_slurm_mode(self):
Agent.objects.create(
hostname="agent_smith",
cpu_cores=2,
cpu_frequency=4.2e9,
public_key="",
farm=self.farm,
ram_total=2e9,
last_ping=timezone.now(),
ram_load=0.49,
cpu_load=0.99,
mode=AgentMode.Slurm.value
)
test_agent=Agent.objects.get(hostname="agent_smith")
self.assertEqual(test_agent.mode, AgentMode.Slurm)
def test_agent_invalid_mode(self):
with self.assertRaisesRegex(ValidationError, "bad_mode is not a valid value for enum"):
Agent.objects.create(
hostname="agent_smith",
cpu_cores=2,
cpu_frequency=4.2e9,
public_key="",
farm=self.farm,
ram_total=2e9,
last_ping=timezone.now(),
ram_load=0.49,
cpu_load=0.99,
mode="bad_mode"
)
def test_agent_mode_hardware_constraint(self):
"""
When the agent's mode is not AgentMode.Slurm, the hardware requirement parameters
cannot be null. When the agent's mode is AgentMode.Slurm, they can be null or not.
"""
cases = [
{
# no cpu_cores
"mode": AgentMode.Docker,
"params": {
"hostname": "agent_smith",
"cpu_frequency": 4.2e9,
"public_key": "",
"farm": self.farm,
"ram_total": 2e9,
"last_ping": timezone.now(),
"ram_load": 0.49,
"cpu_load": 0.99,
},
"failure": True
},
{
# no cpu_frequency
"mode": AgentMode.Docker,
"params": {
"hostname": "agent_smith",
"cpu_cores": 2,
"public_key": "",
"farm": self.farm,
"ram_total": 2e9,
"last_ping": timezone.now(),
"ram_load": 0.49,
"cpu_load": 0.99,
},
"failure": True
},
{
# no ram_total
"mode": AgentMode.Docker,
"params": {
"hostname": "agent_smith",
"cpu_cores": 2,
"cpu_frequency": 4.2e9,
"public_key": "",
"farm": self.farm,
"last_ping": timezone.now(),
"ram_load": 0.49,
"cpu_load": 0.99,
},
"failure": True
},
{
# hardware parameters set to None
"mode": AgentMode.Docker,
"params": {
"hostname": "agent_smith",
"cpu_cores": None,
"cpu_frequency": None,
"public_key": "",
"farm": self.farm,
"ram_total": None,
"last_ping": timezone.now(),
"ram_load": 0.49,
"cpu_load": 0.99,
},
"failure": True
},
{
# slurm mode with hardware requirements
"mode": AgentMode.Slurm,
"params": {
"hostname": "agent_smith",
"cpu_cores": 2,
"cpu_frequency": 4.2e9,
"public_key": "",
"farm": self.farm,
"ram_total": 2e9,
"last_ping": timezone.now(),
"ram_load": 0.49,
"cpu_load": 0.99,
},
"failure": False
},
{
# slurm mode without hardware requirements
"mode": AgentMode.Slurm,
"params": {
"hostname": "agent_smith",
"public_key": "",
"farm": self.farm,
"last_ping": timezone.now(),
"ram_load": 0.49,
"cpu_load": 0.99,
},
"failure": False
},
]
for case in cases:
with self.subTest(case=case):
if case["failure"]:
# Each create must be run in an atomic block to avoid TransactionManagementError
with transaction.atomic():
with self.assertRaisesRegex(IntegrityError, "slurm_or_hardware_requirements"):
Agent.objects.create(**case["params"], mode=case["mode"])
else:
Agent.objects.create(**case["params"], mode=case["mode"])
"""
Default corpus attributes
If you edit default types, please update 'DEFAULT_CORPUS_ATTRS' config value in the frontend (at js/config.js)
"""
DEFAULT_CORPUS_TYPES = [
......