Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Source and target project: arkindex/backend
Commits on Source (16)
Showing with 793 additions and 709 deletions
@@ -11,7 +11,7 @@ include:
 # For jobs that run backend scripts directly
 .backend-setup:
-  image: registry.gitlab.teklia.com/arkindex/backend/base:django-4.2
+  image: registry.gitlab.teklia.com/arkindex/backend/base:django-5.0
   cache:
     paths:
......
 # syntax=docker/dockerfile:1
-FROM registry.gitlab.teklia.com/arkindex/backend/base:django-4.2 as build
+FROM registry.gitlab.teklia.com/arkindex/backend/base:django-5.0 as build
 RUN mkdir build
 ADD . build
 RUN cd build && python3 setup.py sdist
-FROM registry.gitlab.teklia.com/arkindex/backend/base:django-4.2
+FROM registry.gitlab.teklia.com/arkindex/backend/base:django-5.0
 # Install arkindex and its deps
 # Uses a source archive instead of full local copy to speedup docker build
......
-1.6.2-alpha2
+1.6.2-beta2
@@ -110,7 +110,7 @@ CREATE TABLE classification (
     class_name VARCHAR(1024) NOT NULL,
     state VARCHAR(16) NOT NULL DEFAULT 'pending',
     moderator VARCHAR(255),
-    confidence REAL,
+    confidence REAL NOT NULL,
     high_confidence INTEGER NOT NULL DEFAULT 0,
     worker_version_id VARCHAR(37),
     worker_run_id VARCHAR(37),
@@ -118,7 +118,7 @@ CREATE TABLE classification (
     FOREIGN KEY (element_id) REFERENCES element (id) ON DELETE CASCADE,
     FOREIGN KEY (worker_version_id) REFERENCES worker_version (id) ON DELETE CASCADE,
     FOREIGN KEY (worker_run_id) REFERENCES worker_run (id) ON DELETE CASCADE,
-    CHECK (confidence IS NULL OR (confidence >= 0 AND confidence <= 1)),
+    CHECK (confidence >= 0 AND confidence <= 1),
     CHECK (high_confidence = 0 OR high_confidence = 1),
     CHECK (worker_run_id IS NULL OR worker_version_id IS NOT NULL)
 );
......
This diff is collapsed.
@@ -31,7 +31,7 @@ SELECT
     elementtype.display_name AS type_name,
     element.image_id AS image_id,
     element.polygon::bytea AS polygon,
-    element.worker_version_id AS worker_version_id
+    element.worker_run_id AS worker_run_id
 FROM documents_element element
 INNER JOIN documents_elementtype elementtype ON (elementtype.id = element.type_id)
 WHERE element.corpus_id = %(corpus)s
@@ -51,7 +51,7 @@ SELECT
     elementtype.display_name as type_name,
     element.image_id AS image_id,
     element.polygon::bytea AS polygon,
-    element.worker_version_id AS worker_version_id
+    element.worker_run_id AS worker_run_id
 FROM (SELECT * FROM parent LIMIT %(limit)s OFFSET %(offset)s) AS parent_chunk
 INNER JOIN documents_elementpath as elementpath ON (elementpath.path @> ARRAY[parent_chunk.id])
 INNER JOIN documents_element as element ON (elementpath.element_id = element.id)
@@ -219,11 +219,11 @@ class Indexer:
             return
         yield chunk

-    def hash_worker(self, worker_version):
-        if not worker_version:
+    def hash_worker(self, worker_run):
+        if not worker_run:
             return
-        return worker_version.worker.name
+        return worker_run.version.worker.name

     def build_solr_id(self, element, target):
         hash = md5(element.id.bytes + target.id.bytes)
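Note on the rename above: hash_worker now receives a WorkerRun instead of a WorkerVersion, so reaching the worker name takes one extra hop (run → version → worker). A minimal sketch of the new traversal, using hypothetical stand-in objects rather than the real ORM models:

from types import SimpleNamespace

# Hypothetical stand-ins for WorkerRun -> WorkerVersion -> Worker
worker = SimpleNamespace(name="reco")
worker_run = SimpleNamespace(version=SimpleNamespace(worker=worker))

def hash_worker(worker_run):
    # Same behaviour as the method above: None-safe, returns the worker name
    if not worker_run:
        return
    return worker_run.version.worker.name

assert hash_worker(worker_run) == "reco"
assert hash_worker(None) is None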
@@ -248,7 +248,7 @@ class Indexer:
             "element_id": str(element.id),
             "element_text": element.name,
             "element_type": element.type_name,
-            "element_worker": self.hash_worker(element.worker_version),
+            "element_worker": self.hash_worker(element.worker_run),
             "element_image": element.iiif_thumbnail_url
         }
@@ -259,7 +259,7 @@ class Indexer:
                 "transcription_id": str(transcription.id),
                 "transcription_text": transcription.text,
                 "transcription_confidence": transcription.confidence,
-                "transcription_worker": self.hash_worker(transcription.worker_version)
+                "transcription_worker": self.hash_worker(transcription.worker_run)
             }) for transcription in element.transcriptions.all()
         ]
@@ -270,7 +270,7 @@ class Indexer:
                 "classification_id": str(classification.id),
                 "classification_name": classification.ml_class.name,
                 "classification_confidence": classification.confidence,
-                "classification_worker": self.hash_worker(classification.worker_version)
+                "classification_worker": self.hash_worker(classification.worker_run)
             }) for classification in element.classifications.all()
         ]
@@ -282,7 +282,7 @@ class Indexer:
                 "metadata_name": metadata.name,
                 "metadata_text": metadata.value,
                 "metadata_type": metadata.type.value,
-                "metadata_worker": self.hash_worker(metadata.worker_version)
+                "metadata_worker": self.hash_worker(metadata.worker_run)
             }) for metadata in element.metadatas.all()
         ]
@@ -295,7 +295,7 @@ class Indexer:
                 "entity_id": str(entity.id),
                 "entity_text": entity.name,
                 "entity_type": entity.type.name,
-                "entity_worker": self.hash_worker(entity.worker_version)
+                "entity_worker": self.hash_worker(entity.worker_run)
             }) for entity in entities
         ]
@@ -321,22 +321,22 @@ class Indexer:
         with Timer() as t:
             # Element
             prefetch_related_objects(elements_chunk, "image__server")
-            prefetch_related_objects(elements_chunk, "worker_version__worker")
+            prefetch_related_objects(elements_chunk, "worker_run__version__worker")
             # Transcriptions
             prefetch_related_objects(elements_chunk, "transcriptions")
-            prefetch_related_objects(elements_chunk, "transcriptions__worker_version__worker")
+            prefetch_related_objects(elements_chunk, "transcriptions__worker_run__version__worker")
             # Classifications
             prefetch_related_objects(elements_chunk, "classifications")
-            prefetch_related_objects(elements_chunk, "classifications__worker_version__worker")
+            prefetch_related_objects(elements_chunk, "classifications__worker_run__version__worker")
             # Metadatas
             prefetch_related_objects(elements_chunk, "metadatas")
-            prefetch_related_objects(elements_chunk, "metadatas__worker_version__worker")
+            prefetch_related_objects(elements_chunk, "metadatas__worker_run__version__worker")
             # Entities
             prefetch_related_objects(elements_chunk, "transcriptions__entities")
-            prefetch_related_objects(elements_chunk, "transcriptions__entities__worker_version__worker")
+            prefetch_related_objects(elements_chunk, "transcriptions__entities__worker_run__version__worker")
             prefetch_related_objects(elements_chunk, "transcriptions__entities__type")
             prefetch_related_objects(elements_chunk, "metadatas__entity")
-            prefetch_related_objects(elements_chunk, "metadatas__entity__worker_version__worker")
+            prefetch_related_objects(elements_chunk, "metadatas__entity__worker_run__version__worker")
             prefetch_related_objects(elements_chunk, "metadatas__entity__type")
         retrieve_time += t.delta
         logger.debug(f"Retrieved {nb_elements} elements")
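Each prefetch path above gains the extra version hop to match that traversal; without it, every hash_worker call in the build_* methods would lazily fetch the WorkerRun, WorkerVersion and Worker rows per object (an N+1 query pattern). A sketch of the idea, assuming a hypothetical elements queryset:

from django.db.models import prefetch_related_objects

elements = list(corpus.elements.all()[:100])  # hypothetical queryset
# One batched query per relation in the path, instead of three per element
prefetch_related_objects(elements, "worker_run__version__worker")
names = [e.worker_run.version.worker.name for e in elements if e.worker_run]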
......
@@ -246,7 +246,7 @@ class Command(BaseCommand):
             version=dla_worker,
             parents=[init_worker_run.id],
         )
-        process.worker_runs.create(
+        reco_run = process.worker_runs.create(
             version=recognizer_worker,
             parents=[dla_worker_run.id],
         )
@@ -381,14 +381,14 @@ class Command(BaseCommand):
         )
         element.add_parent(page)
         element.transcriptions.create(
-            worker_version=recognizer_worker,
+            worker_run=reco_run,
             text=word,
             confidence=1.0,
         )

         # Create a page transcription on page 1
         p1_1.transcriptions.create(
-            worker_version=recognizer_worker,
+            worker_run=reco_run,
             text="Lorem ipsum dolor sit amet",
             confidence=1.0,
         )
# Generated by Django 4.2.13 on 2024-07-03 16:03
from django.db import migrations, models


class Migration(migrations.Migration):
    dependencies = [
        ("documents", "0010_delete_entityrole_entitylink"),
    ]

    operations = [
        migrations.AlterField(
            model_name="classification",
            name="confidence",
            field=models.FloatField(default=1),
            preserve_default=False,
        ),
    ]
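Since preserve_default=False, the default of 1 is applied exactly once: Django backfills rows whose confidence was NULL while adding the NOT NULL constraint, then drops the default from the column. A hypothetical equivalent written as an explicit backfill, for readers less familiar with that flag:

from django.db import migrations, models


def backfill_confidence(apps, schema_editor):
    # Give pre-existing NULL confidences the same value the default provides
    Classification = apps.get_model("documents", "Classification")
    Classification.objects.filter(confidence=None).update(confidence=1)


class Migration(migrations.Migration):
    dependencies = [("documents", "0010_delete_entityrole_entitylink")]

    operations = [
        migrations.RunPython(backfill_confidence, migrations.RunPython.noop),
        migrations.AlterField(
            model_name="classification",
            name="confidence",
            field=models.FloatField(default=1),
            preserve_default=False,
        ),
    ]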
@@ -1079,7 +1079,7 @@ class Classification(models.Model):
     # Predicted class is considered as correct by its creator
     high_confidence = models.BooleanField(default=False)
     state = EnumField(ClassificationState, max_length=16, default=ClassificationState.Pending)
-    confidence = models.FloatField(null=True, blank=True)
+    confidence = models.FloatField()

     class Meta:
         constraints = [
......
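With the field non-nullable, the range check no longer needs a NULL escape hatch, mirroring the SQLite schema change earlier in this diff. The constraint list is truncated above; a plausible (hypothetical) shape of the tightened check:

from django.db import models
from django.db.models import Q

constraints = [
    # confidence is always present now, so only the 0..1 range is checked
    models.CheckConstraint(
        check=Q(confidence__gte=0) & Q(confidence__lte=1),
        name="classification_confidence_range",  # hypothetical name
    ),
]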
@@ -9,7 +9,7 @@ from rest_framework.exceptions import ValidationError
 from arkindex.documents.models import Corpus, Entity, EntityType, TranscriptionEntity
 from arkindex.documents.serializers.light import CorpusLightSerializer, EntityTypeLightSerializer
 from arkindex.documents.serializers.ml import WorkerRunSummarySerializer
-from arkindex.project.serializer_fields import ForbiddenField, WorkerRunIDField
+from arkindex.project.serializer_fields import WorkerRunIDField
 from arkindex.project.validators import ForbiddenValidator
@@ -221,7 +221,6 @@ class TranscriptionEntityCreateSerializer(serializers.ModelSerializer):
     """
     offset = serializers.IntegerField(min_value=0)
     length = serializers.IntegerField(min_value=1)
-    worker_version_id = ForbiddenField()
     worker_run_id = WorkerRunIDField(
         required=False,
         write_only=True,
@@ -245,7 +244,6 @@ class TranscriptionEntityCreateSerializer(serializers.ModelSerializer):
             "entity",
             "offset",
             "length",
-            "worker_version_id",
             "worker_run_id",
             "worker_run",
             "confidence",
@@ -294,12 +292,10 @@ class TranscriptionEntityCreateSerializer(serializers.ModelSerializer):
         return data


-@extend_schema_serializer(deprecate_fields=("worker_version_id", ))
 class TranscriptionEntitySerializer(TranscriptionEntityCreateSerializer):
     """
     Serialize the link between a transcription and a detailed entity
     """
-    worker_version_id = serializers.UUIDField(read_only=True)
     entity = BaseEntitySerializer()
......
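Net effect of the serializer changes: worker_version_id disappears from both requests and responses for transcription entities, and provenance is expressed through worker_run_id alone. A sketch of a valid creation payload under the new serializer (UUIDs are placeholders):

payload = {
    "entity": "11111111-1111-1111-1111-111111111111",         # placeholder UUID
    "offset": 4,
    "length": 8,
    "worker_run_id": "22222222-2222-2222-2222-222222222222",  # placeholder UUID
    # "worker_version_id" is no longer part of the serializer's fields
}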
@@ -5,7 +5,7 @@ from django.test import override_settings
 from arkindex.documents.indexer import Indexer
 from arkindex.documents.models import Corpus, EntityType, MetaType
-from arkindex.process.models import WorkerVersion
+from arkindex.process.models import WorkerRun
 from arkindex.project.tests import FixtureTestCase
@@ -15,7 +15,8 @@ class TestReindexCommand(FixtureTestCase):
     def setUpTestData(cls):
         super().setUpTestData()
         cls.private_corpus = Corpus.objects.create(name="private", indexable=True)
-        cls.worker_version = WorkerVersion.objects.first()
+        cls.worker_run = WorkerRun.objects.get(version__worker__slug="reco")
+        cls.worker_version = cls.worker_run.version
         cls.worker = cls.worker_version.worker

         # Create element types
@@ -122,11 +123,13 @@ class TestReindexCommand(FixtureTestCase):
             confidence=0.8,
             text="Transcription for the line",
             worker_version=self.worker_version,
+            worker_run=self.worker_run,
         )
         tr_2 = self.line.transcriptions.create(
             confidence=0.5,
             text="Second transcription",
             worker_version=self.worker_version,
+            worker_run=self.worker_run,
         )
         call_command("reindex", "-c", self.private_corpus.id)
@@ -198,11 +201,13 @@ class TestReindexCommand(FixtureTestCase):
         cl_1 = self.line.classifications.create(
             confidence=0.8,
             worker_version=self.worker_version,
+            worker_run=self.worker_run,
             ml_class=self.private_corpus.ml_classes.create(name="Cat")
         )
         cl_2 = self.line.classifications.create(
             confidence=0.4,
             worker_version=self.worker_version,
+            worker_run=self.worker_run,
             ml_class=self.private_corpus.ml_classes.create(name="Dog")
         )
@@ -279,12 +284,14 @@ class TestReindexCommand(FixtureTestCase):
             name="Country",
             value="France",
             worker_version=self.worker_version,
+            worker_run=self.worker_run,
         )
         md_2 = self.line.metadatas.create(
             type=MetaType.Text,
             name="Folio",
             value="1",
             worker_version=self.worker_version,
+            worker_run=self.worker_run,
         )
         call_command("reindex", "-c", self.private_corpus.id)
@@ -357,7 +364,7 @@ class TestReindexCommand(FixtureTestCase):
         """
         location_type = EntityType.objects.create(name="location", corpus=self.private_corpus)
         self.private_corpus.allowed_metadatas.create(type=MetaType.Location, name="Country")
-        entity_1 = self.private_corpus.entities.create(name="CDLK", type=location_type, worker_version=self.worker_version)
+        entity_1 = self.private_corpus.entities.create(name="CDLK", type=location_type, worker_version=self.worker_version, worker_run=self.worker_run)
         tr = self.line.transcriptions.create(
             confidence=0.8,
             text="Transcription for the line",
@@ -368,7 +375,7 @@ class TestReindexCommand(FixtureTestCase):
             length=len(entity_1.name)
         )
         person_type = EntityType.objects.create(name="person", corpus=self.private_corpus)
-        entity_2 = self.private_corpus.entities.create(name="Robert", type=person_type, worker_version=self.worker_version)
+        entity_2 = self.private_corpus.entities.create(name="Robert", type=person_type, worker_version=self.worker_version, worker_run=self.worker_run)
         md = self.line.metadatas.create(
             type=MetaType.Location,
             name="Country",
@@ -471,6 +478,7 @@ class TestReindexCommand(FixtureTestCase):
         """
         Test the reindex command for element with a worker version
         """
+        self.line.worker_run = self.worker_run
         self.line.worker_version = self.worker_version
         self.line.save()
......
@@ -75,6 +75,7 @@ class TestDeleteCorpus(FixtureTestCase):
             ml_class=cls.corpus.ml_classes.create(name="something"),
             worker_run=worker_run,
             worker_version=cls.worker_version,
+            confidence=0.89
         )
         element.metadatas.create(
             type=MetaType.Text,
......
@@ -836,6 +836,7 @@ class TestClassifications(FixtureAPITestCase):
             worker_version=self.worker_version_1,
             state=ClassificationState.Pending,
             high_confidence=True,
+            confidence=1,
             ml_class=line
         )
@@ -845,6 +846,7 @@ class TestClassifications(FixtureAPITestCase):
             worker_version=self.worker_version_1,
             state=ClassificationState.Pending,
             high_confidence=True,
+            confidence=1,
             ml_class=self.text
         )
         Classification.objects.create(
@@ -852,6 +854,7 @@
             worker_version=self.worker_version_2,
             state=ClassificationState.Pending,
             high_confidence=False,
+            confidence=0.4,
             ml_class=self.text
         )
......
@@ -590,24 +590,6 @@ class TestEntitiesAPI(FixtureAPITestCase):
         )
         self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)

-    def test_create_transcription_entity_worker_version(self):
-        self.client.force_login(self.user)
-        with self.assertNumQueries(4):
-            response = self.client.post(
-                reverse("api:transcription-entity-create", kwargs={"pk": str(self.transcription.id)}),
-                data={
-                    "entity": str(self.entity.id),
-                    "offset": 4,
-                    "length": len(self.entity.name),
-                    "worker_version_id": str(self.worker_version_1.id),
-                },
-                format="json",
-            )
-        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
-        self.assertDictEqual(response.json(), {
-            "worker_version_id": ["This field is forbidden."],
-        })
-
     def test_create_transcription_entity_worker_run(self):
         """
         A regular user can create classifications with a WorkerRun of their own local process
@@ -636,24 +618,6 @@ class TestEntitiesAPI(FixtureAPITestCase):
             "confidence": None,
         })

-    def test_create_transcription_entity_forbidden_version(self):
-        self.client.force_login(self.user)
-        with self.assertNumQueries(4):
-            response = self.client.post(
-                reverse("api:transcription-entity-create", kwargs={"pk": str(self.transcription.id)}),
-                data={
-                    "entity": str(self.entity.id),
-                    "offset": 4,
-                    "length": 8,
-                    "worker_version_id": str(self.worker_version_1.id),
-                },
-                format="json",
-            )
-        self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
-        self.assertDictEqual(response.json(), {
-            "worker_version_id": ["This field is forbidden."],
-        })
-
     def test_create_transcription_entity_bad_worker_run(self):
         self.client.force_login(self.user)
         with self.assertNumQueries(5):
@@ -1074,7 +1038,6 @@ class TestEntitiesAPI(FixtureAPITestCase):
             },
             "length": self.transcriptionentity.length,
             "offset": self.transcriptionentity.offset,
-            "worker_version_id": str(self.worker_version_1.id),
             "worker_run": None,
             "confidence": None
         }]
@@ -1105,7 +1068,6 @@ class TestEntitiesAPI(FixtureAPITestCase):
             },
             "length": self.transcriptionentity.length,
             "offset": self.transcriptionentity.offset,
-            "worker_version_id": str(self.worker_version_1.id),
             "worker_run": None,
             "confidence": None
         }],
@@ -1228,7 +1190,6 @@ class TestEntitiesAPI(FixtureAPITestCase):
             },
             "length": 8,
             "offset": 8,
-            "worker_version_id": str(self.worker_version_2.id),
             "worker_run": None,
             "confidence": None
         }]
@@ -1268,7 +1229,6 @@ class TestEntitiesAPI(FixtureAPITestCase):
             },
             "length": 8,
             "offset": 8,
-            "worker_version_id": str(self.worker_version_2.id),
             "worker_run": {
                 "id": str(self.worker_run_2.id),
                 "summary": self.worker_run_2.summary,
@@ -1309,7 +1269,6 @@ class TestEntitiesAPI(FixtureAPITestCase):
             },
             "length": 8,
             "offset": 2,
-            "worker_version_id": None,
             "worker_run": None,
             "confidence": None
         }]
@@ -1348,7 +1307,6 @@ class TestEntitiesAPI(FixtureAPITestCase):
             },
             "length": 8,
             "offset": 8,
-            "worker_version_id": str(self.worker_version_2.id),
             "worker_run": None,
             "confidence": None
         }]
@@ -1397,7 +1355,6 @@ class TestEntitiesAPI(FixtureAPITestCase):
             },
             "length": 8,
             "offset": 8,
-            "worker_version_id": str(self.worker_version_2.id),
             "worker_run": {
                 "id": str(self.worker_run_2.id),
                 "summary": self.worker_run_2.summary,
@@ -1444,7 +1401,6 @@ class TestEntitiesAPI(FixtureAPITestCase):
             },
             "length": 13,
             "offset": 8,
-            "worker_version_id": str(self.worker_version_2.id),
             "worker_run": None,
             "confidence": None
         }]
@@ -1493,7 +1449,6 @@ class TestEntitiesAPI(FixtureAPITestCase):
             },
             "length": 8,
             "offset": 8,
-            "worker_version_id": str(self.worker_version_2.id),
             "worker_run": None,
             "confidence": None
         }]
......
@@ -7,7 +7,7 @@ from django.db.models import CharField, Value
 from arkindex.documents.indexer import Indexer
 from arkindex.documents.models import Corpus, EntityType, MetaType
-from arkindex.process.models import WorkerVersion
+from arkindex.process.models import WorkerRun
 from arkindex.project.tests import FixtureTestCase
@@ -17,10 +17,11 @@ class TestIndexerCommand(FixtureTestCase):
     def setUpTestData(cls):
         super().setUpTestData()
         cls.private_corpus = Corpus.objects.create(name="private", indexable=True)
-        cls.worker_version = WorkerVersion.objects.first()
+        cls.worker_run = WorkerRun.objects.get(version__worker__slug="reco")
+        cls.worker_version = cls.worker_run.version
         cls.worker = cls.worker_version.worker
         page_type, _ = cls.private_corpus.types.get_or_create(slug="page", display_name="Page", indexable=True)
-        cls.page = cls.private_corpus.elements.create(name="New page", type=page_type, worker_version=cls.worker_version)
+        cls.page = cls.private_corpus.elements.create(name="New page", type=page_type, worker_run=cls.worker_run, worker_version=cls.worker_version)

     @patch("arkindex.documents.indexer.solr")
     def test_setup(self, mock_solr):
@@ -100,7 +101,7 @@ class TestIndexerCommand(FixtureTestCase):
     def test_hash_worker(self):
         indexer = Indexer(None)
         self.assertIsNone(indexer.hash_worker(None))
-        self.assertEqual(indexer.hash_worker(self.worker_version), self.worker.name)
+        self.assertEqual(indexer.hash_worker(self.worker_run), self.worker.name)

     def test_build_id(self):
         expected = UUID(md5(self.page.id.bytes + self.page.id.bytes).hexdigest())
@@ -131,12 +132,14 @@ class TestIndexerCommand(FixtureTestCase):
         tr_1 = self.page.transcriptions.create(
             confidence=0.8,
             text="Transcription on the page",
-            worker_version=self.worker_version,
+            worker_run=self.worker_run,
+            worker_version=self.worker_version
         )
         tr_2 = self.page.transcriptions.create(
             confidence=0.5,
             text="Second transcription",
-            worker_version=self.worker_version,
+            worker_run=self.worker_run,
+            worker_version=self.worker_version
         )
         indexer = Indexer(None)
         self.assertListEqual(indexer.build_transcriptions(self.page, {"key": "value"}), [
@@ -161,11 +164,13 @@
     def test_build_classifications(self):
         cl_1 = self.page.classifications.create(
             confidence=0.8,
+            worker_run=self.worker_run,
             worker_version=self.worker_version,
             ml_class=self.private_corpus.ml_classes.create(name="Cat")
         )
         cl_2 = self.page.classifications.create(
             confidence=0.4,
+            worker_run=self.worker_run,
             worker_version=self.worker_version,
             ml_class=self.private_corpus.ml_classes.create(name="Dog")
         )
@@ -196,13 +201,15 @@
             type=MetaType.Location,
             name="Country",
             value="France",
-            worker_version=self.worker_version,
+            worker_run=self.worker_run,
+            worker_version=self.worker_version
         )
         md_2 = self.page.metadatas.create(
             type=MetaType.Text,
             name="Folio",
             value="1",
-            worker_version=self.worker_version,
+            worker_run=self.worker_run,
+            worker_version=self.worker_version
         )
         indexer = Indexer(None)
         self.assertListEqual(indexer.build_metadatas(self.page, {"key": "value"}), [
@@ -229,7 +236,8 @@ class TestIndexerCommand(FixtureTestCase):
     def test_build_entities(self):
         self.private_corpus.allowed_metadatas.create(type=MetaType.Location, name="Country")
         location_type = EntityType.objects.create(name="location", corpus=self.private_corpus)
-        entity_1 = self.private_corpus.entities.create(name="CDLK", type=location_type, worker_version=self.worker_version)
+        entity_1 = self.private_corpus.entities.create(name="CDLK", type=location_type, worker_run=self.worker_run,
+                                                       worker_version=self.worker_version)
         tr = self.page.transcriptions.create(
             confidence=0.8,
             text="Transcription on the page",
@@ -240,7 +248,8 @@ class TestIndexerCommand(FixtureTestCase):
             length=len(entity_1.name)
         )
         person_type = EntityType.objects.create(name="person", corpus=self.private_corpus)
-        entity_2 = self.private_corpus.entities.create(name="Robert", type=person_type, worker_version=self.worker_version)
+        entity_2 = self.private_corpus.entities.create(name="Robert", type=person_type, worker_run=self.worker_run,
+                                                       worker_version=self.worker_version)
         self.page.metadatas.create(
             type=MetaType.Location,
             name="Country",
@@ -270,7 +279,8 @@ class TestIndexerCommand(FixtureTestCase):
     @patch("arkindex.documents.indexer.solr")
     def test_index(self, mock_solr):
         location_type = EntityType.objects.create(name="location", corpus=self.private_corpus)
-        entity = self.private_corpus.entities.create(name="CDLK", type=location_type, worker_version=self.worker_version)
+        entity = self.private_corpus.entities.create(name="CDLK", type=location_type, worker_run=self.worker_run,
+                                                     worker_version=self.worker_version)
         tr = self.page.transcriptions.create(
             confidence=0.8,
             text="Transcription on the page",
@@ -286,6 +296,7 @@ class TestIndexerCommand(FixtureTestCase):
             "corpus_id": self.private_corpus.id,
             "page_id": self.page.id,
             "image_id": self.page.image_id,
+            "worker_run_id": self.worker_run.id,
             "worker_version_id": self.worker_version.id,
             "worker_id": self.worker.id,
             "transcription_id": tr.id,
......
@@ -27,7 +27,7 @@ class TestRetrieveElements(FixtureAPITestCase):
     def test_get_element(self):
         ml_class = MLClass.objects.create(name="text", corpus=self.corpus)
-        classification = self.vol.classifications.create(worker_version=self.worker_version, ml_class=ml_class)
+        classification = self.vol.classifications.create(worker_version=self.worker_version, ml_class=ml_class, confidence=0.8)
         with self.assertNumQueries(2):
             response = self.client.get(reverse("api:element-retrieve", kwargs={"pk": str(self.vol.id)}))
@@ -56,7 +56,7 @@ class TestRetrieveElements(FixtureAPITestCase):
             "classifications": [
                 {
                     "id": str(classification.id),
-                    "confidence": None,
+                    "confidence": 0.8,
                     "high_confidence": False,
                     "state": "pending",
                     "worker_version": str(self.worker_version.id),
@@ -251,7 +251,7 @@ class TestRetrieveElements(FixtureAPITestCase):
     def test_get_element_classification_worker_run(self):
         ml_class = MLClass.objects.create(name="text", corpus=self.corpus)
-        classification = self.vol.classifications.create(worker_version=self.worker_version, worker_run=self.worker_run, ml_class=ml_class)
+        classification = self.vol.classifications.create(worker_version=self.worker_version, worker_run=self.worker_run, ml_class=ml_class, confidence=0.89)
         with self.assertNumQueries(3):
             response = self.client.get(reverse("api:element-retrieve", kwargs={"pk": str(self.vol.id)}))
@@ -280,7 +280,7 @@ class TestRetrieveElements(FixtureAPITestCase):
             "classifications": [
                 {
                     "id": str(classification.id),
-                    "confidence": None,
+                    "confidence": 0.89,
                     "high_confidence": False,
                     "state": "pending",
                     "worker_version": str(self.worker_version.id),
......
+from datetime import datetime
+
 from django.test import override_settings
 from django.urls import reverse

+from arkindex.ponos.models import Agent, AgentMode, Farm, State
+from arkindex.process.models import Process, ProcessMode
 from arkindex.project.tests import FixtureAPITestCase
@@ -15,3 +19,40 @@ class TestMetricsAPI(FixtureAPITestCase):
         response = self.client.get(reverse("metrics:base-metrics"), SERVER_PORT=42)
         self.assertEqual(response.status_code, 200)
         self.assertEqual(response.content, b'arkindex_instance{hostname="hostname", env="test"} 1')
+
+    @override_settings(PROMETHEUS_METRICS_PORT=42, PUBLIC_HOSTNAME="hostname", ARKINDEX_ENV="test")
+    def test_metrics_tasks(self):
+        # Retrieve objects
+        process = Process.objects.get(mode=ProcessMode.Workers)
+        process.run()
+        task1, task2, task3 = process.tasks.all().order_by("depth")
+        farm = Farm.objects.first()
+        agent = Agent.objects.create(
+            mode=AgentMode.Docker,
+            hostname="Demo Agent",
+            farm=farm,
+            last_ping=datetime.now(),
+            cpu_cores=42,
+            cpu_frequency=42e8,
+            ram_total=42e3
+        )
+        last_ping = int(agent.last_ping.timestamp())
+
+        # Create some tasks in various states
+        task1.state = State.Pending
+        task1.save()
+        task2.state = State.Error
+        task2.save()
+        task3.state = State.Unscheduled
+        task3.save()
+
+        response = self.client.get(reverse("metrics:base-metrics"), SERVER_PORT=42)
+        self.assertEqual(response.status_code, 200)
+        metrics = """arkindex_instance{hostname="hostname", env="test"} 1
+arkindex_tasks{hostname="hostname", env="test", state="Pending", farm="Wheat farm"} 1
+arkindex_tasks{hostname="hostname", env="test", state="Unscheduled", farm="Wheat farm"} 1
+arkindex_agent_ping{hostname="hostname", env="test", agent_name="Demo Agent", farm="Wheat farm"} """ + str(last_ping)
+        self.assertEqual(response.content, metrics.encode("utf-8"))
......
+def _render_attribute(key, value):
+    if value is None:
+        value = "null"
+    return f'{key}="{value}"'
+
+
 def build_metric(label, attributes={}, value=1, timestamp=None):
-    attrs_fmt = ", ".join(["=".join((k, f'"{v}"')) for k, v in attributes.items()])
+    attrs_fmt = ", ".join([_render_attribute(k, v) for k, v in attributes.items()])
     metric = f"{label}{{{attrs_fmt}}} {value}"
     if timestamp:
         metric = f"{metric} {timestamp}"
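The new _render_attribute helper changes how missing values render: a None attribute (for example, a task whose process has no farm) now comes out as "null" rather than Python's "None". A quick usage sketch, assuming the two functions above are in scope and that build_metric returns the assembled string (its tail is truncated here):

print(build_metric("arkindex_tasks", {"state": "Pending", "farm": None}, value=3))
# arkindex_tasks{state="Pending", farm="null"} 3

print(build_metric("arkindex_agent_ping", {"agent_name": "Demo Agent"}, value=1, timestamp=1719936000))
# arkindex_agent_ping{agent_name="Demo Agent"} 1 1719936000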
......
 from django.conf import settings
+from django.db.models import Count
 from django.http import Http404, HttpResponse
 from django.views import View

 from arkindex.metrics.utils import build_metric
+from arkindex.ponos.models import Agent, Farm, State, Task


 class MetricsView(View):
     def get(self, request, *args, **kwargs):
         if settings.PROMETHEUS_METRICS_PORT != int(request.get_port()):
             raise Http404()
-        return HttpResponse(
+        common_attributes = {
+            "hostname": settings.PUBLIC_HOSTNAME,
+            "env": settings.ARKINDEX_ENV
+        }
+
+        # Count nb of tasks in pending or unscheduled states
+        # grouped by farm
+        tasks_count = (
+            Task.objects
+            .filter(state__in=(State.Pending, State.Unscheduled))
+            .values("process__farm", "state")
+            .annotate(nb=Count("id"))
+        )
+
+        # Load all ponos agents details
+        agents = Agent.objects.all().values("hostname", "farm_id", "last_ping")
+
+        # Load all farms to use in attributes of farms & tasks
+        farms = dict(Farm.objects.all().values_list("id", "name"))
+
+        metrics = [
+            # Backend ping
             build_metric(
                 "arkindex_instance",
-                {
-                    "hostname": settings.PUBLIC_HOSTNAME,
-                    "env": settings.ARKINDEX_ENV
-                }
+                attributes=common_attributes
             ),
+        ] + [
+            # Pending or Unscheduled tasks count
+            build_metric(
+                "arkindex_tasks",
+                attributes={**common_attributes, "state": t["state"], "farm": farms.get(t["process__farm"])},
+                value=t["nb"]
+            )
+            for t in tasks_count
+        ] + [
+            # Ponos agent last known ping
+            build_metric(
+                "arkindex_agent_ping",
+                attributes={**common_attributes, "agent_name": a["hostname"], "farm": farms.get(a["farm_id"])},
+                value=int(a["last_ping"].timestamp())
+            )
+            for a in agents
+        ]
+
+        # Render text response with all metrics
+        return HttpResponse(
+            "\n".join(metrics),
             content_type="text/plain"
         )
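Put together, the view emits one line per metric in Prometheus' plain-text exposition format. A sketch of a response body (hostname, farm names, counts and the timestamp are hypothetical):

arkindex_instance{hostname="ark.example.com", env="production"} 1
arkindex_tasks{hostname="ark.example.com", env="production", state="Pending", farm="Wheat farm"} 4
arkindex_tasks{hostname="ark.example.com", env="production", state="Unscheduled", farm="null"} 2
arkindex_agent_ping{hostname="ark.example.com", env="production", agent_name="agent-01", farm="Wheat farm"} 1719936000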
@@ -120,7 +120,7 @@ class WorkerTypeAdmin(admin.ModelAdmin):
 class WorkerVersionAdmin(admin.ModelAdmin):
     list_display = ("id", "worker", "revision", "version")
     list_filter = ("worker", )
-    fields = ("id", "worker", "revision", "version", "configuration", "model_usage", "gpu_usage", "docker_image_iiid")
+    fields = ("id", "worker", "revision", "version", "configuration", "model_usage", "gpu_usage", "docker_image_iid")
     readonly_fields = ("id", )
     raw_id_fields = ("revision", )
......