Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • arkindex/backend
1 result
Show changes
Commits on Source (2)
Showing
with 586 additions and 652 deletions
......@@ -51,7 +51,13 @@ def run_pg_query(query, source_db):
Run a single Postgresql query and split the results into chunks.
When a name is given to a cursor, psycopg2 uses a server-side cursor; we just use a random string as a name.
"""
with connections[source_db].create_cursor(name=str(uuid.uuid4())) as pg_cursor:
db = connections[source_db]
# Make sure a connection is open and available for export databases
if source_db != "default" and db.connection is None:
db.connect()
with db.create_cursor(name=str(uuid.uuid4())) as pg_cursor:
pg_cursor.itersize = BATCH_SIZE
pg_cursor.execute(query)
......
This diff is collapsed.
......@@ -155,7 +155,6 @@ class TestRetrieveElements(FixtureAPITestCase):
process = Process.objects.create(
mode=ProcessMode.Workers,
creator=self.user,
generate_thumbnails=True,
farm=Farm.objects.first(),
corpus=self.corpus
)
......
......@@ -62,24 +62,6 @@ class ProcessBuilder(object):
for parent_slug in parent_slugs
) + 1
def _add_thumbnails(self, import_task_slug: str) -> None:
"""
Automatically adds a thumbnail generation task at the end of each distributed chunk
If a specific parent slug is provided, attach the thumbnails generation task to it.
"""
if not self.process.generate_thumbnails:
return
chunks = self._get_elements_json_chunks()
for index, chunk in enumerate(chunks, start=1):
slug = "thumbnails"
if len(chunks) > 1:
slug += f"_{index}"
elements_path = shlex.quote(path.join("/data", import_task_slug, chunk))
command = f"python3 -m arkindex_tasks.generate_thumbnails {elements_path}"
self._build_task(command=command, slug=slug, env=self.base_env)
self.tasks_parents[slug].append(import_task_slug)
def _build_task(
self,
command,
......@@ -204,8 +186,6 @@ class ProcessBuilder(object):
def validate_dataset(self) -> None:
# Validate a dataset-mode process before it is started: dataset processes
# reuse the GPU and archive checks, and reject thumbnail generation.
self.validate_gpu_requirement()
self.validate_archived()
# Thumbnail tasks operate on element chunks, which dataset processes do not produce
if self.process.generate_thumbnails:
raise ValidationError("Thumbnails generation is incompatible with dataset mode processes.")
def validate(self) -> None:
"""
......@@ -275,8 +255,6 @@ class ProcessBuilder(object):
slug=import_task_slug,
env=self.base_env,
)
# Add thumbnails
self._add_thumbnails(import_task_slug=import_task_slug)
# Distribute worker run tasks
worker_runs = list(self.process.worker_runs.all())
......
# Generated by Django 4.1.7 on 2024-03-27 16:38
from django.db import migrations
class Migration(migrations.Migration):
    """Drop the ``generate_thumbnails`` boolean from the Process model."""

    dependencies = [
        ("process", "0032_processdatasetset_model"),
    ]

    operations = [
        migrations.RemoveField(
            model_name="process",
            name="generate_thumbnails",
        ),
    ]
......@@ -135,9 +135,6 @@ class Process(IndexableModel):
# Used to include sub-elements of the actual query in Workers processes
load_children = models.BooleanField(default=False)
# Whether or not to add a thumbnail generation task
generate_thumbnails = models.BooleanField(default=False)
chunks = models.PositiveIntegerField(
default=1,
validators=[
......
......@@ -258,9 +258,6 @@ class FilesProcessSerializer(serializers.ModelSerializer):
# Automatically set the creator on the process
creator = serializers.HiddenField(default=serializers.CurrentUserDefault())
# Always start the process with thumbnail generation enabled
generate_thumbnails = serializers.HiddenField(default=True)
default_error_messages = {
"mode_not_allowed": "This mode is not allowed when importing from files",
"files_required": "At least one file is required to start an import process",
......@@ -287,7 +284,6 @@ class FilesProcessSerializer(serializers.ModelSerializer):
"element_type",
"farm_id",
"creator",
"generate_thumbnails",
)
def validate_mode(self, mode):
......@@ -389,7 +385,6 @@ class StartProcessSerializer(serializers.Serializer):
validators=[MaxValueValidator(lambda: settings.MAX_CHUNKS)],
default=1,
)
thumbnails = serializers.BooleanField(default=False, source="generate_thumbnails")
farm = serializers.PrimaryKeyRelatedField(queryset=Farm.objects.all(), default=None, allow_null=True)
use_cache = serializers.BooleanField(default=False)
use_gpu = serializers.BooleanField(default=False, allow_null=True)
......@@ -421,8 +416,6 @@ class StartProcessSerializer(serializers.Serializer):
elif not any(ds.dataset.corpus_id == self.instance.corpus.id for ds in self.instance.sets.all()):
errors["non_field_errors"].append("At least one of the process sets must be from the same corpus as the process.")
if validated_data.get("generate_thumbnails"):
errors["thumbnails"].append("Thumbnails generation is not supported on Dataset processes.")
if validated_data.get("worker_activity"):
errors["worker_activity"].append("Worker activities are not supported on Dataset processes.")
if validated_data.get("use_cache"):
......@@ -481,14 +474,7 @@ class StartProcessSerializer(serializers.Serializer):
)
else:
if validated_data.get("worker_activity"):
errors["worker_activity"].append("The process must have workers attached to handle their activity.")
if validated_data.get("use_cache"):
errors["use_cache"].append("The process must have workers attached to use cached results.")
if validated_data.get("use_gpu"):
errors["use_gpu"].append("The process must have workers attached to use GPUs.")
if not validated_data.get("generate_thumbnails"):
errors["__all__"].append("The process must either use thumbnail generation or have worker runs.")
errors["__all__"].append("The process must have worker runs to be started.")
if errors:
raise ValidationError(errors)
......
......@@ -117,10 +117,14 @@ class TestCreateProcess(FixtureAPITestCase):
})
# The process needs to be started to produce a workflow
process.worker_runs.create(
version=self.version_1
)
response = self.client.post(
reverse("api:process-start", kwargs={"pk": str(process.id)}),
# The process needs a worker run or thumbnails generation to start
{"thumbnails": "true"}
{}
)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
process.refresh_from_db()
......@@ -487,10 +491,13 @@ class TestCreateProcess(FixtureAPITestCase):
self.assertEqual(process.elements.get(), page)
# The process needs to be started to produce a workflow
process.worker_runs.create(
version=self.version_1
)
response = self.client.post(
reverse("api:process-start", kwargs={"pk": str(process.id)}),
# The process needs a worker run or thumbnails generation to start
{"thumbnails": "true"}
{}
)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
process.refresh_from_db()
......@@ -533,46 +540,6 @@ class TestCreateProcess(FixtureAPITestCase):
process = Process.objects.get(id=data["id"])
self.assertEqual(process.name, "blah")
def test_thumbnails_generation_only(self):
    """
    Generating thumbnails without any worker must generate a process task
    and a thumbnails task, when the process is started
    """
    self.client.force_login(self.user)
    element_type = self.corpus.types.first()

    # Create a process on the corpus; no tasks exist until it is started
    create_response = self.client.post(
        reverse("api:corpus-process"),
        {
            "corpus": str(self.corpus.id),
            "element_type": element_type.slug,
        },
        format="json"
    )
    self.assertEqual(create_response.status_code, status.HTTP_201_CREATED)
    process = Process.objects.get(id=create_response.json()["id"])
    self.assertFalse(process.tasks.exists())

    # Start the process with three chunks and thumbnail generation enabled
    start_response = self.client.post(
        reverse("api:process-start", kwargs={"pk": str(process.id)}),
        {
            "chunks": 3,
            "thumbnails": True,
        }
    )
    self.assertEqual(start_response.status_code, status.HTTP_201_CREATED)
    process.refresh_from_db()

    # One initialisation task plus one thumbnails task per chunk
    self.assertEqual(process.tasks.count(), 4)
    self.assertEqual(
        process.tasks.get(slug="initialisation").command,
        f"python -m arkindex_tasks.init_elements {process.id} --chunks-number 3",
    )
    for i in range(1, 4):
        self.assertEqual(
            process.tasks.get(slug=f"thumbnails_{i}").command,
            f"python3 -m arkindex_tasks.generate_thumbnails /data/initialisation/elements_chunk_{i}.json"
        )
@override_settings(
ARKINDEX_TASKS_IMAGE="registry.teklia.com/tasks",
PONOS_DEFAULT_ENV={}
......
......@@ -2168,7 +2168,7 @@ class TestProcesses(FixtureAPITestCase):
self.assertEqual(
response.json(),
{"__all__": ["The process must either use thumbnail generation or have worker runs."]},
{"__all__": ["The process must have worker runs to be started."]},
)
def test_start_process_unavailable_worker_version(self):
......@@ -2355,7 +2355,6 @@ class TestProcesses(FixtureAPITestCase):
response = self.client.post(
reverse("api:process-start", kwargs={"pk": str(process2.id)}),
{
"thumbnails": True,
"use_cache": True,
"worker_activity": True,
}
......@@ -2363,7 +2362,6 @@ class TestProcesses(FixtureAPITestCase):
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertDictEqual(response.json(), {
"thumbnails": ["Thumbnails generation is not supported on Dataset processes."],
"use_cache": ["Caching is not supported on Dataset processes."],
"worker_activity": ["Worker activities are not supported on Dataset processes."],
})
......@@ -2509,7 +2507,6 @@ class TestProcesses(FixtureAPITestCase):
({"chunks": 0}, {"chunks": ["Ensure this value is greater than or equal to 1."]}),
({"chunks": 20}, {"chunks": ["Ensure this value is less than or equal to 10."]}),
({"chunks": "max"}, {"chunks": ["A valid integer is required."]}),
({"thumbnails": "gloubiboulga"}, {"thumbnails": ["Must be a valid boolean."]})
]
for (params, check) in wrong_params_checks:
with self.subTest(**params), self.assertNumQueries(5):
......@@ -2539,7 +2536,7 @@ class TestProcesses(FixtureAPITestCase):
def test_start_process_workers_parameters(self):
"""
It should be possible to pass chunks and thumbnails parameters when starting a workers process
It should be possible to pass chunks parameters when starting a workers process
"""
process = self.corpus.processes.create(creator=self.user, mode=ProcessMode.Workers)
# Add a worker run to this process
......@@ -2548,7 +2545,7 @@ class TestProcesses(FixtureAPITestCase):
self.client.force_login(self.user)
response = self.client.post(
reverse("api:process-start", kwargs={"pk": str(process.id)}),
{"chunks": 3, "thumbnails": "true"}
{"chunks": 3}
)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
process.refresh_from_db()
......@@ -2558,9 +2555,6 @@ class TestProcesses(FixtureAPITestCase):
f"{run.task_slug}_1",
f"{run.task_slug}_2",
f"{run.task_slug}_3",
"thumbnails_1",
"thumbnails_2",
"thumbnails_3"
])
def test_start_process_dataset_chunks(self):
......@@ -2608,10 +2602,7 @@ class TestProcesses(FixtureAPITestCase):
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertDictEqual(response.json(), {
"__all__": ["The process must either use thumbnail generation or have worker runs."],
"use_cache": ["The process must have workers attached to use cached results."],
"worker_activity": ["The process must have workers attached to handle their activity."],
"use_gpu": ["The process must have workers attached to use GPUs."],
"__all__": ["The process must have worker runs to be started."]
})
process.refresh_from_db()
self.assertFalse(process.use_cache)
......
......@@ -29,7 +29,6 @@ SELECT "process_process"."id",
"process_process"."name_contains",
"process_process"."ml_class_id",
"process_process"."load_children",
"process_process"."generate_thumbnails",
"process_process"."chunks",
"process_process"."use_cache",
"process_process"."use_gpu",
......
......@@ -29,7 +29,6 @@ SELECT "process_process"."id",
"process_process"."name_contains",
"process_process"."ml_class_id",
"process_process"."load_children",
"process_process"."generate_thumbnails",
"process_process"."chunks",
"process_process"."use_cache",
"process_process"."use_gpu",
......
......@@ -29,7 +29,6 @@ SELECT "process_process"."id",
"process_process"."name_contains",
"process_process"."ml_class_id",
"process_process"."load_children",
"process_process"."generate_thumbnails",
"process_process"."chunks",
"process_process"."use_cache",
"process_process"."use_gpu",
......
......@@ -29,7 +29,6 @@ SELECT "process_process"."id",
"process_process"."name_contains",
"process_process"."ml_class_id",
"process_process"."load_children",
"process_process"."generate_thumbnails",
"process_process"."chunks",
"process_process"."use_cache",
"process_process"."use_gpu",
......