Skip to content
Snippets Groups Projects
Commit 19e861f0 authored by ml bonhomme, committed by Erwan Rouchet
Browse files

Update ponos artifact regex for cleanup

parent 861c5580
No related branches found
No related tags found
1 merge request: !1795 Update ponos artifact regex for cleanup
......@@ -17,11 +17,13 @@ from arkindex.training.models import ModelVersion
from ponos.models import Artifact, Task, Workflow
# Ponos artifacts use the path: <workflow uuid>/<task id>/<path>
# Before June 2020, artifacts used <workflow uuid>/run_<run id>/<task id>.tar.zst
REGEX_ARTIFACT = re.compile(r'^(?P<workflow_id>[0-9a-f\-]{36})/(?P<task_id>[0-9a-f\-]{36})/')
REGEX_OLD_ARTIFACT = re.compile(r'^(?P<workflow_id>[0-9a-f\-]{36})/run_(?P<run_id>[0-9]+)/(?P<task_id>[0-9a-f\-]{36})\.tar\.zst$')
# Ponos logs use the path: <workflow uuid>/run_<run id>/<task id>.log
REGEX_LOG = re.compile(r'^(?P<workflow_id>[0-9a-f\-]{36})/run_(?P<run_id>[0-9]+)/(?P<task_id>[0-9a-f\-]{36}).log')
REGEX_LOG = re.compile(r'^(?P<workflow_id>[0-9a-f\-]{36})/run_(?P<run_id>[0-9]+)/(?P<task_id>[0-9a-f\-]{36})\.log$')
# Model version artifacts use the path: <modelversion uuid>.zst
REGEX_MODEL_VERSION = re.compile(r'^(?P<modelversion_id>[0-9a-f\-]{36}).zst$')
REGEX_MODEL_VERSION = re.compile(r'^(?P<modelversion_id>[0-9a-f\-]{36})\.zst$')
class Command(BaseCommand):
......@@ -56,7 +58,17 @@ class Command(BaseCommand):
# Parse workflow and task
match = REGEX_ARTIFACT.match(obj.key)
if match is None:
self.stdout.write(self.style.WARNING(f"Unsupported artifact {obj.key}"))
old_match = REGEX_OLD_ARTIFACT.match(obj.key)
if old_match is None:
self.stdout.write(self.style.WARNING(f"Unsupported artifact {obj.key}"))
continue
# If REGEX_OLD_ARTIFACT matches the file, it is an obsolete and unsupported artifact so it
# can be deleted without looking for a matching task
self.stdout.write(f'Removing obsolete artifact {obj.key}')
try:
obj.delete()
except ClientError as e:
self.stdout.write(self.style.ERROR(str(e)))
continue
# Find matching task
......
......@@ -308,6 +308,8 @@ class TestCleanupCommand(FixtureTestCase):
good_s3_artifact.key = f'{workflow.id}/{task.id}/path/to/thing.txt'
orphan_s3_artifact = MagicMock()
orphan_s3_artifact.key = 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa/bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb/sad/artifact.zip'
obsolete_artifact = MagicMock()
obsolete_artifact.key = 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa/run_0/bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb.tar.zst'
unsupported_s3_artifact = MagicMock()
unsupported_s3_artifact.key = 'cant_touch_this.txt.vbs'
broken_s3_artifact = MagicMock()
......@@ -316,7 +318,7 @@ class TestCleanupCommand(FixtureTestCase):
cleanup_s3_mock.Bucket.return_value.objects.all.side_effect = [
# Bucket for Ponos artifacts
[good_s3_artifact, orphan_s3_artifact, unsupported_s3_artifact, broken_s3_artifact],
[good_s3_artifact, orphan_s3_artifact, obsolete_artifact, unsupported_s3_artifact, broken_s3_artifact],
# Bucket for corpus exports
[],
# Bucket for IIIF images
......@@ -333,6 +335,7 @@ class TestCleanupCommand(FixtureTestCase):
"""
Removing orphaned Ponos artifacts…
Removing artifact aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa/bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb/sad/artifact.zip…
Removing obsolete artifact aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa/run_0/bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb.tar.zst…
Unsupported artifact cant_touch_this.txt.vbs
Removing artifact aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa/bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb/nope.zip…
An error occurred (500) when calling the delete_object operation: Unknown
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment