Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • arkindex/backend
1 result
Show changes
Commits on Source (3)
1.2.4-rc3
1.2.4
import re
from datetime import timedelta
from urllib.parse import quote
from uuid import UUID
from botocore.exceptions import ClientError
from django.conf import settings
......@@ -33,6 +34,8 @@ class Command(BaseCommand):
self.cleanup_old_exports()
self.cleanup_orphan_exports()
self.cleanup_trashed_datafiles()
self.cleanup_local_images()
......@@ -145,6 +148,29 @@ class Command(BaseCommand):
old_exports.delete()
self.stdout.write(self.style.SUCCESS('Successfully cleaned up old corpus exports.'))
def cleanup_orphan_exports(self):
    """
    Remove all corpus exports on S3 that are not tied to a CorpusExport instance.

    Each object key in the ``settings.AWS_EXPORT_BUCKET`` bucket is parsed as a
    UUID and looked up as a CorpusExport ID:

    - keys that cannot be parsed as a UUID are reported with a warning and
      are left untouched;
    - objects whose UUID matches an existing CorpusExport are kept;
    - any other object is deleted from the bucket.

    A ``ClientError`` raised while deleting an object is written to stdout
    as an error and does not interrupt the cleanup of the remaining objects.
    """
    self.stdout.write('Removing orphaned corpus exports…')
    bucket = s3.Bucket(settings.AWS_EXPORT_BUCKET)
    for obj in bucket.objects.all():
        try:
            export_id = UUID(obj.key)
        except ValueError:
            # The key cannot be matched to any export ID, so never delete it blindly
            self.stdout.write(self.style.WARNING(f"Unsupported corpus export {obj.key}"))
            continue

        if CorpusExport.objects.filter(id=export_id).exists():
            # Still referenced from the database: keep the file
            continue

        # Progress message, ending with an ellipsis like the other progress messages
        self.stdout.write(f'Removing corpus export {obj.key}…')
        try:
            obj.delete()
        except ClientError as e:
            # Best effort: report the failure and carry on with the next object
            self.stdout.write(self.style.ERROR(str(e)))

    self.stdout.write(self.style.SUCCESS('Successfully cleaned up orphaned corpus exports.'))
def cleanup_trashed_datafiles(self):
datafiles = DataFile.objects.filter(trashed=True)
self.stdout.write(f'Deleting {datafiles.count()} DataFiles marked as trashed from S3 and the database…')
......
......@@ -53,6 +53,8 @@ class TestCleanupCommand(FixtureTestCase):
Removing export {done_export.id} from S3…
Removing 2 old corpus exports…
Successfully cleaned up old corpus exports.
Removing orphaned corpus exports…
Successfully cleaned up orphaned corpus exports.
Deleting 0 DataFiles marked as trashed from S3 and the database…
Successfully cleaned up DataFiles marked as trashed.
Removing orphaned local images…
......@@ -89,6 +91,8 @@ class TestCleanupCommand(FixtureTestCase):
Removing 0 old corpus exports from S3…
Removing 0 old corpus exports…
Successfully cleaned up old corpus exports.
Removing orphaned corpus exports…
Successfully cleaned up orphaned corpus exports.
Deleting 0 DataFiles marked as trashed from S3 and the database…
Successfully cleaned up DataFiles marked as trashed.
Removing orphaned local images…
......@@ -126,6 +130,8 @@ class TestCleanupCommand(FixtureTestCase):
Export {done_export.id} not found on S3, skipping
Removing 1 old corpus exports…
Successfully cleaned up old corpus exports.
Removing orphaned corpus exports…
Successfully cleaned up orphaned corpus exports.
Deleting 0 DataFiles marked as trashed from S3 and the database…
Successfully cleaned up DataFiles marked as trashed.
Removing orphaned local images…
......@@ -170,6 +176,8 @@ class TestCleanupCommand(FixtureTestCase):
Removing export {done_export.id} from S3…
Removing 1 old corpus exports…
Successfully cleaned up old corpus exports.
Removing orphaned corpus exports…
Successfully cleaned up orphaned corpus exports.
Deleting 0 DataFiles marked as trashed from S3 and the database…
Successfully cleaned up DataFiles marked as trashed.
Removing orphaned local images…
......@@ -216,6 +224,8 @@ class TestCleanupCommand(FixtureTestCase):
Removing 0 old corpus exports from S3…
Removing 0 old corpus exports…
Successfully cleaned up old corpus exports.
Removing orphaned corpus exports…
Successfully cleaned up orphaned corpus exports.
Deleting 1 DataFiles marked as trashed from S3 and the database…
Deleting DataFile {trashed_df.id}
Successfully cleaned up DataFiles marked as trashed.
......@@ -270,6 +280,8 @@ class TestCleanupCommand(FixtureTestCase):
Removing 0 old corpus exports from S3…
Removing 0 old corpus exports…
Successfully cleaned up old corpus exports.
Removing orphaned corpus exports…
Successfully cleaned up orphaned corpus exports.
Deleting 1 DataFiles marked as trashed from S3 and the database…
Deleting DataFile {trashed_df.id}
Something went wrong
......@@ -305,6 +317,8 @@ class TestCleanupCommand(FixtureTestCase):
cleanup_s3_mock.Bucket.return_value.objects.all.side_effect = [
# Bucket for Ponos artifacts
[good_s3_artifact, orphan_s3_artifact, unsupported_s3_artifact, broken_s3_artifact],
# Bucket for corpus exports
[],
# Bucket for IIIF images
[],
# Bucket for Ponos logs
......@@ -333,6 +347,8 @@ class TestCleanupCommand(FixtureTestCase):
Removing 0 old corpus exports from S3…
Removing 0 old corpus exports…
Successfully cleaned up old corpus exports.
Removing orphaned corpus exports…
Successfully cleaned up orphaned corpus exports.
Deleting 0 DataFiles marked as trashed from S3 and the database…
Successfully cleaned up DataFiles marked as trashed.
Removing orphaned local images…
......@@ -344,14 +360,15 @@ class TestCleanupCommand(FixtureTestCase):
"""
).strip()
)
self.assertEqual(cleanup_s3_mock.Bucket.call_count, 4)
self.assertEqual(cleanup_s3_mock.Bucket.call_count, 5)
self.assertListEqual(cleanup_s3_mock.Bucket.call_args_list, [
call('ponos-artifacts'),
call('export'),
call('iiif'),
call('ponos-logs'),
call('training')
])
self.assertEqual(cleanup_s3_mock.Bucket().objects.all.call_count, 4)
self.assertEqual(cleanup_s3_mock.Bucket().objects.all.call_count, 5)
self.assertEqual(good_s3_artifact.delete.call_count, 0)
self.assertEqual(orphan_s3_artifact.delete.call_count, 1)
self.assertEqual(unsupported_s3_artifact.delete.call_count, 0)
......@@ -403,6 +420,8 @@ class TestCleanupCommand(FixtureTestCase):
Removing 0 old corpus exports from S3…
Removing 0 old corpus exports…
Successfully cleaned up old corpus exports.
Removing orphaned corpus exports…
Successfully cleaned up orphaned corpus exports.
Deleting 0 DataFiles marked as trashed from S3 and the database…
Successfully cleaned up DataFiles marked as trashed.
Removing orphaned local images…
......@@ -498,6 +517,8 @@ class TestCleanupCommand(FixtureTestCase):
Removing 0 old corpus exports from S3…
Removing 0 old corpus exports…
Successfully cleaned up old corpus exports.
Removing orphaned corpus exports…
Successfully cleaned up orphaned corpus exports.
Deleting 0 DataFiles marked as trashed from S3 and the database…
Successfully cleaned up DataFiles marked as trashed.
Removing orphaned local images…
......@@ -549,6 +570,8 @@ class TestCleanupCommand(FixtureTestCase):
cleanup_s3_mock.Bucket.return_value.objects.all.side_effect = [
# Bucket for Ponos artifacts
[],
# Bucket for corpus exports
[],
# Bucket for IIIF images
[broken_object, img_object, orphan_object],
# Bucket for Ponos logs
......@@ -573,6 +596,8 @@ class TestCleanupCommand(FixtureTestCase):
Removing 0 old corpus exports from S3…
Removing 0 old corpus exports…
Successfully cleaned up old corpus exports.
Removing orphaned corpus exports…
Successfully cleaned up orphaned corpus exports.
Deleting 0 DataFiles marked as trashed from S3 and the database…
Successfully cleaned up DataFiles marked as trashed.
Removing orphaned local images…
......@@ -587,14 +612,15 @@ class TestCleanupCommand(FixtureTestCase):
"""
).strip()
)
self.assertEqual(cleanup_s3_mock.Bucket.call_count, 4)
self.assertEqual(cleanup_s3_mock.Bucket.call_count, 5)
self.assertListEqual(cleanup_s3_mock.Bucket.call_args_list, [
call('ponos-artifacts'),
call('export'),
call('iiif'),
call('ponos-logs'),
call('training')
])
self.assertEqual(cleanup_s3_mock.Bucket().objects.all.call_count, 4)
self.assertEqual(cleanup_s3_mock.Bucket().objects.all.call_count, 5)
self.assertEqual(img_object.delete.call_count, 0)
self.assertEqual(broken_object.delete.call_count, 1)
self.assertEqual(orphan_object.delete.call_count, 1)
......@@ -617,6 +643,8 @@ class TestCleanupCommand(FixtureTestCase):
cleanup_s3_mock.Bucket.return_value.objects.all.side_effect = [
# Bucket for Ponos artifacts
[],
# Bucket for corpus exports
[],
# Bucket for IIIF images
[],
# Bucket for Ponos logs
......@@ -641,6 +669,8 @@ class TestCleanupCommand(FixtureTestCase):
Removing 0 old corpus exports from S3…
Removing 0 old corpus exports…
Successfully cleaned up old corpus exports.
Removing orphaned corpus exports…
Successfully cleaned up orphaned corpus exports.
Deleting 0 DataFiles marked as trashed from S3 and the database…
Successfully cleaned up DataFiles marked as trashed.
Removing orphaned local images…
......@@ -657,14 +687,15 @@ class TestCleanupCommand(FixtureTestCase):
).strip()
)
self.assertEqual(cleanup_s3_mock.Bucket.call_count, 4)
self.assertEqual(cleanup_s3_mock.Bucket.call_count, 5)
self.assertListEqual(cleanup_s3_mock.Bucket.call_args_list, [
call('ponos-artifacts'),
call('export'),
call('iiif'),
call('ponos-logs'),
call('training')
])
self.assertEqual(cleanup_s3_mock.Bucket().objects.all.call_count, 4)
self.assertEqual(cleanup_s3_mock.Bucket().objects.all.call_count, 5)
self.assertEqual(good_s3_log.delete.call_count, 0)
self.assertEqual(orphan_s3_log.delete.call_count, 1)
self.assertEqual(unsupported_s3_log.delete.call_count, 0)
......@@ -688,6 +719,8 @@ class TestCleanupCommand(FixtureTestCase):
cleanup_s3_mock.Bucket.return_value.objects.all.side_effect = [
# Bucket for Ponos artifacts
[],
# Bucket for corpus exports
[],
# Bucket for IIIF images
[],
# Bucket for Ponos logs
......@@ -712,6 +745,8 @@ class TestCleanupCommand(FixtureTestCase):
Removing 0 old corpus exports from S3…
Removing 0 old corpus exports…
Successfully cleaned up old corpus exports.
Removing orphaned corpus exports…
Successfully cleaned up orphaned corpus exports.
Deleting 0 DataFiles marked as trashed from S3 and the database…
Successfully cleaned up DataFiles marked as trashed.
Removing orphaned local images…
......@@ -728,15 +763,91 @@ class TestCleanupCommand(FixtureTestCase):
).strip()
)
self.assertEqual(cleanup_s3_mock.Bucket.call_count, 4)
self.assertEqual(cleanup_s3_mock.Bucket.call_count, 5)
self.assertListEqual(cleanup_s3_mock.Bucket.call_args_list, [
call('ponos-artifacts'),
call('export'),
call('iiif'),
call('ponos-logs'),
call('training')
])
self.assertEqual(cleanup_s3_mock.Bucket().objects.all.call_count, 4)
self.assertEqual(cleanup_s3_mock.Bucket().objects.all.call_count, 5)
self.assertEqual(good_s3_version.delete.call_count, 0)
self.assertEqual(orphan_s3_version.delete.call_count, 1)
self.assertEqual(unsupported_s3_version.delete.call_count, 0)
self.assertEqual(broken_s3_version.delete.call_count, 1)
@patch('arkindex.documents.management.commands.cleanup.s3')
def test_cleanup_orphan_exports(self, cleanup_s3_mock, s3_mock):
    """
    Corpus exports found on S3 without a matching CorpusExport are deleted.

    Four S3 objects are exposed by the export bucket: one matching an existing
    export (kept), one orphan (deleted), one whose key is not a UUID (reported
    and kept), and one orphan whose deletion fails (error reported).
    """
    existing = self.corpus.exports.create(user=self.superuser)

    kept_obj = MagicMock(key=str(existing.id))
    orphan_obj = MagicMock(key='bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb')
    invalid_obj = MagicMock(key='cant_touch_this.txt.vbs')
    failing_obj = MagicMock(key='aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa')
    failing_obj.delete.side_effect = ClientError({'Error': {'Code': '500'}}, 'delete_object')

    # Every bucket shares the same mock; listings are returned in the order the buckets are read
    list_objects = cleanup_s3_mock.Bucket.return_value.objects.all
    list_objects.side_effect = [
        # Bucket for Ponos artifacts
        [],
        # Bucket for corpus exports
        [kept_obj, orphan_obj, invalid_obj, failing_obj],
        # Bucket for IIIF images
        [],
        # Bucket for Ponos logs
        [],
        # Bucket for model version archives
        [],
    ]

    expected_output = dedent(
        """
        Removing orphaned Ponos artifacts…
        Successfully cleaned up orphaned Ponos artifacts.
        Removing 0 artifacts of expired workflows from S3…
        Removing logs for 0 tasks of expired workflows from S3…
        Updating 0 available worker versions to the Error state…
        Removing 0 artifacts of expired workflows…
        Removing 0 tasks of expired workflows…
        Removing 0 expired workflows…
        Successfully cleaned up expired workflows.
        Removing 0 old corpus exports from S3…
        Removing 0 old corpus exports…
        Successfully cleaned up old corpus exports.
        Removing orphaned corpus exports…
        Removing corpus export bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb…
        Unsupported corpus export cant_touch_this.txt.vbs
        Removing corpus export aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa…
        An error occurred (500) when calling the delete_object operation: Unknown
        Successfully cleaned up orphaned corpus exports.
        Deleting 0 DataFiles marked as trashed from S3 and the database…
        Successfully cleaned up DataFiles marked as trashed.
        Removing orphaned local images…
        Successfully cleaned up orphaned local images.
        Removing orphaned Ponos logs…
        Successfully cleaned up orphaned Ponos logs.
        Removing orphaned model versions archives…
        Successfully cleaned up orphaned model versions archives.
        """
    ).strip()
    self.assertEqual(self.cleanup(), expected_output)

    # Each of the five buckets is opened once, in a fixed order, and listed once
    self.assertEqual(cleanup_s3_mock.Bucket.call_count, 5)
    self.assertListEqual(cleanup_s3_mock.Bucket.call_args_list, [
        call('ponos-artifacts'),
        call('export'),
        call('iiif'),
        call('ponos-logs'),
        call('training'),
    ])
    self.assertEqual(list_objects.call_count, 5)

    # Only the two orphans get a deletion attempt
    expected_deletions = (
        (kept_obj, 0),
        (orphan_obj, 1),
        (invalid_obj, 0),
        (failing_obj, 1),
    )
    for s3_object, delete_calls in expected_deletions:
        self.assertEqual(s3_object.delete.call_count, delete_calls)