Skip to content
Snippets Groups Projects
Commit 1f3da07a authored by Erwan Rouchet's avatar Erwan Rouchet Committed by Bastien Abadie
Browse files

Add an export cleanup command

parent 135dc5ad
No related branches found
No related tags found
1 merge request: !1376 — Add an export cleanup command
from datetime import datetime, timedelta, timezone
from botocore.exceptions import ClientError
from django.core.management.base import BaseCommand
from arkindex.documents.models import CorpusExport, CorpusExportState
class Command(BaseCommand):
    help = 'Clean up old exports'

    def handle(self, *args, **options):
        """
        Delete corpus exports older than two weeks, removing their S3
        artifacts first when they exist.
        """
        # Anything created more than two weeks ago is considered stale.
        cutoff = datetime.now(timezone.utc) - timedelta(weeks=2)
        stale_exports = CorpusExport.objects.filter(created__lt=cutoff).only('id')

        # Only exports that completed successfully ever uploaded a file to S3,
        # so restrict the S3 cleanup pass to those.
        stale_done_exports = stale_exports.filter(state=CorpusExportState.Done)
        self.stdout.write(f'Removing {stale_done_exports.count()} old corpus exports from S3…')
        for export in stale_done_exports.iterator():
            self.stdout.write(f'Removing export {export.id} from S3…')
            try:
                export.s3_delete()
            except ClientError as e:
                if e.response['Error']['Code'] == '404':
                    # The object is already gone on S3; nothing left to do here.
                    self.stdout.write(f'Export {export.id} not found on S3, skipping')
                else:
                    # Best effort: report the failure but keep cleaning up;
                    # the database row is still removed below.
                    self.stdout.write(self.style.ERROR(str(e)))

        # Remove the database rows themselves, whatever their state.
        self.stdout.write(f'Removing {stale_exports.count()} old corpus exports…')
        stale_exports.delete()
        self.stdout.write(self.style.SUCCESS('Successfully cleaned up old corpus exports.'))
from datetime import datetime, timedelta, timezone
from io import StringIO
from textwrap import dedent
from unittest.mock import call, patch
from botocore.exceptions import ClientError
from django.core.management import call_command
from django.test import override_settings
from arkindex.documents.models import CorpusExport, CorpusExportState
from arkindex.project.tests import FixtureTestCase
@override_settings(AWS_EXPORT_BUCKET='export')
class TestCleanupCommand(FixtureTestCase):
    """Tests for the `cleanup` management command that removes old corpus exports."""

    def cleanup(self, args=None):
        """
        Run the cleanup command, capturing its output.

        :param args: Optional list of extra command-line arguments.
        :returns: The command's combined stdout/stderr output, stripped.
        """
        # `args=None` instead of a mutable `args=[]` default: mutable default
        # arguments are shared across calls and are a classic Python pitfall.
        args = list(args) if args else []
        output = StringIO()
        call_command('cleanup', args + ['--no-color'], stdout=output, stderr=output)
        return output.getvalue().strip()

    @patch('arkindex.project.aws.s3.Object')
    def test_cleanup(self, s3_object_mock):
        """Old exports are deleted; only Done exports trigger an S3 delete; recent exports are kept."""
        young_export = self.corpus.exports.create(user=self.superuser)
        # Use a fake creation time to make old exports
        with patch('django.utils.timezone.now') as mock_now:
            mock_now.return_value = datetime.now(timezone.utc) - timedelta(days=42)
            self.corpus.exports.create(user=self.user, state=CorpusExportState.Failed)
            done_export = self.corpus.exports.create(user=self.superuser, state=CorpusExportState.Done)
        self.assertEqual(
            self.cleanup(),
            dedent(
                f"""
                Removing 1 old corpus exports from S3…
                Removing export {done_export.id} from S3…
                Removing 2 old corpus exports…
                Successfully cleaned up old corpus exports.
                """
            ).strip()
        )
        # Only the recent export survives.
        self.assertEqual(CorpusExport.objects.count(), 1)
        self.assertEqual(CorpusExport.objects.get(), young_export)
        # Only the Done export had a file on S3, so exactly one S3 delete.
        self.assertEqual(s3_object_mock.call_count, 1)
        self.assertEqual(s3_object_mock.call_args, call('export', str(done_export.id)))
        self.assertEqual(s3_object_mock.return_value.delete.call_count, 1)

    def test_nothing(self):
        """The command runs cleanly when there are no exports at all."""
        self.assertFalse(CorpusExport.objects.exists())
        self.assertEqual(
            self.cleanup(),
            dedent(
                """
                Removing 0 old corpus exports from S3…
                Removing 0 old corpus exports…
                Successfully cleaned up old corpus exports.
                """
            ).strip()
        )

    @patch('arkindex.project.aws.s3.Object')
    def test_s3_not_found(self, s3_object_mock):
        """A 404 from S3 is reported and skipped; the export row is still removed."""
        s3_object_mock.return_value.delete.side_effect = ClientError({'Error': {'Code': '404'}}, 'delete_object')
        with patch('django.utils.timezone.now') as mock_now:
            mock_now.return_value = datetime.now(timezone.utc) - timedelta(days=42)
            done_export = self.corpus.exports.create(user=self.superuser, state=CorpusExportState.Done)
        self.assertEqual(
            self.cleanup(),
            dedent(
                f"""
                Removing 1 old corpus exports from S3…
                Removing export {done_export.id} from S3…
                Export {done_export.id} not found on S3, skipping
                Removing 1 old corpus exports…
                Successfully cleaned up old corpus exports.
                """
            ).strip()
        )
        self.assertFalse(CorpusExport.objects.exists())
        self.assertEqual(s3_object_mock.call_count, 1)
        self.assertEqual(s3_object_mock.call_args, call('export', str(done_export.id)))
        # s3.Object.delete should only be called once, not retried
        self.assertEqual(s3_object_mock.return_value.delete.call_count, 1)

    @patch('arkindex.project.aws.s3.Object')
    def test_s3_error(self, s3_object_mock):
        """Transient S3 errors are retried (by s3_delete) and do not abort the cleanup."""
        error = ClientError({'Error': {'Code': '500'}}, 'delete_object')
        # Fail twice, then delete successfully
        s3_object_mock.return_value.delete.side_effect = [error, error, None]
        with patch('django.utils.timezone.now') as mock_now:
            mock_now.return_value = datetime.now(timezone.utc) - timedelta(days=42)
            done_export = self.corpus.exports.create(user=self.superuser, state=CorpusExportState.Done)
        self.assertEqual(
            self.cleanup(),
            dedent(
                f"""
                Removing 1 old corpus exports from S3…
                Removing export {done_export.id} from S3…
                Removing 1 old corpus exports…
                Successfully cleaned up old corpus exports.
                """
            ).strip()
        )
        self.assertFalse(CorpusExport.objects.exists())
        self.assertEqual(s3_object_mock.call_count, 1)
        self.assertEqual(s3_object_mock.call_args, call('export', str(done_export.id)))
        # Two failures followed by a success: three delete attempts in total.
        self.assertEqual(s3_object_mock.return_value.delete.call_count, 3)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment