Skip to content
Snippets Groups Projects
Commit 9fc32c4f authored by Bastien Abadie's avatar Bastien Abadie
Browse files

Merge branch 'delete-s3' into 'master'

Delete DataFiles from S3 when deleting them via the API

Closes #289

See merge request !789
parents 4895b230 527cabeb
No related branches found
No related tags found
1 merge request!789Delete DataFiles from S3 when deleting them via the API
......@@ -34,9 +34,12 @@ from ponos.models import State
from datetime import datetime
from uuid import UUID
import ijson
import logging
import magic
import os.path
logger = logging.getLogger(__name__)
class DataImportsList(CorpusACLMixin, ListAPIView):
"""
......@@ -339,6 +342,10 @@ class DataFileRetrieve(CorpusACLMixin, RetrieveUpdateDestroyAPIView):
def perform_destroy(self, instance):
if not self.has_write_access(instance.corpus):
raise PermissionDenied
try:
instance.s3_delete()
except Exception as e:
logger.warning(f'Could not delete DataFile from S3: {e}')
return super().perform_destroy(instance)
......
......@@ -3,6 +3,7 @@ from rest_framework import status
from django.urls import reverse
from django.core.files.uploadedfile import SimpleUploadedFile
from django.test import override_settings
from botocore.exceptions import ClientError
from arkindex.documents.models import Corpus
from arkindex.dataimport.models import DataFile
from arkindex.project.tests import FixtureAPITestCase
......@@ -173,3 +174,54 @@ class TestFiles(FixtureAPITestCase):
df = DataFile.objects.get(id=data['id'])
self.assertEqual(df.name, 'manifest')
self.assertEqual(df.content_type, 'application/json')
@patch('arkindex.project.aws.s3')
def test_file_delete(self, s3_mock):
self.assertTrue(DataFile.objects.exists())
response = self.client.delete(reverse('api:file-retrieve', kwargs={'pk': self.df.id}))
self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT)
self.assertFalse(DataFile.objects.exists())
self.assertEqual(s3_mock.Object().delete.call_count, 1)
@patch('arkindex.project.aws.s3')
def test_file_delete_ignore_s3_errors(self, s3_mock):
"""
Test the DataFile deletion tries multiple times to delete from S3, but ignores errors if it fails
"""
exceptions = [
ClientError({'Error': {'Code': '500'}}, 'delete_object'),
ClientError({'Error': {'Code': '500'}}, 'delete_object'),
ValueError,
]
def _raise(*args, **kwargs):
raise exceptions.pop(0)
s3_mock.Object().delete.side_effect = _raise
self.assertTrue(DataFile.objects.exists())
response = self.client.delete(reverse('api:file-retrieve', kwargs={'pk': self.df.id}))
self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT)
self.assertFalse(DataFile.objects.exists())
self.assertEqual(s3_mock.Object().delete.call_count, 3)
def test_file_delete_requires_login(self):
"""
Assert a file upload requires a login
"""
self.client.logout()
self.assertEqual(DataFile.objects.count(), 1)
response = self.client.delete(reverse('api:file-retrieve', kwargs={'pk': self.df.id}))
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
self.assertEqual(DataFile.objects.count(), 1)
def test_file_delete_requires_verified(self):
"""
Assert a file upload requires being logged in with a verified email
"""
self.user.verified_email = False
self.user.save()
self.assertEqual(DataFile.objects.count(), 1)
response = self.client.delete(reverse('api:file-retrieve', kwargs={'pk': self.df.id}))
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
self.assertEqual(DataFile.objects.count(), 1)
from django.conf import settings
from botocore.config import Config
from botocore.exceptions import ClientError
from botocore.exceptions import BotoCoreError, ClientError
from django.utils.functional import cached_property
from enumfields import Enum
from tenacity import retry, retry_if_exception, stop_after_delay
from io import BytesIO
import boto3.session
import logging
......@@ -51,6 +52,16 @@ class S3FileStatus(Enum):
Error = "error"
def _retry_delete_predicate(exception):
"""
Retry S3FileMixin.s3_delete only for botocore errors that are not an HTTP 404
"""
return isinstance(exception, BotoCoreError) or (
isinstance(exception, ClientError)
and exception.response['Error']['Code'] != '404'
)
class S3FileMixin(object):
def get_s3_object(self):
......@@ -132,3 +143,14 @@ class S3FileMixin(object):
self.save()
if self.status == S3FileStatus.Error and raise_exc:
raise ValueError('MD5 hashes do not match')
@retry(
retry=retry_if_exception(_retry_delete_predicate),
stop=stop_after_delay(10),
reraise=True,
)
def s3_delete(self):
"""
Try deleting the S3 object.
"""
self.s3_object.delete()
......@@ -25,6 +25,7 @@ PyYAML==5.1
responses==0.10.7
requests==2.22
sentry-sdk==0.14.3
tenacity==6.2
urllib3==1.22
uritemplate==3
git+https://gitlab.com/arkindex/common.git#egg=arkindex-common
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment