Skip to content
Snippets Groups Projects
Commit 527cabeb authored by Erwan Rouchet's avatar Erwan Rouchet Committed by Bastien Abadie
Browse files

Delete DataFiles from S3 when deleting them via the API

parent 4895b230
No related branches found
No related tags found
No related merge requests found
......@@ -34,9 +34,12 @@ from ponos.models import State
from datetime import datetime
from uuid import UUID
import ijson
import logging
import magic
import os.path
logger = logging.getLogger(__name__)
class DataImportsList(CorpusACLMixin, ListAPIView):
"""
......@@ -339,6 +342,10 @@ class DataFileRetrieve(CorpusACLMixin, RetrieveUpdateDestroyAPIView):
def perform_destroy(self, instance):
if not self.has_write_access(instance.corpus):
raise PermissionDenied
try:
instance.s3_delete()
except Exception as e:
logger.warning(f'Could not delete DataFile from S3: {e}')
return super().perform_destroy(instance)
......
......@@ -3,6 +3,7 @@ from rest_framework import status
from django.urls import reverse
from django.core.files.uploadedfile import SimpleUploadedFile
from django.test import override_settings
from botocore.exceptions import ClientError
from arkindex.documents.models import Corpus
from arkindex.dataimport.models import DataFile
from arkindex.project.tests import FixtureAPITestCase
......@@ -173,3 +174,54 @@ class TestFiles(FixtureAPITestCase):
df = DataFile.objects.get(id=data['id'])
self.assertEqual(df.name, 'manifest')
self.assertEqual(df.content_type, 'application/json')
@patch('arkindex.project.aws.s3')
def test_file_delete(self, s3_mock):
self.assertTrue(DataFile.objects.exists())
response = self.client.delete(reverse('api:file-retrieve', kwargs={'pk': self.df.id}))
self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT)
self.assertFalse(DataFile.objects.exists())
self.assertEqual(s3_mock.Object().delete.call_count, 1)
@patch('arkindex.project.aws.s3')
def test_file_delete_ignore_s3_errors(self, s3_mock):
"""
Test the DataFile deletion tries multiple times to delete from S3, but ignores errors if it fails
"""
exceptions = [
ClientError({'Error': {'Code': '500'}}, 'delete_object'),
ClientError({'Error': {'Code': '500'}}, 'delete_object'),
ValueError,
]
def _raise(*args, **kwargs):
raise exceptions.pop(0)
s3_mock.Object().delete.side_effect = _raise
self.assertTrue(DataFile.objects.exists())
response = self.client.delete(reverse('api:file-retrieve', kwargs={'pk': self.df.id}))
self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT)
self.assertFalse(DataFile.objects.exists())
self.assertEqual(s3_mock.Object().delete.call_count, 3)
def test_file_delete_requires_login(self):
"""
Assert a file upload requires a login
"""
self.client.logout()
self.assertEqual(DataFile.objects.count(), 1)
response = self.client.delete(reverse('api:file-retrieve', kwargs={'pk': self.df.id}))
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
self.assertEqual(DataFile.objects.count(), 1)
def test_file_delete_requires_verified(self):
"""
Assert a file upload requires being logged in with a verified email
"""
self.user.verified_email = False
self.user.save()
self.assertEqual(DataFile.objects.count(), 1)
response = self.client.delete(reverse('api:file-retrieve', kwargs={'pk': self.df.id}))
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
self.assertEqual(DataFile.objects.count(), 1)
from django.conf import settings
from botocore.config import Config
from botocore.exceptions import ClientError
from botocore.exceptions import BotoCoreError, ClientError
from django.utils.functional import cached_property
from enumfields import Enum
from tenacity import retry, retry_if_exception, stop_after_delay
from io import BytesIO
import boto3.session
import logging
......@@ -51,6 +52,16 @@ class S3FileStatus(Enum):
Error = "error"
def _retry_delete_predicate(exception):
"""
Retry S3FileMixin.s3_delete only for botocore errors that are not an HTTP 404
"""
return isinstance(exception, BotoCoreError) or (
isinstance(exception, ClientError)
and exception.response['Error']['Code'] != '404'
)
class S3FileMixin(object):
def get_s3_object(self):
......@@ -132,3 +143,14 @@ class S3FileMixin(object):
self.save()
if self.status == S3FileStatus.Error and raise_exc:
raise ValueError('MD5 hashes do not match')
@retry(
retry=retry_if_exception(_retry_delete_predicate),
stop=stop_after_delay(10),
reraise=True,
)
def s3_delete(self):
"""
Try deleting the S3 object.
"""
self.s3_object.delete()
......@@ -25,6 +25,7 @@ PyYAML==5.1
responses==0.10.7
requests==2.22
sentry-sdk==0.14.3
tenacity==6.2
urllib3==1.22
uritemplate==3
git+https://gitlab.com/arkindex/common.git#egg=arkindex-common
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment