Skip to content
Snippets Groups Projects
Commit 9d251b86, authored by ml bonhomme and committed by Erwan Rouchet
Browse files

Add orphan db images cleanup command

parent 979d59f5
No related branches found
No related tags found
1 merge request!1800Add orphan db images cleanup command
......@@ -6,11 +6,11 @@ from uuid import UUID
from botocore.exceptions import ClientError
from django.conf import settings
from django.core.management.base import BaseCommand
from django.db.models import Max, Q
from django.db.models import Exists, Max, OuterRef, Q, Value
from django.utils import timezone
from arkindex.dataimport.models import DataFile, GitRef, GitRefType, WorkerVersion, WorkerVersionState
from arkindex.documents.models import CorpusExport, CorpusExportState
from arkindex.documents.models import CorpusExport, CorpusExportState, Element
from arkindex.images.models import Image, ImageServer
from arkindex.project.aws import s3
from arkindex.training.models import ModelVersion
......@@ -40,6 +40,8 @@ class Command(BaseCommand):
self.cleanup_trashed_datafiles()
self.cleanup_orphan_images()
self.cleanup_local_images()
self.cleanup_ponos_logs()
......@@ -225,6 +227,39 @@ class Command(BaseCommand):
self.stdout.write(self.style.SUCCESS('Successfully cleaned up orphaned local images.'))
def cleanup_orphan_images(self):
    """
    Remove all images in the database that are over 2 weeks old and are not tied to any element.

    The number of matching images is reported on stdout before they are deleted.
    A database error raised by the deletion is written to stdout as an error
    message instead of being propagated, so the remaining cleanup steps still run.
    """
    # Function-scope import: the deletion below only talks to the database, so the
    # error it can raise is a Django DatabaseError, not a botocore ClientError.
    from django.db import DatabaseError

    self.stdout.write('Removing orphan images…')

    cutoff = timezone.now() - timedelta(weeks=2)

    # In order to optimize the request, and not perform a join that reads all of the images and
    # elements in the db (Image.objects.filter(elements__isnull=True)), the queryset gets annotated
    # with "has_elements". "has_elements" comes from an EXISTS query which looks for Element objects
    # with an image_id that is the pk of an Image object in the 'parent' queryset (OuterRef). Since
    # we are looking for Image objects with no related elements, we filter on has_elements=False.
    # The second annotation (annotate(has_elements=Value(False))) further improves the query by
    # removing the EXISTS query from the SELECT one, as we know that "has_elements" will always be false.
    has_elements = Exists(Element.objects.filter(image_id=OuterRef('pk')))
    old_orphan_images = (
        Image.objects
        .annotate(has_elements=has_elements)
        .filter(has_elements=False, created__lt=cutoff)
        .annotate(has_elements=Value(False))
        .only('id')
    )

    delete_count = old_orphan_images.count()
    if delete_count > 0:
        self.stdout.write(f'Removing {delete_count} orphan images…')
        try:
            # Using _raw_delete() to stop Django from looking for related elements before deleting
            # since we know there are no related elements.
            old_orphan_images._raw_delete(using='default')
        except DatabaseError as e:
            self.stdout.write(self.style.ERROR(str(e)))

    self.stdout.write(self.style.SUCCESS('Successfully cleaned up orphan images.'))
def cleanup_ponos_logs(self):
"""
Remove ponos logs that are not tied to a task in the database
......
......@@ -9,8 +9,8 @@ from django.core.management import call_command
from django.test import override_settings
from arkindex.dataimport.models import DataFile, GitRefType, Repository, WorkerVersionState
from arkindex.documents.models import CorpusExport, CorpusExportState
from arkindex.images.models import ImageServer
from arkindex.documents.models import CorpusExport, CorpusExportState, Element
from arkindex.images.models import Image, ImageServer
from arkindex.project.tests import FixtureTestCase
from arkindex.training.models import Model, ModelVersion
from ponos.models import Artifact, Farm, Task, Workflow
......@@ -57,6 +57,8 @@ class TestCleanupCommand(FixtureTestCase):
Successfully cleaned up orphaned corpus exports.
Deleting 0 DataFiles marked as trashed from S3 and the database…
Successfully cleaned up DataFiles marked as trashed.
Removing orphan images…
Successfully cleaned up orphan images.
Removing orphaned local images…
Successfully cleaned up orphaned local images.
Removing orphaned Ponos logs…
......@@ -95,6 +97,8 @@ class TestCleanupCommand(FixtureTestCase):
Successfully cleaned up orphaned corpus exports.
Deleting 0 DataFiles marked as trashed from S3 and the database…
Successfully cleaned up DataFiles marked as trashed.
Removing orphan images…
Successfully cleaned up orphan images.
Removing orphaned local images…
Successfully cleaned up orphaned local images.
Removing orphaned Ponos logs…
......@@ -134,6 +138,8 @@ class TestCleanupCommand(FixtureTestCase):
Successfully cleaned up orphaned corpus exports.
Deleting 0 DataFiles marked as trashed from S3 and the database…
Successfully cleaned up DataFiles marked as trashed.
Removing orphan images…
Successfully cleaned up orphan images.
Removing orphaned local images…
Successfully cleaned up orphaned local images.
Removing orphaned Ponos logs…
......@@ -180,6 +186,8 @@ class TestCleanupCommand(FixtureTestCase):
Successfully cleaned up orphaned corpus exports.
Deleting 0 DataFiles marked as trashed from S3 and the database…
Successfully cleaned up DataFiles marked as trashed.
Removing orphan images…
Successfully cleaned up orphan images.
Removing orphaned local images…
Successfully cleaned up orphaned local images.
Removing orphaned Ponos logs…
......@@ -229,6 +237,8 @@ class TestCleanupCommand(FixtureTestCase):
Deleting 1 DataFiles marked as trashed from S3 and the database…
Deleting DataFile {trashed_df.id}
Successfully cleaned up DataFiles marked as trashed.
Removing orphan images…
Successfully cleaned up orphan images.
Removing orphaned local images…
Successfully cleaned up orphaned local images.
Removing orphaned Ponos logs…
......@@ -286,6 +296,8 @@ class TestCleanupCommand(FixtureTestCase):
Deleting DataFile {trashed_df.id}
Something went wrong
Successfully cleaned up DataFiles marked as trashed.
Removing orphan images…
Successfully cleaned up orphan images.
Removing orphaned local images…
Successfully cleaned up orphaned local images.
Removing orphaned Ponos logs…
......@@ -354,6 +366,8 @@ class TestCleanupCommand(FixtureTestCase):
Successfully cleaned up orphaned corpus exports.
Deleting 0 DataFiles marked as trashed from S3 and the database…
Successfully cleaned up DataFiles marked as trashed.
Removing orphan images…
Successfully cleaned up orphan images.
Removing orphaned local images…
Successfully cleaned up orphaned local images.
Removing orphaned Ponos logs…
......@@ -427,6 +441,8 @@ class TestCleanupCommand(FixtureTestCase):
Successfully cleaned up orphaned corpus exports.
Deleting 0 DataFiles marked as trashed from S3 and the database…
Successfully cleaned up DataFiles marked as trashed.
Removing orphan images…
Successfully cleaned up orphan images.
Removing orphaned local images…
Successfully cleaned up orphaned local images.
Removing orphaned Ponos logs…
......@@ -524,6 +540,8 @@ class TestCleanupCommand(FixtureTestCase):
Successfully cleaned up orphaned corpus exports.
Deleting 0 DataFiles marked as trashed from S3 and the database…
Successfully cleaned up DataFiles marked as trashed.
Removing orphan images…
Successfully cleaned up orphan images.
Removing orphaned local images…
Successfully cleaned up orphaned local images.
Removing orphaned Ponos logs…
......@@ -603,6 +621,8 @@ class TestCleanupCommand(FixtureTestCase):
Successfully cleaned up orphaned corpus exports.
Deleting 0 DataFiles marked as trashed from S3 and the database…
Successfully cleaned up DataFiles marked as trashed.
Removing orphan images…
Successfully cleaned up orphan images.
Removing orphaned local images…
Removing image thisisfine.jpg…
An error occurred (500) when calling the delete_object operation: Unknown
......@@ -628,6 +648,58 @@ class TestCleanupCommand(FixtureTestCase):
self.assertEqual(broken_object.delete.call_count, 1)
self.assertEqual(orphan_object.delete.call_count, 1)
@patch('arkindex.documents.management.commands.cleanup.s3')
def test_cleanup_orphan_images(self, cleanup_s3_mock, s3_mock):
    """
    The cleanup command deletes the two backdated images that have no element,
    and keeps both the image used by an element and the freshly created orphan.
    """
    page = Element.objects.get(name='Volume 2, page 1v')
    linked_image = Image.objects.get(id=page.image.id)
    server = ImageServer.objects.get(url='http://server')

    # Create two images while Django's clock is patched to three weeks ago;
    # presumably this backdates their creation timestamp past the 2 week threshold.
    three_weeks_ago = datetime.now(timezone.utc) - timedelta(weeks=3)
    with patch('django.utils.timezone.now', return_value=three_weeks_ago):
        stale_orphans = [
            Image.objects.create(path=path, width=12, height=12, server=server)
            for path in ('path/path/img', 'path/pathpathpath/img')
        ]

    # Created with the real clock, so this orphan is expected to survive the cleanup
    fresh_orphan = Image.objects.create(path='path/pathpath/img', width=12, height=12, server=server)

    expected_output = dedent(
        """
        Removing orphaned Ponos artifacts…
        Successfully cleaned up orphaned Ponos artifacts.
        Removing 0 artifacts of expired workflows from S3…
        Removing logs for 0 tasks of expired workflows from S3…
        Updating 0 available worker versions to the Error state…
        Removing 0 artifacts of expired workflows…
        Removing 0 tasks of expired workflows…
        Removing 0 expired workflows…
        Successfully cleaned up expired workflows.
        Removing 0 old corpus exports from S3…
        Removing 0 old corpus exports…
        Successfully cleaned up old corpus exports.
        Removing orphaned corpus exports…
        Successfully cleaned up orphaned corpus exports.
        Deleting 0 DataFiles marked as trashed from S3 and the database…
        Successfully cleaned up DataFiles marked as trashed.
        Removing orphan images…
        Removing 2 orphan images…
        Successfully cleaned up orphan images.
        Removing orphaned local images…
        Successfully cleaned up orphaned local images.
        Removing orphaned Ponos logs…
        Successfully cleaned up orphaned Ponos logs.
        Removing orphaned model versions archives…
        Successfully cleaned up orphaned model versions archives.
        """
    ).strip()

    with self.assertNumQueries(19):
        self.assertEqual(self.cleanup(), expected_output)

    # Both backdated orphans must be gone from the database
    for stale_image in stale_orphans:
        with self.assertRaises(Image.DoesNotExist):
            stale_image.refresh_from_db()

    # refresh_from_db() raises DoesNotExist on a deleted row, so these calls
    # succeeding shows that both images were kept.
    linked_image.refresh_from_db()
    fresh_orphan.refresh_from_db()
@patch('arkindex.documents.management.commands.cleanup.s3')
def test_cleanup_logs(self, cleanup_s3_mock, s3_mock):
workflow = Workflow.objects.create(farm=Farm.objects.first())
......@@ -676,6 +748,8 @@ class TestCleanupCommand(FixtureTestCase):
Successfully cleaned up orphaned corpus exports.
Deleting 0 DataFiles marked as trashed from S3 and the database…
Successfully cleaned up DataFiles marked as trashed.
Removing orphan images…
Successfully cleaned up orphan images.
Removing orphaned local images…
Successfully cleaned up orphaned local images.
Removing orphaned Ponos logs…
......@@ -752,6 +826,8 @@ class TestCleanupCommand(FixtureTestCase):
Successfully cleaned up orphaned corpus exports.
Deleting 0 DataFiles marked as trashed from S3 and the database…
Successfully cleaned up DataFiles marked as trashed.
Removing orphan images…
Successfully cleaned up orphan images.
Removing orphaned local images…
Successfully cleaned up orphaned local images.
Removing orphaned Ponos logs…
......@@ -831,6 +907,8 @@ class TestCleanupCommand(FixtureTestCase):
Successfully cleaned up orphaned corpus exports.
Deleting 0 DataFiles marked as trashed from S3 and the database…
Successfully cleaned up DataFiles marked as trashed.
Removing orphan images…
Successfully cleaned up orphan images.
Removing orphaned local images…
Successfully cleaned up orphaned local images.
Removing orphaned Ponos logs…
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment