diff --git a/arkindex/documents/management/commands/import_annotations.py b/arkindex/documents/management/commands/import_annotations.py
index 944bbe356c2f02921d7b0e8578d62f94dac9bdec..d3d6d641b643d09b0f0427ba297ae154a43dab74 100644
--- a/arkindex/documents/management/commands/import_annotations.py
+++ b/arkindex/documents/management/commands/import_annotations.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 from django.core.management.base import BaseCommand, CommandError
 from arkindex.images.importer import IndexImporter
-from arkindex.documents.models import Element, ElementType
+from arkindex.documents.models import Element, ElementType, Corpus
 from arkindex.documents.tasks import import_annotations
 import logging
 import re
@@ -21,9 +21,14 @@ class Command(BaseCommand):
             help='Folder to recursively search for index files',
             default='.',
         )
+        parser.add_argument(
+            '--corpus',
+            help='ID or part of the name of the corpus to import annotations in',
+            required=True,
+        )
         parser.add_argument(
             '--volume',
-            help='ID or exact name of the volume to import annotations in',
+            help='ID or part of the name of the volume to import annotations in',
             required=True,
         )
         parser.add_argument(
@@ -53,11 +58,17 @@ class Command(BaseCommand):
         if verbosity > 1:
             root_logger.setLevel(logging.DEBUG)
 
+        # Find corpus
+        try:
+            corpus = Corpus.objects.get(pk=options['corpus'])
+        except Exception:
+            corpus = Corpus.objects.get(name__icontains=options['corpus'])
+
         # Find volume
         try:
-            volume = Element.objects.get(pk=options['volume'], type=ElementType.Volume)
+            volume = Element.objects.get(pk=options['volume'], type=ElementType.Volume, corpus=corpus)
         except Exception:
-            volume = Element.objects.get(name=options['volume'], type=ElementType.Volume)
+            volume = Element.objects.get(name__icontains=options['volume'], type=ElementType.Volume, corpus=corpus)
 
         # Handle mask
         if options['mask'] is not None and options['regex'] != IndexImporter.DEFAULT_MASK:
@@ -67,7 +78,8 @@ class Command(BaseCommand):
 
         if options['mask'] is not None:
             mask = options['mask']
-            assert mask.count('<PATH>') == 1
+            if mask.count('<PATH>') != 1:
+                raise CommandError("A mask should contain '<PATH>' to specify the path to find images on")
 
             # Replace <PATH> with (.+) and escape the rest
             mask_regex = '^' + r'(.+)'.join(re.escape(p) for p in mask.split('<PATH>')) + '$'
diff --git a/arkindex/documents/tests/test_import_annotations.py b/arkindex/documents/tests/test_import_annotations.py
new file mode 100644
index 0000000000000000000000000000000000000000..ebd518d29caf87297eebae75cb4c34325390fc71
--- /dev/null
+++ b/arkindex/documents/tests/test_import_annotations.py
@@ -0,0 +1,162 @@
+from django.core.management import call_command
+from django.core.management.base import CommandError
+from unittest.mock import patch
+from arkindex.project.tests import FixtureTestCase
+from arkindex.documents.models import Element, ElementType
+
+
+class TestImportAnnotationsCommand(FixtureTestCase):
+    """
+    Test the import_annotations command.
+    """
+
+    @classmethod
+    def setUpTestData(cls):
+        super().setUpTestData()
+        cls.vol = Element.objects.get(type=ElementType.Volume, name="Volume 1")
+
+    def setUp(self):
+        super().setUp()
+        self.importer_patch = patch('arkindex.documents.management.commands.import_annotations.IndexImporter')
+        self.importer_mock = self.importer_patch.start()
+        self.import_task_patch = patch('arkindex.documents.management.commands.import_annotations.import_annotations')
+        self.import_task_mock = self.import_task_patch.start()
+
+    def tearDown(self):
+        super().tearDown()
+        self.importer_patch.stop()  # stop the patcher itself, not the mock it produced
+        self.import_task_patch.stop()
+
+    def test_start(self):
+        """
+        Test import_annotations starts a Celery task
+        """
+        call_command('import_annotations', 'somefolder', corpus=str(self.corpus.id), volume=str(self.vol.id))
+        self.assertEqual(self.import_task_mock.delay.call_count, 1)
+        args, kwargs = self.import_task_mock.delay.call_args
+        self.assertTupleEqual(args, ('somefolder', self.vol.id))
+        self.assertDictEqual(kwargs, {'mask': self.importer_mock.DEFAULT_MASK})
+
+    def test_dry_run(self):
+        """
+        Test import_annotations runs locally on dry run
+        """
+        call_command(
+            'import_annotations',
+            'somefolder',
+            corpus=str(self.corpus.id),
+            volume=str(self.vol.id),
+            dry_run=True,
+        )
+        self.assertEqual(self.import_task_mock.delay.call_count, 0)
+        self.assertEqual(self.importer_mock.call_count, 1)
+
+        args, kwargs = self.importer_mock.call_args
+        self.assertTupleEqual(args, ('somefolder', self.vol))
+        self.assertDictEqual(kwargs, {'mask': self.importer_mock.DEFAULT_MASK})
+
+        self.assertEqual(self.importer_mock().dry_run.call_count, 1)
+
+    @patch('arkindex.documents.management.commands.import_annotations.logging.getLogger')
+    def test_verbosity(self, getlogger):
+        """
+        Test import_annotations handles verbosity levels
+        """
+        call_command(
+            'import_annotations',
+            'somefolder',
+            corpus=str(self.corpus.id),
+            volume=str(self.vol.id),
+            verbosity=1,
+        )
+        self.assertFalse(getlogger().setLevel.called)
+
+        call_command(
+            'import_annotations',
+            'somefolder',
+            corpus=str(self.corpus.id),
+            volume=str(self.vol.id),
+            verbosity=2,
+        )
+        self.assertTrue(getlogger().setLevel.called)
+
+    def test_volume_name(self):
+        """
+        Test import_annotations can look for a volume by case-insensitive name
+        """
+        call_command('import_annotations', 'somefolder', corpus=str(self.corpus.id), volume='volume 1')
+        self.assertEqual(self.import_task_mock.delay.call_count, 1)
+        args, kwargs = self.import_task_mock.delay.call_args
+        self.assertTupleEqual(args, ('somefolder', self.vol.id))
+        self.assertDictEqual(kwargs, {'mask': self.importer_mock.DEFAULT_MASK})
+
+    def test_corpus_name(self):
+        """
+        Test import_annotations can look for a corpus by case-insensitive name
+        """
+        call_command('import_annotations', 'somefolder', corpus='tests', volume=str(self.vol.id))
+        self.assertEqual(self.import_task_mock.delay.call_count, 1)
+        args, kwargs = self.import_task_mock.delay.call_args
+        self.assertTupleEqual(args, ('somefolder', self.vol.id))
+        self.assertDictEqual(kwargs, {'mask': self.importer_mock.DEFAULT_MASK})
+
+    def test_regex(self):
+        """
+        Test import_annotations sets the importer regex
+        """
+        call_command(
+            'import_annotations',
+            'somefolder',
+            corpus=str(self.corpus.id),
+            volume=str(self.vol.id),
+            regex='some_regex',
+        )
+        self.assertEqual(self.import_task_mock.delay.call_count, 1)
+        args, kwargs = self.import_task_mock.delay.call_args
+        self.assertTupleEqual(args, ('somefolder', self.vol.id))
+        self.assertDictEqual(kwargs, {'mask': 'some_regex'})
+
+    def test_mask(self):
+        """
+        Test import_annotations converts a simple mask to a regex
+        """
+        call_command(
+            'import_annotations',
+            'somefolder',
+            corpus=str(self.corpus.id),
+            volume=str(self.vol.id),
+            mask='something<PATH>otherthing',
+        )
+        self.assertEqual(self.import_task_mock.delay.call_count, 1)
+        args, kwargs = self.import_task_mock.delay.call_args
+        self.assertTupleEqual(args, ('somefolder', self.vol.id))
+        self.assertDictEqual(kwargs, {'mask': r'^something(.+)otherthing$'})
+
+    def test_invalid_mask(self):
+        """
+        Test import_annotations checks a mask is valid before converting
+        """
+        self.assertRaises(
+            CommandError,
+            call_command,
+            'import_annotations',
+            'somefolder',
+            corpus=str(self.corpus.id),
+            volume=str(self.vol.id),
+            mask='some_invalid_mask',
+        )
+
+    def test_mask_and_regex(self):
+        """
+        Test import_annotations does not allow both mask and regex args simultaneously
+        """
+        self.assertRaises(
+            CommandError,
+            call_command,
+            'import_annotations',
+            'somefolder',
+            corpus=str(self.corpus.id),
+            volume=str(self.vol.id),
+            mask='some_mask',
+            regex='some_regex',
+        )