Skip to content
Snippets Groups Projects
Commit 0c5517f5 authored by Bastien Abadie
Browse files

Merge branch 'import-anno-vol-name' into 'master'

Handle volume names in import_annotations

See merge request !110
parents 5cfc0d4f 8413d73e
No related branches found
No related tags found
1 merge request: !110 Handle volume names in import_annotations
#!/usr/bin/env python3
from django.core.management.base import BaseCommand, CommandError
from arkindex.images.importer import IndexImporter
from arkindex.documents.models import Element, ElementType
from arkindex.documents.models import Element, ElementType, Corpus
from arkindex.documents.tasks import import_annotations
import logging
import re
......@@ -21,9 +21,14 @@ class Command(BaseCommand):
help='Folder to recursively search for index files',
default='.',
)
parser.add_argument(
'--corpus',
help='ID or part of the name of the corpus to import annotations in',
required=True,
)
parser.add_argument(
'--volume',
help='ID or exact name of the volume to import annotations in',
help='ID or part of the name of the volume to import annotations in',
required=True,
)
parser.add_argument(
......@@ -53,11 +58,17 @@ class Command(BaseCommand):
if verbosity > 1:
root_logger.setLevel(logging.DEBUG)
# Find corpus
try:
corpus = Corpus.objects.get(pk=options['corpus'])
except Exception:
corpus = Corpus.objects.get(name__icontains=options['corpus'])
# Find volume
try:
volume = Element.objects.get(pk=options['volume'], type=ElementType.Volume)
volume = Element.objects.get(pk=options['volume'], type=ElementType.Volume, corpus=corpus)
except Exception:
volume = Element.objects.get(name=options['volume'], type=ElementType.Volume)
volume = Element.objects.get(name__icontains=options['volume'], type=ElementType.Volume, corpus=corpus)
# Handle mask
if options['mask'] is not None and options['regex'] != IndexImporter.DEFAULT_MASK:
......@@ -67,7 +78,8 @@ class Command(BaseCommand):
if options['mask'] is not None:
mask = options['mask']
assert mask.count('<PATH>') == 1
if mask.count('<PATH>') != 1:
raise CommandError("A mask should contain '<PATH>' to specify the path to find images on")
# Replace <PATH> with (.+) and escape the rest
mask_regex = '^' + r'(.+)'.join(re.escape(p) for p in mask.split('<PATH>')) + '$'
......
from django.core.management import call_command
from django.core.management.base import CommandError
from unittest.mock import patch
from arkindex.project.tests import FixtureTestCase
from arkindex.documents.models import Element, ElementType
class TestImportAnnotationsCommand(FixtureTestCase):
    """
    Test the import_annotations command.

    Both ``IndexImporter`` and the ``import_annotations`` task are patched in
    the command module for every test, so no real import work is performed;
    the tests only check how the command forwards its arguments.
    """

    @classmethod
    def setUpTestData(cls):
        super().setUpTestData()
        # NOTE(review): relies on the fixtures providing a volume named
        # "Volume 1"; `self.corpus`, used by the tests below, is presumably
        # set up by FixtureTestCase as well - confirm against that class.
        cls.vol = Element.objects.get(type=ElementType.Volume, name="Volume 1")

    def setUp(self):
        super().setUp()
        self.importer_patch = patch('arkindex.documents.management.commands.import_annotations.IndexImporter')
        self.importer_mock = self.importer_patch.start()
        self.import_task_patch = patch('arkindex.documents.management.commands.import_annotations.import_annotations')
        self.import_task_mock = self.import_task_patch.start()

    def tearDown(self):
        super().tearDown()
        # Stop the patcher objects, not the mocks they returned: calling
        # .stop() on a mock is just an auto-created attribute call that does
        # nothing, which would leave the IndexImporter patch active after
        # the test has finished.
        self.importer_patch.stop()
        self.import_task_patch.stop()

    def test_start(self):
        """
        Test import_annotations starts a Celery task
        """
        call_command('import_annotations', 'somefolder', corpus=str(self.corpus.id), volume=str(self.vol.id))
        self.assertEqual(self.import_task_mock.delay.call_count, 1)
        args, kwargs = self.import_task_mock.delay.call_args
        self.assertTupleEqual(args, ('somefolder', self.vol.id))
        self.assertDictEqual(kwargs, {'mask': self.importer_mock.DEFAULT_MASK})

    def test_dry_run(self):
        """
        Test import_annotations runs locally on dry run
        """
        call_command(
            'import_annotations',
            'somefolder',
            corpus=str(self.corpus.id),
            volume=str(self.vol.id),
            dry_run=True,
        )
        # No task is queued; the importer is built and dry-run directly
        self.assertEqual(self.import_task_mock.delay.call_count, 0)
        self.assertEqual(self.importer_mock.call_count, 1)
        args, kwargs = self.importer_mock.call_args
        self.assertTupleEqual(args, ('somefolder', self.vol))
        self.assertDictEqual(kwargs, {'mask': self.importer_mock.DEFAULT_MASK})
        self.assertEqual(self.importer_mock().dry_run.call_count, 1)

    @patch('arkindex.documents.management.commands.import_annotations.logging.getLogger')
    def test_verbosity(self, getlogger):
        """
        Test import_annotations handles verbosity levels
        """
        call_command(
            'import_annotations',
            'somefolder',
            corpus=str(self.corpus.id),
            volume=str(self.vol.id),
            verbosity=1,
        )
        self.assertFalse(getlogger().setLevel.called)

        call_command(
            'import_annotations',
            'somefolder',
            corpus=str(self.corpus.id),
            volume=str(self.vol.id),
            verbosity=2,
        )
        self.assertTrue(getlogger().setLevel.called)

    def test_volume_name(self):
        """
        Test import_annotations can look for a volume by case-insensitive name
        """
        call_command('import_annotations', 'somefolder', corpus=str(self.corpus.id), volume='volume 1')
        self.assertEqual(self.import_task_mock.delay.call_count, 1)
        args, kwargs = self.import_task_mock.delay.call_args
        self.assertTupleEqual(args, ('somefolder', self.vol.id))
        self.assertDictEqual(kwargs, {'mask': self.importer_mock.DEFAULT_MASK})

    def test_corpus_name(self):
        """
        Test import_annotations can look for a corpus by case-insensitive name
        """
        call_command('import_annotations', 'somefolder', corpus='tests', volume=str(self.vol.id))
        self.assertEqual(self.import_task_mock.delay.call_count, 1)
        args, kwargs = self.import_task_mock.delay.call_args
        self.assertTupleEqual(args, ('somefolder', self.vol.id))
        self.assertDictEqual(kwargs, {'mask': self.importer_mock.DEFAULT_MASK})

    def test_regex(self):
        """
        Test import_annotations sets the importer regex
        """
        call_command(
            'import_annotations',
            'somefolder',
            corpus=str(self.corpus.id),
            volume=str(self.vol.id),
            regex='some_regex',
        )
        self.assertEqual(self.import_task_mock.delay.call_count, 1)
        args, kwargs = self.import_task_mock.delay.call_args
        self.assertTupleEqual(args, ('somefolder', self.vol.id))
        self.assertDictEqual(kwargs, {'mask': 'some_regex'})

    def test_mask(self):
        """
        Test import_annotations converts a simple mask to a regex
        """
        call_command(
            'import_annotations',
            'somefolder',
            corpus=str(self.corpus.id),
            volume=str(self.vol.id),
            mask='something<PATH>otherthing',
        )
        self.assertEqual(self.import_task_mock.delay.call_count, 1)
        args, kwargs = self.import_task_mock.delay.call_args
        self.assertTupleEqual(args, ('somefolder', self.vol.id))
        self.assertDictEqual(kwargs, {'mask': r'^something(.+)otherthing$'})

    def test_invalid_mask(self):
        """
        Test import_annotations checks a mask is valid before converting
        """
        self.assertRaises(
            CommandError,
            call_command,
            'import_annotations',
            'somefolder',
            corpus=str(self.corpus.id),
            volume=str(self.vol.id),
            mask='some_invalid_mask',
        )

    def test_mask_and_regex(self):
        """
        Test import_annotations does not allow both mask and regex args simultaneously
        """
        self.assertRaises(
            CommandError,
            call_command,
            'import_annotations',
            'somefolder',
            corpus=str(self.corpus.id),
            volume=str(self.vol.id),
            mask='some_mask',
            regex='some_regex',
        )
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment