Skip to content
Snippets Groups Projects
Commit 9e64f644 authored by Erwan Rouchet's avatar Erwan Rouchet Committed by Bastien Abadie
Browse files

Remove acts and TEI imports from Git

parent 2044b344
No related branches found
No related tags found
No related merge requests found
......@@ -10,8 +10,6 @@ import yaml
class ImportType(Enum):
    """Top-level sections that may appear in a repository import configuration."""
    Volumes = 'volumes'
    Acts = 'acts'
    Metadata = 'metadata'
class VolumesImportFormat(Enum):
......@@ -19,46 +17,11 @@ class VolumesImportFormat(Enum):
TXT = 'txt'
class ActsImportFormat(Enum):
    """File formats supported by the acts import."""
    CSV = 'csv'
class MetadataImportFormat(Enum):
    """File formats supported by the metadata import."""
    TEI = 'tei'
class ImportConfig(dict):
class VolumesConfig(dict):
def path_match(self, path):
    """Return True when ``path`` matches at least one glob pattern in self['paths']."""
    for pattern in self['paths']:
        if fnmatch.fnmatch(path, pattern):
            return True
    return False
class ImportConfigSerializer(serializers.Serializer):
    """Base serializer for a single import section of the repository configuration.

    Declares the ``format``/``paths`` fields common to every import type, and the
    error messages used by subclass ``validate_<field>`` methods via ``self.fail``.
    """

    format = serializers.CharField()
    paths = serializers.ListField(
        child=serializers.CharField(),
        allow_empty=False,
        min_length=1,
        read_only=False,
    )

    # Shared error messages for element-type slug validation in subclasses
    default_error_messages = {
        'type_folder': 'Element type with slug {slug!r} in {name!r} should be a folder',
        'type_hidden': 'Element type with slug {slug!r} in {name!r} should be hidden',
        'type_not_found': 'Element type with slug {slug!r} in {name!r} not found',
        'type_not_folder': 'Element type with slug {slug!r} in {name!r} should not be a folder',
        'type_not_hidden': 'Element type with slug {slug!r} in {name!r} should not be hidden',
    }

    class Meta:
        # dict subclass used to wrap validated data; subclasses override this
        output_class = ImportConfig

    def validate(self, data):
        # Wrap the validated payload in the configured output class
        return self.Meta.output_class(data)
class VolumesConfig(ImportConfig):
@cached_property
def image_servers(self):
if not self.get('image_servers'):
......@@ -71,14 +34,31 @@ class VolumesConfig(ImportConfig):
return servers
class VolumesConfigSerializer(ImportConfigSerializer):
class VolumesConfigSerializer(serializers.Serializer):
format = EnumField(VolumesImportFormat, default=VolumesImportFormat.IIIF)
paths = serializers.ListField(
child=serializers.CharField(),
allow_empty=False,
min_length=1,
read_only=False,
)
lazy_checks = serializers.BooleanField(default=False)
autoconvert_https = serializers.BooleanField(default=False)
image_servers = serializers.DictField(child=serializers.URLField(), default=dict)
manifest_type = serializers.SlugField(default='volume')
canvas_type = serializers.SlugField(default='page')
default_error_messages = {
'type_folder': 'Element type with slug {slug!r} in {name!r} should be a folder',
'type_hidden': 'Element type with slug {slug!r} in {name!r} should be hidden',
'type_not_found': 'Element type with slug {slug!r} in {name!r} not found',
'type_not_folder': 'Element type with slug {slug!r} in {name!r} should not be a folder',
'type_not_hidden': 'Element type with slug {slug!r} in {name!r} should not be hidden',
}
def validate(self, data):
return VolumesConfig(data)
def validate_manifest_type(self, slug):
assert 'corpus' in self.context, 'A corpus is required to validate element types'
try:
......@@ -111,83 +91,6 @@ class VolumesConfigSerializer(ImportConfigSerializer):
output_class = VolumesConfig
class ActsConfigSerializer(ImportConfigSerializer):
    """Serializer for the 'acts' import section.

    Resolves the folder/act/surface type slugs against the corpus provided in
    the serializer context and enforces their folder/hidden constraints.
    """

    format = EnumField(ActsImportFormat, default=ActsImportFormat.CSV)
    folder_type = serializers.SlugField(default='volume')
    act_type = serializers.SlugField(default='act')
    surface_type = serializers.SlugField(default='surface')

    def _resolve_type(self, slug, name):
        # Shared lookup: fetch an element type by slug on the context corpus,
        # failing with 'type_not_found' when it does not exist.
        assert 'corpus' in self.context, 'A corpus is required to validate element types'
        try:
            return self.context['corpus'].types.get(slug=slug)
        except ElementType.DoesNotExist:
            self.fail('type_not_found', slug=slug, name=name)

    def validate_folder_type(self, slug):
        # Folders holding acts must be visible folder types
        element_type = self._resolve_type(slug, 'folder_type')
        if not element_type.folder:
            self.fail('type_folder', slug=slug, name='folder_type')
        if element_type.hidden:
            self.fail('type_not_hidden', slug=slug, name='folder_type')
        return element_type

    def validate_act_type(self, slug):
        # Acts must be visible, non-folder types
        element_type = self._resolve_type(slug, 'act_type')
        if element_type.folder:
            self.fail('type_not_folder', slug=slug, name='act_type')
        if element_type.hidden:
            self.fail('type_not_hidden', slug=slug, name='act_type')
        return element_type

    def validate_surface_type(self, slug):
        # Surfaces must be hidden, non-folder types
        element_type = self._resolve_type(slug, 'surface_type')
        if element_type.folder:
            self.fail('type_not_folder', slug=slug, name='surface_type')
        if not element_type.hidden:
            self.fail('type_hidden', slug=slug, name='surface_type')
        return element_type
class MetadataConfigSerializer(ImportConfigSerializer):
    """Serializer for the 'metadata' import section.

    Resolves the folder and element type slugs against the corpus provided in
    the serializer context.
    """

    format = EnumField(MetadataImportFormat, default=MetadataImportFormat.TEI)
    folder_type = serializers.SlugField(default='volume')
    element_type = serializers.SlugField(default='act')

    def validate_element_type(self, slug):
        # Target elements only need to exist; no folder/hidden constraint
        assert 'corpus' in self.context, 'A corpus is required to validate element types'
        corpus = self.context['corpus']
        try:
            return corpus.types.get(slug=slug)
        except ElementType.DoesNotExist:
            self.fail('type_not_found', slug=slug, name='element_type')

    def validate_folder_type(self, slug):
        # Folders must be visible folder types
        assert 'corpus' in self.context, 'A corpus is required to validate element types'
        corpus = self.context['corpus']
        try:
            folder = corpus.types.get(slug=slug)
        except ElementType.DoesNotExist:
            self.fail('type_not_found', slug=slug, name='folder_type')
        if not folder.folder:
            self.fail('type_folder', slug=slug, name='folder_type')
        if folder.hidden:
            self.fail('type_not_hidden', slug=slug, name='folder_type')
        return folder
class GitConfig(dict):
FILE_NAME = '.arkindex.yml'
......@@ -210,8 +113,6 @@ class GitConfigSerializer(serializers.Serializer):
read_only=False,
)
volumes = VolumesConfigSerializer(required=False)
acts = ActsConfigSerializer(required=False)
metadata = MetadataConfigSerializer(required=False)
default_error_messages = {
'no_imports': 'No import types were set in the configuration file',
......
from abc import ABC, abstractmethod
from arkindex.dataimport.config import GitConfig, VolumesImportFormat
from arkindex.dataimport.iiif import IIIFParser
from arkindex.himanis.tei import TeiParser
from arkindex.himanis.acts import ActsImporter
import os.path
import logging
......@@ -117,47 +115,6 @@ class ManifestListFileType(FileType):
self.changed_elements.extend(IIIFFileType.run(self.flow.dataimport, path, self.flow.config))
class ActsListFileType(FileType):
    """
    CSV files with a volume name as their filename and two columns: [act number, first folio]
    """

    @staticmethod
    def match(path, config):
        """Match .csv paths covered by the 'acts' section of the repo config."""
        assert isinstance(config, GitConfig)
        acts_config = config.get('acts')
        return acts_config and acts_config.path_match(path) and path.endswith('.csv')

    def handle(self):
        """Build an ActsImporter from the config's element types and run it."""
        acts_config = self.flow.config['acts']
        importer = ActsImporter(
            self.full_path,
            corpus=self.flow.dataimport.corpus,
            folder_type=acts_config['folder_type'],
            act_type=acts_config['act_type'],
            surface_type=acts_config['surface_type'],
        )
        importer.run()
class MetadataFileType(FileType):
    """
    TEI-XML files
    """

    @staticmethod
    def match(path, config):
        """Match .xml paths covered by the 'metadata' section of the repo config."""
        assert isinstance(config, GitConfig)
        metadata_config = config.get('metadata')
        return metadata_config and metadata_config.path_match(path) and path.endswith('.xml')

    def handle(self):
        """Parse the TEI file against the corpus and import its metadata."""
        metadata_config = self.flow.config['metadata']
        tei_parser = TeiParser(
            path=self.full_path,
            corpus=self.flow.dataimport.corpus,
            revision=self.flow.dataimport.revision,
            folder_type=metadata_config['folder_type'],
            element_type=metadata_config['element_type'],
        )
        tei_parser.run()
class GitConfigFileType(FileType):
"""
An Arkindex repository configuration file
......@@ -177,6 +134,4 @@ file_types = [
GitConfigFileType,
ManifestListFileType,
IIIFFileType,
ActsListFileType,
MetadataFileType,
]
......@@ -119,13 +119,6 @@ class GitFlow(object):
logger.info('Using element type {} for manifests'.format(import_config['manifest_type'].slug))
logger.info('Using element type {} for canvases'.format(import_config['canvas_type'].slug))
elif import_type == ImportType.Acts:
logger.info('Using element type {} for acts'.format(import_config['act_type'].slug))
logger.info('Using element type {} for surfaces'.format(import_config['surface_type'].slug))
elif import_type == ImportType.Metadata:
logger.info('Importing metadata on elements with type {}'.format(import_config['element_type'].slug))
rev, _ = self.dataimport.repo.provider.get_or_create_revision(
self.dataimport.repo,
self.dataimport.sha,
......
from rest_framework.exceptions import ValidationError, ErrorDetail
from arkindex.project.tests import FixtureTestCase
from arkindex.images.models import ImageServer
from arkindex.dataimport.config import \
GitConfigSerializer, VolumesImportFormat, ActsImportFormat, MetadataImportFormat
from arkindex.dataimport.config import GitConfigSerializer, VolumesImportFormat
class TestGitConfig(FixtureTestCase):
......@@ -29,14 +28,6 @@ class TestGitConfig(FixtureTestCase):
"lazy_checks": True,
"autoconvert_https": True,
},
"acts": {
"format": "csv",
"paths": ["acts/*"],
},
"metadata": {
"format": "tei",
"paths": ["metadata/*"],
}
}
def test_base(self):
......@@ -58,18 +49,6 @@ class TestGitConfig(FixtureTestCase):
self.assertEqual(volumes['canvas_type'], self.corpus.types.get(slug='page'))
self.assertEqual(volumes.image_servers, [self.sample_server])
acts = data['acts']
self.assertEqual(acts['format'], ActsImportFormat.CSV)
self.assertListEqual(acts['paths'], ['acts/*'])
self.assertEqual(acts['act_type'], self.corpus.types.get(slug='act'))
self.assertEqual(acts['surface_type'], self.corpus.types.get(slug='surface'))
metadata = data['metadata']
self.assertEqual(metadata['format'], MetadataImportFormat.TEI)
self.assertListEqual(metadata['paths'], ['metadata/*'])
self.assertEqual(metadata['folder_type'], self.corpus.types.get(slug='volume'))
self.assertEqual(metadata['element_type'], self.corpus.types.get(slug='act'))
def test_version(self):
"""
Test the GitConfigSerializer handles versioning
......@@ -97,8 +76,6 @@ class TestGitConfig(FixtureTestCase):
Test the config parser requires at least one import
"""
del self.base_data['volumes']
del self.base_data['acts']
del self.base_data['metadata']
with self.assertRaisesRegex(ValidationError, 'import types'):
GitConfigSerializer(
data=self.base_data,
......@@ -128,13 +105,9 @@ class TestGitConfig(FixtureTestCase):
Test omitting import formats sets to default values
"""
del self.base_data['volumes']['format']
del self.base_data['acts']['format']
del self.base_data['metadata']['format']
cfg = GitConfigSerializer(data=self.base_data, context={'corpus': self.corpus})
cfg.is_valid(raise_exception=True)
self.assertEqual(cfg.validated_data['volumes']['format'], VolumesImportFormat.IIIF)
self.assertEqual(cfg.validated_data['acts']['format'], ActsImportFormat.CSV)
self.assertEqual(cfg.validated_data['metadata']['format'], MetadataImportFormat.TEI)
def test_types_exist(self):
"""
......@@ -142,10 +115,6 @@ class TestGitConfig(FixtureTestCase):
"""
self.base_data['volumes']['manifest_type'] = 'bread'
self.base_data['volumes']['canvas_type'] = 'lettuce'
self.base_data['acts']['act_type'] = 'tomato'
self.base_data['acts']['surface_type'] = 'bacon'
self.base_data['metadata']['folder_type'] = 'steak'
self.base_data['metadata']['element_type'] = 'mayonnaise'
cfg = GitConfigSerializer(data=self.base_data, context={'corpus': self.corpus})
self.assertFalse(cfg.is_valid())
......@@ -160,26 +129,6 @@ class TestGitConfig(FixtureTestCase):
code='type_not_found',
)],
},
'acts': {
'act_type': [ErrorDetail(
string="Element type with slug 'tomato' in 'act_type' not found",
code='type_not_found',
)],
'surface_type': [ErrorDetail(
string="Element type with slug 'bacon' in 'surface_type' not found",
code='type_not_found',
)],
},
'metadata': {
'folder_type': [ErrorDetail(
string="Element type with slug 'steak' in 'folder_type' not found",
code='type_not_found',
)],
'element_type': [ErrorDetail(
string="Element type with slug 'mayonnaise' in 'element_type' not found",
code='type_not_found',
)],
}
})
def test_folders(self):
......@@ -188,10 +137,6 @@ class TestGitConfig(FixtureTestCase):
"""
self.base_data['volumes']['manifest_type'] = 'page'
self.base_data['volumes']['canvas_type'] = 'volume'
self.base_data['acts']['act_type'] = 'volume'
self.base_data['acts']['surface_type'] = 'volume'
self.base_data['metadata']['folder_type'] = 'act'
self.base_data['metadata']['element_type'] = 'volume'
cfg = GitConfigSerializer(data=self.base_data, context={'corpus': self.corpus})
self.assertFalse(cfg.is_valid())
......@@ -206,22 +151,6 @@ class TestGitConfig(FixtureTestCase):
code='type_not_folder',
)],
},
'acts': {
'act_type': [ErrorDetail(
string="Element type with slug 'volume' in 'act_type' should not be a folder",
code='type_not_folder',
)],
'surface_type': [ErrorDetail(
string="Element type with slug 'volume' in 'surface_type' should not be a folder",
code='type_not_folder',
)],
},
'metadata': {
'folder_type': [ErrorDetail(
string="Element type with slug 'act' in 'folder_type' should be a folder",
code='type_folder',
)],
}
})
def test_hidden(self):
......@@ -231,10 +160,6 @@ class TestGitConfig(FixtureTestCase):
sneaky_volume = self.corpus.types.create(slug='sneaky_volume', hidden=True, folder=True)
self.base_data['volumes']['manifest_type'] = sneaky_volume.slug
self.base_data['volumes']['canvas_type'] = 'surface'
self.base_data['acts']['act_type'] = 'surface'
self.base_data['acts']['surface_type'] = 'page'
self.base_data['metadata']['folder_type'] = sneaky_volume.slug
self.base_data['metadata']['element_type'] = 'surface'
cfg = GitConfigSerializer(data=self.base_data, context={'corpus': self.corpus})
self.assertFalse(cfg.is_valid())
......@@ -249,20 +174,4 @@ class TestGitConfig(FixtureTestCase):
code='type_not_hidden',
)],
},
'acts': {
'act_type': [ErrorDetail(
string="Element type with slug 'surface' in 'act_type' should not be hidden",
code='type_not_hidden',
)],
'surface_type': [ErrorDetail(
string="Element type with slug 'page' in 'surface_type' should be hidden",
code='type_hidden',
)],
},
'metadata': {
'folder_type': [ErrorDetail(
string="Element type with slug 'sneaky_volume' in 'folder_type' should not be hidden",
code='type_not_hidden',
)],
}
})
from unittest.mock import patch, call, MagicMock
from arkindex_common.enums import MetaType, DataImportMode
from arkindex_common.enums import DataImportMode
from arkindex.project.tests import FixtureTestCase
from arkindex.dataimport.filetypes import \
FileType, IIIFFileType, ActsListFileType, MetadataFileType, GitConfigFileType
from arkindex.dataimport.filetypes import FileType, IIIFFileType, GitConfigFileType
from arkindex.dataimport.git import GitFlow, SimpleDiff, DiffType
from arkindex.dataimport.config import GitConfig, VolumesConfig, ImportConfig, VolumesImportFormat
from arkindex.dataimport.config import GitConfig, VolumesConfig, VolumesImportFormat
class TestFileTypes(FixtureTestCase):
......@@ -23,9 +22,6 @@ class TestFileTypes(FixtureTestCase):
'sha': cls.rev.hash,
},
)
cls.volume_type = cls.corpus.types.get(slug='volume')
cls.act_type = cls.corpus.types.get(slug='act')
cls.surface_type = cls.corpus.types.get(slug='surface')
with patch('arkindex.dataimport.git.os.path.isdir', lambda _: True):
cls.flow = GitFlow(cls.di, 'the/')
......@@ -49,100 +45,6 @@ class TestFileTypes(FixtureTestCase):
self.flow.config['volumes'].path_match.return_value = False
self.assertFalse(IIIFFileType.match('test.json', self.flow.config))
def test_acts_match(self):
    """ActsListFileType matches only .csv paths accepted by the 'acts' paths."""
    self.flow.config = GitConfig({
        'acts': ImportConfig(paths=['*']),
    })
    # Stub path_match to accept everything so only the extension check varies
    self.flow.config['acts'].path_match = MagicMock(return_value=True)
    self.assertTrue(ActsListFileType.match('path/to/some.csv', self.flow.config))
    self.assertFalse(ActsListFileType.match('wrong/type.xml', self.flow.config))
    # path_match is consulted for both candidates, in order
    self.assertEqual(self.flow.config['acts'].path_match.call_count, 2)
    self.assertEqual(self.flow.config['acts'].path_match.call_args_list, [
        call('path/to/some.csv'),
        call('wrong/type.xml'),
    ])
    # Once path_match rejects the path, even a .csv file must not match
    self.flow.config['acts'].path_match.return_value = False
    self.assertFalse(ActsListFileType.match('path/to/some.csv', self.flow.config))
@patch('arkindex.dataimport.filetypes.ActsImporter')
def test_acts_handle(self, importer_mock):
    """ActsListFileType.handle builds an ActsImporter from the config and runs it."""
    self.flow.config = GitConfig({
        'acts': ImportConfig({
            'paths': ['*'],
            'folder_type': self.volume_type,
            'act_type': self.act_type,
            'surface_type': self.surface_type,
        }),
    })
    acts_diff = SimpleDiff(DiffType.Modification, 'a.csv', 'b.csv')
    ft = ActsListFileType(self.flow, acts_diff)
    ft.handle()
    # The importer receives the diff's new path and the configured element types
    self.assertEqual(importer_mock.call_count, 1)
    self.assertEqual(importer_mock.call_args, call(
        'the/repo/b.csv',
        corpus=self.corpus,
        folder_type=self.volume_type,
        act_type=self.act_type,
        surface_type=self.surface_type,
    ))
    # run() is invoked exactly once, with no arguments
    self.assertEqual(importer_mock().run.call_count, 1)
    self.assertEqual(importer_mock().run.call_args, ())
def test_config_match(self):
    """GitConfigFileType matches only the .arkindex.yml configuration file name."""
    self.assertTrue(GitConfigFileType.match('path/to/.arkindex.yml', self.flow.config))
    self.assertFalse(GitConfigFileType.match('path/to/somethingelse.yml', self.flow.config))
def test_metadata_match(self):
    """MetadataFileType matches only .xml paths accepted by the 'metadata' paths."""
    self.flow.config = GitConfig({
        'metadata': ImportConfig(paths=['*']),
    })
    # Stub path_match to accept everything so only the extension check varies.
    # Explicit return_value=True for consistency with test_acts_match: the
    # original bare MagicMock() only passed because its default return is truthy.
    self.flow.config['metadata'].path_match = MagicMock(return_value=True)
    self.assertTrue(MetadataFileType.match('path/to/some.xml', self.flow.config))
    self.assertFalse(MetadataFileType.match('wrong/type.csv', self.flow.config))
    # path_match is consulted for both candidates, in order
    self.assertEqual(self.flow.config['metadata'].path_match.call_count, 2)
    self.assertEqual(self.flow.config['metadata'].path_match.call_args_list, [
        call('path/to/some.xml'),
        call('wrong/type.csv'),
    ])
    # Once path_match rejects the path, even a .xml file must not match
    self.flow.config['metadata'].path_match.return_value = False
    self.assertFalse(MetadataFileType.match('path/to/some.xml', self.flow.config))
@patch('arkindex_common.tei.Date')
@patch('arkindex.dataimport.filetypes.TeiParser')
def test_metadata_handle(self, tei_parser_mock, tei_elt_mock):
    """MetadataFileType.handle builds a TeiParser from the config and runs it."""
    db_elt = self.corpus.elements.get(name='Act 1')
    metadata = db_elt.metadatas.create(
        name='date',
        type=MetaType.Date,
        value='1333',
    )
    # Pretend the parser matched one database element and produced one metadata
    tei_parser_mock().match_database.return_value = [(db_elt, tei_elt_mock())]
    tei_parser_mock().run.return_value = [metadata]
    self.flow.config = GitConfig({
        'metadata': ImportConfig({
            'paths': ['*'],
            'folder_type': self.volume_type,
            'element_type': self.act_type,
        }),
    })
    metadatas_diff = SimpleDiff(DiffType.Modification, 'a.xml', 'b.xml')
    ft = MetadataFileType(self.flow, metadatas_diff)
    ft.handle()
    # The parser receives the diff's new path, the revision and the config types
    self.assertEqual(tei_parser_mock.call_args, call(
        path='the/repo/b.xml',
        corpus=self.corpus,
        revision=self.rev,
        folder_type=self.volume_type,
        element_type=self.act_type,
    ))
    self.assertEqual(tei_parser_mock().run.call_count, 1)
self.assertTrue(GitConfigFileType.match('path/to/.arkindex.yml', GitConfig()))
self.assertFalse(GitConfigFileType.match('path/to/somethingelse.yml', GitConfig()))
......@@ -3,7 +3,7 @@ from arkindex_common.enums import DataImportMode
from arkindex.project.tests import FixtureTestCase
from arkindex.documents.models import Element
from arkindex.dataimport.models import DataImport
from arkindex.dataimport.filetypes import IIIFFileType, ActsListFileType
from arkindex.dataimport.filetypes import IIIFFileType
from arkindex.dataimport.git import GitFlow, SimpleDiff, DiffType
from ponos.models import State
import os.path
......@@ -314,32 +314,24 @@ class TestGitFlow(FixtureTestCase):
"""
Test a normal GitFlow file types dispatch
"""
elt1 = Element.objects.get(name='Volume 1, page 1r')
elt2 = Element.objects.get(name='Volume 1, page 1v')
diff1 = SimpleDiff(DiffType.Modification, 'path1a', 'path1b')
diff2 = SimpleDiff(DiffType.Modification, 'path2a', 'path2b')
elt = Element.objects.get(name='Volume 1, page 1v')
diff = SimpleDiff(DiffType.Modification, 'path2a', 'path2b')
act_mock = MagicMock(spec=ActsListFileType)
iiif_mock = MagicMock(spec=IIIFFileType)
act_mock.diff = diff1
iiif_mock.diff = diff2
act_mock.changed_elements = [elt1, ]
iiif_mock.changed_elements = [elt2, ]
filetype_mock.side_effect = [act_mock, iiif_mock]
iiif_mock.diff = diff
iiif_mock.changed_elements = [elt, ]
filetype_mock.side_effect = [iiif_mock]
flow = GitFlow(self.dataimport, self.working_dir)
flow.config = MagicMock()
changed_elements = flow.dispatch_imports([diff1, diff2])
changed_elements = flow.dispatch_imports([diff])
self.assertCountEqual(changed_elements, [elt1, elt2])
self.assertCountEqual(changed_elements, [elt])
self.assertEqual(filetype_mock.call_count, 2)
self.assertEqual(filetype_mock.call_count, 1)
self.assertListEqual(filetype_mock.call_args_list, [
call(flow, diff1),
call(flow, diff2),
call(flow, diff),
])
self.assertEqual(act_mock.handle.call_count, 1)
self.assertEqual(act_mock.handle.call_args, call())
self.assertEqual(iiif_mock.handle.call_count, 1)
self.assertEqual(iiif_mock.handle.call_args, call())
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment