Skip to content
Snippets Groups Projects
Commit 23e2992f authored by Bastien Abadie's avatar Bastien Abadie
Browse files

Merge branch 'build-transkribus-entities' into 'master'

Build Transkribus entities

See merge request !900
parents daf3311f 2ce49f79
No related branches found
No related tags found
1 merge request!900Build Transkribus entities
......@@ -738,6 +738,7 @@ class ImportTranskribus(CreateAPIView):
self.dataimport = corpus.imports.create(
creator=self.request.user,
mode=DataImportMode.Transkribus,
collection_id=collection_id
collection_id=collection_id,
build_entities=serializer.validated_data['build_entities']
)
self.dataimport.start()
# Generated by Django 3.1 on 2020-09-02 13:10
from django.db import migrations, models
class Migration(migrations.Migration):
    """
    Add the ``build_entities`` boolean column (default ``False``)
    to the ``dataimport`` model.
    """

    # Must run after the migration that introduced ``collection_id``
    dependencies = [('dataimport', '0017_dataimport_collection_id')]

    operations = [
        migrations.AddField(
            model_name='dataimport',
            name='build_entities',
            field=models.BooleanField(default=False),
        ),
    ]
......@@ -69,8 +69,9 @@ class DataImport(IndexableModel):
related_name='imports',
)
# Used to define the collection ID for Transkribus import
# Used to define the collection ID and entities import for Transkribus import
collection_id = models.PositiveIntegerField(null=True, blank=True)
build_entities = models.BooleanField(default=False)
class Meta:
ordering = ['corpus', '-created']
......@@ -177,6 +178,15 @@ class DataImport(IndexableModel):
},
}
# Import entities directly after the import step
if self.mode == DataImportMode.Transkribus and self.build_entities:
transcriptions_path = shlex.quote(path.join('/data', import_task_name, str(self.collection_id), 'transcriptions.json'))
tasks['build_entities'] = {
'image': settings.ARKINDEX_TASKS_IMAGE,
'command': 'python -m arkindex_tasks.build_entities {}'.format(transcriptions_path),
'parents': [import_task_name],
}
elts_chunk_files = ['elements.json']
if ml_workflow_chunks > 1:
elts_chunk_files = ['elements_chunk_{}.json'.format(n) for n in range(1, ml_workflow_chunks + 1)]
......
......@@ -309,6 +309,7 @@ class ImportTranskribusSerializer(serializers.Serializer):
Serialize a Transkribus import
"""
collection_id = serializers.IntegerField(min_value=1)
build_entities = serializers.BooleanField(default=False)
def validate(self, data):
collection_id = data.get('collection_id')
......
......@@ -111,7 +111,7 @@ class TestTranskribusImport(FixtureAPITestCase):
},
'tasks': {
'import': {
'command': 'python -m arkindex_tasks.import_transkribus 12345 --corpus {}'.format(corpus.id),
'command': 'python -m arkindex_tasks.import_transkribus {} --corpus {}'.format(dataimport.collection_id, corpus.id),
'image': 'registry.gitlab.com/arkindex/tasks'
},
'thumbnails': {
......@@ -121,3 +121,67 @@ class TestTranskribusImport(FixtureAPITestCase):
}
}
})
@override_settings(PONOS_RECIPE={})
@patch("transkribus.TranskribusAPI.list_user_collection")
def test_create_import_build_entities(self, mock_transkribus):
    """
    Creating a Transkribus import with build_entities enabled adds a
    build_entities task, depending on the import task, to the workflow recipe
    """
    # Value returned by the patched TranskribusAPI.list_user_collection
    mock_transkribus.return_value = [
        {"email": "nope@nope.fr"},
        {"email": "arkindex@teklia.com"},
    ]
    self.client.force_login(self.user)

    payload = {
        "collection_id": "12345",
        "build_entities": True,
    }
    response = self.client.post(reverse("api:import-transkribus"), payload, format="json")
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    # The created import is retrieved from the ID exposed by the API response
    dataimport = DataImport.objects.get(id=response.json()["id"])
    self.assertEqual(dataimport.mode, DataImportMode.Transkribus)
    self.assertEqual(dataimport.collection_id, 12345)

    # The requesting user must get write and admin rights on the import's corpus
    corpus = dataimport.corpus
    right = corpus.corpus_right.get(user=self.user)
    self.assertTrue(right.can_write)
    self.assertTrue(right.can_admin)
    self.assertEqual(corpus.name, "Transkribus collection n°12345")
    self.assertEqual(corpus.description, "")
    self.assertEqual(corpus.public, False)

    # Assert default types are set on the new corpus
    expected_types = [
        {"folder": False, "allowed_transcription": None, **values}
        for values in DEFAULT_TRANSKRIBUS_TYPES
    ]
    self.assertCountEqual(
        list(corpus.types.values("slug", "display_name", "folder", "allowed_transcription")),
        expected_types,
    )

    self.assertEqual(dataimport.state, State.Unscheduled)
    self.assertIsNotNone(dataimport.workflow)

    # The workflow recipe must hold the extra build_entities task
    tasks_image = 'registry.gitlab.com/arkindex/tasks'
    expected_env = {
        'TRANSKRIBUS_EMAIL': settings.TRANSKRIBUS_EMAIL,
        'TRANSKRIBUS_PASSWORD': settings.TRANSKRIBUS_PASSWORD,
    }
    expected_tasks = {
        'import': {
            'command': 'python -m arkindex_tasks.import_transkribus {} --corpus {}'.format(dataimport.collection_id, corpus.id),
            'image': tasks_image,
        },
        'build_entities': {
            'command': 'python -m arkindex_tasks.build_entities /data/import/{}/transcriptions.json'.format(dataimport.collection_id),
            'image': tasks_image,
            'parents': ['import'],
        },
        'thumbnails': {
            'command': 'python3 -m arkindex_tasks.generate_thumbnails /data/import/elements.json',
            'image': tasks_image,
            'parents': ['import'],
        },
    }
    recipe = yaml.safe_load(dataimport.workflow.recipe)
    self.assertDictEqual(recipe, {'env': expected_env, 'tasks': expected_tasks})
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment