Skip to content
Snippets Groups Projects
Commit 944e981a authored by Erwan Rouchet's avatar Erwan Rouchet Committed by Bastien Abadie
Browse files

Add worker run on Transcription

parent ab8981e2
No related branches found
No related tags found
1 merge request!1697Add worker run on Transcription
......@@ -135,7 +135,7 @@ class TranscriptionEdit(ACLMixin, RetrieveUpdateDestroyAPIView):
role = Role.Contributor
detail = 'A write access to transcription element corpus is required.'
if transcription.worker_version_id:
if transcription.worker_version_id or transcription.worker_run_id:
role = Role.Admin
detail = 'Only admins can edit non-manual transcription.'
if not self.has_access(transcription.element.corpus, role.value):
......
# Generated by Django 4.0.1 on 2022-05-13 12:13
from django.db import migrations, models
# Schema migration: adds an optional `worker_run` foreign key on Transcription
# and a check constraint tying it to the existing `worker_version` column.
class Migration(migrations.Migration):
dependencies = [
('dataimport', '0047_workerversion_model_usage'),
('documents', '0053_metadata_worker_run_and_more'),
]
operations = [
# Nullable foreign key to dataimport.WorkerRun, reachable from a worker run
# through the `transcriptions` reverse accessor.
migrations.AddField(
model_name='transcription',
name='worker_run',
field=models.ForeignKey(
blank=True,
null=True,
on_delete=models.DO_NOTHING,
related_name='transcriptions',
to='dataimport.workerrun',
),
),
# A row is valid when worker_version_id is set OR worker_run_id is null:
# a worker run can only be recorded together with a worker version.
migrations.AddConstraint(
model_name='transcription',
constraint=models.CheckConstraint(
check=models.Q(worker_version_id__isnull=False) | models.Q(worker_run_id__isnull=True),
name='transcription_worker_run_requires_worker_version',
),
),
]
......@@ -570,6 +570,15 @@ class Transcription(models.Model):
null=True,
blank=True,
)
# Optional link (nullable, may be left blank) to the WorkerRun associated with this transcription.
worker_run = models.ForeignKey(
'dataimport.WorkerRun',
# Equivalent to RESTRICT or PROTECT, but PostgreSQL raises the integrity error
# itself instead of Django making extra SQL queries.
on_delete=models.DO_NOTHING,
related_name='transcriptions',
null=True,
blank=True,
)
text = models.TextField()
orientation = EnumField(TextOrientation, default=TextOrientation.HorizontalLeftToRight, max_length=50)
confidence = models.FloatField(null=True, blank=True)
......@@ -579,6 +588,17 @@ class Transcription(models.Model):
related_name='transcriptions',
)
# Model options: declares the database-level constraints for Transcription.
class Meta:
constraints = [
# There can be a worker run ID only if there is a worker version ID,
# but there can be a worker version ID without a worker run ID (backwards compatibility).
# In other words, either the worker run ID is null, or the worker version ID is not null.
models.CheckConstraint(
check=Q(worker_version_id__isnull=False) | Q(worker_run_id__isnull=True),
name='transcription_worker_run_requires_worker_version',
),
]
def __str__(self):
return 'Transcription: {}'.format(self.text[:20])
......
......@@ -252,7 +252,11 @@ class TranscriptionSerializer(serializers.ModelSerializer):
'confidence',
'orientation',
'worker_version_id',
'worker_run_id',
)
validators = [
WorkerRunOrVersionValidator(),
]
class ElementTranscriptionSerializer(TranscriptionSerializer):
......@@ -281,6 +285,13 @@ class TranscriptionCreateSerializer(serializers.ModelSerializer):
allow_null=True,
style={'base_template': 'input.html'},
)
# Optional, nullable WorkerRun primary key. The API field is named `worker_run_id`,
# but `source` maps it to the `worker_run` attribute, so validated data holds
# the resolved WorkerRun instance under the `worker_run` key.
worker_run_id = serializers.PrimaryKeyRelatedField(
queryset=WorkerRun.objects.all(),
required=False,
allow_null=True,
style={'base_template': 'input.html'},
source='worker_run',
)
confidence = serializers.FloatField(
min_value=0,
max_value=1,
......@@ -290,7 +301,10 @@ class TranscriptionCreateSerializer(serializers.ModelSerializer):
class Meta:
model = Transcription
fields = ('text', 'worker_version', 'confidence', 'orientation')
fields = ('text', 'worker_version', 'worker_run_id', 'confidence', 'orientation')
validators = [
WorkerRunOrVersionValidator(worker_version_field='worker_version'),
]
def validate(self, data):
data = super().validate(data)
......@@ -301,21 +315,29 @@ class TranscriptionCreateSerializer(serializers.ModelSerializer):
raise ValidationError({'element': ['The element has no zone']})
worker_version = data.get('worker_version')
if worker_version is None:
worker_run = data.get('worker_run')
if worker_version is None and worker_run is None:
return data
# Additional validation for transcriptions with a worker version
errors = {}
user = self.context['request'].user
if (not user or not user.is_internal):
errors['worker_version'] = [
'An internal user is required to create a transcription referring to a worker_version'
]
if 'confidence' not in data:
errors['non_field_errors'] = [
'The confidence field must be defined for a transcription with a worker version.'
]
if worker_version is not None:
if not user or not user.is_internal:
errors['worker_version'] = ['An internal user is required to create a transcription referring to a worker_version']
if 'confidence' not in data:
errors['non_field_errors'] = ['The confidence field must be defined for a transcription with a worker version.']
if worker_run is not None:
if not user or not user.is_internal:
errors['worker_run'] = ['An internal user is required to create a transcription with a worker run.']
if 'confidence' not in data:
errors['non_field_errors'] = ['The confidence field must be defined for a transcription with a worker run.']
data['worker_version'] = WorkerVersion(id=worker_run.version_id)
if errors:
raise ValidationError(errors)
......@@ -426,11 +448,41 @@ class TranscriptionBulkItemSerializer(serializers.Serializer):
class TranscriptionBulkSerializer(serializers.Serializer):
worker_version = serializers.PrimaryKeyRelatedField(
queryset=WorkerVersion.objects.all(),
help_text='A WorkerVersion ID that transcriptions will refer to. '
'Either this field or `worker_run_id` must be defined.',
style={'base_template': 'input.html'},
allow_null=True,
required=False,
)
# Optional, nullable WorkerRun primary key for the whole bulk request. `source`
# maps the `worker_run_id` API field to the `worker_run` key of validated data.
# The help text names `worker_version` as the alternative field (the previous
# text mistakenly referred back to `worker_run_id` itself).
worker_run_id = serializers.PrimaryKeyRelatedField(
    queryset=WorkerRun.objects.all(),
    help_text='A WorkerRun ID that transcriptions will refer to. '
              'Either this field or `worker_version` must be defined.',
    style={'base_template': 'input.html'},
    allow_null=True,
    required=False,
    source='worker_run',
)
transcriptions = TranscriptionBulkItemSerializer(many=True)
class Meta:
validators = [
WorkerRunOrVersionValidator(worker_version_field='worker_version'),
]
def validate(self, data):
errors = defaultdict(list)
worker_version = data.get('worker_version')
worker_run = data.get('worker_run')
if worker_run is None and worker_version is None:
errors['non_field_errors'].append('Either `worker_run_id` or `worker_version` must be defined.')
if worker_run is not None:
# Fake a WorkerVersion with the WorkerRun's version ID for backwards compatibility
data['worker_version'] = WorkerVersion(id=worker_run.version_id)
element_ids = set(transcription['element_id'] for transcription in data['transcriptions'])
found_ids = set(
Element
......@@ -444,21 +496,25 @@ class TranscriptionBulkSerializer(serializers.Serializer):
)
missing_ids = element_ids - found_ids
if not missing_ids:
return data
if missing_ids:
# Return an error message as a list just like DRF's ListField, for easier debugging
errors['transcriptions'] = [
{"element_id": [f'Element {transcription["element_id"]} was not found or cannot be written to.']}
if transcription['element_id'] in missing_ids
else {}
for i, transcription in enumerate(data['transcriptions'])
]
if errors:
raise ValidationError(errors)
# Return an error message as a list just like DRF's ListField, for easier debugging
raise ValidationError({'transcriptions': [
{"element_id": [f'Element {transcription["element_id"]} was not found or cannot be written to.']}
if transcription['element_id'] in missing_ids
else {}
for i, transcription in enumerate(data['transcriptions'])
]})
return data
def create(self, validated_data):
transcriptions = [
Transcription(
worker_version=validated_data['worker_version'],
worker_version=validated_data.get('worker_version'),
worker_run=validated_data.get('worker_run'),
element_id=transcription['element_id'],
text=transcription['text'],
orientation=transcription['orientation'],
......
......@@ -12,6 +12,7 @@ class TestBulkTranscriptions(FixtureAPITestCase):
def setUpTestData(cls):
super().setUpTestData()
cls.worker_version = WorkerVersion.objects.get(worker__slug='reco')
cls.worker_run = cls.worker_version.worker_runs.get()
def test_bulk_transcriptions_requires_login(self):
with self.assertNumQueries(0):
......@@ -55,7 +56,7 @@ class TestBulkTranscriptions(FixtureAPITestCase):
]
})
def test_bulk_transcriptions(self):
def test_bulk_transcriptions_worker_version(self):
self.client.force_login(self.user)
element1 = self.corpus.elements.get(name='Volume 2')
......@@ -90,6 +91,7 @@ class TestBulkTranscriptions(FixtureAPITestCase):
third_tr = element2.transcriptions.values_list('id', flat=True)[0]
self.assertEqual(response.json(), {
"worker_version": str(self.worker_version.id),
"worker_run_id": None,
"transcriptions": [
{
"id": str(first_tr),
......@@ -116,23 +118,117 @@ class TestBulkTranscriptions(FixtureAPITestCase):
})
self.assertCountEqual(
list(element1.transcriptions.values('text', 'confidence')),
list(element1.transcriptions.values('text', 'confidence', 'worker_version_id', 'worker_run_id')),
[
{
"text": "Sneasel",
"confidence": 0.54
"confidence": 0.54,
"worker_version_id": self.worker_version.id,
"worker_run_id": None,
},
{
"text": "Raticate",
"confidence": 0.12,
"worker_version_id": self.worker_version.id,
"worker_run_id": None,
},
]
)
self.assertCountEqual(
list(element2.transcriptions.values('text', 'confidence', 'worker_version_id', 'worker_run_id')),
[{
"text": "Charizard",
"confidence": 0.85,
"worker_version_id": self.worker_version.id,
"worker_run_id": None,
}]
)
def test_bulk_transcriptions_worker_run(self):
self.client.force_login(self.user)
element1 = self.corpus.elements.get(name='Volume 2')
element2 = self.corpus.elements.get(name='Volume 2, page 1r')
self.assertFalse(element1.transcriptions.exists())
self.assertFalse(element2.transcriptions.exists())
with self.assertNumQueries(7):
response = self.client.post(reverse('api:transcription-bulk'), {
"worker_run_id": str(self.worker_run.id),
"transcriptions": [
{
"element_id": str(element1.id),
"text": "Sneasel",
"confidence": 0.54
},
{
"element_id": str(element2.id),
"text": "Charizard",
"confidence": 0.85
},
{
"element_id": str(element1.id),
"text": "Raticate",
"confidence": 0.12
},
],
}, format='json')
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
first_tr, second_tr = element1.transcriptions.values_list('id', flat=True)
third_tr = element2.transcriptions.values_list('id', flat=True)[0]
self.assertEqual(response.json(), {
"worker_version": str(self.worker_version.id),
"worker_run_id": str(self.worker_run.id),
"transcriptions": [
{
"id": str(first_tr),
"element_id": str(element1.id),
"text": "Sneasel",
"orientation": TextOrientation.HorizontalLeftToRight.value,
"confidence": 0.54,
},
{
"id": str(third_tr),
"element_id": str(element2.id),
"text": "Charizard",
"orientation": TextOrientation.HorizontalLeftToRight.value,
"confidence": 0.85,
},
{
"id": str(second_tr),
"element_id": str(element1.id),
"text": "Raticate",
"confidence": 0.12
"orientation": TextOrientation.HorizontalLeftToRight.value,
"confidence": 0.12,
},
]
})
self.assertCountEqual(
list(element1.transcriptions.values('text', 'confidence', 'worker_version_id', 'worker_run_id')),
[
{
"text": "Sneasel",
"confidence": 0.54,
"worker_version_id": self.worker_version.id,
"worker_run_id": self.worker_run.id,
},
{
"text": "Raticate",
"confidence": 0.12,
"worker_version_id": self.worker_version.id,
"worker_run_id": self.worker_run.id,
},
]
)
self.assertCountEqual(
list(element2.transcriptions.values('text', 'confidence')),
list(element2.transcriptions.values('text', 'confidence', 'worker_version_id', 'worker_run_id')),
[{
"text": "Charizard",
"confidence": 0.85
"confidence": 0.85,
"worker_version_id": self.worker_version.id,
"worker_run_id": self.worker_run.id,
}]
)
......@@ -166,6 +262,7 @@ class TestBulkTranscriptions(FixtureAPITestCase):
response = self.client.post(reverse('api:transcription-bulk'), {
"worker_version": str(self.worker_version.id),
"worker_run_id": None,
"transcriptions": [
{
"element_id": str(test_element.id),
......@@ -186,6 +283,7 @@ class TestBulkTranscriptions(FixtureAPITestCase):
first_tr, second_tr = test_element.transcriptions.values_list('id', flat=True)
self.assertEqual(response.json(), {
"worker_version": str(self.worker_version.id),
"worker_run_id": None,
"transcriptions": [
{
"id": str(first_tr),
......@@ -230,3 +328,23 @@ class TestBulkTranscriptions(FixtureAPITestCase):
],
}, format='json')
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
def test_bulk_transcriptions_worker_version_xor_worker_run(self):
self.client.force_login(self.user)
test_element = self.corpus.elements.get(name='Volume 2, page 1r')
response = self.client.post(reverse('api:transcription-bulk'), {
"worker_version": str(self.worker_version.id),
"worker_run_id": str(self.worker_run.id),
"transcriptions": [
{
"element_id": str(test_element.id),
"text": "The Glow Cloud does not need to converse with us.",
"orientation": TextOrientation.VerticalRightToLeft.value,
"confidence": 0.33
},
],
}, format='json')
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertDictEqual(response.json(), {
'non_field_errors': ['Only one of `worker_version` and `worker_run_id` may be set.']
})
......@@ -29,48 +29,50 @@ class TestTranscriptionCreate(FixtureAPITestCase):
cls.private_corpus.memberships.create(user=cls.private_read_user, level=Role.Guest.value)
cls.worker_version = WorkerVersion.objects.get(worker__slug='reco')
cls.worker_run = cls.worker_version.worker_runs.get()
def test_create_transcription_require_login(self):
response = self.client.post(reverse('api:transcription-create', kwargs={'pk': self.line.id}), format='json')
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
def test_require_login(self):
with self.assertNumQueries(0):
response = self.client.post(reverse('api:transcription-create', kwargs={'pk': self.line.id}), format='json')
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
self.assertDictEqual(response.json(), {
'detail': 'Authentication credentials were not provided.'
})
def test_create_transcription_write_right(self):
def test_write_right(self):
self.client.force_login(self.private_read_user)
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.private_page.id}),
format='json',
data={'text': 'NEKUDOTAYIM'}
)
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
with self.assertNumQueries(7):
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.private_page.id}),
format='json',
data={'text': 'NEKUDOTAYIM'}
)
self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
self.assertDictEqual(response.json(), {
'detail': "A write access to the element's corpus is required."
})
def test_create_transcription_no_read_right(self):
def test_no_read_right(self):
self.client.force_login(self.user)
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.private_page.id}),
format='json',
data={'text': 'NEKUDOTAYIM'}
)
self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
with self.assertNumQueries(5):
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.private_page.id}),
format='json',
data={'text': 'NEKUDOTAYIM'}
)
self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
def test_create_transcription_no_element(self):
def test_no_element(self):
self.client.force_login(self.user)
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': uuid4()}),
format='json',
data={'text': 'NEKUDOTAYIM'}
)
self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
with self.assertNumQueries(5):
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': uuid4()}),
format='json',
data={'text': 'NEKUDOTAYIM'}
)
self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
def test_create_manual_transcription(self):
"""
Checks the view creates a manual transcription and runs ES indexing
"""
def test_manual(self):
self.client.force_login(self.user)
with self.assertNumQueries(8):
response = self.client.post(
......@@ -86,6 +88,7 @@ class TestTranscriptionCreate(FixtureAPITestCase):
'text': 'A perfect day in a perfect place',
'orientation': TextOrientation.HorizontalLeftToRight.value,
'worker_version_id': None,
'worker_run_id': None,
})
new_ts = Transcription.objects.get(text='A perfect day in a perfect place')
......@@ -93,7 +96,7 @@ class TestTranscriptionCreate(FixtureAPITestCase):
self.assertEqual(new_ts.worker_version, None)
self.assertTrue(self.line.transcriptions.filter(pk=new_ts.id).exists())
def test_create_duplicated_transcription(self):
def test_duplicated(self):
"""
Check the view creates a new manual transcription with a similar text and element
"""
......@@ -113,17 +116,18 @@ class TestTranscriptionCreate(FixtureAPITestCase):
2
)
def test_create_transcription_with_orientation(self):
def test_with_orientation(self):
"""
Check that a transcription is created with the specified orientation
"""
self.client.force_login(self.user)
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.line.id}),
format='json',
data={'text': 'A perfect day in a perfect place', 'orientation': 'vertical-lr'}
)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
with self.assertNumQueries(8):
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.line.id}),
format='json',
data={'text': 'A perfect day in a perfect place', 'orientation': 'vertical-lr'}
)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
tr = Transcription.objects.get(text='A perfect day in a perfect place')
self.assertDictEqual(response.json(), {
'id': str(tr.id),
......@@ -131,65 +135,70 @@ class TestTranscriptionCreate(FixtureAPITestCase):
'text': 'A perfect day in a perfect place',
'orientation': 'vertical-lr',
'worker_version_id': None,
'worker_run_id': None,
})
new_ts = Transcription.objects.get(text='A perfect day in a perfect place')
self.assertEqual(new_ts.orientation, TextOrientation.VerticalLeftToRight)
def test_create_transcription_invalid_orientation(self):
def test_invalid_orientation(self):
"""
Specifying an invalid text-orientation causes an error
"""
self.client.force_login(self.user)
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.line.id}),
format='json',
data={'text': 'A perfect day in a perfect place', 'orientation': 'wiggly'}
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
with self.assertNumQueries(7):
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.line.id}),
format='json',
data={'text': 'A perfect day in a perfect place', 'orientation': 'wiggly'}
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
@override_settings(ARKINDEX_FEATURES={'search': False})
def test_create_transcription_no_search(self):
def test_no_search(self):
self.client.force_login(self.user)
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.line.id}),
format='json',
data={'text': 'A classy text line'}
)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
with self.assertNumQueries(8):
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.line.id}),
format='json',
data={'text': 'A classy text line'}
)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
def test_create_transcription_worker_version_non_internal(self):
def test_worker_version_non_internal(self):
"""
An internal user is required to create a transcription with a worker version
"""
self.client.force_login(self.user)
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.line.id}),
format='json',
data={
'text': 'NEKUDOTAYIM',
'worker_version': str(self.worker_version.id),
'confidence': .42
}
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
with self.assertNumQueries(8):
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.line.id}),
format='json',
data={
'text': 'NEKUDOTAYIM',
'worker_version': str(self.worker_version.id),
'confidence': .42
}
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertDictEqual(response.json(), {
'worker_version': [
'An internal user is required to create a transcription referring to a worker_version'
]
})
def test_create_transcription_worker_version(self):
def test_worker_version(self):
self.client.force_login(self.internal_user)
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.line.id}),
format='json',
data={
'text': 'NEKUDOTAYIM',
'worker_version': str(self.worker_version.id),
'confidence': .42
}
)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
with self.assertNumQueries(6):
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.line.id}),
format='json',
data={
'text': 'NEKUDOTAYIM',
'worker_version': str(self.worker_version.id),
'confidence': .42
}
)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
tr = Transcription.objects.get(text='NEKUDOTAYIM')
self.assertEqual(tr.worker_version, self.worker_version)
self.assertDictEqual(response.json(), {
......@@ -198,60 +207,162 @@ class TestTranscriptionCreate(FixtureAPITestCase):
'text': 'NEKUDOTAYIM',
'orientation': TextOrientation.HorizontalLeftToRight.value,
'worker_version_id': str(self.worker_version.id),
'worker_run_id': None,
})
def test_ml_trancription_required_confidence(self):
def test_worker_version_required_confidence(self):
"""
A confidence is required when creating a transcription on an element with a worker version
"""
self.client.force_login(self.internal_user)
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.line.id}),
format='json',
data={
'text': 'CIRCLE',
'worker_version': str(self.worker_version.id)
}
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
with self.assertNumQueries(5):
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.line.id}),
format='json',
data={
'text': 'CIRCLE',
'worker_version': str(self.worker_version.id),
}
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertEqual(response.json(), {'non_field_errors': [
'The confidence field must be defined for a transcription with a worker version.'
]})
def test_ml_trancription_requires_element_zone(self):
def test_worker_run_non_internal(self):
"""
An internal user is required to create a transcription with a worker run
"""
self.client.force_login(self.user)
with self.assertNumQueries(8):
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.line.id}),
format='json',
data={
'text': 'NEKUDOTAYIM',
'worker_run_id': str(self.worker_run.id),
'confidence': .42
}
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertDictEqual(response.json(), {
'worker_run': [
'An internal user is required to create a transcription with a worker run.'
]
})
def test_worker_run(self):
self.client.force_login(self.internal_user)
with self.assertNumQueries(6):
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.line.id}),
format='json',
data={
'text': 'NEKUDOTAYIM',
'worker_run_id': str(self.worker_run.id),
'confidence': .42
}
)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
tr = Transcription.objects.get(text='NEKUDOTAYIM')
self.assertEqual(tr.worker_run, self.worker_run)
self.assertEqual(tr.worker_version, self.worker_version)
self.assertDictEqual(response.json(), {
'id': str(tr.id),
'confidence': .42,
'text': 'NEKUDOTAYIM',
'orientation': TextOrientation.HorizontalLeftToRight.value,
'worker_version_id': str(self.worker_version.id),
'worker_run_id': str(self.worker_run.id),
})
def test_worker_run_not_found(self):
self.client.force_login(self.user)
with self.assertNumQueries(8):
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.line.id}),
format='json',
data={
'text': 'NEKUDOTAYIM',
'worker_run_id': 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa',
'confidence': .42
}
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertDictEqual(response.json(), {
'worker_run_id': ['Invalid pk "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa" - object does not exist.'],
})
def test_worker_run_required_confidence(self):
self.client.force_login(self.internal_user)
with self.assertNumQueries(5):
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.line.id}),
format='json',
data={
'text': 'CIRCLE',
'worker_run_id': str(self.worker_run.id),
}
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertEqual(response.json(), {'non_field_errors': [
'The confidence field must be defined for a transcription with a worker run.'
]})
def test_worker_version_xor_worker_run(self):
self.client.force_login(self.internal_user)
with self.assertNumQueries(6):
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.line.id}),
format='json',
data={
'text': 'CIRCLE',
'worker_run_id': str(self.worker_run.id),
'worker_version': str(self.worker_version.id),
}
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertDictEqual(response.json(), {
'non_field_errors': ['Only one of `worker_version` and `worker_run_id` may be set.'],
})
def test_element_no_zone(self):
"""
A worker cannot create a transcription on an element without a zone
"""
null_zone_page = self.corpus.elements.create(type=self.page.type)
self.client.force_login(self.internal_user)
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': null_zone_page.id}),
format='json',
data={
'text': 'ELLIPSE',
'confidence': .42
}
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
with self.assertNumQueries(4):
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': null_zone_page.id}),
format='json',
data={
'text': 'ELLIPSE',
'confidence': .42
}
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertEqual(response.json(), {'element': ['The element has no zone']})
def test_ml_trancription_ignore_type(self):
def test_ignore_type(self):
"""
An old transcription type can still be sent but will be ignored
"""
self.client.force_login(self.internal_user)
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.line.id}),
format='json',
data={
'text': 'CIRCLE',
'type': 'paragraph',
'confidence': 0.654816358,
'worker_version': str(self.worker_version.id)
}
)
self.assertEqual(response.status_code, status.HTTP_201_CREATED, response.json())
with self.assertNumQueries(6):
response = self.client.post(
reverse('api:transcription-create', kwargs={'pk': self.line.id}),
format='json',
data={
'text': 'CIRCLE',
'type': 'paragraph',
'confidence': 0.654816358,
'worker_version': str(self.worker_version.id)
}
)
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
transcription = self.line.transcriptions.get(text='CIRCLE')
self.assertEqual(transcription.confidence, 0.654816358)
self.assertEqual(transcription.worker_version, self.worker_version)
......@@ -55,6 +55,7 @@ class TestEditTranscription(FixtureAPITestCase):
'text': 'A manual transcription',
'orientation': TextOrientation.HorizontalLeftToRight.value,
'worker_version_id': None,
'worker_run_id': None,
})
def test_get_worker_version(self):
......@@ -70,6 +71,7 @@ class TestEditTranscription(FixtureAPITestCase):
'text': 'PARIS',
'orientation': TextOrientation.HorizontalLeftToRight.value,
'worker_version_id': str(self.worker_version.id),
'worker_run_id': None,
})
def test_get_verified_user(self):
......@@ -152,6 +154,7 @@ class TestEditTranscription(FixtureAPITestCase):
'text': 'a knight was living lonely',
'orientation': TextOrientation.HorizontalLeftToRight.value,
'worker_version_id': None,
'worker_run_id': None,
})
def test_patch_orientation(self):
......@@ -175,6 +178,7 @@ class TestEditTranscription(FixtureAPITestCase):
'text': 'A manual transcription',
'orientation': 'vertical-rl',
'worker_version_id': None,
'worker_run_id': None,
})
def test_patch_invalid_orientation(self):
......@@ -258,6 +262,7 @@ class TestEditTranscription(FixtureAPITestCase):
'text': 'a knight was living lonely',
'orientation': 'vertical-rl',
'worker_version_id': None,
'worker_run_id': None,
})
def test_put_not_manual(self):
......
......@@ -54,6 +54,7 @@ class TestTranscriptions(FixtureAPITestCase):
'confidence': 1.0,
'orientation': TextOrientation.HorizontalLeftToRight.value,
'worker_version_id': str(self.worker_version_1.id),
'worker_run_id': None,
'element': None,
},
{
......@@ -62,6 +63,7 @@ class TestTranscriptions(FixtureAPITestCase):
'confidence': 0.369,
'orientation': TextOrientation.HorizontalLeftToRight.value,
'worker_version_id': str(self.worker_version_2.id),
'worker_run_id': None,
'element': None,
}
])
......@@ -130,6 +132,7 @@ class TestTranscriptions(FixtureAPITestCase):
'confidence': 0.369,
'orientation': TextOrientation.HorizontalLeftToRight.value,
'worker_version_id': str(self.worker_version_2.id),
'worker_run_id': None,
'element': {
'id': str(self.page.id),
'name': 'Volume 1, page 1r',
......
......@@ -70,6 +70,7 @@ WHERE "dataimport_worker"."id" IN ('{worker_id}'::uuid);
SELECT "documents_transcription"."id",
"documents_transcription"."element_id",
"documents_transcription"."worker_version_id",
"documents_transcription"."worker_run_id",
"documents_transcription"."text",
"documents_transcription"."orientation",
"documents_transcription"."confidence"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment