diff --git a/README.md b/README.md index e306e3441a5f855144c6df404ff1dca43fa44d31..1a74dd2afb7d9d83e001d49c0645f64ec7619a83 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ http://ark.localhost:8000/api/v1/oauth/providers/gitlab/callback/ https://ark.localhost/api/v1/oauth/providers/gitlab/callback/ ``` -Once the application is created, GitLab will provide you with an application ID and a secret. Use the `arkindex/config.yaml` file to set them: +Once the application is created, GitLab will provide you with an application ID and a secret. Use the `arkindex/config.yml` file to set them: ```yaml gitlab: diff --git a/arkindex/documents/api/entities.py b/arkindex/documents/api/entities.py index de0d7ed6247af601203667ea9e3d109837b66c8f..2fe58dfbcbbef460f78bc6334e72c9a79ef7f63f 100644 --- a/arkindex/documents/api/entities.py +++ b/arkindex/documents/api/entities.py @@ -226,7 +226,7 @@ class EntityLinkCreate(CreateAPIView): )) class TranscriptionEntityCreate(CreateAPIView): """ - Link an existing Entity to a given transcription with its position + Link an existing Entity to a given transcription with its position. """ permission_classes = (IsVerified, ) serializer_class = TranscriptionEntitySerializer diff --git a/arkindex/documents/migrations/0042_transcription_entity_confidence.py b/arkindex/documents/migrations/0042_transcription_entity_confidence.py new file mode 100644 index 0000000000000000000000000000000000000000..389470b1ad6b804d97fe90f81878a657b9ec683a --- /dev/null +++ b/arkindex/documents/migrations/0042_transcription_entity_confidence.py @@ -0,0 +1,20 @@ +# Generated by Django 3.2.6 on 2021-08-09 11:34 + +import django.core.validators +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('dataimport', '0034_worker_run_config'), + ('documents', '0041_rotation'), + ] + + operations = [ + migrations.AddField( + model_name='transcriptionentity', + name='confidence', + field=models.FloatField(blank=True, null=True, validators=[django.core.validators.MinValueValidator(0), django.core.validators.MaxValueValidator(1)]), + ), + ] diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py index 3d14e34145959b2773ddd36cfb6402f11cbf1215..25994e38ecfacef45e50013c19e772075f26bfe3 100644 --- a/arkindex/documents/models.py +++ b/arkindex/documents/models.py @@ -531,6 +531,8 @@ class TranscriptionEntity(models.Model): blank=True, ) + confidence = models.FloatField(validators=[MinValueValidator(0), MaxValueValidator(1)], null=True, blank=True) + class Meta: unique_together = ( ('transcription', 'entity', 'offset', 'length', 'worker_version'), diff --git a/arkindex/documents/serializers/entities.py b/arkindex/documents/serializers/entities.py index 71509af2cb353decc4ef7ada23ddc9cf6443ef28..7513b47010b3c93512aa6cf100fa6c1f1bf1d4b1 100644 --- a/arkindex/documents/serializers/entities.py +++ b/arkindex/documents/serializers/entities.py @@ -212,10 +212,11 @@ class TranscriptionEntitySerializer(serializers.ModelSerializer): default=None, style={'base_template': 'input.html'}, ) + confidence = serializers.FloatField(min_value=0, max_value=1, default=None) class Meta: model = TranscriptionEntity - fields = ('entity', 'offset', 'length', 'worker_version_id') + fields = ('entity', 'offset', 'length', 'worker_version_id', 'confidence') def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) diff --git a/arkindex/documents/tests/test_entities_api.py b/arkindex/documents/tests/test_entities_api.py index 9b1467c267a95452784504517f1a93779c27b5fb..e3c4cb41ba033b5702edefc590aef52636d5ca3c 100644 --- a/arkindex/documents/tests/test_entities_api.py +++ b/arkindex/documents/tests/test_entities_api.py @@ -82,6 +82,12 @@ class TestEntitiesAPI(FixtureAPITestCase): 'offset': 4, 'length': len(self.entity.name) } + self.tr_entities_confidence_sample = { + 'entity': str(self.entity.id), + 'offset': 4, + 'length': len(self.entity.name), + 'confidence': 1.0 + } self.tr_entities_version_sample = { 'entity': str(self.entity.id), 'offset': 4, @@ -421,6 +427,34 @@ class TestEntitiesAPI(FixtureAPITestCase): 'offset': transcription_entity.offset, 'length': transcription_entity.length, 'worker_version_id': None, + 'confidence': None + } + ) + + def test_create_transcription_entity_with_confidence(self): + self.client.force_login(self.user) + response = self.client.post( + reverse('api:transcription-entity-create', kwargs={'pk': str(self.transcription.id)}), + data=self.tr_entities_confidence_sample, + format='json' + ) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + transcription_entity = TranscriptionEntity.objects.get( + transcription=self.transcription, + entity=self.entity, + offset=self.tr_entities_confidence_sample['offset'], + length=self.tr_entities_confidence_sample['length'], + confidence=self.tr_entities_confidence_sample['confidence'] + ) + self.assertIsNotNone(transcription_entity) + self.assertDictEqual( + response.json(), + { + 'entity': str(transcription_entity.entity.id), + 'offset': transcription_entity.offset, + 'length': transcription_entity.length, + 'worker_version_id': None, + 'confidence': transcription_entity.confidence } ) @@ -492,6 +526,42 @@ class TestEntitiesAPI(FixtureAPITestCase): {'length': ['Ensure this value is greater than or equal to 1.']} ) + def test_create_transcription_entity_wrong_high_confidence(self): + self.client.force_login(self.user) + response = self.client.post( + reverse('api:transcription-entity-create', kwargs={'pk': str(self.transcription.id)}), + data={ + 'entity': str(self.entity.id), + 'offset': 4, + 'length': 1, + 'confidence': 2.0 + }, + format='json' + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual( + response.json(), + {'confidence': ['Ensure this value is less than or equal to 1.']} + ) + + def test_create_transcription_entity_wrong_low_confidence(self): + self.client.force_login(self.user) + response = self.client.post( + reverse('api:transcription-entity-create', kwargs={'pk': str(self.transcription.id)}), + data={ + 'entity': str(self.entity.id), + 'offset': 4, + 'length': 1, + 'confidence': -0.5 + }, + format='json' + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertDictEqual( + response.json(), + {'confidence': ['Ensure this value is greater than or equal to 0.']} + ) + def test_create_transcription_entity_different_corpus(self): self.client.force_login(self.user) ent = Entity.objects.create( @@ -578,6 +648,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'length': self.transcriptionentity.length, 'offset': self.transcriptionentity.offset, 'worker_version_id': None, + 'confidence': None }] ) @@ -628,6 +699,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'length': 8, 'offset': 8, 'worker_version_id': None, + 'confidence': None }] ) @@ -748,6 +820,7 @@ class TestEntitiesAPI(FixtureAPITestCase): 'offset': t.offset, 'length': t.length, 'worker_version_id': None, + 'confidence': None } for t in TranscriptionEntity.objects.filter(transcription=self.transcription).order_by('offset')], 'metadata': [{ 'entity': {