diff --git a/arkindex/documents/admin.py b/arkindex/documents/admin.py
index 2822dcce9f1e054c2d51ad628bbd9765585b96c7..3a2353cab71e90d673ad20366f362e8f49b3d6bf 100644
--- a/arkindex/documents/admin.py
+++ b/arkindex/documents/admin.py
@@ -1,8 +1,11 @@
 from django.contrib import admin
+from django import forms
 from django.urls import path, reverse
 from django.utils.html import format_html
+from django_admin_hstore_widget.forms import HStoreFormField
 from arkindex.documents.models import \
-    Corpus, Page, Element, ElementType, Act, Transcription, MetaData, InterpretedDate, Classification, DataSource
+    Corpus, Page, Element, ElementType, Act, Transcription, MetaData, InterpretedDate, Classification, DataSource, \
+    Entity, EntityRole, EntityLink
 from arkindex.documents.views import DumpActs
 from arkindex.dataimport.models import Event
 from enumfields.admin import EnumFieldListFilter
@@ -43,12 +46,13 @@ class DateInline(admin.TabularInline):
 class MetaDataAdmin(admin.ModelAdmin):
     list_display = ('id', 'type', 'revision')
     readonly_fields = ('id', 'revision')
-    raw_id_fields = ('element', )
+    raw_id_fields = ('element', 'entity', )
     inlines = (DateInline, )
 
 
 class MetaDataInline(admin.TabularInline):
     model = MetaData
+    raw_id_fields = ('entity', )
 
 
 class ElementAdmin(admin.ModelAdmin):
@@ -95,6 +99,41 @@ class TranscriptionAdmin(admin.ModelAdmin):
     raw_id_fields = ('element', 'zone', )
 
 
+class EntityMetaForm(forms.ModelForm):
+    """
+    Admin form editing the metas of an entity with the hstore widget
+    """
+    # Entity.metas allows null and blank values, but form fields are required
+    # by default: an entity without metas could not be saved from the admin
+    metas = HStoreFormField(required=False)
+
+
+class EntityLinkInLine(admin.TabularInline):
+    """
+    Links where the edited entity is the parent
+    """
+    model = EntityLink
+    # EntityLink has two foreign keys to Entity: tell the admin which one to use
+    fk_name = 'parent'
+    raw_id_fields = ('child', )
+
+
+class EntityAdmin(admin.ModelAdmin):
+    list_display = ('id', 'name', 'type')
+    list_filter = [('type', EnumFieldListFilter), 'corpus']
+    readonly_fields = ('id', )
+    search_fields = ('name', )
+    inlines = (EntityLinkInLine, )
+    form = EntityMetaForm
+
+
+class EntityRoleAdmin(admin.ModelAdmin):
+    # Role names are only unique together with the entity types: display them too
+    list_display = ('id', 'parent_name', 'child_name', 'parent_type', 'child_type')
+    list_filter = ['corpus']
+    readonly_fields = ('id', )
+
+
 admin.site.register(Corpus, CorpusAdmin)
 admin.site.register(DataSource, DataSourceAdmin)
 admin.site.register(Page, PageAdmin)
@@ -102,3 +141,5 @@ admin.site.register(Element, ElementAdmin)
 admin.site.register(Act, ActAdmin)
 admin.site.register(Transcription, TranscriptionAdmin)
 admin.site.register(MetaData, MetaDataAdmin)
+admin.site.register(Entity, EntityAdmin)
+admin.site.register(EntityRole, EntityRoleAdmin)
diff --git a/arkindex/documents/migrations/0003_entities.py b/arkindex/documents/migrations/0003_entities.py
new file mode 100644
index 0000000000000000000000000000000000000000..cae20504c95a104a258fc1e6fea7acd992a6ac41
--- /dev/null
+++ b/arkindex/documents/migrations/0003_entities.py
@@ -0,0 +1,93 @@
+# Generated by Django 2.1 on 2019-05-02 13:40
+
+import arkindex.documents.models
+import django.contrib.postgres.fields.hstore
+from django.contrib.postgres.operations import HStoreExtension
+from django.db import migrations, models
+import django.db.models.deletion
+import enumfields.fields
+import uuid
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('documents', '0002_interpreteddate'),
+    ]
+
+    operations = [
+        HStoreExtension(),
+        migrations.CreateModel(
+            name='Entity',
+            fields=[
+                ('id', models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False)),
+                ('name', models.TextField()),
+                ('type', enumfields.fields.EnumField(
+                    db_index=True,
+                    enum=arkindex.documents.models.EntityType,
+                    max_length=50)),
+                ('metas', django.contrib.postgres.fields.hstore.HStoreField(blank=True, null=True)),
+                ('corpus', models.ForeignKey(
+                    on_delete=django.db.models.deletion.CASCADE,
+                    related_name='entities',
+                    to='documents.Corpus')),
+            ],
+            options={
+                'verbose_name_plural': 'Entities',
+            },
+        ),
+        migrations.CreateModel(
+            name='EntityLink',
+            fields=[
+                ('id', models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False)),
+                ('child', models.ForeignKey(
+                    on_delete=django.db.models.deletion.CASCADE,
+
related_name='children',
+                    to='documents.Entity')),
+                ('parent', models.ForeignKey(
+                    on_delete=django.db.models.deletion.CASCADE,
+                    related_name='parents',
+                    to='documents.Entity')),
+            ],
+        ),
+        migrations.CreateModel(
+            name='EntityRole',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('parent_name', models.CharField(max_length=250)),
+                ('child_name', models.CharField(max_length=250)),
+                ('parent_type', enumfields.fields.EnumField(enum=arkindex.documents.models.EntityType, max_length=50)),
+                ('child_type', enumfields.fields.EnumField(enum=arkindex.documents.models.EntityType, max_length=50)),
+                ('corpus', models.ForeignKey(
+                    on_delete=django.db.models.deletion.CASCADE,
+                    related_name='roles',
+                    to='documents.Corpus')),
+            ],
+        ),
+        migrations.AddField(
+            model_name='entitylink',
+            name='role',
+            field=models.ForeignKey(
+                on_delete=django.db.models.deletion.CASCADE,
+                related_name='links',
+                to='documents.EntityRole'),
+        ),
+        migrations.AddField(
+            model_name='metadata',
+            name='entity',
+            field=models.ForeignKey(
+                blank=True,
+                null=True,
+                on_delete=django.db.models.deletion.SET_NULL,
+                related_name='metadatas',
+                to='documents.Entity'),
+        ),
+        migrations.AlterUniqueTogether(
+            name='entityrole',
+            unique_together={('parent_name', 'child_name', 'parent_type', 'child_type', 'corpus')},
+        ),
+        migrations.AlterUniqueTogether(
+            name='entity',
+            unique_together={('corpus', 'type', 'name')},
+        ),
+    ]
diff --git a/arkindex/documents/migrations/0004_entities_data_migration.py b/arkindex/documents/migrations/0004_entities_data_migration.py
new file mode 100644
index 0000000000000000000000000000000000000000..a8cae614570fbb42bdd45c531f2ecf3d2e49a3a7
--- /dev/null
+++ b/arkindex/documents/migrations/0004_entities_data_migration.py
@@ -0,0 +1,60 @@
+# Generated by Django 2.1 on 2019-05-02 13:18
+
+from django.db import migrations
+from django.db.models import Q, OuterRef, Subquery
+from arkindex_common.enums import MetaType
+from arkindex.documents.models import EntityType
+
+
+def createEntities(apps, schema_editor):
+    """
+    Link text metadata named after an entity type, or named 'place', to entities
+    """
+    values = {item.value for item in EntityType}
+    MetaData = apps.get_model('documents', 'MetaData')
+    Entity = apps.get_model('documents', 'Entity')
+    # The element is a forward foreign key: fetch it with a join, not a second query
+    for metadata in MetaData.objects \
+            .filter(Q(type=MetaType.Text) & (Q(name__in=values) | Q(name='place'))) \
+            .select_related('element'):
+        if metadata.name in values:
+            entity_type = EntityType(metadata.name)
+        else:
+            # 'place' is not an entity type: those metadata become locations
+            entity_type = EntityType.Location
+        # get_or_create saves new entities itself, no extra save() is needed
+        entity, _ = Entity.objects.get_or_create(
+            name=metadata.value,
+            type=entity_type,
+            corpus_id=metadata.element.corpus_id)
+        # The value is kept: MetaData.value does not allow blank values, so an empty
+        # string would make later validated saves of this metadata fail
+        metadata.type = MetaType.Entity
+        metadata.entity = entity
+        metadata.save()
+
+
+def deleteEntities(apps, schema_editor):
+    """
+    Turn entity metadata back into text metadata holding the entity name, then delete all entities
+    """
+    MetaData = apps.get_model('documents', 'MetaData')
+    # Use Subquery() because Django doesn't support F() with joins
+    MetaData.objects \
+        .filter(entity__isnull=False) \
+        .update(
+            type=MetaType.Text,
+            value=Subquery(MetaData.objects.filter(pk=OuterRef('pk')).values('entity__name')[:1]))
+    Entity = apps.get_model('documents', 'Entity')
+    Entity.objects.all().delete()
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('documents', '0003_entities'),
+    ]
+
+    operations = [
+        migrations.RunPython(createEntities, deleteEntities),
+    ]
diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py
index 178012e3f8f9484d8d85e776b7fb91c2145c4ad0..55aa88f1a4449796a77a38baaffce1babdfcc6b2 100644
--- a/arkindex/documents/models.py
+++ b/arkindex/documents/models.py
@@ -1,5 +1,6 @@
 from django.db import models, transaction
 from django.contrib.postgres.indexes import GinIndex
+from django.contrib.postgres.fields import HStoreField
 from django.utils.functional import cached_property
 from django.core.validators import MinValueValidator, MaxValueValidator
 from django.core.exceptions import ValidationError
@@
-581,6 +582,98 @@ class Classification(models.Model):
         )
 
 
+class EntityType(Enum):
+    Person = 'person'
+    Location = 'location'
+    Subject = 'subject'
+    Organization = 'organization'
+    Misc = 'misc'
+
+
+class Entity(models.Model):
+    """
+    Semantic object in arkindex
+    """
+    id = models.UUIDField(default=uuid.uuid4, primary_key=True)
+    name = models.TextField()
+    type = EnumField(EntityType, max_length=50, db_index=True)
+    corpus = models.ForeignKey(Corpus, related_name='entities', on_delete=models.CASCADE)
+    metas = HStoreField(null=True, blank=True)
+
+    class Meta:
+        unique_together = (
+            ('corpus', 'type', 'name'),
+        )
+        verbose_name_plural = 'Entities'
+
+    def __str__(self):
+        return self.name
+
+
+class EntityRole(models.Model):
+    """
+    Role's type between a parent and a child
+    """
+    parent_name = models.CharField(max_length=250)
+    child_name = models.CharField(max_length=250)
+    parent_type = EnumField(EntityType, max_length=50)
+    child_type = EnumField(EntityType, max_length=50)
+    corpus = models.ForeignKey(Corpus, related_name='roles', on_delete=models.CASCADE)
+
+    class Meta:
+        unique_together = (
+            ('parent_name', 'child_name', 'parent_type', 'child_type', 'corpus'),
+        )
+
+    def __str__(self):
+        return '{} -> {}'.format(self.parent_name, self.child_name)
+
+
+class EntityLink(models.Model):
+    """
+    Link between two entities with a role
+    """
+    id = models.UUIDField(default=uuid.uuid4, primary_key=True)
+    # NOTE(review): the related names read inverted, entity.parents holds the links
+    # where the entity is the parent; fixing this needs a schema migration
+    parent = models.ForeignKey(Entity, related_name='parents', on_delete=models.CASCADE)
+    child = models.ForeignKey(Entity, related_name='children', on_delete=models.CASCADE)
+    role = models.ForeignKey(EntityRole, related_name='links', on_delete=models.CASCADE)
+
+    def _check_entity(self, label, entity, expected_type):
+        """
+        Ensure an entity has the type expected by the role and shares its corpus
+        """
+        if entity.type != expected_type:
+            raise ValidationError("{}'s type {} is different from the expected type {}".format(
+                label,
+                entity.type,
+                expected_type))
+        if entity.corpus != self.role.corpus:
+            raise ValidationError("{}'s corpus {} is different from the expected corpus {}".format(
+                label,
+                entity.corpus,
+                self.role.corpus))
+
+    def clean(self):
+        """
+        Validate the parent, then the child, against the role
+        """
+        # Check the raw foreign keys: reading an unset non-nullable relation
+        # raises RelatedObjectDoesNotExist instead of returning None
+        if self.role_id is None or self.parent_id is None:
+            return
+        self._check_entity('Parent', self.parent, self.role.parent_type)
+        if self.child_id is None:
+            return
+        self._check_entity('Child', self.child, self.role.child_type)
+
+    def save(self, *args, **kwargs):
+        # Model.save() does not validate by itself: enforce clean() on every save
+        self.full_clean()
+        super().save(*args, **kwargs)
+
+
 class MetaData(models.Model):
     '''
     Metadatas for elements
@@ -592,6 +685,7 @@ class MetaData(models.Model):
     value = models.TextField()
     revision = models.ForeignKey('dataimport.Revision', on_delete=models.SET_NULL, blank=True, null=True)
     index = models.PositiveIntegerField(default=0)
+    entity = models.ForeignKey(Entity, null=True, blank=True, related_name='metadatas', on_delete=models.SET_NULL)
 
     class Meta:
         ordering = ('element', 'name', 'index')
@@ -624,6 +718,23 @@ class MetaData(models.Model):
     def __ge__(self, other):
         return self.__eq__(other) or self.__gt__(other)
 
+    def clean(self):
+        """
+        Check that the linked entity belongs to the corpus of the element
+        """
+        # Check the raw foreign keys: reading an unset relation can raise instead of returning None
+        if self.entity_id is None or self.element_id is None:
+            return
+        if self.entity.corpus != self.element.corpus:
+            raise ValidationError("Entity's corpus {} is different from the expected corpus {}".format(
+                self.entity.corpus,
+                self.element.corpus))
+
+    def save(self, *args, **kwargs):
+        # Model.save() does not validate by itself: enforce clean() on every save
+        self.full_clean()
+        super().save(*args, **kwargs)
+
 
 class DateType(Enum):
     Exact = 'exact'
diff --git a/arkindex/documents/tests/test_entities.py b/arkindex/documents/tests/test_entities.py
new file mode 100644
index 0000000000000000000000000000000000000000..e899647ce987b9c5f4b24849823f71f33f2aec0a
--- /dev/null
+++ b/arkindex/documents/tests/test_entities.py
@@ -0,0 +1,80 @@
+from django.core.exceptions import ValidationError
+from arkindex.documents.models import Corpus, Element, ElementType, \
+    Entity, EntityType, EntityRole, EntityLink, MetaData, MetaType
+from arkindex.project.tests import FixtureTestCase
+
+
+class TestSaveEntities(FixtureTestCase):
+    """
+    Test the validation done when saving entity links and entity metadata
+    """
+
+    @classmethod
+    def setUpTestData(cls):
+        super().setUpTestData()
+        cls.corpus1 = Corpus.objects.create(name='corpus 1')
+        cls.corpus2 = Corpus.objects.create(name='corpus 2')
+        cls.parent = Entity.objects.create(name='parent', type=EntityType.Organization, corpus=cls.corpus1)
+        cls.child = Entity.objects.create(type=EntityType.Person, corpus=cls.corpus1, name="child")
+        cls.role = EntityRole.objects.create(
+            parent_name='organization',
+            child_name='person',
+            parent_type=EntityType.Organization,
+            child_type=EntityType.Person,
+            corpus=cls.corpus1,
+        )
+        cls.link = EntityLink(parent=cls.parent, child=cls.child, role=cls.role)
+
+    def test_valid_link(self):
+        # The class-level entities are modified by the other tests: restore a valid state first
+        self.parent.corpus = self.corpus1
+        self.child.corpus = self.corpus1
+        self.parent.type = EntityType.Organization
+        self.child.type = EntityType.Person
+        link = EntityLink(parent=self.parent, child=self.child, role=self.role)
+        link.save()
+        self.assertTrue(EntityLink.objects.filter(pk=link.pk).exists())
+
+    def test_parent_type_different(self):
+        self.parent.corpus = self.corpus1
+        self.child.corpus = self.corpus1
+        self.parent.type = EntityType.Person
+        self.child.type = EntityType.Person
+        with self.assertRaises(ValidationError):
+            self.link.save()
+
+    def test_parent_corpus_different(self):
+        self.parent.corpus = self.corpus2
+        self.child.corpus = self.corpus1
+        self.parent.type = EntityType.Organization
+        self.child.type = EntityType.Person
+        with self.assertRaises(ValidationError):
+            self.link.save()
+
+    def test_child_type_different(self):
+        self.parent.corpus = self.corpus1
+        self.child.corpus = self.corpus1
+        self.parent.type = EntityType.Organization
+        self.child.type = EntityType.Organization
+        with self.assertRaises(ValidationError):
+            self.link.save()
+
+    def test_child_corpus_different(self):
+        self.parent.corpus = self.corpus1
+        self.child.corpus = self.corpus2
+        self.parent.type = EntityType.Organization
+        self.child.type = EntityType.Person
+        with self.assertRaises(ValidationError):
+            self.link.save()
+
+    def test_save_entity_in_metadata(self):
+        self.parent.corpus = self.corpus2
+        element = Element.objects.create(corpus=self.corpus1, type=ElementType.Act, name="element")
+        with self.assertRaises(ValidationError):
+            MetaData.objects.create(
+                name='test 1',
+                type=MetaType.Entity,
+                value='Blah',
+                element=element,
+                entity=self.parent,
+            )
diff --git a/arkindex/project/settings.py b/arkindex/project/settings.py
index c69139c35c55b70da3013ea071f1978dc2fadf0c..4454ca3924ed3313cb62823bdad55460facf5918 100644
--- a/arkindex/project/settings.py
+++ b/arkindex/project/settings.py
@@ -141,6 +141,8 @@ INSTALLED_APPS = [
     'django.contrib.sessions',
     'django.contrib.messages',
     'django.contrib.staticfiles',
+    'django.contrib.postgres',
+    'django_admin_hstore_widget',
 
     # Tools
     'rest_framework',
diff --git a/requirements.txt b/requirements.txt
index da4456f39860e61f32fde0aff7a507cc2440a7af..722eef979d9537345a665705ada055bd6ff0968c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,6 +4,7 @@ arkindex-common==0.1.0
 boto3==1.9
 certifi==2017.7.27.1
 chardet==3.0.4
+django-admin-hstore-widget==1.0.1
 django-cors-headers==2.4.0
 django-enumfields==1.0.0
 djangorestframework==3.9.2