From 13e273b20c73cbf6fd77559b143d8cb75f52feda Mon Sep 17 00:00:00 2001 From: Valentin Rigal <rigal@teklia.com> Date: Thu, 13 Jun 2019 09:31:04 +0000 Subject: [PATCH] Move spacy validation --- README.md | 2 +- arkindex/documents/api/elements.py | 3 ++- arkindex/documents/api/ml.py | 3 ++- .../management/commands/build_fixtures.py | 3 ++- arkindex/documents/migrations/0001_initial.py | 2 +- arkindex/documents/migrations/0003_entities.py | 8 ++++---- .../migrations/0004_entities_data_migration.py | 3 +-- arkindex/documents/models.py | 18 +----------------- arkindex/documents/pagexml.py | 5 ++++- arkindex/documents/serializers/entities.py | 3 ++- arkindex/documents/serializers/light.py | 3 ++- arkindex/documents/serializers/ml.py | 3 ++- arkindex/documents/serializers/search.py | 3 ++- .../tests/commands/test_delete_corpus.py | 5 +++-- arkindex/documents/tests/test_elements_api.py | 5 ++--- arkindex/documents/tests/test_entities.py | 3 ++- arkindex/documents/tests/test_pagexml.py | 5 +++-- arkindex/documents/tests/test_search.py | 3 ++- .../tests/test_transcription_create.py | 3 ++- arkindex/images/importer.py | 3 ++- .../images/tests/test_bulk_transcriptions.py | 3 ++- 21 files changed, 44 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 6cd01cf4bb..53b7eb4ab5 100644 --- a/README.md +++ b/README.md @@ -144,12 +144,12 @@ SHELL_PLUS_POST_IMPORTS = [ ('django.db.models.functions', '*'), ('arkindex.documents.models', ( 'ElementType', - 'TranscriptionType', 'Right', 'PageType', 'PageDirection', 'PageComplement', )), + ('arkindex_common.enums', '*'), ('arkindex.dataimport.models', ( 'DataImportMode', 'EventType', diff --git a/arkindex/documents/api/elements.py b/arkindex/documents/api/elements.py index 2aa75dd2dc..f7acf9f2b2 100644 --- a/arkindex/documents/api/elements.py +++ b/arkindex/documents/api/elements.py @@ -11,8 +11,9 @@ from arkindex.documents.serializers.elements import ( ActSerializer, SurfaceSerializer, ElementCreateSerializer ) from arkindex.documents.serializers.ml import TranscriptionSerializer +from arkindex_common.enums import TranscriptionType from arkindex.documents.models import ( - Element, ElementType, Page, Act, TranscriptionType, Transcription, + Element, ElementType, Page, Act, Transcription, Corpus, Right ) from arkindex.project.mixins import CorpusACLMixin diff --git a/arkindex/documents/api/ml.py b/arkindex/documents/api/ml.py index 14632c8da4..92509bedcc 100644 --- a/arkindex/documents/api/ml.py +++ b/arkindex/documents/api/ml.py @@ -7,7 +7,8 @@ from rest_framework.exceptions import ValidationError from rest_framework.response import Response from rest_framework.views import APIView from arkindex.documents.models import \ - Classification, DataSource, Transcription, TranscriptionType, Page, Corpus, Entity, EntityType + Classification, DataSource, Transcription, Page, Corpus, Entity +from arkindex_common.enums import TranscriptionType, EntityType from arkindex.documents.serializers.ml import \ ClassificationsSerializer, TranscriptionsSerializer, TranscriptionCreateSerializer from arkindex.documents.indexer import Indexer diff --git a/arkindex/documents/management/commands/build_fixtures.py b/arkindex/documents/management/commands/build_fixtures.py index 870131e410..80c7ef21b5 100644 --- a/arkindex/documents/management/commands/build_fixtures.py +++ b/arkindex/documents/management/commands/build_fixtures.py @@ -2,9 +2,10 @@ from django.core.management.base import BaseCommand from django.contrib.auth.models import Group from arkindex_common.ml_tool import MLToolType +from arkindex_common.enums import TranscriptionType from arkindex.documents.models import ( Corpus, Element, ElementType, Page, PageDirection, PageType, Act, - Transcription, TranscriptionType, DataSource + Transcription, DataSource ) from arkindex.images.models import ImageServer, Image, Zone from arkindex.users.models import User, CorpusRight diff --git a/arkindex/documents/migrations/0001_initial.py b/arkindex/documents/migrations/0001_initial.py index 553fd785de..d33f80181f 100644 --- a/arkindex/documents/migrations/0001_initial.py +++ b/arkindex/documents/migrations/0001_initial.py @@ -89,7 +89,7 @@ class Migration(migrations.Migration): name='Transcription', fields=[ ('id', models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False)), - ('type', enumfields.fields.EnumField(db_index=True, default='word', enum=arkindex.documents.models.TranscriptionType, max_length=50)), + ('type', enumfields.fields.EnumField(db_index=True, default='word', enum=arkindex_common.enums.TranscriptionType, max_length=50)), ('text', models.TextField(blank=True, null=True)), ('score', models.FloatField(blank=True, null=True)), ], diff --git a/arkindex/documents/migrations/0003_entities.py b/arkindex/documents/migrations/0003_entities.py index 5d0af9375e..84dbccc3a6 100644 --- a/arkindex/documents/migrations/0003_entities.py +++ b/arkindex/documents/migrations/0003_entities.py @@ -1,6 +1,6 @@ # Generated by Django 2.1 on 2019-05-23 14:51 -import arkindex.documents.models +from arkindex_common.enums import EntityType import django.contrib.postgres.fields.hstore from django.contrib.postgres.operations import HStoreExtension from django.db import migrations, models @@ -24,7 +24,7 @@ class Migration(migrations.Migration): ('name', models.TextField()), ('type', enumfields.fields.EnumField( db_index=True, - enum=arkindex.documents.models.EntityType, + enum=EntityType, max_length=50)), ('metas', django.contrib.postgres.fields.hstore.HStoreField(blank=True, null=True)), ('corpus', models.ForeignKey( @@ -56,8 +56,8 @@ class Migration(migrations.Migration): ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('parent_name', models.CharField(max_length=250)), ('child_name', models.CharField(max_length=250)), - ('parent_type', enumfields.fields.EnumField(enum=arkindex.documents.models.EntityType, max_length=50)), - ('child_type', enumfields.fields.EnumField(enum=arkindex.documents.models.EntityType, max_length=50)), + ('parent_type', enumfields.fields.EnumField(enum=EntityType, max_length=50)), + ('child_type', enumfields.fields.EnumField(enum=EntityType, max_length=50)), ('corpus', models.ForeignKey( on_delete=django.db.models.deletion.CASCADE, related_name='roles', diff --git a/arkindex/documents/migrations/0004_entities_data_migration.py b/arkindex/documents/migrations/0004_entities_data_migration.py index 7ae321e9e4..2ab111e75e 100644 --- a/arkindex/documents/migrations/0004_entities_data_migration.py +++ b/arkindex/documents/migrations/0004_entities_data_migration.py @@ -2,8 +2,7 @@ from django.db import migrations from django.db.models import Q, OuterRef, Subquery -from arkindex_common.enums import MetaType -from arkindex.documents.models import EntityType +from arkindex_common.enums import MetaType, EntityType def createEntities(apps, schema_editor): diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py index f810bcac54..3a1df42466 100644 --- a/arkindex/documents/models.py +++ b/arkindex/documents/models.py @@ -5,7 +5,7 @@ from django.utils.functional import cached_property from django.core.validators import MinValueValidator, MaxValueValidator from django.core.exceptions import ValidationError from enumfields import EnumField, Enum -from arkindex_common.enums import MetaType +from arkindex_common.enums import TranscriptionType, MetaType, EntityType from arkindex_common.ml_tool import MLToolType from arkindex.project.models import IndexableModel from arkindex.project.fields import ArrayField @@ -492,14 +492,6 @@ class DataSource(models.Model): )[0] -class EntityType(Enum): - Person = 'person' - Location = 'location' - Subject = 'subject' - Organization = 'organization' - Misc = 'misc' - - class Entity(models.Model): """ Semantic object in arkindex @@ -586,14 +578,6 @@ class EntityLink(models.Model): super().save(*args, **kwargs) -class TranscriptionType(Enum): - Page = 'page' - Paragraph = 'paragraph' - Line = 'line' - Word = 'word' - Character = 'character' - - class Transcription(models.Model): """ A transcription on: diff --git a/arkindex/documents/pagexml.py b/arkindex/documents/pagexml.py index 906f73127a..014ae2c177 100644 --- a/arkindex/documents/pagexml.py +++ b/arkindex/documents/pagexml.py @@ -4,8 +4,11 @@ from arkindex_common.ml_tool import MLToolType from arkindex_common.pagexml import PageXmlPage from arkindex_common.enums import MetaType from arkindex.project.polygon import Polygon -from arkindex.documents.models import DataSource, TranscriptionType, ElementType, Page, Transcription,\ +from arkindex_common.enums import TranscriptionType +from arkindex.documents.models import ( + DataSource, ElementType, Page, Transcription, Entity, EntityRole, EntityLink, MetaData, TranscriptionEntity +) from arkindex.documents.indexer import Indexer import functools import Levenshtein diff --git a/arkindex/documents/serializers/entities.py b/arkindex/documents/serializers/entities.py index f4a808412f..0f8fb71d84 100644 --- a/arkindex/documents/serializers/entities.py +++ b/arkindex/documents/serializers/entities.py @@ -1,7 +1,8 @@ from rest_framework import serializers from django.core.exceptions import ValidationError as DjangoValidationError from arkindex.documents.models import \ - Corpus, Entity, EntityType, EntityLink, EntityRole, TranscriptionEntity + Corpus, Entity, EntityLink, EntityRole, TranscriptionEntity +from arkindex_common.enums import EntityType from arkindex.documents.serializers.light import EntityLightSerializer, CorpusLightSerializer from arkindex.project.serializer_fields import EnumField diff --git a/arkindex/documents/serializers/light.py b/arkindex/documents/serializers/light.py index 56733c6d5e..59097ebc8c 100644 --- a/arkindex/documents/serializers/light.py +++ b/arkindex/documents/serializers/light.py @@ -1,5 +1,6 @@ from rest_framework import serializers -from arkindex.documents.models import Element, ElementType, Corpus, Page, PageType, PageDirection, Entity, EntityType +from arkindex.documents.models import Element, ElementType, Corpus, Page, PageType, PageDirection, Entity +from arkindex_common.enums import EntityType from arkindex.documents.serializers.ml import ClassificationSerializer from arkindex.images.serializers import ImageSerializer from arkindex.project.serializer_fields import EnumField diff --git a/arkindex/documents/serializers/ml.py b/arkindex/documents/serializers/ml.py index ce1d21176b..3eb3b54bc7 100644 --- a/arkindex/documents/serializers/ml.py +++ b/arkindex/documents/serializers/ml.py @@ -1,8 +1,9 @@ from rest_framework import serializers from arkindex_common.ml_tool import MLToolType +from arkindex_common.enums import TranscriptionType from arkindex.project.serializer_fields import EnumField, MLToolField, PolygonField from arkindex.documents.models import ( - Corpus, Element, Page, Transcription, TranscriptionType, DataSource, Classification + Corpus, Element, Page, Transcription, DataSource, Classification ) from arkindex.images.serializers import ZoneSerializer diff --git a/arkindex/documents/serializers/search.py b/arkindex/documents/serializers/search.py index 42096c6dc5..87d610cc5c 100644 --- a/arkindex/documents/serializers/search.py +++ b/arkindex/documents/serializers/search.py @@ -1,5 +1,6 @@ from rest_framework import serializers -from arkindex.documents.models import Act, Page, TranscriptionType, Entity, EntityType +from arkindex.documents.models import Act, Page, Entity +from arkindex_common.enums import EntityType, TranscriptionType from arkindex.documents.serializers.light import CorpusLightSerializer from arkindex.documents.serializers.elements import ElementSlimSerializer from arkindex.documents.serializers.ml import TranscriptionSerializer diff --git a/arkindex/documents/tests/commands/test_delete_corpus.py b/arkindex/documents/tests/commands/test_delete_corpus.py index f0efe4a655..da8a4b8125 100644 --- a/arkindex/documents/tests/commands/test_delete_corpus.py +++ b/arkindex/documents/tests/commands/test_delete_corpus.py @@ -1,9 +1,10 @@ from django.core.management import call_command from django.db.models.signals import pre_delete -from arkindex_common.enums import MetaType, DataImportMode +from arkindex_common.enums import TranscriptionType, MetaType, DataImportMode from arkindex_common.ml_tool import MLToolType from arkindex.project.tests import FixtureTestCase -from arkindex.documents.models import Corpus, Element, Page, ElementType, TranscriptionType, DataSource + +from arkindex.documents.models import Corpus, Element, Page, ElementType, DataSource from arkindex.dataimport.models import EventType diff --git a/arkindex/documents/tests/test_elements_api.py b/arkindex/documents/tests/test_elements_api.py index 5b4e7aa00f..6b45163cfa 100644 --- a/arkindex/documents/tests/test_elements_api.py +++ b/arkindex/documents/tests/test_elements_api.py @@ -1,9 +1,8 @@ from django.urls import reverse from rest_framework import status -from arkindex_common.enums import MetaType +from arkindex_common.enums import TranscriptionType, MetaType, EntityType from arkindex.documents.models import Element, ElementType, DataSource, \ - TranscriptionType, Page, Act, Corpus, DateType, Entity, EntityType, \ - EntityRole, EntityLink, MetaData, TranscriptionEntity + Page, Act, Corpus, DateType, Entity, EntityRole, EntityLink, MetaData, TranscriptionEntity from arkindex.images.models import ImageServer from arkindex.project.tests import FixtureAPITestCase from arkindex.project.aws import S3FileStatus diff --git a/arkindex/documents/tests/test_entities.py b/arkindex/documents/tests/test_entities.py index e899647ce9..27f55bbfc3 100644 --- a/arkindex/documents/tests/test_entities.py +++ b/arkindex/documents/tests/test_entities.py @@ -1,6 +1,7 @@ from django.core.exceptions import ValidationError +from arkindex_common.enums import MetaType, EntityType from arkindex.documents.models import Corpus, Element, ElementType, \ - Entity, EntityType, EntityRole, EntityLink, MetaData, MetaType + Entity, EntityRole, EntityLink, MetaData from arkindex.project.tests import FixtureTestCase diff --git a/arkindex/documents/tests/test_pagexml.py b/arkindex/documents/tests/test_pagexml.py index 461c35486b..23dd4e18c8 100644 --- a/arkindex/documents/tests/test_pagexml.py +++ b/arkindex/documents/tests/test_pagexml.py @@ -2,8 +2,9 @@ from pathlib import Path from django.urls import reverse from rest_framework import status from arkindex.project.tests import FixtureAPITestCase -from arkindex.documents.models import Page, TranscriptionType,\ - Entity, EntityRole, EntityLink, EntityType, TranscriptionEntity, Transcription +from arkindex_common.enums import TranscriptionType, EntityType +from arkindex.documents.models import \ + Page, Entity, EntityRole, EntityLink, Transcription, TranscriptionEntity from arkindex.documents.pagexml import PageXmlParser FIXTURES = Path(__file__).absolute().parent / 'pagexml_samples' diff --git a/arkindex/documents/tests/test_search.py b/arkindex/documents/tests/test_search.py index 185c4c16c8..f408292517 100644 --- a/arkindex/documents/tests/test_search.py +++ b/arkindex/documents/tests/test_search.py @@ -1,5 +1,6 @@ from arkindex.project.tests import FixtureAPITestCase -from arkindex.documents.models import Transcription, Act, Page, Element, Corpus, Entity, EntityType +from arkindex_common.enums import EntityType +from arkindex.documents.models import Transcription, Act, Page, Element, Corpus, Entity from django.urls import reverse from django.contrib.auth.models import AnonymousUser from rest_framework import status diff --git a/arkindex/documents/tests/test_transcription_create.py b/arkindex/documents/tests/test_transcription_create.py index 9a210f42fb..54202d0051 100644 --- a/arkindex/documents/tests/test_transcription_create.py +++ b/arkindex/documents/tests/test_transcription_create.py @@ -3,7 +3,8 @@ from unittest.mock import patch from rest_framework import status from arkindex.project.tests import FixtureAPITestCase from arkindex.project.polygon import Polygon -from arkindex.documents.models import Page, Transcription, TranscriptionType, DataSource +from arkindex_common.enums import TranscriptionType +from arkindex.documents.models import Page, Transcription, DataSource import uuid diff --git a/arkindex/images/importer.py b/arkindex/images/importer.py index 48dbd8c642..0c71ce014f 100644 --- a/arkindex/images/importer.py +++ b/arkindex/images/importer.py @@ -3,10 +3,11 @@ from django.db import connection from django.conf import settings from pathlib import Path from arkindex_common.tools import Timer +from arkindex_common.enums import TranscriptionType from arkindex.project.polygon import Polygon from arkindex.documents.indexer import Indexer from arkindex.documents.models import \ - TranscriptionType, Element, Page, ElementType, Transcription, DataSource, Corpus + Element, Page, ElementType, Transcription, DataSource, Corpus from arkindex.documents.importer import parse_folio from arkindex.images.models import Image import csv diff --git a/arkindex/images/tests/test_bulk_transcriptions.py b/arkindex/images/tests/test_bulk_transcriptions.py index a98e67d18e..7faa1a19e8 100644 --- a/arkindex/images/tests/test_bulk_transcriptions.py +++ b/arkindex/images/tests/test_bulk_transcriptions.py @@ -1,6 +1,7 @@ from arkindex.project.tests import FixtureTestCase from arkindex.project.polygon import Polygon -from arkindex.documents.models import Transcription, TranscriptionType, DataSource +from arkindex_common.enums import TranscriptionType +from arkindex.documents.models import Transcription, DataSource from arkindex.images.importer import build_transcriptions, save_transcriptions -- GitLab