Skip to content
Snippets Groups Projects
Commit b0d26503 authored by Erwan Rouchet
Browse files

Merge branch 'structure-metadata' into 'master'

Structural metadata

See merge request !581
parents d93ab814 8e6ecb74
No related branches found
No related tags found
1 merge request: !581 Structural metadata
......@@ -56,12 +56,12 @@ class ElementSearch(SearchAPIView):
search = search.filter('match', type=element_type)
nested_query = Q('range', transcriptions__score={'gte': min_score})
if query:
nested_query &= Q('simple_query_string', query=query, fields=['transcriptions.text'])
if transcription_type:
nested_query &= Q('match', transcriptions__type=transcription_type.value)
if query:
nested_query &= Q('simple_query_string', query=query, fields=['transcriptions.text'])
search = search.query(Nested(
elastic_query = Nested(
path='transcriptions',
inner_hits={'size': settings.ES_INNER_RESULTS_LIMIT},
score_mode='sum',
......@@ -69,8 +69,11 @@ class ElementSearch(SearchAPIView):
query=nested_query,
functions=[FieldValueFactor(field='transcriptions.score')],
),
))
)
if query:
elastic_query |= Q('wildcard', references='*{}*'.format(query.lower()))
search = search.query(elastic_query)
return search
def post_process(self, *args, **kwargs):
......
# Generated by Django 2.2 on 2019-12-13 10:28
from django.db import migrations
from arkindex_common.enums import EntityType
from arkindex_common.enums import EntityType, MetaType
from enum import Enum
......@@ -25,6 +24,9 @@ def remove_entity_metatype(apps, schema_editor):
MetaData.objects.filter(type=OldMetaType.Entity, entity__type=EntityType.Date).update(type=OldMetaType.Date)
MetaData.objects.filter(type=OldMetaType.Entity).update(type=OldMetaType.Text)
# Restore MetaType enum
MetaData._meta.fields[2].enum = MetaType
def add_entity_metatype(apps, schema_editor):
MetaData = apps.get_model('documents', 'MetaData')
......@@ -38,6 +40,9 @@ def add_entity_metatype(apps, schema_editor):
MetaData._meta.fields[2].enum = OldMetaType
MetaData.objects.exclude(entity__isnull=True).update(type=OldMetaType.Entity)
# Restore MetaType enum
MetaData._meta.fields[2].enum = MetaType
class Migration(migrations.Migration):
......
from django.db import migrations
from arkindex_common.enums import MetaType
from django.db.models import Q
def update_balsac_pages(apps, schema_editor):
    """
    Rename the reference-like metadata of the Balsac corpora to 'balsac_id'.

    Two renames are performed, in this order:
    1. metadata named 'name' or 'filename' attached to elements whose type
       slug is 'page';
    2. metadata named 'Reference' attached to any element.

    Both are restricted to the corpora named 'Balsac | S3' and
    'Balsac | From S3'. The number of rows touched by each rename is printed.
    `schema_editor` is part of the RunPython callback signature and is unused.
    """
    # Historical model, as required inside a data migration
    metadata_model = apps.get_model('documents', 'MetaData')
    corpus_names = ('Balsac | S3', 'Balsac | From S3')

    # Step 1: page metadata that hold the identifier under 'name' or 'filename'
    on_balsac_pages = metadata_model.objects.filter(
        element__corpus__name__in=corpus_names,
        element__type__slug='page'
    )
    name_or_filename = Q(name="name") | Q(name="filename")
    pages_renamed = on_balsac_pages.filter(name_or_filename).update(name='balsac_id')
    print('Renamed {} reference metadata from BALSAC pages'.format(pages_renamed))

    # Step 2: 'Reference' metadata on every element of the same corpora
    others_renamed = metadata_model.objects.filter(
        element__corpus__name__in=corpus_names,
        name='Reference'
    ).update(name='balsac_id')
    print('Renamed {} reference metadata from all BALSAC elements'.format(others_renamed))
def reference_metadata(apps, schema_editor):
    """
    Switch every Text metadata named 'balsac_id' to the Reference type.

    Prints how many rows were updated. `schema_editor` is unused.
    """
    metadata_model = apps.get_model('documents', 'MetaData')
    updated = metadata_model.objects \
        .filter(name='balsac_id', type=MetaType.Text) \
        .update(type=MetaType.Reference)
    print('Moved {} reference metadata to Reference type'.format(updated))
def reverse_migration(apps, schema_editor):
    """
    Turn every Reference metadata back into a Text metadata.

    Note that this applies to all Reference metadata, whatever their name,
    not only to those named 'balsac_id'. `schema_editor` is unused.
    """
    metadata_model = apps.get_model('documents', 'MetaData')
    reference_metadata_qs = metadata_model.objects.filter(type=MetaType.Reference)
    reference_metadata_qs.update(type=MetaType.Text)
class Migration(migrations.Migration):
    """
    Data migration: rename Balsac reference metadata to 'balsac_id', then
    move the 'balsac_id' Text metadata to the Reference type.
    """

    dependencies = [
        ('documents', '0031_remove_entity_metatype'),
    ]

    operations = [
        # The renaming is not undone when migrating backwards
        migrations.RunPython(
            code=update_balsac_pages,
            reverse_code=migrations.RunPython.noop,
        ),
        migrations.RunPython(
            code=reference_metadata,
            reverse_code=reverse_migration,
        ),
    ]
......@@ -3,7 +3,7 @@ from django.contrib.auth.models import AnonymousUser
from django.urls import reverse
from rest_framework import status
from elasticsearch_dsl.connections import connections
from arkindex_common.enums import EntityType
from arkindex_common.enums import EntityType, MetaType
from arkindex.project.tests import FixtureAPITestCase
from arkindex.project.elastic import ESTranscription
from arkindex.documents.models import Transcription, Element, Corpus, DataSource, MLToolType
......@@ -96,7 +96,7 @@ class TestSearchAPI(FixtureAPITestCase):
"_type": "entity",
}
def test_element_search(self):
def test_element_transcription_search(self):
elt = Element.objects.get(name="Volume 1, page 1r")
ts = Transcription.objects.filter(text="PARIS", zone__image__path='img1')
......@@ -135,7 +135,7 @@ class TestSearchAPI(FixtureAPITestCase):
map(str, Corpus.objects.readable(AnonymousUser()).values_list('id', flat=True)),
)
nested = kwargs['body']['query']['bool']['must'][0]['nested']
nested = kwargs['body']['query']['bool']['should'][0]['nested']
self.assertEqual(nested['score_mode'], 'sum')
self.assertEqual(nested['path'], 'transcriptions')
self.assertIn('inner_hits', nested)
......@@ -164,6 +164,29 @@ class TestSearchAPI(FixtureAPITestCase):
}},
])
def test_element_reference_search(self):
    """
    Searching elements with a fragment of a Reference metadata value sends
    a wildcard clause on `references` as the second `should` clause.
    """
    page = Element.objects.get(name="Volume 1, page 1r")
    reference = page.metadatas.create(
        type=MetaType.Reference,
        name='reference',
        value='reference1337'
    )
    # Only a fragment of the value is searched for
    search_term = reference.value[2:]

    self.es_mock.count.return_value = {'count': 1}
    hits = [self.make_element_hit(page, []), ]
    self.es_mock.search.return_value = self.build_es_response(hits)

    response = self.client.get(reverse('api:element-search'), {'q': search_term})
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    # Inspect the body that was sent to the mocked ElasticSearch client
    _, search_kwargs = self.es_mock.search.call_args
    should_clauses = search_kwargs['body']['query']['bool']['should']
    expected_clause = {
        "wildcard": {
            "references": "*{}*".format(search_term)
        }
    }
    self.assertDictEqual(should_clauses[1], expected_clause)
def test_iiif_transcription_search(self):
# Filter to only get transcriptions from volume 1
unfiltered = Transcription.objects.filter(text="PARIS")
......
from arkindex_common.enums import MetaType
from collections.abc import Sequence
from itertools import chain
from django.conf import settings
......@@ -94,6 +95,7 @@ class ESElement(Document):
corpus = Keyword()
# Used exclusively for sorting
parents = Keyword()
references = Keyword()
transcriptions = Nested(ESTranscriptionInnerDoc)
date_range = RawDateRange(format='yyyy||yyyy-MM||yyyy-MM-dd')
......@@ -136,6 +138,10 @@ class ESElement(Document):
element.name
for element in Element.objects.get_ascending(instance.id)
],
references=[
md.value.lower()
for md in instance.metadatas.filter(type=MetaType.Reference)
],
transcriptions=list(map(
ESTranscriptionInnerDoc.from_model,
transcriptions,
......
......@@ -37,3 +37,9 @@ class TestESDocuments(FixtureAPITestCase):
get_dates_mock.return_value = [InterpretedDate(1666, 2, 3), ]
date_range = ESElement.from_model(element).to_dict().get('date_range')
self.assertDictEqual(date_range, {'gte': '1666-02-03', 'lt': '1666-02-03||+1d'})
def test_index_reference_metadata(self):
    """
    The ES document built from an element exposes the values of its
    Reference metadata, lowercased, in `references`.
    """
    volume = self.corpus.elements.get(name='Volume 1')
    volume.metadatas.create(
        type=MetaType.Reference,
        name='ref.',
        value='123ABC',
    )
    indexed = ESElement.from_model(volume)
    self.assertCountEqual(indexed.references, ['123abc'])
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment