diff --git a/arkindex/documents/indexer.py b/arkindex/documents/indexer.py
index 6ee703fbdd3d0c9518fd6c479598c81f4a24f2a0..482453d74fd8ffbcb81a22b45c7737b33f4245ff 100644
--- a/arkindex/documents/indexer.py
+++ b/arkindex/documents/indexer.py
@@ -36,6 +36,10 @@ class Indexer(object):
                         "score": {"type": "float"},
                         "text": {"type": "text"},
                     }
+                },
+                "date_range": {
+                    "type": "date_range",
+                    "format": "yyyy||yyyy-MM||yyyy-MM-dd"
                 }
             }
         }
diff --git a/arkindex/documents/management/commands/reindex.py b/arkindex/documents/management/commands/reindex.py
index 20bc135ec9bb130265d1f40dc88e9275331d9c61..37457c26b8cec2129a8845bf8386927be34736ac 100644
--- a/arkindex/documents/management/commands/reindex.py
+++ b/arkindex/documents/management/commands/reindex.py
@@ -15,9 +15,11 @@ logger = logging.getLogger(__name__)
 
 def get_acts(volume=None):
     if volume:
-        return Act.objects.get_descending(volume.id)
+        queryset = Act.objects.get_descending(volume.id)
+    else:
+        queryset = Act.objects.all()
 
-    return Act.objects.all()
+    return queryset.prefetch_related('metadatas__dates')
 
 
 def get_transcriptions(volume=None):
diff --git a/arkindex/documents/models.py b/arkindex/documents/models.py
index 5106882c6b1168c0631fa6632ae31d0eeec12673..b8588b8e9acec5164fbf306943caf71bd9b8d898 100644
--- a/arkindex/documents/models.py
+++ b/arkindex/documents/models.py
@@ -448,6 +448,9 @@ class Act(Element):
                 )
                 for s in surfaces
             ]
+        interpreted_dates = itertools.chain(*[md.dates.all() for md in self.metadatas.all()])
+        # Merge the ES range bounds of all interpreted dates into one dict; a bound given by several dates keeps the last value
+        date_range = {k: val for date in interpreted_dates for k, val in date.to_es_range().items()}
         return {
             'corpus': self.corpus_id,
             'volumes': [v.name for v in Element.objects.get_ascending(self.id, type=ElementType.Volume)],
@@ -459,7 +462,8 @@ class Act(Element):
                     'text': t.text,
                 }
                 for sublist in transcriptions for t in sublist
-            ]
+            ],
+            'date_range': [date_range] if date_range else []
         }
 
 
@@ -656,8 +660,30 @@ class InterpretedDate(models.Model):
         return tuple(self) == tuple(other) and self.type == other.type
 
     def __str__(self):
-        year, month, day = tuple(self)
-        if month:
-            from arkindex.documents.date_parser import MONTHS
-            month = MONTHS[DEFAULT_LANGUAGE][month - 1]
-        return ' '.join([str(e) for e in (year, ',', month, day) if e])
+        return '-'.join('{:02d}'.format(e) for e in tuple(self) if e)
+
+    def to_es_range(self):
+        """
+        Return the ElasticSearch date-range bounds for this date as a dict (empty for types other than Exact/Lower/Upper)
+        gte: inclusive lower bound, the date as formatted by str(self) (Exact and Lower types)
+        lt: exclusive upper bound, the same string followed by '||+1y', '||+1M' or '||+1d' (Exact and Upper types)
+        Ex: A lower bound type date with year information only will return {'gte': 'YYYY'}
+        An exact date missing day information will return {'gte': 'YYYY-MM', 'lt': 'YYYY-MM||+1M'}
+        """
+        es_date = str(self)
+        # Suffix appended to the 'lt' bound: plus one year, or one month / one day when those fields are set
+        precision = '||+1y'
+        if self.month:
+            precision = '||+1M'
+        if self.day:
+            precision = '||+1d'
+        if self.type == DateType.Exact:
+            return {
+                'gte': es_date,
+                'lt': es_date + precision
+            }
+        elif self.type == DateType.Lower:
+            return {'gte': es_date}
+        elif self.type == DateType.Upper:
+            return {'lt': es_date + precision}
+        return {}
diff --git a/arkindex/documents/tests/test_act.py b/arkindex/documents/tests/test_act.py
index 2661f4d166ec763cc9b0687fff394690a284e4a5..82c5f4d32ba33657b718ff6480c7ec2aacc4127c 100644
--- a/arkindex/documents/tests/test_act.py
+++ b/arkindex/documents/tests/test_act.py
@@ -1,7 +1,7 @@
 from django.urls import reverse
 from arkindex.project.tests import FixtureAPITestCase
 from rest_framework import status
-from arkindex.documents.models import Element, Act, ElementType, MetaData, MetaType
+from arkindex.documents.models import Element, Act, ElementType, MetaData, MetaType, DateType
 from arkindex.images.models import Image, Zone
 
 
@@ -88,3 +88,27 @@ class TestAct(FixtureAPITestCase):
                  'revision': None,
                  'dates': []}]
         )
+
+    def test_build_search_index_dates(self):
+        # Test Upper bound
+        metadata = self.act.metadatas.create(type=MetaType.Date, name='date', value='before may 1420')
+        date = metadata.dates.create(type=DateType.Upper, year=1420, month=5)
+        date_range = self.act.build_search_index().get('date_range')
+        self.assertEqual(date_range, [{'lt': '1420-05||+1M'}])
+        date.delete()
+
+        # Test Lower bound
+        metadata.dates.create(type=DateType.Lower, year=1418)
+        date_range = self.act.build_search_index().get('date_range')
+        self.assertListEqual(date_range, [{'gte': '1418'}])
+
+        # Test with both Lower and Upper bound
+        metadata.dates.create(type=DateType.Upper, year=1428)
+        date_range = self.act.build_search_index().get('date_range')
+        self.assertListEqual(date_range, [{'gte': '1418', 'lt': '1428||+1y'}])
+
+        # Test an exact date
+        metadata.dates.all().delete()
+        metadata.dates.create(type=DateType.Exact, year=1666, month=2, day=3)
+        date_range = self.act.build_search_index().get('date_range')
+        self.assertListEqual(date_range, [{'gte': '1666-02-03', 'lt': '1666-02-03||+1d'}])
diff --git a/arkindex/documents/tests/test_interpreted_date.py b/arkindex/documents/tests/test_interpreted_date.py
index 7df723ca3999a8bfbc87d1b768fa323977423888..3de0e0539001296a1966c66c3536ca0a81ea6a50 100644
--- a/arkindex/documents/tests/test_interpreted_date.py
+++ b/arkindex/documents/tests/test_interpreted_date.py
@@ -1,12 +1,56 @@
-from arkindex.documents.models import MetaData, MetaType
+from arkindex.documents.models import MetaData, MetaType, DateType, InterpretedDate
 from arkindex.documents.date_parser import DateParser
 from django.test import TestCase
 
 
 class TestInterpretedDate(TestCase):
 
+    @classmethod
+    def setUpTestData(cls):
+        super().setUpTestData()
+        cls.metadata = MetaData(name="date", type=MetaType.Date, value='1337-may')
+
     def test_date_str(self):
-        metadata = MetaData(name="date", type=MetaType.Date, value='1337-05-02')
         date_parser = DateParser()
-        interpreted_date = date_parser.parse(metadata.value)[0]
-        self.assertIn('may', str(interpreted_date))
+        interpreted_date = date_parser.parse(self.metadata.value)
+        self.assertEqual(1, len(interpreted_date))
+        self.assertEqual('1337-05', str(interpreted_date[0]))
+
+    def test_to_es_range(self):
+        date_test_table = {
+            InterpretedDate(
+                year=1221,
+                type=DateType.Exact,
+                metadata=self.metadata
+            ): {'gte': '1221', 'lt': '1221||+1y'},
+            InterpretedDate(
+                year=1350,
+                month=2,
+                type=DateType.Exact,
+                metadata=self.metadata
+            ): {'gte': '1350-02', 'lt': '1350-02||+1M'},
+            InterpretedDate(
+                year=1323,
+                type=DateType.Lower,
+                metadata=self.metadata
+            ): {'gte': '1323'},
+            InterpretedDate(
+                year=1212,
+                month=12,
+                day=12,
+                type=DateType.Lower,
+                metadata=self.metadata
+            ): {'gte': '1212-12-12'},
+            InterpretedDate(
+                year=1212,
+                type=DateType.Upper,
+                metadata=self.metadata
+            ): {'lt': '1212||+1y'},
+            InterpretedDate(
+                year=700,
+                type=DateType.Unknown,
+                metadata=self.metadata
+            ): {},
+        }
+        for interpreted_date, expected_range in date_test_table.items():
+            self.assertEqual(interpreted_date.to_es_range(), expected_range)