diff --git a/arkindex/documents/indexer.py b/arkindex/documents/indexer.py index d90d4f9a478a2894ff0453f71e60dcbe15476997..3ef0188ad3be6b5fc6abd41d36efd969832cbf44 100644 --- a/arkindex/documents/indexer.py +++ b/arkindex/documents/indexer.py @@ -123,6 +123,7 @@ class Indexer: # Classification fields {"name": "classification_id", "indexed": False, "required": False, "type": "uuid"}, {"name": "classification_name", "indexed": True, "required": False, "type": "full_string"}, + {"name": "classification_text", "indexed": True, "required": False, "type": "string"}, {"name": "classification_confidence", "indexed": True, "required": False, "type": "pfloat"}, {"name": "classification_worker", "indexed": True, "required": False, "type": "full_string"}, # Metadata fields @@ -269,6 +270,7 @@ class Indexer: "id": str(self.build_solr_id(element, classification)), "classification_id": str(classification.id), "classification_name": classification.ml_class.name, + "classification_text": classification.ml_class.name, "classification_confidence": classification.confidence, "classification_worker": self.hash_worker(classification.worker_run) }) for classification in element.classifications.all() diff --git a/arkindex/documents/serializers/search.py b/arkindex/documents/serializers/search.py index 799ade083c065add32fa87094555bedd67b837e4..7a5d499f63b6dc59d838ffa9ddcbae3df9cef957 100644 --- a/arkindex/documents/serializers/search.py +++ b/arkindex/documents/serializers/search.py @@ -32,6 +32,7 @@ class SolrDocumentSerializer(serializers.Serializer): classification_id = serializers.UUIDField(allow_null=True) classification_name = serializers.CharField(allow_null=True) + classification_text = serializers.CharField(allow_null=True) classification_confidence = serializers.FloatField(min_value=0, max_value=1, allow_null=True) classification_worker = serializers.CharField(allow_null=True) @@ -102,9 +103,10 @@ class CorpusSearchQuerySerializer(serializers.Serializer): ("element", "element"), ("transcription", "transcription"), ("metadata", "metadata"), - ("entity", "entity") + ("entity", "entity"), + ("classification", "classification") ], - default={"element", "transcription", "metadata", "entity"}, + default={"element", "transcription", "metadata", "entity", "classification"}, help_text="List of sources to be searched on.", ) diff --git a/arkindex/documents/tests/commands/test_reindex.py b/arkindex/documents/tests/commands/test_reindex.py index 9446a4dcb3521209a2d46345fc2e163e91e94fe8..a3ce57de6ee76cb65d070b018070d7b0c552d7d4 100644 --- a/arkindex/documents/tests/commands/test_reindex.py +++ b/arkindex/documents/tests/commands/test_reindex.py @@ -251,6 +251,7 @@ class TestReindexCommand(FixtureTestCase): "parent_type": self.page.type.display_name, "classification_id": str(cl_1.id), "classification_name": cl_1.ml_class.name, + "classification_text": cl_1.ml_class.name, "classification_confidence": cl_1.confidence, "classification_worker": self.worker.name, }, @@ -265,6 +266,7 @@ class TestReindexCommand(FixtureTestCase): "parent_type": self.page.type.display_name, "classification_id": str(cl_2.id), "classification_name": cl_2.ml_class.name, + "classification_text": cl_2.ml_class.name, "classification_confidence": cl_2.confidence, "classification_worker": self.worker.name, } diff --git a/arkindex/documents/tests/test_indexer.py b/arkindex/documents/tests/test_indexer.py index 3a25eadf5aad8e8a77364c8cf3bc3bfc8ada7b0f..8fd06cb9e49d9d95511ea12ed86d563e807b899c 100644 --- a/arkindex/documents/tests/test_indexer.py +++ b/arkindex/documents/tests/test_indexer.py @@ -181,6 +181,7 @@ class TestIndexerCommand(FixtureTestCase): "id": str(indexer.build_solr_id(self.page, cl_1)), "classification_id": str(cl_1.id), "classification_name": cl_1.ml_class.name, + "classification_text": cl_1.ml_class.name, "classification_confidence": cl_1.confidence, "classification_worker": self.worker.name, }, @@ -189,6 +190,7 @@ class TestIndexerCommand(FixtureTestCase): "id": str(indexer.build_solr_id(self.page, cl_2)), "classification_id": str(cl_2.id), "classification_name": cl_2.ml_class.name, + "classification_text": cl_2.ml_class.name, "classification_confidence": cl_2.confidence, "classification_worker": self.worker.name, } diff --git a/arkindex/documents/tests/test_search_api.py b/arkindex/documents/tests/test_search_api.py index 9aacaf4d34af09a2817daf16e51ba0868f37113c..cd08ee1708c183892d8125edcb197b4cccafae86 100644 --- a/arkindex/documents/tests/test_search_api.py +++ b/arkindex/documents/tests/test_search_api.py @@ -121,11 +121,8 @@ class TestSearchApi(FixtureAPITestCase): @override_settings(ARKINDEX_FEATURES={"search": True}) @patch("arkindex.documents.api.search.solr") def test_search(self, mock_solr): + self.maxDiff = None collection_name = f"project-{self.corpus.id}" - possible_queries = [ - '(element_text:("I search" OR "Found") OR transcription_text:("I search" OR "Found")) AND (metadata_name:"folio" AND entity_type:"person")', - '(transcription_text:("I search" OR "Found") OR element_text:("I search" OR "Found")) AND (metadata_name:"folio" AND entity_type:"person")' - ] docs = [{ "id": "document_id", "parent_id": "parent_id", @@ -142,6 +139,7 @@ class TestSearchApi(FixtureAPITestCase): "transcription_worker": "1234567890_A worker", "classification_id": "classification_id", "classification_name": "my class", + "classification_text": "my class", "classification_confidence": 0.1, "classification_worker": "1234567890_A worker", "metadata_id": "metadata_id", @@ -157,11 +155,12 @@ class TestSearchApi(FixtureAPITestCase): # Mock SolrClient mock_solr.collections.exists.return_value = True - solr_response = self.build_solr_response(docs=docs, query=possible_queries[0]) + query = '(element_text:("I search" OR "Found") OR transcription_text:("I search" OR "Found") OR classification_text:("I search" OR "Found")) AND (metadata_name:"folio" AND entity_type:"person")' + solr_response = self.build_solr_response(docs=docs, query=query) mock_solr.query.return_value = solr_response payload = { - "sources[]": ["element", "transcription"], + "sources[]": ["element", "transcription", "classification"], "metadata_name": "folio", "entity_type": "person", "query": '"I search" OR "Found"', @@ -182,7 +181,11 @@ class TestSearchApi(FixtureAPITestCase): self.assertEqual(index_name, collection_name) (index_name, args), kwargs = mock_solr.query.call_args self.assertEqual(index_name, collection_name) - self.assertIn(args.pop("q"), possible_queries) + # The order in which the sources appear in the query is random, so we just check that all the sources are there + q = args.pop("q") + self.assertIn('element_text:("I search" OR "Found")', q) + self.assertIn('transcription_text:("I search" OR "Found")', q) + self.assertIn('classification_text:("I search" OR "Found")', q) self.assertDictEqual(args, { "start": 0, "rows": 20,