Skip to content
Snippets Groups Projects
Commit 44929e5a authored by ml bonhomme :bee:, committed by Bastien Abadie
Browse files

Allow searching with classification names

parent 4feaa7b0
No related branches found
No related tags found
1 merge request: !2498 "Allow searching with classification names"
......@@ -123,6 +123,7 @@ class Indexer:
# Classification fields
{"name": "classification_id", "indexed": False, "required": False, "type": "uuid"},
{"name": "classification_name", "indexed": True, "required": False, "type": "full_string"},
{"name": "classification_text", "indexed": True, "required": False, "type": "string"},
{"name": "classification_confidence", "indexed": True, "required": False, "type": "pfloat"},
{"name": "classification_worker", "indexed": True, "required": False, "type": "full_string"},
# Metadata fields
......@@ -269,6 +270,7 @@ class Indexer:
"id": str(self.build_solr_id(element, classification)),
"classification_id": str(classification.id),
"classification_name": classification.ml_class.name,
"classification_text": classification.ml_class.name,
"classification_confidence": classification.confidence,
"classification_worker": self.hash_worker(classification.worker_run)
}) for classification in element.classifications.all()
......
......@@ -32,6 +32,7 @@ class SolrDocumentSerializer(serializers.Serializer):
classification_id = serializers.UUIDField(allow_null=True)
classification_name = serializers.CharField(allow_null=True)
classification_text = serializers.CharField(allow_null=True)
classification_confidence = serializers.FloatField(min_value=0, max_value=1, allow_null=True)
classification_worker = serializers.CharField(allow_null=True)
......@@ -102,9 +103,10 @@ class CorpusSearchQuerySerializer(serializers.Serializer):
("element", "element"),
("transcription", "transcription"),
("metadata", "metadata"),
("entity", "entity")
("entity", "entity"),
("classification", "classification")
],
default={"element", "transcription", "metadata", "entity"},
default={"element", "transcription", "metadata", "entity", "classification"},
help_text="List of sources to be searched on.",
)
......
......@@ -251,6 +251,7 @@ class TestReindexCommand(FixtureTestCase):
"parent_type": self.page.type.display_name,
"classification_id": str(cl_1.id),
"classification_name": cl_1.ml_class.name,
"classification_text": cl_1.ml_class.name,
"classification_confidence": cl_1.confidence,
"classification_worker": self.worker.name,
},
......@@ -265,6 +266,7 @@ class TestReindexCommand(FixtureTestCase):
"parent_type": self.page.type.display_name,
"classification_id": str(cl_2.id),
"classification_name": cl_2.ml_class.name,
"classification_text": cl_2.ml_class.name,
"classification_confidence": cl_2.confidence,
"classification_worker": self.worker.name,
}
......
......@@ -181,6 +181,7 @@ class TestIndexerCommand(FixtureTestCase):
"id": str(indexer.build_solr_id(self.page, cl_1)),
"classification_id": str(cl_1.id),
"classification_name": cl_1.ml_class.name,
"classification_text": cl_1.ml_class.name,
"classification_confidence": cl_1.confidence,
"classification_worker": self.worker.name,
},
......@@ -189,6 +190,7 @@ class TestIndexerCommand(FixtureTestCase):
"id": str(indexer.build_solr_id(self.page, cl_2)),
"classification_id": str(cl_2.id),
"classification_name": cl_2.ml_class.name,
"classification_text": cl_2.ml_class.name,
"classification_confidence": cl_2.confidence,
"classification_worker": self.worker.name,
}
......
......@@ -121,11 +121,8 @@ class TestSearchApi(FixtureAPITestCase):
@override_settings(ARKINDEX_FEATURES={"search": True})
@patch("arkindex.documents.api.search.solr")
def test_search(self, mock_solr):
self.maxDiff = None
collection_name = f"project-{self.corpus.id}"
possible_queries = [
'(element_text:("I search" OR "Found") OR transcription_text:("I search" OR "Found")) AND (metadata_name:"folio" AND entity_type:"person")',
'(transcription_text:("I search" OR "Found") OR element_text:("I search" OR "Found")) AND (metadata_name:"folio" AND entity_type:"person")'
]
docs = [{
"id": "document_id",
"parent_id": "parent_id",
......@@ -142,6 +139,7 @@ class TestSearchApi(FixtureAPITestCase):
"transcription_worker": "1234567890_A worker",
"classification_id": "classification_id",
"classification_name": "my class",
"classification_text": "my class",
"classification_confidence": 0.1,
"classification_worker": "1234567890_A worker",
"metadata_id": "metadata_id",
......@@ -157,11 +155,12 @@ class TestSearchApi(FixtureAPITestCase):
# Mock SolrClient
mock_solr.collections.exists.return_value = True
solr_response = self.build_solr_response(docs=docs, query=possible_queries[0])
query = '(element_text:("I search" OR "Found") OR transcription_text:("I search" OR "Found") OR classification_text:("I search" OR "Found")) AND (metadata_name:"folio" AND entity_type:"person")'
solr_response = self.build_solr_response(docs=docs, query=query)
mock_solr.query.return_value = solr_response
payload = {
"sources[]": ["element", "transcription"],
"sources[]": ["element", "transcription", "classification"],
"metadata_name": "folio",
"entity_type": "person",
"query": '"I search" OR "Found"',
......@@ -182,7 +181,11 @@ class TestSearchApi(FixtureAPITestCase):
self.assertEqual(index_name, collection_name)
(index_name, args), kwargs = mock_solr.query.call_args
self.assertEqual(index_name, collection_name)
self.assertIn(args.pop("q"), possible_queries)
# The order in which the sources appear in the query is random, so we just check that all the sources are there
q = args.pop("q")
self.assertIn('element_text:("I search" OR "Found")', q)
self.assertIn('transcription_text:("I search" OR "Found")', q)
self.assertIn('classification_text:("I search" OR "Found")', q)
self.assertDictEqual(args, {
"start": 0,
"rows": 20,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment