1313from learning_resources .models import LearningResource
1414from learning_resources_search .connection import (
1515 get_default_alias_name ,
16+ get_vector_model_id ,
1617)
1718from learning_resources_search .constants import (
1819 COMBINED_INDEX ,
1920 CONTENT_FILE_TYPE ,
2021 COURSE_QUERY_FIELDS ,
2122 COURSE_TYPE ,
2223 DEPARTMENT_QUERY_FIELDS ,
24+ HYBRID_SEARCH_MODE ,
2325 LEARNING_RESOURCE ,
2426 LEARNING_RESOURCE_QUERY_FIELDS ,
2527 LEARNING_RESOURCE_SEARCH_SORTBY_OPTIONS ,
@@ -67,7 +69,7 @@ def gen_content_file_id(content_file_id):
6769 return f"cf_{ content_file_id } "
6870
6971
70- def relevant_indexes (resource_types , aggregations , endpoint , use_hybrid_search = False ):
72+ def relevant_indexes (resource_types , aggregations , endpoint , use_hybrid_search ):
7173 """
7274 Return list of relevant index types for the query
7375
@@ -147,7 +149,11 @@ def generate_sort_clause(search_params):
147149 return sort
148150
149151
150- def wrap_text_clause (text_query , min_score = None , use_hybrid_search = False ):
152+ def wrap_text_clause (
153+ text_query ,
154+ use_hybrid_search ,
155+ min_score = None ,
156+ ):
151157 """
152158 Wrap the text subqueries in a bool query
153159 Shared by generate_content_file_text_clause and
@@ -213,11 +219,11 @@ def generate_content_file_text_clause(text):
213219 else :
214220 text_query = {}
215221
216- return wrap_text_clause (text_query )
222+ return wrap_text_clause (text_query , use_hybrid_search = False )
217223
218224
219225def generate_learning_resources_text_clause (
220- text , search_mode , slop , content_file_score_weight , min_score , use_hybrid_search
226+ text , search_mode , slop , content_file_score_weight , min_score
221227):
222228 """
223229 Return text clause for the query
@@ -228,16 +234,23 @@ def generate_learning_resources_text_clause(
228234 dict: dictionary with the opensearch text clause
229235 """
230236
237+ use_hybrid_search = search_mode == HYBRID_SEARCH_MODE
238+
231239 query_type = (
232240 "query_string" if text .startswith ('"' ) and text .endswith ('"' ) else "multi_match"
233241 )
234242
235243 extra_params = {}
236244
237- if query_type == "multi_match" and search_mode :
238- extra_params ["type" ] = search_mode
245+ if use_hybrid_search :
246+ text_search_mode = settings .DEFAULT_SEARCH_MODE
247+ else :
248+ text_search_mode = search_mode
249+
250+ if query_type == "multi_match" :
251+ extra_params ["type" ] = text_search_mode
239252
240- if search_mode == "phrase" and slop :
253+ if text_search_mode == "phrase" and slop :
241254 extra_params ["slop" ] = slop
242255
243256 if content_file_score_weight is not None :
@@ -341,7 +354,7 @@ def generate_learning_resources_text_clause(
341354 else :
342355 text_query = {}
343356
344- return wrap_text_clause (text_query , min_score , use_hybrid_search )
357+ return wrap_text_clause (text_query , use_hybrid_search , min_score )
345358
346359
347360def generate_filter_clause (
@@ -591,7 +604,6 @@ def add_text_query_to_search(
591604 search_params .get ("slop" ),
592605 search_params .get ("content_file_score_weight" ),
593606 search_params .get ("min_score" ),
594- use_hybrid_search ,
595607 )
596608
597609 yearly_decay_percent = search_params .get ("yearly_decay_percent" )
@@ -641,11 +653,17 @@ def add_text_query_to_search(
641653 text_query = {"bool" : {"must" : [text_query ], "filter" : query_type_query }}
642654
643655 if use_hybrid_search :
656+ vector_model_id = get_vector_model_id ()
657+ if not vector_model_id :
658+ log .error ("Vector model not found. Cannot perform hybrid search." )
659+ error_message = "Vector model not found."
660+ raise ValueError (error_message )
661+
644662 vector_query_description = {
645663 "neural" : {
646664 "description_embedding" : {
647665 "query_text" : text ,
648- "model_id" : "PQBFF5oBDk6_T5cL_Izk" ,
666+ "model_id" : vector_model_id ,
649667 "min_score" : 0.015 ,
650668 },
651669 }
@@ -655,7 +673,7 @@ def add_text_query_to_search(
655673 "neural" : {
656674 "title_embedding" : {
657675 "query_text" : text ,
658- "model_id" : "PQBFF5oBDk6_T5cL_Izk" ,
676+ "model_id" : vector_model_id ,
659677 "min_score" : 0.015 ,
660678 },
661679 }
@@ -679,7 +697,7 @@ def add_text_query_to_search(
679697 return search
680698
681699
682- def construct_search (search_params ):
700+ def construct_search (search_params ): # noqa: C901
683701 """
684702 Construct a learning resources search based on the query
685703
@@ -698,7 +716,7 @@ def construct_search(search_params):
698716 ):
699717 search_params ["resource_type" ] = list (LEARNING_RESOURCE_TYPES )
700718
701- use_hybrid_search = search_params .get ("use_hybrid_search" , False )
719+ use_hybrid_search = search_params .get ("search_mode" ) == HYBRID_SEARCH_MODE
702720
703721 indexes = relevant_indexes (
704722 search_params .get ("resource_type" ),
@@ -710,7 +728,7 @@ def construct_search(search_params):
710728 search = Search (index = "," .join (indexes ))
711729
712730 search = search .source (fields = {"excludes" : SOURCE_EXCLUDED_FIELDS })
713- if not search_params . get ( " use_hybrid_search" ) :
731+ if not use_hybrid_search :
714732 search = search .params (search_type = "dfs_query_then_fetch" )
715733 if search_params .get ("offset" ):
716734 search = search .extra (from_ = search_params .get ("offset" ))
@@ -767,12 +785,12 @@ def execute_learn_search(search_params):
767785 Returns:
768786 dict: The opensearch response dict
769787 """
770- print (search_params )
771788 if search_params .get ("endpoint" ) != CONTENT_FILE_TYPE :
772789 if search_params .get ("yearly_decay_percent" ) is None :
773790 search_params ["yearly_decay_percent" ] = (
774791 settings .DEFAULT_SEARCH_STALENESS_PENALTY
775792 )
793+
776794 if search_params .get ("search_mode" ) is None :
777795 search_params ["search_mode" ] = settings .DEFAULT_SEARCH_MODE
778796 if search_params .get ("slop" ) is None :
@@ -785,7 +803,7 @@ def execute_learn_search(search_params):
785803 )
786804 search = construct_search (search_params )
787805
788- if search_params .get ("use_hybrid_search" ) :
806+ if search_params .get ("search_mode" ) == HYBRID_SEARCH_MODE :
789807 search = search .extra (
790808 search_pipeline = {
791809 "description" : "Post processor for hybrid search" ,
@@ -803,7 +821,6 @@ def execute_learn_search(search_params):
803821 }
804822 )
805823
806- print (search .to_dict ())
807824 return search .execute ().to_dict ()
808825
809826
@@ -964,7 +981,9 @@ def get_similar_topics(
964981 list of str:
965982 list of topic values
966983 """
967- indexes = relevant_indexes ([COURSE_TYPE ], [], endpoint = LEARNING_RESOURCE )
984+ indexes = relevant_indexes (
985+ [COURSE_TYPE ], [], endpoint = LEARNING_RESOURCE , use_hybrid_search = False
986+ )
968987 search = Search (index = "," .join (indexes ))
969988 search = search .filter ("term" , resource_type = COURSE_TYPE )
970989 search = search .query (
@@ -1111,7 +1130,9 @@ def get_similar_resources_opensearch(
11111130 list of str:
11121131 list of learning resources
11131132 """
1114- indexes = relevant_indexes (LEARNING_RESOURCE_TYPES , [], endpoint = LEARNING_RESOURCE )
1133+ indexes = relevant_indexes (
1134+ LEARNING_RESOURCE_TYPES , [], endpoint = LEARNING_RESOURCE , use_hybrid_search = False
1135+ )
11151136 search = Search (index = "," .join (indexes ))
11161137 if num_resources :
11171138 # adding +1 to num_resources since we filter out existing resource.id
0 commit comments