Passing fetch_k as num_candidates does not work with ElasticsearchStore in LangChain

Checked other resources

[X] I added a very descriptive title to this issue.
[X] I searched the LangChain documentation with the integrated search.
[X] I used the GitHub search to find a similar question and didn't find it.
[X] I am sure that this is a bug in LangChain rather than my code.
[X] The bug is not resolved by updating to the latest stable version of LangChain (or the specific integration package).

Example Code

from langchain_elasticsearch import ElasticsearchStore, DenseVectorStrategy, BM25Strategy

elastic_vector_search = ElasticsearchStore( es_url=YOUR_URL, index_name=YOUR_INDEX, es_user=YOUR_LOGIN, es_params = {'verify_certs':False,'request_timeout':1000}, es_password=YOUR_PASSWORD, embedding=embeddings, strategy=DenseVectorStrategy() )

retriever = elastic_vector_search.as_retriever(search_type="similarity_score_threshold",search_kwargs={'score_threshold': 0.85, 'k':150,'fetch_k' : 10000}, include_original=True)

retriever.get_relevant_documents('query')

Error Message and Stack Trace (if applicable)

BadRequestError Traceback (most recent call last) Cell In[6], line 1 ----> 1 retriever.get_relevant_documents('réseau')

File ~\AppData\Local\Programs\Python\Python39\lib\site-packages\langchain_core\_api\deprecation.py:168, in deprecated..deprecate..warning_emitting_wrapper(*args, **kwargs) 166 warned = True 167 emit_warning() --> 168 return wrapped(*args, **kwargs)

File ~\AppData\Local\Programs\Python\Python39\lib\site-packages\langchain_core\retrievers.py:358, in BaseRetriever.get_relevant_documents(self, query, callbacks, tags, metadata, run_name, kwargs) 356 if run_name: 357 config["run_name"] = run_name --> 358 return self.invoke(query, config, kwargs)

File ~\AppData\Local\Programs\Python\Python39\lib\site-packages\langchain_core\retrievers.py:219, in BaseRetriever.invoke(self, input, config, **kwargs) 217 except Exception as e: 218 run_manager.on_retriever_error(e) --> 219 raise e 220 else: 221 run_manager.on_retriever_end( 222 result, 223 )

File ~\AppData\Local\Programs\Python\Python39\lib\site-packages\langchain_core\retrievers.py:212, in BaseRetriever.invoke(self, input, config, kwargs) 210 _kwargs = kwargs if self._expects_other_args else {} 211 if self._new_arg_supported: --> 212 result = self._get_relevant_documents( 213 input, run_manager=run_manager, _kwargs 214 ) 215 else: 216 result = self._get_relevant_documents(input, **_kwargs)

File ~\AppData\Local\Programs\Python\Python39\lib\site-packages\langchain_core\vectorstores\base.py:1249, in VectorStoreRetriever._get_relevant_documents(self, query, run_manager) 1246 docs = self.vectorstore.similarity_search(query, self.search_kwargs) 1247 elif self.search_type == "similarity_score_threshold": 1248 docs_and_similarities = ( -> 1249 self.vectorstore.similarity_search_with_relevance_scores( 1250 query, self.searchkwargs 1251 ) 1252 ) 1253 docs = [doc for doc, in docs_and_similarities] 1254 elif self.search_type == "mmr":

File ~\AppData\Local\Programs\Python\Python39\lib\site-packages\langchain_core\vectorstores\base.py:777, in VectorStore.similarity_search_with_relevance_scores(self, query, k, kwargs) 761 """Return docs and relevance scores in the range [0, 1]. 762 763 0 is dissimilar, 1 is most similar. (...) 773 List of Tuples of (doc, similarity_score). 774 """ 775 score_threshold = kwargs.pop("score_threshold", None) --> 777 docs_and_similarities = self._similarity_search_with_relevance_scores( 778 query, k=k, kwargs 779 ) 780 if any( 781 similarity < 0.0 or similarity > 1.0 782 for _, similarity in docs_and_similarities 783 ): 784 warnings.warn( 785 "Relevance scores must be between" 786 f" 0 and 1, got {docs_and_similarities}" 787 )

File ~\AppData\Local\Programs\Python\Python39\lib\site-packages\langchain_core\vectorstores\base.py:725, in VectorStore._similarity_search_with_relevance_scores(self, query, k, kwargs) 707 """ 708 Default similarity search with relevance scores. Modify if necessary 709 in subclass. (...) 722 List of Tuples of (doc, similarity_score) 723 """ 724 relevance_score_fn = self._select_relevance_score_fn() --> 725 docs_and_scores = self.similarity_search_with_score(query, k, kwargs) 726 return [(doc, relevance_score_fn(score)) for doc, score in docs_and_scores]

File ~\AppData\Local\Programs\Python\Python39\lib\site-packages\langchain_elasticsearch\vectorstores.py:883, in ElasticsearchStore.similarity_search_with_score(self, query, k, filter, custom_query, doc_builder, **kwargs) 877 if ( 878 isinstance(self._store.retrieval_strategy, DenseVectorStrategy) 879 and self._store.retrieval_strategy.hybrid 880 ): 881 raise ValueError("scores are currently not supported in hybrid mode") --> 883 hits = self._store.search( 884 query=query, k=k, filter=filter, custom_query=custom_query 885 ) 886 return _hits_to_docs_scores( 887 hits=hits, 888 content_field=self.query_field, 889 doc_builder=doc_builder, 890 )

File ~\AppData\Local\Programs\Python\Python39\lib\site-packages\elasticsearch\helpers\vectorstore_sync\vectorstore.py:274, in VectorStore.search(self, query, query_vector, k, num_candidates, fields, filter, custom_query) 271 query_body = custom_query(query_body, query) 272 logger.debug(f"Calling custom_query, Query body now: {query_body}") --> 274 response = self.client.search( 275 index=self.index, 276 **query_body, 277 size=k, 278 source=True, 279 source_includes=fields, 280 ) 281 hits: List[Dict[str, Any]] = response["hits"]["hits"] 283 return hits

File ~\AppData\Local\Programs\Python\Python39\lib\site-packages\elasticsearch\_sync\client\utils.py:446, in _rewrite_parameters..wrapper..wrapped(*args, **kwargs) 443 except KeyError: 444 pass --> 446 return api(*args, **kwargs)

File ~\AppData\Local\Programs\Python\Python39\lib\site-packages\elasticsearch_sync\client__init.py:4119, in Elasticsearch.search(self, index, aggregations, aggs, allow_no_indices, allow_partial_search_results, analyze_wildcard, analyzer, batched_reduce_size, ccs_minimize_roundtrips, collapse, default_operator, df, docvalue_fields, error_trace, expand_wildcards, explain, ext, fields, filter_path, force_syntheticsource, from, highlight, human, ignore_throttled, ignore_unavailable, indices_boost, knn, lenient, max_concurrent_shard_requests, min_compatible_shard_node, min_score, pit, post_filter, pre_filter_shard_size, preference, pretty, profile, q, query, rank, request_cache, rescore, rest_total_hits_as_int, retriever, routing, runtime_mappings, script_fields, scroll, search_after, search_type, seq_no_primary_term, size, slice, sort, source, source_excludes, source_includes, stats, stored_fields, suggest, suggest_field, suggest_mode, suggest_size, suggest_text, terminate_after, timeout, track_scores, track_total_hits, typed_keys, version, body) 4117 if body is not None: 4118 __headers["content-type"] = "application/json" -> 4119 return self.perform_request( # type: ignore[return-value] 4120 "POST", 4121 path, 4122 params=query, 4123 headers=headers, 4124 body=body, 4125 endpoint_id="search", 4126 path_parts=__path_parts, 4127 )

File ~\AppData\Local\Programs\Python\Python39\lib\site-packages\elasticsearch_sync\client_base.py:271, in BaseClient.perform_request(self, method, path, params, headers, body, endpoint_id, path_parts) 255 def perform_request( 256 self, 257 method: str, (...) 264 path_parts: Optional[Mapping[str, Any]] = None, 265 ) -> ApiResponse[Any]: 266 with self._otel.span( 267 method, 268 endpoint_id=endpoint_id, 269 path_parts=path_parts or {}, 270 ) as otel_span: --> 271 response = self._perform_request( 272 method, 273 path, 274 params=params, 275 headers=headers, 276 body=body, 277 otel_span=otel_span, 278 ) 279 otel_span.set_elastic_cloud_metadata(response.meta.headers) 280 return response

File ~\AppData\Local\Programs\Python\Python39\lib\site-packages\elasticsearch_sync\client_base.py:352, in BaseClient._perform_request(self, method, path, params, headers, body, otel_span) 349 except (ValueError, KeyError, TypeError): 350 pass --> 352 raise HTTP_EXCEPTIONS.get(meta.status, ApiError)( 353 message=message, meta=meta, body=resp_body 354 ) 356 # 'X-Elastic-Product: Elasticsearch' should be on every 2XX response. 357 if not self._verified_elasticsearch: 358 # If the header is set we mark the server as verified.

BadRequestError: BadRequestError(400, 'illegal_argument_exception', '[num_candidates] cannot be less than [k]')

Description

I am trying to retrieve 150 documents (k=150) and I pass fetch_k=10000, which I expected to be used as Elasticsearch's num_candidates. It does not work; the request fails with: '[num_candidates] cannot be less than [k]'.

Two options:

- Either it is a bug and fetch_k is not working,
- or num_candidates has to be specified in a different way, and I would like to know how, because I have tried everything.

System Info

langchain-0.2.12 python 3.9

langchain-ai / langchain