zilliztech / GPTCache

Semantic cache for LLMs. Fully integrated with LangChain and llama_index.
https://gptcache.readthedocs.io
MIT License

[Bug]: 'HuggingFaceBgeEmbeddings' object is not callable #553

Open aniketmoha9 opened 11 months ago

aniketmoha9 commented 11 months ago

Current Behavior

```python
import time

from langchain.embeddings import HuggingFaceBgeEmbeddings

from gptcache import cache
from gptcache.manager import CacheBase, VectorBase, get_data_manager
from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation

model_name = "BAAI/bge-small-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": False}
hf = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

cache_base = CacheBase("sqlite")
vector_base = VectorBase("faiss", dimension=384)
data_manager = get_data_manager(cache_base, vector_base)
cache.init(
    pre_embedding_func=get_content_func,  # user-defined pre-embedding function
    embedding_func=hf,                    # the Embeddings object is passed directly
    data_manager=data_manager,
    similarity_evaluation=SearchDistanceEvaluation(),
)
```

```python
before = time.time()
answer = nhs_chain.run(question=query, input_documents=docs)
print(answer)
print("Time Spent:", time.time() - before)
```

```
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/Users/aniketmohan/git_repos/semantic_caching/working_llm_caching.ipynb Cell 14 line 2
      1 before = time.time()
----> 2 answer = nhs_chain.run(question=query, input_documents=docs)
      3 print(answer)
      4 print("Time Spent:", time.time() - before)

File ~/Documents/watsonx/langchain/lib/python3.11/site-packages/langchain/chains/base.py:512, in Chain.run(self, callbacks, tags, metadata, *args, **kwargs)
--> 512 return self(kwargs, callbacks=callbacks, tags=tags, metadata=metadata)[_output_key]

File ~/Documents/watsonx/langchain/lib/python3.11/site-packages/langchain/chains/base.py:312, in Chain.__call__(self, inputs, return_only_outputs, callbacks, tags, metadata, run_name, include_run_info)
--> 312 raise e

File ~/Documents/watsonx/langchain/lib/python3.11/site-packages/langchain/chains/base.py:306, in Chain.__call__(self, inputs, return_only_outputs, callbacks, tags, metadata, run_name, include_run_info)
--> 306 self._call(inputs, run_manager=run_manager)

File ~/Documents/watsonx/langchain/lib/python3.11/site-packages/langchain/chains/combine_documents/base.py:119, in BaseCombineDocumentsChain._call(self, inputs, run_manager)
--> 119 output, extra_return_dict = self.combine_docs(docs, callbacks=_run_manager.get_child(), **other_keys)

File ~/Documents/watsonx/langchain/lib/python3.11/site-packages/langchain/chains/combine_documents/stuff.py:171, in StuffDocumentsChain.combine_docs(self, docs, callbacks, **kwargs)
--> 171 return self.llm_chain.predict(callbacks=callbacks, **inputs), {}

File ~/Documents/watsonx/langchain/lib/python3.11/site-packages/langchain/chains/llm.py:257, in LLMChain.predict(self, callbacks, **kwargs)
--> 257 return self(kwargs, callbacks=callbacks)[self.output_key]

File ~/Documents/watsonx/langchain/lib/python3.11/site-packages/langchain/chains/base.py:312, in Chain.__call__(self, inputs, return_only_outputs, callbacks, tags, metadata, run_name, include_run_info)
--> 312 raise e

File ~/Documents/watsonx/langchain/lib/python3.11/site-packages/langchain/chains/base.py:306, in Chain.__call__(self, inputs, return_only_outputs, callbacks, tags, metadata, run_name, include_run_info)
--> 306 self._call(inputs, run_manager=run_manager)

File ~/Documents/watsonx/langchain/lib/python3.11/site-packages/langchain/chains/llm.py:93, in LLMChain._call(self, inputs, run_manager)
---> 93 response = self.generate([inputs], run_manager=run_manager)

File ~/Documents/watsonx/langchain/lib/python3.11/site-packages/langchain/chains/llm.py:103, in LLMChain.generate(self, input_list, run_manager)
--> 103 return self.llm.generate_prompt(prompts, stop, callbacks=run_manager.get_child() if run_manager else None, **self.llm_kwargs)

File ~/Documents/watsonx/langchain/lib/python3.11/site-packages/langchain/llms/base.py:509, in BaseLLM.generate_prompt(self, prompts, stop, callbacks, **kwargs)
--> 509 return self.generate(prompt_strings, stop=stop, callbacks=callbacks, **kwargs)

File ~/Documents/watsonx/langchain/lib/python3.11/site-packages/gptcache/adapter/langchain_models.py:102, in LangChainLLMs.generate(self, prompts, stop, callbacks, **kwargs)
--> 102 return super().generate(prompts, stop=stop, callbacks=callbacks)

File ~/Documents/watsonx/langchain/lib/python3.11/site-packages/langchain/llms/base.py:673, in BaseLLM.generate(self, prompts, stop, callbacks, tags, metadata, run_name, **kwargs)
--> 673 new_results = self._generate_helper(missing_prompts, stop, run_managers, bool(new_arg_supported), **kwargs)

File ~/Documents/watsonx/langchain/lib/python3.11/site-packages/langchain/llms/base.py:546, in BaseLLM._generate_helper(self, prompts, stop, run_managers, new_arg_supported, **kwargs)
--> 546 raise e

File ~/Documents/watsonx/langchain/lib/python3.11/site-packages/langchain/llms/base.py:533, in BaseLLM._generate_helper(self, prompts, stop, run_managers, new_arg_supported, **kwargs)
--> 533 self._generate(prompts, stop=stop, run_manager=run_managers[0] if run_managers else None, **kwargs)

File ~/Documents/watsonx/langchain/lib/python3.11/site-packages/langchain/llms/base.py:1055, in LLM._generate(self, prompts, stop, run_manager, **kwargs)
-> 1055 else self._call(prompt, stop=stop, **kwargs)

File ~/Documents/watsonx/langchain/lib/python3.11/site-packages/gptcache/adapter/langchain_models.py:79, in LangChainLLMs._call(self, prompt, stop, **kwargs)
---> 79 return adapt(self.llm, _cache_data_convert, _update_cache_callback, prompt=prompt, stop=stop, cache_obj=cache_obj, session=session, **self.tmp_args)

File ~/Documents/watsonx/langchain/lib/python3.11/site-packages/gptcache/adapter/adapter.py:78, in adapt(llm_handler, cache_data_convert, update_cache_callback, *args, **kwargs)
---> 78 embedding_data = time_cal(chat_cache.embedding_func, func_name="embedding", report_func=chat_cache.report.embedding)(pre_embedding_data, extra_param=context.get("embedding_func", None))

File ~/Documents/watsonx/langchain/lib/python3.11/site-packages/gptcache/utils/time.py:9, in time_cal.<locals>.inner(*args, **kwargs)
----> 9 res = func(*args, **kwargs)

TypeError: 'HuggingFaceBgeEmbeddings' object is not callable
```
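The last two frames point at the root cause: `gptcache.adapter.adapter.adapt` hands `chat_cache.embedding_func` to `time_cal`, whose inner wrapper then calls it directly. Roughly (a paraphrase of the frames above, not the exact library source):

```python
# Paraphrase of adapter.py:78 / time.py:9 from the traceback above:
# whatever object was handed to cache.init(embedding_func=...) is invoked directly.
embedding_func = chat_cache.embedding_func            # here: the HuggingFaceBgeEmbeddings object
embedding_data = embedding_func(pre_embedding_data)   # Embeddings defines embed_query(), not __call__
# -> TypeError: 'HuggingFaceBgeEmbeddings' object is not callable
```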

When I initialise a custom Hugging Face embedding model I get this error, but when I use the default ONNX embedding I don't get any error. Does GPTCache support other embedding models? If it does, how can I fix this issue?
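One way to make this work is to give `cache.init` a plain callable that maps text to a vector. A minimal sketch, assuming the `hf` model from the snippet above; the wrapper function and the float32 cast are illustrative, not from the original report:

```python
import numpy as np

# GPTCache invokes embedding_func(data, extra_param=...), so the wrapper
# should accept extra keyword arguments and return a numpy vector.
def bge_to_embeddings(data, **_):
    return np.array(hf.embed_query(data), dtype="float32")

cache.init(
    pre_embedding_func=get_content_func,
    embedding_func=bge_to_embeddings,
    data_manager=data_manager,
    similarity_evaluation=SearchDistanceEvaluation(),
)
```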

Expected Behavior

No response

Steps To Reproduce

No response

Environment

No response

Anything else?

No response

SimFG commented 11 months ago

This doc will help you solve your problem: https://gptcache.readthedocs.io/en/latest/references/embedding.html#module-gptcache.embedding.langchain
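For reference, the linked wrapper adapts a LangChain `Embeddings` object into the callable GPTCache expects. A minimal sketch reusing the names from the report above, assuming the `gptcache.embedding.LangChain` class described in that doc:

```python
from gptcache.embedding import LangChain

# Wrap the LangChain embeddings object; its to_embeddings method is the
# callable that GPTCache's embedding_func interface expects.
encoder = LangChain(embeddings=hf, dimension=384)

cache.init(
    pre_embedding_func=get_content_func,
    embedding_func=encoder.to_embeddings,  # the bound method, not the object
    data_manager=data_manager,
    similarity_evaluation=SearchDistanceEvaluation(),
)
```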

aniketmoha9 commented 11 months ago

Now I am getting this error: 'LangChain' object is not callable.
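That error usually means the wrapper object itself was passed as `embedding_func`; like the BGE model before it, the `LangChain` wrapper is not callable, only its `to_embeddings` method is. A likely fix, assuming the setup sketched above:

```python
# Raises "'LangChain' object is not callable":
#   cache.init(embedding_func=encoder, ...)

# Works, because a bound method is callable:
cache.init(
    pre_embedding_func=get_content_func,
    embedding_func=encoder.to_embeddings,
    data_manager=data_manager,
    similarity_evaluation=SearchDistanceEvaluation(),
)
```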