Closed: ryugonomura closed this issue 1 year ago
I have the same issue as #3174; it's failing at creation time of the Embedding class.
Is this working?
import openai
from llama_index import OpenAIEmbedding, ServiceContext, LLMPredictor, GPTVectorStoreIndex
from langchain.chat_models.azure_openai import AzureChatOpenAI
openai.api_type = "azure"
openai.api_key = ""
openai.api_base = ""
openai.api_version = "2022-12-01"
embed_deployment_name = ""
chat_deployment_name = ""
embed_model = OpenAIEmbedding(deployment_name=embed_deployment_name, embed_batch_size=1)
chat_model = AzureChatOpenAI(
openai_api_key=openai.api_key,
openai_api_type=openai.api_type,
openai_api_base=openai.api_base,
openai_api_version="2023-03-15-preview",
deployment_name=chat_deployment_name,
)
llm_predictor = LLMPredictor(chat_model)
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model)
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
This works
import os
import openai
from dotenv import load_dotenv
from llama_index import OpenAIEmbedding, ServiceContext, LLMPredictor, GPTVectorStoreIndex
from langchain.chat_models.azure_openai import AzureChatOpenAI
load_dotenv()
openai.api_key = os.getenv('OPENAI_API_KEY')
openai.api_type = os.getenv('OPENAI_API_TYPE')
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_version = os.getenv("OPENAI_API_VERSION")
embed_deployment_name = "text-embedding-ada-002"
chat_deployment_name = "gpt-35-turbo"
embed_model = OpenAIEmbedding(deployment_name=embed_deployment_name, embed_batch_size=1)
chat_model = AzureChatOpenAI(
openai_api_key=openai.api_key,
openai_api_type=openai.api_type,
openai_api_base=openai.api_base,
openai_api_version="2023-03-15-preview",
deployment_name=chat_deployment_name,
)
llm_predictor = LLMPredictor(chat_model)
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model)
index = GPTVectorStoreIndex.from_documents(documents[:2], service_context=service_context)
but if I change the type of index I'm creating to a GPTDocumentSummaryIndex, it fails:
import os
import openai
from dotenv import load_dotenv
from llama_index import OpenAIEmbedding, ServiceContext, LLMPredictor, GPTVectorStoreIndex, ResponseSynthesizer
from llama_index.indices.document_summary import GPTDocumentSummaryIndex
from langchain.chat_models.azure_openai import AzureChatOpenAI
load_dotenv()
openai.api_key = os.getenv('OPENAI_API_KEY')
openai.api_type = os.getenv('OPENAI_API_TYPE')
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_version = os.getenv("OPENAI_API_VERSION")
embed_deployment_name = "text-embedding-ada-002"
chat_deployment_name = "gpt-35-turbo"
embed_model = OpenAIEmbedding(deployment_name=embed_deployment_name, embed_batch_size=1)
chat_model = AzureChatOpenAI(
openai_api_key=openai.api_key,
openai_api_type=openai.api_type,
openai_api_base=openai.api_base,
openai_api_version="2023-03-15-preview",
deployment_name=chat_deployment_name,
)
llm_predictor = LLMPredictor(chat_model)
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model)
#index = GPTVectorStoreIndex.from_documents(documents[:2], service_context=service_context)
response_synthesizer = ResponseSynthesizer.from_args(response_mode="tree_summarize", use_async=True)
doc_summary_index = GPTDocumentSummaryIndex.from_documents(
documents[:2],
service_context=service_context,
response_synthesizer=response_synthesizer
)
It's still showing the error about a missing engine or deployment_id:
---------------------------------------------------------------------------
InvalidRequestError Traceback (most recent call last)
Cell In[23], line 28
26 #index = GPTVectorStoreIndex.from_documents(documents[:2], service_context=service_context)
27 response_synthesizer = ResponseSynthesizer.from_args(response_mode="tree_summarize", use_async=True)
---> 28 doc_summary_index = GPTDocumentSummaryIndex.from_documents(
29 documents[:2],
30 service_context=service_context,
31 response_synthesizer=response_synthesizer
32 )
File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/base.py:93, in BaseGPTIndex.from_documents(cls, documents, storage_context, service_context, **kwargs)
89 docstore.set_document_hash(doc.get_doc_id(), doc.get_doc_hash())
91 nodes = service_context.node_parser.get_nodes_from_documents(documents)
---> 93 return cls(
94 nodes=nodes,
95 storage_context=storage_context,
96 service_context=service_context,
97 **kwargs,
98 )
File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/document_summary/base.py:65, in GPTDocumentSummaryIndex.__init__(self, nodes, index_struct, service_context, response_synthesizer, summary_query, **kwargs)
61 self._response_synthesizer = (
62 response_synthesizer or ResponseSynthesizer.from_args()
63 )
64 self._summary_query = summary_query or "summarize:"
---> 65 super().__init__(
66 nodes=nodes,
67 index_struct=index_struct,
68 service_context=service_context,
69 **kwargs,
70 )
File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/base.py:65, in BaseGPTIndex.__init__(self, nodes, index_struct, storage_context, service_context, **kwargs)
63 if index_struct is None:
64 assert nodes is not None
---> 65 index_struct = self.build_index_from_nodes(nodes)
66 self._index_struct = index_struct
67 self._storage_context.index_store.add_index_struct(self._index_struct)
File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/token_counter/token_counter.py:78, in llm_token_counter.<locals>.wrap.<locals>.wrapped_llm_predict(_self, *args, **kwargs)
76 def wrapped_llm_predict(_self: Any, *args: Any, **kwargs: Any) -> Any:
77 with wrapper_logic(_self):
---> 78 f_return_val = f(_self, *args, **kwargs)
80 return f_return_val
File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/base.py:153, in BaseGPTIndex.build_index_from_nodes(self, nodes)
151 """Build the index from nodes."""
152 self._docstore.add_documents(nodes, allow_update=True)
--> 153 return self._build_index_from_nodes(nodes)
File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/document_summary/base.py:150, in GPTDocumentSummaryIndex._build_index_from_nodes(self, nodes)
147 # first get doc_id to nodes_dict, generate a summary for each doc_id,
148 # then build the index struct
149 index_struct = IndexDocumentSummary()
--> 150 self._add_nodes_to_index(index_struct, nodes)
151 return index_struct
File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/document_summary/base.py:128, in GPTDocumentSummaryIndex._add_nodes_to_index(self, index_struct, nodes)
126 nodes_with_scores = [NodeWithScore(n) for n in nodes]
127 # get the summary for each doc_id
--> 128 summary_response = self._response_synthesizer.synthesize(
129 query_bundle=QueryBundle(self._summary_query),
130 nodes=nodes_with_scores,
131 )
132 summary_response = cast(Response, summary_response)
133 summary_node_dict[doc_id] = Node(
134 summary_response.response,
135 relationships={DocumentRelationship.SOURCE: doc_id},
136 )
File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/query/response_synthesis.py:163, in ResponseSynthesizer.synthesize(self, query_bundle, nodes, additional_source_nodes)
161 if self._response_mode != ResponseMode.NO_TEXT:
162 assert self._response_builder is not None
--> 163 response_str = self._response_builder.get_response(
164 query_str=query_bundle.query_str,
165 text_chunks=text_chunks,
166 **self._response_kwargs,
167 )
168 else:
169 response_str = None
File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/token_counter/token_counter.py:78, in llm_token_counter.<locals>.wrap.<locals>.wrapped_llm_predict(_self, *args, **kwargs)
76 def wrapped_llm_predict(_self: Any, *args: Any, **kwargs: Any) -> Any:
77 with wrapper_logic(_self):
---> 78 f_return_val = f(_self, *args, **kwargs)
80 return f_return_val
File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/response/response_builder.py:376, in TreeSummarize.get_response(self, query_str, text_chunks, prev_response, num_children, **response_kwargs)
371 root_node_ids = index_graph.root_nodes
372 root_nodes = {
373 index: index_builder.docstore.get_node(node_id)
374 for index, node_id in root_node_ids.items()
375 }
--> 376 return self._get_tree_response_over_root_nodes(
377 query_str, prev_response, root_nodes, text_qa_template
378 )
File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/response/response_builder.py:425, in TreeSummarize._get_tree_response_over_root_nodes(self, query_str, prev_response, root_nodes, text_qa_template)
421 node_text = self._service_context.prompt_helper.get_text_from_nodes(
422 node_list, prompt=text_qa_template
423 )
424 # NOTE: the final response could be a string or a stream
--> 425 response = super().get_response(
426 query_str=query_str,
427 text_chunks=[node_text],
428 prev_response=prev_response,
429 )
430 if isinstance(response, str):
431 response = response or "Empty Response"
File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/token_counter/token_counter.py:78, in llm_token_counter.<locals>.wrap.<locals>.wrapped_llm_predict(_self, *args, **kwargs)
76 def wrapped_llm_predict(_self: Any, *args: Any, **kwargs: Any) -> Any:
77 with wrapper_logic(_self):
---> 78 f_return_val = f(_self, *args, **kwargs)
80 return f_return_val
File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/response/response_builder.py:134, in Refine.get_response(self, query_str, text_chunks, prev_response, **response_kwargs)
130 for text_chunk in text_chunks:
131 if prev_response_obj is None:
132 # if this is the first chunk, and text chunk already
133 # is an answer, then return it
--> 134 response = self._give_response_single(
135 query_str,
136 text_chunk,
137 )
138 else:
139 response = self._refine_response_single(
140 prev_response_obj, query_str, text_chunk
141 )
File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/response/response_builder.py:171, in Refine._give_response_single(self, query_str, text_chunk, **response_kwargs)
166 for cur_text_chunk in text_chunks:
167 if response is None and not self._streaming:
168 (
169 response,
170 formatted_prompt,
--> 171 ) = self._service_context.llm_predictor.predict(
172 text_qa_template,
173 context_str=cur_text_chunk,
174 )
175 self._log_prompt_and_response(
176 formatted_prompt, response, log_prefix="Initial"
177 )
178 elif response is None and self._streaming:
File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/llm_predictor/base.py:242, in LLMPredictor.predict(self, prompt, **prompt_args)
237 event_id = self.callback_manager.on_event_start(
238 CBEventType.LLM,
239 payload=llm_payload,
240 )
241 formatted_prompt = prompt.format(llm=self._llm, **prompt_args)
--> 242 llm_prediction = self._predict(prompt, **prompt_args)
243 logger.debug(llm_prediction)
245 # We assume that the value of formatted_prompt is exactly the thing
246 # eventually sent to OpenAI, or whatever LLM downstream
File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/llm_predictor/base.py:210, in LLMPredictor._predict(self, prompt, **prompt_args)
208 full_prompt_args = prompt.get_full_format_args(prompt_args)
209 if self.retry_on_throttling:
--> 210 llm_prediction = retry_on_exceptions_with_backoff(
211 lambda: llm_chain.predict(**full_prompt_args),
212 [
213 ErrorToRetry(openai.error.RateLimitError),
214 ErrorToRetry(openai.error.ServiceUnavailableError),
215 ErrorToRetry(openai.error.TryAgain),
216 ErrorToRetry(
217 openai.error.APIConnectionError, lambda e: e.should_retry
218 ),
219 ],
220 )
221 else:
222 llm_prediction = llm_chain.predict(**full_prompt_args)
File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/utils.py:177, in retry_on_exceptions_with_backoff(lambda_fn, errors_to_retry, max_tries, min_backoff_secs, max_backoff_secs)
175 while True:
176 try:
--> 177 return lambda_fn()
178 except exception_class_tuples as e:
179 traceback.print_exc()
File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/llm_predictor/base.py:211, in LLMPredictor._predict.<locals>.<lambda>()
208 full_prompt_args = prompt.get_full_format_args(prompt_args)
209 if self.retry_on_throttling:
210 llm_prediction = retry_on_exceptions_with_backoff(
--> 211 lambda: llm_chain.predict(**full_prompt_args),
212 [
213 ErrorToRetry(openai.error.RateLimitError),
214 ErrorToRetry(openai.error.ServiceUnavailableError),
215 ErrorToRetry(openai.error.TryAgain),
216 ErrorToRetry(
217 openai.error.APIConnectionError, lambda e: e.should_retry
218 ),
219 ],
220 )
221 else:
222 llm_prediction = llm_chain.predict(**full_prompt_args)
File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/chains/llm.py:213, in LLMChain.predict(self, callbacks, **kwargs)
198 def predict(self, callbacks: Callbacks = None, **kwargs: Any) -> str:
199 """Format prompt with kwargs and pass to LLM.
200
201 Args:
(...)
211 completion = llm.predict(adjective="funny")
212 """
--> 213 return self(kwargs, callbacks=callbacks)[self.output_key]
File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/chains/base.py:140, in Chain.__call__(self, inputs, return_only_outputs, callbacks)
138 except (KeyboardInterrupt, Exception) as e:
139 run_manager.on_chain_error(e)
--> 140 raise e
141 run_manager.on_chain_end(outputs)
142 return self.prep_outputs(inputs, outputs, return_only_outputs)
File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/chains/base.py:134, in Chain.__call__(self, inputs, return_only_outputs, callbacks)
128 run_manager = callback_manager.on_chain_start(
129 {"name": self.__class__.__name__},
130 inputs,
131 )
132 try:
133 outputs = (
--> 134 self._call(inputs, run_manager=run_manager)
135 if new_arg_supported
136 else self._call(inputs)
137 )
138 except (KeyboardInterrupt, Exception) as e:
139 run_manager.on_chain_error(e)
File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/chains/llm.py:69, in LLMChain._call(self, inputs, run_manager)
64 def _call(
65 self,
66 inputs: Dict[str, Any],
67 run_manager: Optional[CallbackManagerForChainRun] = None,
68 ) -> Dict[str, str]:
---> 69 response = self.generate([inputs], run_manager=run_manager)
70 return self.create_outputs(response)[0]
File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/chains/llm.py:79, in LLMChain.generate(self, input_list, run_manager)
77 """Generate LLM result from inputs."""
78 prompts, stop = self.prep_prompts(input_list, run_manager=run_manager)
---> 79 return self.llm.generate_prompt(
80 prompts, stop, callbacks=run_manager.get_child() if run_manager else None
81 )
File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/llms/base.py:127, in BaseLLM.generate_prompt(self, prompts, stop, callbacks)
120 def generate_prompt(
121 self,
122 prompts: List[PromptValue],
123 stop: Optional[List[str]] = None,
124 callbacks: Callbacks = None,
125 ) -> LLMResult:
126 prompt_strings = [p.to_string() for p in prompts]
--> 127 return self.generate(prompt_strings, stop=stop, callbacks=callbacks)
File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/llms/base.py:176, in BaseLLM.generate(self, prompts, stop, callbacks)
174 except (KeyboardInterrupt, Exception) as e:
175 run_manager.on_llm_error(e)
--> 176 raise e
177 run_manager.on_llm_end(output)
178 return output
File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/llms/base.py:170, in BaseLLM.generate(self, prompts, stop, callbacks)
165 run_manager = callback_manager.on_llm_start(
166 {"name": self.__class__.__name__}, prompts
167 )
168 try:
169 output = (
--> 170 self._generate(prompts, stop=stop, run_manager=run_manager)
171 if new_arg_supported
172 else self._generate(prompts, stop=stop)
173 )
174 except (KeyboardInterrupt, Exception) as e:
175 run_manager.on_llm_error(e)
File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/llms/openai.py:315, in BaseOpenAI._generate(self, prompts, stop, run_manager)
313 choices.extend(response["choices"])
314 else:
--> 315 response = completion_with_retry(self, prompt=_prompts, **params)
316 choices.extend(response["choices"])
317 if not self.streaming:
318 # Can't update token usage if streaming
File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/llms/openai.py:106, in completion_with_retry(llm, **kwargs)
102 @retry_decorator
103 def _completion_with_retry(**kwargs: Any) -> Any:
104 return llm.client.create(**kwargs)
--> 106 return _completion_with_retry(**kwargs)
File /anaconda/envs/tf/lib/python3.9/site-packages/tenacity/__init__.py:289, in BaseRetrying.wraps.<locals>.wrapped_f(*args, **kw)
287 @functools.wraps(f)
288 def wrapped_f(*args: t.Any, **kw: t.Any) -> t.Any:
--> 289 return self(f, *args, **kw)
File /anaconda/envs/tf/lib/python3.9/site-packages/tenacity/__init__.py:379, in Retrying.__call__(self, fn, *args, **kwargs)
377 retry_state = RetryCallState(retry_object=self, fn=fn, args=args, kwargs=kwargs)
378 while True:
--> 379 do = self.iter(retry_state=retry_state)
380 if isinstance(do, DoAttempt):
381 try:
File /anaconda/envs/tf/lib/python3.9/site-packages/tenacity/__init__.py:314, in BaseRetrying.iter(self, retry_state)
312 is_explicit_retry = fut.failed and isinstance(fut.exception(), TryAgain)
313 if not (is_explicit_retry or self.retry(retry_state)):
--> 314 return fut.result()
316 if self.after is not None:
317 self.after(retry_state)
File /anaconda/envs/tf/lib/python3.9/concurrent/futures/_base.py:439, in Future.result(self, timeout)
437 raise CancelledError()
438 elif self._state == FINISHED:
--> 439 return self.__get_result()
441 self._condition.wait(timeout)
443 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
File /anaconda/envs/tf/lib/python3.9/concurrent/futures/_base.py:391, in Future.__get_result(self)
389 if self._exception:
390 try:
--> 391 raise self._exception
392 finally:
393 # Break a reference cycle with the exception in self._exception
394 self = None
File /anaconda/envs/tf/lib/python3.9/site-packages/tenacity/__init__.py:382, in Retrying.__call__(self, fn, *args, **kwargs)
380 if isinstance(do, DoAttempt):
381 try:
--> 382 result = fn(*args, **kwargs)
383 except BaseException: # noqa: B902
384 retry_state.set_exception(sys.exc_info()) # type: ignore[arg-type]
File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/llms/openai.py:104, in completion_with_retry.<locals>._completion_with_retry(**kwargs)
102 @retry_decorator
103 def _completion_with_retry(**kwargs: Any) -> Any:
--> 104 return llm.client.create(**kwargs)
File /anaconda/envs/tf/lib/python3.9/site-packages/openai/api_resources/completion.py:25, in Completion.create(cls, *args, **kwargs)
23 while True:
24 try:
---> 25 return super().create(*args, **kwargs)
26 except TryAgain as e:
27 if timeout is not None and time.time() > start + timeout:
File /anaconda/envs/tf/lib/python3.9/site-packages/openai/api_resources/abstract/engine_api_resource.py:149, in EngineAPIResource.create(cls, api_key, api_base, api_type, request_id, api_version, organization, **params)
127 @classmethod
128 def create(
129 cls,
(...)
136 **params,
137 ):
138 (
139 deployment_id,
140 engine,
141 timeout,
142 stream,
143 headers,
144 request_timeout,
145 typed_api_type,
146 requestor,
147 url,
148 params,
--> 149 ) = cls.__prepare_create_request(
150 api_key, api_base, api_type, api_version, organization, **params
151 )
153 response, _, api_key = requestor.request(
154 "post",
155 url,
(...)
160 request_timeout=request_timeout,
161 )
163 if stream:
164 # must be an iterator
File /anaconda/envs/tf/lib/python3.9/site-packages/openai/api_resources/abstract/engine_api_resource.py:83, in EngineAPIResource.__prepare_create_request(cls, api_key, api_base, api_type, api_version, organization, **params)
81 if typed_api_type in (util.ApiType.AZURE, util.ApiType.AZURE_AD):
82 if deployment_id is None and engine is None:
---> 83 raise error.InvalidRequestError(
84 "Must provide an 'engine' or 'deployment_id' parameter to create a %s"
85 % cls,
86 "engine",
87 )
88 else:
89 if model is None and engine is None:
InvalidRequestError: Must provide an 'engine' or 'deployment_id' parameter to create a <class 'openai.api_resources.completion.Completion'>
Can you pass service_context to ResponseSynthesizer?
response_synthesizer = ResponseSynthesizer.from_args(
response_mode="tree_summarize",
service_context=service_context
)
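For reference, a rough sketch of how that suggestion would slot into the earlier GPTDocumentSummaryIndex snippet (untested; it assumes the same service_context and documents defined above):
# Pass the same Azure-configured service_context to both the response synthesizer
# and the index, so the tree_summarize calls also use the Azure deployment
# instead of falling back to the default OpenAI engine.
response_synthesizer = ResponseSynthesizer.from_args(
    response_mode="tree_summarize",
    use_async=True,
    service_context=service_context,
)
doc_summary_index = GPTDocumentSummaryIndex.from_documents(
    documents[:2],
    service_context=service_context,
    response_synthesizer=response_synthesizer,
)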
import os
import openai
from llama_index import Document, SimpleDirectoryReader, ServiceContext, GPTVectorStoreIndex, LLMPredictor, PromptHelper, LangchainEmbedding, load_index_from_storage, StorageContext
from langchain.llms import AzureOpenAI
from langchain.embeddings import OpenAIEmbeddings
openai.api_type = os.environ["OPENAI_API_TYPE"]
openai.api_key = os.environ["OPENAI_API_KEY"]
openai.api_base = os.environ["OPENAI_API_BASE"]
openai.api_version = os.environ["OPENAI_API_VERSION"]
llm = AzureOpenAI(deployment_name="gpt-35-turbo")
llm_predictor = LLMPredictor(llm=llm)
embedding = OpenAIEmbeddings(deployment="text-embedding-ada-002", max_retries=2)
embedding_llm = LangchainEmbedding(embedding, embed_batch_size=1)
service_context = ServiceContext.from_defaults(
llm_predictor=llm_predictor,
embed_model=embedding_llm,
)
documents = SimpleDirectoryReader("data").load_data()
index = GPTVectorStoreIndex.from_documents(documents)
query_engine= index.as_query_engine(service_context=service_context)
query_engine.query("sample string")
I ran the above code, but I get an error.
Could you please correct the following line in llama_index/embeddings/openai.py, in aget_embeddings, get_embeddings, aget_embedding, and get_embedding? Changing "model" to "engine" fixed the error.
Before modification:
openai.Embedding.create(input=[text], model=engine, **kwargs)
After modification:
openai.Embedding.create(input=[text], engine=engine, **kwargs)
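As a quick sanity check (a minimal sketch, independent of llama_index; it assumes openai<1.0 and an Azure embedding deployment named text-embedding-ada-002), you can confirm that the Azure endpoint only accepts the deployment via engine/deployment_id by calling the SDK directly:
import openai

openai.api_type = "azure"
openai.api_key = ""        # your Azure OpenAI key
openai.api_base = ""       # https://<resource>.openai.azure.com/
openai.api_version = "2022-12-01"

# Works on Azure: the deployment name is passed as `engine`
resp = openai.Embedding.create(
    input=["sample string"],
    engine="text-embedding-ada-002",  # assumed deployment name
)
print(len(resp["data"][0]["embedding"]))

# Raises "Must provide an 'engine' or 'deployment_id' parameter ..." on Azure:
# openai.Embedding.create(input=["sample string"], model="text-embedding-ada-002")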
As far as I have tried, the following code works.
from llama_index import ServiceContext, GPTVectorStoreIndex, LLMPredictor, LangchainEmbedding
from langchain.chat_models import AzureChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
API_KEY = ""
API_BASE = ""
embed_deployment_name = ""
chat_deployment_name = ""
embedding = OpenAIEmbeddings(
deployment=embed_deployment_name,
openai_api_key=API_KEY,
openai_api_base=API_BASE,
openai_api_type="azure",
openai_api_version="2022-12-01",
)
embedding_llm = LangchainEmbedding(embedding, embed_batch_size=1)
llm = AzureChatOpenAI(
deployment_name=chat_deployment_name,
openai_api_key=API_KEY,
openai_api_base=API_BASE,
openai_api_type="azure",
openai_api_version="2023-03-15-preview",
)
llm_predictor = LLMPredictor(llm=llm)
service_context = ServiceContext.from_defaults(
llm_predictor=llm_predictor,
embed_model=embedding_llm,
)
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()
print(query_engine.query("sample"))
@Hironsan If the created index is used as-is it works correctly, but if it is loaded from a locally saved index, an error occurs.
code
from llama_index import load_index_from_storage, StorageContext
# Save the index to "./storage"
index.storage_context.persist(persist_dir="./storage")
# load index
storage_context = StorageContext.from_defaults(persist_dir='storage')
index = load_index_from_storage(storage_context)
query_engine = index.as_query_engine(service_context=service_context)
query_engine.query("sample")
error
Traceback (most recent call last):
File "/home/product/.cache/pypoetry/virtualenvs/chatbot-eVeRnpX_-py3.11/lib/python3.11/site-packages/tenacity/__init__.py", line 382, in __call__
result = fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/product/.cache/pypoetry/virtualenvs/chatbot-eVeRnpX_-py3.11/lib/python3.11/site-packages/llama_index/embeddings/openai.py", line 105, in get_embedding
return openai.Embedding.create(input=[text], model=engine, **kwargs)["data"][0][
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/product/.cache/pypoetry/virtualenvs/chatbot-eVeRnpX_-py3.11/lib/python3.11/site-packages/openai/api_resources/embedding.py", line 33, in create
response = super().create(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/product/.cache/pypoetry/virtualenvs/chatbot-eVeRnpX_-py3.11/lib/python3.11/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 149, in create
) = cls.__prepare_create_request(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/product/.cache/pypoetry/virtualenvs/chatbot-eVeRnpX_-py3.11/lib/python3.11/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 83, in __prepare_create_request
raise error.InvalidRequestError(
openai.error.InvalidRequestError: Must provide an 'engine' or 'deployment_id' parameter to create a <class 'openai.api_resources.embedding.Embedding'>
As the documentation says, you will need to pass the same ServiceContext to load_index_from_storage. Please read it in detail:
from llama_index import load_index_from_storage, StorageContext
index.storage_context.persist(persist_dir="./storage")
storage_context = StorageContext.from_defaults(persist_dir='storage')
index = load_index_from_storage(storage_context, service_context=service_context)
query_engine = index.as_query_engine()
query_engine.query("sample")
You were right. Thank you for your advice.
You're welcome. If the problem has been resolved, please close the Issue.
Can you pass service_context to ResponseSynthesizer?
response_synthesizer = ResponseSynthesizer.from_args(response_mode="tree_summarize", service_context=service_context)
Thanks for the hint, it worked perfectly. Now I have to read the documentation to understand what I'm doing :)
I would have expected this to be fixed by #3140, but the following error is still occurring; specifically, it occurs when executing query_engine.query("sample string").