run-llama / llama_index

LlamaIndex is a data framework for your LLM applications
https://docs.llamaindex.ai
MIT License

InvalidRequestError: Must provide an 'engine' or 'deployment_id' parameter #2129

Closed swilliams57to1 closed 1 year ago

swilliams57to1 commented 1 year ago

Attempting a simple semantic query with Azure (using the embed_batch_size workaround to get past the size issue):

```python
from llama_index import GPTVectorStoreIndex, ServiceContext, SimpleDirectoryReader
from llama_index.embeddings.openai import OpenAIEmbedding

documents = SimpleDirectoryReader('data').load_data()
sc = ServiceContext.from_defaults(embed_model=OpenAIEmbedding(embed_batch_size=1))
index = GPTVectorStoreIndex.from_documents(documents, service_context=sc, engine="text-davinci-003")
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print(response)
```

The query results in the following error (full traceback below):

InvalidRequestError: Must provide an 'engine' or 'deployment_id' parameter to create a <class 'openai.api_resources.completion.Completion'>

Where/how do you provide the engine?

Full error

INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 53631 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 8 tokens

InvalidRequestError Traceback (most recent call last) Cell In[19], line 6 4 index = GPTVectorStoreIndex.from_documents(documents, service_context=sc, engine="text-davinci-003") 5 query_engine = index.as_query_engine() ----> 6 response = query_engine.query("What did the author do growing up?") 7 print(response)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\indices\query\base.py:20, in BaseQueryEngine.query(self, str_or_query_bundle) 18 if isinstance(str_or_query_bundle, str): 19 str_or_query_bundle = QueryBundle(str_or_query_bundle) ---> 20 return self._query(str_or_query_bundle)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\query_engine\retriever_query_engine.py:145, in RetrieverQueryEngine._query(self, query_bundle) 140 self._callback_manager.on_event_end( 141 CBEventType.RETRIEVE, payload={"nodes": nodes}, event_id=retrieve_id 142 ) 144 synth_id = self._callback_manager.on_event_start(CBEventType.SYNTHESIZE) --> 145 response = self._response_synthesizer.synthesize( 146 query_bundle=query_bundle, 147 nodes=nodes, 148 ) 149 self._callback_manager.on_event_end( 150 CBEventType.SYNTHESIZE, payload={"response": response}, event_id=synth_id 151 ) 153 self._callback_manager.on_event_end(CBEventType.QUERY, event_id=query_id)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\indices\query\response_synthesis.py:174, in ResponseSynthesizer.synthesize(self, query_bundle, nodes, additional_source_nodes) 172 if self._response_mode != ResponseMode.NO_TEXT: 173 assert self._response_builder is not None --> 174 response_str = self._response_builder.get_response( 175 query_str=query_bundle.query_str, 176 text_chunks=text_chunks, 177 **self._response_kwargs, 178 ) 179 else: 180 response_str = None

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\indices\response\response_builder.py:349, in CompactAndRefine.get_response(self, query_str, text_chunks, prev_response, **response_kwargs) 343 with temp_set_attrs( 344 self._service_context.prompt_helper, use_chunk_size_limit=False 345 ): 346 new_texts = self._service_context.prompt_helper.compact_text_chunks( 347 max_prompt, text_chunks 348 ) --> 349 response = super().get_response( 350 query_str=query_str, text_chunks=new_texts, prev_response=prev_response 351 ) 352 return response

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\token_counter\token_counter.py:78, in llm_token_counter..wrap..wrapped_llm_predict(_self, *args, kwargs) 76 def wrapped_llm_predict(_self: Any, *args: Any, *kwargs: Any) -> Any: 77 with wrapper_logic(_self): ---> 78 f_return_val = f(_self, args, kwargs) 80 return f_return_val

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\indices\response\response_builder.py:172, in Refine.get_response(self, query_str, text_chunks, prev_response, **response_kwargs) 168 for text_chunk in text_chunks: 169 if prev_response_obj is None: 170 # if this is the first chunk, and text chunk already 171 # is an answer, then return it --> 172 response = self._give_response_single( 173 query_str, 174 text_chunk, 175 ) 176 else: 177 response = self._refine_response_single( 178 prev_response_obj, query_str, text_chunk 179 )

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\indices\response\response_builder.py:212, in Refine._give_response_single(self, query_str, text_chunk, **response_kwargs) 207 if response is None and not self._streaming: 208 event_id = self._callback_llm_on_start() 209 ( 210 response, 211 formatted_prompt, --> 212 ) = self._service_context.llm_predictor.predict( 213 text_qa_template, 214 context_str=cur_text_chunk, 215 ) 216 self._log_prompt_and_response( 217 formatted_prompt, response, log_prefix="Initial" 218 ) 219 self._callback_llm_on_end( 220 formatted_prompt, response, event_id, stage="Initial" 221 )

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\llm_predictor\base.py:230, in LLMPredictor.predict(self, prompt, prompt_args) 220 """Predict the answer to a query. 221 222 Args: (...) 227 228 """ 229 formatted_prompt = prompt.format(llm=self._llm, prompt_args) --> 230 llm_prediction = self._predict(prompt, **prompt_args) 231 logger.debug(llm_prediction) 233 # We assume that the value of formatted_prompt is exactly the thing 234 # eventually sent to OpenAI, or whatever LLM downstream

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\llm_predictor\base.py:204, in LLMPredictor._predict(self, prompt, prompt_args) 202 full_prompt_args = prompt.get_full_format_args(prompt_args) 203 if self.retry_on_throttling: --> 204 llm_prediction = retry_on_exceptions_with_backoff( 205 lambda: llm_chain.predict(full_prompt_args), 206 [ 207 ErrorToRetry(openai.error.RateLimitError), 208 ErrorToRetry(openai.error.ServiceUnavailableError), 209 ErrorToRetry(openai.error.TryAgain), 210 ErrorToRetry( 211 openai.error.APIConnectionError, lambda e: e.should_retry 212 ), 213 ], 214 ) 215 else: 216 llm_prediction = llm_chain.predict(**full_prompt_args)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\utils.py:177, in retry_on_exceptions_with_backoff(lambda_fn, errors_to_retry, max_tries, min_backoff_secs, max_backoff_secs) 175 while True: 176 try: --> 177 return lambda_fn() 178 except exception_class_tuples as e: 179 traceback.print_exc()

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\llm_predictor\base.py:205, in LLMPredictor._predict..() 202 full_prompt_args = prompt.get_full_format_args(prompt_args) 203 if self.retry_on_throttling: 204 llm_prediction = retry_on_exceptions_with_backoff( --> 205 lambda: llm_chain.predict(full_prompt_args), 206 [ 207 ErrorToRetry(openai.error.RateLimitError), 208 ErrorToRetry(openai.error.ServiceUnavailableError), 209 ErrorToRetry(openai.error.TryAgain), 210 ErrorToRetry( 211 openai.error.APIConnectionError, lambda e: e.should_retry 212 ), 213 ], 214 ) 215 else: 216 llm_prediction = llm_chain.predict(full_prompt_args)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\chains\llm.py:213, in LLMChain.predict(self, callbacks, kwargs) 198 def predict(self, callbacks: Callbacks = None, kwargs: Any) -> str: 199 """Format prompt with kwargs and pass to LLM. 200 201 Args: (...) 211 completion = llm.predict(adjective="funny") 212 """ --> 213 return self(kwargs, callbacks=callbacks)[self.output_key]

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\chains\base.py:142, in Chain.call(self, inputs, return_only_outputs, callbacks) 140 except (KeyboardInterrupt, Exception) as e: 141 run_manager.on_chain_error(e) --> 142 raise e 143 run_manager.on_chain_end(outputs) 144 return self.prep_outputs(inputs, outputs, return_only_outputs)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\chains\base.py:136, in Chain.call(self, inputs, return_only_outputs, callbacks) 130 run_manager = callback_manager.on_chain_start( 131 {"name": self.class.name}, 132 inputs, 133 ) 134 try: 135 outputs = ( --> 136 self._call(inputs, run_manager=run_manager) 137 if new_arg_supported 138 else self._call(inputs) 139 ) 140 except (KeyboardInterrupt, Exception) as e: 141 run_manager.on_chain_error(e)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\chains\llm.py:69, in LLMChain._call(self, inputs, run_manager) 64 def _call( 65 self, 66 inputs: Dict[str, Any], 67 run_manager: Optional[CallbackManagerForChainRun] = None, 68 ) -> Dict[str, str]: ---> 69 response = self.generate([inputs], run_manager=run_manager) 70 return self.create_outputs(response)[0]

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\chains\llm.py:79, in LLMChain.generate(self, input_list, run_manager) 77 """Generate LLM result from inputs.""" 78 prompts, stop = self.prep_prompts(input_list, run_manager=run_manager) ---> 79 return self.llm.generate_prompt( 80 prompts, stop, callbacks=run_manager.get_child() if run_manager else None 81 )

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\base.py:127, in BaseLLM.generate_prompt(self, prompts, stop, callbacks) 120 def generate_prompt( 121 self, 122 prompts: List[PromptValue], 123 stop: Optional[List[str]] = None, 124 callbacks: Callbacks = None, 125 ) -> LLMResult: 126 prompt_strings = [p.to_string() for p in prompts] --> 127 return self.generate(prompt_strings, stop=stop, callbacks=callbacks)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\base.py:176, in BaseLLM.generate(self, prompts, stop, callbacks) 174 except (KeyboardInterrupt, Exception) as e: 175 run_manager.on_llm_error(e) --> 176 raise e 177 run_manager.on_llm_end(output) 178 return output

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\base.py:170, in BaseLLM.generate(self, prompts, stop, callbacks) 165 run_manager = callback_manager.on_llm_start( 166 {"name": self.class.name}, prompts 167 ) 168 try: 169 output = ( --> 170 self._generate(prompts, stop=stop, run_manager=run_manager) 171 if new_arg_supported 172 else self._generate(prompts, stop=stop) 173 ) 174 except (KeyboardInterrupt, Exception) as e: 175 run_manager.on_llm_error(e)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\openai.py:306, in BaseOpenAI._generate(self, prompts, stop, run_manager) 304 choices.extend(response["choices"]) 305 else: --> 306 response = completion_with_retry(self, prompt=_prompts, **params) 307 choices.extend(response["choices"]) 308 if not self.streaming: 309 # Can't update token usage if streaming

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\openai.py:106, in completion_with_retry(llm, kwargs) 102 @retry_decorator 103 def _completion_with_retry(kwargs: Any) -> Any: 104 return llm.client.create(kwargs) --> 106 return _completion_with_retry(kwargs)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\tenacity__init__.py:289, in BaseRetrying.wraps..wrapped_f(*args, kw) 287 @functools.wraps(f) 288 def wrapped_f(*args: t.Any, *kw: t.Any) -> t.Any: --> 289 return self(f, args, kw)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\tenacity__init.py:379, in Retrying.call__(self, fn, *args, **kwargs) 377 retry_state = RetryCallState(retry_object=self, fn=fn, args=args, kwargs=kwargs) 378 while True: --> 379 do = self.iter(retry_state=retry_state) 380 if isinstance(do, DoAttempt): 381 try:

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\tenacity__init__.py:314, in BaseRetrying.iter(self, retry_state) 312 is_explicit_retry = fut.failed and isinstance(fut.exception(), TryAgain) 313 if not (is_explicit_retry or self.retry(retry_state)): --> 314 return fut.result() 316 if self.after is not None: 317 self.after(retry_state)

File ~\AppData\Local\Programs\Python\Python311\Lib\concurrent\futures_base.py:449, in Future.result(self, timeout) 447 raise CancelledError() 448 elif self._state == FINISHED: --> 449 return self.__get_result() 451 self._condition.wait(timeout) 453 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:

File ~\AppData\Local\Programs\Python\Python311\Lib\concurrent\futures_base.py:401, in Future.__get_result(self) 399 if self._exception: 400 try: --> 401 raise self._exception 402 finally: 403 # Break a reference cycle with the exception in self._exception 404 self = None

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\tenacity__init.py:382, in Retrying.call__(self, fn, *args, *kwargs) 380 if isinstance(do, DoAttempt): 381 try: --> 382 result = fn(args, **kwargs) 383 except BaseException: # noqa: B902 384 retry_state.set_exception(sys.exc_info()) # type: ignore[arg-type]

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\openai.py:104, in completion_with_retry.._completion_with_retry(kwargs) 102 @retry_decorator 103 def _completion_with_retry(kwargs: Any) -> Any: --> 104 return llm.client.create(**kwargs)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\openai\api_resources\completion.py:25, in Completion.create(cls, *args, *kwargs) 23 while True: 24 try: ---> 25 return super().create(args, **kwargs) 26 except TryAgain as e: 27 if timeout is not None and time.time() > start + timeout:

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\openai\api_resources\abstract\engine_api_resource.py:149, in EngineAPIResource.create(cls, api_key, api_base, api_type, request_id, api_version, organization, params) 127 @classmethod 128 def create( 129 cls, (...) 136 params, 137 ): 138 ( 139 deployment_id, 140 engine, 141 timeout, 142 stream, 143 headers, 144 request_timeout, 145 typed_api_type, 146 requestor, 147 url, 148 params, --> 149 ) = cls.__prepare_create_request( 150 api_key, api_base, api_type, apiversion, organization, **params 151 ) 153 response, , api_key = requestor.request( 154 "post", 155 url, (...) 160 request_timeout=request_timeout, 161 ) 163 if stream: 164 # must be an iterator

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\openai\api_resources\abstract\engine_api_resource.py:83, in EngineAPIResource.__prepare_create_request(cls, api_key, api_base, api_type, api_version, organization, **params) 81 if typed_api_type in (util.ApiType.AZURE, util.ApiType.AZURE_AD): 82 if deployment_id is None and engine is None: ---> 83 raise error.InvalidRequestError( 84 "Must provide an 'engine' or 'deployment_id' parameter to create a %s" 85 % cls, 86 "engine", 87 ) 88 else: 89 if model is None and engine is None:

InvalidRequestError: Must provide an 'engine' or 'deployment_id' parameter to create a <class 'openai.api_resources.completion.Completion'>

Disiok commented 1 year ago

We use text-davinci-003 for completion by default. If you are using Azure, you need to switch the LLM to AzureOpenAI,

with `from langchain.llms import AzureOpenAI`:

```python
import openai
from langchain.llms import AzureOpenAI
from llama_index import LLMPredictor, ServiceContext

llm = AzureOpenAI(deployment_name="<insert deployment name from azure>", model_kwargs={
    "api_key": openai.api_key,
    "api_base": openai.api_base,
    "api_type": openai.api_type,
    "api_version": openai.api_version,
})
llm_predictor = LLMPredictor(llm=llm)

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
)
```
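
If the embeddings also go through Azure, the embedding model typically needs to point at an Azure deployment as well, otherwise you can hit the same engine/deployment error (or a 404) on the embedding side. A minimal sketch, assuming the usual LangChain wrapper and reusing the llm_predictor above; the deployment name is a placeholder and the exact kwargs may vary across langchain/llama-index versions:

```python
import openai
from langchain.embeddings import OpenAIEmbeddings
from llama_index import LangchainEmbedding, ServiceContext

# Placeholder deployment name -- replace with the embedding deployment in your Azure resource.
embedding_llm = LangchainEmbedding(
    OpenAIEmbeddings(
        model="text-embedding-ada-002",
        deployment="<insert embedding deployment name from azure>",
        openai_api_key=openai.api_key,
        openai_api_base=openai.api_base,
        openai_api_type=openai.api_type,
        openai_api_version=openai.api_version,
    ),
    embed_batch_size=1,
)

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=embedding_llm,
)
```
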
swilliams57to1 commented 1 year ago

I have tried the above, although I now see a new error: 'Resource not found'.

The initial part of the code works, and I can see the documents being read and the index created as expected.

Only the index query still fails.

Code

```python
import os
import openai
from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import AzureOpenAI

# Load environment variables (set OPENAI_API_KEY and OPENAI_API_BASE in .env)
load_dotenv()

# Configure OpenAI API
openai.api_type = "azure"
# attempting other versions to no success
openai.api_version = "2022-12-01"
openai.api_version = "2023-03-15-preview"
openai.api_base = os.getenv('OPENAI_API_BASE')
openai.api_key = os.getenv("OPENAI_API_KEY")

from llama_index import SimpleDirectoryReader

documents = SimpleDirectoryReader('dataStore').load_data()

from llama_index.node_parser import SimpleNodeParser

parser = SimpleNodeParser()
nodes = parser.get_nodes_from_documents(documents)

from llama_index import LLMPredictor, GPTVectorStoreIndex, PromptHelper, ServiceContext
from langchain import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# define LLM
llm_predictor = LLMPredictor(llm=AzureOpenAI(temperature=0, model_name="text-davinci-003"))

llm = AzureOpenAI(deployment_name="text-davinci-003", model_kwargs={
    "api_key": openai.api_key,
    "api_base": openai.api_base,
    "api_type": openai.api_type,
    "api_version": openai.api_version,
})

# define prompt helper
# set maximum input size
max_input_size = 4096
# set number of output tokens
num_output = 256
# set maximum chunk overlap
max_chunk_overlap = 20
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    prompt_helper=prompt_helper,
    embed_model=OpenAIEmbedding(embed_batch_size=1),
)

index = GPTVectorStoreIndex(nodes, service_context=service_context, engine="text-davinci-003")
index.storage_context.persist(persist_dir="indexStore")

from llama_index import StorageContext, load_index_from_storage

# rebuild storage context
storage_context = StorageContext.from_defaults(persist_dir="indexStore")
# load index
index = load_index_from_storage(storage_context)

query_engine = index.as_query_engine(service_context=service_context, engine="text-davinci-003")

response = query_engine.query("What did the author do growing up?")
print(response)

print(response.get_formatted_sources())
```


Error message

INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 8 tokens
INFO:openai:error_code=404 error_message='Resource not found' error_param=None error_type=None message='OpenAI API error received' stream_error=False

InvalidRequestError Traceback (most recent call last) Cell In[11], line 1 ----> 1 response = query_engine.query("What did the author do growing up?") 2 print(response) 3 #print(response.get_formatted_sources())

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\indices\query\base.py:20, in BaseQueryEngine.query(self, str_or_query_bundle) 18 if isinstance(str_or_query_bundle, str): 19 str_or_query_bundle = QueryBundle(str_or_query_bundle) ---> 20 return self._query(str_or_query_bundle)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\query_engine\retriever_query_engine.py:145, in RetrieverQueryEngine._query(self, query_bundle) 140 self._callback_manager.on_event_end( 141 CBEventType.RETRIEVE, payload={"nodes": nodes}, event_id=retrieve_id 142 ) 144 synth_id = self._callback_manager.on_event_start(CBEventType.SYNTHESIZE) --> 145 response = self._response_synthesizer.synthesize( 146 query_bundle=query_bundle, 147 nodes=nodes, 148 ) 149 self._callback_manager.on_event_end( 150 CBEventType.SYNTHESIZE, payload={"response": response}, event_id=synth_id 151 ) 153 self._callback_manager.on_event_end(CBEventType.QUERY, event_id=query_id)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\indices\query\response_synthesis.py:174, in ResponseSynthesizer.synthesize(self, query_bundle, nodes, additional_source_nodes) 172 if self._response_mode != ResponseMode.NO_TEXT: 173 assert self._response_builder is not None --> 174 response_str = self._response_builder.get_response( 175 query_str=query_bundle.query_str, 176 text_chunks=text_chunks, 177 **self._response_kwargs, 178 ) 179 else: 180 response_str = None

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\indices\response\response_builder.py:349, in CompactAndRefine.get_response(self, query_str, text_chunks, prev_response, **response_kwargs) 343 with temp_set_attrs( 344 self._service_context.prompt_helper, use_chunk_size_limit=False 345 ): 346 new_texts = self._service_context.prompt_helper.compact_text_chunks( 347 max_prompt, text_chunks 348 ) --> 349 response = super().get_response( 350 query_str=query_str, text_chunks=new_texts, prev_response=prev_response 351 ) 352 return response

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\token_counter\token_counter.py:78, in llm_token_counter..wrap..wrapped_llm_predict(_self, *args, kwargs) 76 def wrapped_llm_predict(_self: Any, *args: Any, *kwargs: Any) -> Any: 77 with wrapper_logic(_self): ---> 78 f_return_val = f(_self, args, kwargs) 80 return f_return_val

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\indices\response\response_builder.py:172, in Refine.get_response(self, query_str, text_chunks, prev_response, **response_kwargs) 168 for text_chunk in text_chunks: 169 if prev_response_obj is None: 170 # if this is the first chunk, and text chunk already 171 # is an answer, then return it --> 172 response = self._give_response_single( 173 query_str, 174 text_chunk, 175 ) 176 else: 177 response = self._refine_response_single( 178 prev_response_obj, query_str, text_chunk 179 )

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\indices\response\response_builder.py:212, in Refine._give_response_single(self, query_str, text_chunk, **response_kwargs) 207 if response is None and not self._streaming: 208 event_id = self._callback_llm_on_start() 209 ( 210 response, 211 formatted_prompt, --> 212 ) = self._service_context.llm_predictor.predict( 213 text_qa_template, 214 context_str=cur_text_chunk, 215 ) 216 self._log_prompt_and_response( 217 formatted_prompt, response, log_prefix="Initial" 218 ) 219 self._callback_llm_on_end( 220 formatted_prompt, response, event_id, stage="Initial" 221 )

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\llm_predictor\base.py:230, in LLMPredictor.predict(self, prompt, prompt_args) 220 """Predict the answer to a query. 221 222 Args: (...) 227 228 """ 229 formatted_prompt = prompt.format(llm=self._llm, prompt_args) --> 230 llm_prediction = self._predict(prompt, **prompt_args) 231 logger.debug(llm_prediction) 233 # We assume that the value of formatted_prompt is exactly the thing 234 # eventually sent to OpenAI, or whatever LLM downstream

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\llm_predictor\base.py:204, in LLMPredictor._predict(self, prompt, prompt_args) 202 full_prompt_args = prompt.get_full_format_args(prompt_args) 203 if self.retry_on_throttling: --> 204 llm_prediction = retry_on_exceptions_with_backoff( 205 lambda: llm_chain.predict(full_prompt_args), 206 [ 207 ErrorToRetry(openai.error.RateLimitError), 208 ErrorToRetry(openai.error.ServiceUnavailableError), 209 ErrorToRetry(openai.error.TryAgain), 210 ErrorToRetry( 211 openai.error.APIConnectionError, lambda e: e.should_retry 212 ), 213 ], 214 ) 215 else: 216 llm_prediction = llm_chain.predict(**full_prompt_args)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\utils.py:177, in retry_on_exceptions_with_backoff(lambda_fn, errors_to_retry, max_tries, min_backoff_secs, max_backoff_secs) 175 while True: 176 try: --> 177 return lambda_fn() 178 except exception_class_tuples as e: 179 traceback.print_exc()

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\llm_predictor\base.py:205, in LLMPredictor._predict..() 202 full_prompt_args = prompt.get_full_format_args(prompt_args) 203 if self.retry_on_throttling: 204 llm_prediction = retry_on_exceptions_with_backoff( --> 205 lambda: llm_chain.predict(full_prompt_args), 206 [ 207 ErrorToRetry(openai.error.RateLimitError), 208 ErrorToRetry(openai.error.ServiceUnavailableError), 209 ErrorToRetry(openai.error.TryAgain), 210 ErrorToRetry( 211 openai.error.APIConnectionError, lambda e: e.should_retry 212 ), 213 ], 214 ) 215 else: 216 llm_prediction = llm_chain.predict(full_prompt_args)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\chains\llm.py:213, in LLMChain.predict(self, callbacks, kwargs) 198 def predict(self, callbacks: Callbacks = None, kwargs: Any) -> str: 199 """Format prompt with kwargs and pass to LLM. 200 201 Args: (...) 211 completion = llm.predict(adjective="funny") 212 """ --> 213 return self(kwargs, callbacks=callbacks)[self.output_key]

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\chains\base.py:142, in Chain.call(self, inputs, return_only_outputs, callbacks) 140 except (KeyboardInterrupt, Exception) as e: 141 run_manager.on_chain_error(e) --> 142 raise e 143 run_manager.on_chain_end(outputs) 144 return self.prep_outputs(inputs, outputs, return_only_outputs)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\chains\base.py:136, in Chain.call(self, inputs, return_only_outputs, callbacks) 130 run_manager = callback_manager.on_chain_start( 131 {"name": self.class.name}, 132 inputs, 133 ) 134 try: 135 outputs = ( --> 136 self._call(inputs, run_manager=run_manager) 137 if new_arg_supported 138 else self._call(inputs) 139 ) 140 except (KeyboardInterrupt, Exception) as e: 141 run_manager.on_chain_error(e)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\chains\llm.py:69, in LLMChain._call(self, inputs, run_manager) 64 def _call( 65 self, 66 inputs: Dict[str, Any], 67 run_manager: Optional[CallbackManagerForChainRun] = None, 68 ) -> Dict[str, str]: ---> 69 response = self.generate([inputs], run_manager=run_manager) 70 return self.create_outputs(response)[0]

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\chains\llm.py:79, in LLMChain.generate(self, input_list, run_manager) 77 """Generate LLM result from inputs.""" 78 prompts, stop = self.prep_prompts(input_list, run_manager=run_manager) ---> 79 return self.llm.generate_prompt( 80 prompts, stop, callbacks=run_manager.get_child() if run_manager else None 81 )

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\base.py:127, in BaseLLM.generate_prompt(self, prompts, stop, callbacks) 120 def generate_prompt( 121 self, 122 prompts: List[PromptValue], 123 stop: Optional[List[str]] = None, 124 callbacks: Callbacks = None, 125 ) -> LLMResult: 126 prompt_strings = [p.to_string() for p in prompts] --> 127 return self.generate(prompt_strings, stop=stop, callbacks=callbacks)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\base.py:176, in BaseLLM.generate(self, prompts, stop, callbacks) 174 except (KeyboardInterrupt, Exception) as e: 175 run_manager.on_llm_error(e) --> 176 raise e 177 run_manager.on_llm_end(output) 178 return output

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\base.py:170, in BaseLLM.generate(self, prompts, stop, callbacks) 165 run_manager = callback_manager.on_llm_start( 166 {"name": self.class.name}, prompts 167 ) 168 try: 169 output = ( --> 170 self._generate(prompts, stop=stop, run_manager=run_manager) 171 if new_arg_supported 172 else self._generate(prompts, stop=stop) 173 ) 174 except (KeyboardInterrupt, Exception) as e: 175 run_manager.on_llm_error(e)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\openai.py:306, in BaseOpenAI._generate(self, prompts, stop, run_manager) 304 choices.extend(response["choices"]) 305 else: --> 306 response = completion_with_retry(self, prompt=_prompts, **params) 307 choices.extend(response["choices"]) 308 if not self.streaming: 309 # Can't update token usage if streaming

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\openai.py:106, in completion_with_retry(llm, kwargs) 102 @retry_decorator 103 def _completion_with_retry(kwargs: Any) -> Any: 104 return llm.client.create(kwargs) --> 106 return _completion_with_retry(kwargs)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\tenacity__init__.py:289, in BaseRetrying.wraps..wrapped_f(*args, kw) 287 @functools.wraps(f) 288 def wrapped_f(*args: t.Any, *kw: t.Any) -> t.Any: --> 289 return self(f, args, kw)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\tenacity__init.py:379, in Retrying.call__(self, fn, *args, **kwargs) 377 retry_state = RetryCallState(retry_object=self, fn=fn, args=args, kwargs=kwargs) 378 while True: --> 379 do = self.iter(retry_state=retry_state) 380 if isinstance(do, DoAttempt): 381 try:

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\tenacity__init__.py:314, in BaseRetrying.iter(self, retry_state) 312 is_explicit_retry = fut.failed and isinstance(fut.exception(), TryAgain) 313 if not (is_explicit_retry or self.retry(retry_state)): --> 314 return fut.result() 316 if self.after is not None: 317 self.after(retry_state)

File ~\AppData\Local\Programs\Python\Python311\Lib\concurrent\futures_base.py:449, in Future.result(self, timeout) 447 raise CancelledError() 448 elif self._state == FINISHED: --> 449 return self.__get_result() 451 self._condition.wait(timeout) 453 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:

File ~\AppData\Local\Programs\Python\Python311\Lib\concurrent\futures_base.py:401, in Future.__get_result(self) 399 if self._exception: 400 try: --> 401 raise self._exception 402 finally: 403 # Break a reference cycle with the exception in self._exception 404 self = None

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\tenacity__init.py:382, in Retrying.call__(self, fn, *args, *kwargs) 380 if isinstance(do, DoAttempt): 381 try: --> 382 result = fn(args, **kwargs) 383 except BaseException: # noqa: B902 384 retry_state.set_exception(sys.exc_info()) # type: ignore[arg-type]

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\openai.py:104, in completion_with_retry.._completion_with_retry(kwargs) 102 @retry_decorator 103 def _completion_with_retry(kwargs: Any) -> Any: --> 104 return llm.client.create(**kwargs)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\openai\api_resources\completion.py:25, in Completion.create(cls, *args, *kwargs) 23 while True: 24 try: ---> 25 return super().create(args, **kwargs) 26 except TryAgain as e: 27 if timeout is not None and time.time() > start + timeout:

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\openai\api_resources\abstract\engine_api_resource.py:153, in EngineAPIResource.create(cls, api_key, api_base, api_type, request_id, api_version, organization, params) 127 @classmethod 128 def create( 129 cls, (...) 136 params, 137 ): 138 ( 139 deployment_id, 140 engine, (...) 150 api_key, api_base, api_type, apiversion, organization, **params 151 ) --> 153 response, , api_key = requestor.request( 154 "post", 155 url, 156 params=params, 157 headers=headers, 158 stream=stream, 159 request_id=request_id, 160 request_timeout=request_timeout, 161 ) 163 if stream: 164 # must be an iterator 165 assert not isinstance(response, OpenAIResponse)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\openai\api_requestor.py:230, in APIRequestor.request(self, method, url, params, headers, files, stream, request_id, request_timeout) 209 def request( 210 self, 211 method, (...) 218 request_timeout: Optional[Union[float, Tuple[float, float]]] = None, 219 ) -> Tuple[Union[OpenAIResponse, Iterator[OpenAIResponse]], bool, str]: 220 result = self.request_raw( 221 method.lower(), 222 url, (...) 228 request_timeout=request_timeout, 229 ) --> 230 resp, got_stream = self._interpret_response(result, stream) 231 return resp, got_stream, self.api_key

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\openai\api_requestor.py:624, in APIRequestor._interpret_response(self, result, stream) 616 return ( 617 self._interpret_response_line( 618 line, result.status_code, result.headers, stream=True 619 ) 620 for line in parse_stream(result.iter_lines()) 621 ), True 622 else: 623 return ( --> 624 self._interpret_response_line( 625 result.content.decode("utf-8"), 626 result.status_code, 627 result.headers, 628 stream=False, 629 ), 630 False, 631 )

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\openai\api_requestor.py:687, in APIRequestor._interpret_response_line(self, rbody, rcode, rheaders, stream) 685 stream_error = stream and "error" in resp.data 686 if stream_error or not 200 <= rcode < 300: --> 687 raise self.handle_error_response( 688 rbody, rcode, resp.data, rheaders, stream_error=stream_error 689 ) 690 return resp

InvalidRequestError: Resource not found

acshulk commented 1 year ago

I am experiencing a similar issue. No luck with the TypeScript version either.

p2c2e commented 1 year ago

Fixes #2129. This issue specifically happens with the Azure implementation of OpenAI.

The Azure OpenAI APIs work a bit differently (they use deployment_name, versus engine/model/model_type for OpenAI), though they share some common logic. The fix passes an 'engine' parameter (in addition to the current 'model' param) to satisfy the openai client's expectation. For some reason the llama-index code does not pass the engine param through as-is; instead it uses it as 'model' when calling openai.Embedding.create() inside the llama_index.embeddings.openai.get_embedding method.

return openai.Embedding.create(input=[text], model=engine, engine=engine)["data"][0]["embedding"]

The error originates from the following code in openai-python:

@classmethod
    def __prepare_create_request(
        cls,
        api_key=None,
        api_base=None,
        api_type=None,
        api_version=None,
        organization=None,
        **params,
    ):
        deployment_id = params.pop("deployment_id", None)
        engine = params.pop("engine", deployment_id)
        model = params.get("model", None)
        timeout = params.pop("timeout", None)
        stream = params.get("stream", False)
        headers = params.pop("headers", None)
        request_timeout = params.pop("request_timeout", None)
        typed_api_type = cls._get_api_type_and_version(api_type=api_type)[0]
        if typed_api_type in (util.ApiType.AZURE, util.ApiType.AZURE_AD):
            if deployment_id is None and engine is None:
                raise error.InvalidRequestError(
                    "Must provide an 'engine' or 'deployment_id' parameter to create a %s"
                    % cls,
                    "engine",
                )
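
For context, here is a minimal sketch of the difference that check enforces (placeholder names, 0.x openai client): the plain OpenAI path is satisfied by model alone, while the Azure path needs the deployment name passed as engine (or deployment_id), which is exactly the branch that raises this error.

```python
import openai

# Plain OpenAI: `model` alone is accepted.
openai.Completion.create(model="text-davinci-003", prompt="Hello", max_tokens=5)

# Azure OpenAI: the deployment name must be supplied as `engine` (or `deployment_id`),
# otherwise __prepare_create_request raises the InvalidRequestError above.
openai.api_type = "azure"
openai.api_base = "https://<your-resource>.openai.azure.com/"
openai.api_version = "2022-12-01"
openai.Completion.create(engine="<your-deployment-name>", prompt="Hello", max_tokens=5)
```
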
p2c2e commented 1 year ago

In case someone hits this issue and wants to try the workaround, point to the forked repo branch. For example, if you are using Poetry for dependency management, you can point directly at the GitHub repo and test the fix, like below:

[tool.poetry.dependencies]
.....
llama-index = { git = "https://github.com/p2c2e/llama_index.git", branch="azure_embedding_depid_engine" }
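
If you use pip rather than Poetry, installing straight from that branch should be the equivalent (untested here): `pip install git+https://github.com/p2c2e/llama_index.git@azure_embedding_depid_engine`.
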
afewell commented 1 year ago

I am still having this issue, and others on Discord are still reporting it. I have used Azure with llama-index in the past without problems. Today I built a new environment from the ground up, freshly installed everything, and rebuilt my code based on the example notebook and this thread. I tried both AzureOpenAI and AzureChatOpenAI, and both still produce this same error.

harisrab commented 1 year ago

Any progress on this? I'm facing the same issue on my end. Instantiating a query engine on top of sql_index or vectorstore_index works absolutely fine, but querying through SQLAutoVectorQueryEngine fails.

query_engine = SQLAutoVectorQueryEngine(
    sql_tool,
    vector_tool,
    service_context=service_context
)

res = query_engine.query("Can you find me more information about doctors who can treat cardiovascular issues")

throws


File ~/miniconda3/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py:83, in EngineAPIResource.__prepare_create_request(cls, api_key, api_base, api_type, api_version, organization, **params)
     81 if typed_api_type in (util.ApiType.AZURE, util.ApiType.AZURE_AD):
     82     if deployment_id is None and engine is None:
---> 83         raise error.InvalidRequestError(
     84             "Must provide an 'engine' or 'deployment_id' parameter to create a %s"
     85             % cls,
     86             "engine",
     87         )
     88 else:
     89     if model is None and engine is None:

InvalidRequestError: Must provide an 'engine' or 'deployment_id' parameter to create a <class 'openai.api_resources.completion.Completion'>
qypanzer commented 1 year ago

Fixes #2129. This issue specifically happens with the Azure implementation of OpenAI.

The Azure OpenAI APIs work a bit differently (they use deployment_name, versus engine/model/model_type for OpenAI), though they share some common logic. The fix passes an 'engine' parameter (in addition to the current 'model' param) to satisfy the openai client's expectation. For some reason the llama-index code does not pass the engine param through as-is; instead it uses it as 'model' when calling openai.Embedding.create() inside the llama_index.embeddings.openai.get_embedding method.

return openai.Embedding.create(input=[text], model=engine, engine=engine)["data"][0]["embedding"]

The error originates from the following code in openai-python:

@classmethod
    def __prepare_create_request(
        cls,
        api_key=None,
        api_base=None,
        api_type=None,
        api_version=None,
        organization=None,
        **params,
    ):
        deployment_id = params.pop("deployment_id", None)
        engine = params.pop("engine", deployment_id)
        model = params.get("model", None)
        timeout = params.pop("timeout", None)
        stream = params.get("stream", False)
        headers = params.pop("headers", None)
        request_timeout = params.pop("request_timeout", None)
        typed_api_type = cls._get_api_type_and_version(api_type=api_type)[0]
        if typed_api_type in (util.ApiType.AZURE, util.ApiType.AZURE_AD):
            if deployment_id is None and engine is None:
                raise error.InvalidRequestError(
                    "Must provide an 'engine' or 'deployment_id' parameter to create a %s"
                    % cls,
                    "engine",
                )

Excuse me, but this does not work.

Excuse me, this solution doesn't solve all the similar cases, e.g. GPTDocumentSummaryIndex.from_documents.

I have a very temporary solution: I edited "~\miniconda3\envs\py310\Lib\site-packages\openai\api_resources\abstract\engine_api_resource.py" at line 84, added engine = "td3", and deleted the original raise error.InvalidRequestError(...).

I don't think this is a good solution; could you please help provide a better one? Thanks a lot!

        if typed_api_type in (util.ApiType.AZURE, util.ApiType.AZURE_AD):
            if deployment_id is None and engine is None:
                engine = "td3"
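
A less invasive variant of the same stop-gap (just a sketch of a temporary workaround, not a proper fix) is to wrap openai.Completion.create from your own code instead of editing site-packages, so the change survives reinstalling the package; "td3" below is the deployment name from the snippet above:

```python
import openai

# Keep a reference to the real create method, then inject the Azure deployment
# name whenever the caller did not pass `engine` or `deployment_id`.
_original_create = openai.Completion.create

def _create_with_default_engine(*args, **kwargs):
    if "engine" not in kwargs and "deployment_id" not in kwargs:
        kwargs["engine"] = "td3"
    return _original_create(*args, **kwargs)

openai.Completion.create = _create_with_default_engine
```
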
rafaeldpaula commented 1 year ago

I'm having the same problem; this issue is not fixed.