run-llama / llama_index

LlamaIndex is a data framework for your LLM applications
https://docs.llamaindex.ai
MIT License

InvalidRequestError: Must provide an 'engine' or 'deployment_id' parameter #2129

Closed swilliams57to1 closed 1 year ago

swilliams57to1 commented 1 year ago

Attempting a simple semantic query with Azure (using the embed_batch_size workaround to get past the size issue):

```python
from llama_index import GPTVectorStoreIndex, ServiceContext, SimpleDirectoryReader
from llama_index.embeddings.openai import OpenAIEmbedding

documents = SimpleDirectoryReader('data').load_data()
sc = ServiceContext.from_defaults(embed_model=OpenAIEmbedding(embed_batch_size=1))
index = GPTVectorStoreIndex.from_documents(documents, service_context=sc, engine="text-davinci-003")
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print(response)
```

The query results in the following error (full traceback below):

InvalidRequestError: Must provide an 'engine' or 'deployment_id' parameter to create a <class 'openai.api_resources.completion.Completion'>

Where/how do you provide the engine?

Full error

INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 53631 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 8 tokens

InvalidRequestError Traceback (most recent call last) Cell In[19], line 6 4 index = GPTVectorStoreIndex.from_documents(documents, service_context=sc, engine="text-davinci-003") 5 query_engine = index.as_query_engine() ----> 6 response = query_engine.query("What did the author do growing up?") 7 print(response)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\indices\query\base.py:20, in BaseQueryEngine.query(self, str_or_query_bundle) 18 if isinstance(str_or_query_bundle, str): 19 str_or_query_bundle = QueryBundle(str_or_query_bundle) ---> 20 return self._query(str_or_query_bundle)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\query_engine\retriever_query_engine.py:145, in RetrieverQueryEngine._query(self, query_bundle) 140 self._callback_manager.on_event_end( 141 CBEventType.RETRIEVE, payload={"nodes": nodes}, event_id=retrieve_id 142 ) 144 synth_id = self._callback_manager.on_event_start(CBEventType.SYNTHESIZE) --> 145 response = self._response_synthesizer.synthesize( 146 query_bundle=query_bundle, 147 nodes=nodes, 148 ) 149 self._callback_manager.on_event_end( 150 CBEventType.SYNTHESIZE, payload={"response": response}, event_id=synth_id 151 ) 153 self._callback_manager.on_event_end(CBEventType.QUERY, event_id=query_id)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\indices\query\response_synthesis.py:174, in ResponseSynthesizer.synthesize(self, query_bundle, nodes, additional_source_nodes) 172 if self._response_mode != ResponseMode.NO_TEXT: 173 assert self._response_builder is not None --> 174 response_str = self._response_builder.get_response( 175 query_str=query_bundle.query_str, 176 text_chunks=text_chunks, 177 **self._response_kwargs, 178 ) 179 else: 180 response_str = None

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\indices\response\response_builder.py:349, in CompactAndRefine.get_response(self, query_str, text_chunks, prev_response, **response_kwargs) 343 with temp_set_attrs( 344 self._service_context.prompt_helper, use_chunk_size_limit=False 345 ): 346 new_texts = self._service_context.prompt_helper.compact_text_chunks( 347 max_prompt, text_chunks 348 ) --> 349 response = super().get_response( 350 query_str=query_str, text_chunks=new_texts, prev_response=prev_response 351 ) 352 return response

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\token_counter\token_counter.py:78, in llm_token_counter..wrap..wrapped_llm_predict(_self, *args, kwargs) 76 def wrapped_llm_predict(_self: Any, *args: Any, *kwargs: Any) -> Any: 77 with wrapper_logic(_self): ---> 78 f_return_val = f(_self, args, kwargs) 80 return f_return_val

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\indices\response\response_builder.py:172, in Refine.get_response(self, query_str, text_chunks, prev_response, **response_kwargs) 168 for text_chunk in text_chunks: 169 if prev_response_obj is None: 170 # if this is the first chunk, and text chunk already 171 # is an answer, then return it --> 172 response = self._give_response_single( 173 query_str, 174 text_chunk, 175 ) 176 else: 177 response = self._refine_response_single( 178 prev_response_obj, query_str, text_chunk 179 )

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\indices\response\response_builder.py:212, in Refine._give_response_single(self, query_str, text_chunk, **response_kwargs) 207 if response is None and not self._streaming: 208 event_id = self._callback_llm_on_start() 209 ( 210 response, 211 formatted_prompt, --> 212 ) = self._service_context.llm_predictor.predict( 213 text_qa_template, 214 context_str=cur_text_chunk, 215 ) 216 self._log_prompt_and_response( 217 formatted_prompt, response, log_prefix="Initial" 218 ) 219 self._callback_llm_on_end( 220 formatted_prompt, response, event_id, stage="Initial" 221 )

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\llm_predictor\base.py:230, in LLMPredictor.predict(self, prompt, prompt_args) 220 """Predict the answer to a query. 221 222 Args: (...) 227 228 """ 229 formatted_prompt = prompt.format(llm=self._llm, prompt_args) --> 230 llm_prediction = self._predict(prompt, **prompt_args) 231 logger.debug(llm_prediction) 233 # We assume that the value of formatted_prompt is exactly the thing 234 # eventually sent to OpenAI, or whatever LLM downstream

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\llm_predictor\base.py:204, in LLMPredictor._predict(self, prompt, prompt_args) 202 full_prompt_args = prompt.get_full_format_args(prompt_args) 203 if self.retry_on_throttling: --> 204 llm_prediction = retry_on_exceptions_with_backoff( 205 lambda: llm_chain.predict(full_prompt_args), 206 [ 207 ErrorToRetry(openai.error.RateLimitError), 208 ErrorToRetry(openai.error.ServiceUnavailableError), 209 ErrorToRetry(openai.error.TryAgain), 210 ErrorToRetry( 211 openai.error.APIConnectionError, lambda e: e.should_retry 212 ), 213 ], 214 ) 215 else: 216 llm_prediction = llm_chain.predict(**full_prompt_args)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\utils.py:177, in retry_on_exceptions_with_backoff(lambda_fn, errors_to_retry, max_tries, min_backoff_secs, max_backoff_secs) 175 while True: 176 try: --> 177 return lambda_fn() 178 except exception_class_tuples as e: 179 traceback.print_exc()

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\llm_predictor\base.py:205, in LLMPredictor._predict..() 202 full_prompt_args = prompt.get_full_format_args(prompt_args) 203 if self.retry_on_throttling: 204 llm_prediction = retry_on_exceptions_with_backoff( --> 205 lambda: llm_chain.predict(full_prompt_args), 206 [ 207 ErrorToRetry(openai.error.RateLimitError), 208 ErrorToRetry(openai.error.ServiceUnavailableError), 209 ErrorToRetry(openai.error.TryAgain), 210 ErrorToRetry( 211 openai.error.APIConnectionError, lambda e: e.should_retry 212 ), 213 ], 214 ) 215 else: 216 llm_prediction = llm_chain.predict(full_prompt_args)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\chains\llm.py:213, in LLMChain.predict(self, callbacks, kwargs) 198 def predict(self, callbacks: Callbacks = None, kwargs: Any) -> str: 199 """Format prompt with kwargs and pass to LLM. 200 201 Args: (...) 211 completion = llm.predict(adjective="funny") 212 """ --> 213 return self(kwargs, callbacks=callbacks)[self.output_key]

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\chains\base.py:142, in Chain.call(self, inputs, return_only_outputs, callbacks) 140 except (KeyboardInterrupt, Exception) as e: 141 run_manager.on_chain_error(e) --> 142 raise e 143 run_manager.on_chain_end(outputs) 144 return self.prep_outputs(inputs, outputs, return_only_outputs)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\chains\base.py:136, in Chain.call(self, inputs, return_only_outputs, callbacks) 130 run_manager = callback_manager.on_chain_start( 131 {"name": self.class.name}, 132 inputs, 133 ) 134 try: 135 outputs = ( --> 136 self._call(inputs, run_manager=run_manager) 137 if new_arg_supported 138 else self._call(inputs) 139 ) 140 except (KeyboardInterrupt, Exception) as e: 141 run_manager.on_chain_error(e)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\chains\llm.py:69, in LLMChain._call(self, inputs, run_manager) 64 def _call( 65 self, 66 inputs: Dict[str, Any], 67 run_manager: Optional[CallbackManagerForChainRun] = None, 68 ) -> Dict[str, str]: ---> 69 response = self.generate([inputs], run_manager=run_manager) 70 return self.create_outputs(response)[0]

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\chains\llm.py:79, in LLMChain.generate(self, input_list, run_manager) 77 """Generate LLM result from inputs.""" 78 prompts, stop = self.prep_prompts(input_list, run_manager=run_manager) ---> 79 return self.llm.generate_prompt( 80 prompts, stop, callbacks=run_manager.get_child() if run_manager else None 81 )

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\base.py:127, in BaseLLM.generate_prompt(self, prompts, stop, callbacks) 120 def generate_prompt( 121 self, 122 prompts: List[PromptValue], 123 stop: Optional[List[str]] = None, 124 callbacks: Callbacks = None, 125 ) -> LLMResult: 126 prompt_strings = [p.to_string() for p in prompts] --> 127 return self.generate(prompt_strings, stop=stop, callbacks=callbacks)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\base.py:176, in BaseLLM.generate(self, prompts, stop, callbacks) 174 except (KeyboardInterrupt, Exception) as e: 175 run_manager.on_llm_error(e) --> 176 raise e 177 run_manager.on_llm_end(output) 178 return output

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\base.py:170, in BaseLLM.generate(self, prompts, stop, callbacks) 165 run_manager = callback_manager.on_llm_start( 166 {"name": self.class.name}, prompts 167 ) 168 try: 169 output = ( --> 170 self._generate(prompts, stop=stop, run_manager=run_manager) 171 if new_arg_supported 172 else self._generate(prompts, stop=stop) 173 ) 174 except (KeyboardInterrupt, Exception) as e: 175 run_manager.on_llm_error(e)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\openai.py:306, in BaseOpenAI._generate(self, prompts, stop, run_manager) 304 choices.extend(response["choices"]) 305 else: --> 306 response = completion_with_retry(self, prompt=_prompts, **params) 307 choices.extend(response["choices"]) 308 if not self.streaming: 309 # Can't update token usage if streaming

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\openai.py:106, in completion_with_retry(llm, kwargs) 102 @retry_decorator 103 def _completion_with_retry(kwargs: Any) -> Any: 104 return llm.client.create(kwargs) --> 106 return _completion_with_retry(kwargs)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\tenacity__init__.py:289, in BaseRetrying.wraps..wrapped_f(*args, kw) 287 @functools.wraps(f) 288 def wrapped_f(*args: t.Any, *kw: t.Any) -> t.Any: --> 289 return self(f, args, kw)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\tenacity__init.py:379, in Retrying.call__(self, fn, *args, **kwargs) 377 retry_state = RetryCallState(retry_object=self, fn=fn, args=args, kwargs=kwargs) 378 while True: --> 379 do = self.iter(retry_state=retry_state) 380 if isinstance(do, DoAttempt): 381 try:

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\tenacity__init__.py:314, in BaseRetrying.iter(self, retry_state) 312 is_explicit_retry = fut.failed and isinstance(fut.exception(), TryAgain) 313 if not (is_explicit_retry or self.retry(retry_state)): --> 314 return fut.result() 316 if self.after is not None: 317 self.after(retry_state)

File ~\AppData\Local\Programs\Python\Python311\Lib\concurrent\futures_base.py:449, in Future.result(self, timeout) 447 raise CancelledError() 448 elif self._state == FINISHED: --> 449 return self.__get_result() 451 self._condition.wait(timeout) 453 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:

File ~\AppData\Local\Programs\Python\Python311\Lib\concurrent\futures_base.py:401, in Future.__get_result(self) 399 if self._exception: 400 try: --> 401 raise self._exception 402 finally: 403 # Break a reference cycle with the exception in self._exception 404 self = None

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\tenacity__init.py:382, in Retrying.call__(self, fn, *args, *kwargs) 380 if isinstance(do, DoAttempt): 381 try: --> 382 result = fn(args, **kwargs) 383 except BaseException: # noqa: B902 384 retry_state.set_exception(sys.exc_info()) # type: ignore[arg-type]

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\openai.py:104, in completion_with_retry.._completion_with_retry(kwargs) 102 @retry_decorator 103 def _completion_with_retry(kwargs: Any) -> Any: --> 104 return llm.client.create(**kwargs)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\openai\api_resources\completion.py:25, in Completion.create(cls, *args, *kwargs) 23 while True: 24 try: ---> 25 return super().create(args, **kwargs) 26 except TryAgain as e: 27 if timeout is not None and time.time() > start + timeout:

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\openai\api_resources\abstract\engine_api_resource.py:149, in EngineAPIResource.create(cls, api_key, api_base, api_type, request_id, api_version, organization, params) 127 @classmethod 128 def create( 129 cls, (...) 136 params, 137 ): 138 ( 139 deployment_id, 140 engine, 141 timeout, 142 stream, 143 headers, 144 request_timeout, 145 typed_api_type, 146 requestor, 147 url, 148 params, --> 149 ) = cls.__prepare_create_request( 150 api_key, api_base, api_type, apiversion, organization, **params 151 ) 153 response, , api_key = requestor.request( 154 "post", 155 url, (...) 160 request_timeout=request_timeout, 161 ) 163 if stream: 164 # must be an iterator

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\openai\api_resources\abstract\engine_api_resource.py:83, in EngineAPIResource.__prepare_create_request(cls, api_key, api_base, api_type, api_version, organization, **params) 81 if typed_api_type in (util.ApiType.AZURE, util.ApiType.AZURE_AD): 82 if deployment_id is None and engine is None: ---> 83 raise error.InvalidRequestError( 84 "Must provide an 'engine' or 'deployment_id' parameter to create a %s" 85 % cls, 86 "engine", 87 ) 88 else: 89 if model is None and engine is None:

InvalidRequestError: Must provide an 'engine' or 'deployment_id' parameter to create a <class 'openai.api_resources.completion.Completion'>

Disiok commented 1 year ago

We use text-davinci-003 for completion by default. If you are using Azure, you need to switch the LLM to AzureOpenAI,

with `from langchain.llms import AzureOpenAI`:

```python
import openai
from langchain.llms import AzureOpenAI
from llama_index import LLMPredictor, ServiceContext

llm = AzureOpenAI(deployment_name="<insert deployment name from azure>", model_kwargs={
    "api_key": openai.api_key,
    "api_base": openai.api_base,
    "api_type": openai.api_type,
    "api_version": openai.api_version,
})
llm_predictor = LLMPredictor(llm=llm)

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
)
```
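
If the embeddings also go through Azure, the embedding model typically needs to point at an Azure deployment as well, otherwise you can hit the same engine/deployment error (or a 404) on the embedding side. A minimal sketch, assuming the usual LangChain wrapper and reusing the llm_predictor above; the deployment name is a placeholder and the exact kwargs may vary across langchain/llama-index versions:

```python
import openai
from langchain.embeddings import OpenAIEmbeddings
from llama_index import LangchainEmbedding, ServiceContext

# Placeholder deployment name -- replace with the embedding deployment in your Azure resource.
embedding_llm = LangchainEmbedding(
    OpenAIEmbeddings(
        model="text-embedding-ada-002",
        deployment="<insert embedding deployment name from azure>",
        openai_api_key=openai.api_key,
        openai_api_base=openai.api_base,
        openai_api_type=openai.api_type,
        openai_api_version=openai.api_version,
    ),
    embed_batch_size=1,
)

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=embedding_llm,
)
```
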
swilliams57to1 commented 1 year ago

I have tried the above, although I now see a new error: 'Resource not found'.

The initial part of the code works, and I can see the documents being read and the index created as expected.

Only the index query still fails.

Code

```python
import os
import openai
from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import AzureOpenAI

# Load environment variables (set OPENAI_API_KEY and OPENAI_API_BASE in .env)
load_dotenv()

# Configure OpenAI API
openai.api_type = "azure"
# attempting other versions to no success
openai.api_version = "2022-12-01"
openai.api_version = "2023-03-15-preview"
openai.api_base = os.getenv('OPENAI_API_BASE')
openai.api_key = os.getenv("OPENAI_API_KEY")

from llama_index import SimpleDirectoryReader

documents = SimpleDirectoryReader('dataStore').load_data()

from llama_index.node_parser import SimpleNodeParser

parser = SimpleNodeParser()
nodes = parser.get_nodes_from_documents(documents)

from llama_index import LLMPredictor, GPTVectorStoreIndex, PromptHelper, ServiceContext
from langchain import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# define LLM
llm_predictor = LLMPredictor(llm=AzureOpenAI(temperature=0, model_name="text-davinci-003"))

llm = AzureOpenAI(deployment_name="text-davinci-003", model_kwargs={
    "api_key": openai.api_key,
    "api_base": openai.api_base,
    "api_type": openai.api_type,
    "api_version": openai.api_version,
})

# define prompt helper
# set maximum input size
max_input_size = 4096
# set number of output tokens
num_output = 256
# set maximum chunk overlap
max_chunk_overlap = 20
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    prompt_helper=prompt_helper,
    embed_model=OpenAIEmbedding(embed_batch_size=1),
)

index = GPTVectorStoreIndex(nodes, service_context=service_context, engine="text-davinci-003")
index.storage_context.persist(persist_dir="indexStore")

from llama_index import StorageContext, load_index_from_storage

# rebuild storage context
storage_context = StorageContext.from_defaults(persist_dir="indexStore")
# load index
index = load_index_from_storage(storage_context)

query_engine = index.as_query_engine(service_context=service_context, engine="text-davinci-003")

response = query_engine.query("What did the author do growing up?")
print(response)

print(response.get_formatted_sources())
```


Error message

INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 8 tokens
INFO:openai:error_code=404 error_message='Resource not found' error_param=None error_type=None message='OpenAI API error received' stream_error=False

InvalidRequestError Traceback (most recent call last) Cell In[11], line 1 ----> 1 response = query_engine.query("What did the author do growing up?") 2 print(response) 3 #print(response.get_formatted_sources())

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\indices\query\base.py:20, in BaseQueryEngine.query(self, str_or_query_bundle) 18 if isinstance(str_or_query_bundle, str): 19 str_or_query_bundle = QueryBundle(str_or_query_bundle) ---> 20 return self._query(str_or_query_bundle)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\query_engine\retriever_query_engine.py:145, in RetrieverQueryEngine._query(self, query_bundle) 140 self._callback_manager.on_event_end( 141 CBEventType.RETRIEVE, payload={"nodes": nodes}, event_id=retrieve_id 142 ) 144 synth_id = self._callback_manager.on_event_start(CBEventType.SYNTHESIZE) --> 145 response = self._response_synthesizer.synthesize( 146 query_bundle=query_bundle, 147 nodes=nodes, 148 ) 149 self._callback_manager.on_event_end( 150 CBEventType.SYNTHESIZE, payload={"response": response}, event_id=synth_id 151 ) 153 self._callback_manager.on_event_end(CBEventType.QUERY, event_id=query_id)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\indices\query\response_synthesis.py:174, in ResponseSynthesizer.synthesize(self, query_bundle, nodes, additional_source_nodes) 172 if self._response_mode != ResponseMode.NO_TEXT: 173 assert self._response_builder is not None --> 174 response_str = self._response_builder.get_response( 175 query_str=query_bundle.query_str, 176 text_chunks=text_chunks, 177 **self._response_kwargs, 178 ) 179 else: 180 response_str = None

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\indices\response\response_builder.py:349, in CompactAndRefine.get_response(self, query_str, text_chunks, prev_response, **response_kwargs) 343 with temp_set_attrs( 344 self._service_context.prompt_helper, use_chunk_size_limit=False 345 ): 346 new_texts = self._service_context.prompt_helper.compact_text_chunks( 347 max_prompt, text_chunks 348 ) --> 349 response = super().get_response( 350 query_str=query_str, text_chunks=new_texts, prev_response=prev_response 351 ) 352 return response

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\token_counter\token_counter.py:78, in llm_token_counter..wrap..wrapped_llm_predict(_self, *args, kwargs) 76 def wrapped_llm_predict(_self: Any, *args: Any, *kwargs: Any) -> Any: 77 with wrapper_logic(_self): ---> 78 f_return_val = f(_self, args, kwargs) 80 return f_return_val

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\indices\response\response_builder.py:172, in Refine.get_response(self, query_str, text_chunks, prev_response, **response_kwargs) 168 for text_chunk in text_chunks: 169 if prev_response_obj is None: 170 # if this is the first chunk, and text chunk already 171 # is an answer, then return it --> 172 response = self._give_response_single( 173 query_str, 174 text_chunk, 175 ) 176 else: 177 response = self._refine_response_single( 178 prev_response_obj, query_str, text_chunk 179 )

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\indices\response\response_builder.py:212, in Refine._give_response_single(self, query_str, text_chunk, **response_kwargs) 207 if response is None and not self._streaming: 208 event_id = self._callback_llm_on_start() 209 ( 210 response, 211 formatted_prompt, --> 212 ) = self._service_context.llm_predictor.predict( 213 text_qa_template, 214 context_str=cur_text_chunk, 215 ) 216 self._log_prompt_and_response( 217 formatted_prompt, response, log_prefix="Initial" 218 ) 219 self._callback_llm_on_end( 220 formatted_prompt, response, event_id, stage="Initial" 221 )

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\llm_predictor\base.py:230, in LLMPredictor.predict(self, prompt, prompt_args) 220 """Predict the answer to a query. 221 222 Args: (...) 227 228 """ 229 formatted_prompt = prompt.format(llm=self._llm, prompt_args) --> 230 llm_prediction = self._predict(prompt, **prompt_args) 231 logger.debug(llm_prediction) 233 # We assume that the value of formatted_prompt is exactly the thing 234 # eventually sent to OpenAI, or whatever LLM downstream

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\llm_predictor\base.py:204, in LLMPredictor._predict(self, prompt, prompt_args) 202 full_prompt_args = prompt.get_full_format_args(prompt_args) 203 if self.retry_on_throttling: --> 204 llm_prediction = retry_on_exceptions_with_backoff( 205 lambda: llm_chain.predict(full_prompt_args), 206 [ 207 ErrorToRetry(openai.error.RateLimitError), 208 ErrorToRetry(openai.error.ServiceUnavailableError), 209 ErrorToRetry(openai.error.TryAgain), 210 ErrorToRetry( 211 openai.error.APIConnectionError, lambda e: e.should_retry 212 ), 213 ], 214 ) 215 else: 216 llm_prediction = llm_chain.predict(**full_prompt_args)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\utils.py:177, in retry_on_exceptions_with_backoff(lambda_fn, errors_to_retry, max_tries, min_backoff_secs, max_backoff_secs) 175 while True: 176 try: --> 177 return lambda_fn() 178 except exception_class_tuples as e: 179 traceback.print_exc()

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\llama_index\llm_predictor\base.py:205, in LLMPredictor._predict..() 202 full_prompt_args = prompt.get_full_format_args(prompt_args) 203 if self.retry_on_throttling: 204 llm_prediction = retry_on_exceptions_with_backoff( --> 205 lambda: llm_chain.predict(full_prompt_args), 206 [ 207 ErrorToRetry(openai.error.RateLimitError), 208 ErrorToRetry(openai.error.ServiceUnavailableError), 209 ErrorToRetry(openai.error.TryAgain), 210 ErrorToRetry( 211 openai.error.APIConnectionError, lambda e: e.should_retry 212 ), 213 ], 214 ) 215 else: 216 llm_prediction = llm_chain.predict(full_prompt_args)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\chains\llm.py:213, in LLMChain.predict(self, callbacks, kwargs) 198 def predict(self, callbacks: Callbacks = None, kwargs: Any) -> str: 199 """Format prompt with kwargs and pass to LLM. 200 201 Args: (...) 211 completion = llm.predict(adjective="funny") 212 """ --> 213 return self(kwargs, callbacks=callbacks)[self.output_key]

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\chains\base.py:142, in Chain.call(self, inputs, return_only_outputs, callbacks) 140 except (KeyboardInterrupt, Exception) as e: 141 run_manager.on_chain_error(e) --> 142 raise e 143 run_manager.on_chain_end(outputs) 144 return self.prep_outputs(inputs, outputs, return_only_outputs)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\chains\base.py:136, in Chain.call(self, inputs, return_only_outputs, callbacks) 130 run_manager = callback_manager.on_chain_start( 131 {"name": self.class.name}, 132 inputs, 133 ) 134 try: 135 outputs = ( --> 136 self._call(inputs, run_manager=run_manager) 137 if new_arg_supported 138 else self._call(inputs) 139 ) 140 except (KeyboardInterrupt, Exception) as e: 141 run_manager.on_chain_error(e)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\chains\llm.py:69, in LLMChain._call(self, inputs, run_manager) 64 def _call( 65 self, 66 inputs: Dict[str, Any], 67 run_manager: Optional[CallbackManagerForChainRun] = None, 68 ) -> Dict[str, str]: ---> 69 response = self.generate([inputs], run_manager=run_manager) 70 return self.create_outputs(response)[0]

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\chains\llm.py:79, in LLMChain.generate(self, input_list, run_manager) 77 """Generate LLM result from inputs.""" 78 prompts, stop = self.prep_prompts(input_list, run_manager=run_manager) ---> 79 return self.llm.generate_prompt( 80 prompts, stop, callbacks=run_manager.get_child() if run_manager else None 81 )

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\base.py:127, in BaseLLM.generate_prompt(self, prompts, stop, callbacks) 120 def generate_prompt( 121 self, 122 prompts: List[PromptValue], 123 stop: Optional[List[str]] = None, 124 callbacks: Callbacks = None, 125 ) -> LLMResult: 126 prompt_strings = [p.to_string() for p in prompts] --> 127 return self.generate(prompt_strings, stop=stop, callbacks=callbacks)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\base.py:176, in BaseLLM.generate(self, prompts, stop, callbacks) 174 except (KeyboardInterrupt, Exception) as e: 175 run_manager.on_llm_error(e) --> 176 raise e 177 run_manager.on_llm_end(output) 178 return output

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\base.py:170, in BaseLLM.generate(self, prompts, stop, callbacks) 165 run_manager = callback_manager.on_llm_start( 166 {"name": self.class.name}, prompts 167 ) 168 try: 169 output = ( --> 170 self._generate(prompts, stop=stop, run_manager=run_manager) 171 if new_arg_supported 172 else self._generate(prompts, stop=stop) 173 ) 174 except (KeyboardInterrupt, Exception) as e: 175 run_manager.on_llm_error(e)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\openai.py:306, in BaseOpenAI._generate(self, prompts, stop, run_manager) 304 choices.extend(response["choices"]) 305 else: --> 306 response = completion_with_retry(self, prompt=_prompts, **params) 307 choices.extend(response["choices"]) 308 if not self.streaming: 309 # Can't update token usage if streaming

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\openai.py:106, in completion_with_retry(llm, kwargs) 102 @retry_decorator 103 def _completion_with_retry(kwargs: Any) -> Any: 104 return llm.client.create(kwargs) --> 106 return _completion_with_retry(kwargs)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\tenacity__init__.py:289, in BaseRetrying.wraps..wrapped_f(*args, kw) 287 @functools.wraps(f) 288 def wrapped_f(*args: t.Any, *kw: t.Any) -> t.Any: --> 289 return self(f, args, kw)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\tenacity__init.py:379, in Retrying.call__(self, fn, *args, **kwargs) 377 retry_state = RetryCallState(retry_object=self, fn=fn, args=args, kwargs=kwargs) 378 while True: --> 379 do = self.iter(retry_state=retry_state) 380 if isinstance(do, DoAttempt): 381 try:

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\tenacity__init__.py:314, in BaseRetrying.iter(self, retry_state) 312 is_explicit_retry = fut.failed and isinstance(fut.exception(), TryAgain) 313 if not (is_explicit_retry or self.retry(retry_state)): --> 314 return fut.result() 316 if self.after is not None: 317 self.after(retry_state)

File ~\AppData\Local\Programs\Python\Python311\Lib\concurrent\futures_base.py:449, in Future.result(self, timeout) 447 raise CancelledError() 448 elif self._state == FINISHED: --> 449 return self.__get_result() 451 self._condition.wait(timeout) 453 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:

File ~\AppData\Local\Programs\Python\Python311\Lib\concurrent\futures_base.py:401, in Future.__get_result(self) 399 if self._exception: 400 try: --> 401 raise self._exception 402 finally: 403 # Break a reference cycle with the exception in self._exception 404 self = None

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\tenacity__init.py:382, in Retrying.call__(self, fn, *args, *kwargs) 380 if isinstance(do, DoAttempt): 381 try: --> 382 result = fn(args, **kwargs) 383 except BaseException: # noqa: B902 384 retry_state.set_exception(sys.exc_info()) # type: ignore[arg-type]

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\langchain\llms\openai.py:104, in completion_with_retry.._completion_with_retry(kwargs) 102 @retry_decorator 103 def _completion_with_retry(kwargs: Any) -> Any: --> 104 return llm.client.create(**kwargs)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\openai\api_resources\completion.py:25, in Completion.create(cls, *args, *kwargs) 23 while True: 24 try: ---> 25 return super().create(args, **kwargs) 26 except TryAgain as e: 27 if timeout is not None and time.time() > start + timeout:

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\openai\api_resources\abstract\engine_api_resource.py:153, in EngineAPIResource.create(cls, api_key, api_base, api_type, request_id, api_version, organization, params) 127 @classmethod 128 def create( 129 cls, (...) 136 params, 137 ): 138 ( 139 deployment_id, 140 engine, (...) 150 api_key, api_base, api_type, apiversion, organization, **params 151 ) --> 153 response, , api_key = requestor.request( 154 "post", 155 url, 156 params=params, 157 headers=headers, 158 stream=stream, 159 request_id=request_id, 160 request_timeout=request_timeout, 161 ) 163 if stream: 164 # must be an iterator 165 assert not isinstance(response, OpenAIResponse)

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\openai\api_requestor.py:230, in APIRequestor.request(self, method, url, params, headers, files, stream, request_id, request_timeout) 209 def request( 210 self, 211 method, (...) 218 request_timeout: Optional[Union[float, Tuple[float, float]]] = None, 219 ) -> Tuple[Union[OpenAIResponse, Iterator[OpenAIResponse]], bool, str]: 220 result = self.request_raw( 221 method.lower(), 222 url, (...) 228 request_timeout=request_timeout, 229 ) --> 230 resp, got_stream = self._interpret_response(result, stream) 231 return resp, got_stream, self.api_key

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\openai\api_requestor.py:624, in APIRequestor._interpret_response(self, result, stream) 616 return ( 617 self._interpret_response_line( 618 line, result.status_code, result.headers, stream=True 619 ) 620 for line in parse_stream(result.iter_lines()) 621 ), True 622 else: 623 return ( --> 624 self._interpret_response_line( 625 result.content.decode("utf-8"), 626 result.status_code, 627 result.headers, 628 stream=False, 629 ), 630 False, 631 )

File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\openai\api_requestor.py:687, in APIRequestor._interpret_response_line(self, rbody, rcode, rheaders, stream) 685 stream_error = stream and "error" in resp.data 686 if stream_error or not 200 <= rcode < 300: --> 687 raise self.handle_error_response( 688 rbody, rcode, resp.data, rheaders, stream_error=stream_error 689 ) 690 return resp

InvalidRequestError: Resource not found

acshulk commented 1 year ago

I am experiencing a similar issue. No luck with the TypeScript version either.

p2c2e commented 1 year ago

Fixes #2129. This issue specifically happens with the Azure implementation of OpenAI.

The Azure OpenAI APIs work a bit differently (they use deployment_name, versus engine/model/model_type for OpenAI), though they share some common logic. The fix passes an 'engine' parameter (in addition to the current 'model' param) to satisfy the openai client's expectation. For some reason the llama-index code does not pass the engine param through as-is; instead it uses it as 'model' when calling openai.Embedding.create() inside the llama_index.embeddings.openai.get_embedding method.

return openai.Embedding.create(input=[text], model=engine, engine=engine)["data"][0]["embedding"]

The error originates from the following code in openai-python:

@classmethod
    def __prepare_create_request(
        cls,
        api_key=None,
        api_base=None,
        api_type=None,
        api_version=None,
        organization=None,
        **params,
    ):
        deployment_id = params.pop("deployment_id", None)
        engine = params.pop("engine", deployment_id)
        model = params.get("model", None)
        timeout = params.pop("timeout", None)
        stream = params.get("stream", False)
        headers = params.pop("headers", None)
        request_timeout = params.pop("request_timeout", None)
        typed_api_type = cls._get_api_type_and_version(api_type=api_type)[0]
        if typed_api_type in (util.ApiType.AZURE, util.ApiType.AZURE_AD):
            if deployment_id is None and engine is None:
                raise error.InvalidRequestError(
                    "Must provide an 'engine' or 'deployment_id' parameter to create a %s"
                    % cls,
                    "engine",
                )
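
For context, here is a minimal sketch of the difference that check enforces (placeholder names, 0.x openai client): the plain OpenAI path is satisfied by model alone, while the Azure path needs the deployment name passed as engine (or deployment_id), which is exactly the branch that raises this error.

```python
import openai

# Plain OpenAI: `model` alone is accepted.
openai.Completion.create(model="text-davinci-003", prompt="Hello", max_tokens=5)

# Azure OpenAI: the deployment name must be supplied as `engine` (or `deployment_id`),
# otherwise __prepare_create_request raises the InvalidRequestError above.
openai.api_type = "azure"
openai.api_base = "https://<your-resource>.openai.azure.com/"
openai.api_version = "2022-12-01"
openai.Completion.create(engine="<your-deployment-name>", prompt="Hello", max_tokens=5)
```
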
p2c2e commented 1 year ago

In case someone hits this issue and wants to try the workaround, point to the forked repo branch. For example, if you are using Poetry for dependency management, you can point directly at the GitHub repo and test the fix, like below:

[tool.poetry.dependencies]
.....
llama-index = { git = "https://github.com/p2c2e/llama_index.git", branch="azure_embedding_depid_engine" }
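
If you use pip rather than Poetry, installing straight from that branch should be the equivalent (untested here): `pip install git+https://github.com/p2c2e/llama_index.git@azure_embedding_depid_engine`.
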
afewell commented 1 year ago

I am still having this issue, and others on Discord are still reporting it. I have used Azure with llama-index in the past without problems. Today I built a new environment from the ground up, freshly installed everything, and rebuilt my code based on the example notebook and this thread. I tried both AzureOpenAI and AzureChatOpenAI, and both still produce this same error.

harisrab commented 1 year ago

Any progress on this? I'm facing the same issue on my end. Instantiating a query engine on top of sql_index or vectorstore_index works absolutely fine, but querying through SQLAutoVectorQueryEngine fails.

query_engine = SQLAutoVectorQueryEngine(
    sql_tool,
    vector_tool,
    service_context=service_context
)

res = query_engine.query("Can you find me more information about doctors who can treat cardiovascular issues")

throws


File ~/miniconda3/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py:83, in EngineAPIResource.__prepare_create_request(cls, api_key, api_base, api_type, api_version, organization, **params)
     81 if typed_api_type in (util.ApiType.AZURE, util.ApiType.AZURE_AD):
     82     if deployment_id is None and engine is None:
---> 83         raise error.InvalidRequestError(
     84             "Must provide an 'engine' or 'deployment_id' parameter to create a %s"
     85             % cls,
     86             "engine",
     87         )
     88 else:
     89     if model is None and engine is None:

InvalidRequestError: Must provide an 'engine' or 'deployment_id' parameter to create a <class 'openai.api_resources.completion.Completion'>
qypanzer commented 1 year ago

Fixes #2129. This issue specifically happens with the Azure implementation of OpenAI.

The Azure OpenAI APIs work a bit differently (they use deployment_name, versus engine/model/model_type for OpenAI), though they share some common logic. The fix passes an 'engine' parameter (in addition to the current 'model' param) to satisfy the openai client's expectation. For some reason the llama-index code does not pass the engine param through as-is; instead it uses it as 'model' when calling openai.Embedding.create() inside the llama_index.embeddings.openai.get_embedding method.

return openai.Embedding.create(input=[text], model=engine, engine=engine)["data"][0]["embedding"]

The error originates from the following code in openai-python:

@classmethod
    def __prepare_create_request(
        cls,
        api_key=None,
        api_base=None,
        api_type=None,
        api_version=None,
        organization=None,
        **params,
    ):
        deployment_id = params.pop("deployment_id", None)
        engine = params.pop("engine", deployment_id)
        model = params.get("model", None)
        timeout = params.pop("timeout", None)
        stream = params.get("stream", False)
        headers = params.pop("headers", None)
        request_timeout = params.pop("request_timeout", None)
        typed_api_type = cls._get_api_type_and_version(api_type=api_type)[0]
        if typed_api_type in (util.ApiType.AZURE, util.ApiType.AZURE_AD):
            if deployment_id is None and engine is None:
                raise error.InvalidRequestError(
                    "Must provide an 'engine' or 'deployment_id' parameter to create a %s"
                    % cls,
                    "engine",
                )

Excuse me, but this does not work.

Excuse me, this solution doesn't solve all the similar cases, e.g. GPTDocumentSummaryIndex.from_documents.

I have a very temporary solution: I edited "~\miniconda3\envs\py310\Lib\site-packages\openai\api_resources\abstract\engine_api_resource.py" at line 84, added engine = "td3", and deleted the original raise error.InvalidRequestError(...).

I don't think this is a good solution; could you please help provide a better one? Thanks a lot!

        if typed_api_type in (util.ApiType.AZURE, util.ApiType.AZURE_AD):
            if deployment_id is None and engine is None:
                engine = "td3"
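
A less invasive variant of the same stop-gap (just a sketch of a temporary workaround, not a proper fix) is to wrap openai.Completion.create from your own code instead of editing site-packages, so the change survives reinstalling the package; "td3" below is the deployment name from the snippet above:

```python
import openai

# Keep a reference to the real create method, then inject the Azure deployment
# name whenever the caller did not pass `engine` or `deployment_id`.
_original_create = openai.Completion.create

def _create_with_default_engine(*args, **kwargs):
    if "engine" not in kwargs and "deployment_id" not in kwargs:
        kwargs["engine"] = "td3"
    return _original_create(*args, **kwargs)

openai.Completion.create = _create_with_default_engine
```
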
rafaeldpaula commented 1 year ago

I'm having the same problem; this issue is not fixed.