run-llama / llama_index

LlamaIndex is a data framework for your LLM applications
https://docs.llamaindex.ai
MIT License

Error in v0.6.5 in Azure environment #3264

Closed ryugonomura closed 1 year ago

ryugonomura commented 1 year ago

I hoped this would be fixed by #3140, but the following error still occurs when executing query_engine.query("sample string").

Traceback (most recent call last):
  File "/home/product/.cache/pypoetry/virtualenvs/chatbot-eVeRnpX_-py3.11/lib/python3.11/site-packages/tenacity/__init__.py", line 382, in __call__
    result = fn(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^
  File "/home/product/.cache/pypoetry/virtualenvs/chatbot-eVeRnpX_-py3.11/lib/python3.11/site-packages/llama_index/embeddings/openai.py", line 105, in get_embedding
    return openai.Embedding.create(input=[text], model=engine, **kwargs)["data"][0][
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/product/.cache/pypoetry/virtualenvs/chatbot-eVeRnpX_-py3.11/lib/python3.11/site-packages/openai/api_resources/embedding.py", line 33, in create
    response = super().create(*args, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/product/.cache/pypoetry/virtualenvs/chatbot-eVeRnpX_-py3.11/lib/python3.11/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 149, in create
    ) = cls.__prepare_create_request(
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/product/.cache/pypoetry/virtualenvs/chatbot-eVeRnpX_-py3.11/lib/python3.11/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 83, in __prepare_create_request
    raise error.InvalidRequestError(
openai.error.InvalidRequestError: Must provide an 'engine' or 'deployment_id' parameter to create a <class 'openai.api_resources.embedding.Embedding'>
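
For context, the call at the bottom of the traceback passes the deployment via model=, but when openai.api_type is "azure" the openai SDK (see the check raised from engine_api_resource.py line 83 above) only accepts it as engine or deployment_id. A minimal sketch of the difference, assuming an Azure resource with an embedding deployment named text-embedding-ada-002 (endpoint, key, and deployment name are placeholders):

import openai

# Azure OpenAI configuration (all values are placeholders)
openai.api_type = "azure"
openai.api_base = "https://<your-resource>.openai.azure.com/"
openai.api_version = "2022-12-01"
openai.api_key = "<key>"

# Fails on Azure: only 'model' is supplied, so the SDK raises
# "Must provide an 'engine' or 'deployment_id' parameter"
# openai.Embedding.create(input=["sample string"], model="text-embedding-ada-002")

# Works on Azure: the deployment name is passed as 'engine'
response = openai.Embedding.create(
    input=["sample string"],
    engine="text-embedding-ada-002",  # your embedding deployment name
)
print(len(response["data"][0]["embedding"]))
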
mrcmoresi commented 1 year ago

I have the same issue here (#3174); it fails at creation time of the Embedding class.

Hironsan commented 1 year ago

Is this working?

import openai
from llama_index import OpenAIEmbedding, ServiceContext, LLMPredictor, GPTVectorStoreIndex
from langchain.chat_models.azure_openai import AzureChatOpenAI

openai.api_type = "azure"
openai.api_key = ""
openai.api_base = ""
openai.api_version = "2022-12-01"
embed_deployment_name = ""
chat_deployment_name = ""

embed_model = OpenAIEmbedding(deployment_name=embed_deployment_name, embed_batch_size=1)
chat_model = AzureChatOpenAI(
    openai_api_key=openai.api_key,
    openai_api_type=openai.api_type,
    openai_api_base=openai.api_base,
    openai_api_version="2023-03-15-preview",
    deployment_name=chat_deployment_name,
)
llm_predictor = LLMPredictor(chat_model)
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model)
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
mrcmoresi commented 1 year ago

Is this working?

import openai
from llama_index import OpenAIEmbedding, ServiceContext, LLMPredictor, GPTVectorStoreIndex
from langchain.chat_models.azure_openai import AzureChatOpenAI

openai.api_type = "azure"
openai.api_key = ""
openai.api_base = ""
openai.api_version = "2022-12-01"
embed_deployment_name = ""
chat_deployment_name = ""

embed_model = OpenAIEmbedding(deployment_name=embed_deployment_name, embed_batch_size=1)
chat_model = AzureChatOpenAI(
    openai_api_key=openai.api_key,
    openai_api_type=openai.api_type,
    openai_api_base=openai.api_base,
    openai_api_version="2023-03-15-preview",
    deployment_name=chat_deployment_name,
)
llm_predictor = LLMPredictor(chat_model)
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model)
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)

This works:

import os
import openai
from dotenv import load_dotenv
from llama_index import OpenAIEmbedding, ServiceContext, LLMPredictor, GPTVectorStoreIndex
from langchain.chat_models.azure_openai import AzureChatOpenAI

load_dotenv()
openai.api_key = os.getenv('OPENAI_API_KEY')
openai.api_type = os.getenv('OPENAI_API_TYPE')
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_version = os.getenv("OPENAI_API_VERSION")

embed_deployment_name = "text-embedding-ada-002"
chat_deployment_name = "gpt-35-turbo"

embed_model = OpenAIEmbedding(deployment_name=embed_deployment_name, embed_batch_size=1)
chat_model = AzureChatOpenAI(
    openai_api_key=openai.api_key,
    openai_api_type=openai.api_type,
    openai_api_base=openai.api_base,
    openai_api_version="2023-03-15-preview",
    deployment_name=chat_deployment_name,
)
llm_predictor = LLMPredictor(chat_model)
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model)
index = GPTVectorStoreIndex.from_documents(documents[:2], service_context=service_context)

but if I change the type of index I'm creating to a GPTDocumentSummaryIndex, it fails:

import os
import openai
from dotenv import load_dotenv
from llama_index import OpenAIEmbedding, ServiceContext, LLMPredictor, GPTVectorStoreIndex
from llama_index.indices.document_summary.base import GPTDocumentSummaryIndex
from llama_index.indices.query.response_synthesis import ResponseSynthesizer
from langchain.chat_models.azure_openai import AzureChatOpenAI

load_dotenv()
openai.api_key = os.getenv('OPENAI_API_KEY')
openai.api_type = os.getenv('OPENAI_API_TYPE')
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_version = os.getenv("OPENAI_API_VERSION")

embed_deployment_name = "text-embedding-ada-002"
chat_deployment_name = "gpt-35-turbo"

embed_model = OpenAIEmbedding(deployment_name=embed_deployment_name, embed_batch_size=1)
chat_model = AzureChatOpenAI(
    openai_api_key=openai.api_key,
    openai_api_type=openai.api_type,
    openai_api_base=openai.api_base,
    openai_api_version="2023-03-15-preview",
    deployment_name=chat_deployment_name,
)
llm_predictor = LLMPredictor(chat_model)
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model)
#index = GPTVectorStoreIndex.from_documents(documents[:2], service_context=service_context)
response_synthesizer = ResponseSynthesizer.from_args(response_mode="tree_summarize", use_async=True)
doc_summary_index = GPTDocumentSummaryIndex.from_documents(
    documents[:2], 
    service_context=service_context,
    response_synthesizer=response_synthesizer
)

It still shows the missing 'engine' or 'deployment_id' error:

---------------------------------------------------------------------------
InvalidRequestError                       Traceback (most recent call last)
Cell In[23], line 28
     26 #index = GPTVectorStoreIndex.from_documents(documents[:2], service_context=service_context)
     27 response_synthesizer = ResponseSynthesizer.from_args(response_mode="tree_summarize", use_async=True)
---> 28 doc_summary_index = GPTDocumentSummaryIndex.from_documents(
     29     documents[:2], 
     30     service_context=service_context,
     31     response_synthesizer=response_synthesizer
     32 )

File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/base.py:93, in BaseGPTIndex.from_documents(cls, documents, storage_context, service_context, **kwargs)
     89     docstore.set_document_hash(doc.get_doc_id(), doc.get_doc_hash())
     91 nodes = service_context.node_parser.get_nodes_from_documents(documents)
---> 93 return cls(
     94     nodes=nodes,
     95     storage_context=storage_context,
     96     service_context=service_context,
     97     **kwargs,
     98 )

File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/document_summary/base.py:65, in GPTDocumentSummaryIndex.__init__(self, nodes, index_struct, service_context, response_synthesizer, summary_query, **kwargs)
     61 self._response_synthesizer = (
     62     response_synthesizer or ResponseSynthesizer.from_args()
     63 )
     64 self._summary_query = summary_query or "summarize:"
---> 65 super().__init__(
     66     nodes=nodes,
     67     index_struct=index_struct,
     68     service_context=service_context,
     69     **kwargs,
     70 )

File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/base.py:65, in BaseGPTIndex.__init__(self, nodes, index_struct, storage_context, service_context, **kwargs)
     63 if index_struct is None:
     64     assert nodes is not None
---> 65     index_struct = self.build_index_from_nodes(nodes)
     66 self._index_struct = index_struct
     67 self._storage_context.index_store.add_index_struct(self._index_struct)

File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/token_counter/token_counter.py:78, in llm_token_counter..wrap..wrapped_llm_predict(_self, *args, **kwargs)
     76 def wrapped_llm_predict(_self: Any, *args: Any, **kwargs: Any) -> Any:
     77     with wrapper_logic(_self):
---> 78         f_return_val = f(_self, *args, **kwargs)
     80     return f_return_val

File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/base.py:153, in BaseGPTIndex.build_index_from_nodes(self, nodes)
    151 """Build the index from nodes."""
    152 self._docstore.add_documents(nodes, allow_update=True)
--> 153 return self._build_index_from_nodes(nodes)

File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/document_summary/base.py:150, in GPTDocumentSummaryIndex._build_index_from_nodes(self, nodes)
    147 # first get doc_id to nodes_dict, generate a summary for each doc_id,
    148 # then build the index struct
    149 index_struct = IndexDocumentSummary()
--> 150 self._add_nodes_to_index(index_struct, nodes)
    151 return index_struct

File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/document_summary/base.py:128, in GPTDocumentSummaryIndex._add_nodes_to_index(self, index_struct, nodes)
    126 nodes_with_scores = [NodeWithScore(n) for n in nodes]
    127 # get the summary for each doc_id
--> 128 summary_response = self._response_synthesizer.synthesize(
    129     query_bundle=QueryBundle(self._summary_query),
    130     nodes=nodes_with_scores,
    131 )
    132 summary_response = cast(Response, summary_response)
    133 summary_node_dict[doc_id] = Node(
    134     summary_response.response,
    135     relationships={DocumentRelationship.SOURCE: doc_id},
    136 )

File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/query/response_synthesis.py:163, in ResponseSynthesizer.synthesize(self, query_bundle, nodes, additional_source_nodes)
    161 if self._response_mode != ResponseMode.NO_TEXT:
    162     assert self._response_builder is not None
--> 163     response_str = self._response_builder.get_response(
    164         query_str=query_bundle.query_str,
    165         text_chunks=text_chunks,
    166         **self._response_kwargs,
    167     )
    168 else:
    169     response_str = None

File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/token_counter/token_counter.py:78, in llm_token_counter..wrap..wrapped_llm_predict(_self, *args, **kwargs)
     76 def wrapped_llm_predict(_self: Any, *args: Any, **kwargs: Any) -> Any:
     77     with wrapper_logic(_self):
---> 78         f_return_val = f(_self, *args, **kwargs)
     80     return f_return_val

File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/response/response_builder.py:376, in TreeSummarize.get_response(self, query_str, text_chunks, prev_response, num_children, **response_kwargs)
    371 root_node_ids = index_graph.root_nodes
    372 root_nodes = {
    373     index: index_builder.docstore.get_node(node_id)
    374     for index, node_id in root_node_ids.items()
    375 }
--> 376 return self._get_tree_response_over_root_nodes(
    377     query_str, prev_response, root_nodes, text_qa_template
    378 )

File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/response/response_builder.py:425, in TreeSummarize._get_tree_response_over_root_nodes(self, query_str, prev_response, root_nodes, text_qa_template)
    421 node_text = self._service_context.prompt_helper.get_text_from_nodes(
    422     node_list, prompt=text_qa_template
    423 )
    424 # NOTE: the final response could be a string or a stream
--> 425 response = super().get_response(
    426     query_str=query_str,
    427     text_chunks=[node_text],
    428     prev_response=prev_response,
    429 )
    430 if isinstance(response, str):
    431     response = response or "Empty Response"

File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/token_counter/token_counter.py:78, in llm_token_counter..wrap..wrapped_llm_predict(_self, *args, **kwargs)
     76 def wrapped_llm_predict(_self: Any, *args: Any, **kwargs: Any) -> Any:
     77     with wrapper_logic(_self):
---> 78         f_return_val = f(_self, *args, **kwargs)
     80     return f_return_val

File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/response/response_builder.py:134, in Refine.get_response(self, query_str, text_chunks, prev_response, **response_kwargs)
    130 for text_chunk in text_chunks:
    131     if prev_response_obj is None:
    132         # if this is the first chunk, and text chunk already
    133         # is an answer, then return it
--> 134         response = self._give_response_single(
    135             query_str,
    136             text_chunk,
    137         )
    138     else:
    139         response = self._refine_response_single(
    140             prev_response_obj, query_str, text_chunk
    141         )

File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/indices/response/response_builder.py:171, in Refine._give_response_single(self, query_str, text_chunk, **response_kwargs)
    166 for cur_text_chunk in text_chunks:
    167     if response is None and not self._streaming:
    168         (
    169             response,
    170             formatted_prompt,
--> 171         ) = self._service_context.llm_predictor.predict(
    172             text_qa_template,
    173             context_str=cur_text_chunk,
    174         )
    175         self._log_prompt_and_response(
    176             formatted_prompt, response, log_prefix="Initial"
    177         )
    178     elif response is None and self._streaming:

File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/llm_predictor/base.py:242, in LLMPredictor.predict(self, prompt, **prompt_args)
    237 event_id = self.callback_manager.on_event_start(
    238     CBEventType.LLM,
    239     payload=llm_payload,
    240 )
    241 formatted_prompt = prompt.format(llm=self._llm, **prompt_args)
--> 242 llm_prediction = self._predict(prompt, **prompt_args)
    243 logger.debug(llm_prediction)
    245 # We assume that the value of formatted_prompt is exactly the thing
    246 # eventually sent to OpenAI, or whatever LLM downstream

File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/llm_predictor/base.py:210, in LLMPredictor._predict(self, prompt, **prompt_args)
    208 full_prompt_args = prompt.get_full_format_args(prompt_args)
    209 if self.retry_on_throttling:
--> 210     llm_prediction = retry_on_exceptions_with_backoff(
    211         lambda: llm_chain.predict(**full_prompt_args),
    212         [
    213             ErrorToRetry(openai.error.RateLimitError),
    214             ErrorToRetry(openai.error.ServiceUnavailableError),
    215             ErrorToRetry(openai.error.TryAgain),
    216             ErrorToRetry(
    217                 openai.error.APIConnectionError, lambda e: e.should_retry
    218             ),
    219         ],
    220     )
    221 else:
    222     llm_prediction = llm_chain.predict(**full_prompt_args)

File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/utils.py:177, in retry_on_exceptions_with_backoff(lambda_fn, errors_to_retry, max_tries, min_backoff_secs, max_backoff_secs)
    175 while True:
    176     try:
--> 177         return lambda_fn()
    178     except exception_class_tuples as e:
    179         traceback.print_exc()

File /anaconda/envs/tf/lib/python3.9/site-packages/llama_index/llm_predictor/base.py:211, in LLMPredictor._predict..()
    208 full_prompt_args = prompt.get_full_format_args(prompt_args)
    209 if self.retry_on_throttling:
    210     llm_prediction = retry_on_exceptions_with_backoff(
--> 211         lambda: llm_chain.predict(**full_prompt_args),
    212         [
    213             ErrorToRetry(openai.error.RateLimitError),
    214             ErrorToRetry(openai.error.ServiceUnavailableError),
    215             ErrorToRetry(openai.error.TryAgain),
    216             ErrorToRetry(
    217                 openai.error.APIConnectionError, lambda e: e.should_retry
    218             ),
    219         ],
    220     )
    221 else:
    222     llm_prediction = llm_chain.predict(**full_prompt_args)

File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/chains/llm.py:213, in LLMChain.predict(self, callbacks, **kwargs)
    198 def predict(self, callbacks: Callbacks = None, **kwargs: Any) -> str:
    199     """Format prompt with kwargs and pass to LLM.
    200 
    201     Args:
   (...)
    211             completion = llm.predict(adjective="funny")
    212     """
--> 213     return self(kwargs, callbacks=callbacks)[self.output_key]

File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/chains/base.py:140, in Chain.__call__(self, inputs, return_only_outputs, callbacks)
    138 except (KeyboardInterrupt, Exception) as e:
    139     run_manager.on_chain_error(e)
--> 140     raise e
    141 run_manager.on_chain_end(outputs)
    142 return self.prep_outputs(inputs, outputs, return_only_outputs)

File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/chains/base.py:134, in Chain.__call__(self, inputs, return_only_outputs, callbacks)
    128 run_manager = callback_manager.on_chain_start(
    129     {"name": self.__class__.__name__},
    130     inputs,
    131 )
    132 try:
    133     outputs = (
--> 134         self._call(inputs, run_manager=run_manager)
    135         if new_arg_supported
    136         else self._call(inputs)
    137     )
    138 except (KeyboardInterrupt, Exception) as e:
    139     run_manager.on_chain_error(e)

File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/chains/llm.py:69, in LLMChain._call(self, inputs, run_manager)
     64 def _call(
     65     self,
     66     inputs: Dict[str, Any],
     67     run_manager: Optional[CallbackManagerForChainRun] = None,
     68 ) -> Dict[str, str]:
---> 69     response = self.generate([inputs], run_manager=run_manager)
     70     return self.create_outputs(response)[0]

File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/chains/llm.py:79, in LLMChain.generate(self, input_list, run_manager)
     77 """Generate LLM result from inputs."""
     78 prompts, stop = self.prep_prompts(input_list, run_manager=run_manager)
---> 79 return self.llm.generate_prompt(
     80     prompts, stop, callbacks=run_manager.get_child() if run_manager else None
     81 )

File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/llms/base.py:127, in BaseLLM.generate_prompt(self, prompts, stop, callbacks)
    120 def generate_prompt(
    121     self,
    122     prompts: List[PromptValue],
    123     stop: Optional[List[str]] = None,
    124     callbacks: Callbacks = None,
    125 ) -> LLMResult:
    126     prompt_strings = [p.to_string() for p in prompts]
--> 127     return self.generate(prompt_strings, stop=stop, callbacks=callbacks)

File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/llms/base.py:176, in BaseLLM.generate(self, prompts, stop, callbacks)
    174 except (KeyboardInterrupt, Exception) as e:
    175     run_manager.on_llm_error(e)
--> 176     raise e
    177 run_manager.on_llm_end(output)
    178 return output

File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/llms/base.py:170, in BaseLLM.generate(self, prompts, stop, callbacks)
    165 run_manager = callback_manager.on_llm_start(
    166     {"name": self.__class__.__name__}, prompts
    167 )
    168 try:
    169     output = (
--> 170         self._generate(prompts, stop=stop, run_manager=run_manager)
    171         if new_arg_supported
    172         else self._generate(prompts, stop=stop)
    173     )
    174 except (KeyboardInterrupt, Exception) as e:
    175     run_manager.on_llm_error(e)

File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/llms/openai.py:315, in BaseOpenAI._generate(self, prompts, stop, run_manager)
    313     choices.extend(response["choices"])
    314 else:
--> 315     response = completion_with_retry(self, prompt=_prompts, **params)
    316     choices.extend(response["choices"])
    317 if not self.streaming:
    318     # Can't update token usage if streaming

File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/llms/openai.py:106, in completion_with_retry(llm, **kwargs)
    102 @retry_decorator
    103 def _completion_with_retry(**kwargs: Any) -> Any:
    104     return llm.client.create(**kwargs)
--> 106 return _completion_with_retry(**kwargs)

File /anaconda/envs/tf/lib/python3.9/site-packages/tenacity/__init__.py:289, in BaseRetrying.wraps..wrapped_f(*args, **kw)
    287 @functools.wraps(f)
    288 def wrapped_f(*args: t.Any, **kw: t.Any) -> t.Any:
--> 289     return self(f, *args, **kw)

File /anaconda/envs/tf/lib/python3.9/site-packages/tenacity/__init__.py:379, in Retrying.__call__(self, fn, *args, **kwargs)
    377 retry_state = RetryCallState(retry_object=self, fn=fn, args=args, kwargs=kwargs)
    378 while True:
--> 379     do = self.iter(retry_state=retry_state)
    380     if isinstance(do, DoAttempt):
    381         try:

File /anaconda/envs/tf/lib/python3.9/site-packages/tenacity/__init__.py:314, in BaseRetrying.iter(self, retry_state)
    312 is_explicit_retry = fut.failed and isinstance(fut.exception(), TryAgain)
    313 if not (is_explicit_retry or self.retry(retry_state)):
--> 314     return fut.result()
    316 if self.after is not None:
    317     self.after(retry_state)

File /anaconda/envs/tf/lib/python3.9/concurrent/futures/_base.py:439, in Future.result(self, timeout)
    437     raise CancelledError()
    438 elif self._state == FINISHED:
--> 439     return self.__get_result()
    441 self._condition.wait(timeout)
    443 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:

File /anaconda/envs/tf/lib/python3.9/concurrent/futures/_base.py:391, in Future.__get_result(self)
    389 if self._exception:
    390     try:
--> 391         raise self._exception
    392     finally:
    393         # Break a reference cycle with the exception in self._exception
    394         self = None

File /anaconda/envs/tf/lib/python3.9/site-packages/tenacity/__init__.py:382, in Retrying.__call__(self, fn, *args, **kwargs)
    380 if isinstance(do, DoAttempt):
    381     try:
--> 382         result = fn(*args, **kwargs)
    383     except BaseException:  # noqa: B902
    384         retry_state.set_exception(sys.exc_info())  # type: ignore[arg-type]

File /anaconda/envs/tf/lib/python3.9/site-packages/langchain/llms/openai.py:104, in completion_with_retry.._completion_with_retry(**kwargs)
    102 @retry_decorator
    103 def _completion_with_retry(**kwargs: Any) -> Any:
--> 104     return llm.client.create(**kwargs)

File /anaconda/envs/tf/lib/python3.9/site-packages/openai/api_resources/completion.py:25, in Completion.create(cls, *args, **kwargs)
     23 while True:
     24     try:
---> 25         return super().create(*args, **kwargs)
     26     except TryAgain as e:
     27         if timeout is not None and time.time() > start + timeout:

File /anaconda/envs/tf/lib/python3.9/site-packages/openai/api_resources/abstract/engine_api_resource.py:149, in EngineAPIResource.create(cls, api_key, api_base, api_type, request_id, api_version, organization, **params)
    127 @classmethod
    128 def create(
    129     cls,
   (...)
    136     **params,
    137 ):
    138     (
    139         deployment_id,
    140         engine,
    141         timeout,
    142         stream,
    143         headers,
    144         request_timeout,
    145         typed_api_type,
    146         requestor,
    147         url,
    148         params,
--> 149     ) = cls.__prepare_create_request(
    150         api_key, api_base, api_type, api_version, organization, **params
    151     )
    153     response, _, api_key = requestor.request(
    154         "post",
    155         url,
   (...)
    160         request_timeout=request_timeout,
    161     )
    163     if stream:
    164         # must be an iterator

File /anaconda/envs/tf/lib/python3.9/site-packages/openai/api_resources/abstract/engine_api_resource.py:83, in EngineAPIResource.__prepare_create_request(cls, api_key, api_base, api_type, api_version, organization, **params)
     81 if typed_api_type in (util.ApiType.AZURE, util.ApiType.AZURE_AD):
     82     if deployment_id is None and engine is None:
---> 83         raise error.InvalidRequestError(
     84             "Must provide an 'engine' or 'deployment_id' parameter to create a %s"
     85             % cls,
     86             "engine",
     87         )
     88 else:
     89     if model is None and engine is None:

InvalidRequestError: Must provide an 'engine' or 'deployment_id' parameter to create a 


Hironsan commented 1 year ago

Can you pass service_context to ResponseSynthesizer?

response_synthesizer = ResponseSynthesizer.from_args(
    response_mode="tree_summarize",
    service_context=service_context
)
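
Putting that together with the snippet above, the document-summary index construction would look roughly like this (an untested sketch; the import paths are taken from the traceback, and documents/service_context are the ones built earlier):

from llama_index.indices.document_summary.base import GPTDocumentSummaryIndex
from llama_index.indices.query.response_synthesis import ResponseSynthesizer

# Reuse the same Azure-aware service_context so the tree_summarize LLM calls
# go to the AzureChatOpenAI deployment instead of the default OpenAI client.
response_synthesizer = ResponseSynthesizer.from_args(
    response_mode="tree_summarize",
    use_async=True,
    service_context=service_context,
)
doc_summary_index = GPTDocumentSummaryIndex.from_documents(
    documents[:2],
    service_context=service_context,
    response_synthesizer=response_synthesizer,
)
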
ryugonomura commented 1 year ago

import os
import openai
from llama_index import Document, ServiceContext, GPTVectorStoreIndex, LLMPredictor, PromptHelper, LangchainEmbedding, SimpleDirectoryReader, load_index_from_storage, StorageContext
from langchain.llms import AzureOpenAI
from langchain.embeddings import OpenAIEmbeddings

openai.api_type = os.environ["OPENAI_API_TYPE"]
openai.api_key = os.environ["OPENAI_API_KEY"]
openai.api_base = os.environ["OPENAI_API_BASE"]
openai.api_version = os.environ["OPENAI_API_VERSION"]

llm = AzureOpenAI(deployment_name="gpt-35-turbo")
llm_predictor = LLMPredictor(llm=llm)
embedding = OpenAIEmbeddings(deployment="text-embedding-ada-002", max_retries=2)

embedding_llm = LangchainEmbedding(embedding, embed_batch_size=1)

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=embedding_llm,
)

documents = SimpleDirectoryReader("data").load_data()
index = GPTVectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine(service_context=service_context)
query_engine.query("sample string")

I ran the above code, but I get an error.

Could you please correct the following line in llama_index/embeddings/openai.py, in aget_embeddings, get_embeddings, aget_embedding, and get_embedding? Changing "model" to "engine" fixed the error for me.

Before modification: openai.Embedding.create(input=[text], model=engine, **kwargs)
After modification: openai.Embedding.create(input=[text], engine=engine, **kwargs)
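
If editing the installed package is not an option, a similar effect can be had by wrapping the openai client call at runtime. This is only a hypothetical workaround sketch: it patches openai.Embedding.create itself (openai 0.x SDK, as in the traceback) rather than the llama_index file, and it only helps when the model name llama_index passes matches the Azure deployment name.

import functools
import openai

_original_embedding_create = openai.Embedding.create

@functools.wraps(_original_embedding_create)
def _azure_embedding_create(*args, **kwargs):
    # On Azure the deployment must be sent as 'engine'/'deployment_id';
    # if only 'model' was supplied (as llama_index does here), reuse it as 'engine'.
    if openai.api_type == "azure" and "engine" not in kwargs and "deployment_id" not in kwargs:
        kwargs["engine"] = kwargs.pop("model", None)
    return _original_embedding_create(*args, **kwargs)

openai.Embedding.create = _azure_embedding_create

Passing the deployment explicitly (OpenAIEmbedding(deployment_name=...) or the LangchainEmbedding wrapper) is still the cleaner fix.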

Hironsan commented 1 year ago

As far as I have tested, the following code works.

from llama_index import ServiceContext, GPTVectorStoreIndex, LLMPredictor, LangchainEmbedding
from langchain.chat_models import AzureChatOpenAI
from langchain.embeddings import OpenAIEmbeddings

API_KEY = ""
API_BASE = ""
embed_deployment_name = ""
chat_deployment_name = ""

embedding = OpenAIEmbeddings(
    deployment=embed_deployment_name,
    openai_api_key=API_KEY,
    openai_api_base=API_BASE,
    openai_api_type="azure",
    openai_api_version="2022-12-01",
)
embedding_llm = LangchainEmbedding(embedding, embed_batch_size=1)

llm = AzureChatOpenAI(
    deployment_name=chat_deployment_name,
    openai_api_key=API_KEY,
    openai_api_base=API_BASE,
    openai_api_type="azure",
    openai_api_version="2023-03-15-preview",
)
llm_predictor = LLMPredictor(llm=llm)

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=embedding_llm,
)
# documents: a list of Document objects loaded beforehand (e.g. with SimpleDirectoryReader)
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()
print(query_engine.query("sample"))
ryugonomura commented 1 year ago

@Hironsan If the newly created index is used as-is, it works correctly, but if the query is run against an index reloaded from local storage, an error occurs.

code

from llama_index import load_index_from_storage, StorageContext
# Save the index to "./storage"
index.storage_context.persist(persist_dir="./storage")
# load index
storage_context = StorageContext.from_defaults(persist_dir='storage')
index = load_index_from_storage(storage_context)
query_engine = index.as_query_engine(service_context=service_context)
query_engine.query("sample")

error

Traceback (most recent call last):
  File "/home/product/.cache/pypoetry/virtualenvs/chatbot-eVeRnpX_-py3.11/lib/python3.11/site-packages/tenacity/__init__.py", line 382, in __call__
    result = fn(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^
  File "/home/product/.cache/pypoetry/virtualenvs/chatbot-eVeRnpX_-py3.11/lib/python3.11/site-packages/llama_index/embeddings/openai.py", line 105, in get_embedding
    return openai.Embedding.create(input=[text], model=engine, **kwargs)["data"][0][
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/product/.cache/pypoetry/virtualenvs/chatbot-eVeRnpX_-py3.11/lib/python3.11/site-packages/openai/api_resources/embedding.py", line 33, in create
    response = super().create(*args, **kwargs)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/product/.cache/pypoetry/virtualenvs/chatbot-eVeRnpX_-py3.11/lib/python3.11/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 149, in create
    ) = cls.__prepare_create_request(
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/product/.cache/pypoetry/virtualenvs/chatbot-eVeRnpX_-py3.11/lib/python3.11/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 83, in __prepare_create_request
    raise error.InvalidRequestError(
openai.error.InvalidRequestError: Must provide an 'engine' or 'deployment_id' parameter to create a <class 'openai.api_resources.embedding.Embedding'>
Hironsan commented 1 year ago

As the documentation says, you need to pass the same ServiceContext to load_index_from_storage. Please read it in detail:

from llama_index import load_index_from_storage, StorageContext

index.storage_context.persist(persist_dir="./storage")
storage_context = StorageContext.from_defaults(persist_dir='storage')
index = load_index_from_storage(storage_context, service_context=service_context)
query_engine = index.as_query_engine()
query_engine.query("sample")
ryugonomura commented 1 year ago

You were right. Thank you for your advice.

Hironsan commented 1 year ago

You're welcome. If the problem has been resolved, please close the Issue.

mrcmoresi commented 1 year ago

Can you pass service_context to ResponseSynthesizer?

response_synthesizer = ResponseSynthesizer.from_args(
    response_mode="tree_summarize",
    service_context=service_context
)

Thanks for the hint, it worked perfectly. Now I have to read the documentation to understand what I'm doing :)