run-llama / llama_index

LlamaIndex is a data framework for your LLM applications
https://docs.llamaindex.ai
MIT License

[Bug]: TypeError: construct_payload() got an unexpected keyword argument 'formatted' #16119

Open yithuang123 opened 1 hour ago

yithuang123 commented 1 hour ago

Bug Description

I want to use a custom LLM and a custom embedding model to build a basic RAG pipeline. Both the custom LLM and the custom embedding model work as expected when I test them on their own, but when I call the query() function I get TypeError: construct_payload() got an unexpected keyword argument 'formatted'.

from typing import Optional, List, Mapping, Any

from llama_index.core import SimpleDirectoryReader, SummaryIndex
from llama_index.core.callbacks import CallbackManager
from llama_index.core.llms import (
    CustomLLM,
    CompletionResponse,
    CompletionResponseGen,
    LLMMetadata,
)
from llama_index.core.llms.callbacks import llm_completion_callback
from llama_index.core import Settings

import requests

class MLServerLLM(CustomLLM):
    context_window: int = 3900
    num_output: int = 256
    model_name: str = "test"
    dummy_request: str = "hello"
    dummy_response: str = "My response"

    @property
    def metadata(self) -> LLMMetadata:
        """Get LLM metadata."""
        return LLMMetadata(
            context_window=self.context_window,
            num_output=self.num_output,
            model_name=self.model_name,
        )

    def construct_payload(self, prompt):
        template = {
            "parameters": {
                "extra": {
                    "max_new_tokens": 256,
                    "temperature": 0.3,
                    "repetition_penalty": 1.0
                }
            },
            "inputs": [
                {
                    "name": "input",
                    "shape": [1],
                    "datatype": "str",
                    "data": ["hi"]
                }
            ]
        }
        template["inputs"][0]["data"] = [prompt]
        return template

    def construct_url(self):
        # NOTE: _LLM_URL is assumed to be defined elsewhere in the original script
        return _LLM_URL.format(model_name=self.model_name)

    @staticmethod
    def _parse_response(resp):
        if resp.status_code != 200:
            raise Exception(resp)
        return resp.json()["outputs"][0]["data"][0]

    @llm_completion_callback()
    def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        # NOTE: **kwargs (e.g. formatted=True when called via the query engine) are
        # forwarded to construct_payload(), which only accepts `prompt` -- this is the
        # call that raises the reported TypeError.
        response = self._parse_response(requests.post(url=self.construct_url(),
                                          json=self.construct_payload(prompt, **kwargs),
                                          headers={"Content-Type": "application/json"},
                                          params={}))
        return CompletionResponse(text=response)

    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, **kwargs: Any
    ) -> CompletionResponseGen:
        response = ""
        for token in self.dummy_response:
            response += token
            yield CompletionResponse(text=response, delta=token)

if __name__ == '__main__':
    llm = MLServerLLM(model_name="mistral-7b-inst-2252b")
    print(llm.complete("test"))

The custom embedding model:

from typing import Optional, List, Mapping, Any
from llama_index.core.base.embeddings.base import (
    BaseEmbedding,
    Embedding,
)
from llama_index.core.bridge.pydantic import PrivateAttr  # use the core bridge; llama_index.legacy is not bundled with 0.11.x
import requests
import numpy as np
model_name = "all-minilm-l6-v-0438f"

class MLServerEmbedding(BaseEmbedding):
    _model: str = PrivateAttr()
    _url: str = PrivateAttr()
    def __init__(
        self,
        model_name: str,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self._model = model_name
        self._url = _EMBEDDING_URL  # NOTE: _EMBEDDING_URL is assumed to be defined elsewhere in the original script
    @classmethod
    def class_name(cls) -> str:
        return "CustomEmbeddings"
    @staticmethod
    def _wrap_payload(text_list):
        return {
            "inputs": [
                {
                    "name": "input",
                    "shape": [len(text_list)],
                    "datatype": "str",
                    "data": text_list
                }
            ]
        }
    def _get_embedding(self, text_list: List[str]) -> List[List[float]]:
        return self._parse_response(requests.post(url=self._url,
                                                  json=self._wrap_payload(text_list),
                                                  headers={"Content-Type": "application/json"},
                                                  params={}))
    def _get_query_embedding(self, query: str) -> Embedding:
        vector = self._get_embedding([query])[0]
        return vector
    async def _aget_query_embedding(self, query: str) -> Embedding:
        # fall back to the synchronous implementation instead of returning None
        return self._get_query_embedding(query)
    def _get_text_embedding(self, text: str) -> Embedding:
        vector = self._get_embedding([text])[0]
        return vector
    def _get_text_embeddings(self, texts: List[str]) -> List[Embedding]:
        vector = self._get_embedding(texts)
        return vector

    @staticmethod
    def _parse_response(response):
        if response.status_code != 200:
            raise Exception(response)
        outputs = response.json()["outputs"][0]
        return np.array(outputs["data"]).reshape(outputs["shape"]).tolist()

if __name__ == '__main__':
    ce = MLServerEmbedding("all-minilm-l6-v-0438f")
    print(ce._get_query_embedding("test"))

Version

0.11.8

Steps to Reproduce

from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, load_index_from_storage  # ServiceContext was removed in 0.11.x
from llama_index.core import Settings

llm = MLServerLLM(model_name="mistral-7b-inst-2252b")
embed_model = MLServerEmbedding("all-minilm-l6-v-0438f")

Settings.llm = llm
Settings.embed_model = embed_model
#Settings.node_parser = SentenceSplitter(chunk_size=512, chunk_overlap=64)
#Settings.num_output = 512
#Settings.context_window = 3900
Settings.chunk_size = 512
Settings.chunk_overlap = 64

print("Node parser Settings", Settings.node_parser.chunk_size, Settings.node_parser.chunk_overlap)

documents = SimpleDirectoryReader("./data", recursive=True).load_data()
print("Number of Input documents:", len(documents))

index = VectorStoreIndex.from_documents(documents)
print("Number of nodes:", len(index.docstore.docs))

query_engine = index.as_query_engine()
response = query_engine.query("America's infrastructure is ranked what place in the world?")

Relevant Logs/Tracebacks

No response

dosubot[bot] commented 1 hour ago

To resolve the TypeError: construct_payload() got an unexpected keyword argument 'formatted' when using the query() function in your custom LLM implementation, you need to ensure that the arguments you are passing to the construct_payload method are correct and supported by its definition.

In your MLServerLLM class, construct_payload only accepts prompt, but complete forwards all of its **kwargs to it. When the query engine runs, the LLM's predict path (in recent llama-index-core versions) calls complete(prompt, formatted=True), so formatted ends up in complete's **kwargs and is then passed on to construct_payload, which raises the error.
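
To see the mechanism in isolation, here is a minimal, self-contained reproduction (hypothetical names, not llama-index code) of how a keyword argument forwarded through **kwargs produces the same TypeError:

# Standalone illustration of the failure mode (hypothetical functions, not library code).
# Running this script intentionally ends with the same kind of TypeError.
def construct_payload(prompt):
    return {"data": [prompt]}

def complete(prompt, **kwargs):
    # kwargs == {"formatted": True} when called the way the query engine calls complete()
    return construct_payload(prompt, **kwargs)  # TypeError: unexpected keyword argument 'formatted'

complete("hi", formatted=True)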

Here is how you can adjust your code to avoid the error:

  1. Modify the construct_payload method to accept **kwargs if you need to pass additional arguments.
  2. Ensure that you are not passing unsupported arguments to construct_payload (a sketch of this alternative is shown at the end of this comment).

Here is an example of how you might adjust your code:

class MLServerLLM(CustomLLM):
    context_window: int = 3900
    num_output: int = 256
    model_name: str = "test"
    dummy_request: str = "hello"
    dummy_response: str = "My response"

    @property
    def metadata(self) -> LLMMetadata:
        """Get LLM metadata."""
        return LLMMetadata(
            context_window=self.context_window,
            num_output=self.num_output,
            model_name=self.model_name,
        )

    def construct_payload(self, prompt, **kwargs):  # now accepts (and ignores) extra kwargs such as formatted=True
        template = {
            "parameters": {
                "extra": {
                    "max_new_tokens": 256,
                    "temperature": 0.3,
                    "repetition_penalty": 1.0
                }
            },
            "inputs": [
                {
                    "name": "input",
                    "shape": [1],
                    "datatype": "str",
                    "data": ["hi"]
                }
            ]
        }
        template["inputs"][0]["data"] = [prompt]
        return template

    def construct_url(self):
        return _LLM_URL.format(model_name=self.model_name)

    @staticmethod
    def _parse_response(resp):
        if resp.status_code != 200:
            raise Exception(resp)
        return resp.json()["outputs"][0]["data"][0]

    @llm_completion_callback()
    def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        response = self._parse_response(requests.post(url=self.construct_url(),
                                          json=self.construct_payload(prompt, **kwargs),
                                          headers={"Content-Type": "application/json"},
                                          params={}))
        return CompletionResponse(text=response)

    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, **kwargs: Any
    ) -> CompletionResponseGen:
        response = ""
        for token in self.dummy_response:
            response += token
            yield CompletionResponse(text=response, delta=token)

By updating the construct_payload method to accept **kwargs, you ensure that any additional arguments passed from the complete method do not cause an error. This should resolve the TypeError you are encountering [1].
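
Alternatively, for option 2 above, you can keep construct_payload's original signature and simply stop forwarding the extra keyword arguments from complete. A minimal sketch of that variant (everything else in the class unchanged):

    @llm_completion_callback()
    def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        # drop **kwargs (e.g. formatted=True) instead of forwarding them to construct_payload()
        response = self._parse_response(
            requests.post(
                url=self.construct_url(),
                json=self.construct_payload(prompt),
                headers={"Content-Type": "application/json"},
            )
        )
        return CompletionResponse(text=response)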

To continue talking to Dosu, mention @dosu.