run-llama / llama_index

LlamaIndex is a data framework for your LLM applications
https://docs.llamaindex.ai
MIT License

[Bug]: AttributeError: __pydantic_private__ #16094

Closed · yithuang123 closed this 1 month ago

yithuang123 commented 1 month ago

### Bug Description

I get an `AttributeError: __pydantic_private__` when I use a customized embedding model. Here is the code:

```python
from typing import Optional, List, Mapping, Any

from llama_index.core.base.embeddings.base import (
    BaseEmbedding,
    Embedding,
)
from llama_index.legacy.bridge.pydantic import PrivateAttr
import requests
import numpy as np

_EMBEDDING_URL = "https://XXX"


class MLServerEmbedding(BaseEmbedding):
    _model: str = PrivateAttr()
    _url: str = PrivateAttr()

    def __init__(
        self,
        model_name: str,
        **kwargs: Any,
    ) -> None:
        self._model = model_name
        self._url = _EMBEDDING_URL.format(embedding_model=model_name)
        super().__init__(**kwargs)

    @classmethod
    def class_name(cls) -> str:
        return "CustomEmbeddings"

    @staticmethod
    def _wrap_payload(text_list):
        return {
            "inputs": [
                {
                    "name": "input",
                    "shape": [len(text_list)],
                    "datatype": "str",
                    "data": text_list,
                }
            ]
        }

    def _get_embedding(self, text_list: List[str]) -> List[List[float]]:
        return self._parse_response(
            requests.post(
                url=self._url,
                json=self._wrap_payload(text_list),
                headers={"Content-Type": "application/json"},
                params={},
            )
        )

    def _get_query_embedding(self, query: str) -> Embedding:
        vector = self._get_embedding([query])
        return vector

    async def _aget_query_embedding(self, query: str) -> Embedding:
        pass

    def _get_text_embedding(self, text: str) -> Embedding:
        vector = self._get_embedding([text])
        return vector

    def _get_text_embeddings(self, texts: List[str]) -> List[Embedding]:
        vector = self._get_embedding(texts)
        return vector

    @staticmethod
    def _parse_response(response):
        if response.status_code != 200:
            raise Exception(response)
        outputs = response.json()["outputs"][0]
        return np.array(outputs["data"]).reshape(outputs["shape"]).tolist()


if __name__ == '__main__':
    embed_model = MLServerEmbedding(model_name="all-minilm-l6-v-0438f")
```

### Version

0.11.8

### Steps to Reproduce

Run the script from the Bug Description above.

### Relevant Logs/Tracebacks

No response

dosubot[bot] commented 1 month ago

Hey @yithuang123! 👋 I'm here to help you with any issues or questions you have. Let's squash those bugs together!

I found a similar issue that was resolved recently. The issue titled "[Bug]: Custom Embedding Model example not working due to Pydantic errors" was closed by updating the documentation for custom embeddings [1]. Another related issue, "[Bug]: OpenAIEmbedding has no attribute `__pydantic_private__`", was resolved by updating to the latest version of llama-index-embeddings-openai [2].

To resolve the `AttributeError: __pydantic_private__` in your code, ensure that `PrivateAttr` is correctly imported and used. Here is the corrected version of your code:

```python
from typing import Optional, List, Any
from llama_index.core.base.embeddings.base import BaseEmbedding, Embedding
from llama_index.legacy.bridge.pydantic import PrivateAttr  # Ensure this import is correct
import requests
import numpy as np

_EMBEDDING_URL = "https://XXX"

class MLServerEmbedding(BaseEmbedding):
    _model: str = PrivateAttr()
    _url: str = PrivateAttr()

    def __init__(self, model_name: str, **kwargs: Any) -> None:
        self._model = model_name
        self._url = _EMBEDDING_URL.format(embedding_model=model_name)
        super().__init__(**kwargs)

    @classmethod
    def class_name(cls) -> str:
        return "CustomEmbeddings"

    @staticmethod
    def _wrap_payload(text_list):
        return {
            "inputs": [
                {
                    "name": "input",
                    "shape": [len(text_list)],
                    "datatype": "str",
                    "data": text_list
                }
            ]
        }

    def _get_embedding(self, text_list: List[str]) -> List[List[float]]:
        return self._parse_response(requests.post(url=self._url,
                                                  json=self._wrap_payload(text_list),
                                                  headers={"Content-Type": "application/json"},
                                                  params={}))

    def _get_query_embedding(self, query: str) -> Embedding:
        vector = self._get_embedding([query])
        return vector

    async def _aget_query_embedding(self, query: str) -> Embedding:
        pass

    def _get_text_embedding(self, text: str) -> Embedding:
        vector = self._get_embedding([text])
        return vector

    def _get_text_embeddings(self, texts: List[str]) -> List[Embedding]:
        vector = self._get_embedding(texts)
        return vector

    @staticmethod
    def _parse_response(response):
        if response.status_code != 200:
            raise Exception(response)
        outputs = response.json()["outputs"][0]
        return np.array(outputs["data"]).reshape(outputs["shape"]).tolist()

if __name__ == '__main__':
    embed_model = MLServerEmbedding(model_name="all-minilm-l6-v-0438f")
```

Ensure that `PrivateAttr` is imported from `llama_index.legacy.bridge.pydantic` and not from any other module. This should resolve the `AttributeError: __pydantic_private__` issue.
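For reference, recent llama-index releases also expose the Pydantic bridge from the core package; if the `legacy` package is unavailable, this alternative import may be worth trying (an assumption to verify against your installed version):

```python
# Alternative import path (assumes llama-index >= 0.10, where the core
# bridge module re-exports Pydantic helpers such as PrivateAttr).
from llama_index.core.bridge.pydantic import PrivateAttr
```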

To continue talking to Dosu, mention @dosu.

logan-markewich commented 1 month ago

@yithuang123 in Pydantic v2, the `super().__init__()` call has to come before accessing `self` variables.
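Concretely, a minimal sketch of that reordering for the `MLServerEmbedding` class above (only `__init__` changes):

```python
def __init__(self, model_name: str, **kwargs: Any) -> None:
    # Pydantic v2 initializes __pydantic_private__ inside BaseModel.__init__,
    # so super().__init__() must run before any private attribute is assigned.
    super().__init__(**kwargs)
    self._model = model_name
    self._url = _EMBEDDING_URL.format(embedding_model=model_name)
```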

yithuang123 commented 1 month ago

@logan-markewich Thank you! It works for me.