run-llama / llama_extract

MIT License
105 stars 16 forks source link

Pydantic error when extracting schema #35

Closed tituslhy closed 1 month ago

tituslhy commented 1 month ago

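**Describe the bug**

Unable to infer a schema from a document: `LlamaExtract.ainfer_schema` fails with a pydantic error (a `TypeError` raised while the API response is being validated).

**To reproduce**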

from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_extract import LlamaExtract

Settings.llm = OpenAI(model = "gpt-4o-mini")
Settings.embed_model = GeminiEmbedding()

SCHEMA_NAME = "TEST_SCHEMA_2"
extractor = LlamaExtract()
extraction_schema = await extractor.ainfer_schema(
    SCHEMA_NAME, 
    ["{folder}/resume.pdf"])

print(extraction_schema.data_schema)

Error Message

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[9], line 1
----> 1 extraction_schema = await extractor.ainfer_schema(
      2     SCHEMA_NAME, 
      3     ["../../literate-octo-tribble/resume/resume.pdf"])
      5 extraction_schema.data_schema

File /opt/anaconda3/envs/llamaindex/lib/python3.12/site-packages/llama_extract/base.py:215, in LlamaExtract.ainfer_schema(self, name, seed_files, schema_id, project_id)
    205 _response = await self._async_client._client_wrapper.httpx_client.post(
    206     urllib.parse.urljoin(
    207         f"{self._async_client._client_wrapper.get_base_url()}/",
   (...)
    211     headers=self._async_client._client_wrapper.get_headers(),
    212 )
    214 if 200 <= _response.status_code < 300:
--> 215     return pydantic.parse_obj_as(ExtractionSchema, _response.json())  # type: ignore
    216 if _response.status_code == 422:
    217     raise UnprocessableEntityError(
    218         pydantic.parse_obj_as(HttpValidationError, _response.json())
    219     )  # type: ignore

File /opt/anaconda3/envs/llamaindex/lib/python3.12/site-packages/pydantic/deprecated/tools.py:42, in parse_obj_as(type_, obj, type_name)
     36 if type_name is not None:  # pragma: no cover
     37     warnings.warn(
     38         'The type_name parameter is deprecated. parse_obj_as no longer creates temporary models',
     39         DeprecationWarning,
     40         stacklevel=2,
     41     )
---> 42 return TypeAdapter(type_).validate_python(obj)

File /opt/anaconda3/envs/llamaindex/lib/python3.12/site-packages/pydantic/type_adapter.py:135, in _frame_depth.<locals>.wrapper.<locals>.wrapped(self, *args, **kwargs)
    132 @wraps(func)
    133 def wrapped(self: TypeAdapterT, *args: P.args, **kwargs: P.kwargs) -> R:
    134     with self._with_frame_depth(depth + 1):  # depth + 1 for the wrapper function
--> 135         return func(self, *args, **kwargs)

File /opt/anaconda3/envs/llamaindex/lib/python3.12/site-packages/pydantic/type_adapter.py:366, in TypeAdapter.validate_python(self, object, strict, from_attributes, context)
    341 @_frame_depth(1)
    342 def validate_python(
    343     self,
   (...)
    349     context: dict[str, Any] | None = None,
    350 ) -> T:
    351     """Validate a Python object against the model.
    352 
    353     Args:
   (...)
    364         The validated object.
    365     """
--> 366     return self.validator.validate_python(object, strict=strict, from_attributes=from_attributes, context=context)

TypeError: BaseModel.validate() takes 2 positional arguments but 3 were given