langchain-ai / langchain

🦜🔗 Build context-aware reasoning applications
https://python.langchain.com
MIT License
94.52k stars 15.29k forks source link

SelfQueryRetriever.from_llm raises following issue: ImportError: Cannot import lark, please install it with 'pip install lark'. #15668

Closed XariZaru closed 10 months ago

XariZaru commented 10 months ago

System Info

Langchain v0.0.354, Python v3.11, Chroma v0.4.22, Lark v1.1.8

Who can help?

@agola11

Information

Related Components

Reproduction

def load_self_query_retriever(
    persist_dir: str,
    docs: list,
    metadata_field_info: list,
    document_content_description: str = "Information about various documents, the date they are up to date with and where they were sourced from.",
):
    """Build a ``SelfQueryRetriever`` backed by a persisted Chroma store.

    First tries to open a Chroma store at ``persist_dir``; if constructing it
    raises, a new store is created from ``docs`` and persisted to the same
    directory.

    Args:
        persist_dir: Directory passed to Chroma as ``persist_directory``.
        docs: Documents used to build the store when opening it fails.
        metadata_field_info: Attribute descriptions forwarded to
            ``SelfQueryRetriever.from_llm``.
        document_content_description: Description of the document contents
            forwarded to ``SelfQueryRetriever.from_llm``.

    Returns:
        The retriever returned by ``SelfQueryRetriever.from_llm``.
    """
    llm = ChatOpenAI(temperature=0)
    embedding_function = get_embedding_function()

    try:
        vectorstore = Chroma(
            persist_directory=persist_dir,
            embedding_function=embedding_function,
        )
    except Exception:
        # Deliberate best-effort fallback: rebuild the store from ``docs``.
        # ``Exception`` (rather than a bare ``except``) lets KeyboardInterrupt
        # and SystemExit propagate instead of triggering a rebuild.
        vectorstore = Chroma.from_documents(
            docs,
            embedding_function,
            persist_directory=persist_dir,
        )

    return SelfQueryRetriever.from_llm(
        llm,
        vectorstore,
        document_content_description,
        metadata_field_info,
    )

# Metadata attributes handed to the self-query retriever below.
metadata_field_info = [
    AttributeInfo(
        name="source",
        description="The document this chunk is from.",
        type="string",
    ),
    AttributeInfo(
        name="origin",
        description="The origin the document came from. Bancworks is the higher priority.",
        type="string",
    ),
    AttributeInfo(
        name="date_day",
        description="The day the document was uploaded.",
        type="integer",
    ),
    AttributeInfo(
        name="date_uploaded",
        description="The month year the document is current to.",
        type="integer",
    ),
]

# Build the retriever; the content description falls back to its default.
self_query_retriever = load_self_query_retriever(
    persist_dir="storage/deploy/chroma-db-self-query",
    docs=bancworks_docs,
    metadata_field_info=metadata_field_info,
)

The following error is thrown:

---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
Cell In[1], line 110
     76     return SelfQueryRetriever.from_llm(
     77         llm,
     78         vectorstore,
     79         document_content_description,
     80         metadata_field_info,
     81     )
     83 metadata_field_info = [
     84         AttributeInfo(name="source",description="The document this chunk is from.",type="string",),
     85         AttributeInfo(name="origin",description="The origin the document came from. Comes from either scraped websites like TheKinection.org, Kinecta.org or database files like Bancworks. Bancworks is the higher priority.",type="string",),
   (...)
    107         # ),
    108     ]
--> 110 self_query_retriever = load_self_query_retriever("storage/deploy/chroma-db-self-query", bancworks_docs, metadata_field_info)
    113 # parent_retriever = load_parent_retriever("full_docs", "storage/deploy/chroma-db-parent")
    114 
    115 # current_place = 0
   (...)
    127 # retriever.add_documents(bancworks_docs)
    128 # retriever.add_documents(bancworks_docs)
Cell In[1], line 76, in load_self_query_retriever(persist_dir, docs, metadata_field_info, document_content_description)
     73 llm = ChatOpenAI(temperature=0)
     74 vectorstore = Chroma.from_documents(docs, OpenAIEmbeddings(), persist_directory=persist_dir)
---> 76 return SelfQueryRetriever.from_llm(
     77     llm,
     78     vectorstore,
     79     document_content_description,
     80     metadata_field_info,
     81 )
File /etc/system/kernel/.venv/lib64/python3.11/site-packages/langchain/retrievers/self_query/base.py:225, in SelfQueryRetriever.from_llm(cls, llm, vectorstore, document_contents, metadata_field_info, structured_query_translator, chain_kwargs, enable_limit, use_original_query, **kwargs)
    218 if (
    219     "allowed_operators" not in chain_kwargs
    220     and structured_query_translator.allowed_operators is not None
    221 ):
    222     chain_kwargs[
    223         "allowed_operators"
    224     ] = structured_query_translator.allowed_operators
--> 225 query_constructor = load_query_constructor_runnable(
    226     llm,
    227     document_contents,
    228     metadata_field_info,
    229     enable_limit=enable_limit,
    230     **chain_kwargs,
    231 )
    232 return cls(
    233     query_constructor=query_constructor,
    234     vectorstore=vectorstore,
   (...)
    237     **kwargs,
    238 )
File /etc/system/kernel/.venv/lib64/python3.11/site-packages/langchain/chains/query_constructor/base.py:357, in load_query_constructor_runnable(llm, document_contents, attribute_info, examples, allowed_comparators, allowed_operators, enable_limit, schema_prompt, fix_invalid, **kwargs)
    353 for ainfo in attribute_info:
    354     allowed_attributes.append(
    355         ainfo.name if isinstance(ainfo, AttributeInfo) else ainfo["name"]
    356     )
--> 357 output_parser = StructuredQueryOutputParser.from_components(
    358     allowed_comparators=allowed_comparators,
    359     allowed_operators=allowed_operators,
    360     allowed_attributes=allowed_attributes,
    361     fix_invalid=fix_invalid,
    362 )
    363 return prompt | llm | output_parser
File /etc/system/kernel/.venv/lib64/python3.11/site-packages/langchain/chains/query_constructor/base.py:99, in StructuredQueryOutputParser.from_components(cls, allowed_comparators, allowed_operators, allowed_attributes, fix_invalid)
     96         return fixed
     98 else:
---> 99     ast_parse = get_parser(
    100         allowed_comparators=allowed_comparators,
    101         allowed_operators=allowed_operators,
    102         allowed_attributes=allowed_attributes,
    103     ).parse
    104 return cls(ast_parse=ast_parse)
File /etc/system/kernel/.venv/lib64/python3.11/site-packages/langchain/chains/query_constructor/parser.py:174, in get_parser(allowed_comparators, allowed_operators, allowed_attributes)
    172 # QueryTransformer is None when Lark cannot be imported.
    173 if QueryTransformer is None:
--> 174     raise ImportError(
    175         "Cannot import lark, please install it with 'pip install lark'."
    176     )
    177 transformer = QueryTransformer(
    178     allowed_comparators=allowed_comparators,
    179     allowed_operators=allowed_operators,
    180     allowed_attributes=allowed_attributes,
    181 )
    182 return Lark(GRAMMAR, parser="lalr", transformer=transformer, start="program")
ImportError: Cannot import lark, please install it with 'pip install lark'.

Expected behavior

Be able to instantiate SelfQueryRetriever.from_llm successfully

dosubot[bot] commented 10 months ago

🤖

Hey @XariZaru! Good to see you here again. Hope you're coding away to glory! 🚀

Based on the error message you provided, it seems that the lark library is not being imported correctly. The lark library is used in the LangChain codebase for parsing structured queries in the SelfQueryRetriever.from_llm method.

The error message suggests that you should install lark using pip. However, you mentioned that you already have lark v1.1.8 installed. This could be a compatibility issue or an issue with the environment where your Python interpreter is running.

Here are a few things you could try:

  1. Reinstall lark: Uninstall the current version of lark and reinstall it using pip. You can do this with the following commands:
pip uninstall lark
pip install lark
  2. Check your Python environment: Make sure that the Python interpreter you're using to run your script has access to the lark library. If you're using a virtual environment, ensure that lark is installed in the correct environment.

  3. Restart your Python kernel: According to a similar issue in the LangChain repository, simply restarting the Python kernel solved the problem.

If none of these solutions work, please provide more information about your Python environment and how you're running your script. This will help us better understand the issue and find a solution.

Sources


This response is meant to be useful and save you time. It is not meant to be a precise solution, but rather a starting point for your own research.

Help me be more useful! Please leave a 👍 if this is helpful and 👎 if it is irrelevant.

If you want to continue the conversation, start your reply with @dosu-bot.

XariZaru commented 10 months ago

This did not work. As you can see, my Lark is imported and I printed out its version. It's not working still. @dosubot

dosubot[bot] commented 10 months ago

🤖

Based on the information you've provided, it seems like you're encountering an ImportError for the lark library when using the SelfQueryRetriever.from_llm method in LangChain v0.0.354. However, the lark library is not listed as a dependency for LangChain v0.0.354. This could suggest that the lark library is a transitive dependency, meaning it's a dependency of one of LangChain's dependencies, or it's being used in a part of the code that isn't covered by the listed dependencies.

The SelfQueryRetriever.from_llm method is used to create an instance of the SelfQueryRetriever class with a given language model, vector store, document contents, and metadata field information. It doesn't directly import or use the lark library in its implementation.

Given this, there are a few possibilities that could be causing your issue:

  1. There might be a part of the LangChain codebase that is importing lark but it's not listed as a dependency. This could be an oversight in the dependency management of the LangChain project.

  2. One of LangChain's dependencies might be importing lark and there's an issue with how that dependency is installed or configured in your environment.

  3. There might be a version conflict between the lark version that LangChain (or one of its dependencies) expects and the version you have installed.

To help narrow down the issue, could you provide the full traceback of the error you're encountering? This could help identify where the lark import is happening. Additionally, could you provide the output of pip freeze in your environment? This would list all the installed Python packages and their versions, which could help identify if there's a version conflict or a missing dependency.

If the issue is due to a missing or incorrect dependency in LangChain, it would be a good idea to report this to the LangChain maintainers so they can update the project's dependencies accordingly. However, as I'm an AI developed by OpenAI and don't have the ability to create issues or pull requests, I would recommend you to do so.

I hope this helps! Let me know if you have any other questions or need further clarification.

Sources


This response is meant to be useful and save you time. It is not meant to be a precise solution, but rather a starting point for your own research.

Help me be more useful! Please leave a 👍 if this is helpful and 👎 if it is irrelevant.

If you want to continue the conversation, start your reply with @dosu-bot.

XariZaru commented 10 months ago

Is it possible to get someone in here to help? @dosubot

dosubot[bot] commented 10 months ago

@baskaryan, it seems like this issue requires a deeper look, could you please lend your expertise?

XariZaru commented 10 months ago

Resolved by building an image with the Lark package installed. It does not work if I install lark while the container is running.

pnut2357 commented 6 months ago

@XariZaru - I have the same issue. What version did you use for lark and langchain?

Here is mine: aiohttp==3.9.5 aiosignal==1.3.1 altair==5.3.0 annotated-types==0.6.0 anyio==4.3.0 argilla==0.0.1 argon2-cffi==23.1.0 argon2-cffi-bindings==21.2.0 attrs==23.2.0 azure-core==1.30.1 azure-identity==1.16.0 azure-storage-blob==12.20.0 blinker==1.8.1 cachetools==5.3.3 certifi==2024.2.2 cffi==1.16.0 charset-normalizer==3.3.2 click==8.1.7 cryptography==42.0.7 dataclasses-json==0.6.5 distro==1.9.0 docarray==0.40.0 environs==9.5.0 et-xmlfile==1.1.0 faiss-cpu==1.8.0 Flask==3.0.3 frozenlist==1.4.1 gitdb==4.0.11 GitPython==3.1.43 grpcio==1.60.0 h11==0.14.0 httpcore==1.0.5 httpx==0.27.0 idna==3.7 isodate==0.6.1 itsdangerous==2.2.0 Jinja2==3.1.3 joblib==1.4.2 jsonpatch==1.33 jsonpointer==2.4 jsonschema==4.22.0 jsonschema-specifications==2023.12.1 langchain==0.1.19 langchain-community==0.0.38 langchain-core==0.1.52 langchain-experimental==0.0.58 langchain-openai==0.1.0 langchain-text-splitters==0.0.1 langsmith==0.1.56 lark==1.1.9 lark-parser==0.12.0 lxml==5.2.1 Markdown==3.6 markdown-it-py==3.0.0 MarkupSafe==2.1.5 marshmallow==3.21.2 mdurl==0.1.2 minio==7.2.7 msal==1.28.0 msal-extensions==1.1.0 msg-parser==1.2.0 multidict==6.0.5 mypy-extensions==1.0.0 nltk==3.8.1 numpy==1.26.4 olefile==0.47 openai==1.13.3 openpyxl==3.1.2 orjson==3.10.3 packaging==23.2 pandas==2.2.2 pdf2image==1.17.0 pdfminer.six==20231228 pdfplumber==0.11.0 pillow==10.3.0 portalocker==2.8.2 protobuf==4.25.3 pyarrow==16.0.0 pycparser==2.22 pycryptodome==3.20.0 pydantic==2.7.1 pydantic_core==2.18.2 pydeck==0.9.0 Pygments==2.18.0 PyJWT==2.8.0 pymilvus==2.4.0 PyMuPDF==1.24.2 PyMuPDFb==1.24.1 pyodbc==5.1.0 pypandoc==1.13 pypdf==4.2.0 PyPDF2==3.0.1 pypdfium2==4.30.0 python-dateutil==2.9.0.post0 python-docx==1.1.2 python-dotenv==1.0.1 python-magic==0.4.27 python-pptx==0.6.23 pytz==2024.1 PyYAML==6.0.1 referencing==0.35.1 regex==2024.4.28 requests==2.31.0 rich==13.7.1 rpds-py==0.18.1 scipy==1.13.0 six==1.16.0 smmap==5.0.1 sniffio==1.3.1 SQLAlchemy==2.0.30 streamlit==1.34.0 tenacity==8.3.0 tiktoken==0.6.0 
toml==0.10.2 toolz==0.12.1 tornado==6.4 tqdm==4.66.4 types-requests==2.31.0.20240406 typing-inspect==0.9.0 typing_extensions==4.11.0 tzdata==2024.1 ujson==5.9.0 unstructured==0.6.7 urllib3==2.2.1 Werkzeug==3.0.2 XlsxWriter==3.2.0 yarl==1.9.4

zhiweit commented 5 months ago

@pnut2357 if you are in a Python virtual environment (e.g. venv or poetry shell), exit the venv or shell first, then run pip install lark, and then restart your Python kernel in your virtual environment. That should work.

As described in this comment

pip install --upgrade --quiet lark chromadb
restart kernel

worked for me