run-llama / llama_index

LlamaIndex is a data framework for your LLM applications
https://docs.llamaindex.ai
MIT License
35.85k stars 5.08k forks source link

[Bug]: view is not working with NLSQLTableQueryEngine even after modify the initialization of the SQLDatabase to include view_support=True #14514

Open akashkumar398 opened 3 months ago

akashkumar398 commented 3 months ago

Bug Description

view is not working with NLSQLTableQueryEngine even after modifying the initialization of the SQL database to include view_support=True

Version

llama-index==0.10.50

Steps to Reproduce

from llama_index.core import ( SQLDatabase, VectorStoreIndex, Settings, ) from llama_index.llms.openai import OpenAI from llama_index.core.query_engine import NLSQLTableQueryEngine from llama_index.core.callbacks import CallbackManager, TokenCountingHandler

Set OpenAI API key

openai.api_key = os.environ["OPENAI_API_KEY"]

engine = initialize_database_connection()

tables = ["customer_information_enquiry", "view_table"] sql_database = SQLDatabase(engine, include_tables=tables, sample_rows_in_table_info=1, view_support=True)

MODEL = 'gpt-3.5-turbo-0125'

token_counter = TokenCountingHandler( tokenizer=tiktoken.encoding_for_model(MODEL).encode ) callback_manager = CallbackManager([token_counter])

llm = OpenAI(temperature=0, model=MODEL) Settings.llm = llm Settings.callback_manager = callback_manager

query_engine = NLSQLTableQueryEngine( sql_database=sql_database,

TypeError Traceback (most recent call last) Cell In[12], line 103 99 engine = initialize_database_connection() 101 tables = ["customer_information_enquiry","vw_jobs"] --> 103 sql_database = SQLDatabase(engine, include_tables=tables, sample_rows_in_table_info=1, view_support=True) 104 # sql_database = SQLDatabase(engine, include_tables=tables, sample_rows_in_table_info=1) 106 MODEL = 'gpt-3.5-turbo-0125'

File ~\anaconda3\envs\dbgpt\lib\site-packages\llama_index\core\utilities\sql_wrapper.py:110, in SQLDatabase.init(self, engine, schema, metadata, ignore_tables, include_tables, sample_rows_in_table_info, indexes_in_table_info, custom_table_info, view_support, max_string_length) 108 self._metadata = metadata or MetaData() 109 # including view support if view_support = true --> 110 self._metadata.reflect( 111 views=view_support, 112 bind=self._engine, 113 only=list(self._usable_tables), 114 schema=self._schema, 115 )

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\sql\schema.py:5782, in MetaData.reflect(self, bind, schema, views, only, extend_existing, autoload_replace, resolve_fks, dialect_kwargs) 5775 load = [ 5776 name 5777 for name in only 5778 if extend_existing or name not in current 5779 ] 5780 # pass the available tables so the inspector can 5781 # choose to ignore the filter_names -> 5782 _reflect_info = insp._get_reflection_info( 5783 schema=schema, 5784 filter_names=load, 5785 available=available, 5786 kind=kind, 5787 scope=util.preloaded.engine_reflection.ObjectScope.ANY, 5788 dialect_kwargs, 5789 ) 5790 reflect_opts["_reflect_info"] = _reflect_info 5792 for name in load:

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\engine\reflection.py:2008, in Inspector._get_reflection_info(self, schema, filter_names, available, _reflect_info, **kw) 2004 res = {} 2005 return res 2007 info = _ReflectionInfo( -> 2008 columns=run( 2009 self.get_multi_columns, check_filter_names_from_meth=True 2010 ), 2011 pk_constraint=run(self.get_multi_pk_constraint), 2012 foreign_keys=run(self.get_multi_foreign_keys), 2013 indexes=run(self.get_multi_indexes), 2014 unique_constraints=run( 2015 self.get_multi_unique_constraints, optional=True 2016 ), 2017 table_comment=run(self.get_multi_table_comment, optional=True), 2018 check_constraints=run( 2019 self.get_multi_check_constraints, optional=True 2020 ), 2021 table_options=run(self.get_multi_table_options, optional=True), 2022 unreflectable=unreflectable, 2023 ) 2024 if _reflect_info: 2025 _reflect_info.update(info)

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\engine\reflection.py:1994, in Inspector._get_reflection_info..run(meth, optional, check_filter_names_from_meth) 1992 try: 1993 if has_result: -> 1994 res = meth(filter_names=_fn, **kw) 1995 if check_filter_names_from_meth and not res: 1996 # method returned no result data. 1997 # skip any future call methods 1998 has_result = False

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\engine\reflection.py:921, in Inspector.get_multi_columns(self, schema, filter_names, kind, scope, kw) 884 r"""Return information about columns in all objects in the given 885 schema. 886 (...) 917 .. seealso:: :meth:Inspector.get_columns 918 """ 920 with self._operation_context() as conn: --> 921 table_col_defs = dict( 922 self.dialect.get_multi_columns( 923 conn, 924 schema=schema, 925 filter_names=filter_names, 926 kind=kind, 927 scope=scope, 928 info_cache=self.info_cache, 929 kw, 930 ) 931 ) 932 self._instantiate_types(table_col_defs.values()) 933 return table_col_defs

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\engine\default.py:1098, in DefaultDialect._default_multi_reflect(self, single_tbl_method, connection, kind, schema, filter_names, scope, kw) 1094 key = (schema, table) 1095 try: 1096 yield ( 1097 key, -> 1098 single_tbl_method( 1099 connection, table, schema=schema, kw 1100 ), 1101 ) 1102 except exc.UnreflectableTableError as err: 1103 if key not in unreflectable:

File :2, in get_columns(self, connection, table_name, schema, **kw)

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\engine\reflection.py:97, in cache(fn, self, con, *args, *kw) 95 ret: _R = info_cache.get(key) 96 if ret is None: ---> 97 ret = fn(self, con, args, **kw) 98 info_cache[key] = ret 99 return ret

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\dialects\mysql\base.py:2917, in MySQLDialect.get_columns(self, connection, table_name, schema, kw) 2915 @reflection.cache 2916 def get_columns(self, connection, table_name, schema=None, kw): -> 2917 parsed_state = self._parsed_state_or_create( 2918 connection, table_name, schema, **kw 2919 ) 2920 if parsed_state.columns: 2921 return parsed_state.columns

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\dialects\mysql\base.py:3177, in MySQLDialect._parsed_state_or_create(self, connection, table_name, schema, kw) 3174 def _parsed_state_or_create( 3175 self, connection, table_name, schema=None, kw 3176 ): -> 3177 return self._setup_parser( 3178 connection, 3179 table_name, 3180 schema, 3181 info_cache=kw.get("info_cache", None), 3182 )

File :2, in _setup_parser(self, connection, table_name, schema, **kw)

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\engine\reflection.py:97, in cache(fn, self, con, *args, *kw) 95 ret: _R = info_cache.get(key) 96 if ret is None: ---> 97 ret = fn(self, con, args, **kw) 98 info_cache[key] = ret 99 return ret

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\dialects\mysql\base.py:3212, in MySQLDialect._setup_parser(self, connection, table_name, schema, **kw) 3207 if parser._check_view(sql): 3208 # Adapt views to something table-like. 3209 columns = self._describe_table( 3210 connection, None, charset, full_name=full_name 3211 ) -> 3212 sql = parser._describe_to_create(table_name, columns) 3213 return parser.parse(sql, charset)

File ~\anaconda3\envs\dbgpt\lib\site-packages\sqlalchemy\dialects\mysql\reflection.py:350, in MySQLTableDefinitionParser._describe_to_create(self, table_name, columns) 348 line.append("NOT NULL") 349 if default: --> 350 if "auto_increment" in default: 351 pass 352 elif col_type.startswith("timestamp") and default.startswith( 353 "C" 354 ):

TypeError: a bytes-like object is required, not 'str'

it's giving an error

Relevant Logs/Tracbacks

No response

dosubot[bot] commented 3 months ago

The issue you're encountering with the NLSQLTableQueryEngine not working with views, even after setting view_support=True in the SQLDatabase initialization, is due to the fact that the NLSQLTableQueryEngine class does not have any implementation or parameter that directly handles or checks for view_support. The view_support parameter is correctly handled by the SQLDatabase class during initialization and schema reflection, but this support does not automatically extend to the NLSQLTableQueryEngine.

To support views in the NLSQLTableQueryEngine, you need to ensure that the NLSQLRetriever and other relevant components are properly configured to handle views. Here are the steps you can follow:

  1. Include Views in Table List: Ensure that the views are included in the list of tables that the NLSQLRetriever can query. This can be done by modifying the tables parameter when initializing the NLSQLRetriever.

  2. Get Table Context: The _get_table_context method should be able to retrieve the schema information for views as well as tables. This method constructs the context string that includes the schema information for the tables/views.

Here is an example of how you might adjust the initialization to include views:

from sqlalchemy import Table, MetaData

# Assuming `sql_database` is an instance of SQLDatabase
metadata = MetaData(bind=sql_database.engine)
metadata.reflect(views=True)  # Reflect both tables and views

# Get all table and view names
all_tables_and_views = [Table(t, metadata) for t in metadata.tables.keys()]

# Initialize NLSQLRetriever with the list of tables and views
nlsql_retriever = NLSQLRetriever(
    sql_database=sql_database,
    tables=all_tables_and_views,
    # other parameters...
)

By including views in the tables parameter, the NLSQLRetriever will be able to query them just like regular tables. Additionally, ensure that the _get_table_context method in the NLSQLRetriever class is capable of retrieving and constructing the schema information for both tables and views.

Moreover, the NLSQLTableQueryEngine class, which uses NLSQLRetriever, should be configured properly to handle the views. The NLSQLTableQueryEngine class initializes the NLSQLRetriever with the necessary parameters, including the list of tables and views. Ensure that the tables parameter passed to NLSQLTableQueryEngine includes the views as well.

class NLSQLTableQueryEngine(BaseSQLTableQueryEngine):
    def __init__(
        self,
        sql_database: SQLDatabase,
        text_to_sql_prompt: Optional[BasePromptTemplate] = None,
        context_query_kwargs: Optional[dict] = None,
        synthesize_response: bool = True,
        response_synthesis_prompt: Optional[BasePromptTemplate] = None,
        tables: Optional[Union[List[str], List[Table]]] = None,
        service_context: Optional[ServiceContext] = None,
        context_str_prefix: Optional[str] = None,
        sql_only: bool = False,
        verbose: bool = False,
        **kwargs: Any,
    ) -> None:
        self._sql_retriever = NLSQLRetriever(
            sql_database,
            text_to_sql_prompt=text_to_sql_prompt,
            context_query_kwargs=context_query_kwargs,
            tables=tables,
            context_str_prefix=context_str_prefix,
            service_context=service_context,
            sql_only=sql_only,
            verbose=verbose,
        )
        super().__init__(
            synthesize_response=synthesize_response,
            response_synthesis_prompt=response_synthesis_prompt,
            service_context=service_context,
            verbose=verbose,
            **kwargs,
        )

    @property
    def sql_retriever(self) -> NLSQLRetriever:
        return self._sql_retriever

By ensuring that the views are included in the tables parameter and that the _get_table_context method can handle views, the NLSQLRetriever and NLSQLTableQueryEngine will be able to support SQL views effectively [1][2][3][4].

To continue talking to Dosu, mention @dosu.