Open Jeferson100 opened 1 day ago
Hey, can you give me a link to the dataframe that you are working with? I am trying to replicate your code to try and debug the issue.
I think the problem might be in `_query`:
def _query(
    self,
    query: Any,
    k: Optional[int] = None,
    filter: Optional[Any] = None,
    name: Optional[str] = None,
    **kwargs: Any,
) -> Any:
    """Search the table returned by ``self.get_table(name)``.

    Args:
        query: Passed unchanged to the table's ``search`` call.
        k: Maximum number of results; ``self.limit`` is used when ``None``.
        filter: Forwarded to ``where``. A ``dict`` is first converted
            with ``to_lance_filter``; any other value is passed as-is.
        name: Table name handed to ``self.get_table``.
        **kwargs: Recognised keys are ``prefilter`` (default ``False``),
            ``query_type`` (default ``"vector"``) and ``metrics``.

    Returns:
        Whatever the query builder's ``to_arrow()`` returns (presumably a
        pyarrow table -- confirm against the LanceDB version in use).
        A warning is emitted when the result has length zero.
    """
    limit = self.limit if k is None else k
    table = self.get_table(name)

    if isinstance(filter, dict):
        filter = to_lance_filter(filter)

    prefilter = kwargs.get("prefilter", False)
    # NOTE: query_type only gates the rerank step below; it is not
    # forwarded to ``search``.
    query_type = kwargs.get("query_type", "vector")
    metrics = kwargs.get("metrics")

    builder = table.search(
        query=query, vector_column_name=self._vector_key
    ).limit(limit)
    if metrics:
        # A distance metric is applied only when the caller supplied a
        # truthy ``metrics`` value.
        builder = builder.metric(metrics)
    builder = builder.where(filter, prefilter=prefilter)

    wants_rerank = query_type == "hybrid" and self._reranker is not None
    if wants_rerank:
        # Return value intentionally unused, as in the original code.
        builder.rerank(reranker=self._reranker)

    results = builder.to_arrow()
    if len(results) == 0:
        warnings.warn("No results found for the query.")
    return results
The following error occurs when calling the function:
tbl = docsearch.get_table(name="movies")
docsearch._query(query="I'm looking for an animated action movie. What could you suggest to me?", name="movies")
ValueError Traceback (most recent call last)
Cell In[32], [line 2](vscode-notebook-cell:?execution_count=32&line=2)
[1](vscode-notebook-cell:?execution_count=32&line=1) tbl = docsearch.get_table(name="movies")
----> [2](vscode-notebook-cell:?execution_count=32&line=2) docsearch._query(query="I'm looking for an animated action movie. What could you suggest to me?", name="movies")
File c:\Users\jefer\Documents\Livros\LLMs\BUILDING_LLM_POWERED_APPLICATIONS\Building-LLM-Powered-Applications\.conda\Lib\site-packages\langchain_community\vectorstores\lancedb.py:391, in LanceDB._query(self, query, k, filter, name, **kwargs)
[388](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/langchain_community/vectorstores/lancedb.py:388) if query_type == "hybrid" and self._reranker is not None:
[389](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/langchain_community/vectorstores/lancedb.py:389) lance_query.rerank(reranker=self._reranker)
--> [391](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/langchain_community/vectorstores/lancedb.py:391) docs = lance_query.to_arrow()
[392](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/langchain_community/vectorstores/lancedb.py:392) if len(docs) == 0:
[393](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/langchain_community/vectorstores/lancedb.py:393) warnings.warn("No results found for the query.")
File c:\Users\jefer\Documents\Livros\LLMs\BUILDING_LLM_POWERED_APPLICATIONS\Building-LLM-Powered-Applications\.conda\Lib\site-packages\lancedb\query.py:810, in LanceFtsQueryBuilder.to_arrow(self)
[793](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/query.py:793) raise NotImplementedError(
[794](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/query.py:794) "Phrase query is not yet supported in Lance FTS. "
[795](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/query.py:795) "Use tantivy-based index instead for now."
[796](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/query.py:796) )
[797](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/query.py:797) query = Query(
[798](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/query.py:798) columns=self._columns,
[799](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/query.py:799) filter=self._where,
(...)
[808](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/query.py:808) offset=self._offset,
[809](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/query.py:809) )
--> [810](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/query.py:810) results = self._table._execute_query(query)
[811](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/query.py:811) results = results.read_all()
[812](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/query.py:812) if self._reranker is not None:
File c:\Users\jefer\Documents\Livros\LLMs\BUILDING_LLM_POWERED_APPLICATIONS\Building-LLM-Powered-Applications\.conda\Lib\site-packages\lancedb\table.py:1752, in LanceTable._execute_query(self, query, batch_size)
[1733](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/table.py:1733) if len(query.vector) > 0:
[1734](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/table.py:1734) nearest = {
[1735](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/table.py:1735) "column": query.vector_column,
[1736](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/table.py:1736) "q": query.vector,
(...)
[1740](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/table.py:1740) "refine_factor": query.refine_factor,
[1741](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/table.py:1741) }
[1742](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/table.py:1742) return ds.scanner(
[1743](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/table.py:1743) columns=query.columns,
[1744](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/table.py:1744) limit=query.k,
[1745](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/table.py:1745) filter=query.filter,
[1746](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/table.py:1746) prefilter=query.prefilter,
[1747](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/table.py:1747) nearest=nearest,
[1748](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/table.py:1748) full_text_query=query.full_text_query,
[1749](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/table.py:1749) with_row_id=query.with_row_id,
[1750](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/table.py:1750) batch_size=batch_size,
[1751](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/table.py:1751) offset=query.offset,
-> [1752](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lancedb/table.py:1752) ).to_reader()
File c:\Users\jefer\Documents\Livros\LLMs\BUILDING_LLM_POWERED_APPLICATIONS\Building-LLM-Powered-Applications\.conda\Lib\site-packages\lance\dataset.py:2540, in LanceScanner.to_reader(self)
[2539](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lance/dataset.py:2539) def to_reader(self) -> pa.RecordBatchReader:
-> [2540](file:///C:/Users/jefer/Documents/Livros/LLMs/BUILDING_LLM_POWERED_APPLICATIONS/Building-LLM-Powered-Applications/.conda/Lib/site-packages/lance/dataset.py:2540) return self._scanner.to_pyarrow()
ValueError: LanceError(IO): Full text search supports only one column right now, but got 0 columns, D:\a\lance\lance\rust\lance\src\dataset\scanner.rs:1147:17
Checked other resources
Example Code
The DataFrame is in the following format.
The error occurs in the following code.
Error Message and Stack Trace (if applicable)
File c:\Users\jefer\Documents\Livros\LLMs\BUILDING_LLM_POWERED_APPLICATIONS\Building-LLM-Powered-Applications\.conda\Lib\site-packages\langchain_community\vectorstores\lancedb.py:524, in LanceDB.similarity_search(self, query, k, name, filter, fts, **kwargs) 500 def similarity_search( 501 self, 502 query: str, (...) 507 **kwargs: Any, 508 ) -> List[Document]: 509 """Return documents most similar to the query 510 511 Args: (...) 522 List of documents most similar to the query. 523 """ --> 524 res = self.similarity_search_with_score( 525 query=query, k=k, name=name, filter=filter, fts=fts, score=False, **kwargs 526 ) 527 return res
File c:\Users\jefer\Documents\Livros\LLMs\BUILDING_LLM_POWERED_APPLICATIONS\Building-LLM-Powered-Applications\.conda\Lib\site-packages\langchain_community\vectorstores\lancedb.py:498, in LanceDB.similarity_search_with_score(self, query, k, filter, **kwargs) 496 embedding = self._embedding.embed_query(query) 497 res = self._query(embedding, k, filter=filter, **kwargs) --> 498 return self.results_to_docs(res, score=score)
File c:\Users\jefer\Documents\Livros\LLMs\BUILDING_LLM_POWERED_APPLICATIONS\Building-LLM-Powered-Applications.conda\Lib\site-packages\langchain_community\vectorstores\lancedb.py:155, in LanceDB.results_to_docs(self, results, score) 152 score_col = None 154 if score_col is None or not score: --> 155 return [ 156 Document( 157 page_content=results[self._text_key][idx].as_py(), 158 metadata=results["metadata"][idx].as_py(), 159 ) 160 for idx in range(len(results)) 161 ] 162 elif score_col and score: 163 return [ 164 ( 165 Document( (...) 171 for idx in range(len(results)) 172 ]
File c:\Users\jefer\Documents\Livros\LLMs\BUILDING_LLM_POWERED_APPLICATIONS\Building-LLM-Powered-Applications\.conda\Lib\site-packages\langchain_community\vectorstores\lancedb.py:158, in <listcomp>(.0)
152 score_col = None
154 if score_col is None or not score:
155 return [
156 Document(
157 page_content=results[self._text_key][idx].as_py(),
--> 158 metadata=results["metadata"][idx].as_py(),
159 )
160 for idx in range(len(results))
161 ]
162 elif score_col and score:
163 return [
164 (
165 Document(
(...)
171 for idx in range(len(results))
172 ]
File c:\Users\jefer\Documents\Livros\LLMs\BUILDING_LLM_POWERED_APPLICATIONS\Building-LLM-Powered-Applications.conda\Lib\site-packages\pyarrow\table.pxi:1539, in pyarrow.lib._Tabular.getitem()
File c:\Users\jefer\Documents\Livros\LLMs\BUILDING_LLM_POWERED_APPLICATIONS\Building-LLM-Powered-Applications.conda\Lib\site-packages\pyarrow\table.pxi:1625, in pyarrow.lib._Tabular.column()
File c:\Users\jefer\Documents\Livros\LLMs\BUILDING_LLM_POWERED_APPLICATIONS\Building-LLM-Powered-Applications.conda\Lib\site-packages\pyarrow\table.pxi:1561, in pyarrow.lib._Tabular._ensure_integer_index()
KeyError: 'Field "metadata" does not exist in schema'
Description
I connected to the LanceDB database and tried to create a table called "movies" using the data from the md_final DataFrame. If the table already existed, I opened it. Then, I initialized an instance of LanceDB with the "sentence-transformers/all-MiniLM-L6-v2" embeddings model, configuring the vector key, ID key, and text key options. I defined a text query and called the similarity_search method to obtain the relevant documents.
System Info