Hi. I'm trying to adapt the code to run with a Pinecone vector database. It runs without any errors, but the chatbot doesn't actually seem to use the context data from the index. I'm using the same index in a different project, also with a ConversationalRetrievalChain, and it works with no issues at all.
I think the issue is with the vectorstore, as the output from "print(vectorstore)" is:
<langchain.vectorstores.pinecone.Pinecone object at 0x126872010> search_type='similarity' search_kwargs={}
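To rule the chain out, a direct similarity search against the store should show whether retrieval itself works; a minimal check (the query string is just a placeholder):
    # Sanity check: query the vectorstore directly, bypassing the chain.
    docs = vectorstore.similarity_search("test query", k=4)  # placeholder query
    for d in docs:
        print(d.page_content[:200])  # should print text from the index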
In the other project, where it's working correctly, the output of the equivalent call (that project runs in Node.js, so console.log(vectorstore)) is the following, which seems a lot more comprehensive:
PineconeStore {
  embeddings: OpenAIEmbeddings {
    caller: AsyncCaller {
      maxConcurrency: Infinity,
      maxRetries: 6,
      queue: [PQueue]
    },
    modelName: 'text-embedding-ada-002',
    batchSize: 512,
    stripNewLines: true,
    timeout: undefined,
    client: undefined,
    clientConfig: { apiKey: 'skxxx' }
  },
  textKey: 'text',
  namespace: 'FCOM',
  pineconeIndex: VectorOperationsApi {
    configuration: Configuration { configuration: [Object] },
    fetchApi: [Function (anonymous)],
    middleware: [],
    _deleteRaw: [Function (anonymous)],
    _delete: [Function (anonymous)],
    delete1Raw: [Function (anonymous)],
    delete1: [Function (anonymous)],
    describeIndexStatsRaw: [Function (anonymous)],
    describeIndexStats: [Function (anonymous)],
    describeIndexStats1Raw: [Function (anonymous)],
    describeIndexStats1: [Function (anonymous)],
    fetchRaw: [Function (anonymous)],
    fetch: [Function (anonymous)],
    queryRaw: [Function (anonymous)],
    query: [Function (anonymous)],
    updateRaw: [Function (anonymous)],
    update: [Function (anonymous)],
    upsertRaw: [Function (anonymous)],
    upsert: [Function (anonymous)]
  },
  filter: undefined
}
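One difference I notice: the Node store explicitly carries textKey: 'text' and namespace: 'FCOM', while the Python retriever prints search_kwargs={}. On the Python side the namespace is set on the store itself (in ingest.py below), so that may be fine, but for completeness, as_retriever() does accept explicit search kwargs; a sketch with illustrative values:
    retriever = vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 4},  # illustrative: number of documents to retrieve
    )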
Here is my modified query_data.py and ingest.py.
query_data.py:
from langchain.callbacks.manager import AsyncCallbackManager
from langchain.callbacks.tracers.langchain_v1 import LangChainTracerV1
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.chat_vector_db.prompts import (CONDENSE_QUESTION_PROMPT, QA_PROMPT)
from langchain.chains.llm import LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.llms import OpenAI
from langchain.vectorstores import Pinecone
from langchain.memory import ConversationBufferMemory
from langchain.schema import Document
from typing import List
# Async wrapper that falls back to the sync retriever call.
# (Defined at module level here; not attached to any retriever class in this file.)
async def aget_relevant_documents(self, query: str) -> List[Document]:
    return self.get_relevant_documents(query)
def get_chain(
    vectorstore: Pinecone,
    question_handler,
    stream_handler,
    tracing: bool = False,
) -> ConversationalRetrievalChain:
    """Create a ConversationalRetrievalChain for question/answering."""
    # Construct a ConversationalRetrievalChain with a streaming llm for combine docs
    # and a separate, non-streaming llm for question generation
    manager = AsyncCallbackManager([])
    question_manager = AsyncCallbackManager([question_handler])
    stream_manager = AsyncCallbackManager([stream_handler])
    if tracing:
        tracer = LangChainTracerV1()
        tracer.load_default_session()
        manager.add_handler(tracer)
        question_manager.add_handler(tracer)
        stream_manager.add_handler(tracer)

    question_gen_llm = ChatOpenAI(
        model="gpt-3.5-turbo",
        temperature=0,
        verbose=True,
        callbacks=[question_handler],
    )
    streaming_llm = ChatOpenAI(
        model="gpt-4",
        streaming=True,
        callbacks=[stream_handler],
        verbose=True,
        temperature=0,
    )
    # NOTE: memory is built here but not passed to the chain below
    memory = ConversationBufferMemory(
        output_key='answer',
        memory_key='chat_history',
        return_messages=True,
    )

    question_generator = LLMChain(
        llm=question_gen_llm, prompt=CONDENSE_QUESTION_PROMPT, callback_manager=manager
    )
    doc_chain = load_qa_chain(
        streaming_llm, chain_type="stuff", prompt=QA_PROMPT, callback_manager=manager
    )

    print("vectorstore:", vectorstore)
    qa = ConversationalRetrievalChain(
        retriever=vectorstore.as_retriever(),
        combine_docs_chain=doc_chain,
        question_generator=question_generator,
        callback_manager=manager,
    )
    print("qa", qa)
    return qa
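To see whether the chain is actually receiving any retrieved context, I could also ask it to return the documents it used (ConversationalRetrievalChain supports a return_source_documents flag); a minimal sketch reusing the pieces built above:
    qa = ConversationalRetrievalChain(
        retriever=vectorstore.as_retriever(),
        combine_docs_chain=doc_chain,
        question_generator=question_generator,
        return_source_documents=True,  # include retrieved docs in the result
    )
    result = qa({"question": "test question", "chat_history": []})
    print(result["source_documents"])  # an empty list would point at retrieval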
ingest.py:
import os
from langchain.document_loaders import ReadTheDocsLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pinecone
from langchain.vectorstores import Pinecone
from credentials import PINECONE_INDEX_NAME, PINECONE_NAME_SPACE, PINECONE_API_KEY, PINECONE_ENVIRONMENT
from dotenv import load_dotenv
load_dotenv()
openai_api_key = os.environ.get("OPENAI_API_KEY")
def loadVectorStore():
    pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENVIRONMENT)
    index = pinecone.Index(PINECONE_INDEX_NAME)
    print("index:", index.describe_index_stats())
    embeddings = OpenAIEmbeddings()
    vectorstore = Pinecone.from_existing_index(
        index_name=PINECONE_INDEX_NAME,
        embedding=embeddings,
        namespace=PINECONE_NAME_SPACE,
    )
    print("Vectorstore: ", vectorstore, "vectorstoreEnd")
    return vectorstore
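One more check I can think of: the Python Pinecone wrapper reads document text from a metadata key (text_key, default "text"), and the Node dump above shows textKey: 'text', so those should line up; to confirm, a rough sketch that pulls one record from the namespace and lists its metadata keys (the zero vector and the 1536 dimension are placeholders, assuming text-embedding-ada-002):
    def inspect_sample(index, namespace):
        # Throwaway query vector just to pull back one stored record.
        res = index.query(
            vector=[0.0] * 1536,  # placeholder; 1536 assumes text-embedding-ada-002
            top_k=1,
            include_metadata=True,
            namespace=namespace,
        )
        for match in res.matches:
            # Expect 'text' among the metadata keys if ingestion matches the wrapper.
            print(match.id, list(match.metadata.keys()))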