wangcailin opened this issue 1 year ago
Which vectorstore are you working with?
I'm using AzureSearch, to which I need to add index_name and search_type as configurable fields.
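For context, search_type can already be made configurable on the retriever side, since retrievers are Runnables; it's index_name, which lives on the vectorstore itself, that has no obvious hook. A minimal sketch of the retriever side (the AzureSearch constructor arguments are from the docs; the endpoint and key values are placeholders):

```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema.runnable import ConfigurableField
from langchain.vectorstores.azuresearch import AzureSearch

vectorstore = AzureSearch(
    azure_search_endpoint="https://<service>.search.windows.net",  # placeholder
    azure_search_key="<api-key>",  # placeholder
    index_name="my-index",
    embedding_function=OpenAIEmbeddings().embed_query,
)

# The retriever is a Runnable, so its own fields can be marked configurable.
# index_name cannot be reached this way because it belongs to the vectorstore.
retriever = vectorstore.as_retriever(search_type="similarity").configurable_fields(
    search_type=ConfigurableField(id="search_type", name="Search Type"),
    search_kwargs=ConfigurableField(id="search_kwargs", name="Search Kwargs"),
)
```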
I'm using AnalyticDB:

```python
from langchain.vectorstores.analyticdb import AnalyticDB
```
Could you elaborate on the use case / workflow?
Is this for ingestion or query time? Why a vectorstore rather than a retriever? If for query, should users be able to query any index or only a list of predefined indexes?
Thanks for your reply. Here is the complete code of my template. The collection_name field is the name of the vector database table, which is accessed through a REST API. I need to set the collection_name field from data passed in by the API. The overall workflow is: the API passes in configuration data, then the RAG chain is executed (see the invocation sketch after the code below).
chain.py
```python
from typing import Tuple, List
from operator import itemgetter

from langchain.schema import format_document, AIMessage, HumanMessage
from langchain.prompts.prompt import PromptTemplate
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import (
    ConfigurableField,
    RunnableBranch,
    RunnableLambda,
    RunnableMap,
    RunnablePassthrough,
)
from langchain.chat_models import ChatOpenAI
from langchain.schema.output_parser import StrOutputParser

try:
    from pydantic.v1 import BaseModel
except ImportError:
    from pydantic import BaseModel

from app.config.system import CONDENSE_QUESTION_PROMPT, PROMPT_TEMPLATE
from app.config.model import embedding_model_dict, EMBEDDING_MODEL
from app.embeddings import embedding_model
from app.vectorstores.analyticdb import connection_string
from langchain.vectorstores.analyticdb import AnalyticDB

vectorstore = AnalyticDB(
    connection_string=connection_string,
    embedding_function=embedding_model(),
    embedding_dimension=embedding_model_dict[EMBEDDING_MODEL]['dimension'],
    collection_name="repository_test",
)
retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.5, "k": 2},
)

# Rebinds the imported CONDENSE_QUESTION_PROMPT string as a chat prompt template.
CONDENSE_QUESTION_PROMPT = ChatPromptTemplate.from_messages(
    [
        ("system", CONDENSE_QUESTION_PROMPT),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

custom_prompt = PromptTemplate.from_template(
    "{custom_prompt}."
).configurable_fields(
    template=ConfigurableField(
        id="custom_prompt",
        name="Prompt",
    )
)

ANSWER_PROMPT = ChatPromptTemplate.from_messages(
    [
        ("system", PROMPT_TEMPLATE),
        ("human", "{question}"),
    ]
)

DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")


def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    doc_strings = [format_document(doc, document_prompt) for doc in docs]
    return document_separator.join(doc_strings)


def _format_chat_history(chat_history: List[Tuple[str, str]]) -> List:
    buffer = []
    for human, ai in chat_history:
        buffer.append(HumanMessage(content=human))
        buffer.append(AIMessage(content=ai))
    return buffer


def _custom_prompt_to_string(prompt) -> str:
    # The prompt template runnable emits a PromptValue; render it to a string.
    return prompt.to_string()


class ChatHistory(BaseModel):
    chat_history: List[Tuple[str, str]] = []
    question: str


max_tokens = ConfigurableField(
    id="max_tokens",
    name="LLM Max Tokens",
)
temperature = ConfigurableField(
    id="temperature",
    name="LLM Temperature",
)
repository_id = ConfigurableField(
    id="repository_id",
    name="LLM Repository ID",
)

model = ChatOpenAI(model="gpt-3.5-turbo-1106").configurable_alternatives(
    ConfigurableField(id="llm"),
    gpt3=ChatOpenAI(model="gpt-3.5-turbo-1106").configurable_fields(
        max_tokens=max_tokens,
        temperature=temperature,
    ),
    gpt4=ChatOpenAI(model="gpt-4-1106-preview").configurable_fields(
        max_tokens=max_tokens,
        temperature=temperature,
    ),
)

_search_query = RunnableBranch(
    # If the input includes chat_history, condense it with the follow-up question.
    (
        RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
            run_name="HasChatHistoryCheck"
        ),
        # Condense the follow-up question and chat into a standalone question.
        RunnablePassthrough.assign(
            chat_history=lambda x: _format_chat_history(x["chat_history"])
        )
        | CONDENSE_QUESTION_PROMPT
        | model
        | StrOutputParser(),
    ),
    # Else there is no chat history, so just pass the question through.
    RunnableLambda(itemgetter("question")),
)

_inputs = RunnableMap(
    {
        "question": lambda x: x["question"],
        "chat_history": lambda x: _format_chat_history(x["chat_history"]),
        "context": _search_query | retriever | _combine_documents,
        "custom_prompt": custom_prompt | _custom_prompt_to_string,
    }
).with_types(input_type=ChatHistory)

chain = _inputs | ANSWER_PROMPT | model | StrOutputParser()
```
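For reference, the per-request configuration the API passes in would be applied roughly like this (a sketch; the values are illustrative, and the ids match the ConfigurableField definitions above):

```python
# Sketch of a per-request invocation; values are illustrative.
result = chain.invoke(
    {"question": "What is AnalyticDB?", "chat_history": []},
    config={
        "configurable": {
            "llm": "gpt4",  # pick the GPT-4 alternative
            "max_tokens": 512,
            "temperature": 0.7,
            "custom_prompt": "You are a helpful assistant.",
        }
    },
)
```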
@eyurtsev Based on the above requirement, could you help guide me? Thank you very much.
@wangcailin / @SuryaPradeepM
I added an example here -- it uses a custom Runnable for making collection name configurable.
https://github.com/langchain-ai/langserve/blob/main/examples/configurable_retrieval/server.py
Let me know if you have questions
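The shape of the pattern, roughly (a sketch adapted to AnalyticDB rather than a verbatim copy of the linked example; connection_string and embedding_model are the helpers from the chain.py above):

```python
from typing import Any, List, Optional

from langchain.schema import Document
from langchain.schema.runnable import ConfigurableField, RunnableSerializable
from langchain.schema.runnable.config import RunnableConfig
from langchain.vectorstores.analyticdb import AnalyticDB

from app.embeddings import embedding_model
from app.vectorstores.analyticdb import connection_string


class ConfigurableRetriever(RunnableSerializable[str, List[Document]]):
    """Retriever that builds the vectorstore per call from a configurable field."""

    collection_name: str

    def invoke(
        self, input: str, config: Optional[RunnableConfig] = None, **kwargs: Any
    ) -> List[Document]:
        # configurable_fields() swaps in the per-request collection_name
        # before this runs, so each call targets the configured table.
        vectorstore = AnalyticDB(
            connection_string=connection_string,
            embedding_function=embedding_model(),
            collection_name=self.collection_name,
        )
        return vectorstore.as_retriever().invoke(input, config=config)


retriever = ConfigurableRetriever(
    collection_name="repository_test"
).configurable_fields(
    collection_name=ConfigurableField(
        id="collection_name",
        name="Collection Name",
        description="The collection (table) to query.",
    )
)

# Per-request override:
# retriever.invoke(
#     "some query",
#     config={"configurable": {"collection_name": "other_table"}},
# )
```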
Can the VectorStore's collection_name be added as a ConfigurableField?