Describe the bug
I am trying to generate synthetic test data using the Azure OpenAI API, with AzureSearch as the document store. However, when I run the code, it raises an AttributeError saying that the 'AzureSearch' object has no attribute 'set_run_config'.
Code to Reproduce
import os
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from langchain.docstore.document import Document
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain_community.vectorstores.azuresearch import AzureSearch
from ragas.testset.evolutions import simple, reasoning, multi_context
from ragas.run_config import RunConfig
from ragas.testset import TestsetGenerator
Describe the bug
I am trying to generate synthetic test data using the Azure OpenAI API, with AzureSearch as the document store. However, when I run the code, it raises an AttributeError saying that the 'AzureSearch' object has no attribute 'set_run_config'.
Ragas version: 0.1.dev317+g7363ec0 Python version: 3.9.12
Code to Reproduce
import os
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from langchain.docstore.document import Document
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain_community.vectorstores.azuresearch import AzureSearch
from ragas.testset.evolutions import simple, reasoning, multi_context
from ragas.run_config import RunConfig
from ragas.testset import TestsetGenerator
# Azure OpenAI settings, exported through the process environment.
# The values are the placeholders left in the report.
os.environ.update(
    {
        "AZURE_OPENAI_API_KEY": "<API_KEY?",
        "AZURE_OPENAI_ENDPOINT": "",
        "OPENAI_API_VERSION": "2024-02-15-preview",
    }
)

# Azure AI Search connection details (left blank in the report).
service_endpoint = ''
index_name = ''
key = ''
credentials = AzureKeyCredential(key)
def get_chunks(search_client):
    """Return every hit of a wildcard search as a list of ``Document`` objects.

    Runs ``search_client.search`` with ``search_text="*"``, asking for up to
    100000 results and only the ``id``, ``content`` and ``metadata`` fields.
    Each hit becomes a ``Document`` whose ``page_content`` is the hit's
    ``content`` and whose metadata holds the hit's ``id``, the hit's
    ``metadata`` value under the ``"title"`` key, and a fixed ``"language"``
    of ``"en"``.

    Args:
        search_client: Object exposing a ``search(...)`` method that yields
            mapping-like results (a ``SearchClient`` in this script).

    Returns:
        A list of ``Document`` instances, in the order the results arrive.
    """
    hits = search_client.search(
        search_text="*",
        top=100000,
        select="id, content, metadata",
    )
    return [
        Document(
            page_content=hit["content"],
            metadata={
                "id": hit["id"],
                "title": hit["metadata"],
                "language": "en",
            },
        )
        for hit in hits
    ]
# Load the source documents from the existing Azure AI Search index.
search_client = SearchClient(
    endpoint=service_endpoint,
    index_name=index_name,
    credential=credentials,
)
chunks = get_chunks(search_client)

# Models used for test-set generation.
run_config = RunConfig()
generator_llm = AzureChatOpenAI(deployment_name="gpt-35-turbo-16k")
critic_llm = AzureChatOpenAI(deployment_name="gpt-4-1106-preview")
embeddings = AzureOpenAIEmbeddings(deployment="text-embedding-ada-002")

# LangChain vector store backed by the same search index.
vector_store = AzureSearch(
    azure_search_endpoint=service_endpoint,
    azure_search_key=key,
    index_name=index_name,
    embedding_function=embeddings.embed_query,
)

# NOTE(review): vector_store is the fourth positional argument here. The
# traceback in this report fails at `self.docstore.set_run_config(run_config)`
# with an 'AzureSearch' object, so this argument appears to be what ends up as
# the generator's docstore -- confirm against the TestsetGenerator signature.
generator = TestsetGenerator(generator_llm, critic_llm, embeddings, vector_store)
testset = generator.generate_with_langchain_docs(
    chunks[:10],
    test_size=10,
    distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25},
    run_config=run_config,
)

# Show the generated test set as a DataFrame.
test_df = testset.to_pandas()
print(test_df)
Error trace
Traceback (most recent call last):
  File "/Users/axs48/Projects/ke2-rag-chatbot/evaluation/test.py", line 59, in <module>
    testset = generator.generate_with_langchain_docs(chunks[: 10], test_size=10, distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25}
  File "/Users/axs48/.pyenv/versions/3.9.12/lib/python3.9/site-packages/ragas/testset/generator.py", line 155, in generate_with_langchain_docs
    return self.generate(
  File "/Users/axs48/.pyenv/versions/3.9.12/lib/python3.9/site-packages/ragas/testset/generator.py", line 199, in generate
    self.docstore.set_run_config(run_config)
AttributeError: 'AzureSearch' object has no attribute 'set_run_config'
Expected behavior
It should generate the test dataset from the input documents.