explodinggradients / ragas

Evaluation framework for your Retrieval Augmented Generation (RAG) pipelines
https://docs.ragas.io
Apache License 2.0

TypeError: object list can't be used in 'await' expression #1097

Closed: Rugved2204 closed this issue 1 week ago

Rugved2204 commented 1 month ago

[ ] I have checked the documentation and related resources and couldn't resolve my bug.

Describe the bug: TypeError: object list can't be used in 'await' expression

Ragas version: 0.1.10
Python version: 3.10.12

Code to Reproduce

from langchain_community.document_loaders import PubMedLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEndpoint
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context

embedding_model_name = 'BAAI/bge-small-en-v1.5'
embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
    model_kwargs={'device': 'cuda:0'}
)

model_name = 'mistralai/Mistral-7B-Instruct-v0.2'
llm_model = HuggingFaceEndpoint(
    endpoint_url="http://localhost:8000/",
    max_new_tokens=1024,
    temperature=0.1,
    huggingfacehub_api_token="token",
)

# Load documents
loader = PubMedLoader("liver", load_max_docs=10)
documents = loader.load()
# print(f"Loaded {len(documents)} documents")

# Attempt to generate the testset
generator = TestsetGenerator.from_langchain(
    generator_llm=llm_model,
    critic_llm=llm_model,
    embeddings=embeddings,
)

testset = generator.generate_with_langchain_docs(
    documents,
    test_size=2,
    distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25},
)
testset.to_pandas()

Error trace

  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/home/ubuntu/.local/lib/python3.10/site-packages/ragas/executor.py", line 87, in run
    results = self.loop.run_until_complete(self._aresults())
  File "/usr/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
    return future.result()
  File "/home/ubuntu/.local/lib/python3.10/site-packages/ragas/executor.py", line 79, in _aresults
    r = await future
  File "/usr/lib/python3.10/asyncio/tasks.py", line 571, in _wait_for_one
    return f.result()  # May raise f.exception().
  File "/home/ubuntu/.local/lib/python3.10/site-packages/ragas/executor.py", line 37, in sema_coro
    return await coro
  File "/home/ubuntu/.local/lib/python3.10/site-packages/ragas/executor.py", line 109, in wrapped_callable_async
    raise e
  File "/home/ubuntu/.local/lib/python3.10/site-packages/ragas/executor.py", line 104, in wrapped_callable_async
    result = await callable(*args, **kwargs)
TypeError: object list can't be used in 'await' expression

Expected behavior: the test set should be generated without errors and returned as a TestDataset.

dosubot[bot] commented 1 month ago

To resolve the TypeError: object list can't be used in 'await' expression, you need to ensure that the generate method handles its asynchronous operations correctly. The error means that a plain list is being awaited, which is not allowed: only awaitables such as coroutines, Tasks, and Futures can follow await.
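
As a minimal, self-contained illustration (not ragas code), awaiting any plain list reproduces exactly this error:

import asyncio

async def main():
    items = [1, 2, 3]  # a plain list, not an awaitable
    await items        # TypeError: object list can't be used in 'await' expression

asyncio.run(main())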

Here's how you can modify the generate method to handle asynchronous execution properly:

async def generate(
    self,
    test_size: int,
    distributions: t.Optional[Distributions] = None,
    with_debugging_logs=False,
    is_async: bool = True,
    raise_exceptions: bool = True,
    run_config: t.Optional[RunConfig] = None,
):
    distributions = distributions or DEFAULT_DISTRIBUTION
    if not check_if_sum_is_close(list(distributions.values()), 1.0, 3):
        raise ValueError(
            f"distributions passed do not sum to 1.0 [got {sum(list(distributions.values()))}]. Please check the "
            f"distributions."
        )

    if run_config is None:
        run_config = RunConfig(max_retries=15, max_wait=90)
    self.docstore.set_run_config(run_config)

    for evolution in distributions:
        self.init_evolution(evolution)
        evolution.init(is_async=is_async, run_config=run_config)

    if with_debugging_logs:
        from ragas.utils import patch_logger
        patch_logger("ragas.testset.evolutions", logging.DEBUG)
        patch_logger("ragas.testset.extractor", logging.DEBUG)
        patch_logger("ragas.testset.filters", logging.DEBUG)
        patch_logger("ragas.testset.docstore", logging.DEBUG)
        patch_logger("ragas.llms.prompt", logging.DEBUG)

    exec = Executor(
        desc="Generating",
        keep_progress_bar=True,
        raise_exceptions=raise_exceptions,
        run_config=run_config,
    )

    current_nodes = [
        CurrentNodes(root_node=n, nodes=[n])
        for n in self.docstore.get_random_nodes(k=test_size)
    ]

    total_evolutions = 0
    for evolution, probability in distributions.items():
        for i in sample(range(test_size), round(probability * test_size)):
            exec.submit(
                evolution.evolve,
                current_nodes[i],
                name=f"{evolution.__class__.__name__}-{i}",
            )
            total_evolutions += 1

    if total_evolutions <= test_size:
        filler_evolutions = choices(
            list(distributions), k=test_size - total_evolutions
        )
        for evolution in filler_evolutions:
            exec.submit(
                evolution.evolve,
                current_nodes[total_evolutions],
                name=f"{evolution.__class__.__name__}-{total_evolutions}",
            )
            total_evolutions += 1

    try:
        if is_async:
            test_data_rows = await exec.results()
        else:
            test_data_rows = exec.results()
        if not test_data_rows:
            raise ExceptionInRunner()

    except ValueError as e:
        raise e

    test_data_rows = [r for r in test_data_rows if not is_nan(r)]
    test_dataset = TestDataset(test_data=test_data_rows)
    evol_lang = [get_feature_language(e) for e in distributions]
    evol_lang = [e for e in evol_lang if e is not None]
    track(
        TestsetGenerationEvent(
            event_type="testset_generation",
            evolution_names=[e.__class__.__name__.lower() for e in distributions],
            evolution_percentages=[distributions[e] for e in distributions],
            num_rows=len(test_dataset.test_data),
            language=evol_lang[0] if len(evol_lang) > 0 else "",
        )
    )

    return test_dataset

This modification ensures that the exec.results() method is awaited when is_async is True, preventing the TypeError [1][2].
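
One step worth spelling out: once generate is declared async, every call site must await it. Calling it synchronously no longer runs it; it just creates a coroutine object, which is exactly the failure mode that surfaces later in this thread. A minimal sketch, assuming generator is a TestsetGenerator instance carrying the async generate above:

coro = generator.generate(test_size=2)  # not awaited: returns <coroutine object ...>, nothing runs

async def run():
    return await generator.generate(test_size=2)  # awaited: actually executes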

To continue talking to Dosu, mention @dosu.

Rugved2204 commented 1 month ago

@dosu

Same issue still persists:

Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/home/ubuntu/.local/lib/python3.10/site-packages/ragas/executor.py", line 87, in run
    results = self.loop.run_until_complete(self._aresults())
  File "/home/ubuntu/.local/lib/python3.10/site-packages/nest_asyncio.py", line 98, in run_until_complete
    return f.result()
  File "/usr/lib/python3.10/asyncio/futures.py", line 201, in result
    raise self._exception.with_traceback(self._exception_tb)
  File "/usr/lib/python3.10/asyncio/tasks.py", line 232, in __step
    result = coro.send(None)
  File "/home/ubuntu/.local/lib/python3.10/site-packages/ragas/executor.py", line 79, in _aresults
    r = await future
  File "/usr/lib/python3.10/asyncio/tasks.py", line 571, in _wait_for_one
    return f.result()  # May raise f.exception().
  File "/usr/lib/python3.10/asyncio/futures.py", line 201, in result
    raise self._exception.with_traceback(self._exception_tb)
  File "/usr/lib/python3.10/asyncio/tasks.py", line 232, in __step
    result = coro.send(None)
  File "/home/ubuntu/.local/lib/python3.10/site-packages/ragas/executor.py", line 37, in sema_coro
    return await coro
  File "/home/ubuntu/.local/lib/python3.10/site-packages/ragas/executor.py", line 109, in wrapped_callable_async
    raise e
  File "/home/ubuntu/.local/lib/python3.10/site-packages/ragas/executor.py", line 104, in wrapped_callable_async
    result = await callable(*args, **kwargs)
TypeError: object list can't be used in 'await' expression

Rugved2204 commented 1 month ago

@dosu can you help me solve this problem

dosubot[bot] commented 1 month ago

To resolve the TypeError: object list can't be used in 'await' expression error, ensure that the exec.results() method is awaited correctly if it is an asynchronous function. Here is the corrected code snippet for the generate method:

async def generate(
    self,
    test_size: int,
    distributions: t.Optional[Distributions] = None,
    with_debugging_logs=False,
    is_async: bool = True,
    raise_exceptions: bool = True,
    run_config: t.Optional[RunConfig] = None,
):
    distributions = distributions or DEFAULT_DISTRIBUTION
    if not check_if_sum_is_close(list(distributions.values()), 1.0, 3):
        raise ValueError(
            f"distributions passed do not sum to 1.0 [got {sum(list(distributions.values()))}]. Please check the "
            f"distributions."
        )

    if run_config is None:
        run_config = RunConfig(max_retries=15, max_wait=90)
    self.docstore.set_run_config(run_config)

    for evolution in distributions:
        self.init_evolution(evolution)
        evolution.init(is_async=is_async, run_config=run_config)

    if with_debugging_logs:
        from ragas.utils import patch_logger
        patch_logger("ragas.testset.evolutions", logging.DEBUG)
        patch_logger("ragas.testset.extractor", logging.DEBUG)
        patch_logger("ragas.testset.filters", logging.DEBUG)
        patch_logger("ragas.testset.docstore", logging.DEBUG)
        patch_logger("ragas.llms.prompt", logging.DEBUG)

    exec = Executor(
        desc="Generating",
        keep_progress_bar=True,
        raise_exceptions=raise_exceptions,
        run_config=run_config,
    )

    current_nodes = [
        CurrentNodes(root_node=n, nodes=[n])
        for n in self.docstore.get_random_nodes(k=test_size)
    ]

    total_evolutions = 0
    for evolution, probability in distributions.items():
        for i in sample(range(test_size), round(probability * test_size)):
            exec.submit(
                evolution.evolve,
                current_nodes[i],
                name=f"{evolution.__class__.__name__}-{i}",
            )
            total_evolutions += 1

    if total_evolutions <= test_size:
        filler_evolutions = choices(
            list(distributions), k=test_size - total_evolutions
        )
        for evolution in filler_evolutions:
            exec.submit(
                evolution.evolve,
                current_nodes[total_evolutions],
                name=f"{evolution.__class__.__name__}-{total_evolutions}",
            )
            total_evolutions += 1

    try:
        test_data_rows = await exec.results()  # Ensure to await the async function
        if not test_data_rows:
            raise ExceptionInRunner()
    except ValueError as e:
        raise e

    test_data_rows = [r for r in test_data_rows if not is_nan(r)]
    test_dataset = TestDataset(test_data=test_data_rows)
    evol_lang = [get_feature_language(e) for e in distributions]
    evol_lang = [e for e in evol_lang if e is not None]
    track(
        TestsetGenerationEvent(
            event_type="testset_generation",
            evolution_names=[e.__class__.__name__.lower() for e in distributions],
            evolution_percentages=[distributions[e] for e in distributions],
            num_rows=len(test_dataset.test_data),
            language=evol_lang[0] if len(evol_lang) > 0 else "",
        )
    )

    return test_dataset

Ensure that the surrounding function is also defined as asynchronous if it is not already:

async def generate_with_langchain_docs(
    self,
    documents: t.Sequence[LCDocument],
    test_size: int,
    distributions: t.Optional[Distributions] = None,
    with_debugging_logs=False,
    is_async: bool = True,
    raise_exceptions: bool = True,
    run_config: t.Optional[RunConfig] = None,
):
    distributions = distributions or {}
    self.docstore.add_documents(
        [Document.from_langchain_document(doc) for doc in documents]
    )

    return await self.generate(
        test_size=test_size,
        distributions=distributions,
        with_debugging_logs=with_debugging_logs,
        is_async=is_async,
        raise_exceptions=raise_exceptions,
        run_config=run_config,
    )

This change ensures that the asynchronous exec.results() method is properly awaited, which should resolve the TypeError [1].

To continue talking to Dosu, mention @dosu.

Rugved2204 commented 1 month ago

@dosu

This is the new error I am getting now after making the changes:

Login successful
<coroutine object TestsetGenerator.generate_with_langchain_docs at 0x7a91422dbf40>
sys:1: RuntimeWarning: coroutine 'TestsetGenerator.generate_with_langchain_docs' was never awaited

dosubot[bot] commented 1 month ago

To resolve the issue where the coroutine TestsetGenerator.generate_with_langchain_docs was never awaited, you need to ensure that the coroutine is awaited when it is called. Here is an example of how you can do this:

import asyncio

# Assuming you have an instance of TestsetGenerator and the necessary documents
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_community.document_loaders import PubMedLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEndpoint

# Load documents
loader = PubMedLoader("liver", load_max_docs=10)
documents = loader.load()

# Initialize embeddings and LLM. from_langchain wraps these LangChain objects
# internally, so they do not need to be wrapped in LangchainLLMWrapper or
# LangchainEmbeddingsWrapper here.
embedding_model_name = 'BAAI/bge-small-en-v1.5'
embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
    model_kwargs={'device': 'cuda:0'}
)

model_name = 'mistralai/Mistral-7B-Instruct-v0.2'
llm_model = HuggingFaceEndpoint(
    endpoint_url="http://localhost:8000/",
    max_new_tokens=1024,
    temperature=0.1,
    huggingfacehub_api_token="token",
)

# Initialize TestsetGenerator
generator = TestsetGenerator.from_langchain(
    generator_llm=llm_model,
    critic_llm=llm_model,
    embeddings=embeddings,
)

# Call the coroutine and await it
async def main():
    testset = await generator.generate_with_langchain_docs(
        documents=documents,
        test_size=2,
        distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25}
    )
    print(testset.to_pandas())

# Run the main function
asyncio.run(main())

By wrapping the call to generate_with_langchain_docs in an async function and using await, you ensure that the coroutine is properly awaited [1].
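
One caveat: asyncio.run() raises "RuntimeError: asyncio.run() cannot be called from a running event loop" when an event loop is already active, as in a Jupyter notebook. Since the original script already imports nest_asyncio, here is a minimal sketch of that variant, assuming a notebook-style environment:

import asyncio
import nest_asyncio

nest_asyncio.apply()  # allow re-entering the already-running event loop

# With the patch applied, this works even inside a notebook cell:
asyncio.get_event_loop().run_until_complete(main())

In a plain script, asyncio.run(main()) as shown above is sufficient and nest_asyncio is not needed.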

To continue talking to Dosu, mention @dosu.

jjmachan commented 2 weeks ago

@Rugved2204 were you able to solve this issue?

github-actions[bot] commented 1 week ago

Closing after 8 days of waiting for the additional info requested.