beatG123 opened this issue 2 months ago
The faithfulness.adapt(language="chinese") method is not working in Ragas version 0.0.22 with a Chinese dataset because the Prompt class did not originally support the adapt method. However, the Prompt class has since been updated to include an adapt method, which allows the prompt to be adapted to different languages.

To resolve the validation error in the Prompt class, you need to ensure that the Prompt class has the adapt method implemented correctly. The updated Prompt class with the adapt method is as follows:
class Prompt(BaseModel):
    # existing fields and methods

    def adapt(self, language: str, llm, cache_dir: t.Optional[str] = None) -> Prompt:
        def get_all_keys(nested_json):
            # Collect every key of a (possibly nested) dict, for output validation.
            keys = set()
            for key, value in nested_json.items():
                keys.add(key)
                if isinstance(value, dict):
                    keys = keys.union(get_all_keys(value))
            return keys

        if self.language == language:
            return self

        # Reuse a previously adapted prompt from the cache if one exists.
        cache_dir = cache_dir if cache_dir else get_cache_dir()
        if os.path.exists(os.path.join(cache_dir, language, f"{self.name}.json")):
            self_cp = self._load(language, self.name, cache_dir)
            self.language = self_cp.language
            self.examples = self_cp.examples
            return self_cp

        logger.info("Adapting %s to %s", self.name, language)

        # Build one translation request per input key, plus one for the output.
        prompts = []
        output_keys = []
        for example in self.examples:
            prompts.extend(
                [
                    str_translation.format(
                        translate_to=language, input=example.get(key)
                    )
                    for key in self.input_keys
                ]
            )
            prompts.append(
                json_translatation.format(
                    translate_to=language, input=example.get(self.output_key)
                )
                if self.output_type.lower() == "json"
                else str_translation.format(
                    translate_to=language, input=example.get(self.output_key)
                )
            )
            # Remember the original output keys so the translation can be validated.
            if self.output_type.lower() == "json":
                output = example.get(self.output_key)
                if isinstance(output, str):
                    output = json.loads(output)
                if isinstance(output, dict):
                    output_keys.append(get_all_keys(output))
                elif isinstance(output, list) and all(
                    isinstance(item, dict) for item in output
                ):
                    output_keys.append([get_all_keys(item) for item in output])

        results = []
        for p in prompts:
            results.append(llm.generate_text(p).generations[0][0].text)

        # Regroup the flat result list back into per-example chunks.
        per_example_items = len(self.input_keys) + 1
        grouped_results = [
            results[i : i + per_example_items]
            for i in range(0, len(results), per_example_items)
        ]
        assert len(grouped_results) == len(
            self.examples
        ), "examples and adapted examples must be of equal length"

        for i, example in enumerate(grouped_results):
            example_dict = {}
            example_dict.update(
                {k: v for k, v in zip(self.input_keys, example[: len(self.input_keys)])}
            )
            if self.output_type.lower() == "json":
                example_dict[self.output_key] = json_loader._safe_load(example[-1], llm)
                if example_dict[self.output_key] == {}:
                    dict_str = example[-1].split("(")[0].strip()
                    example_dict[self.output_key] = ast.literal_eval(dict_str)
            else:
                example_dict[self.output_key] = example[-1]

            # Validate that translation preserved the JSON structure.
            if self.output_type.lower() == "json":
                output = example_dict[self.output_key]
                if isinstance(output, dict):
                    assert (
                        set(output.keys()) == output_keys[i]
                    ), f"Adapted output keys {set(output.keys())=} do not match with the original output keys: {output_keys[i]=}"
                elif isinstance(output, list) and all(
                    isinstance(item, dict) for item in output
                ):
                    assert all(
                        set(item.keys()) in output_keys[i] for item in output
                    ), "Adapted output keys do not match with the original output keys"

            self.examples[i] = example_dict

        self.language = language
        self.save(cache_dir=cache_dir)
        return self
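For illustration, here is how the patched method could be exercised on a standalone prompt. This is a minimal sketch: the toy prompt contents are hypothetical, and the ChatOpenAI import path depends on your LangChain setup.

from langchain_openai import ChatOpenAI  # assumption: langchain-openai is installed
from ragas.llms import LangchainLLMWrapper
from ragas.llms.prompt import Prompt

# Hypothetical toy prompt; the fields mirror those used by adapt() above.
qa_prompt = Prompt(
    name="toy_qa",
    instruction="Answer the question.",
    examples=[{"question": "What is the capital of France?", "answer": "Paris"}],
    input_keys=["question"],
    output_key="answer",
    output_type="str",
)

llm = LangchainLLMWrapper(ChatOpenAI())

# The first call translates via the LLM and writes <cache_dir>/chinese/toy_qa.json;
# later calls short-circuit through the cache branch shown above.
adapted = qa_prompt.adapt("chinese", llm)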
After implementing the adapt method in the Prompt class, you can then call it from the Faithfulness class's adapt method:
@dataclass
class Faithfulness(MetricWithLLM, SingleTurnMetric):
    # existing fields and methods

    def adapt(self, language: str, cache_dir: t.Optional[str] = None) -> None:
        assert self.llm is not None, "LLM is not set"

        logger.info(f"Adapting Faithfulness metric to {language}")
        self.nli_statements_message = self.nli_statements_message.adapt(
            language, self.llm, cache_dir
        )
        self.statement_prompt = self.statement_prompt.adapt(
            language, self.llm, cache_dir
        )
        self.sentence_segmenter = get_segmenter(language=language, clean=False)
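In practice, the whole flow is driven from the metric object. A minimal sketch, assuming the langchain-openai package for ChatOpenAI (adjust the import to your LangChain version):

from langchain_openai import ChatOpenAI  # assumption: adjust to your setup
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import faithfulness

# adapt() asserts that an LLM is set, so attach one first.
faithfulness.llm = LangchainLLMWrapper(ChatOpenAI())

# Translates the few-shot examples; Prompt.adapt() caches the result itself,
# so no separate save step is needed.
faithfulness.adapt(language="chinese")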
By following these steps, you should be able to resolve the issue with the faithfulness.adapt(language="chinese") method not working and the validation error in the Prompt class [1][2][3].
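One more thing worth noting from the traceback: the ValidationError is raised inside _load, i.e. while re-reading a previously cached adaptation that is not valid JSON. If an earlier adapt() run wrote a bad file, deleting it forces a fresh translation. A minimal sketch, assuming the default cache location from ragas.utils.get_cache_dir and the {prompt_name}.json naming used by the caching code above:

import os

from ragas.utils import get_cache_dir  # assumption: helper used by Prompt.adapt in 0.1.x

# The failing load reads <cache_dir>/chinese/<prompt_name>.json; removing a
# corrupt cached adaptation makes the next adapt() call translate from scratch.
for prompt_name in ("long_form_answer", "nli_statements"):
    cached = os.path.join(get_cache_dir(), "chinese", f"{prompt_name}.json")
    if os.path.exists(cached):
        os.remove(cached)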
To continue talking to Dosu, mention @dosu.
[ ] I have checked the documentation and related resources and couldn't resolve my bug.
Describe the bug
faithfulness.adapt(language="chinese") has no effect.
Ragas version: 0.1.0
Python version:
Code to Reproduce
Share code to reproduce the issue

from __future__ import annotations

import logging
import typing as t
from dataclasses import dataclass, field

import numpy as np

from ragas.llms.json_load import json_loader
from ragas.llms.prompt import Prompt
from ragas.metrics.base import EvaluationMode, MetricWithLLM

if t.TYPE_CHECKING:
    from langchain_core.callbacks import Callbacks

logger = logging.getLogger(__name__)

LONG_FORM_ANSWER_PROMPT = Prompt(
    name="long_form_answer",
    instruction="Create one or more statements from each sentence in the given answer.",
    examples=[
        {
            "question": "Who was Albert Einstein and what is he best known for?",
            "answer": "He was a German-born theoretical physicist, widely acknowledged to be one of the greatest and most influential physicists of all time. He was best known for developing the theory of relativity, he also made important contributions to the development of the theory of quantum mechanics.",
            "statements": {
                "statements": [
                    "Albert Einstein, a German-born theoretical physicist, is renowned for being one of the most influential physicists in history.",
                    "Albert Einstein was best known for his theory of relativity.",
                    "Einstein's contributions significantly advanced the field of quantum mechanics",
                    "Recognized globally, Einstein's work has profoundly impacted the scientific community",
                    "Einstein's groundbreaking theories continue to shape our understanding of physics today.",
                ]
            },
        },
        {
            "question": "Cadmium Chloride is slightly soluble in this chemical, it is also called what?",
            "answer": "alcohol",
            "statements": {
                "statements": ["Cadmium Chloride is slightly soluble in alcohol."]
            },
        },
        {
            "question": "Were Hitler and Benito Mussolini of the same nationality?",
            "answer": "Sorry, I can't provide answer to that question.",
            "statements": {"statements": []},
        },
    ],
    input_keys=["question", "answer"],
    output_key="statements",
    output_type="JSON",
)  # noqa: E501

NLI_STATEMENTS_MESSAGE = Prompt(
    name="nli_statements",
    instruction="Natural language inference. Use only 'Yes' (1), 'No' (0) and 'Null' (-1) as verdict.",
    examples=[
        {
            "context": """John is a student at XYZ University. He is pursuing a degree in Computer Science. He is enrolled in several courses this semester, including Data Structures, Algorithms, and Database Management. John is a diligent student and spends a significant amount of time studying and completing assignments. He often stays late in the library to work on his projects.""",
            "statements": """
            statement_1: John is majoring in Biology.
            statement_2: John is taking a course on Artificial Intelligence.
            statement_3: John is a dedicated student.
            statement_4: John has a part-time job.
            """,
            "answer": [
                {
                    "statement_1": "John is majoring in Biology.",
                    "reason": "John's major is explicitly mentioned as Computer Science. There is no information suggesting he is majoring in Biology.",
                    "verdict": "0",
                },
                {
                    "statement_2": "John is taking a course on Artificial Intelligence.",
                    "reason": "The context mentions the courses John is currently enrolled in, and Artificial Intelligence is not mentioned. Therefore, it cannot be deduced that John is taking a course on AI.",
                    "verdict": "0",
                },
                {
                    "statement_3": "John is a dedicated student.",
                    "reason": "The context states that he spends a significant amount of time studying and completing assignments. Additionally, it mentions that he often stays late in the library to work on his projects, which implies dedication.",
                    "verdict": "1",
                },
                {
                    "statement_4": "John has a part-time job.",
                    "reason": "There is no information given in the context about John having a part-time job.",
                    "verdict": "0",
                },
            ],
        },
        {
            "context": """Photosynthesis is a process used by plants, algae, and certain bacteria to convert light energy into chemical energy.""",
            "statements": """statement_1: Albert Einstein was a genius.""",
            "answer": {
                "statement_1": "Albert Einstein was a genius.",
                "reason": "The context and statement are unrelated",
                "verdict": "0",
            },
        },
        {
            "context": """Albert Einstein was a German-born theoretical physicist who is widely held to be one of the greatest and most influential scientists of all time.""",
            "statements": """statement_1: Nil""",
            "answer": {
                "statement_1": "Nil",
                "reason": "The statement is invalid",
                "verdict": "-1",
            },
        },
    ],
    input_keys=["context", "statements"],
    output_key="answer",
    output_type="JSON",
)  # noqa: E501

@dataclass
class Faithfulness(MetricWithLLM):
    name: str = "faithfulness"  # type: ignore
    evaluation_mode: EvaluationMode = EvaluationMode.qac  # type: ignore
    long_form_answer_prompt: Prompt = field(
        default_factory=lambda: LONG_FORM_ANSWER_PROMPT
    )
    nli_statements_message: Prompt = field(
        default_factory=lambda: NLI_STATEMENTS_MESSAGE
    )

faithfulness = Faithfulness()
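For context, the metric is then used in an evaluation call along these lines. This is a hypothetical sketch: the dataset rows are invented placeholders, using the column names Ragas 0.1.x expects.

from datasets import Dataset
from ragas import evaluate

# Hypothetical miniature dataset with the columns the faithfulness metric reads.
ds = Dataset.from_dict(
    {
        "question": ["爱因斯坦是谁？"],
        "answer": ["爱因斯坦是一位德裔理论物理学家。"],
        "contexts": [["阿尔伯特·爱因斯坦是德裔理论物理学家，相对论的创立者。"]],
    }
)

result = evaluate(ds, metrics=[faithfulness])
print(result)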
Error trace

Traceback (most recent call last):
  File "D:\.study\myProject\ragas_eval_2.py", line 65, in <module>
    faithfulness.adapt(language="chinese")
  File "F:\Application2022\Anaconda\an\lib\site-packages\ragas\metrics\_faithfulness.py", line 204, in adapt
    self.long_form_answer_prompt = self.long_form_answer_prompt.adapt(
  File "F:\Application2022\Anaconda\an\lib\site-packages\ragas\llms\prompt.py", line 166, in adapt
    return self._load(language, self.name, cache_dir)
  File "F:\Application2022\Anaconda\an\lib\site-packages\ragas\llms\prompt.py", line 242, in _load
    return cls(**json.load(open(path)))
  File "F:\Application2022\Anaconda\an\lib\site-packages\pydantic\v1\main.py", line 341, in __init__
    raise validation_error
pydantic.v1.error_wrappers.ValidationError: 1 validation error for Prompt
__root__
  analysis in example 1 is not in valid json format: Expecting value: line 1 column 1 (char 0) (type=value_error)
Expected behavior

I'm trying to use Ragas to test my RAG (Retrieval-Augmented Generation) program with a Chinese dataset. Initially, I encountered an error saying "no statements were generated from the answer." Later, I added the following code:

from ragas.llms import LangchainLLMWrapper

llm_instance = LangchainLLMWrapper(ChatOpenAI())
faithfulness.llm = llm_instance
faithfulness.adapt(language="chinese")

However, this didn't work, and I'm still getting the same or similar error.
The key points are:
- I'm using a Chinese dataset.
- I'm trying to set up the language model wrapper for Chinese.
- The faithfulness.adapt(language="chinese") call does not seem to be effective in resolving the issue.
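One quick way to check whether the adaptation actually took effect is to inspect the prompt object after calling adapt. A hypothetical debugging snippet, using the fields of the Prompt class shown above:

# After faithfulness.adapt(language="chinese"):
print(faithfulness.long_form_answer_prompt.language)     # expected: "chinese"
print(faithfulness.long_form_answer_prompt.examples[0])  # expected: translated example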