explodinggradients / ragas

Evaluation framework for your Retrieval Augmented Generation (RAG) pipelines
https://docs.ragas.io
Apache License 2.0

Can ragas run with a custom Ollama model and custom embeddings? #1246

Open Senthselvi opened 1 week ago

Senthselvi commented 1 week ago

I need code that uses LlamaIndex with a bearer token and base URL, not LangChain.
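For reference, a minimal sketch of the kind of LlamaIndex setup being asked for, assuming an OpenAI-compatible endpoint in front of Ollama; the model name, base URL, and token below are placeholders, not values from this thread:

# Hedged sketch (untested): point LlamaIndex's OpenAI-compatible client at a
# custom endpoint; the bearer token is sent via api_key (Authorization: Bearer <token>).
from llama_index.llms.openai_like import OpenAILike
from ragas.llms import LlamaIndexLLMWrapper

llm = OpenAILike(
    model="qwen2",                         # placeholder model name
    api_base="https://my-ollama-host/v1",  # placeholder base URL
    api_key="<bearer-token>",              # placeholder bearer token
    is_chat_model=True,
)
evaluator_llm = LlamaIndexLLMWrapper(llm)

My current code, which still goes through LangChain, is below.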

from langchain_community.vectorstores import FAISS
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.chains import RetrievalQA
import os
import openai
import time

from langchain.llms.base import LLM
from langchain.callbacks.manager import CallbackManagerForLLMRun
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, LlamaTokenizerFast
from typing import Any, List, Optional
import torch

os.environ["CUDA_VISIBLE_DEVICES"] = "6"

class Qwen_LLM(LLM):
    """Custom LLM class based on a locally loaded Qwen model."""

    tokenizer: AutoTokenizer = None
    model: AutoModelForCausalLM = None

    def __init__(self, mode_name_or_path: str):
        super().__init__()
        print("Loading the model from the local path...")
        self.tokenizer = AutoTokenizer.from_pretrained(mode_name_or_path, use_fast=False)
        self.model = AutoModelForCausalLM.from_pretrained(mode_name_or_path, torch_dtype=torch.bfloat16, device_map="auto")
        self.model.generation_config = GenerationConfig.from_pretrained(mode_name_or_path)
        print("Finished loading the local model")

    def _call(self, prompt: str, stop: Optional[List[str]] = None,
              run_manager: Optional[CallbackManagerForLLMRun] = None,
              **kwargs: Any) -> str:
        messages = [{"role": "user", "content": prompt}]
        # Render the chat template to a prompt string, then tokenize it
        text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        model_inputs = self.tokenizer([text], return_tensors="pt").to('cuda')
        generated_ids = self.model.generate(model_inputs.input_ids, max_new_tokens=512)
        # Strip the prompt tokens from each generated sequence
        generated_ids = [
            output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]
        response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
        return response

    @property
    def _llm_type(self) -> str:
        return "Qwen_LLM"

mode_path = "" llm = Qwen_LLM(mode_name_or_path = mode_path)

embedding_model_dir = "" embedding_model_kwargs = {'device': 'cuda'} encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity embedding_model = HuggingFaceBgeEmbeddings( model_name=embedding_model_dir, model_kwargs=embedding_model_kwargs, encode_kwargs=encode_kwargs, query_instruction="为这个句子生成表示以用于检索相关文章:" )

from ragas.llms import LangchainLLMWrapper from ragas.embeddings import LangchainEmbeddingsWrapper

llm = LangchainLLMWrapper(llm) embedding_model = LangchainEmbeddingsWrapper(embedding_model)

from datasets import Dataset from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall from ragas import evaluate

faithfulness.llm = llm

faithfulness.embeddings = embedding_model


data_samples = {
    'question': ['When was the first super bowl?', 'Who won the most super bowls?'],
    'answer': ['The first superbowl was held on Jan 15, 1967', 'The most super bowls have been won by The New England Patriots'],
    'contexts': [['The First AFL–NFL World Championship Game was an American football game played on January 15, 1967, at the Los Angeles Memorial Coliseum in Los Angeles,'],
                 ['The Green Bay Packers...Green Bay, Wisconsin.', 'The Packers compete...Football Conference']],
    'ground_truth': ['The first superbowl was held on January 15, 1967', 'The New England Patriots have won the Super Bowl a record six times']
}
dataset = Dataset.from_dict(data_samples)

score = evaluate(dataset, metrics=[faithfulness])

score = evaluate(dataset, metrics=[faithfulness, answer_relevancy, context_precision, context_recall], llm=llm, embeddings=embedding_model)
print(score.to_pandas())
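For the "custom Ollama" part of the title, a hedged sketch of the same evaluation through LangChain's Ollama classes, which the wrappers above accept; the model names and base_url are assumptions, not values from this thread:

from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import OllamaEmbeddings
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

# Assumed local Ollama endpoint and model names
ollama_llm = LangchainLLMWrapper(ChatOllama(model="qwen2", base_url="http://localhost:11434"))
ollama_embeddings = LangchainEmbeddingsWrapper(OllamaEmbeddings(model="nomic-embed-text", base_url="http://localhost:11434"))

score = evaluate(dataset, metrics=[faithfulness], llm=ollama_llm, embeddings=ollama_embeddings)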

dosubot[bot] commented 1 week ago

To use custom LLMs and embeddings with the ragas evaluation framework, you can follow the steps below. This example defines a custom LLM (Qwen_LLM) as a plain class and custom embeddings (HuggingFaceBgeEmbeddings), and integrates them with ragas using LlamaIndexLLMWrapper and LlamaIndexEmbeddingsWrapper. Note that HuggingFaceBgeEmbeddings still comes from langchain_community, so the embeddings side is not fully LangChain-free.

Here's the complete code:

import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
from typing import Any, List, Optional
from datasets import Dataset
from langchain_community.embeddings import HuggingFaceBgeEmbeddings  # needed for the embedding model below
from ragas.llms import LlamaIndexLLMWrapper
from ragas.embeddings import LlamaIndexEmbeddingsWrapper
from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall
from ragas import evaluate

# Set environment variable for CUDA
os.environ["CUDA_VISIBLE_DEVICES"] = "6"

# Define custom LLM class
class Qwen_LLM:
    tokenizer: AutoTokenizer = None
    model: AutoModelForCausalLM = None

    def __init__(self, mode_name_or_path: str):
        print("Loading model from local path...")
        self.tokenizer = AutoTokenizer.from_pretrained(mode_name_or_path, use_fast=False)
        self.model = AutoModelForCausalLM.from_pretrained(mode_name_or_path, torch_dtype=torch.bfloat16, device_map="auto")
        self.model.generation_config = GenerationConfig.from_pretrained(mode_name_or_path)
        print("Model loaded successfully")

    def __call__(self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any):
        messages = [{"role": "user", "content": prompt}]
        # Render the chat template to a prompt string, then tokenize it
        text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        model_inputs = self.tokenizer([text], return_tensors="pt").to('cuda')
        generated_ids = self.model.generate(model_inputs.input_ids, max_new_tokens=512)
        # Strip the prompt tokens from each generated sequence
        generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
        response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
        return response

# Initialize custom LLM
mode_path = "<path_to_your_model>"
llm = Qwen_LLM(mode_name_or_path=mode_path)

# Initialize custom embeddings
embedding_model_dir = "<path_to_your_embedding_model>"
embedding_model_kwargs = {'device': 'cuda'}
encode_kwargs = {'normalize_embeddings': True}
embedding_model = HuggingFaceBgeEmbeddings(
    model_name=embedding_model_dir,
    model_kwargs=embedding_model_kwargs,
    encode_kwargs=encode_kwargs,
    query_instruction="为这个句子生成表示以用于检索相关文章:"
)

# Wrap custom LLM and embeddings with LlamaIndex wrappers
# (note: these wrappers expect LlamaIndex LLM/embedding objects,
#  so a plain Python class like Qwen_LLM may not satisfy their interface)
llm = LlamaIndexLLMWrapper(llm)
embedding_model = LlamaIndexEmbeddingsWrapper(embedding_model)

# Define dataset
data_samples = {
    'question': ['When was the first super bowl?', 'Who won the most super bowls?'],
    'answer': ['The first superbowl was held on Jan 15, 1967', 'The most super bowls have been won by The New England Patriots'],
    'contexts': [['The First AFL–NFL World Championship Game was an American football game played on January 15, 1967, at the Los Angeles Memorial Coliseum in Los Angeles,'], 
                 ['The Green Bay Packers...Green Bay, Wisconsin.', 'The Packers compete...Football Conference']],
    'ground_truth': ['The first superbowl was held on January 15, 1967', 'The New England Patriots have won the Super Bowl a record six times']
}
dataset = Dataset.from_dict(data_samples)

# Evaluate using custom LLM and embeddings
score = evaluate(dataset, metrics=[faithfulness, answer_relevancy, context_precision, context_recall], llm=llm, embeddings=embedding_model)
print(score.to_pandas())

This code initializes a custom LLM and embeddings, wraps them using LlamaIndexLLMWrapper and LlamaIndexEmbeddingsWrapper, and then evaluates the dataset using the specified metrics [1][2].
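If the goal is to avoid langchain_community entirely, one hedged alternative is LlamaIndex's own HuggingFace embedding class; this sketch assumes the llama-index-embeddings-huggingface package is installed and uses a placeholder model path:

from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from ragas.embeddings import LlamaIndexEmbeddingsWrapper

# Placeholder model path; the query_instruction mirrors the BGE instruction above
hf_embeddings = HuggingFaceEmbedding(
    model_name="<path_to_your_embedding_model>",
    query_instruction="为这个句子生成表示以用于检索相关文章:",
)
embedding_model = LlamaIndexEmbeddingsWrapper(hf_embeddings)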

To continue talking to Dosu, mention @dosu.

minglong-huang commented 1 week ago

I don't think this will work, will it?