marella / ctransformers

Python bindings for Transformer models implemented in C/C++ using the GGML library.
MIT License

Langchain with GPU not working #125

Closed drmwnrafi closed 11 months ago

drmwnrafi commented 1 year ago

I've tried CTransformers via LangChain with gpu_layers, since AutoModelForCausalLM does not work with LangChain directly:

from langchain.llms import CTransformers

def load_llm(model_path: str = None, model_name: str = None, model_file: str = None):
    if model_path is not None:
        llm = CTransformers(model=model_path,
                            model_type='llama',
                            config={'max_new_tokens': 256, 'temperature': 0.01})
    elif model_name is not None:
        print("Using Internet")
        llm = CTransformers(model=model_name,
                            model_file=model_file,
                            model_type='llama',
                            gpu_layers=50,
                            config={'max_new_tokens': 256, 'temperature': 0.01})
    return llm

llm = load_llm(model_name="TheBloke/Llama-2-13B-GGML", model_file="llama-2-13b.ggmlv3.q4_0.bin")

But the model is not loaded into GPU VRAM (see the attached screenshot). Running in Google Colab with a T4.
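For reference, one way to check whether any layers were actually offloaded (assuming Colab's CUDA runtime; this check is not part of ctransformers itself) is to print the nvidia-smi output after loading the model:

import subprocess

# Show current GPU memory usage; VRAM usage should increase after loading if layers were offloaded
print(subprocess.run(["nvidia-smi"], capture_output=True, text=True).stdout)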

Thanks,

drmwnrafi commented 11 months ago

I've solved this problem, here is the code:

class CustomLLM(LLM):
    model: Any

    def __init__(self, model):
        super().__init__()
        self.model = model

    @property
    def _llm_type(self) -> str:
        return "custom"

    def _call(self, prompt: str, stop: Optional[List[str]] = None,
              run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any) -> str:
        response = self.model(prompt)
        return response[len(prompt):]

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying params."""
        return {}

from ctransformers import AutoModelForCausalLM

llm = AutoModelForCausalLM.from_pretrained(model_path_or_repo_id="TheBloke/Llama-2-7B-Chat-GGML", model_file="llama-2-7b-chat.ggmlv3.q2_K.bin", gpu_layers=50)

llms = CustomLLM(model=llm)
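The wrapped model can then be used anywhere LangChain expects an LLM. A minimal sketch (the prompt template and question are just examples):

from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Build a simple chain around the custom wrapper and run a single question through it
prompt = PromptTemplate(input_variables=["question"], template="Question: {question}\nAnswer:")
chain = LLMChain(llm=llms, prompt=prompt)
print(chain.run(question="What is GGML?"))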
Stosan commented 11 months ago

When I try to replicate your code I get this error: TypeError: LLM.__init__() missing 1 required positional argument: 'model_path'

my implementation:

from ctransformers import AutoModelForCausalLM, LLM
from typing import Any, Optional, List, Mapping
from langchain.callbacks.manager import CallbackManager, CallbackManagerForLLMRun

class CustomLLM(LLM):
    model: Any

    def __init__(self, model):
        super().__init__()
        self.model = model

    @property
    def _llm_type(self) -> str:
        return "custom"

    def _call(self, prompt: str, stop: Optional[List[str]] = None,
              run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any) -> str:
        response = self.model(prompt)
        return response[len(prompt):]

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying params."""
        return {}

llm = AutoModelForCausalLM.from_pretrained(model_path_or_repo_id="TheBloke/Llama-2-7B-Chat-GGML", model_file="llama-2-7b-chat.ggmlv3.q2_K.bin", gpu_layers=10)

llms = CustomLLM(model=llm)
drmwnrafi commented 11 months ago

That's not the LLM class from the ctransformers package; it needs to be the LLM base class from LangChain.

Here is the full code:

from langchain.llms.base import LLM
from typing import Any, List, Mapping, Optional
from langchain.callbacks.manager import CallbackManagerForLLMRun

class CustomLLM(LLM):
    model: Any

    def __init__(self, model):
        super().__init__()
        self.model = model

    @property
    def _llm_type(self) -> str:
        return "custom"

    def _call(self, prompt: str, stop: Optional[List[str]] = None,
              run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any) -> str:
        response = self.model(prompt)
        return response[len(prompt):]

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying params."""
        return {}

from ctransformers import AutoModelForCausalLM

llm = AutoModelForCausalLM.from_pretrained(model_path_or_repo_id="TheBloke/Llama-2-7B-Chat-GGML", model_file="llama-2-7b-chat.ggmlv3.q2_K.bin", gpu_layers=50)

llms = CustomLLM(model=llm)
hvmid commented 10 months ago

After following your code, I try to run the following lines:

qa = RetrievalQA.from_chain_type(llm=llms, chain_type="stuff", retriever=retriever, return_source_documents=True, chain_type_kwargs=chain_type_kwargs, verbose=True)

result = qa(query)

and I get the following error:

TypeError: 'generator' object is not subscriptable
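One possible cause (an assumption, not confirmed in this thread): ctransformers returns a generator of tokens when the model is invoked in streaming mode, and a generator cannot be sliced, so response[len(prompt):] fails. In that case, joining the stream into a string inside _call would avoid the error; a sketch:

    def _call(self, prompt: str, stop: Optional[List[str]] = None,
              run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any) -> str:
        response = self.model(prompt)
        if not isinstance(response, str):
            # streaming mode yields tokens one by one; collect them into a single string
            response = "".join(response)
        return response[len(prompt):]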