bravo1800 opened this issue 1 year ago (Open)
I get a similar error on my 8 GB RAM Windows 11 notebook: RuntimeError: [enforce fail at ..\c10\core\impl\alloc_cpu.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 180355072 bytes. This seems to imply there is not enough RAM on my notebook, but I am not sure. I only have the sample PDF in the source documents.
I have tried running localGPT and get the following error: [enforce fail at ..\c10\core\impl\alloc_cpu.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 67108864 bytes.
C:\AI\LocalGPT>python run_localGPT.py
Running on: cuda
load INSTRUCTOR_Transformer
max_seq_length 512
Using embedded DuckDB with persistence: data will be stored in: C:\AI\LocalGPT/DB
Traceback (most recent call last):
  C:\AI\LocalGPT\run_localGPT.py:88 in <module>
      main()
  C:\Python\lib\site-packages\click\core.py:1130 in __call__
      return self.main(*args, **kwargs)
  C:\Python\lib\site-packages\click\core.py:1055 in main
      rv = self.invoke(ctx)
  C:\Python\lib\site-packages\click\core.py:1404 in invoke
      return ctx.invoke(self.callback, **ctx.params)
  C:\Python\lib\site-packages\click\core.py:760 in invoke
      return __callback(*args, **kwargs)
  C:\AI\LocalGPT\run_localGPT.py:61 in main
      llm = load_model()
  C:\AI\LocalGPT\run_localGPT.py:21 in load_model
      model = LlamaForCausalLM.from_pretrained(model_id,
                  # load_in_8bit=True,  # set these options i…
                  # device_map=1 #'auto',
                  # torch_dtype=torch.float16,
                  ...)
  C:\Python\lib\site-packages\transformers\modeling_utils.py:2611 in from_pretrained
      model = cls(config, *model_args, **model_kwargs)
  C:\Python\lib\site-packages\transformers\models\llama\modeling_llama.py:615 in __init__
      self.model = LlamaModel(config)
  C:\Python\lib\site-packages\transformers\models\llama\modeling_llama.py:446 in __init__
      self.layers = nn.ModuleList([LlamaDecoderLayer(config) for _ in range(config.num_hidden_layers)])
  C:\Python\lib\site-packages\transformers\models\llama\modeling_llama.py:256 in __init__
      self.self_attn = LlamaAttention(config=config)
  C:\Python\lib\site-packages\transformers\models\llama\modeling_llama.py:179 in __init__
      self.v_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False)
  C:\Python\lib\site-packages\torch\nn\modules\linear.py:96 in __init__
      self.weight = Parameter(torch.empty((out_features, in_features), **factory_kwargs))
RuntimeError: [enforce fail at ..\c10\core\impl\alloc_cpu.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 67108864 bytes.
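For what it's worth, the traceback shows the model being loaded in full float32 on the CPU: the memory-saving options in load_model are commented out. A 7B-parameter model at 4 bytes per weight is roughly 26–28 GB, so an 8 GB machine runs out of RAM long before loading finishes; the failing 64 MB allocation is simply the first request the allocator can no longer satisfy. Below is a minimal sketch of just the from_pretrained step with lighter-weight options enabled. This is an assumption about a possible workaround, not the project's official fix: device_map="auto" requires the accelerate package, and the optional load_in_8bit requires bitsandbytes plus a CUDA GPU.

```python
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

model_id = "TheBloke/vicuna-7B-1.1-HF"  # same model id used in run_localGPT.py
tokenizer = LlamaTokenizer.from_pretrained(model_id)
model = LlamaForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,   # ~13 GB of weights instead of ~26 GB in float32
    low_cpu_mem_usage=True,      # stream weights in instead of materializing a full fp32 copy in RAM first
    device_map="auto",           # requires `accelerate`; places layers on GPU/CPU as memory allows
    # load_in_8bit=True,         # optional: requires `bitsandbytes` and a CUDA GPU
)
```

Even with fp16 and offloading, 8 GB of system RAM is tight for a 7B model, so a quantized variant or a smaller model may still be necessary.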