I’m trying to use an NVIDIA GeForce RTX 2080 SUPER to run gpt4-x-alpaca (chavinlo/gpt4-x-alpaca · Hugging Face) via KoboldAI, and got the following error. It only happens when I try to use the GPU. If I just use the CPU, there is no error with the same PyTorch. Any idea/suggestion for this error?
ERROR | main:generate:4945 - Traceback (most recent call last):
File “aiserver.py”, line 4934, in generate
genout, already_generated = tpool.execute(_generate, txt, minimum, maximum, found_entries)
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/eventlet/tpool.py”, line 132, in execute
six.reraise(c, e, tb)
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/six.py”, line 719, in reraise
raise value
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/eventlet/tpool.py”, line 86, in tworker
rv = meth(*args, **kwargs)
File “aiserver.py”, line 4857, in _generate
genout = generator(
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/torch/autograd/grad_mode.py”, line 27, in decorate_context
return func(*args, **kwargs)
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/transformers/generation/utils.py”, line 1485, in generate
return self.sample(
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/transformers/generation/utils.py”, line 2524, in sample
outputs = self(
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/torch/nn/modules/module.py”, line 1110, in _call_impl
return forward_call(*input, **kwargs)
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/accelerate/hooks.py”, line 165, in new_forward
output = old_forward(*args, **kwargs)
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py”, line 687, in forward
outputs = self.model(
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/torch/nn/modules/module.py”, line 1110, in _call_impl
return forward_call(*input, **kwargs)
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py”, line 577, in forward
layer_outputs = decoder_layer(
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/torch/nn/modules/module.py”, line 1110, in _call_impl
return forward_call(*input, **kwargs)
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/accelerate/hooks.py”, line 165, in new_forward
output = old_forward(*args, **kwargs)
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py”, line 292, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/torch/nn/modules/module.py”, line 1110, in _call_impl
return forward_call(*input, **kwargs)
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/accelerate/hooks.py”, line 165, in new_forward
output = old_forward(*args, **kwargs)
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py”, line 203, in forward
cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/torch/nn/modules/module.py”, line 1110, in _call_impl
return forward_call(*input, **kwargs)
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/accelerate/hooks.py”, line 160, in new_forward
args, kwargs = module._hf_hook.pre_forward(module, *args, **kwargs)
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/accelerate/hooks.py”, line 280, in pre_forward
set_module_tensor_to_device(module, name, self.execution_device, value=self.weights_map[name])
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/accelerate/utils/offload.py”, line 123, in __getitem__
return self.dataset[f"{self.prefix}{key}"]
File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/accelerate/utils/offload.py”, line 170, in __getitem__
weight_info = self.index[key]
KeyError: ‘model.layers.0.self_attn.rotary_emb.cos_cached’
I’m trying to use an NVIDIA GeForce RTX 2080 SUPER to run gpt4-x-alpaca (chavinlo/gpt4-x-alpaca · Hugging Face) via KoboldAI, and got the following error. It only happens when I try to use the GPU. If I just use the CPU, there is no error with the same PyTorch. Any idea/suggestion for this error? ERROR | main:generate:4945 - Traceback (most recent call last): File “aiserver.py”, line 4934, in generate genout, already_generated = tpool.execute(_generate, txt, minimum, maximum, found_entries) File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/eventlet/tpool.py”, line 132, in execute six.reraise(c, e, tb) File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/six.py”, line 719, in reraise raise value File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/eventlet/tpool.py”, line 86, in tworker rv = meth(*args, **kwargs) File “aiserver.py”, line 4857, in _generate genout = generator( File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/torch/autograd/grad_mode.py”, line 27, in decorate_context return func(*args, **kwargs) File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/transformers/generation/utils.py”, line 1485, in generate return self.sample( File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/transformers/generation/utils.py”, line 2524, in sample outputs = self( File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/torch/nn/modules/module.py”, line 1110, in _call_impl return forward_call(*input, **kwargs) File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/accelerate/hooks.py”, line 165, in new_forward output = old_forward(*args, **kwargs) File 
“/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py”, line 687, in forward outputs = self.model( File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/torch/nn/modules/module.py”, line 1110, in _call_impl return forward_call(*input, **kwargs) File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py”, line 577, in forward layer_outputs = decoder_layer( File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/torch/nn/modules/module.py”, line 1110, in _call_impl return forward_call(*input, **kwargs) File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/accelerate/hooks.py”, line 165, in new_forward output = old_forward(*args, **kwargs) File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py”, line 292, in forward hidden_states, self_attn_weights, present_key_value = self.self_attn( File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/torch/nn/modules/module.py”, line 1110, in _call_impl return forward_call(*input, **kwargs) File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/accelerate/hooks.py”, line 165, in new_forward output = old_forward(*args, **kwargs) File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py”, line 203, in forward cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len) File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/torch/nn/modules/module.py”, line 1110, in _call_impl return forward_call(*input, **kwargs) File 
“/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/accelerate/hooks.py”, line 160, in new_forward args, kwargs = module._hf_hook.pre_forward(module, *args, **kwargs) File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/accelerate/hooks.py”, line 280, in pre_forward set_module_tensor_to_device(module, name, self.execution_device, value=self.weights_map[name]) File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/accelerate/utils/offload.py”, line 123, in __getitem__ return self.dataset[f"{self.prefix}{key}"] File “/home/tonyaw/sandbox//koboldai-client/runtime/envs/koboldai/lib/python3.8/site-packages/accelerate/utils/offload.py”, line 170, in __getitem__ weight_info = self.index[key] KeyError: ‘model.layers.0.self_attn.rotary_emb.cos_cached’