Traceback (most recent call last):
File "/nfs/home/zf/project/OmniQuant/main.py", line 375, in
main()
File "/nfs/home/zf/project/OmniQuant/main.py", line 370, in main
evaluate(lm, args,logger)
File "/nfs/home/zf/anaconda3/envs/omni/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/nfs/home/zf/project/OmniQuant/main.py", line 148, in evaluate
t_results = evaluator.simple_evaluate(
File "/nfs/home/zf/project/OmniQuant/lm_eval/utils.py", line 160, in _wrapper
return fn(*args, **kwargs)
File "/nfs/home/zf/project/OmniQuant/lm_eval/evaluator.py", line 69, in simple_evaluate
results = evaluate(
File "/nfs/home/zf/project/OmniQuant/lm_eval/utils.py", line 160, in _wrapper
return fn(*args, **kwargs)
File "/nfs/home/zf/project/OmniQuant/lm_eval/evaluator.py", line 231, in evaluate
resps = getattr(lm, reqtype)([req.args for req in reqs])
File "/nfs/home/zf/project/OmniQuant/models/models_utils.py", line 221, in loglikelihood
return self._loglikelihood_tokens(new_reqs)
File "/nfs/home/zf/project/OmniQuant/models/models_utils.py", line 336, in _loglikelihood_tokens
self._model_call(batched_inps), dim=-1
File "/nfs/home/zf/project/OmniQuant/models/LMClass.py", line 90, in _model_call
return self.model(inps)["logits"]
File "/nfs/home/zf/anaconda3/envs/omni/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/nfs/home/zf/anaconda3/envs/omni/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/nfs/home/zf/anaconda3/envs/omni/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 1183, in forward
outputs = self.model(
File "/nfs/home/zf/anaconda3/envs/omni/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/nfs/home/zf/anaconda3/envs/omni/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/nfs/home/zf/anaconda3/envs/omni/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 1070, in forward
layer_outputs = decoder_layer(
File "/nfs/home/zf/anaconda3/envs/omni/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/nfs/home/zf/anaconda3/envs/omni/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/nfs/home/zf/project/OmniQuant/models/int_llama_layer.py", line 243, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "/nfs/home/zf/anaconda3/envs/omni/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/nfs/home/zf/anaconda3/envs/omni/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/nfs/home/zf/project/OmniQuant/models/int_llama_layer.py", line 125, in forward
kv_seq_len += past_key_value[0].shape[-2]
File "/nfs/home/zf/anaconda3/envs/omni/lib/python3.10/site-packages/transformers/cache_utils.py", line 78, in getitem
raise KeyError(f"Cache only has {len(self)} layers, attempted to access layer with index {layer_idx}")
KeyError: 'Cache only has 0 layers, attempted to access layer with index 0'
I hit the above error while running the following command. What is the cause, and how can I fix it?
CUDA_VISIBLE_DEVICES=0 python main.py \
--model /PATH/TO/LLaMA/llama-7b \
--epochs 20 --output_dir ./log/llama-7b-w4a4 \
--eval_ppl --wbits 4 --abits 4 --lwc --let \
--tasks piqa,arc_easy,arc_challenge,boolq,hellaswag,winogrande
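For what it's worth, the failing line in models/int_llama_layer.py (kv_seq_len += past_key_value[0].shape[-2]) indexes past_key_value as the legacy (key, value) tuple, while the KeyError is raised from transformers' cache_utils.py, so my guess is that the installed transformers version (4.36+, I believe) now passes a Cache object instead of a tuple. Below is a minimal sketch of the kind of guard I imagine would be needed; get_seq_length and the version boundary are assumptions on my part, and I have not verified this against the OmniQuant code, so please correct me if the intended fix is simply pinning the transformers version the repo expects.

# Hypothetical guard for models/int_llama_layer.py (around the failing line).
# Assumption: newer transformers pass a cache_utils.Cache object here,
# whereas this code was written for the legacy (key, value) tuple cache.
if past_key_value is not None:
    if hasattr(past_key_value, "get_seq_length"):
        # transformers >= 4.36 Cache object; returns 0 when the cache is empty
        kv_seq_len += past_key_value.get_seq_length()
    elif len(past_key_value) > 0:
        # legacy tuple-style cache: (key_states, value_states)
        kv_seq_len += past_key_value[0].shape[-2]

If downgrading transformers is the recommended route instead, which version is OmniQuant expected to work with?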