您好~使用多卡推理时报错如下:请问该怎么解决呢?谢谢
"index out of bounds"failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [21,0,0] Assertion-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [22,0,0] Assertion-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [23,0,0] Assertion-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [24,0,0] Assertion-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [25,0,0] Assertion-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [26,0,0] Assertion-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [27,0,0] Assertion-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [28,0,0] Assertion-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [29,0,0] Assertion-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [30,0,0] Assertion-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [31,0,0] Assertion-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. 
Traceback (most recent call last): File "/project_02/workspace/InternVL2/test_GPUs.py", line 130, in <module> response, history = model.chat(tokenizer, None, question, generation_config, history=None, return_history=True) File "/home/.cache/huggingface/modules/transformers_modules/internvl2-40b/modeling_internvl_chat.py", line 285, in chat generation_output = self.generate( File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context return func(*args, **kwargs) File "/home/.cache/huggingface/modules/transformers_modules/internvl2-40b/modeling_internvl_chat.py", line 335, in generate outputs = self.language_model.generate( File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context return func(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/transformers/generation/utils.py", line 1479, in generate return self.greedy_search( File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/transformers/generation/utils.py", line 2340, in greedy_search outputs = self( File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl return self._call_impl(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl return forward_call(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 1183, in forward outputs = self.model( File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl return self._call_impl(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl return forward_call(*args, **kwargs) File 
"/home/miniconda3/envs/internVL/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 1070, in forward layer_outputs = decoder_layer( File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl return self._call_impl(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl return forward_call(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/accelerate/hooks.py", line 170, in new_forward output = module._old_forward(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 798, in forward hidden_states, self_attn_weights, present_key_value = self.self_attn( File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl return self._call_impl(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl return forward_call(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/accelerate/hooks.py", line 170, in new_forward output = module._old_forward(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 448, in forward attn_output = self.o_proj(attn_output) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl return self._call_impl(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl return forward_call(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/accelerate/hooks.py", line 170, in new_forward output = module._old_forward(*args, **kwargs) File 
"/home/miniconda3/envs/internVL/lib/python3.10/site-packages/bitsandbytes/nn/modules.py", line 441, in forward out = bnb.matmul(x, self.weight, bias=self.bias, state=self.state) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/bitsandbytes/autograd/_functions.py", line 563, in matmul return MatMul8bitLt.apply(A, B, out, bias, state) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/autograd/function.py", line 598, in apply return super().apply(*args, **kwargs) # type: ignore[misc] File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/bitsandbytes/autograd/_functions.py", line 327, in forward CA, CAt, SCA, SCAt, coo_tensorA = F.double_quant(A.to(torch.float16), threshold=state.threshold) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/bitsandbytes/functional.py", line 2016, in double_quant nnz = nnz_row_ptr[-1].item() RuntimeError: CUDA error: device-side assert triggered CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. For debugging consider passing CUDA_LAUNCH_BLOCKING=1. Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
您好~使用多卡推理时报错如下:请问该怎么解决呢?谢谢 "index out of bounds"
failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [21,0,0] Assertion
-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [22,0,0] Assertion
-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [23,0,0] Assertion
-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [24,0,0] Assertion
-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [25,0,0] Assertion
-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [26,0,0] Assertion
-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [27,0,0] Assertion
-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [28,0,0] Assertion
-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [29,0,0] Assertion
-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [30,0,0] Assertion
-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. ../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [1,0,0], thread: [31,0,0] Assertion
-sizes[i] <= index && index < sizes[i] && "index out of bounds"failed. Traceback (most recent call last): File "/project_02/workspace/InternVL2/test_GPUs.py", line 130, in <module> response, history = model.chat(tokenizer, None, question, generation_config, history=None, return_history=True) File "/home/.cache/huggingface/modules/transformers_modules/internvl2-40b/modeling_internvl_chat.py", line 285, in chat generation_output = self.generate( File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context return func(*args, **kwargs) File "/home/.cache/huggingface/modules/transformers_modules/internvl2-40b/modeling_internvl_chat.py", line 335, in generate outputs = self.language_model.generate( File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context return func(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/transformers/generation/utils.py", line 1479, in generate return self.greedy_search( File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/transformers/generation/utils.py", line 2340, in greedy_search outputs = self( File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl return self._call_impl(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl return forward_call(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 1183, in forward outputs = self.model( File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl return self._call_impl(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl return 
forward_call(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 1070, in forward layer_outputs = decoder_layer( File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl return self._call_impl(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl return forward_call(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/accelerate/hooks.py", line 170, in new_forward output = module._old_forward(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 798, in forward hidden_states, self_attn_weights, present_key_value = self.self_attn( File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl return self._call_impl(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl return forward_call(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/accelerate/hooks.py", line 170, in new_forward output = module._old_forward(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 448, in forward attn_output = self.o_proj(attn_output) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl return self._call_impl(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl return forward_call(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/accelerate/hooks.py", line 170, in new_forward output = 
module._old_forward(*args, **kwargs) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/bitsandbytes/nn/modules.py", line 441, in forward out = bnb.matmul(x, self.weight, bias=self.bias, state=self.state) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/bitsandbytes/autograd/_functions.py", line 563, in matmul return MatMul8bitLt.apply(A, B, out, bias, state) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/torch/autograd/function.py", line 598, in apply return super().apply(*args, **kwargs) # type: ignore[misc] File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/bitsandbytes/autograd/_functions.py", line 327, in forward CA, CAt, SCA, SCAt, coo_tensorA = F.double_quant(A.to(torch.float16), threshold=state.threshold) File "/home/miniconda3/envs/internVL/lib/python3.10/site-packages/bitsandbytes/functional.py", line 2016, in double_quant nnz = nnz_row_ptr[-1].item() RuntimeError: CUDA error: device-side assert triggered CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. For debugging consider passing CUDA_LAUNCH_BLOCKING=1. Compile with
`TORCH_USE_CUDA_DSA` to enable device-side assertions.