Open SuperFCR opened 1 year ago
我也遇到了这个问题,请问您解决了吗
I am not sure whether this bug is caused by this:
https://github.com/fangwei123456/spikingjelly/blob/master/bugs.md
Bug: When using CuPy with version >= 10, CuPy will change torch.cuda.current_device() to 0 (see https://github.com/cupy/cupy/issues/6569). This bug will break training when using Distributed Data Parallel (DDP).
Please use spikingjelly==0.0.0.0.14 and try again.
Issue type
SpikingJelly version
0.0.0.0.12
Description
Traceback (most recent call last):
File "train.py", line 502, in <module>
main(args)
File "train.py", line 446, in main
train_loss, train_acc1, train_acc5 = train_one_epoch(
File "train.py", line 216, in train_one_epoch
output = model(image)
File "/opt/conda/envs/spiking/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/root/falcary/cifar10dvs/model.py", line 245, in forward
x = self.forward_features(x)
File "/root/falcary/cifar10dvs/model.py", line 238, in forward_features
x = patch_embed(x)
File "/opt/conda/envs/spiking/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/root/falcary/cifar10dvs/model.py", line 153, in forward
x = self.proj_lif(x).flatten(0,1).contiguous()
File "/opt/conda/envs/spiking/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/opt/conda/envs/spiking/lib/python3.8/site-packages/spikingjelly/clock_driven/neuron.py", line 855, in forward
spike_seq, self.v_seq = neuron_kernel.MultiStepLIFNodePTT.apply(
File "/opt/conda/envs/spiking/lib/python3.8/site-packages/torch/autograd/function.py", line 506, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/opt/conda/envs/spiking/lib/python3.8/site-packages/spikingjelly/clock_driven/neuron_kernel.py", line 732, in forward
cp_numel = cupy.asarray(numel)
File "/opt/conda/envs/spiking/lib/python3.8/site-packages/cupy/_creation/from_data.py", line 76, in asarray
return _core.array(a, dtype, False, order)
File "cupy/_core/core.pyx", line 2266, in cupy._core.core.array
File "cupy/_core/core.pyx", line 2290, in cupy._core.core.array
File "cupy/_core/core.pyx", line 2424, in cupy._core.core._array_default
File "cupy/_core/core.pyx", line 699, in cupy._core.core.ndarray.fill
File "cupy/_core/_kernel.pyx", line 900, in cupy._core._kernel.ElementwiseKernel.call
File "cupy/_core/_kernel.pyx", line 925, in cupy._core._kernel.ElementwiseKernel._get_elementwise_kernel
File "cupy/_util.pyx", line 67, in cupy._util.memoize.decorator.ret
File "cupy/_core/_kernel.pyx", line 712, in cupy._core._kernel._get_elementwise_kernel
File "cupy/_core/_kernel.pyx", line 72, in cupy._core._kernel._get_simple_elementwise_kernel
File "cupy/_core/core.pyx", line 2141, in cupy._core.core.compile_with_cache
File "/opt/conda/envs/spiking/lib/python3.8/site-packages/cupy/cuda/compiler.py", line 492, in _compile_module_with_cache
return _compile_with_cache_cuda(
File "/opt/conda/envs/spiking/lib/python3.8/site-packages/cupy/cuda/compiler.py", line 561, in _compile_with_cache_cuda
mod.load(cubin)
File "cupy/cuda/function.pyx", line 264, in cupy.cuda.function.Module.load
File "cupy/cuda/function.pyx", line 266, in cupy.cuda.function.Module.load
File "cupy_backends/cuda/api/driver.pyx", line 210, in cupy_backends.cuda.api.driver.moduleLoadData
File "cupy_backends/cuda/api/driver.pyx", line 60, in cupy_backends.cuda.api.driver.check_status
cupy_backends.cuda.api.driver.CUDADriverError: CUDA_ERROR_INVALID_SOURCE: device kernel image is invalid
Minimal code to reproduce the error/bug
Thanks for your response!