fangwei123456 / spikingjelly

SpikingJelly is an open-source deep learning framework for Spiking Neural Network (SNN) based on PyTorch.
https://spikingjelly.readthedocs.io

How to fix this bug when I want to write a novel CUDA kernel within your code structure? #502

Open BodongDu opened 4 months ago

BodongDu commented 4 months ago
class MyKernel(NeuronFPTTKernel):
    def __init__(self, hard_reset, dtype):
        super().__init__(hard_reset, dtype)
        self.kernel_name = "IFNodeFPTTKernel_float_hard_reset"
        self.full_codes = '''#include <cuda_fp16.h>
extern "C" __global__ void IFNodeFPTTKernel_half_hard_reset( const int & numel, const int & N, const float * x_seq, float * v_v_seq, float * h_seq, float * spike_seq, float & v_th, float & v_reset)
{
    const int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index < N)
    {
        const int dt = N;

        for(int t = index; t < numel; t += dt)
        {

            h_seq[t] = x_seq[t] + v_v_seq[t];
            spike_seq[t] = (h_seq[t] - v_th) >= 0.0f ? 1.0f: 0.0f;
            v_v_seq[t + dt] = h_seq[t] * (1.0f - spike_seq[t]) + v_reset * spike_seq[t];

        }

    }
    }'''
    @property
    def full_codes(self):
        return self._full_codes

    @full_codes.setter
    def full_codes(self, value):
        # Add any validation or processing logic if needed
        self._full_codes = value
forward_kernel = MyKernel(hard_reset=hard_reset, dtype=dtype)

I used "IFNodeFPTTKernel" as a reference to write my own CUDA kernel, and I believe I only need to change "self.full_codes" and "self.kernel_name" to what I want. But when I run this class, it raises an error. Could you tell me how to fix this bug?

Traceback (most recent call last):
  File "cupyNet.py", line 95, in <module>
    y_cupy = net_cupy(x_seq)
  File "/opt/anaconda3/envs/torch1.12/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/cxy_xs/dbd/spikingjelly/spikingjelly/activation_based/base.py", line 270, in forward
    return self.multi_step_forward(*args, **kwargs)
  File "cupyNet.py", line 75, in multi_step_forward
    spike_seq, v_seq = IFNodeATGF.apply(x_seq.flatten(1), self.v.flatten(), self.v_threshold, self.v_reset, forward_kernel, backward_kernel)
  File "/home/cxy_xs/dbd/spikingjelly/spikingjelly/activation_based/auto_cuda/neuron_kernel.py", line 459, in forward
    forward_kernel((blocks,), (threads,), py_dict)
  File "/home/cxy_xs/dbd/spikingjelly/spikingjelly/activation_based/auto_cuda/base.py", line 1265, in __call__
    super().__call__(grid, block, py_dict, *args_1, **kwargs)
  File "/home/cxy_xs/dbd/spikingjelly/spikingjelly/activation_based/auto_cuda/base.py", line 341, in __call__
    cp_kernel(grid, block, self.get_ptrs(py_dict), *args_1, **kwargs)
  File "cupy/_core/raw.pyx", line 89, in cupy._core.raw.RawKernel.__call__
  File "cupy/_core/raw.pyx", line 96, in cupy._core.raw.RawKernel.kernel.__get__
  File "cupy/_core/raw.pyx", line 117, in cupy._core.raw.RawKernel._kernel
  File "cupy/cuda/function.pyx", line 276, in cupy.cuda.function.Module.get_function
  File "cupy/cuda/function.pyx", line 217, in cupy.cuda.function.Function.__init__
  File "cupy_backends/cuda/api/driver.pyx", line 226, in cupy_backends.cuda.api.driver.moduleGetFunction
  File "cupy_backends/cuda/api/driver.pyx", line 60, in cupy_backends.cuda.api.driver.check_status
cupy_backends.cuda.api.driver.CUDADriverError: CUDA_ERROR_NOT_FOUND: named symbol not found

fangwei123456 commented 4 months ago

Hi, I suggest printing the code of the kernel (via the .code attribute, see https://docs.cupy.dev/en/stable/reference/generated/cupy.RawKernel.html#cupy.RawKernel.code) before calling it, and checking the generated CUDA code.
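For example, a minimal check along these lines (an editor's sketch, not from the thread; forward_kernel is assumed to be the MyKernel instance from the first snippet):

# Print the source that will be compiled and the symbol name that will be
# looked up in the compiled module; the two must agree for CuPy to find the kernel.
print(forward_kernel.kernel_name)
print(forward_kernel.full_codes)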

BodongDu commented 4 months ago

Thank you, I will try it!!

BodongDu commented 4 months ago
cp_kernel = cupy.RawKernel(self.full_codes, self.kernel_name, options=configure.cuda_compiler_options,
                           backend=configure.cuda_compiler_backend)

print(cp_kernel.code)
with cuda_utils.DeviceEnvironment(device):
    cp_kernel(grid, block, self.get_ptrs(py_dict), *args_1, **kwargs)

The result is:

extern "C" __global__ void IFNodeFPTTKernel_half_hard_reset( const int & numel, const int & N, const float * x_seq, float * v_v_seq, float * h_seq, float * spike_seq, float & v_th, float & v_reset)
{
    const int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index < N)
    {
        const int dt = N;

        for(int t = index; t < numel; t += dt)
        {

            h_seq[t] = x_seq[t] + v_v_seq[t];
            spike_seq[t] = (h_seq[t] - v_th) >= 0.0f ? 1.0f: 0.0f;
            v_v_seq[t + dt] = h_seq[t] * (1.0f - spike_seq[t]) + v_reset * spike_seq[t];

        }

    }
    }

It seems right, because I can compile it with nvcc, but it still raises the error. What do you think?

fangwei123456 commented 4 months ago

You can try calling this kernel manually (with CuPy directly, rather than through SpikingJelly's functions) and check whether it raises any errors.
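For reference, a standalone launch might look like the sketch below (an editor's illustration, not from the thread: the shapes T and N, the thread count, and the threshold/reset values are made up; the CUDA source is the one posted above). Because the kernel declares its scalar parameters as references (const int &, float &), they are passed as pointers, i.e. as 0-d CuPy arrays rather than Python scalars:

import cupy as cp

T, N = 4, 8  # illustrative: sequence length and number of neurons

source = r'''
extern "C" __global__ void IFNodeFPTTKernel_half_hard_reset( const int & numel, const int & N, const float * x_seq, float * v_v_seq, float * h_seq, float * spike_seq, float & v_th, float & v_reset)
{
    const int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index < N)
    {
        const int dt = N;
        for(int t = index; t < numel; t += dt)
        {
            h_seq[t] = x_seq[t] + v_v_seq[t];
            spike_seq[t] = (h_seq[t] - v_th) >= 0.0f ? 1.0f : 0.0f;
            v_v_seq[t + dt] = h_seq[t] * (1.0f - spike_seq[t]) + v_reset * spike_seq[t];
        }
    }
}
'''

# The name passed here must match the __global__ function defined in the
# source; a mismatch is what makes the driver raise
# CUDA_ERROR_NOT_FOUND: named symbol not found.
kernel = cp.RawKernel(source, 'IFNodeFPTTKernel_half_hard_reset')

# Scalars are taken by reference in the kernel signature, so pass them as
# 0-d device arrays (RawKernel passes ndarray arguments as raw pointers).
numel = cp.asarray(T * N, dtype=cp.int32)
n = cp.asarray(N, dtype=cp.int32)
v_th = cp.asarray(1.0, dtype=cp.float32)
v_reset = cp.asarray(0.0, dtype=cp.float32)

x_seq = cp.random.rand(T, N).astype(cp.float32)
v_v_seq = cp.zeros((T + 1, N), dtype=cp.float32)  # one extra step for v_v_seq[t + dt]
h_seq = cp.zeros((T, N), dtype=cp.float32)
spike_seq = cp.zeros((T, N), dtype=cp.float32)

threads = 256
blocks = (N + threads - 1) // threads
kernel((blocks,), (threads,), (numel, n, x_seq, v_v_seq, h_seq, spike_seq, v_th, v_reset))
print(spike_seq)

If a direct call like this succeeds while the SpikingJelly path still fails, compare the name passed to cupy.RawKernel with the __global__ function actually defined in the source: in the snippet at the top of this issue, kernel_name is "IFNodeFPTTKernel_float_hard_reset" while the code defines IFNodeFPTTKernel_half_hard_reset, and CUDA_ERROR_NOT_FOUND: named symbol not found is exactly what the driver reports when the compiled module contains no symbol with the requested name.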