`cudadef`: Meta code isn't searching the CUDA enabled Cling for symbols

This issue documents the errors I ran into while working with the existing cudadef and cppdef definitions. The definitions are given below:

def cppdef(src):
    """Declare C++ source <src> to Cling, for the CPU target IncrementalCompiler.

    The source is wrapped in ``#ifndef __CUDA__`` / ``#endif`` before it is
    passed to ``gbl.gInterpreter.Declare``, so it is preprocessed away whenever
    the ``__CUDA__`` macro is defined.

    Returns True when the declaration succeeded, or when the captured stderr
    output mentions warnings but no errors (the text is then re-issued as a
    ``SyntaxWarning``). Raises ``SyntaxError`` in every other failure case.
    """
    guarded = "".join(("#ifndef __CUDA__\n", src, "\n#endif"))

    # Anything the interpreter writes to stderr during Declare is collected
    # so it can be attached to the warning or the exception below.
    with _stderr_capture() as captured:
        ok = gbl.gInterpreter.Declare(guarded)

    diagnostics = captured.err
    if ok and not diagnostics:
        return True

    lowered = diagnostics.lower()
    if 'error' in lowered or 'warning' not in lowered:
        raise SyntaxError('Failed to parse the given C++ code%s' % diagnostics)

    # Only warnings were reported: surface them, but treat the call as a success.
    warnings.warn(diagnostics, SyntaxWarning)
    return True

def cudadef(src):
    """Declare CUDA specific C++ source <src> to Cling, for the GPU target
    IncrementalCUDADeviceCompiler.

    The source is wrapped in ``#ifdef __CUDA__`` / ``#endif`` before it is
    passed to ``gbl.gInterpreter.Declare``, so it is preprocessed away whenever
    the ``__CUDA__`` macro is not defined.

    Returns True when the declaration succeeded, or when the captured stderr
    output mentions warnings but no errors (the text is then re-issued as a
    ``SyntaxWarning``). Raises ``SyntaxError`` in every other failure case.
    """
    guarded = "".join(("#ifdef __CUDA__\n", src, "\n#endif"))

    # Anything the interpreter writes to stderr during Declare is collected
    # so it can be attached to the warning or the exception below.
    with _stderr_capture() as captured:
        ok = gbl.gInterpreter.Declare(guarded)

    diagnostics = captured.err
    if ok and not diagnostics:
        return True

    lowered = diagnostics.lower()
    if 'error' in lowered or 'warning' not in lowered:
        raise SyntaxError('Failed to parse the given CUDA C++ code%s' % diagnostics)

    # Only warnings were reported: surface them, but treat the call as a success.
    warnings.warn(diagnostics, SyntaxWarning)
    return True

This is the example I tried:

# Reproducer 1: declare a CUDA kernel and a host-side launcher through
# cppyy.cudadef, then call the launcher from Python via cppyy.gbl.
import cppyy
import os
# NOTE(review): these variables are set after `import cppyy`; confirm whether
# the backend reads them at import time, in which case they must be set earlier.
os.environ['CLING_ENABLE_CUDA'] = '1'
os.environ['CLING_CUDA_PATH'] = '/usr/local/cuda'
# Make the CUDA headers and the runtime library visible to cppyy.
cppyy.add_include_path('/usr/local/cuda/include')
cppyy.add_library_path('/usr/local/cuda/lib64')
cppyy.include('iostream')
cppyy.include("cuda.h")
cppyy.include("cuda_runtime.h")
cppyy.load_library("cudart")
# Declares `version` on the C++ side and fills it in; it is read back through
# cppyy.gbl on the next line.
cppyy.cppexec("""int version; cudaRuntimeGetVersion(&version);""")
print("version:", cppyy.gbl.version)
# Device kernel: copies a single float from `in` to `out`.
cppyy.cudadef("""
                  __global__ void copy(float* out, float* in) {
                      *out = *in;
                  }""")
# Host launcher: allocates host/device buffers, runs copy<<<1,1>>>, copies the
# results back and prints whether `out` matches `in`.
cppyy.cudadef("""
              void runit(){
                        float *in, *in_d, *out, *out_d;
                        in = (float*)malloc(sizeof(float));
                        out = (float*)malloc(sizeof(float));
                        *in = 5.0f;  // set initial value
                        cudaMalloc((void**)&in_d, sizeof(float));
                        cudaMalloc((void**)&out_d, sizeof(float));
                        cudaMemcpy(in_d, in, sizeof(float), cudaMemcpyHostToDevice);
                        cudaMemcpy(out_d, out, sizeof(float), cudaMemcpyHostToDevice);
                        copy<<<1,1>>>(out_d, in_d);
                        cudaMemcpy(in, in_d, sizeof(float), cudaMemcpyDeviceToHost);
                        cudaMemcpy(out, out_d, sizeof(float), cudaMemcpyDeviceToHost);
                        if (*out == *in)
                            std::cout << "Success, the value of out is " << *out  <<std::endl;
                        else
                            std::cout << "Failure " << std::endl;
                        cudaFree(out_d); cudaFree(in_d);
                        free(out); free(in);
                 } """)

# This lookup is the statement that raises the AttributeError shown in the
# output below.
cppyy.gbl.runit()

Output:

        ^
version: 11020
Traceback (most recent call last):
  File "copy.py", line 138, in <module>
    cppyy.gbl.runit()
AttributeError: <namespace cppyy.gbl at 0x1011600> has no attribute 'runit'. Full details:
  type object '' has no attribute 'runit'
  'runit' is not a known C++ class
  'runit' is not a known C++ template
  'runit' is not a known C++ enum

When I tried to print the macro `__CUDA__` inside cppdef, after reverting the `#ifndef` guard in the definition of cppdef, the macro was reported as undefined. As we discussed on Discord before, this AttributeError means that the meta code isn't searching the CUDA-enabled Cling for symbols. This needs to be fixed in order to work with cudadef or Cling/JITed kernels.

I also tested another example, first using cppdef:

# Reproducer 2 (cppdef variant): declare an extern "C" __global__ kernel
# through cppyy.cppdef and check whether it can be looked up on cppyy.gbl.
import cppyy
import os
# NOTE(review): these variables are set after `import cppyy`; confirm whether
# the backend reads them at import time, in which case they must be set earlier.
os.environ['CLING_ENABLE_CUDA'] = '1'
os.environ['CLING_CUDA_PATH'] = '/usr/local/cuda'
os.environ['CLING_CUDA_ARCH'] = 'sm_75'
# Make the CUDA headers and the runtime library visible to cppyy.
cppyy.add_include_path('/usr/local/cuda/include')
cppyy.add_library_path('/usr/local/cuda/lib64')
cppyy.include('iostream')
cppyy.include("cuda.h")
cppyy.include("cuda_runtime.h")
cppyy.include("cmath")
cppyy.load_library("cudart")
# Define the CUDA kernel
# (cppdef wraps the source in `#ifndef __CUDA__`, per the definition quoted
# earlier in this report.)
cppyy.cppdef('''
extern "C" __global__ void foo_kernel(float *result, float a) {
    *result = fabs(a);
}

''')

# Function to test loading the kernel
def test_load_kernel():
    """Look up foo_kernel on cppyy.gbl and print whether the lookup worked."""
    try:
        # Attempt to get the function from the module
        foo_kernel = cppyy.gbl.foo_kernel
        print("Kernel loaded successfully:", foo_kernel)
    except Exception as e:
        # Any failure is printed rather than propagated, so the script
        # always runs to completion.
        print("Error loading kernel:", e)

# Test loading the kernel
test_load_kernel()

Output:

Kernel loaded successfully: <C++ overload "foo_kernel" at 0x7ff591f32780>

The same example, using cudadef:

# Reproducer 3 (cudadef variant): declare an extern "C" __global__ kernel
# through cppyy.cudadef and check whether it can be looked up on cppyy.gbl.
import cppyy
import os
# NOTE(review): these variables are set after `import cppyy`; confirm whether
# the backend reads them at import time, in which case they must be set earlier.
os.environ['CLING_ENABLE_CUDA'] = '1'
os.environ['CLING_CUDA_PATH'] = '/usr/local/cuda'
os.environ['CLING_CUDA_ARCH'] = 'sm_75'
# Make the CUDA headers and the runtime library visible to cppyy.
cppyy.add_include_path('/usr/local/cuda/include')
cppyy.add_library_path('/usr/local/cuda/lib64')
cppyy.include('iostream')
cppyy.include("cuda.h")
cppyy.include("cuda_runtime.h")
cppyy.include("cmath")
cppyy.load_library("cudart")
# Define the CUDA kernel
# (cudadef wraps the source in `#ifdef __CUDA__`, per the definition quoted
# earlier in this report.)
cppyy.cudadef('''
extern "C" __global__ void foo_kernel(float *result, float a) {
    *result = fabs(a);
}

''')

# Function to test loading the kernel
def test_load_kernel():
    """Look up foo_kernel on cppyy.gbl and print whether the lookup worked."""
    try:
        # Attempt to get the function from the module
        foo_kernel = cppyy.gbl.foo_kernel
        print("Kernel loaded successfully:", foo_kernel)
    except Exception as e:
        # Any failure is printed rather than propagated, so the script
        # always runs to completion.
        print("Error loading kernel:", e)

# Test loading the kernel
test_load_kernel()

Output:

Error loading kernel: <namespace cppyy.gbl at 0x15a0800> has no attribute 'foo_kernel'. Full details:
  type object '' has no attribute 'foo_kernel'
  'foo_kernel' is not a known C++ class
  'foo_kernel' is not a known C++ template
  'foo_kernel' is not a known C++ enum

wlav / cppyy

`cudadef`: Meta code isn't searching the CUDA enabled Cling for symbols #251