EnzymeAD / Enzyme.jl

Julia bindings for the Enzyme automatic differentiator
https://enzyme.mit.edu
MIT License
455 stars 63 forks source link

Support for CUDA.jl CuArrays? #144

Closed ChrisRackauckas closed 2 months ago

ChrisRackauckas commented 3 years ago

MWE:

using CUDA, Enzyme, Random

# Problem setup for the reproducer: a seeded RNG, the matrix dimensions, and
# the GPU-resident inputs.
rng = MersenneTwister(1234)
m = 32
n = 16
# Z is an n-by-m Float32 matrix of normal samples, moved to the GPU with `cu`.
Z = cu(randn(rng, Float32, (n,m)))
# 𝒯 and Δτ are not referenced anywhere else in this snippet.
𝒯 = 2.0
Δτ = 0.1
# Length m+1 vector: a single zero followed by m ones, moved to the GPU.
ca_init = cu([zeros(1) ; ones(m)])

# f!(ċȧ, ca, Z, t): write into `ċȧ` in place, using `ca` and `Z` as inputs.
# `t` is accepted but never used in the body. The body also reads the global `m`.
# NOTE(review): `dot` is called below, but the visible `using` line only loads
# CUDA, Enzyme and Random — confirm that LinearAlgebra is loaded somewhere.
function f!(ċȧ, ca, Z, t)
  # Everything after the first entry of `ca`.
  a = ca[2:end]

  # `a` scaled by the sum of its entries.
  a_unit = a / sum(a)
  # Two products with Z: Ka_unit is Z' * (Z * a_unit).
  w_unit = Z*a_unit
  Ka_unit = Z'*w_unit
  # Scalar: dot product of the element-wise absolute value of Ka_unit with a_unit.
  z_unit = dot(abs.(Ka_unit), a_unit)
  # Element-wise product of a and Ka_unit, divided by the scalar z_unit.
  aKa_over_z = a .* Ka_unit / z_unit
  # First output entry: sum of aKa_over_z divided by the global m.
  ċȧ[1] = sum(aKa_over_z) / m
  # Remaining output entries: negated element-wise absolute values.
  ċȧ[2:end] = -abs.(aKa_over_z)
end

# Three independent copies of ca_init and one copy of Z, used as buffers below.
tmp1, tmp3, tmp4 = copy(ca_init), copy(ca_init), copy(ca_init)
t = 0f0
tmp2 = copy(Z)

# Reverse-mode call on f! with a `Const` return annotation. Argument pairing:
#   ċȧ <- Duplicated(tmp3, tmp4), ca <- Duplicated(ca_init, tmp1),
#   Z  <- Duplicated(Z, tmp2),    t  <- Const(t).
# NOTE(review): the second slot of each Duplicated is a copy of the data rather
# than a zeroed buffer — confirm this seeding is intended.
Enzyme.autodiff(Reverse, f!,Const, Enzyme.Duplicated(tmp3, tmp4),
                Enzyme.Duplicated(ca_init, tmp1),
                Enzyme.Duplicated(Z, tmp2),
                Const(t))
wsmoses commented 3 years ago

Adding a couple of corrections to the test case above; can you also confirm this is the bug you see?

using Enzyme, CUDA
using Random
using Adapt
# Helper that moves `x` to the GPU by adapting it to CuArray.
gpu(x) = Adapt.adapt(CuArray, x)
# Seeded RNG and matrix dimensions for the reproducer.
rng = MersenneTwister(1234)
m = 32
n = 16
# Z is an n-by-m Float32 matrix of normal samples, piped through `gpu`.
Z = randn(rng, Float32, (n,m)) |> gpu
# 𝒯 and Δτ are not referenced anywhere else in this snippet.
𝒯 = 2.0
Δτ = 0.1
# Length m+1 vector: a single zero followed by m ones, piped through `gpu`.
# The error reported below lists the vector arguments as CuArray{Float64, 1}
# and Z as CuArray{Float32, 2}.
ca_init = [zeros(1) ; ones(m)] |> gpu

# f!(ċȧ, ca, Z, t): write into `ċȧ` in place, using `ca` and `Z` as inputs.
# `t` is accepted but never used in the body. The body also reads the global `m`.
# NOTE(review): `dot` is called below, but the visible `using` lines only load
# Enzyme, CUDA, Random and Adapt — confirm that LinearAlgebra is loaded somewhere.
function f!(ċȧ, ca, Z, t)
  # Everything after the first entry of `ca`.
  a = ca[2:end]

  # `a` scaled by the sum of its entries.
  a_unit = a / sum(a)
  # Two products with Z: Ka_unit is Z' * (Z * a_unit).
  w_unit = Z*a_unit
  Ka_unit = Z'*w_unit
  # Scalar: dot product of the element-wise absolute value of Ka_unit with a_unit.
  z_unit = dot(abs.(Ka_unit), a_unit)
  # Element-wise product of a and Ka_unit, divided by the scalar z_unit.
  aKa_over_z = a .* Ka_unit / z_unit
  # First output entry: sum of aKa_over_z divided by the global m.
  ċȧ[1] = sum(aKa_over_z) / m
  # Remaining output entries: negated element-wise absolute values.
  ċȧ[2:end] = -abs.(aKa_over_z)
end

# Three independent copies of ca_init and one copy of Z, used as buffers below.
tmp1, tmp3, tmp4 = copy(ca_init), copy(ca_init), copy(ca_init)
t = 0f0
tmp2 = copy(Z)

# Differentiate f! with no explicit mode or return annotation. Argument pairing:
#   ċȧ <- Duplicated(tmp3, tmp4), ca <- Duplicated(ca_init, tmp1),
#   Z  <- Duplicated(Z, tmp2),    t is passed as a plain value.
# NOTE(review): the second slot of each Duplicated is a copy of the data rather
# than a zeroed buffer — confirm this seeding is intended.
Enzyme.autodiff(f!,Enzyme.Duplicated(tmp3, tmp4),
                Enzyme.Duplicated(ca_init, tmp1),
                Enzyme.Duplicated(Z, tmp2),
                t)
ERROR: LoadError: InvalidIRError: compiling function f!(CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, Float32) resulted in invalid LLVM IR
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:2277
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuDeviceGetMemPool
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxSetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] activate
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
  [5] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
  [6] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:2276
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [9] cuDeviceGetMemPool
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [10] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxGetCurrent
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] prepare_cuda_state
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
 [5] initialize_context
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
 [6] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:2276
 [7] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [8] cuDeviceGetMemPool
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [9] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxGetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] current_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:61
  [5] memory_pool
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/pool.jl:46
  [6] pool_mark
    @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:93
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:191
  [8] macro expansion
    @ ./timing.jl:287
  [9] #_alloc#173
    @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:187
 [10] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:41
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuDeviceGetName
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] name
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/devices.jl:88
 [5] macro expansion
   @ ./logging.jl:340
 [6] context
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:239
 [7] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:2267
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuMemAllocAsync
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxSetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] activate
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
  [5] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
  [6] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:2266
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [9] cuMemAllocAsync
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [10] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxGetCurrent
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] prepare_cuda_state
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
 [5] initialize_context
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
 [6] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:2266
 [7] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [8] cuMemAllocAsync
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [9] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:352
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuMemAlloc_v2
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxSetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] activate
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
  [5] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
  [6] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:351
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [9] cuMemAlloc_v2
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [10] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxGetCurrent
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] prepare_cuda_state
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
 [5] initialize_context
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
 [6] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:351
 [7] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [8] cuMemAlloc_v2
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [9] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] unsafe_cuStreamQuery
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:1025
 [2] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxSetCurrent
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] activate
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
 [5] prepare_cuda_state
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
 [6] initialize_context
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
 [7] unsafe_cuStreamQuery
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:1024
 [8] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxGetCurrent
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] prepare_cuda_state
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
 [5] initialize_context
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
 [6] unsafe_cuStreamQuery
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:1024
 [7] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:164
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxSynchronize
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxSetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] activate
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
  [5] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
  [6] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:163
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [9] cuCtxSynchronize
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [10] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxGetCurrent
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] prepare_cuda_state
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
 [5] initialize_context
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
 [6] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:163
 [7] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [8] cuCtxSynchronize
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [9] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:1254
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuLaunchHostFunc
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] #launch#34
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:153
 [5] #synchronize#12
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/stream.jl:142
 [6] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxSetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] activate
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
  [5] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
  [6] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:1253
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [9] cuLaunchHostFunc
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [10] #launch#34
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:153
 [11] #synchronize#12
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/stream.jl:142
 [12] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxGetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
  [5] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [6] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:1253
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [8] cuLaunchHostFunc
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [9] #launch#34
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:153
 [10] #synchronize#12
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/stream.jl:142
 [11] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:59
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuDeviceGetAttribute
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] attribute
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/devices.jl:118
 [5] capability
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/devices.jl:182
 [6] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:132
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxPopCurrent_v2
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] pop!
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:169
 [5] device
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:304
 [6] check_exceptions
   @ ~/.julia/packages/CUDA/kSriU/src/compiler/exceptions.jl:33
 [7] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] unsafe_cuCtxGetDevice
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:150
 [2] current_device
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/devices.jl:22
 [3] device
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:303
 [4] check_exceptions
   @ ~/.julia/packages/CUDA/kSriU/src/compiler/exceptions.jl:33
 [5] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:126
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxPushCurrent_v2
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] push!
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:160
 [5] device
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:302
 [6] check_exceptions
   @ ~/.julia/packages/CUDA/kSriU/src/compiler/exceptions.jl:33
 [7] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:1032
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuStreamSynchronize
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxSetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] activate
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
  [5] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
  [6] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:1031
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [9] cuStreamSynchronize
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [10] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxGetCurrent
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] prepare_cuda_state
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
 [5] initialize_context
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
 [6] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:1031
 [7] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [8] cuStreamSynchronize
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [9] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:84
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuDevicePrimaryCtxRetain
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] CuContext
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:55
 [5] context
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:246
 [6] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:35
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuDeviceGetCount
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] ndevices
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/devices.jl:160
 [5] TaskLocalState
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:53
 [6] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:29
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuDeviceGet
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] CuDevice
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/devices.jl:16
 [5] TaskLocalState
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:50
 [6] task_local_state!
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:73
 [7] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:934
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuStreamCreate
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] CuStream
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/stream.jl:20
 [5] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxSetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] activate
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
  [5] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
  [6] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:933
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [9] cuStreamCreate
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [10] CuStream
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/stream.jl:20
 [11] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxGetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
  [5] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [6] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:933
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [8] cuStreamCreate
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [9] CuStream
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/stream.jl:20
 [10] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxGetCurrent
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] current_context
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:61
 [5] CuStream
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/stream.jl:26
 [6] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] nvtxNameCuStreamA
   @ ~/.julia/packages/CUDA/kSriU/lib/nvtx/libnvtx.jl:244
 [2] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] unsafe_cublasCreate_v2
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:6
 [2] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxSetCurrent
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] activate
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
 [5] prepare_cuda_state
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
 [6] initialize_context
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
 [7] unsafe_cublasCreate_v2
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:5
 [8] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxGetCurrent
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] prepare_cuda_state
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
 [5] initialize_context
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
 [6] unsafe_cublasCreate_v2
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:5
 [7] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxGetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] current_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:61
  [5] memory_pool
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/pool.jl:46
  [6] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:341
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
  [8] cublasCreate
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/wrappers.jl:8
  [9] #1305
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/CUBLAS.jl:83
 [10] #6
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/cache.jl:24
 [11] lock
    @ ./lock.jl:187
 [12] check_cache
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/cache.jl:22
 [13] pop!
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/cache.jl:46
 [14] new_state
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/CUBLAS.jl:82
 [15] #1308
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/CUBLAS.jl:99
 [16] get!
    @ ./dict.jl:465
 [17] handle
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/CUBLAS.jl:98
 [18] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxGetCurrent
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] current_context
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:61
 [5] memory_pool
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/pool.jl:46
 [6] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:341
 [7] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
 [8] cublasSetStream_v2
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [9] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:36
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:356
 [3] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
 [4] cublasSetStream_v2
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [5] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxSetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] activate
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
  [5] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
  [6] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:35
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:356
  [9] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
 [10] cublasSetStream_v2
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [11] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxGetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
  [5] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [6] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:35
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:356
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
  [9] cublasSetStream_v2
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [10] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:36
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:312
 [3] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
 [4] cublasSetStream_v2
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [5] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxSetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] activate
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
  [5] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
  [6] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:35
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:312
  [9] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
 [10] cublasSetStream_v2
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [11] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxGetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
  [5] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [6] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:35
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:312
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
  [9] cublasSetStream_v2
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [10] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxGetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] current_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:61
  [5] memory_pool
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/pool.jl:46
  [6] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:341
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
  [8] cublasGetProperty
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [9] cublasGetProperty
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/wrappers.jl:32
 [10] version
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/wrappers.jl:36
 [11] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:25
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:356
 [3] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
 [4] cublasGetProperty
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [5] cublasGetProperty
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/wrappers.jl:32
 [6] version
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/wrappers.jl:36
 [7] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:25
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:312
 [3] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
 [4] cublasGetProperty
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [5] cublasGetProperty
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/wrappers.jl:32
 [6] version
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/wrappers.jl:36
 [7] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxGetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] current_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:61
  [5] memory_pool
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/pool.jl:46
  [6] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:341
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
  [8] cublasSetMathMode
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [9] math_mode!
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/CUBLAS.jl:62
 [10] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:85
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:356
 [3] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
 [4] cublasSetMathMode
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [5] math_mode!
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/CUBLAS.jl:62
 [6] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxSetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] activate
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
  [5] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
  [6] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:84
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:356
  [9] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
 [10] cublasSetMathMode
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [11] math_mode!
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/CUBLAS.jl:62
 [12] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxGetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
  [5] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [6] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:84
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:356
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
  [9] cublasSetMathMode
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [10] math_mode!
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/CUBLAS.jl:62
 [11] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:85
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:312
 [3] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
 [4] cublasSetMathMode
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [5] math_mode!
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/CUBLAS.jl:62
 [6] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxSetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] activate
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
  [5] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
  [6] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:84
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:312
  [9] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
 [10] cublasSetMathMode
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [11] math_mode!
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/CUBLAS.jl:62
 [12] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxGetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
  [5] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [6] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:84
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:312
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
  [9] cublasSetMathMode
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [10] math_mode!
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/CUBLAS.jl:62
 [11] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:1238
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuLaunchCooperativeKernel
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] #27
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:64
  [5] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:33
  [6] macro expansion
    @ ./none:0
  [7] pack_arguments
    @ ./none:0
  [8] #launch#26
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:62
  [9] #32
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:136
 [10] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:95
 [11] macro expansion
    @ ./none:0
 [12] convert_arguments
    @ ./none:0
 [13] #cudacall#31
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:135
 [14] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/compiler/execution.jl:204
 [15] macro expansion
    @ ./none:0
 [16] #call#194
    @ ./none:0
 [17] #_#215
    @ ~/.julia/packages/CUDA/kSriU/src/compiler/execution.jl:462
 [18] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/compiler/execution.jl:104
 [19] #gpu_call#236
    @ ~/.julia/packages/CUDA/kSriU/src/gpuarrays.jl:30
 [20] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxSetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] activate
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
  [5] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
  [6] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:1237
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [9] cuLaunchCooperativeKernel
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [10] #27
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:64
 [11] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:33
 [12] macro expansion
    @ ./none:0
 [13] pack_arguments
    @ ./none:0
 [14] #launch#26
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:62
 [15] #32
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:136
 [16] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:95
 [17] macro expansion
    @ ./none:0
 [18] convert_arguments
    @ ./none:0
 [19] #cudacall#31
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:135
 [20] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/compiler/execution.jl:204
 [21] macro expansion
    @ ./none:0
 [22] #call#194
    @ ./none:0
 [23] #_#215
    @ ~/.julia/packages/CUDA/kSriU/src/compiler/execution.jl:462
 [24] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/compiler/execution.jl:104
 [25] #gpu_call#236
    @ ~/.julia/packages/CUDA/kSriU/src/gpuarrays.jl:30
 [26] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxGetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
  [5] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [6] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:1237
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [8] cuLaunchCooperativeKernel
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [9] #27
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:64
 [10] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:33
 [11] macro expansion
    @ ./none:0
 [12] pack_arguments
    @ ./none:0
 [13] #launch#26
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:62
 [14] #32
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:136
 [15] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:95
 [16] macro expansion
    @ ./none:0
 [17] convert_arguments
    @ ./none:0
 [18] #cudacall#31
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:135
 [19] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/compiler/execution.jl:204
 [20] macro expansion
    @ ./none:0
 [21] #call#194
    @ ./none:0
 [22] #_#215
    @ ~/.julia/packages/CUDA/kSriU/src/compiler/execution.jl:462
 [23] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/compiler/execution.jl:104
 [24] #gpu_call#236
    @ ~/.julia/packages/CUDA/kSriU/src/gpuarrays.jl:30
 [25] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:1227
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuLaunchKernel
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] #27
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:69
  [5] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:33
  [6] macro expansion
    @ ./none:0
  [7] pack_arguments
    @ ./none:0
  [8] #launch#26
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:62
  [9] #32
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:136
 [10] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:95
 [11] macro expansion
    @ ./none:0
 [12] convert_arguments
    @ ./none:0
 [13] #cudacall#31
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:135
 [14] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/compiler/execution.jl:204
 [15] macro expansion
    @ ./none:0
 [16] #call#194
    @ ./none:0
 [17] #_#215
    @ ~/.julia/packages/CUDA/kSriU/src/compiler/execution.jl:462
 [18] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/compiler/execution.jl:104
 [19] #gpu_call#236
    @ ~/.julia/packages/CUDA/kSriU/src/gpuarrays.jl:30
 [20] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxSetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] activate
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
  [5] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
  [6] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:1226
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [9] cuLaunchKernel
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [10] #27
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:69
 [11] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:33
 [12] macro expansion
    @ ./none:0
 [13] pack_arguments
    @ ./none:0
 [14] #launch#26
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:62
 [15] #32
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:136
 [16] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:95
 [17] macro expansion
    @ ./none:0
 [18] convert_arguments
    @ ./none:0
 [19] #cudacall#31
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:135
 [20] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/compiler/execution.jl:204
 [21] macro expansion
    @ ./none:0
 [22] #call#194
    @ ./none:0
 [23] #_#215
    @ ~/.julia/packages/CUDA/kSriU/src/compiler/execution.jl:462
 [24] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/compiler/execution.jl:104
 [25] #gpu_call#236
    @ ~/.julia/packages/CUDA/kSriU/src/gpuarrays.jl:30
 [26] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxGetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
  [5] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [6] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:1226
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [8] cuLaunchKernel
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [9] #27
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:69
 [10] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:33
 [11] macro expansion
    @ ./none:0
 [12] pack_arguments
    @ ./none:0
 [13] #launch#26
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:62
 [14] #32
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:136
 [15] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:95
 [16] macro expansion
    @ ./none:0
 [17] convert_arguments
    @ ./none:0
 [18] #cudacall#31
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/execution.jl:135
 [19] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/compiler/execution.jl:204
 [20] macro expansion
    @ ./none:0
 [21] #call#194
    @ ./none:0
 [22] #_#215
    @ ~/.julia/packages/CUDA/kSriU/src/compiler/execution.jl:462
 [23] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/compiler/execution.jl:104
 [24] #gpu_call#236
    @ ~/.julia/packages/CUDA/kSriU/src/gpuarrays.jl:30
 [25] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxGetCurrent
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] current_context
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:61
 [5] memory_pool
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/pool.jl:46
 [6] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:341
 [7] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
 [8] cublasDscal_v2
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [9] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:294
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:356
 [3] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
 [4] cublasDscal_v2
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [5] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxSetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] activate
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
  [5] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
  [6] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:293
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:356
  [9] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
 [10] cublasDscal_v2
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [11] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxGetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
  [5] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [6] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:293
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:356
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
  [9] cublasDscal_v2
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [10] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:294
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:312
 [3] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
 [4] cublasDscal_v2
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [5] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxSetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] activate
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
  [5] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
  [6] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:293
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:312
  [9] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
 [10] cublasDscal_v2
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [11] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxGetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
  [5] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [6] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/libcublas.jl:293
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/pool.jl:312
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cublas/error.jl:61
  [9] cublasDscal_v2
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [10] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:59
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuDeviceGetAttribute
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] attribute
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/devices.jl:118
 [5] warpsize
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/devices.jl:174
 [6] nextwarp
   @ ~/.julia/packages/CUDA/kSriU/src/compiler/execution.jl:506
 [7] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:59
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuDeviceGetAttribute
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] attribute
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/devices.jl:118
  [5] warpsize
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/devices.jl:174
  [6] prevwarp
    @ ~/.julia/packages/CUDA/kSriU/src/compiler/execution.jl:511
  [7] compute_threads
    @ ~/.julia/packages/CUDA/kSriU/src/mapreduce.jl:223
  [8] #mapreducedim!#281
    @ ~/.julia/packages/CUDA/kSriU/src/mapreduce.jl:237
  [9] #mapreducedim!#281
    @ ~/.julia/packages/CUDA/kSriU/src/mapreduce.jl:279
 [10] #_mapreduce#22
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/mapreduce.jl:62
 [11] #mapreduce#20
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/mapreduce.jl:28
 [12] mapreduce
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/mapreduce.jl:28
 [13] #_sum#682
    @ ./reducedim.jl:878
 [14] _sum
    @ ./reducedim.jl:878
 [15] #_sum#681
    @ ./reducedim.jl:877
 [16] _sum
    @ ./reducedim.jl:877
 [17] #sum#679
    @ ./reducedim.jl:873
 [18] sum
    @ ./reducedim.jl:873
 [19] f!
    @ /mnt/Data/git/Enzyme.jl/cu.jl:16
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:59
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuDeviceGetAttribute
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] attribute
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/devices.jl:118
  [5] big_mapreduce_threshold
    @ ~/.julia/packages/CUDA/kSriU/src/mapreduce.jl:164
  [6] #mapreducedim!#281
    @ ~/.julia/packages/CUDA/kSriU/src/mapreduce.jl:200
  [7] #mapreducedim!#281
    @ ~/.julia/packages/CUDA/kSriU/src/mapreduce.jl:279
  [8] #_mapreduce#22
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/mapreduce.jl:62
  [9] #mapreduce#20
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/mapreduce.jl:28
 [10] mapreduce
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/mapreduce.jl:28
 [11] #_sum#682
    @ ./reducedim.jl:878
 [12] _sum
    @ ./reducedim.jl:878
 [13] #_sum#681
    @ ./reducedim.jl:877
 [14] _sum
    @ ./reducedim.jl:877
 [15] #sum#679
    @ ./reducedim.jl:873
 [16] sum
    @ ./reducedim.jl:873
 [17] f!
    @ /mnt/Data/git/Enzyme.jl/cu.jl:16
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:59
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuDeviceGetAttribute
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] attribute
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/devices.jl:118
  [5] warpsize
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/devices.jl:174
  [6] prevwarp
    @ ~/.julia/packages/CUDA/kSriU/src/compiler/execution.jl:511
  [7] compute_threads
    @ ~/.julia/packages/CUDA/kSriU/src/mapreduce.jl:223
  [8] #mapreducedim!#281
    @ ~/.julia/packages/CUDA/kSriU/src/mapreduce.jl:237
  [9] #_mapreduce#22
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/mapreduce.jl:62
 [10] #mapreduce#20
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/mapreduce.jl:28
 [11] mapreduce
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/mapreduce.jl:28
 [12] #_sum#682
    @ ./reducedim.jl:878
 [13] _sum
    @ ./reducedim.jl:878
 [14] #_sum#681
    @ ./reducedim.jl:877
 [15] _sum
    @ ./reducedim.jl:877
 [16] #sum#679
    @ ./reducedim.jl:873
 [17] sum
    @ ./reducedim.jl:873
 [18] f!
    @ /mnt/Data/git/Enzyme.jl/cu.jl:16
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:59
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuDeviceGetAttribute
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] attribute
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/devices.jl:118
  [5] big_mapreduce_threshold
    @ ~/.julia/packages/CUDA/kSriU/src/mapreduce.jl:164
  [6] #mapreducedim!#281
    @ ~/.julia/packages/CUDA/kSriU/src/mapreduce.jl:200
  [7] #_mapreduce#22
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/mapreduce.jl:62
  [8] #mapreduce#20
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/mapreduce.jl:28
  [9] mapreduce
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/mapreduce.jl:28
 [10] #_sum#682
    @ ./reducedim.jl:878
 [11] _sum
    @ ./reducedim.jl:878
 [12] #_sum#681
    @ ./reducedim.jl:877
 [13] _sum
    @ ./reducedim.jl:877
 [14] #sum#679
    @ ./reducedim.jl:873
 [15] sum
    @ ./reducedim.jl:873
 [16] f!
    @ /mnt/Data/git/Enzyme.jl/cu.jl:16
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:132
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxPopCurrent_v2
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] pop!
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:169
 [5] synchronize
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:324
 [6] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:126
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxPushCurrent_v2
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] push!
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:160
 [5] synchronize
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:319
 [6] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:2272
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuMemPoolTrimTo
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxSetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] activate
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
  [5] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
  [6] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:2271
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [9] cuMemPoolTrimTo
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [10] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxGetCurrent
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] prepare_cuda_state
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
 [5] initialize_context
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
 [6] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:2271
 [7] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [8] cuMemPoolTrimTo
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [9] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] unsafe_cuCtxGetDevice
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:150
 [2] current_device
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/devices.jl:22
 [3] context!
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:150
 [4] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxSetCurrent
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] activate
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
 [5] context!
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:149
 [6] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] unsafe_cuCtxGetDevice
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:150
 [2] current_device
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/devices.jl:22
 [3] context!
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:144
 [4] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxSetCurrent
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] activate
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
 [5] context!
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:143
 [6] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] unsafe_cuPointerGetAttribute
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:883
 [2] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxSetCurrent
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] activate
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
 [5] prepare_cuda_state
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
 [6] initialize_context
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
 [7] unsafe_cuPointerGetAttribute
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:882
 [8] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxGetCurrent
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] prepare_cuda_state
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
 [5] initialize_context
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
 [6] unsafe_cuPointerGetAttribute
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:882
 [7] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:598
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuMemcpyDtoHAsync_v2
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] #unsafe_copyto!#8
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/memory.jl:394
  [5] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:410
  [6] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:182
  [7] unsafe_copyto!
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:404
  [8] copyto!
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:359
  [9] getindex
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/indexing.jl:89
 [10] #25
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/indexing.jl:75
 [11] task_local_storage
    @ ./task.jl:281
 [12] macro expansion
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/indexing.jl:74
 [13] #_mapreduce#22
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/mapreduce.jl:65
 [14] #mapreduce#20
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/mapreduce.jl:28
 [15] mapreduce
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/mapreduce.jl:28
 [16] #_sum#682
    @ ./reducedim.jl:878
 [17] _sum
    @ ./reducedim.jl:878
 [18] #_sum#681
    @ ./reducedim.jl:877
 [19] _sum
    @ ./reducedim.jl:877
 [20] #sum#679
    @ ./reducedim.jl:873
 [21] sum
    @ ./reducedim.jl:873
 [22] f!
    @ /mnt/Data/git/Enzyme.jl/cu.jl:16
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxSetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] activate
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
  [5] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
  [6] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:597
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [9] cuMemcpyDtoHAsync_v2
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [10] #unsafe_copyto!#8
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/memory.jl:394
 [11] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:410
 [12] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:182
 [13] unsafe_copyto!
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:404
 [14] copyto!
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:359
 [15] getindex
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/indexing.jl:89
 [16] #25
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/indexing.jl:75
 [17] task_local_storage
    @ ./task.jl:281
 [18] macro expansion
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/indexing.jl:74
 [19] #_mapreduce#22
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/mapreduce.jl:65
 [20] #mapreduce#20
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/mapreduce.jl:28
 [21] mapreduce
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/mapreduce.jl:28
 [22] #_sum#682
    @ ./reducedim.jl:878
 [23] _sum
    @ ./reducedim.jl:878
 [24] #_sum#681
    @ ./reducedim.jl:877
 [25] _sum
    @ ./reducedim.jl:877
 [26] #sum#679
    @ ./reducedim.jl:873
 [27] sum
    @ ./reducedim.jl:873
 [28] f!
    @ /mnt/Data/git/Enzyme.jl/cu.jl:16
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxGetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
  [5] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [6] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:597
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [8] cuMemcpyDtoHAsync_v2
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [9] #unsafe_copyto!#8
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/memory.jl:394
 [10] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:410
 [11] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:182
 [12] unsafe_copyto!
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:404
 [13] copyto!
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:359
 [14] getindex
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/indexing.jl:89
 [15] #25
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/indexing.jl:75
 [16] task_local_storage
    @ ./task.jl:281
 [17] macro expansion
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/indexing.jl:74
 [18] #_mapreduce#22
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/mapreduce.jl:65
 [19] #mapreduce#20
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/mapreduce.jl:28
 [20] mapreduce
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/mapreduce.jl:28
 [21] #_sum#682
    @ ./reducedim.jl:878
 [22] _sum
    @ ./reducedim.jl:878
 [23] #_sum#681
    @ ./reducedim.jl:877
 [24] _sum
    @ ./reducedim.jl:877
 [25] #sum#679
    @ ./reducedim.jl:873
 [26] sum
    @ ./reducedim.jl:873
 [27] f!
    @ /mnt/Data/git/Enzyme.jl/cu.jl:16
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:591
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuMemcpyHtoDAsync_v2
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] #unsafe_copyto!#9
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/memory.jl:394
  [5] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:393
  [6] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:182
  [7] unsafe_copyto!
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:387
  [8] copyto!
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:345
  [9] setindex!
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/indexing.jl:97
 [10] f!
    @ /mnt/Data/git/Enzyme.jl/cu.jl:21
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxSetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] activate
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
  [5] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
  [6] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:590
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [9] cuMemcpyHtoDAsync_v2
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [10] #unsafe_copyto!#9
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/memory.jl:394
 [11] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:393
 [12] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:182
 [13] unsafe_copyto!
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:387
 [14] copyto!
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:345
 [15] setindex!
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/indexing.jl:97
 [16] f!
    @ /mnt/Data/git/Enzyme.jl/cu.jl:21
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxGetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
  [5] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [6] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:590
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [8] cuMemcpyHtoDAsync_v2
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [9] #unsafe_copyto!#9
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/memory.jl:394
 [10] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:393
 [11] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:182
 [12] unsafe_copyto!
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:387
 [13] copyto!
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:345
 [14] setindex!
    @ ~/.julia/packages/GPUArrays/3sW6s/src/host/indexing.jl:97
 [15] f!
    @ /mnt/Data/git/Enzyme.jl/cu.jl:21
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:605
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuMemcpyDtoDAsync_v2
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] #unsafe_copyto!#10
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/memory.jl:394
  [5] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:427
  [6] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:182
  [7] unsafe_copyto!
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:425
  [8] copyto!
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:373
  [9] copyto!
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:377
 [10] copy
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:171
 [11] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxSetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] activate
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
  [5] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
  [6] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:604
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [9] cuMemcpyDtoDAsync_v2
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [10] #unsafe_copyto!#10
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/memory.jl:394
 [11] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:427
 [12] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:182
 [13] unsafe_copyto!
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:425
 [14] copyto!
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:373
 [15] copyto!
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:377
 [16] copy
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:171
 [17] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxGetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
  [5] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [6] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:604
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [8] cuMemcpyDtoDAsync_v2
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [9] #unsafe_copyto!#10
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/memory.jl:394
 [10] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:427
 [11] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:182
 [12] unsafe_copyto!
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:425
 [13] copyto!
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:373
 [14] copyto!
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:377
 [15] copy
    @ ~/.julia/packages/CUDA/kSriU/src/array.jl:171
 [16] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:1634
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuOccupancyMaxPotentialBlockSize
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] multiple call sites
   @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
  [1] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:138
  [2] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [3] cuCtxSetCurrent
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
  [4] activate
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/context.jl:197
  [5] prepare_cuda_state
    @ ~/.julia/packages/CUDA/kSriU/src/state.jl:96
  [6] initialize_context
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
  [7] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:1633
  [8] macro expansion
    @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
  [9] cuOccupancyMaxPotentialBlockSize
    @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [10] multiple call sites
    @ unknown:0
Reason: unsupported jl_lazy_load_and_lookup
Stacktrace:
 [1] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:144
 [2] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [3] cuCtxGetCurrent
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [4] prepare_cuda_state
   @ ~/.julia/packages/CUDA/kSriU/src/state.jl:94
 [5] initialize_context
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:80
 [6] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/libcuda.jl:1633
 [7] macro expansion
   @ ~/.julia/packages/CUDA/kSriU/lib/cudadrv/error.jl:97
 [8] cuOccupancyMaxPotentialBlockSize
   @ ~/.julia/packages/CUDA/kSriU/lib/utils/call.jl:26
 [9] multiple call sites
   @ unknown:0
Stacktrace:
 [1] check_ir(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(f!), Tuple{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, Float32}}}, mod::LLVM.Module)
   @ Enzyme.Compiler /mnt/Data/git/Enzyme.jl/src/compiler/validation.jl:22
 [2] codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(f!), Tuple{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, Float32}}}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
   @ Enzyme.Compiler /mnt/Data/git/Enzyme.jl/src/compiler.jl:1988
 [3] _thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(f!), Tuple{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, Float32}}})
   @ Enzyme.Compiler /mnt/Data/git/Enzyme.jl/src/compiler.jl:2471
 [4] cached_compilation(cache::Dict{UInt64, Any}, job::GPUCompiler.CompilerJob, compiler::typeof(Enzyme.Compiler._thunk), linker::typeof(Enzyme.Compiler._link))
   @ GPUCompiler ~/.julia/packages/GPUCompiler/DksJc/src/cache.jl:89
 [5] thunk(f::typeof(f!), df::Nothing, ::Type{Const{Any}}, tt::Type{Tuple{Duplicated{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}, Duplicated{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}, Duplicated{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, Const{Float32}}}, ::Val{false})
   @ Enzyme.Compiler /mnt/Data/git/Enzyme.jl/src/compiler.jl:2524
 [6] autodiff(::typeof(f!), ::Type{Const{Any}}, ::Duplicated{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}, ::Vararg{Any, N} where N)
   @ Enzyme /mnt/Data/git/Enzyme.jl/src/Enzyme.jl:184
 [7] autodiff(::typeof(f!), ::Duplicated{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}, ::Duplicated{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}, ::Vararg{Any, N} where N)
   @ Enzyme /mnt/Data/git/Enzyme.jl/src/Enzyme.jl:214
 [8] top-level scope
   @ /mnt/Data/git/Enzyme.jl/cu.jl:29
in expression starting at /mnt/Data/git/Enzyme.jl/cu.jl:29
ChrisRackauckas commented 3 years ago

Yeah that looks right. Does your CUDA kernel generation paper cover this kind of case and it's just API/integration issues, or something deeper?

wsmoses commented 3 years ago

Haven't yet gone through it all so there may be some surprises, but I believe the answer is just API/integration.

Essentially, because LLVM presently permits only one architecture in a module, the CUDA code and non-CUDA code must be in separate modules. As such, the first way to solve this (and how we tested Julia kernels, for example) is to run Enzyme autodiff from within the GPU function, which gets the desired behavior. To get heterogeneous code to work, we used Enzyme's custom derivative registration and exporting to export the forward/reverse pass of the GPU code and import that into the CPU module.

That can be done manually here as well, though we haven't set up the automatic registration for that within Enzyme.jl.

For an example of this manual registration in C/C++, see: https://enzyme.mit.edu/getting_started/CUDAGuide/

wsmoses commented 1 year ago
ERROR: SystemError: opening file "libcuda.so.1": No such file or directory
Stacktrace:
  [1] systemerror(p::String, errno::Int32; extrainfo::Nothing)
    @ Base ./error.jl:176
  [2] kwcall(::NamedTuple{(:extrainfo,), Tuple{Nothing}}, ::typeof(systemerror), p::String, errno::Int32)
    @ Base ./error.jl:176
  [3] kwcall(::NamedTuple{(:extrainfo,), Tuple{Nothing}}, ::typeof(systemerror), p::String)
    @ Base ./error.jl:176
  [4] #systemerror#82
    @ ./error.jl:175 [inlined]
  [5] systemerror
    @ ./error.jl:175 [inlined]
  [6] open(fname::String; lock::Bool, read::Bool, write::Nothing, create::Nothing, truncate::Nothing, append::Nothing)
    @ Base ./iostream.jl:293
  [7] open(fname::String, mode::String; lock::Bool)
    @ Base ./iostream.jl:356
  [8] open(fname::String, mode::String)
    @ Base ./iostream.jl:355
  [9] open(::Enzyme.Compiler.var"#110#115", ::String, ::Vararg{String}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Base ./io.jl:393
 [10] open
    @ ./io.jl:392 [inlined]
 [11] check_ir!(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(speelpenning), Tuple{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}}, errors::Vector{Tuple{String, Vector{Base.StackTraces.StackFrame}, Any}}, imported::Set{String}, inst::LLVM.CallInst, calls::Vector{Any})
    @ Enzyme.Compiler ~/git/Enzyme.jl/src/compiler/validation.jl:301
 [12] check_ir!(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(speelpenning), Tuple{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}}, errors::Vector{Tuple{String, Vector{Base.StackTraces.StackFrame}, Any}}, imported::Set{String}, f::LLVM.Function)
    @ Enzyme.Compiler ~/git/Enzyme.jl/src/compiler/validation.jl:189
 [13] check_ir!(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(speelpenning), Tuple{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}}, errors::Vector{Tuple{String, Vector{Base.StackTraces.StackFrame}, Any}}, mod::LLVM.Module)
    @ Enzyme.Compiler ~/git/Enzyme.jl/src/compiler/validation.jl:162
 [14] check_ir
    @ ~/git/Enzyme.jl/src/compiler/validation.jl:140 [inlined]
 [15] codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(speelpenning), Tuple{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, ctx::LLVM.ThreadSafeContext, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
    @ Enzyme.Compiler ~/git/Enzyme.jl/src/compiler.jl:7252
 [16] kwcall(::NamedTuple{(:optimize, :ctx), Tuple{Bool, LLVM.ThreadSafeContext}}, ::typeof(GPUCompiler.codegen), output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(speelpenning), Tuple{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}})
    @ Enzyme.Compiler ~/git/Enzyme.jl/src/compiler.jl:7205
 [17] _thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(speelpenning), Tuple{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}}, ctx::Nothing)
    @ Enzyme.Compiler ~/git/Enzyme.jl/src/compiler.jl:8087
 [18] _thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(speelpenning), Tuple{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}})
    @ Enzyme.Compiler ~/git/Enzyme.jl/src/compiler.jl:8081
 [19] cached_compilation(job::GPUCompiler.CompilerJob, key::UInt64, specid::UInt64)
    @ Enzyme.Compiler ~/git/Enzyme.jl/src/compiler.jl:8125
 [20] #s479#163
    @ ~/git/Enzyme.jl/src/compiler.jl:8185 [inlined]
 [21] var"#s479#163"(F::Any, Fn::Any, DF::Any, A::Any, TT::Any, Mode::Any, ModifiedBetween::Any, width::Any, specid::Any, ReturnPrimal::Any, ShadowInit::Any, ::Any, #unused#::Type, f::Any, df::Any, #unused#::Type, tt::Any, #unused#::Type, #unused#::Type, #unused#::Type, #unused#::Type, #unused#::Type, #unused#::Any)
    @ Enzyme.Compiler ./none:0
 [22] (::Core.GeneratedFunctionStub)(::Any, ::Vararg{Any})
    @ Core ./boot.jl:602
 [23] thunk
    @ ~/git/Enzyme.jl/src/compiler.jl:8218 [inlined]
 [24] thunk
    @ ~/git/Enzyme.jl/src/compiler.jl:8211 [inlined]
 [25] autodiff
    @ ~/git/Enzyme.jl/src/Enzyme.jl:197 [inlined]
 [26] autodiff
    @ ~/git/Enzyme.jl/src/Enzyme.jl:223 [inlined]
 [27] reverse(y::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, x::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer})
    @ Main ./REPL[26]:9
 [28] top-level scope
    @ REPL[26]:26
 [29] top-level scope
    @ ~/.julia/packages/CUDA/ZdCxS/src/initialization.jl:155
wsmoses commented 2 months ago

Mostly resolved; any pending issues should be filed on CUDA.jl for the EnzymeExtension there.