Status: Closed (mitkotak closed this issue 2 years ago)
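Here's a minimal working example (MWE): adding a complex GPUArray to an integer GPUArray fails, whereas the equivalent NumPy addition works.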
>>> import pycuda.autoinit
>>> import pycuda.gpuarray as gpuarray
>>> import numpy as np
>>> np.zeros(10, dtype="complex") + np.zeros(10, dtype="int")  # Passes
>>> gpuarray.zeros(10, dtype="complex") + gpuarray.zeros(10, dtype="int")  # Fails
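Error trace (the generated `axpbyz` kernel fails to compile because it multiplies the `long` scalar `b` by a `pycuda::complex<double>` value, and nvcc finds no matching `operator*`):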
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File ~/pycuda/pycuda/tools.py:470, in context_dependent_memoize.<locals>.wrapper(*args, **kwargs)
    469 try:
--> 470     return ctx_dict[cur_ctx][cache_key]
    471 except KeyError:

KeyError: <pycuda._driver.Context object at 0x7faa41c31bd0>

During handling of the above exception, another exception occurred:

CompileError                              Traceback (most recent call last)
Input In [3], in <cell line: 1>()
----> 1 gpuarray.zeros(10, dtype="complex") + gpuarray.zeros(10, dtype="int")

File ~/pycuda/pycuda/gpuarray.py:588, in GPUArray.__add__(self, other)
    585 if isinstance(other, GPUArray):
    586     # add another vector
    587     result = _get_broadcasted_binary_op_result(self, other)
--> 588     return self._axpbyz(1, other, 1, result)
    590 elif np.isscalar(other):
    591     # add a scalar
    592     if other == 0:

File ~/pycuda/pycuda/gpuarray.py:427, in GPUArray._axpbyz(self, selffac, other, otherfac, out, add_timer, stream)
    421     raise RuntimeError(
    422         "only contiguous arrays may " "be used as arguments to this operation"
    423     )
    424 assert ((self.shape == other.shape == out.shape)
    425         or ((self.shape == ()) and other.shape == out.shape)
    426         or ((other.shape == ()) and self.shape == out.shape))
--> 427 func = elementwise.get_axpbyz_kernel(
    428     self.dtype, other.dtype, out.dtype,
    429     x_is_scalar=(self.shape == ()),
    430     y_is_scalar=(other.shape == ()))
    431 if add_timer is not None:
    432     add_timer(
    433         3 * self.size,
    434         func.prepared_timed_call(
   (...)
    442         ),
    443     )

File ~/pycuda/pycuda/tools.py:474, in context_dependent_memoize.<locals>.wrapper(*args, **kwargs)
    472 context_dependent_memoized_functions.append(func)
    473 arg_dict = ctx_dict.setdefault(cur_ctx, {})
--> 474 result = func(*args, **kwargs)
    475 arg_dict[cache_key] = result
    476 return result

File ~/pycuda/pycuda/elementwise.py:480, in get_axpbyz_kernel(dtype_x, dtype_y, dtype_z, x_is_scalar, y_is_scalar)
    478 by = f"b*(({out_t}) {y})"
    479 result = f"{ax} + {by}"
--> 480 return get_elwise_kernel(
    481     "%(tp_x)s a, %(tp_x)s *x, %(tp_y)s b, %(tp_y)s *y, %(tp_z)s *z"
    482     % {
    483         "tp_x": dtype_to_ctype(dtype_x),
    484         "tp_y": dtype_to_ctype(dtype_y),
    485         "tp_z": dtype_to_ctype(dtype_z),
    486     },
    487     f"z[i] = {result}",
    488     "axpbyz",
    489 )

File ~/pycuda/pycuda/elementwise.py:191, in get_elwise_kernel(arguments, operation, name, keep, options, **kwargs)
    185 def get_elwise_kernel(
    186     arguments, operation, name="kernel", keep=False, options=None, **kwargs
    187 ):
    188     """Return a L{pycuda.driver.Function} that performs the same scalar operation
    189     on one or several vectors.
    190     """
--> 191     mod, func, arguments = get_elwise_kernel_and_types(
    192         arguments, operation, name, keep, options, **kwargs
    193     )
    195     return func

File ~/pycuda/pycuda/elementwise.py:177, in get_elwise_kernel_and_types(arguments, operation, name, keep, options, use_range, **kwargs)
    174 else:
    175     module_builder = get_elwise_module
--> 177 mod = module_builder(arguments, operation, name, keep, options, **kwargs)
    179 func = mod.get_function(name)
    180 func.prepare("".join(arg.struct_char for arg in arguments))

File ~/pycuda/pycuda/elementwise.py:46, in get_elwise_module(arguments, operation, name, keep, options, preamble, loop_prep, after_loop)
     35 def get_elwise_module(
     36     arguments,
     37     operation,
   (...)
     43     after_loop="",
     44 ):
     45     from pycuda.compiler import SourceModule
---> 46     return SourceModule(
     47         """
     48         #include <pycuda-complex.hpp>
     49
     50         %(preamble)s
     51
     52         extern "C"
     53         __global__ void %(name)s(%(arguments)s)
     54         {
     55
     56           unsigned tid = threadIdx.x;
     57           unsigned total_threads = gridDim.x*blockDim.x;
     58           unsigned cta_start = blockDim.x*blockIdx.x;
     59           unsigned i;
     60
     61           %(loop_prep)s;
     62
     63           for (i = cta_start + tid; i < n; i += total_threads)
     64           {
     65             %(operation)s;
     66           }
     67
     68           %(after_loop)s;
     69         }
     70         """
     71         % {
     72             "arguments": ", ".join(arg.declarator() for arg in arguments),
     73             "operation": operation,
     74             "name": name,
     75             "preamble": preamble,
     76             "loop_prep": loop_prep,
     77             "after_loop": after_loop,
     78         },
     79         options=options,
     80         keep=keep,
     81         no_extern_c=True,
     82     )

File ~/pycuda/pycuda/compiler.py:355, in SourceModule.__init__(self, source, nvcc, options, keep, no_extern_c, arch, code, cache_dir, include_dirs)
    341 def __init__(
    342     self,
    343     source,
   (...)
    351     include_dirs=[],
    352 ):
    353     self._check_arch(arch)
--> 355     cubin = compile(
    356         source,
    357         nvcc,
    358         options,
    359         keep,
    360         no_extern_c,
    361         arch,
    362         code,
    363         cache_dir,
    364         include_dirs,
    365     )
    367     from pycuda.driver import module_from_buffer
    369     self.module = module_from_buffer(cubin)

File ~/pycuda/pycuda/compiler.py:304, in compile(source, nvcc, options, keep, no_extern_c, arch, code, cache_dir, include_dirs, target)
    301 for i in include_dirs:
    302     options.append("-I" + i)
--> 304 return compile_plain(source, options, keep, nvcc, cache_dir, target)

File ~/pycuda/pycuda/compiler.py:154, in compile_plain(source, options, keep, nvcc, cache_dir, target)
    148     warn(
    149         "PyCUDA: nvcc exited with status 0, but appears to have "
    150         "encountered an error"
    151     )
    152 from pycuda.driver import CompileError
--> 154 raise CompileError(
    155     "nvcc compilation of %s failed" % cu_file_path,
    156     cmdline,
    157     stdout=stdout.decode("utf-8", "replace"),
    158     stderr=stderr.decode("utf-8", "replace"),
    159 )
    161 if stdout or stderr:
    162     lcase_err_text = (stdout + stderr).decode("utf-8", "replace").lower()

CompileError: nvcc compilation of /tmp/tmpgvce8ei3/kernel.cu failed
[command: nvcc --cubin -arch sm_70 -I/home/mitak2/pycuda/pycuda/cuda kernel.cu]
[stderr:
kernel.cu(19): error: no operator "*" matches these operands
            operand types are: long * pycuda::complex<double>

1 error detected in the compilation of "kernel.cu".
]
A fix has been proposed at https://gitlab.tiker.net/inducer/pycuda/-/merge_requests/81.
Here's the MWE
Error trace