maweigert / gputools

GPU accelerated image/volume processing in Python
BSD 3-Clause "New" or "Revised" License
108 stars 20 forks source link

LogicError: create_buffer failed: invalid buffer size #2

Closed robintw closed 8 years ago

robintw commented 8 years ago

I've been testing convolve and comparing it to the ndimage implementation. Running the following code:

import gputools
import numpy as np

img = np.random.rand(10000, 10000)
kernel = np.ones((3, 3))
res = gputools.convolve(img, kernel)

Gives the error below. I assume this is due to my GPU not having enough space to store this array - is that correct? The array is around 762 MB, so I guess that might be too large?

---------------------------------------------------------------------------
LogicError                                Traceback (most recent call last)
<ipython-input-10-867ee5568ea6> in <module>()
----> 1 get_ipython().magic(u'timeit res = gputools.convolve(img, kernel)')

/Users/robin/.conda/envs/python2/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in magic(self, arg_s)
   2305         magic_name, _, magic_arg_s = arg_s.partition(' ')
   2306         magic_name = magic_name.lstrip(prefilter.ESC_MAGIC)
-> 2307         return self.run_line_magic(magic_name, magic_arg_s)
   2308
   2309     #-------------------------------------------------------------------------

/Users/robin/.conda/envs/python2/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in run_line_magic(self, magic_name, line)
   2226                 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals
   2227             with self.builtin_trap:
-> 2228                 result = fn(*args,**kwargs)
   2229             return result
   2230

/Users/robin/.conda/envs/python2/lib/python2.7/site-packages/IPython/core/magics/execution.pyc in timeit(self, line, cell)

/Users/robin/.conda/envs/python2/lib/python2.7/site-packages/IPython/core/magic.pyc in <lambda>(f, *a, **k)
    191     # but it's overkill for just that one bit of state.
    192     def magic_deco(arg):
--> 193         call = lambda f, *a, **k: f(*a, **k)
    194
    195         if callable(arg):

/Users/robin/.conda/envs/python2/lib/python2.7/site-packages/IPython/core/magics/execution.pyc in timeit(self, line, cell)
   1034             number = 1
   1035             for _ in range(1, 10):
-> 1036                 time_number = timer.timeit(number)
   1037                 worst_tuning = max(worst_tuning, time_number / number)
   1038                 if time_number >= 0.2:

/Users/robin/.conda/envs/python2/lib/python2.7/site-packages/IPython/core/magics/execution.pyc in timeit(self, number)
    130         gc.disable()
    131         try:
--> 132             timing = self.inner(it, self.timer)
    133         finally:
    134             if gcold:

<magic-timeit> in inner(_it, _timer)

/Users/robin/.conda/envs/python2/lib/python2.7/site-packages/gputools-0.1.1-py2.7.egg/gputools/convolve/convolve.pyc in convolve(data, h, res_g)
     35         return _convolve_buf(data,h, res_g)
     36     elif isinstance(data,np.ndarray) and  isinstance(h,np.ndarray):
---> 37         return _convolve_np(data,h)
     38
     39     else:

/Users/robin/.conda/envs/python2/lib/python2.7/site-packages/gputools-0.1.1-py2.7.egg/gputools/convolve/convolve.pyc in _convolve_np(data, h)
     50
     51
---> 52     data_g = OCLArray.from_array(data.astype(np.float32, copy = False))
     53     h_g = OCLArray.from_array(h.astype(np.float32, copy = False))
     54

/Users/robin/.conda/envs/python2/lib/python2.7/site-packages/gputools-0.1.1-py2.7.egg/gputools/core/ocltypes.pyc in from_array(cls, arr, *args, **kwargs)
     27     def from_array(cls,arr,*args, **kwargs):
     28         queue = get_device().queue
---> 29         return cl_array.to_device(queue, arr,*args, **kwargs)
     30     @classmethod
     31     def empty(cls, shape, dtype = np.float32):

/Users/robin/.local/lib/python2.7/site-packages/pyopencl/array.pyc in to_device(queue, ary, allocator, async)
   1676
   1677     result = Array(queue, ary.shape, ary.dtype,
-> 1678                     allocator=allocator, strides=ary.strides)
   1679     result.set(ary, async=async)
   1680     return result

/Users/robin/.local/lib/python2.7/site-packages/pyopencl/array.pyc in __init__(self, cqa, shape, dtype, order, allocator, data, offset, queue, strides, events)
    564
    565                 self.base_data = cl.Buffer(
--> 566                         context, cl.mem_flags.READ_WRITE, alloc_nbytes)
    567             else:
    568                 self.base_data = self.allocator(alloc_nbytes)

LogicError: create_buffer failed: invalid buffer size
robintw commented 8 years ago

Just for reference, this works fine with smaller array sizes, so I suspect it is due to my rubbish graphics card (I know very little about GPUs, but apparently a GeForce 320M is not good!)

maweigert commented 8 years ago

Yes, that is essentially an out-of-memory error. You can find out the maximum allocatable memory of the device in use by printing the device info like so:

gputools.get_device().print_info()