inducer / pycuda

CUDA integration for Python, plus shiny features
http://mathema.tician.de/software/pycuda
Other
1.85k stars 288 forks source link

CUDA operation error: cuGraphicsResourceGetMappedPointer failed: resource not mapped as pointer when pycuda.gl.RegisteredMapping.device_ptr_and_size() #455

Closed yyuyulm closed 3 months ago

yyuyulm commented 3 months ago

Describe the bug I have an variable that is <class 'pycuda._driver.RegisteredMapping'> yet when I try to call pycuda.gl.RegisteredMapping.device_ptr_and_size() to get its pointer and size so that I can map it to a GPU array (then to a pytorch tensor for down stream tasks), I got the following error: CUDA operation error: cuGraphicsResourceGetMappedPointer failed: resource not mapped as pointer

To Reproduce Steps to reproduce the behavior:

  1. go to the notebook code bellow
  2. run the whole notebook with an available spout stream matching the names in the notebook Expected behavior logs: <class 'pycuda._driver.RegisteredMapping'> CUDA operation error: cuGraphicsResourceGetMappedPointer failed: resource not mapped as pointer

Environment (please complete the following information):

Additional context I had some trouble installing pycuda with OpenGL, had to change siteconf.py to install the pycuda.gl module

Code:

%%

import torch import numpy as np import pycuda.driver as cuda import pycuda.gl as cuda_gl import pycuda.gpuarray as gpuarray from pycuda.gl import graphics_map_flags import pycuda.autoinit from OpenGL.GL import * import glfw from SpoutGL import SpoutReceiver, SpoutSender

%%

def setProjection(width, height): glMatrixMode(GL_PROJECTION) glLoadIdentity() glOrtho(0, width, height, 0, 1, -1) glMatrixMode(GL_MODELVIEW)

def drawSquare(width, height): glEnable(GL_TEXTURE_2D) glBegin(GL_QUADS) glTexCoord2f(0, 0); glVertex2f(0, 0) glTexCoord2f(1, 0); glVertex2f(width, 0) glTexCoord2f(1, 1); glVertex2f(width, height) glTexCoord2f(0, 1); glVertex2f(0, height) glEnd() glDisable(GL_TEXTURE_2D)

def check_gl_error(): err = glGetError() if err != GL_NO_ERROR: print(f"OpenGL error: {err}")

class SimpleModel(torch.nn.Module): def init(self): super(SimpleModel, self).init() self.conv = torch.nn.Conv2d(4, 4, 3, padding=1)

def forward(self, x):
    return torch.relu(self.conv(x))

%%

Replace with your parameters

receiverName = 'tdOut' senderName = 'pythonOut' displayWidth, displayHeight = 1280, 720

Initialize GLFW and create a window

if not glfw.init(): raise Exception("GLFW initialization failed")

window = glfw.create_window(displayWidth, displayHeight, "Spout Receiver/Sender", None, None) if not window: glfw.terminate() raise Exception("GLFW window creation failed")

glfw.make_context_current(window)

Set up Spout receiver and sender

spout_receiver = SpoutReceiver() spout_receiver.setReceiverName(receiverName) spout_sender = SpoutSender() spout_sender.setSenderName(senderName)

def create_texture(width, height): tex_id = glGenTextures(1) glBindTexture(GL_TEXTURE_2D, tex_id) glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR) glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR) glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, None) glBindTexture(GL_TEXTURE_2D, 0) check_gl_error() return tex_id

def recreate_textures(new_width, new_height): global width, height, input_tex_id, output_tex_id, input_image, output_image

# Delete old textures and CUDA mappings
glDeleteTextures([input_tex_id, output_tex_id])
input_image.unregister()
output_image.unregister()

# Create new textures
input_tex_id = create_texture(new_width, new_height)
output_tex_id = create_texture(new_width, new_height)

# Create new CUDA mappings
input_image = cuda_gl.RegisteredImage(int(input_tex_id), GL_TEXTURE_2D)
output_image = cuda_gl.RegisteredImage(int(output_tex_id), GL_TEXTURE_2D)

width, height = new_width, new_height
setProjection(width, height)

Create initial textures

input_tex_id = create_texture(displayWidth, displayHeight) output_tex_id = create_texture(displayWidth, displayHeight)

setProjection(displayWidth, displayHeight) glClearColor(0.0, 0.0, 0.0, 1.0)

Create CUDA-OpenGL interop resources

cuda.init() cuda_gl_context = cuda_gl.make_context(pycuda.autoinit.device) cuda_gl_context.push()

input_image = cuda_gl.RegisteredImage(int(input_tex_id), GL_TEXTURE_2D) output_image = cuda_gl.RegisteredImage(int(output_tex_id), GL_TEXTURE_2D)

Define a simple PyTorch model (replace with your actual model)

model = SimpleModel().cuda()

width, height = displayWidth, displayHeight

Main loop

while not glfw.window_should_close(window): glfw.poll_events()

# Receive frame from Spout into input OpenGL texture
spout_receiver.receiveTexture(input_tex_id, GL_TEXTURE_2D, False, 0)
if spout_receiver.isUpdated():
    new_width = spout_receiver.getSenderWidth()
    new_height = spout_receiver.getSenderHeight()

    # Update texture size if needed
    if new_width != width or new_height != height:
        print(f"Size change detected. Current: {width}x{height}, New: {new_width}x{new_height}")
        recreate_textures(new_width, new_height)

glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)

if width > 0 and height > 0:
    # Draw received texture
    glBindTexture(GL_TEXTURE_2D, input_tex_id)
    drawSquare(width, height)
    glBindTexture(GL_TEXTURE_2D, 0)

    # Process the texture with the model
    try:
        # Map the input OpenGL texture to CUDA
        input_mapping = input_image.map()
        input_array = input_mapping.array(0,0)
        print(type(input_mapping))
        print(input_mapping.device_ptr_and_size())

        # Create a GPUArray from the CUDA array
        gpu_array = gpuarray.empty((height, width, 4), dtype=np.uint8)
        #gpu_array = gpuarray.GPUArray((height, width, 4), dtype=torch.uint8, gpudata=input_array.ptr)
        cuda.memcpy_dtod(gpu_array.ptr, input_mapping.dev_pointer, gpu_array.size)
        print(type(gpu_array))

        # Create a PyTorch tensor from the GPUArray
        input_tensor = torch.as_tensor(gpu_array, device='cuda')

        # Reshape and normalize the tensor
        input_tensor = input_tensor.permute(2, 0, 1).unsqueeze(0).float() / 255.0
        '''
        # Run the model
        with torch.no_grad():
            output_tensor = model(input_tensor)

        # Denormalize and reshape the output tensor
        output_tensor = (output_tensor.squeeze(0).permute(1, 2, 0) * 255).byte()

        # Map the output OpenGL texture to CUDA
        output_gl_resource = cuda_gl.GraphicsResource(int(output_tex_id), cuda.graphics.TextureTarget.TEXTURE_2D, cuda.graphics.map_flags.WRITE_DISCARD)
        output_mapping = output_gl_resource.map()
        output_array = output_mapping.array(cuda.array_format.UNSIGNED_INT8, 4)

        # Copy the output tensor to the CUDA array
        cuda.memcpy_dtod(output_array.gpudata, output_tensor.data_ptr(), output_tensor.numel() * output_tensor.element_size())
        '''
        # Unmap the resources
        input_mapping.unmap()
        # output_gl_resource.unmap()

        # Send the processed texture using Spout
        # spout_sender.sendTexture(output_tex_id, GL_TEXTURE_2D, width, height)

    except cuda.LogicError as e:
        print(f"CUDA operation error: {e}")
    except Exception as e:
        print(f"Unexpected error: {e}")
        import traceback
        traceback.print_exc()

glfw.swap_buffers(window)
spout_receiver.waitFrameSync(receiverName, 100)  # Wait for next frame, timeout after 100ms

Cleanup

glDeleteTextures([input_tex_id, output_tex_id]) glfw.terminate() spout_sender.releaseSender() cuda_gl_context.pop()