kunzmi / managedCuda

ManagedCUDA aims at an easy integration of NVIDIA's CUDA in .NET applications written in C#, Visual Basic or any other .NET language.
Other
440 stars 79 forks source link

ManagedCuda.NVRTC.NVRTCException:“ErrorBuiltinOperationFailure: Builtin operation failure.” #121

Open williamlzw opened 1 year ago

williamlzw commented 1 year ago

Calling `rtc.Compile` throws ManagedCuda.NVRTC.NVRTCException: “ErrorBuiltinOperationFailure: Builtin operation failure.” Environment: CUDA 11.4, NuGet package managedCuda11.

 /// <summary>
 /// Compiles a small crop kernel with NVRTC, runs it on device 0 against
 /// "d:/234.jpg", and shows the 300x300 crop taken at offset (100, 50).
 /// </summary>
 /// <param name="args">
 /// Command-line arguments; they are forwarded unchanged to NVRTC as compiler
 /// options (e.g. "--gpu-architecture=compute_75").
 /// </param>
 static void Main(string[] args)
          {
              // extern "C" keeps the entry point unmangled so that it can be looked up
              // by the plain name "cropImage" when the PTX is loaded.
              // input/output are tightly packed 3-byte pixels (no row padding).
              string kernelCode = @"
            extern ""C"" __global__ void cropImage(const uchar3* input, uchar3* output, int inputWidth, int outputWidth, int outputHeight, int startX, int startY)
            {
                int x = blockIdx.x * blockDim.x + threadIdx.x;
                int y = blockIdx.y * blockDim.y + threadIdx.y;

                if (x < outputWidth && y < outputHeight)
                {
                    output[y * outputWidth + x] = input[(y + startY) * inputWidth + (x + startX)];
                }
            }"
        ;
              const int blockSize = 16;
              int startX = 100;
              int startY = 50;

              byte[] ptx;
              using (CudaRuntimeCompiler rtc = new CudaRuntimeCompiler(kernelCode, "cropImage"))
              {
                  try
                  {
                      rtc.Compile(args);
                  }
                  catch (ManagedCuda.NVRTC.NVRTCException)
                  {
                      // Print the compiler log before propagating; without this the
                      // diagnostics are lost because Compile throws first.
                      // NOTE(review): ErrorBuiltinOperationFailure is commonly reported when
                      // the nvrtc-builtins library matching the NVRTC version cannot be
                      // loaded - confirm it is deployed next to the nvrtc library.
                      Console.WriteLine(rtc.GetLogAsString());
                      throw;
                  }
                  Console.WriteLine(rtc.GetLogAsString());
                  ptx = rtc.GetPTX();
              }

              using (Mat originalImage = new Mat("d:/234.jpg", ImreadModes.Color))
              using (Mat croppedImage = new Mat(300, 300, MatType.CV_8UC3))
              {
                  if (originalImage.Empty())
                  {
                      throw new InvalidOperationException("Could not load image d:/234.jpg");
                  }
                  // The kernel does not clamp its reads, so the crop window must lie
                  // entirely inside the source image.
                  if (startX < 0 || startY < 0 ||
                      startX + croppedImage.Width > originalImage.Width ||
                      startY + croppedImage.Height > originalImage.Height)
                  {
                      throw new InvalidOperationException("Crop region does not fit inside the source image");
                  }

                  int croppedByteCount = croppedImage.Width * croppedImage.Height * 3;
                  // Total() counts pixels; multiply by the per-pixel byte size to get
                  // the buffer size in bytes.
                  long inputByteCount = originalImage.Total() * originalImage.ElemSize();

                  using (CudaContext ctx = new CudaContext(0))
                  using (CudaDeviceVariable<byte> cudaImageData = new CudaDeviceVariable<byte>(inputByteCount))
                  using (CudaDeviceVariable<byte> cudaCropData = new CudaDeviceVariable<byte>(croppedByteCount))
                  {
                      CudaKernel cropKernel = ctx.LoadKernelPTX(ptx, "cropImage");
                      cudaImageData.CopyToDevice(originalImage.Data);

                      cropKernel.BlockDimensions = new dim3(blockSize, blockSize);
                      // Ceil-divide so partially filled blocks still cover the image edge.
                      cropKernel.GridDimensions = new dim3(
                          (croppedImage.Width + blockSize - 1) / blockSize,
                          (croppedImage.Height + blockSize - 1) / blockSize);

                      // Argument order must match the kernel signature exactly.
                      cropKernel.Run(
                          cudaImageData.DevicePointer,
                          cudaCropData.DevicePointer,
                          originalImage.Width,
                          croppedImage.Width,
                          croppedImage.Height,
                          startX,
                          startY
                      );

                      byte[] croppedImageData = new byte[croppedByteCount];
                      cudaCropData.CopyToHost(croppedImageData);
                      croppedImage.Create(croppedImage.Height, croppedImage.Width, MatType.CV_8UC3);
                      croppedImage.SetArray(croppedImageData);
                  }

                  // GPU resources are released at this point; only the host Mat is needed.
                  Cv2.ImShow("test", croppedImage);
                  Cv2.WaitKey(0);
              }
          }