NVIDIA / VideoProcessingFramework

A set of Python bindings to C++ libraries which provide full HW acceleration for video decoding, encoding, and GPU-accelerated color space and pixel format conversions
Apache License 2.0
1.32k stars 233 forks source link

makefromDevicePtrUint8 returns inaccurate data when running in multiple processes #546

Closed niujiabenbeng closed 1 year ago

niujiabenbeng commented 1 year ago

I am using PyNvDecoder to decode video, and makefromDevicePtrUint8() to wrap the decoded frames as torch tensors. It works fine when running in a single process, but produces some strange frames when running in multiple processes. After some investigation, we found that makefromDevicePtrUint8 sometimes returns inaccurate data when running in multiple processes. Specifically, when downloading a surface from GPU to CPU, makefromDevicePtrUint8().cpu().numpy() and PySurfaceDownloader produce different results, and the result from makefromDevicePtrUint8 is corrupted.

example.jpg

environment:

reproduce code:

#! /usr/bin/env python
# coding: utf-8

# yapf: disable

import os
import multiprocessing

import cv2
import torch
import numpy as np
import PyNvCodec as nvc
import PytorchNvCodec as pnvc

def to_opencv_image(image, width, height):
    """Turn a flat 3-plane buffer into a contiguous (height, width, 3) image.

    The input is viewed as three stacked planes of ``height`` x ``width``,
    the plane axis is moved last, and the channel order is reversed (the
    caller passes RGB_PLANAR data, so the result is in B, G, R order).

    Args:
        image: Array holding ``3 * height * width`` elements.
        width: Width of each plane in elements.
        height: Height of each plane in elements.

    Returns:
        A C-contiguous array of shape ``(height, width, 3)``.
    """
    planes = image.reshape((3, height, width))
    # (plane, row, col) -> (row, col, plane), then flip the plane order.
    interleaved = planes.transpose(1, 2, 0)[..., ::-1]
    return np.ascontiguousarray(interleaved)

class NvColorConverter:
    """Color converter using PySurfaceConverter.

    Chains three conversions (NV12 -> YUV420 -> RGB -> RGB_PLANAR) and offers
    two ways of copying the resulting surface to host memory so the results
    can be compared with each other.
    """

    def __init__(self, width, height, gpuid=0):
        """Create the conversion chain and the surface downloader.

        Args:
            width: Width forwarded to every converter and to the downloader.
            height: Height forwarded to every converter and to the downloader.
            gpuid: GPU id forwarded to every converter and to the downloader.
        """
        self.context = nvc.ColorspaceConversionContext(nvc.ColorSpace.BT_601, nvc.ColorRange.MPEG)
        self.to_yuv = nvc.PySurfaceConverter(width, height, nvc.PixelFormat.NV12, nvc.PixelFormat.YUV420, gpuid)
        self.to_rgb = nvc.PySurfaceConverter(width, height, nvc.PixelFormat.YUV420, nvc.PixelFormat.RGB, gpuid)
        self.to_planar = nvc.PySurfaceConverter(width, height, nvc.PixelFormat.RGB, nvc.PixelFormat.RGB_PLANAR, gpuid)
        self.downloader = nvc.PySurfaceDownloader(width, height, nvc.PixelFormat.RGB_PLANAR, gpuid)

    def convert_color(self, surface):
        """Run the surface through all three converters in order.

        Args:
            surface: Input surface handed to the first converter.

        Returns:
            The surface produced by the last converter, or ``None`` as soon
            as any stage returns an empty surface.
        """
        for stage in (self.to_yuv, self.to_rgb, self.to_planar):
            surface = stage.Execute(surface, self.context)
            if surface.Empty():
                return None
        return surface

    def get_frame_from_torch(self, surface):
        """Copy the surface to host memory through a torch tensor.

        Wraps the plane returned by ``surface.PlanePtr()`` with
        ``makefromDevicePtrUint8`` and moves the tensor to the CPU. This is
        the path the issue reports as returning corrupted data when several
        processes run at once.

        Args:
            surface: Surface whose plane is wrapped.

        Returns:
            A flattened numpy array with the tensor contents.
        """
        surface_plane = surface.PlanePtr()
        surface_tensor = pnvc.makefromDevicePtrUint8(
            surface_plane.GpuMem(),
            surface_plane.Width(),
            surface_plane.Height(),
            surface_plane.Pitch(),
            surface_plane.ElemSize())
        return surface_tensor.cpu().numpy().flatten()

    def get_frame_from_downloader(self, surface):
        """Copy the surface to host memory through PySurfaceDownloader.

        Args:
            surface: Surface to download.

        Returns:
            The uint8 numpy array handed to ``DownloadSingleSurface``
            (presumably resized and filled in place by the downloader --
            confirm against the PyNvCodec API).

        Raises:
            RuntimeError: If ``DownloadSingleSurface`` reports failure.
        """
        frame = np.ndarray(shape=(0,), dtype=np.uint8)
        # Keep the call out of an `assert`: asserts are stripped under
        # `python -O`, which would silently skip the download itself.
        if not self.downloader.DownloadSingleSurface(surface, frame):
            raise RuntimeError("failed to download surface to host memory")
        return frame

def decode_video(testid, path, gpuid=0):
    """Decode a video and record every frame where the two downloads differ.

    Each decoded surface is color-converted, then copied to host memory
    twice: once through a torch tensor and once through the surface
    downloader. When the two copies differ, both are written stacked
    vertically into one JPEG under ``images/`` (torch result on top).

    Args:
        testid: Integer used as the prefix of the output file names.
        path: Path of the video file passed to ``PyNvDecoder``.
        gpuid: GPU id used for both the decoder and the color converter.
    """
    dec = nvc.PyNvDecoder(path, gpuid)
    # Build the converter on the same GPU as the decoder; otherwise it would
    # silently fall back to its default of GPU 0.
    cvt = NvColorConverter(dec.Width(), dec.Height(), gpuid)

    for i in range(dec.Numframes()):
        surface = dec.DecodeSingleSurface()
        if surface.Empty():
            break
        surface = cvt.convert_color(surface)
        if surface is None:
            break
        # download same surface in two different ways
        frame1 = cvt.get_frame_from_torch(surface)
        frame2 = cvt.get_frame_from_downloader(surface)
        if np.array_equal(frame1, frame2):
            continue
        # if two frames are not equal, write them to file
        frame1 = to_opencv_image(frame1, dec.Width(), dec.Height())
        frame2 = to_opencv_image(frame2, dec.Width(), dec.Height())
        image = np.concatenate((frame1, frame2), axis=0)
        # Use a separate name so the `path` argument is not overwritten.
        out_path = f"images/{testid:02d}_{i:04d}.jpg"
        print("write image to: ", out_path)
        cv2.imwrite(out_path, image)

# replace this path
path = "samplevideo.mp4"
samples = list(enumerate([path] * 10))

# Guard the driver: under the "spawn" start method every worker re-imports
# this module, and unguarded pool creation would then run again in each one.
if __name__ == "__main__":
    os.makedirs("./images", exist_ok=True)

    # if we use single process, everything works fine.
    print("run in single process:")
    with multiprocessing.Pool(processes=1) as pool:
        pool.starmap(decode_video, samples)

    # if we use 4 processes, some error images are recorded.
    print("run in multiple processes:")
    with multiprocessing.Pool(processes=4) as pool:
        pool.starmap(decode_video, samples)

sample video: samplevideo

RomanArzumanyan commented 1 year ago

Hi @niujiabenbeng

This looks like a duplicate of #506; please check it out. Please let me know if that doesn't help.

niujiabenbeng commented 1 year ago

Hi @RomanArzumanyan Thank you for such a quick reply. It works!!!