NVIDIA / VideoProcessingFramework

Set of Python bindings to C++ libraries which provides full HW acceleration for video decoding, encoding and GPU-accelerated color space and pixel format conversions
Apache License 2.0
1.32k stars 233 forks source link

Encoder produces black frame the second time running in the same process #509

Open david-molnar-oculai opened 1 year ago

david-molnar-oculai commented 1 year ago

Describe the bug: I use the code below to decode and encode a video file. The video has 1 fps and contains 4 frames. The first encoding works as expected, but when the processing is called again in the same process on the same file (or any other), the first frame of the produced video is black (the rest is fine). As far as I understand, the problem is caused by converting the surface to a tensor and back; without that conversion the problem doesn't occur.

To Reproduce: Run the provided code (python3 code.py). Sample mkv: https://1drv.ms/u/s!At82OVPhN7VajIhfttp06Xb4lx-gVw?e=4bpERG Store the sample file in the same folder as code.py and name it sample.mkv. The output will be written to the same folder as output.mkv.

Expected behavior: The first frame of the video is encoded correctly.

Desktop (please complete the following information):

import torch
import subprocess
import numpy as np
import PyNvCodec as nvc
import PytorchNvCodec as pnvc

class cconverter:
    """
    Ordered chain of surface converters applied one after another.

    Every stage is an ``nvc.PySurfaceConverter`` created for the same frame
    size and GPU; stages run in the order they were added.
    """

    def __init__(self, width: int, height: int, gpu_id: int):
        """Remember the frame size and GPU id; start with no stages."""
        self.gpu_id, self.w, self.h = gpu_id, width, height
        self.chain = []

    def add(self, src_fmt: nvc.PixelFormat, dst_fmt: nvc.PixelFormat) -> None:
        """Append a stage converting ``src_fmt`` surfaces into ``dst_fmt``."""
        stage = nvc.PySurfaceConverter(
            self.w, self.h, src_fmt, dst_fmt, self.gpu_id
        )
        self.chain.append(stage)

    def run(self, src_surface: nvc.Surface) -> nvc.Surface:
        """
        Push ``src_surface`` through every stage and return a clone of the result.

        Raises:
            RuntimeError: if any stage yields an empty surface.
        """
        # A fresh BT.601 / MPEG-range context is built for every call.
        context = nvc.ColorspaceConversionContext(
            nvc.ColorSpace.BT_601, nvc.ColorRange.MPEG
        )

        current = src_surface
        for stage in self.chain:
            current = stage.Execute(current, context)
            if current.Empty():
                raise RuntimeError("Failed to perform color conversion")

        # NOTE(review): the clone presumably detaches the result from a buffer
        # owned by the last converter - confirm against the VPF documentation.
        return current.Clone(self.gpu_id)

def surface_to_tensor(surface: nvc.Surface) -> torch.Tensor:
    """
    Export a planar RGB surface as a float CUDA tensor with values in [0, 1].

    The exported plane is reshaped to ``(3, plane_height / 3, plane_width)``,
    cast to ``torch.cuda.FloatTensor``, divided by 255 and clamped.

    Raises:
        RuntimeError: if the surface is not RGB_PLANAR or the export
            returns ``None``.
    """
    if surface.Format() != nvc.PixelFormat.RGB_PLANAR:
        raise RuntimeError("Surface shall be of RGB_PLANAR pixel format")

    plane = surface.PlanePtr()
    gpu_mem = plane.GpuMem()
    plane_w = plane.Width()
    plane_h = plane.Height()

    raw = pnvc.DptrToTensor(gpu_mem, plane_w, plane_h, plane.Pitch(), plane.ElemSize())
    if raw is None:
        raise RuntimeError("Can not export to tensor.")

    # The plane height is split into three equal slabs, one per channel.
    raw.resize_(3, int(plane_h / 3), plane_w)

    as_float = raw.type(dtype=torch.cuda.FloatTensor)
    scaled = torch.divide(as_float, 255.0)
    return torch.clamp(scaled, 0.0, 1.0)

def tensor_to_surface(img_tensor: torch.Tensor, gpu_id: int) -> "nvc.Surface":
    """
    Copy a float CUDA tensor of shape (3, height, width) into a planar RGB surface.

    The tensor is clamped to [0, 1], scaled by 255 and cast to
    ``torch.cuda.ByteTensor`` before being copied into a newly made
    RGB_PLANAR surface on ``gpu_id``.

    Args:
        img_tensor: tensor shaped (3, height, width).
        gpu_id: id of the GPU on which the surface is created.

    Returns:
        The newly created RGB_PLANAR surface holding the tensor data.

    Raises:
        RuntimeError: if the tensor is not 3-dimensional or its first
            dimension is not 3.
    """
    # Both conditions must hold, so reject when EITHER fails. The rank check
    # comes first so shape[0] is never read from a 0-dimensional tensor.
    if len(img_tensor.shape) != 3 or img_tensor.shape[0] != 3:
        raise RuntimeError("Shape of the tensor must be (3, height, width)")

    tensor_w, tensor_h = img_tensor.shape[2], img_tensor.shape[1]
    img = torch.clamp(img_tensor, 0.0, 1.0)
    img = torch.multiply(img, 255.0)
    img = img.type(dtype=torch.cuda.ByteTensor)

    surface = nvc.Surface.Make(nvc.PixelFormat.RGB_PLANAR, tensor_w, tensor_h, gpu_id)
    surf_plane = surface.PlanePtr()
    pnvc.TensorToDptr(
        img,
        surf_plane.GpuMem(),
        surf_plane.Width(),
        surf_plane.Height(),
        surf_plane.Pitch(),
        surf_plane.ElemSize(),
    )

    return surface

def _reencode_surface(src_surface, to_rgb, to_nv12, nvEnc, encFrame, dstFile, gpu_id: int) -> bool:
    """
    Round-trip one decoded surface through a tensor and feed it to the encoder.

    The surface is converted to planar RGB, exported to a tensor, imported
    back into a surface, converted to NV12 and passed to the encoder. Any
    packet the encoder produces is appended to ``dstFile``.

    Args:
        src_surface: decoded NV12 surface.
        to_rgb: ``cconverter`` chain producing planar RGB.
        to_nv12: ``cconverter`` chain producing NV12 from planar RGB.
        nvEnc: encoder receiving the NV12 surface.
        encFrame: numpy buffer the encoder writes the packet into.
        dstFile: binary file object receiving the encoded bytes.
        gpu_id: GPU on which the intermediate RGB surface is created.

    Returns:
        False if a conversion produced an empty surface (the caller should
        stop its loop), True otherwise.
    """
    # Convert to planar RGB
    rgb_pln = to_rgb.run(src_surface)
    if rgb_pln.Empty():
        return False

    # Identity "processing": the tensor is passed through unchanged.
    src_tensor = surface_to_tensor(rgb_pln)
    dst_tensor = src_tensor
    surface_rgb = tensor_to_surface(dst_tensor, gpu_id)

    # Convert back to NV12; check the converted surface, not the source one.
    dst_surface = to_nv12.run(surface_rgb)
    if dst_surface.Empty():
        return False

    # Encode; the encoder may not emit a packet for every submitted surface.
    if nvEnc.EncodeSingleSurface(dst_surface, encFrame):
        dstFile.write(bytearray(encFrame))

    return True


def process():
    """
    Decode ``sample.mkv``, round-trip every frame through a torch tensor,
    re-encode to ``output.h264`` and remux it into ``output.mkv`` with ffmpeg.

    Frame size (2560x1920), GPU id (0) and file names are hard-coded.

    NOTE(review): this function only restructures the reproduction script; it
    makes no claim to resolve the black first frame reported for the second run.
    """
    w = 2560
    h = 1920
    gpu_id = 0

    to_rgb = cconverter(w, h, gpu_id)
    to_rgb.add(nvc.PixelFormat.NV12, nvc.PixelFormat.YUV420)
    to_rgb.add(nvc.PixelFormat.YUV420, nvc.PixelFormat.RGB)
    to_rgb.add(nvc.PixelFormat.RGB, nvc.PixelFormat.RGB_PLANAR)

    to_nv12 = cconverter(w, h, gpu_id)
    to_nv12.add(nvc.PixelFormat.RGB_PLANAR, nvc.PixelFormat.RGB)
    to_nv12.add(nvc.PixelFormat.RGB, nvc.PixelFormat.YUV420)
    to_nv12.add(nvc.PixelFormat.YUV420, nvc.PixelFormat.NV12)

    # The context manager closes the output file even if a step below raises.
    with open("output.h264", "wb") as dstFile:
        encFrame = np.ndarray(shape=(0), dtype=np.uint8)
        nvEnc = nvc.PyNvEncoder(
            {
                "preset": "default",
                "codec": "h264",
                "s": f"{w}x{h}",
                "bitrate": "5M",
                "fps": "1",
            },
            gpu_id,
        )

        nvDec = nvc.PyNvDecoder(
            w, h, nvc.PixelFormat.NV12, nvc.CudaVideoCodec.H264, gpu_id
        )

        packet = np.ndarray(shape=(0), dtype=np.uint8)
        pdata_in, pdata_out = nvc.PacketData(), nvc.PacketData()

        nvDmx = nvc.PyFFmpegDemuxer("sample.mkv")

        # Phase 1: feed demuxed packets to the decoder until the input ends.
        while nvDmx.DemuxSinglePacket(packet):
            # Get last packet data to obtain frame timestamp
            nvDmx.LastPacketData(pdata_in)

            src_surface = nvDec.DecodeSurfaceFromPacket(pdata_in, packet, pdata_out)
            if src_surface.Empty():
                # No surface was returned for this packet; keep feeding.
                continue

            if not _reencode_surface(
                src_surface, to_rgb, to_nv12, nvEnc, encFrame, dstFile, gpu_id
            ):
                break

        # Phase 2: drain the surfaces still held by the decoder.
        while True:
            src_surface = nvDec.FlushSingleSurface()
            if src_surface.Empty():
                break

            if not _reencode_surface(
                src_surface, to_rgb, to_nv12, nvEnc, encFrame, dstFile, gpu_id
            ):
                break

        # Phase 3: drain the packets still held by the encoder.
        while nvEnc.FlushSinglePacket(encFrame):
            dstFile.write(bytearray(encFrame))

    # Remux the raw H.264 stream into a Matroska container at 1 fps.
    subprocess.run(
        [
            "ffmpeg",
            "-hide_banner",
            "-r",
            "1",
            "-i",
            "output.h264",
            "-c",
            "copy",
            "-y",
            "output.mkv",
        ]
    )

# Run the pipeline twice in the same interpreter: per the report above, only
# the second run produces a black first frame.
for _ in range(2):
    process()