Set of Python bindings to C++ libraries which provide full HW acceleration for video decoding, encoding and GPU-accelerated color space and pixel format conversions.
Apache License 2.0
1.32k
stars
233
forks
source link
Encoder produces black frame the second time running in the same process #509
Describe the bug
I use the code below to decode and encode a video file. The video has 1 fps and contains 4 frames. The first encoding works as expected, but when the processing is called again on the same file (or any other), the first frame of the produced video is black (the rest is fine). As far as I understand, the problem is caused by converting the surface to tensor and back. Without that conversion the problem doesn't occur.
To Reproduce
See the provided code (python3 code.py). Sample mkv: https://1drv.ms/u/s!At82OVPhN7VajIhfttp06Xb4lx-gVw?e=4bpERG
Store the sample mkv in the same folder as code.py and name it sample.mkv. The output will be stored in the same folder as output.mkv.
Expected behavior
The first frame of the video is encoded correctly.
Desktop (please complete the following information):
OS: Linux (AWS g4dn instance with Tesla T4, 16GB RAM and 4 vCPUs)
Nvidia driver version: 535.54.03
CUDA Version: 12.2
Python Version: 3.8
import torch
import subprocess
import numpy as np
import PyNvCodec as nvc
import PytorchNvCodec as pnvc
class cconverter:
    """
    Ordered chain of surface converters that are applied one after another.
    """

    def __init__(self, width: int, height: int, gpu_id: int):
        # Frame size and GPU id are reused for every converter added later.
        self.gpu_id, self.w, self.h = gpu_id, width, height
        self.chain = []

    def add(self, src_fmt: nvc.PixelFormat, dst_fmt: nvc.PixelFormat) -> None:
        """
        Append a src_fmt -> dst_fmt conversion step to the end of the chain.
        """
        step = nvc.PySurfaceConverter(self.w, self.h, src_fmt, dst_fmt, self.gpu_id)
        self.chain.append(step)

    def run(self, src_surface: nvc.Surface) -> nvc.Surface:
        """
        Feed src_surface through every step in insertion order and return a
        clone of the final surface.

        Raises RuntimeError as soon as any step produces an empty surface.
        """
        context = nvc.ColorspaceConversionContext(
            nvc.ColorSpace.BT_601, nvc.ColorRange.MPEG
        )
        current = src_surface
        for step in self.chain:
            current = step.Execute(current, context)
            if current.Empty():
                raise RuntimeError("Failed to perform color conversion")
        return current.Clone(self.gpu_id)
def surface_to_tensor(surface: nvc.Surface) -> torch.Tensor:
    """
    Export a planar RGB surface as a float cuda tensor scaled to [0, 1].

    Raises RuntimeError if the surface is not RGB_PLANAR or the export
    returns None.
    """
    if surface.Format() != nvc.PixelFormat.RGB_PLANAR:
        raise RuntimeError("Surface shall be of RGB_PLANAR pixel format")

    plane = surface.PlanePtr()
    raw = pnvc.DptrToTensor(
        plane.GpuMem(),
        plane.Width(),
        plane.Height(),
        plane.Pitch(),
        plane.ElemSize(),
    )
    if raw is None:
        raise RuntimeError("Can not export to tensor.")

    # Reinterpret the single plane as 3 channels, each one third of the
    # plane height.
    raw.resize_(3, int(plane.Height() / 3), plane.Width())
    as_float = raw.type(dtype=torch.cuda.FloatTensor)
    return torch.clamp(torch.divide(as_float, 255.0), 0.0, 1.0)
def tensor_to_surface(img_tensor: torch.Tensor, gpu_id: int) -> nvc.Surface:
    """
    Converts cuda float tensor to planar rgb surface.

    Args:
        img_tensor: tensor of shape (3, height, width). Values are clamped
            to [0, 1] and scaled by 255 before conversion to bytes.
        gpu_id: id of the GPU on which the destination surface is made.

    Returns:
        A newly made RGB_PLANAR surface of size width x height that the
        tensor contents were copied into.

    Raises:
        RuntimeError: if the tensor is not 3-dimensional or its first
            dimension is not 3.
    """
    # Reject the tensor when EITHER requirement fails. With `and` a
    # (4, H, W) tensor slipped through, and a 2-D tensor with 3 rows
    # crashed below with IndexError instead of this message.
    if len(img_tensor.shape) != 3 or img_tensor.shape[0] != 3:
        raise RuntimeError("Shape of the tensor must be (3, height, width)")

    tensor_w, tensor_h = img_tensor.shape[2], img_tensor.shape[1]

    # Back from normalized floats to 8-bit samples.
    img = torch.clamp(img_tensor, 0.0, 1.0)
    img = torch.multiply(img, 255.0)
    img = img.type(dtype=torch.cuda.ByteTensor)

    surface = nvc.Surface.Make(nvc.PixelFormat.RGB_PLANAR, tensor_w, tensor_h, gpu_id)
    surf_plane = surface.PlanePtr()
    pnvc.TensorToDptr(
        img,
        surf_plane.GpuMem(),
        surf_plane.Width(),
        surf_plane.Height(),
        surf_plane.Pitch(),
        surf_plane.ElemSize(),
    )

    return surface
def process():
    """
    Decode sample.mkv, round-trip every frame through a torch tensor,
    re-encode it as H.264 into output.h264 and remux that into output.mkv.

    Frame size (2560x1920), GPU id (0) and file names are hard-coded.
    """
    w = 2560
    h = 1920
    gpu_id = 0

    # NV12 -> YUV420 -> RGB -> planar RGB
    to_rgb = cconverter(w, h, gpu_id)
    to_rgb.add(nvc.PixelFormat.NV12, nvc.PixelFormat.YUV420)
    to_rgb.add(nvc.PixelFormat.YUV420, nvc.PixelFormat.RGB)
    to_rgb.add(nvc.PixelFormat.RGB, nvc.PixelFormat.RGB_PLANAR)

    # planar RGB -> RGB -> YUV420 -> NV12
    to_nv12 = cconverter(w, h, gpu_id)
    to_nv12.add(nvc.PixelFormat.RGB_PLANAR, nvc.PixelFormat.RGB)
    to_nv12.add(nvc.PixelFormat.RGB, nvc.PixelFormat.YUV420)
    to_nv12.add(nvc.PixelFormat.YUV420, nvc.PixelFormat.NV12)

    # Buffer the encoder fills with each compressed packet.
    enc_frame = np.ndarray(shape=(0), dtype=np.uint8)
    nv_enc = nvc.PyNvEncoder(
        {
            "preset": "default",
            "codec": "h264",
            "s": f"{w}x{h}",
            "bitrate": "5M",
            "fps": "1",
        },
        gpu_id,
    )
    nv_dec = nvc.PyNvDecoder(
        w, h, nvc.PixelFormat.NV12, nvc.CudaVideoCodec.H264, gpu_id
    )

    packet = np.ndarray(shape=(0), dtype=np.uint8)
    pdata_in, pdata_out = nvc.PacketData(), nvc.PacketData()
    nv_dmx = nvc.PyFFmpegDemuxer("sample.mkv")

    def reencode(src_surface, dst_file) -> bool:
        """Round-trip one decoded surface and encode it; False means stop."""
        # Convert to planar RGB
        rgb_pln = to_rgb.run(src_surface)
        if rgb_pln.Empty():
            return False

        src_tensor = surface_to_tensor(rgb_pln)
        dst_tensor = src_tensor  # no tensor processing is applied here
        surface_rgb = tensor_to_surface(dst_tensor, gpu_id)

        # Convert back to NV12
        dst_surface = to_nv12.run(surface_rgb)
        # Check the surface that was just produced; the original re-checked
        # src_surface here, which is already known to be non-empty.
        if dst_surface.Empty():
            return False

        # Encode
        if nv_enc.EncodeSingleSurface(dst_surface, enc_frame):
            dst_file.write(bytearray(enc_frame))
        return True

    # The context manager closes the file even if a step above raises.
    with open("output.h264", "wb") as dst_file:
        # Demux and decode packet by packet.
        while nv_dmx.DemuxSinglePacket(packet):
            # Get last packet data to obtain frame timestamp
            nv_dmx.LastPacketData(pdata_in)
            src_surface = nv_dec.DecodeSurfaceFromPacket(pdata_in, packet, pdata_out)
            # An empty surface means there is nothing to encode for this
            # packet; keep feeding the decoder.
            if src_surface.Empty():
                continue
            if not reencode(src_surface, dst_file):
                break

        # Drain surfaces still held by the decoder.
        while True:
            src_surface = nv_dec.FlushSingleSurface()
            if src_surface.Empty() or not reencode(src_surface, dst_file):
                break

        # Drain packets still held by the encoder.
        while nv_enc.FlushSinglePacket(enc_frame):
            dst_file.write(bytearray(enc_frame))

    # Remux the raw H.264 stream into a Matroska container at 1 fps.
    subprocess.run(
        [
            "ffmpeg",
            "-hide_banner",
            "-r",
            "1",
            "-i",
            "output.h264",
            "-c",
            "copy",
            "-y",
            "output.mkv",
        ]
    )
# Run the whole pipeline twice in the same interpreter process.
for _ in range(2):
    process()
Describe the bug: I use the code below to decode and encode a video file. The video has 1 fps and contains 4 frames. The first encoding works as expected, but when the processing is called again on the same file (or any other), the first frame of the produced video is black (the rest is fine). As far as I understand, the problem is caused by converting the surface to tensor and back. Without that conversion the problem doesn't occur.
To Reproduce: See the provided code (python3 code.py). Sample mkv: https://1drv.ms/u/s!At82OVPhN7VajIhfttp06Xb4lx-gVw?e=4bpERG Store the sample mkv in the same folder as code.py and name it sample.mkv. The output will be stored in the same folder as output.mkv.
Expected behavior: The first frame of the video is encoded correctly.
Desktop (please complete the following information):