Encoding a video stream produces black frames or abnormally small H.265 frames

Describe the bug: I want to use VPF to encode a ROS 2 Foxy image topic stream, so I encode each frame with VPF inside the image callback function. However, some of the encoded frames are very small compared with the others. If I write all the frames to a video file and play it back, those small frames are black, or the video flickers, which is not the result I want.

Screenshots: I print the size of every encoded frame and, when the size is too small, I also print the frame data. The output is shown in the screenshot "Screenshot from 2023-10-10 11-29-54", and the resulting video is at https://github.com/NVIDIA/VideoProcessingFramework/assets/19700579/32c2beee-ca6e-4523-9ae9-005934aa5379

From the video we can see intermittent flickering, which appears to be an encoding error.

My code is derived from sample/SampleCupy.py:

#
# Copyright 2023 @royinx

#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Starting from Python 3.8 DLL search policy has changed.
# We need to add path to CUDA DLLs explicitly.
import rclpy
from rclpy.node import Node
from sensor_msgs.msg import CompressedImage
from sensor_msgs.msg import Image
from cv_bridge import CvBridge
import sys
import os
from typing import Any
import PyNvCodec as nvc
import numpy as np
import cupy as cp
import time

def numpy_to_cupy(img_array):
    """Return ``img_array`` as a CuPy array (via ``cp.asarray``)."""
    device_array = cp.asarray(img_array)
    return device_array

def cupy_to_numpy(img_array):
    """Return ``img_array`` as a NumPy array (via ``cp.asnumpy``)."""
    host_array = cp.asnumpy(img_array)
    return host_array

# Index of the GPU handed to every VPF object created in this script.
gpu_id = 0
# Frame size (width, height) in pixels.
output_width, output_height = 1920, 1080

class cconverter:
    """
    Ordered chain of pixel-format conversions applied to a surface.

    Stages are registered with :meth:`add` and executed, in registration
    order, by :meth:`run`. Every stage is built for the same frame size and GPU.
    """

    def __init__(self, width: int, height: int, gpu_id: int):
        self.gpu_id = gpu_id
        self.w = width
        self.h = height
        # Conversion stages, in the order they will be executed.
        self.chain = []

    def add(self, src_fmt: nvc.PixelFormat, dst_fmt: nvc.PixelFormat) -> None:
        """Append a ``src_fmt`` -> ``dst_fmt`` conversion stage to the chain."""
        stage = nvc.PySurfaceConverter(self.w, self.h, src_fmt, dst_fmt, self.gpu_id)
        self.chain.append(stage)

    def run(self, src_surface: nvc.Surface) -> nvc.Surface:
        """
        Push ``src_surface`` through every registered stage.

        - src_surface: nvc.Surface in the source format of the first stage
        - return: a clone of the surface produced by the last stage
        - raises: RuntimeError if any stage yields an empty surface
        """
        # All stages share one BT.601 / MPEG-range conversion context.
        context = nvc.ColorspaceConversionContext(nvc.ColorSpace.BT_601, nvc.ColorRange.MPEG)

        current = src_surface
        for stage in self.chain:
            current = stage.Execute(current, context)
            if current.Empty():
                raise RuntimeError("Failed to perform color conversion")

        # Return a clone rather than the surface handed back by the last stage.
        result = current.Clone(self.gpu_id)
        return result

class CupyNVC:
    """Helpers for moving image data between CuPy arrays and VPF surfaces."""

    def __init__(self, gpu_id: int):
        self.gpu_id = gpu_id

    def get_memptr(self, surface: nvc.Surface) -> int:
        """Return the GPU address of the surface's plane."""
        return surface.PlanePtr().GpuMem()

    def SurfaceToArray(self, surface: nvc.Surface) -> cp.ndarray:
        """
        Wrap an interleaved RGB surface as a cupy uint8 array without copying.

        The returned array is a view over the surface's memory; the surface is
        passed to cupy as the owner object of that memory.

        - surface: nvc.Surface in nvc.PixelFormat.RGB format
        - return: cp.ndarray (height, width, 3)
        - raises: RuntimeError if the surface is not in RGB format
        """
        if surface.Format() != nvc.PixelFormat.RGB:
            raise RuntimeError("Surface shall be of RGB format, got {}".format(surface.Format()))
        plane = surface.PlanePtr()
        # Zero-copy, non-owning cupy view over the plane.
        height, width, pitch = (plane.Height(), plane.Width(), plane.Pitch())
        # Rows are `pitch` bytes apart (see the strides below), so the wrapped
        # region spans height * pitch bytes, not height * width.
        cupy_mem = cp.cuda.UnownedMemory(self.get_memptr(surface), height * pitch, surface)
        cupy_memptr = cp.cuda.MemoryPointer(cupy_mem, 0)
        # The plane width counts bytes of interleaved RGB, hence width // 3 pixels.
        cupy_frame = cp.ndarray((height, width // 3, 3), cp.uint8, cupy_memptr, strides=(pitch, 3, 1))

        return cupy_frame

    def _memcpy(self, surface: nvc.Surface, img_array: cp.ndarray) -> None:
        """
        Copy a contiguous CHW uint8 array into a planar RGB surface.

        The three channel planes are copied as one tall 2D region of
        surface.Height() * 3 rows, each surface.Width() bytes wide. The call
        returns only after the copy has finished.
        """
        cp.cuda.runtime.memcpy2DAsync(self.get_memptr(surface),
                                      surface.Pitch(),
                                      img_array.data.ptr,
                                      surface.Width(),
                                      surface.Width(),
                                      surface.Height() * 3,
                                      cp.cuda.runtime.memcpyDeviceToDevice,
                                      0)  # null_stream.ptr: 0
        # The copy is only queued on the default stream. The VPF objects that
        # consume the surface next presumably run on their own CUDA streams,
        # which are not ordered against this copy (NOTE(review): confirm against
        # the VPF build in use). Wait here so they never read a partly written
        # surface, which would show up as black or corrupted frames.
        cp.cuda.runtime.streamSynchronize(0)

    def ArrayToSurface(self, img_array: cp.ndarray, gpu_id: int) -> nvc.Surface:
        """
        Converts cupy ndarray to a planar rgb surface.

        - img_array: cp.ndarray (height, width, 3)
        - gpu_id: GPU on which the surface is allocated
        - return: nvc.Surface in nvc.PixelFormat.RGB_PLANAR format
        """
        # copy=False skips a redundant copy when the data is already uint8.
        img_array = img_array.astype(cp.uint8, copy=False)
        img_array = cp.transpose(img_array, (2, 0, 1))  # HWC to CHW
        img_array = cp.ascontiguousarray(img_array)
        _, tensor_h, tensor_w = img_array.shape
        surface = nvc.Surface.Make(nvc.PixelFormat.RGB_PLANAR, tensor_w, tensor_h, gpu_id)
        self._memcpy(surface, img_array)
        return surface

class CupyNVCResizer:
    """Wrapper around ``nvc.PySurfaceResizer`` with a fixed output size and format."""

    def __init__(self, gpu_id: int, out_width: int, out_height: int, format):
        self.gpu_id = gpu_id
        # The resizer is created once and reused for every frame.
        self.resizer = nvc.PySurfaceResizer(out_width, out_height, format, gpu_id)

    def resize_image(self, surface: nvc.Surface) -> nvc.Surface:
        """
        Run the resizer configured in the constructor on ``surface``.

        - surface: nvc.Surface
        - return: nvc.Surface produced by the resizer
        """
        return self.resizer.Execute(surface)

class ImagePublisher(Node):
    """
    ROS 2 node that encodes an RGB image topic to HEVC with VPF.

    Every frame received on '/carla/ego_vehicle/rgb_front/image' is uploaded to
    the GPU, resized, converted to NV12 and passed to the hardware encoder.
    Each packet the encoder returns is published as a CompressedImage on
    '/carla/encoder/front/h265' and appended to a dump file.
    """

    def __init__(self):
        super().__init__('image_publisher')
        self.bridge = CvBridge()
        self.publisher_ = self.create_publisher(CompressedImage, '/carla/encoder/front/h265', 1)
        self.subscription = self.create_subscription(Image, '/carla/ego_vehicle/rgb_front/image', self.image_callback, 1)

        self.cpnvc = CupyNVC(gpu_id)

        # Use the module-level frame size instead of repeating the numbers here.
        w = output_width
        h = output_height
        res = f"{w}x{h}"
        # NOTE(review): "high" is an H.264 profile name; confirm that VPF accepts
        # it for codec "hevc".
        # Other settings tried: gop 60 / qmax 20; and qmin 1 / profile "main" /
        # tuning_info "high_quality".
        self.nvEnc = nvc.PyNvEncoder(
            {"preset": "P4", "codec": "hevc", "s": res, "bitrate": "20M", "gop": "30", "qmin": "0", "qmax": "10", "profile": "high"}, gpu_id
        )
        # resizer
        self.cpnvc_resizer = CupyNVCResizer(gpu_id, w, h, nvc.PixelFormat.RGB_PLANAR)
        # Surface converters
        self.to_rgb = cconverter(w, h, gpu_id)
        self.to_rgb.add(nvc.PixelFormat.NV12, nvc.PixelFormat.YUV420)
        self.to_rgb.add(nvc.PixelFormat.YUV420, nvc.PixelFormat.RGB)
        self.to_nv12 = cconverter(w, h, gpu_id)
        self.to_nv12.add(nvc.PixelFormat.RGB_PLANAR, nvc.PixelFormat.RGB)
        self.to_nv12.add(nvc.PixelFormat.RGB, nvc.PixelFormat.YUV420)
        self.to_nv12.add(nvc.PixelFormat.YUV420, nvc.PixelFormat.NV12)
        # output test
        # Opened last so a failure above does not leave an open file behind.
        # The encoder packets are appended to this file exactly as returned.
        self.dstFile = open("/home/hil/work2/dst.mp4", "wb")

    def destroy_node(self):
        """Close the dump file, then destroy the node as usual."""
        # NOTE(review): packets still buffered inside the encoder are not
        # flushed here, so the last frames may be missing from the dump.
        dst_file = getattr(self, "dstFile", None)
        if dst_file is not None and not dst_file.closed:
            dst_file.close()
        return super().destroy_node()

    def image_callback(self, msg: Image) -> None:
        """
        Encode one incoming frame; publish and dump the resulting packet.

        - msg: sensor_msgs Image, converted to "rgb8" before encoding
        """
        start = time.time()
        cv_image = self.bridge.imgmsg_to_cv2(msg, "rgb8")
        # asarray avoids another host-side copy when cv_image is already an ndarray.
        img_array = np.asarray(cv_image)
        print('origin_image.shape', img_array.shape)
        cupy_array = numpy_to_cupy(img_array)
        surface_rgb = self.cpnvc.ArrayToSurface(cupy_array, gpu_id)
        resized_surface = self.cpnvc_resizer.resize_image(surface_rgb)
        nv12_surface = self.to_nv12.run(resized_surface)

        # Encode
        encFrame = np.ndarray(shape=(0), dtype=np.uint8)
        success = self.nvEnc.EncodeSingleSurface(nv12_surface, encFrame)
        if not success:
            # No packet was produced for this call; nothing to publish or write.
            return

        # NOTE(review): if the encoder buffers frames, this packet may belong to
        # an earlier frame than the one whose header is attached here.
        self.publish_compressed_image(encFrame, msg.header)
        end = time.time()
        print("Encoded Array Shape:", encFrame.shape)
        # NOTE(review): a small packet is not necessarily an error; inter-coded
        # frames of a nearly static scene can be legitimately tiny.
        if encFrame.shape[0] < 1000:
            print('XXXXXXXXXX')
            print('small encode size, may be wrong')
            print(encFrame)
            print('XXXXXXXXXX')
        print('cost: ', end - start)
        self.dstFile.write(encFrame.tobytes())

    def publish_compressed_image(self, enc_frame: np.ndarray, header) -> None:
        """
        Publish one encoded packet as a CompressedImage with format 'h265'.

        - enc_frame: uint8 array holding the encoded packet
        - header: header copied onto the outgoing message
        """
        msg = CompressedImage()
        msg.header = header
        msg.format = 'h265'
        # tostring() is deprecated (removed in NumPy 2.0); tobytes() returns the
        # same bytes.
        msg.data = enc_frame.tobytes()
        self.publisher_.publish(msg)

def main(args=None):
    """
    Start rclpy, spin an ImagePublisher until interrupted, then clean up.

    - args: command-line arguments forwarded to rclpy.init
    """
    rclpy.init(args=args)
    node = None
    try:
        node = ImagePublisher()
        rclpy.spin(node)
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the node; exit without a traceback.
        pass
    finally:
        # Release the node and the ROS context even when spin() fails.
        if node is not None:
            node.destroy_node()
        rclpy.shutdown()

if __name__ == "__main__":
    # Start the node only when this file is run as a script, not on import.
    main()

Desktop:

OS: Linux
GPU: RTX 3090
NVIDIA driver version: 535.113.01 (reports CUDA 12.2)
CUDA toolkit version: 11.4 (/usr/local/cuda)
Python version: 3.8
ROS 2 version: Foxy
Video Codec SDK version: the latest

Additional info

NVIDIA / VideoProcessingFramework

Encode Video Stream get black frames or small size h265 frames #539