PyAV-Org / PyAV

Pythonic bindings for FFmpeg's libraries.
https://pyav.basswood-io.com/
BSD 3-Clause "New" or "Revised" License
2.44k stars 359 forks source link

When I define and set pts and dts myself, the value of tbr will become 100 #1347

Closed ncheng89 closed 2 weeks ago

ncheng89 commented 5 months ago

When I set the pts and dts myself, the btr will become 100 uncontrollably. When I don't modify the pts and dts, the value of the btr is consistent with the fps.

When I modify pts and dts

Duration: N/A, start: 0.000000, bitrate: 3200 kb/s
Stream #0:0: Data: none
Stream #0:1: Audio: aac (LC), 16000 Hz, mono, fltp, 128 kb/s
Stream #0:2: Video: h264 (High), yuv420p(progressive), 1920x1080, 3072 kb/s, 25 fps, 100 tbr, 1k tbn, 50 tbc

When I don't modify pts and dts

Stream #0:0: Data: none
Stream #0:1: Audio: aac (LC), 16000 Hz, mono, fltp, 128 kb/s
Stream #0:2: Video: h264 (High), yuv420p(progressive), 1920x1080, 3072 kb/s, 25 fps, 25 tbr, 1k tbn, 50 tbc

I also set time_base:

self.archive_video_stream = self.out_archive.add_stream('h264', rate=25, options={"tbr":"25"})
self.archive_audio_stream = self.out_archive.add_stream('aac', rate=audio_rate)

self.archive_video_stream.height = self.videoHeight
self.archive_video_stream.width = self.videoWidth

self.archive_video_stream.time_base = fractions.Fraction(1, 25)
print(self.archive_video_stream.time_base)
self.archive_video_stream.framerate =  fractions.Fraction(25, 1)
self.archive_audio_stream.time_base = fractions.Fraction(1, audio_rate)
WyattBlue commented 5 months ago

btr Do you mean tbr? Also, can you make a standalone example to show what exactly is the problem?

ncheng89 commented 5 months ago

The problem is that it will cause video freezes and the video cannot be played at fps speed. The result of my analysis is that the speed of tbr must be consistent with the fps speed to avoid video freezes. In addition, I will write a demo to test it. @WyattBlue

ncheng89 commented 5 months ago

This is a demo I wrote. As long as I enable the add_video_pts_dts method and set pts and dts myself, the tbr value in the rtmp stream will be greater than the fps value and become 100. As long as I do not set pts and dts myself, the tbr value will be the normal sum. Consistent fps @WyattBlue You try this demo

import fractions
import os
import queue
import threading
import time
import av

class TestAvLive:
    def __init__(self, rtsp, rtmp):
        self.frame_queue = queue.Queue()
        self.video_pts = 10
        self.audio_pts = 10
        self.frame_count = 0
        self.rtsp = rtsp
        self.rtmp = rtmp

    def test_in(self):
        options = {'buffer_size': '1024000', 'rtsp_transport': 'tcp', 'stimeout': '20000000', 'max_delay': '500000'}

        self.in_video = av.open(self.rtsp, 'r', format=None, options=options, metadata_errors='strict')
        self.in_video.streams.video[0].thread_type = "AUTO"
        self.in_video_stream = self.in_video.streams.video[0]
        self.in_audio_stream = self.in_video.streams.audio[0]

        for packet in self.in_video.demux(self.in_video_stream, self.in_audio_stream):
            if packet.stream.type == 'video':
                for frame in packet.decode():
                    self.frame_queue.put((frame, 'video'))
            elif packet.stream.type == 'audio':
                self.frame_queue.put((packet, 'audio'))

    def get_frame_duration(self):

        in_video_stream = self.in_video_stream
        in_audio_stream = self.in_audio_stream
        print(in_video_stream.time_base, in_audio_stream.time_base)
        try:
            video_frame_duration = in_video_stream.time_base.denominator / 25
            audio_frame_duration = in_audio_stream.time_base.denominator / in_audio_stream.rate

        except Exception as e:
            video_frame_duration = 3600
            audio_frame_duration = 1000

        print(video_frame_duration, audio_frame_duration)
        return int(video_frame_duration), audio_frame_duration

    def add_video_pts_dts(self, packet):
        return packet
        for i in packet:
            i.pts = i.dts = self.video_pts
            self.video_pts += self.video_frame_duration
        return packet

    def test_out(self):
        time.sleep(2)
        audio_rate = 44100
        self.video_frame_duration, self.audio_frame_duration = self.get_frame_duration()
        out_archive = av.open(self.rtmp, 'w', format='flv')

        encode_m = "h264_nvenc" if os.name == 'posix' else "h264"

        archive_video_stream = out_archive.add_stream(encode_m, rate=25, options={"framerate": '25'})
        archive_audio_stream = out_archive.add_stream('aac', rate=audio_rate)

        archive_video_stream.height = 1080
        archive_video_stream.width = 1920
        archive_video_stream.bit_rate = 3072000
        archive_video_stream.pix_fmt = 'nv12'
        archive_video_stream.gop_size = 60

        archive_video_stream.time_base = fractions.Fraction(1, 25)
        print(archive_video_stream.time_base)
        archive_video_stream.framerate = fractions.Fraction(25, 1)
        archive_audio_stream.time_base = fractions.Fraction(1, audio_rate)
        print(archive_video_stream.time_base, archive_audio_stream.time_base)

        while True:
            frame, data_type = self.frame_queue.get()
            if data_type == 'video':
                print("video pts:{} dts:{}".format(frame.pts, frame.dts))
                undistort_frame = frame.to_ndarray(format='bgr24')
                frame = av.VideoFrame.from_ndarray(undistort_frame, format='bgr24')
                out_video_packet = self.add_video_pts_dts(archive_video_stream.encode(frame))
                out_archive.mux(out_video_packet)
                self.frame_count += 1

            elif data_type == 'audio':
                out_archive.mux(frame)

    def run(self):
        threads = [
            threading.Thread(target=self.test_in),
            threading.Thread(target=self.test_out),
        ]
        [thread.start() for thread in threads]
        [thread.join() for thread in threads]

if __name__ == '__main__':
    rtsp = "rtsp://admin:abc@12345@192.168.4.112:554/ch1/stream1"
    rtmp = 'rtmp://192.168.1.91:1935/live/6666/9944'
    t = TestAvLive(rtsp,rtmp)
    t.run()
WyattBlue commented 5 months ago

When I run it, I get this. I don't think it show cases the actual problem

Exception in thread Thread-2 (test_out):
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/threading.py", line 1052, in _bootstrap_inner
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/threading.py", line 989, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/wyattblue/projects/auto-editor/test.py", line 58, in test_out
    self.video_frame_duration, self.audio_frame_duration = self.get_frame_duration()
                                                           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/wyattblue/projects/auto-editor/test.py", line 34, in get_frame_duration
    in_video_stream = self.in_video_stream
                      ^^^^^^^^^^^^^^^^^^^^
AttributeError: 'TestAvLive' object has no attribute 'in_video_stream'
Connection to tcp://192.168.4.112:554?timeout=0 failed: Operation timed out
Exception in thread Thread-1 (test_in):
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/threading.py", line 1052, in _bootstrap_inner
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/threading.py", line 989, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/wyattblue/projects/auto-editor/test.py", line 20, in test_in
    self.in_video = av.open(self.rtsp, 'r', format=None, options=options, metadata_errors='strict')
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "av/container/core.pyx", line 420, in av.container.core.open
  File "av/container/core.pyx", line 266, in av.container.core.Container.__cinit__
  File "av/container/core.pyx", line 286, in av.container.core.Container.err_check
  File "av/error.pyx", line 328, in av.error.err_check
av.error.TimeoutError: [Errno 60] Operation timed out: 'rtsp://admin:abc@12345@192.168.4.112:554/ch1/stream1'; last error log: [tcp] Connection to tcp://192.168.4.112:554?timeout=0 failed: Operation timed out
ncheng89 commented 5 months ago

You need to modify the addresses of rtmp and rtsp in the demo


if __name__ == '__main__':
    rtsp = "rtsp://admin:abc@12345@192.168.4.112:554/ch1/stream1"
    rtmp = 'rtmp://192.168.1.91:1935/live/6666/9944'
    t = TestAvLive(rtsp,rtmp)
    t.run()
ncheng89 commented 5 months ago

If you enable the add_video_pts_dts method and set pts and dts yourself, it will cause tbr to become 100 instead of consistent with the value of fps, which will cause the video stream to freeze.

So my question is how to control the value of this tbr

    def add_video_pts_dts(self, packet):
        # return packet
        for i in packet:
            i.pts = i.dts = self.video_pts
            self.video_pts += self.video_frame_duration
        return packet

[root@3-90 tmp]# ffprobe rtmp://192.168.1.91:1935/live/6666/9944
ffprobe version 4.3.2-static https://johnvansickle.com/ffmpeg/  Copyright (c) 2007-2021 the FFmpeg developers
  built with gcc 8 (Debian 8.3.0-6)
  configuration: --enable-gpl --enable-version3 --enable-static --disable-debug --disable-ffplay --disable-indev=sndio --disable-outdev=sndio --cc=gcc --enable-fontconfig --enable-frei0r --enable-gnutls --enable-gmp --enable-libgme --enable-gray --enable-libaom --enable-libfribidi --enable-libass --enable-libvmaf --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-librubberband --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libvorbis --enable-libopus --enable-libtheora --enable-libvidstab --enable-libvo-amrwbenc --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libdav1d --enable-libxvid --enable-libzvbi --enable-libzimg
  libavutil      56. 51.100 / 56. 51.100
  libavcodec     58. 91.100 / 58. 91.100
  libavformat    58. 45.100 / 58. 45.100
  libavdevice    58. 10.100 / 58. 10.100
  libavfilter     7. 85.100 /  7. 85.100
  libswscale      5.  7.100 /  5.  7.100
  libswresample   3.  7.100 /  3.  7.100
  libpostproc    55.  7.100 / 55.  7.100
Input #0, flv, from 'rtmp://192.168.1.91:1935/live/6666/9944':
  Metadata:
    encoder         : Lavf60.16.100
    server          : SRS/4.0.268(Leo)
    server_version  : 4.0.268
  Duration: N/A, start: 0.000000, bitrate: 3200 kb/s
    Stream #0:0: Data: none
    Stream #0:1: Audio: aac (LC), 16000 Hz, mono, fltp, 128 kb/s
    Stream #0:2: Video: h264 (High), yuv420p(progressive), 1920x1080, 3072 kb/s, 25 fps, 100 tbr, 1k tbn, 50 tbc
Unsupported codec with id 0 for input stream 0
[root@3-90 tmp]# 
WyattBlue commented 5 months ago

Does this only happen in rtmp inputs? Can you replicate this in other rtmp streams?

ncheng89 commented 3 months ago

Yes, I tested other rtmp streams and it was the same. I tried to write the input rtmp stream to an mp4 video file and it was the same.

import fractions
import os
import time
import av
import numpy as np

def main():
    # url = "rtsp://admin:abc@12345@192.168.11.116:554/h264/ch1/main/av_stream"
    url = "rtmp://192.168.3.90:1935/live/1212/C9999"
    in_options = {'rtsp_transport': 'tcp', 'max_delay': '5000000', 'timeout': '5000000'}
    if "rtmp" in url:
        in_options = {}
    in_video = av.open(url, mode='r', options=in_options)

    out_video = av.open("test.mp4", 'w')

    if in_video.streams.video:
        in_video_stream = in_video.streams.video[0]
        video_stream = out_video.add_stream(template=in_video_stream)
    if in_video.streams.audio:
        in_audio_stream = in_video.streams.audio[0]
        audio_stream = out_video.add_stream(template=in_audio_stream)

    fps = 30
    video_stream = out_video.add_stream("libx264", rate=fps)
    video_stream.height = 1080
    video_stream.width = 1920
    video_stream.bit_rate = 3072000
    video_stream.pix_fmt = 'yuvj420p'
    video_stream.gop_size = 60
    video_stream.framerate = fractions.Fraction(fps, 1)
    # video_stream.time_base = fractions.Fraction(1, fps)
    video_stream.time_base = fractions.Fraction(1, 90000)

    frame_count = 0
    duration = 20
    total_frames = fps * duration

    for packet in in_video.demux(in_video_stream, in_audio_stream):

        if packet.stream.type == 'video':
            for frame in packet.decode():

                frame.pts = frame_count * (video_stream.time_base.denominator / video_stream.average_rate.numerator)
                packet_out = video_stream.encode(frame)
                for _packet in packet_out:
                    out_video.mux(_packet)
                frame_count += 1

        elif packet.stream.type == 'audio':
            out_video.mux(packet)

        if frame_count >= total_frames:
            break
    for packet in video_stream.encode():
        out_video.mux(packet)

    in_video.close()
    out_video.close()

if __name__ == '__main__':
    main()

This is the output result

root@c68efa4a6117:/tmp# python3 t.py     
root@c68efa4a6117:/tmp# ffprobe -v error -select_streams v:0 -show_entries stream=codec_name,codec_type,time_base,r_frame_rate,avg_frame_rate -of default=noprint_wrappers=1 test.mp4
codec_name=h264
codec_type=video
r_frame_rate=1/3
avg_frame_rate=18000/53911
time_base=1/90000
root@c68efa4a6117:/tmp# ffprobe test.mp4 
ffprobe version 5.1.4-0+deb12u1 Copyright (c) 2007-2023 the FFmpeg developers
  built with gcc 12 (Debian 12.2.0-14)
  configuration: --prefix=/usr --extra-version=0+deb12u1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libglslang --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librist --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --disable-sndio --enable-libjxl --enable-pocketsphinx --enable-librsvg --enable-libmfx --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libx264 --enable-libplacebo --enable-librav1e --enable-shared
  libavutil      57. 28.100 / 57. 28.100
  libavcodec     59. 37.100 / 59. 37.100
  libavformat    59. 27.100 / 59. 27.100
  libavdevice    59.  7.100 / 59.  7.100
  libavfilter     8. 44.100 /  8. 44.100
  libswscale      6.  7.100 /  6.  7.100
  libswresample   4.  7.100 /  4.  7.100
  libpostproc    56.  6.100 / 56.  6.100
Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'test.mp4':
  Metadata:
    major_brand     : isom
    minor_version   : 512
    compatible_brands: isomiso2avc1mp41
    encoder         : Lavf60.16.100
  Duration: 00:29:57.03, start: 0.000000, bitrate: 264 kb/s
  Stream #0:0[0x2](und): Audio: aac (LC) (mp4a / 0x6134706D), 44100 Hz, stereo, fltp, 99 kb/s (default)
    Metadata:
      handler_name    : SoundHandler
      vendor_id       : [0][0][0][0]
  Stream #0:1[0x3](und): Video: h264 (High) (avc1 / 0x31637661), yuvj420p(pc, progressive), 1920x1080, 263 kb/s, 0.33 fps, 0.33 tbr, 90k tbn
    Metadata:
      handler_name    : VideoHandler
      vendor_id       : [0][0][0][0]
root@c68efa4a6117:/tmp# 

When I try to change the time base to match the frame rate video_stream.time_base = fractions.Fraction(1, fps) Get this result

root@c68efa4a6117:/tmp# ffprobe -v error -select_streams v:0 -show_entries stream=codec_name,codec_type,time_base,r_frame_rate,avg_frame_rate -of default=noprint_wrappers=1 test.mp4
codec_name=h264
codec_type=video
r_frame_rate=30/1
avg_frame_rate=9000/4601
time_base=1/15360
root@c68efa4a6117:/tmp# ffprobe test.mp4 
ffprobe version 5.1.4-0+deb12u1 Copyright (c) 2007-2023 the FFmpeg developers
  built with gcc 12 (Debian 12.2.0-14)
  configuration: --prefix=/usr --extra-version=0+deb12u1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libglslang --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librist --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --disable-sndio --enable-libjxl --enable-pocketsphinx --enable-librsvg --enable-libmfx --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libx264 --enable-libplacebo --enable-librav1e --enable-shared
  libavutil      57. 28.100 / 57. 28.100
  libavcodec     59. 37.100 / 59. 37.100
  libavformat    59. 27.100 / 59. 27.100
  libavdevice    59.  7.100 / 59.  7.100
  libavfilter     8. 44.100 /  8. 44.100
  libswscale      6.  7.100 /  6.  7.100
  libswresample   4.  7.100 /  4.  7.100
  libpostproc    56.  6.100 / 56.  6.100
Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'test.mp4':
  Metadata:
    major_brand     : isom
    minor_version   : 512
    compatible_brands: isomiso2avc1mp41
    encoder         : Lavf60.16.100
  Duration: 00:05:06.73, start: 0.000000, bitrate: 1534 kb/s
  Stream #0:0[0x2](und): Audio: aac (LC) (mp4a / 0x6134706D), 44100 Hz, stereo, fltp, 98 kb/s (default)
    Metadata:
      handler_name    : SoundHandler
      vendor_id       : [0][0][0][0]
  Stream #0:1[0x3](und): Video: h264 (High) (avc1 / 0x31637661), yuvj420p(pc, progressive), 1920x1080, 1526 kb/s, 1.96 fps, 30 tbr, 15360 tbn
    Metadata:
      handler_name    : VideoHandler
      vendor_id       : [0][0][0][0]
ncheng89 commented 3 months ago

When I turn off the custom setting pts, the output mp4 file seems to become normal, but I

    fps = 30
    video_stream = out_video.add_stream("libx264", rate=fps)
    video_stream.height = 1080
    video_stream.width = 1920
    video_stream.bit_rate = 3072000
    video_stream.pix_fmt = 'yuvj420p'
    video_stream.gop_size = 60
    video_stream.framerate = fractions.Fraction(fps, 1)
    #video_stream.time_base = fractions.Fraction(1, fps)
    video_stream.time_base = fractions.Fraction(1, 90000)

    frame_count = 0
    duration = 20
    total_frames = fps * duration

    for packet in in_video.demux(in_video_stream, in_audio_stream):

        if packet.stream.type == 'video':
            for frame in packet.decode():

                #frame.pts = frame_count * (video_stream.time_base.denominator / video_stream.average_rate.numerator)
                packet_out = video_stream.encode(frame)
                for _packet in packet_out:
                    out_video.mux(_packet)
                frame_count += 1
Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'test.mp4':
  Metadata:
    major_brand     : isom
    minor_version   : 512
    compatible_brands: isomiso2avc1mp41
    encoder         : Lavf60.16.100
  Duration: 00:00:20.03, start: 0.000000, bitrate: 3227 kb/s
  Stream #0:0[0x2](und): Audio: aac (LC) (mp4a / 0x6134706D), 44100 Hz, stereo, fltp, 103 kb/s (default)
    Metadata:
      handler_name    : SoundHandler
      vendor_id       : [0][0][0][0]
  Stream #0:1[0x3](und): Video: h264 (High) (avc1 / 0x31637661), yuvj420p(pc, progressive), 1920x1080, 3113 kb/s, 29.93 fps, 30 tbr, 90k tbn
    Metadata:
      handler_name    : VideoHandler
      vendor_id       : [0][0][0][0]
root@c68efa4a6117:/tmp# ffprobe -v error -select_streams v:0 -show_entries stream=codec_name,codec_type,time_base,r_frame_rate,avg_frame_rate -of default=noprint_wrappers=1 test.mp4
codec_name=h264
codec_type=video
r_frame_rate=30/1
avg_frame_rate=1800000/60133
time_base=1/90000

When I turn off the custom pts setting and set the time scale to 90000, the output mp4 file seems to be normal.

But if I output the rtmp stream, set the time scale to 90000 and turn off the custom pts setting, the picture will become very stuck. Only when the time scale is set to the same as the pts will it not be stuck. I don't understand why this happens.