Open acris5 opened 2 months ago
As I learned problem is somewhere in the following function
// Given that |time| is coded using 33 bits,
// UnrollTimestamp returns the corresponding unrolled timestamp.
// The unrolled timestamp is defined by:
// |time| + k * (2 ^ 33)
// where k is estimated so that the unrolled timestamp
// is as close as possible to |previous_unrolled_time|.
static int64_t UnrollTimestamp(int64_t previous_unrolled_time, int64_t time) {
// Mpeg2 TS timestamps have an accuracy of 33 bits.
const int nbits = 33;
// |timestamp| has a precision of |nbits|
// so make sure the highest bits are set to 0.
DCHECK_EQ((time >> nbits), 0);
// Consider 3 possibilities to estimate the missing high bits of |time|.
int64_t previous_unrolled_time_high = (previous_unrolled_time >> nbits);
int64_t time0 = ((previous_unrolled_time_high - 1) << nbits) | time;
int64_t time1 = ((previous_unrolled_time_high + 0) << nbits) | time;
int64_t time2 = ((previous_unrolled_time_high + 1) << nbits) | time;
// Select the min absolute difference with the current time
// so as to ensure time continuity.
int64_t diff0 = time0 - previous_unrolled_time;
int64_t diff1 = time1 - previous_unrolled_time;
int64_t diff2 = time2 - previous_unrolled_time;
if (diff0 < 0)
diff0 = -diff0;
if (diff1 < 0)
diff1 = -diff1;
if (diff2 < 0)
diff2 = -diff2;
int64_t unrolled_time;
int64_t min_diff;
if (diff1 < diff0) {
unrolled_time = time1;
min_diff = diff1;
} else {
unrolled_time = time0;
min_diff = diff0;
}
if (diff2 < min_diff)
unrolled_time = time2;
std::cout<<"time: "<<time<<", unrolled: "<<unrolled_time<<std::endl;
return unrolled_time;
2024-08-27T09:20:12.087333044Z time: 5319075769, unrolled: -3270858823 2024-08-27T09:20:12.087336357Z TS Packet size=139 pts=-3270858823 dts=-9223372036854775808 data_alignment_indicator=1 2024-08-27T09:20:12.087337907Z time: 5319078476, unrolled: -3270856116 2024-08-27T09:20:12.087339549Z TS Packet size=139 pts=-3270856116 dts=-9223372036854775808 data_alignment_indicator=1 2024-08-27T09:20:12.087341980Z time: 5319063823, unrolled: 5319063823 2024-08-27T09:20:12.087354873Z TS Packet size=2794 pts=5319063823 dts=-9223372036854775808 data_alignment_indicator=0
Ffmpeg generates several packages with 126000 pts at start and then starts passing real pts input 126000 5474206969 leads to negative result
And I am not sure if it is correct to parse pts as signed int values.
Hi @acris5,
PESContext in FFMPEG also uses signed integer type for storing the PTS/DTS.
typedef struct PESContext {
int pid;
int pcr_pid; /**< if -1 then all packets containing PCR are considered */
int stream_type;
MpegTSContext *ts;
AVFormatContext *stream;
AVStream *st;
AVStream *sub_st; /**< stream for the embedded AC3 stream in HDMV TrueHD */
enum MpegTSState state;
/* used to get the format */
int data_index;
int flags; /**< copied to the AVPacket flags */
int PES_packet_length;
int pes_header_size;
int extended_stream_id;
uint8_t stream_id;
int64_t pts, dts;
int64_t ts_packet_pos; /**< position of first TS packet of this PES packet */
uint8_t header[MAX_PES_HEADER_SIZE];
AVBufferRef *buffer;
SLConfigDescr sl;
int merged_st;
} PESContext;
The issue must come from somewhere else.
I mean that UnrollTimestamp(126000, 5474206969) returns negative value -3115727623 when fixed function should return 5474206969
static int64_t UnrollTimestampFix(int64_t previous_unrolled_time, int64_t time) {
// Mpeg2 TS timestamps have an accuracy of 33 bits.
const int nbits = 33;
// |timestamp| has a precision of |nbits|
// so make sure the highest bits are set to 0.
DCHECK_EQ((time >> nbits), 0);
//126000 5474206969 leads to negative result
//time0: -2394030290 time1: 6195904302 time2: 14785838894
//diff0: 2394156290 diff1: 6195778302 diff2: 14785712894
//time: 6195904302, unrolled: -2394030290, previos: 126000
if (time > 0 && time > previous_unrolled_time) return time; //fix because we don't want to get wrong time at start
// Consider 3 possibilities to estimate the missing high bits of |time|.
int64_t previous_unrolled_time_high = (previous_unrolled_time >> nbits);
int64_t time0 = ((previous_unrolled_time_high - 1) << nbits) | time;
int64_t time1 = ((previous_unrolled_time_high + 0) << nbits) | time;
int64_t time2 = ((previous_unrolled_time_high + 1) << nbits) | time;
// Select the min absolute difference with the current time
// so as to ensure time continuity.
int64_t diff0 = time0 - previous_unrolled_time;
int64_t diff1 = time1 - previous_unrolled_time;
int64_t diff2 = time2 - previous_unrolled_time;
if (diff0 < 0)
diff0 = -diff0;
if (diff1 < 0)
diff1 = -diff1;
if (diff2 < 0)
diff2 = -diff2;
int64_t unrolled_time;
int64_t min_diff;
if (diff1 < diff0) {
unrolled_time = time1;
min_diff = diff1;
} else {
unrolled_time = time0;
min_diff = diff0;
}
if (diff2 < min_diff)
unrolled_time = time2;
return unrolled_time;
}
static uint64_t UnrollTimestampFix(uint64_t previous_unrolled_time, uint64_t time) {
// MPEG-2 TS timestamps have an accuracy of 33 bits.
const int nbits = 33;
const uint64_t max_timestamp = (1ULL << nbits);
const uint64_t half_max = max_timestamp / 2;
// Ensure the highest bits are set to 0.
DCHECK_EQ((time >> nbits), 0);
// Calculate the modulo of the previous unrolled time
uint64_t previous_mod = previous_unrolled_time % max_timestamp;
// If time is greater than previous_mod and the difference is less than half the max_timestamp,
// assume no wrap-around has occurred.
if (time > previous_mod && (time - previous_mod) < half_max) {
return (previous_unrolled_time & ~(max_timestamp - 1)) + time;
}
// Otherwise, determine the correct wrap-around by finding the closest unrolled time
uint64_t base = previous_unrolled_time & ~(max_timestamp - 1);
// Candidates: base - max_timestamp, base, base + max_timestamp
// Ensure that base - max_timestamp does not underflow
uint64_t time0 = (base >= max_timestamp) ? (base - max_timestamp) + time : time;
uint64_t time1 = base + time;
uint64_t time2 = base + max_timestamp + time;
// Compute differences
uint64_t diff0 = (time0 > previous_unrolled_time) ? (time0 - previous_unrolled_time) : (previous_unrolled_time - time0);
uint64_t diff1 = (time1 > previous_unrolled_time) ? (time1 - previous_unrolled_time) : (previous_unrolled_time - time1);
uint64_t diff2 = (time2 > previous_unrolled_time) ? (time2 - previous_unrolled_time) : (previous_unrolled_time - time2);
// Find the candidate with the smallest difference
uint64_t unrolled_time = time1;
uint64_t min_diff = diff1;
if (diff0 < min_diff) {
unrolled_time = time0;
min_diff = diff0;
}
if (diff2 < min_diff) {
unrolled_time = time2;
}
return unrolled_time;
}
struct TSPacket {
size_t size;
uint64_t pts;
uint64_t dts;
bool data_alignment_indicator;
// ... other fields ...
};
I wanted to suggest switching from int64_t to uint64_t for our timestamp handling in the UnrollTimestampFix function. Using uint64_t makes a lot of sense since timestamps are always positive (MPEG-2 TS), and it helps prevent those pesky negative values we’ve been seeing.
Update UnrollTimestampFix: Change the function to use uint64_t instead of int64_t. Check Related Code: Make sure other parts of the code, like the dts field, also use uint64_t. Right now, some of them are still int64_t, which is why we’re getting negative numbers when we assign large unsigned values.
If we decide to stick with int64_t: We’ll need to be extra careful to handle any potential overflow or underflow to avoid those negative timestamps. It’s doable, but it adds a bit more complexity to our code.
Are there legitimate cases for negative DTS/PTS? Particularly, small negative values in the first GOP? If so, we might need to stick with int64_t and just be careful about overflow. And this could be true for any format supported by Packager, not just TS, if internally we use the same structures for all.
If there are no legitimate negative timestamps in any formats we care about, I think uint64_t could be reasonable.
@cosmin, what do you think?
Some containers do not support negative timestamps, but that's a limitation of those containers. Negative DTS is needed for any samples that have a composition time offset and a PTS of 0 for example. A common case that I've always run into is HEVC with open GOP and decode-able leading B frames (which have only forward references).
System info
Operating System: docker Shaka Packager Version: b5c2cb8b73701911f765a149dd37521026e7ed2a
Issue and steps to reproduce the problem
I got http mpegts stream and decode it with ffmpeg and then pass to packager. ffmpeg -re -i http://172.17.0.1:8008/2222 -map "0:v:0" -map "0:a:0" -map "0:s?" -map "0:d?" -forced-idr 1 -force_key_frames "expr:gte(t,n_forced*2)" -sc_threshold 0 -c:v:0 libx264 -c:a aac -c:s copy -copyts -f mpegts pipe: >/tmp/pipes/pipe1
packager \ 'in=/shaka-packager/pipes/pipe1,stream=video,init_segment=/shaka-results/bitrate_1/video_init.mp4,segment_template=/shaka-results/bitrate1/video$Number$.m4s,playlist_name=/shaka-results/bitrate_1/video.m3u8' \ 'in=/shaka-packager/pipes/pipe1,stream=audio,lang=ru,init_segment=/shaka-results/bitrate_1/audio_init.mp4,segment_template=/shaka-results/bitrate1/audio$Number$.m4s,playlist_name=/shaka-results/bitrate_1/audio.m3u8' \ 'in=/shaka-packager/pipes/pipe1,stream=text,format=ttml+mp4,init_segment=/shaka-results/bitrate_1/text_init.mp4,segment_template=/shaka-results/bitrate1/text$Number$.m4s,lang=ru,playlist_name=/shaka-results/bitrate_1/text.m3u8' --max_hd_pixels 8294400 --hls_master_playlist_output /shaka-results/demo_master.m3u8 --mpd_output /shaka-results/manifest.mpd --hls_playlist_type LIVE \ --segment_duration 2 --min_buffer_time 4 --suggested_presentation_delay 10 --time_shift_buffer_depth 12 --allow_approximate_segment_timeline --preserved_segments_outside_live_window 20
What is the expected result? HLS and Dash output should contain video audio and teletext, but sometime it lacks some streams.
I noticed that it wrongly parses pts and dts and gets following different values:
While ffprobe shows that there are normal values instead: