nonocast / me

记录和分享技术的博客
http://nonocast.cn
MIT License
20 stars 0 forks source link

学习 rtmp 开发 (Part 8: push streaming from H.264) #285

Open nonocast opened 2 years ago

nonocast commented 2 years ago

H264 Format

AnnexB format:

([start code] NALU) | ( [start code] NALU) |

AVCC format:

([extradata]) | ([length] NALU) | ([length] NALU) |

In annexb, [start code] may be 0x000001 or 0x00000001.

In avcc, the number of bytes used by [length] depends on NALULengthSizeMinusOne in the avcc extradata, and the value of [length] is the size of the NALU that follows it. In both annexb and avcc format, the NALUs themselves are no different.

Ref: StackOverflow

这里的Annex B中,Annex是附件的意思,类似附件二,所以Annex B具体来说是指T-REC-H.264-201610-S这个文档中328页附件B,附件B就是描述了Byte stream format, 整个附件B一共就2页

parse code

#include "log.h"
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

// Function-like max/min helpers. Each argument can be evaluated twice, so do
// not pass expressions with side effects.
#define max(a, b) (((a) > (b)) ? (a) : (b))
#define min(a, b) (((a) < (b)) ? (a) : (b))

// Lowercase hexadecimal digit table.
static const char hexdig[] = "0123456789abcdef";
// The 4-byte Annex B start code: 00 00 00 01.
static const uint8_t startcode[] = {0x00, 0x00, 0x00, 0x01};
// Display names indexed by nal_unit_type. Only indices 0, 1 and 5..9 carry a
// name; indices 2..4 are implicitly NULL and the table holds just 10 entries,
// so an index must be range-checked (and NULL-checked) before it is printed.
static const char *nalu_types[10] = {[0] = "Unspecified", "non-IDR", [5] = "IDR", "SEI", "SPS", "PPS", "AU delimiter"};

// One NAL unit found in the loaded Annex B byte stream.
typedef struct frame {
  // First byte of the NALU (just past its start code); points into the
  // loaded file buffer rather than owning a copy.
  uint8_t *address;
  // Copy of the NALU's first byte.
  uint8_t first_byte;
  // Low 5 bits of first_byte.
  uint8_t nal_unit_type;
  // Distance in bytes from the start of the buffer to `address`.
  off_t offset;
  // NALU length in bytes, start code not included.
  size_t size;
} frame_t;

// Parse result for one Annex B file.
typedef struct annexb_stream {
  // Heap copy of the first SPS NALU (nal_unit_type 7), or NULL if none was seen.
  uint8_t *sps;
  size_t sps_size;
  // Heap copy of the first PPS NALU (nal_unit_type 8), or NULL if none was seen.
  uint8_t *pps;
  size_t pps_size;
  // Every NALU in stream order; each entry is individually heap-allocated.
  // Fixed capacity of 4096 entries.
  frame_t *frames[4096];
  size_t frame_count;
  // The subset of `frames` with nal_unit_type 1 or 5. The pointers are shared
  // with `frames`, not separate allocations.
  frame_t *video_frames[4096];
  size_t video_frame_count;
} annexb_stream_t;

// Prints a byte range as hex on one line (abbreviated when longer than 16 bytes).
static void print_hex(const uint8_t *data, unsigned long len);
// Loads sample.h264 and fills the stream with its NALUs, SPS and PPS.
static void annexb_parse(annexb_stream_t *);
// Splits an in-memory Annex B buffer into NALUs appended to the stream.
static void parse_frames(annexb_stream_t *, uint8_t *buffer, size_t size);

// Input file handle; opened and closed inside annexb_parse.
FILE *infile;
// Whole-file buffer allocated in annexb_parse and freed at the end of main.
// Parsed frames point into it.
uint8_t *buffer;

int main() {
  annexb_stream_t stream;
  memset(&stream, 0x00, sizeof(annexb_stream_t));
  annexb_parse(&stream);

  // sps
  printf("sps: ");
  print_hex(stream.sps, stream.sps_size);

  // pps
  printf("pps: ");
  print_hex(stream.pps, stream.pps_size);

  // video frames
  printf("video frames: %lu\n", stream.video_frame_count);

  // top 10 frames
  for (int i = 0; i < 10; ++i) {
    frame_t *frame = stream.frames[i];
    printf("frame #%03d - %s\n", i + 1, nalu_types[frame->nal_unit_type]);
  }

  free(buffer);
  return 0;
}

/* Returns a malloc'd copy of the n bytes at src, or NULL when n is 0 or the
 * allocation fails. The caller owns the result. */
static uint8_t *copy_nalu(const uint8_t *src, size_t n) {
  if (n == 0) {
    return NULL;
  }
  uint8_t *dst = malloc(n);
  if (dst != NULL) {
    memcpy(dst, src, n);
  }
  return dst;
}

/*
 * Loads sample.h264 into the global `buffer`, splits it into NALUs with
 * parse_frames, then records the first SPS, the first PPS and the list of
 * slice NALUs (types 1 and 5) in `stream`.
 *
 * On any I/O or allocation failure a message is written to stderr and the
 * function returns early, leaving `stream` with whatever was filled so far
 * (an all-zero stream stays empty). The SPS/PPS copies are owned by `stream`.
 */
static void annexb_parse(annexb_stream_t *stream) {
  // Simplification: load the whole file into memory in one go.
  infile = fopen("sample.h264", "rb");
  if (infile == NULL) {
    perror("sample.h264");
    return;
  }

  long end_pos = -1;
  if (fseek(infile, 0, SEEK_END) == 0) {
    end_pos = ftell(infile);
  }
  if (end_pos < 0) {
    perror("sample.h264");
    fclose(infile);
    infile = NULL;
    return;
  }
  size_t filesize = (size_t)end_pos;
  printf("file size: %zu bytes\n", filesize);
  rewind(infile);

  if (filesize == 0) {
    // Nothing to parse; also avoids an implementation-defined malloc(0).
    fclose(infile);
    infile = NULL;
    return;
  }

  buffer = malloc(filesize);
  if (buffer == NULL) {
    fprintf(stderr, "out of memory loading %zu bytes\n", filesize);
    fclose(infile);
    infile = NULL;
    return;
  }

  // Parse only what was actually read, in case of a short read.
  size_t loaded = fread(buffer, 1, filesize, infile);
  if (loaded != filesize) {
    fprintf(stderr, "short read: got %zu of %zu bytes\n", loaded, filesize);
  }
  fclose(infile);
  infile = NULL;

  parse_frames(stream, buffer, loaded);

  const size_t video_capacity = sizeof stream->video_frames / sizeof stream->video_frames[0];
  for (size_t i = 0; i < stream->frame_count; ++i) {
    frame_t *frame = stream->frames[i];

    // Keep only the first SPS (type 7) and the first PPS (type 8).
    if (stream->sps == NULL && frame->nal_unit_type == 0x07) {
      stream->sps = copy_nalu(frame->address, frame->size);
      if (stream->sps != NULL) {
        stream->sps_size = frame->size;
      }
    }

    if (stream->pps == NULL && frame->nal_unit_type == 0x08) {
      stream->pps = copy_nalu(frame->address, frame->size);
      if (stream->pps != NULL) {
        stream->pps_size = frame->size;
      }
    }

    // Slice NALUs: non-IDR (1) and IDR (5).
    if ((frame->nal_unit_type == 0x01 || frame->nal_unit_type == 0x05) &&
        stream->video_frame_count < video_capacity) {
      stream->video_frames[stream->video_frame_count++] = frame;
    }
  }
}

/* Returns the offset of the first 00 00 01 pattern at or after `from`, or
 * `size` when there is none. Never reads past data[size - 1]. */
static size_t next_start_code(const uint8_t *data, size_t size, size_t from) {
  for (size_t i = from; i + 3 <= size; ++i) {
    if (data[i] == 0x00 && data[i + 1] == 0x00 && data[i + 2] == 0x01) {
      return i;
    }
  }
  return size;
}

/*
 * Splits an Annex B byte stream into NAL units and appends one heap-allocated
 * frame_t per NALU to stream->frames.
 *
 * Both start code forms are recognised: 00 00 01 and 00 00 00 01. For the
 * 4-byte form the leading zero is treated as part of the start code, so it is
 * not counted in the preceding NALU. Bytes before the first start code and
 * empty NALUs are skipped. Each frame's address points into `buffer`, which
 * must therefore outlive the frames.
 *
 * Parsing stops early (with a message on stderr) when stream->frames is full
 * or an allocation fails; frames collected so far are kept.
 */
static void parse_frames(annexb_stream_t *stream, uint8_t *buffer, size_t size) {
  const size_t capacity = sizeof stream->frames / sizeof stream->frames[0];

  if (buffer == NULL) {
    return;
  }

  size_t code = next_start_code(buffer, size, 0);
  while (code < size) {
    const size_t begin = code + 3; // first payload byte after 00 00 01
    const size_t next = next_start_code(buffer, size, begin);

    // If another start code follows and is the 4-byte form, its leading zero
    // sits just before `next` and does not belong to this NALU.
    size_t end = next;
    if (next < size && end > begin && buffer[end - 1] == 0x00) {
      end--;
    }

    if (end > begin) {
      if (stream->frame_count >= capacity) {
        fprintf(stderr, "too many NAL units, keeping the first %zu\n", capacity);
        return;
      }

      frame_t *frame = malloc(sizeof *frame);
      if (frame == NULL) {
        fprintf(stderr, "out of memory while parsing NAL units\n");
        return;
      }
      frame->first_byte = buffer[begin];
      frame->nal_unit_type = buffer[begin] & 0x1f;
      frame->address = buffer + begin;
      frame->offset = (off_t)begin;
      frame->size = end - begin;
      stream->frames[stream->frame_count++] = frame;
    }

    code = next;
  }
}

/*
 * Writes `len` bytes from `data` to stdout as space-separated two-digit
 * lowercase hex, followed by a newline. Ranges longer than 16 bytes are
 * abbreviated to the first 12 bytes, a "...... " marker and the last 4 bytes.
 */
static void print_hex(const uint8_t *data, unsigned long len) {
  const int abbreviated = len > 16;
  const unsigned long head = abbreviated ? 12 : len;

  for (unsigned long k = 0; k < head; ++k) {
    printf("%02x ", data[k]);
  }

  if (abbreviated) {
    printf("...... ");
    for (unsigned long k = len - 4; k < len; ++k) {
      printf("%02x ", data[k]);
    }
  }

  printf("\n");
}

dump frames 如下:

frame(#0001) offset: 0x00000004, size: 15
27 4d 00 15 ab 61 a3 7c b2 cd 40 40 40 40 80 
frame(#0002) offset: 0x00000017, size: 4
28 ee 3c 80 
frame(#0003) offset: 0x0000001f, size: 15
27 4d 00 15 ab 61 a3 7c b2 cd 40 40 40 40 80 
frame(#0004) offset: 0x00000032, size: 4
28 ee 3c 80 
frame(#0005) offset: 0x0000003a, size: 54556
65 b8 10 00 35 ff f9 19 e9 dd b0 3b ...... 03 00 00 03 
frame(#0006) offset: 0x0000d55a, size: 15679
41 e1 08 41 af ab ae 27 38 f5 67 69 ...... de c8 48 46 
...
frame(#0426) offset: 0x001a89a7, size: 344
41 e3 18 41 af fe da a7 f8 21 dc 24 ...... 95 82 fa 40 
frame(#0427) offset: 0x001a8b03, size: 126
01 a9 05 88 2f ff 01 ac 8d 42 d6 f7 ...... 81 d4 60 7d 
frame(#0428) offset: 0x001a8b85, size: 310
41 e4 20 41 af fe da a7 f8 21 dc 24 ...... 15 b6 23 78 
frame(#0429) offset: 0x001a8cbf, size: 125
01 a9 47 88 2f ff 01 ac 8d 42 d6 f7 ...... 6d 58 27 40 

整理后的总体输出如下:

file size: 1740092 bytes
sps: 27 4d 00 15 ab 61 a3 7c b2 cd 40 40 40 40 80 
pps: 28 ee 3c 80 
video frames: 399
frame #001 - SPS
frame #002 - PPS
frame #003 - SPS
frame #004 - PPS
frame #005 - IDR
frame #006 - non-IDR
frame #007 - non-IDR
frame #008 - non-IDR
frame #009 - non-IDR
frame #010 - non-IDR

RTMP

当从flv改到发送h264的时候就需要根据sps/pps构造出两个packet

@setDataFrame

Wireshark dump内容如下:

RTMP Body
    String '@setDataFrame'
        AMF0 type: String (0x02)
        String length: 13
        String: @setDataFrame
    String 'onMetaData'
        AMF0 type: String (0x02)
        String length: 10
        String: onMetaData
    ECMA array (20 items)
        AMF0 type: ECMA array (0x08)
        Array length: 20
        Property 'duration' Number 0
        Property 'fileSize' Number 0
        Property 'width' Number 200
        Property 'height' Number 200
        Property 'videocodecid' Number 7
        Property 'videodatarate' Number 2500
        Property 'framerate' Number 10
        Property 'encoder' String 'obs-output module (libobs version 27.2.4)'
        End Of Object Marker

然后观察obs (flv-mux.c)

/*
 * Builds the "onMetaData" script payload for the given output: the string
 * "onMetaData" followed by an ECMA array of video/audio properties.
 *
 * The payload is assembled in a 4096-byte stack buffer; its length is stored
 * in *size and *output receives the result of bmemdup(buf, *size)
 * (presumably a heap copy the caller must release; confirm against OBS).
 */
static void build_flv_meta_data(obs_output_t context, uint8_t **output, size_t *size) {
  obs_encoder_t vencoder = obs_output_get_video_encoder(context);
  obs_encoder_t aencoder = obs_output_get_audio_encoder(context);
  video_t video = obs_encoder_video(vencoder);
  audio_t audio = obs_encoder_audio(aencoder);
  char buf[4096];
  char *enc = buf;
  char *end = enc + sizeof(buf);

  enc_str(&enc, end, "onMetaData");

  // ECMA array marker followed by the declared entry count; 14 properties
  // are written below.
  *enc++ = AMF_ECMA_ARRAY;
  enc = AMF_EncodeInt32(enc, end, 14);

  enc_num_val(&enc, end, "duration", 0.0);
  enc_num_val(&enc, end, "fileSize", 0.0);

  // Video properties.
  enc_num_val(&enc, end, "width", (double) video_output_width(video));
  enc_num_val(&enc, end, "height", (double) video_output_height(video));
  enc_str_val(&enc, end, "videocodecid", "avc1");
  enc_num_val(&enc, end, "videodatarate", encoder_bitrate(vencoder));
  enc_num_val(&enc, end, "framerate", video_output_framerate(video));

  // Audio properties.
  enc_str_val(&enc, end, "audiocodecid", "mp4a");
  enc_num_val(&enc, end, "audiodatarate", encoder_bitrate(aencoder));
  enc_num_val(&enc, end, "audiosamplerate", (double) audio_output_samplerate(audio));
  enc_num_val(&enc, end, "audiosamplesize", 16.0);
  enc_num_val(&enc, end, "audiochannels", (double) audio_output_channels(audio));

  enc_bool_val(&enc, end, "stereo", audio_output_channels(audio) == 2);
  enc_str_val(&enc, end, "encoder", MODULE_NAME);

  // Terminator: two zero bytes (an empty property name) followed by the
  // object-end marker.
  *enc++ = 0;
  *enc++ = 0;
  *enc++ = AMF_OBJECT_END;

  *size = enc - buf;
  *output = bmemdup(buf, *size);
}

如果仅video,我们需要提供的内容包括:

注:

AVCDecoderConfigurationRecord

FLV中存放sps/pps的tag如下, 以下字节去掉了Tag Header, 从VIDEODATA的0x17开始: 1表示keyframe (for AVC, a seekable frame), 7表示AVC

  /*
   * 17 00 00 00 00 01 4d 00 15 ff e1 00 0f 27 4d 00
   * 15 ab 61 a3 7c b2 cd 40 40 40 40 80 01 00 04 28
   * ee 3c 80
   */
DEBUG: Tag type: 9 - video
DEBUG:   Data size: 35
DEBUG:   Timestamp: 135000
DEBUG:   Timestamp etxended: 0
DEBUG:   StreamID: 0
DEBUG:   Video tag:
DEBUG:     Frame type: 1 - keyframe (for AVC, a seekable frame)
DEBUG:     Codec ID: 7 - AVC
// 以下对应 avc_decoder_configuration_record_t
DEBUG:     AVC video packet:
DEBUG:       AVC packet type: 0 - AVC sequence header
DEBUG:       AVC composition time: 0
DEBUG:       AVCDecoderConfigurationRecord:
DEBUG:         Configuration Version: 1
DEBUG:         AVC Profile Indication: 77
DEBUG:         Profile Compatibility: 0
DEBUG:         AVC Level Indication: 21
DEBUG:         Minus One: 3
DEBUG:         SPS num: 1
DEBUG:         SPS length: 15
DEBUG: 27 4d 00 15 ab 61 a3 7c b2 cd 40 40 40 40 80 
DEBUG:         PPS num: 1
DEBUG:         PPS length: 4
DEBUG: 28 ee 3c 80 

这里需要填写的内容比较简单, 参考AVC File Format (ISO 14496-15)

obs对应的代码:

obs-avc.c

/*
 * Builds the AVC sequence header (an AVCDecoderConfigurationRecord) from
 * encoder extradata.
 *
 * Returns the header length and stores the header in *header, or returns 0
 * when the input is 6 bytes or shorter, when no SPS/PPS pair is found, or
 * when the SPS is shorter than 4 bytes. Input without a start code is passed
 * through unchanged as a bmemdup copy (presumably already in AVCC form;
 * confirm against OBS).
 */
size_t obs_parse_avc_header(uint8_t **header, const uint8_t *data, size_t size)
{
    struct array_output_data output;
    struct serializer s;
    const uint8_t *sps = NULL, *pps = NULL;
    size_t sps_size = 0, pps_size = 0;

    array_output_serializer_init(&s, &output);

    if (size <= 6)
        return 0;

    if (!has_start_code(data)) {
        *header = bmemdup(data, size);
        return size;
    }

    get_sps_pps(data, size, &sps, &sps_size, &pps, &pps_size);
    if (!sps || !pps || sps_size < 4)
        return 0;

    // Configuration version 1.
    s_w8(&s, 0x01);
    // The 3 bytes after the SPS header byte: profile indication, profile
    // compatibility and level indication (4d 00 15 in this post's sample).
    s_write(&s, sps + 1, 3);
    // 0xff: the low 2 bits hold the NALU length size minus one (3, i.e.
    // 4-byte length prefixes); the remaining bits are all ones.
    s_w8(&s, 0xff);
    // 0xe1: the low 5 bits hold the SPS count (1); the remaining bits are
    // all ones.
    s_w8(&s, 0xe1);

    // SPS length as big-endian 16 bits, then the SPS itself.
    s_wb16(&s, (uint16_t)sps_size);
    s_write(&s, sps, sps_size);
    // PPS count (1), PPS length as big-endian 16 bits, then the PPS itself.
    s_w8(&s, 0x01);
    s_wb16(&s, (uint16_t)pps_size);
    s_write(&s, pps, pps_size);

    *header = output.bytes.array;
    return output.bytes.num;
}

sps/pps

我们先从sps/pps分析开始,

指数哥伦布编码

[Golomb及指数哥伦布编码原理介绍及实现 - Brook_icv - 博客园](https://www.cnblogs.com/wangguchangqing/p/6297792.html)

以之前的flv为例,

27 4d 00 15 ab 61 a3 7c b2 cd 40 40 40 40 80

% xxd sps
00000000: 274d 0015 ab61 a37c b2cd 4040 4040 80    'M...a.|..@@@@.

% xxd -b sps
00000000: 00100111 01001101 00000000 00010101 10101011 01100001  'M...a
00000006: 10100011 01111100 10110010 11001101 01000000 01000000  .|..@@
0000000c: 01000000 01000000 10000000 

Timestamp

传输过程中涉及2个时间,

  1. FLV Tag header中的Timestamp(UI24)和TimestampExtended(UI8), TimestampExtended是时间戳的高8位, 与低24位的Timestamp一起组成32位时间戳
  2. AVCVIDEOPACKET中的CompositionTime(SI24)

注:

写了一个小工具解析flv中每个tag的timestamp和composition time, 输出如下:

  tag id     type      address   tag size     payload size    timestamp video type pkt type composition time
-------- -------- ------------ ---------- ---------------- ------------ ---------- -------- ----------------
..09 00 00 23 02 0f 58 00 00 00 00 
.... .. .. .. .. .. .. .. .. .. .. 17 00 00 00 00 
 #000001    video     0x000259         46               35       135000       0x17      0x0                0

  tag id     type      address   tag size     payload size    timestamp video type pkt type composition time
-------- -------- ------------ ---------- ---------------- ------------ ---------- -------- ----------------
..09 00 d5 40 02 0f 58 00 00 00 00 
.... .. .. .. .. .. .. .. .. .. .. 17 01 00 00 64 
 #000002    video     0x00029e      54603            54592       135000       0x17      0x1              100

  tag id     type      address   tag size     payload size    timestamp video type pkt type composition time
-------- -------- ------------ ---------- ---------------- ------------ ---------- -------- ----------------
..09 00 3d 48 02 0f bc 00 00 00 00 
.... .. .. .. .. .. .. .. .. .. .. 27 01 00 00 c8 
 #000003    video     0x00dedc      15699            15688       135100       0x27      0x1              200

  tag id     type      address   tag size     payload size    timestamp video type pkt type composition time
-------- -------- ------------ ---------- ---------------- ------------ ---------- -------- ----------------
..09 00 07 a1 02 10 20 00 00 00 00 
.... .. .. .. .. .. .. .. .. .. .. 27 01 00 00 00 
 #000004    video     0x0124dd       1964             1953       135200       0x27      0x1                0

  tag id     type      address   tag size     payload size    timestamp video type pkt type composition time
-------- -------- ------------ ---------- ---------------- ------------ ---------- -------- ----------------
..09 00 08 f9 02 10 84 00 00 00 00 
.... .. .. .. .. .. .. .. .. .. .. 27 01 00 00 c8 
 #000005    video     0x013537       2308             2297       135300       0x27      0x1              200

  tag id     type      address   tag size     payload size    timestamp video type pkt type composition time
-------- -------- ------------ ---------- ---------------- ------------ ---------- -------- ----------------
..09 00 05 0a 02 10 e8 00 00 00 00 
.... .. .. .. .. .. .. .. .. .. .. 27 01 00 00 00 
 #000006    video     0x01452e       1301             1290       135400       0x27      0x1                0

  tag id     type      address   tag size     payload size    timestamp video type pkt type composition time
-------- -------- ------------ ---------- ---------------- ------------ ---------- -------- ----------------
..09 00 05 c3 02 11 4c 00 00 00 00 
.... .. .. .. .. .. .. .. .. .. .. 27 01 00 00 64 
 #000007    video     0x0152f1       1486             1475       135500       0x27      0x1              100

  tag id     type      address   tag size     payload size    timestamp video type pkt type composition time
-------- -------- ------------ ---------- ---------------- ------------ ---------- -------- ----------------
..09 00 05 3e 02 11 b0 00 00 00 00 
.... .. .. .. .. .. .. .. .. .. .. 27 01 00 00 64 
 #000008    video     0x01616e       1353             1342       135600       0x27      0x1              100

  tag id     type      address   tag size     payload size    timestamp video type pkt type composition time
-------- -------- ------------ ---------- ---------------- ------------ ---------- -------- ----------------
..09 00 05 29 02 12 14 00 00 00 00 
.... .. .. .. .. .. .. .. .. .. .. 27 01 00 00 c8 
 #000009    video     0x016da9       1332             1321       135700       0x27      0x1              200

  tag id     type      address   tag size     payload size    timestamp video type pkt type composition time
-------- -------- ------------ ---------- ---------------- ------------ ---------- -------- ----------------
..09 00 04 b7 02 12 78 00 00 00 00 
.... .. .. .. .. .. .. .. .. .. .. 27 01 00 00 00 
 #000010    video     0x017b8c       1218             1207       135800       0x27      0x1                0

参考阅读