fluent-ffmpeg / node-fluent-ffmpeg

A fluent API to FFMPEG (http://www.ffmpeg.org)
MIT License
7.97k stars 881 forks source link

Convert video to Instagram specs #1177

Closed simon-tannai closed 2 years ago

simon-tannai commented 2 years ago

Version information

Code to reproduce

const pathToConvertedVideo = `/tmp/${nanoid()}-${Date.now()}${getFileExt(pathToVideo)}`;

  return new Promise((resolve, reject) => {
    ffmpeg()
      .input(pathToVideo)
      .videoBitrate(3500)
      .on('progress', (progress) => {
        console.log(`Processing: ${progress.percent}%`);
      })
      .on('error', (error) => reject(error))
      .on('end', () => resolve(pathToConvertedVideo))
      .save(pathToConvertedVideo);
  });

(note: if the problem only happens with some inputs, include a link to such an input file)

Expected results

I'm trying to convert a video so that it complies with the Instagram API specs. According to the API documentation, here are the specs:

- Container: MOV or MP4 (MPEG-4 Part 14), no edit lists, moov atom at the front of the file.
- Audio codec: AAC, 48 kHz sample rate maximum, 1 or 2 channels (mono or stereo).
- Video codec: HEVC or H264, progressive scan, closed GOP, 4:2:0 chroma subsampling.
- Frame rate: 23-60 FPS.
- Picture size:
  - Maximum columns (horizontal pixels): 1920
  - Minimum aspect ratio [cols / rows]: 4 / 5
  - Maximum aspect ratio [cols / rows]: 16 / 9
- Video bitrate: VBR, 5 Mbps maximum
- Audio bitrate: 128 kbps
- Duration: 60 seconds maximum, 3 seconds minimum
- File size: 100 MB maximum

The input video for the code above is a raw .mov file filmed with an iPhone 13. Its metadata:

{
  "streams": [
    {
      "index": 0,
      "codec_name": "aac",
      "codec_long_name": "AAC (Advanced Audio Coding)",
      "profile": "LC",
      "codec_type": "audio",
      "codec_tag_string": "mp4a",
      "codec_tag": "0x6134706d",
      "sample_fmt": "fltp",
      "sample_rate": 44100,
      "channels": 2,
      "channel_layout": "stereo",
      "bits_per_sample": 0,
      "id": "N/A",
      "r_frame_rate": "0/0",
      "avg_frame_rate": "0/0",
      "time_base": "1/44100",
      "start_pts": 0,
      "start_time": 0,
      "duration_ts": 176327,
      "duration": 3.998345,
      "bit_rate": 141828,
      "max_bit_rate": "N/A",
      "bits_per_raw_sample": "N/A",
      "nb_frames": 175,
      "nb_read_frames": "N/A",
      "nb_read_packets": "N/A",
      "tags": {
        "creation_time": "2022-03-10T12:06:23.000000Z",
        "language": "und",
        "handler_name": "Core Media Audio",
        "vendor_id": "[0][0][0][0]"
      },
      "disposition": {
        "default": 1,
        "dub": 0,
        "original": 0,
        "comment": 0,
        "lyrics": 0,
        "karaoke": 0,
        "forced": 0,
        "hearing_impaired": 0,
        "visual_impaired": 0,
        "clean_effects": 0,
        "attached_pic": 0,
        "timed_thumbnails": 0,
        "captions": 0,
        "descriptions": 0,
        "metadata": 0,
        "dependent": 0,
        "still_image": 0
      }
    },
    {
      "index": 1,
      "codec_name": "h264",
      "codec_long_name": "H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10",
      "profile": "High",
      "codec_type": "video",
      "codec_tag_string": "avc1",
      "codec_tag": "0x31637661",
      "width": 1920,
      "height": 1080,
      "coded_width": 1920,
      "coded_height": 1080,
      "closed_captions": 0,
      "has_b_frames": 1,
      "sample_aspect_ratio": "N/A",
      "display_aspect_ratio": "N/A",
      "pix_fmt": "yuv420p",
      "level": 40,
      "color_range": "tv",
      "color_space": "bt709",
      "color_transfer": "bt709",
      "color_primaries": "bt709",
      "chroma_location": "left",
      "field_order": "unknown",
      "refs": 1,
      "is_avc": "true",
      "nal_length_size": 4,
      "id": "N/A",
      "r_frame_rate": "30/1",
      "avg_frame_rate": "30/1",
      "time_base": "1/600",
      "start_pts": 0,
      "start_time": 0,
      "duration_ts": 2400,
      "duration": 4,
      "bit_rate": 14535878,
      "max_bit_rate": "N/A",
      "bits_per_raw_sample": 8,
      "nb_frames": 120,
      "nb_read_frames": "N/A",
      "nb_read_packets": "N/A",
      "side_data_type": "Display Matrix",
      "displaymatrix": "",
      "rotation": "-90",
      "tags": {
        "creation_time": "2022-03-10T12:06:23.000000Z",
        "language": "und",
        "handler_name": "Core Media Video",
        "vendor_id": "[0][0][0][0]",
        "encoder": "H.264"
      },
      "disposition": {
        "default": 1,
        "dub": 0,
        "original": 0,
        "comment": 0,
        "lyrics": 0,
        "karaoke": 0,
        "forced": 0,
        "hearing_impaired": 0,
        "visual_impaired": 0,
        "clean_effects": 0,
        "attached_pic": 0,
        "timed_thumbnails": 0,
        "captions": 0,
        "descriptions": 0,
        "metadata": 0,
        "dependent": 0,
        "still_image": 0
      }
    },
    {
      "index": 2,
      "codec_name": "unknown",
      "codec_long_name": "unknown",
      "profile": "unknown",
      "codec_type": "data",
      "codec_tag_string": "mebx",
      "codec_tag": "0x7862656d",
      "id": "N/A",
      "r_frame_rate": "0/0",
      "avg_frame_rate": "0/0",
      "time_base": "1/600",
      "start_pts": 0,
      "start_time": 0,
      "duration_ts": 2400,
      "duration": 4,
      "bit_rate": 20,
      "max_bit_rate": "N/A",
      "bits_per_raw_sample": "N/A",
      "nb_frames": 1,
      "nb_read_frames": "N/A",
      "nb_read_packets": "N/A",
      "tags": {
        "creation_time": "2022-03-10T12:06:23.000000Z",
        "language": "und",
        "handler_name": "Core Media Metadata"
      },
      "disposition": {
        "default": 1,
        "dub": 0,
        "original": 0,
        "comment": 0,
        "lyrics": 0,
        "karaoke": 0,
        "forced": 0,
        "hearing_impaired": 0,
        "visual_impaired": 0,
        "clean_effects": 0,
        "attached_pic": 0,
        "timed_thumbnails": 0,
        "captions": 0,
        "descriptions": 0,
        "metadata": 0,
        "dependent": 0,
        "still_image": 0
      }
    },
    {
      "index": 3,
      "codec_name": "unknown",
      "codec_long_name": "unknown",
      "profile": "unknown",
      "codec_type": "data",
      "codec_tag_string": "mebx",
      "codec_tag": "0x7862656d",
      "id": "N/A",
      "r_frame_rate": "0/0",
      "avg_frame_rate": "0/0",
      "time_base": "1/600",
      "start_pts": 0,
      "start_time": 0,
      "duration_ts": 2400,
      "duration": 4,
      "bit_rate": 16,
      "max_bit_rate": "N/A",
      "bits_per_raw_sample": "N/A",
      "nb_frames": 1,
      "nb_read_frames": "N/A",
      "nb_read_packets": "N/A",
      "tags": {
        "creation_time": "2022-03-10T12:06:23.000000Z",
        "language": "und",
        "handler_name": "Core Media Metadata"
      },
      "disposition": {
        "default": 1,
        "dub": 0,
        "original": 0,
        "comment": 0,
        "lyrics": 0,
        "karaoke": 0,
        "forced": 0,
        "hearing_impaired": 0,
        "visual_impaired": 0,
        "clean_effects": 0,
        "attached_pic": 0,
        "timed_thumbnails": 0,
        "captions": 0,
        "descriptions": 0,
        "metadata": 0,
        "dependent": 0,
        "still_image": 0
      }
    },
    {
      "index": 4,
      "codec_name": "unknown",
      "codec_long_name": "unknown",
      "profile": "unknown",
      "codec_type": "data",
      "codec_tag_string": "mebx",
      "codec_tag": "0x7862656d",
      "id": "N/A",
      "r_frame_rate": "0/0",
      "avg_frame_rate": "0/0",
      "time_base": "1/600",
      "start_pts": 0,
      "start_time": 0,
      "duration_ts": 2400,
      "duration": 4,
      "bit_rate": 34560,
      "max_bit_rate": "N/A",
      "bits_per_raw_sample": "N/A",
      "nb_frames": 120,
      "nb_read_frames": "N/A",
      "nb_read_packets": "N/A",
      "tags": {
        "creation_time": "2022-03-10T12:06:23.000000Z",
        "language": "und",
        "handler_name": "Core Media Metadata"
      },
      "disposition": {
        "default": 1,
        "dub": 0,
        "original": 0,
        "comment": 0,
        "lyrics": 0,
        "karaoke": 0,
        "forced": 0,
        "hearing_impaired": 0,
        "visual_impaired": 0,
        "clean_effects": 0,
        "attached_pic": 0,
        "timed_thumbnails": 0,
        "captions": 0,
        "descriptions": 0,
        "metadata": 0,
        "dependent": 0,
        "still_image": 0
      }
    }
  ],
  "format": {
    "filename": "/tmp/wnTjPDRuVMWi14lqrHrHu-1648584636399.mov",
    "nb_streams": 5,
    "nb_programs": 0,
    "format_name": "mov,mp4,m4a,3gp,3g2,mj2",
    "format_long_name": "QuickTime / MOV",
    "start_time": 0,
    "duration": 4,
    "size": 7364275,
    "bit_rate": 14728550,
    "probe_score": 100,
    "tags": {
      "major_brand": "qt  ",
      "minor_version": "0",
      "compatible_brands": "qt  ",
      "creation_time": "2022-03-10T12:06:23.000000Z",
      "com.apple.quicktime.location.accuracy.horizontal": "17.659223",
      "com.apple.quicktime.location.ISO6709": "+48.8375+002.2455+039.982/",
      "com.apple.quicktime.make": "Apple",
      "com.apple.quicktime.model": "iPhone 13",
      "com.apple.quicktime.software": "15.3.1",
      "com.apple.quicktime.creationdate": "2022-03-10T13:05:59+0100",
      "com.apple.photos.originating.signature": "Ae3STL2rUaPaT2D/6h8fa4xtqYST"
    }
  },
  "chapters": []
}

We can see that the bitrate of the video stream is too high. That's why the code above reduces it.

The metadata of the converted video:

{
  "streams": [
    {
      "index": 0,
      "codec_name": "h264",
      "codec_long_name": "H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10",
      "profile": "High",
      "codec_type": "video",
      "codec_tag_string": "avc1",
      "codec_tag": "0x31637661",
      "width": 1080,
      "height": 1920,
      "coded_width": 1080,
      "coded_height": 1920,
      "closed_captions": 0,
      "has_b_frames": 2,
      "sample_aspect_ratio": "N/A",
      "display_aspect_ratio": "N/A",
      "pix_fmt": "yuv420p",
      "level": 40,
      "color_range": "unknown",
      "color_space": "unknown",
      "color_transfer": "unknown",
      "color_primaries": "unknown",
      "chroma_location": "left",
      "field_order": "unknown",
      "refs": 1,
      "is_avc": "true",
      "nal_length_size": 4,
      "id": "N/A",
      "r_frame_rate": "30/1",
      "avg_frame_rate": "30/1",
      "time_base": "1/15360",
      "start_pts": 0,
      "start_time": 0,
      "duration_ts": 61440,
      "duration": 4,
      "bit_rate": 3434502,
      "max_bit_rate": "N/A",
      "bits_per_raw_sample": 8,
      "nb_frames": 120,
      "nb_read_frames": "N/A",
      "nb_read_packets": "N/A",
      "tags": {
        "language": "eng",
        "handler_name": "Core Media Video",
        "vendor_id": "FFMP",
        "encoder": "Lavc58.42.100 libx264"
      },
      "disposition": {
        "default": 1,
        "dub": 0,
        "original": 0,
        "comment": 0,
        "lyrics": 0,
        "karaoke": 0,
        "forced": 0,
        "hearing_impaired": 0,
        "visual_impaired": 0,
        "clean_effects": 0,
        "attached_pic": 0,
        "timed_thumbnails": 0,
        "captions": 0,
        "descriptions": 0,
        "metadata": 0,
        "dependent": 0,
        "still_image": 0
      }
    },
    {
      "index": 1,
      "codec_name": "aac",
      "codec_long_name": "AAC (Advanced Audio Coding)",
      "profile": "LC",
      "codec_type": "audio",
      "codec_tag_string": "mp4a",
      "codec_tag": "0x6134706d",
      "sample_fmt": "fltp",
      "sample_rate": 44100,
      "channels": 2,
      "channel_layout": "stereo",
      "bits_per_sample": 0,
      "id": "N/A",
      "r_frame_rate": "0/0",
      "avg_frame_rate": "0/0",
      "time_base": "1/44100",
      "start_pts": 0,
      "start_time": 0,
      "duration_ts": 177150,
      "duration": 4.017007,
      "bit_rate": 133157,
      "max_bit_rate": "N/A",
      "bits_per_raw_sample": "N/A",
      "nb_frames": 174,
      "nb_read_frames": "N/A",
      "nb_read_packets": "N/A",
      "tags": {
        "language": "eng",
        "handler_name": "Core Media Audio",
        "vendor_id": "[0][0][0][0]"
      },
      "disposition": {
        "default": 1,
        "dub": 0,
        "original": 0,
        "comment": 0,
        "lyrics": 0,
        "karaoke": 0,
        "forced": 0,
        "hearing_impaired": 0,
        "visual_impaired": 0,
        "clean_effects": 0,
        "attached_pic": 0,
        "timed_thumbnails": 0,
        "captions": 0,
        "descriptions": 0,
        "metadata": 0,
        "dependent": 0,
        "still_image": 0
      }
    }
  ],
  "format": {
    "filename": "/tmp/jrnQMwpnTWmcAVicTPvl5-1648584640817.mov",
    "nb_streams": 2,
    "nb_programs": 0,
    "format_name": "mov,mp4,m4a,3gp,3g2,mj2",
    "format_long_name": "QuickTime / MOV",
    "start_time": 0,
    "duration": 4.041,
    "size": 1790215,
    "bit_rate": 3544102,
    "probe_score": 100,
    "tags": {
      "major_brand": "qt  ",
      "minor_version": "512",
      "compatible_brands": "qt  ",
      "encoder": "Lavf58.24.100"
    }
  },
  "chapters": []
}

Observed results

When I publish the converted video through the Instagram API, I get an error with code 2207026. According to the Instagram error documentation, the error is:

The video format is not supported. Please check spec for supported {video} format

The converted video seems to have a valid format (MOV) and a valid codec (h264). I suppose there is something in the specs / metadata that I missed, or an additional operation to perform during the conversion. Do you have any idea what it could be?

Checklist