abema / go-mp4

Go library for reading and writing MP4 files
https://dev.to/sunfishshogi/go-mp4-golang-library-and-cli-tool-for-mp4-52o1
MIT License
464 stars 30 forks source link

Adding a new atom succeeds, but the file can't be played #165

Closed oustn closed 8 months ago

oustn commented 8 months ago

I'm attempting to write some metadata below the ilst atom using go-mp4. Sometimes the udta atom doesn't exist, so I'm trying to add it. However, after doing so, the file can't play anymore. Could you help me figure out the issue?

package main

import (
    "github.com/abema/go-mp4"
    "github.com/sunfish-shogi/bufseekio"
    "os"
)

var (
    // inputFile is the path of the file main opens for reading.
    inputFile   = "/tmp/in.m4a"
    // outputFile is the path of the file main creates and writes to.
    outputFile  = "/tmp/out.m4a"
    // ctx is the mp4.Context passed to mp4.Marshal by marshalData when it
    // writes the data box.
    // NOTE(review): presumably these flags tell go-mp4 that the box being
    // marshalled sits under udta/meta/ilst — confirm against the go-mp4 docs.
    ctx         = mp4.Context{
        UnderUdta:         true,
        UnderIlstMeta:     true,
        UnderIlst:         true,
        UnderIlstFreeMeta: false,
    }
)

// copyOrigin opens the box described by h on w, re-marshals the box's own
// payload with the box's context, and then calls h.Expand.
//
// It does not call w.EndBox; closing the box is left to the caller so that
// more children can be appended first. The first error encountered is
// returned.
func copyOrigin(w *mp4.Writer, h *mp4.ReadHandle) error {
    if _, err := w.StartBox(&h.BoxInfo); err != nil {
        return err
    }

    payload, _, err := h.ReadPayload()
    if err != nil {
        return err
    }

    if _, err := mp4.Marshal(w, payload, h.BoxInfo.Context); err != nil {
        return err
    }

    _, err = h.Expand()
    return err
}

// main copies inputFile to outputFile box by box. The moov box is rewritten
// through copyOrigin and, when it has no udta child, a udta/meta/ilst chain is
// appended by addMiss before moov is closed. Every other box is copied as is.
//
// NOTE: appending udta makes moov larger. When moov is placed before mdat
// (a "fast-start" layout), the chunk offsets stored in stco still refer to the
// old position of mdat, so they must be shifted by the number of added bytes;
// this function does not do that, and the output will not play until it is done.
func main() {
    file, err := os.Open(inputFile)
    if err != nil {
        panic(err)
    }
    defer func(file *os.File) {
        _ = file.Close()
    }(file)

    output, err := os.Create(outputFile)
    if err != nil {
        panic(err)
    }
    defer func(file *os.File) {
        _ = file.Close()
    }(output)

    r := bufseekio.NewReadSeeker(file, 128*1024, 4)
    w := mp4.NewWriter(output)

    _, err = mp4.ReadBoxStructure(r, func(h *mp4.ReadHandle) (interface{}, error) {
        switch h.BoxInfo.Type {
        case mp4.BoxTypeMoov():
            if err := copyOrigin(w, h); err != nil {
                return nil, err
            }
            // Check whether a udta box exists under moov. The parent must be
            // the moov box: with a nil parent the path is resolved against the
            // top-level boxes, where udta is never found.
            boxes, err := mp4.ExtractBox(r, &h.BoxInfo, mp4.BoxPath{mp4.BoxTypeUdta()})
            if err != nil {
                return nil, err
            }
            if len(boxes) < 1 {
                err = addMiss(w, h, []mp4.BoxType{
                    mp4.BoxTypeUdta(),
                    mp4.BoxTypeMeta(),
                    mp4.BoxTypeIlst(),
                })
                if err != nil {
                    return nil, err
                }
            }

            // EndBox rewrites the size of moov; a failure here leaves a
            // corrupt file, so it must not be ignored.
            if _, err := w.EndBox(); err != nil {
                return nil, err
            }
            return nil, nil
        default:
            return nil, w.CopyBox(r, &h.BoxInfo)
        }
    })
    // Surface any error returned by the handler instead of exiting silently
    // with a truncated output file.
    if err != nil {
        panic(err)
    }
}

// addMiss writes the nested chain of boxes named by types (outermost first) to
// w and, inside the innermost one, the metadata item written by addMeta.
//
// h is accepted for interface compatibility and is not used.
//
// StartBox writes only the box header. A meta box under udta is a FullBox
// and is expected to be followed by a handler box, so when the box being
// opened is meta, its version/flags field and an "mdir" hdlr box are written
// before the remaining boxes. Without them the meta box is 4 bytes short and
// has no handler, which strict readers do not accept.
func addMiss(w *mp4.Writer, h *mp4.ReadHandle, types []mp4.BoxType) error {
    if len(types) == 0 {
        return addMeta(w, mp4.BoxType{'\251', 'a', 'l', 'b'}, "Hello world")
    }
    if _, err := w.StartBox(&mp4.BoxInfo{Type: types[0]}); err != nil {
        return err
    }
    if types[0] == mp4.BoxTypeMeta() {
        if err := writeMetaPreamble(w); err != nil {
            return err
        }
    }
    if err := addMiss(w, h, types[1:]); err != nil {
        return err
    }
    _, err := w.EndBox()
    return err
}

// writeMetaPreamble writes what must precede ilst inside a freshly opened meta
// box: the FullBox version/flags (all zero) and a hdlr box whose handler type
// is "mdir".
func writeMetaPreamble(w *mp4.Writer) error {
    if _, err := mp4.Marshal(w, &mp4.Meta{}, ctx); err != nil {
        return err
    }
    if _, err := w.StartBox(&mp4.BoxInfo{Type: mp4.BoxTypeHdlr()}); err != nil {
        return err
    }
    hdlr := &mp4.Hdlr{HandlerType: [4]byte{'m', 'd', 'i', 'r'}}
    if _, err := mp4.Marshal(w, hdlr, ctx); err != nil {
        return err
    }
    _, err := w.EndBox()
    return err
}

// marshalData writes one complete data box to w: it opens the box, marshals a
// UTF-8 string payload holding val using the package-level ctx, and closes the
// box again. The first error encountered is returned.
func marshalData(w *mp4.Writer, val string) error {
    if _, err := w.StartBox(&mp4.BoxInfo{Type: mp4.BoxTypeData()}); err != nil {
        return err
    }

    payload := mp4.Data{
        DataType: mp4.DataTypeStringUTF8,
        Data:     []byte(val),
    }
    if _, err := mp4.Marshal(w, &payload, ctx); err != nil {
        return err
    }

    _, err := w.EndBox()
    return err
}
// addMeta writes a metadata item box of type tag to w. The item contains a
// single data box, written by marshalData, that carries val. The first error
// encountered is returned.
func addMeta(w *mp4.Writer, tag mp4.BoxType, val string) error {
    if _, err := w.StartBox(&mp4.BoxInfo{Type: tag}); err != nil {
        return err
    }
    if err := marshalData(w, val); err != nil {
        return err
    }
    _, err := w.EndBox()
    return err
}

The origin file info:

[ftyp] Size=28 ... (use "-full ftyp" to show all)
[moov] Size=53717
  [mvhd] Size=108 ... (use "-full mvhd" to show all)
  [iods] (unsupported box type) Size=21 Data=[...] (use "-full iods" to show all)
  [trak] Size=53580
    [tkhd] Size=92 ... (use "-full tkhd" to show all)
    [mdia] Size=53480
      [mdhd] Size=32 ... (use "-full mdhd" to show all)
      [hdlr] Size=98 ... (use "-full hdlr" to show all)
      [minf] Size=53342
        [smhd] Size=16 Version=0 Flags=0x000000 Balance=0
        [dinf] Size=36
          [dref] Size=28 Version=0 Flags=0x000000 EntryCount=1
            [url ] Size=12 Version=0 Flags=0x000001
        [stbl] Size=53282
          [stsd] Size=94 Version=0 Flags=0x000000 EntryCount=1
            [mp4a] Size=78 ... (use "-full mp4a" to show all)
              [esds] Size=42 ... (use "-full esds" to show all)
          [stts] Size=24 ... (use "-full stts" to show all)
          [stsc] Size=52 ... (use "-full stsc" to show all)
          [stsz] Size=48264 ... (use "-full stsz" to show all)
          [stco] Size=4840 ... (use "-full stco" to show all)
[mdat] Size=4396475 Data=[...] (use "-full mdat" to show all)
[free] Size=99 Data=[...] (use "-full free" to show all)

After that:

[ftyp] Size=28 ... (use "-full ftyp" to show all)
[moov] Size=53776
  [mvhd] Size=108 ... (use "-full mvhd" to show all)
  [iods] (unsupported box type) Size=21 Data=[...] (use "-full iods" to show all)
  [trak] Size=53580
    [tkhd] Size=92 ... (use "-full tkhd" to show all)
    [mdia] Size=53480
      [mdhd] Size=32 ... (use "-full mdhd" to show all)
      [hdlr] Size=98 ... (use "-full hdlr" to show all)
      [minf] Size=53342
        [smhd] Size=16 Version=0 Flags=0x000000 Balance=0
        [dinf] Size=36
          [dref] Size=28 Version=0 Flags=0x000000 EntryCount=1
            [url ] Size=12 Version=0 Flags=0x000001
        [stbl] Size=53282
          [stsd] Size=94 Version=0 Flags=0x000000 EntryCount=1
            [mp4a] Size=78 ... (use "-full mp4a" to show all)
              [esds] Size=42 ... (use "-full esds" to show all)
          [stts] Size=24 ... (use "-full stts" to show all)
          [stsc] Size=52 ... (use "-full stsc" to show all)
          [stsz] Size=48264 ... (use "-full stsz" to show all)
          [stco] Size=4840 ... (use "-full stco" to show all)
  [udta] Size=59
    [meta] Size=51 Version=0 Flags=0x000000
      [ilst] Size=43
        [(c)alb] Size=35
          [data] Size=27 DataType=UTF8 DataLang=0 Data="Hello world"
[mdat] Size=4396475 Data=[...] (use "-full mdat" to show all)
[free] Size=99 Data=[...] (use "-full free" to show all)
oustn commented 8 months ago

image

image

oustn commented 8 months ago

Add bytes:

0000003B 75647461 00000033 6D657461 0000002B 696C7374 00000023 A9616C62 0000001B 64617461 00000001 00000000 48656C6C 6F20776F 726C64

sunfish-shogi commented 8 months ago

@oustn The stco box holds the list of chunk offsets into the file, i.e. where each chunk of mdat starts. Because mdat has moved, you have to add 59 (= the size of the added udta box) to every entry of every stco box.

In your sample files, the moov box appears before the mdat box. This layout is often called "fast-start MP4". When you change the size of the moov box in a fast-start MP4, these offset rewrites are necessary. For a non-fast-start file (mdat before moov) they are unnecessary, because mdat does not move.

oustn commented 8 months ago

@sunfish-shogi Thanks so much. I understand what you mean. I will try to resolve it.

oustn commented 8 months ago

@sunfish-shogi Sorry I need your help.

case mp4.BoxTypeMoov():
            // ...
            stcoes, err := mp4.ExtractBoxes(r, nil, []mp4.BoxPath{{mp4.BoxTypeStco()}})

I tried to get the stco box this way, but len(stcoes) == 0.

I changed the insert order so that udta is inserted before trak, and I can get the size of udta. But how do I update the stco box? Adding a case mp4.BoxTypeStco() does not seem to work.

sunfish-shogi commented 8 months ago

@oustn

I hope this sample code will help you.

// main copies the MP4 file named by os.Args[1] to os.Args[2] and adds the
// metadata written by addMeta to moov/udta/meta/ilst. If the input has no ilst
// box, the udta/meta/ilst chain is created at the end of moov. Afterwards, if
// the mdat box ended up at a different offset in the output than in the
// input, every stco box in the output is patched by that difference.
//
// NOTE(review): only stco boxes are patched; if inputs can carry co64
// (64-bit chunk offsets) they would need the same treatment — confirm.
func main() {
    input, err := os.Open(os.Args[1])
    if err != nil {
        panic(err)
    }
    defer input.Close()
    output, err := os.Create(os.Args[2])
    if err != nil {
        panic(err)
    }
    defer output.Close()

    var ilstExists bool
    var mdatOffsetDiff int64
    var stcoOffsets []int64

    r := bufseekio.NewReadSeeker(input, 1024*1024, 3)
    w := mp4.NewWriter(output)
    _, err = mp4.ReadBoxStructure(r, func(h *mp4.ReadHandle) (interface{}, error) {
        switch h.BoxInfo.Type {
        // 1. moov, trak, mdia, minf, stbl, udta, meta, ilst: rewritten box by
        //    box so that EndBox can recalculate their sizes
        case mp4.BoxTypeMoov(),
            mp4.BoxTypeTrak(),
            mp4.BoxTypeMdia(),
            mp4.BoxTypeMinf(),
            mp4.BoxTypeStbl(),
            mp4.BoxTypeUdta(),
            mp4.BoxTypeMeta(),
            mp4.BoxTypeIlst():
            if _, err := w.StartBox(&h.BoxInfo); err != nil {
                return nil, err
            }
            // StartBox writes only the header. Re-emit the box's own payload
            // (e.g. the version/flags of an existing meta box) before its
            // children, but only when the input actually had payload bytes,
            // so that the output keeps the input's layout.
            box, n, err := h.ReadPayload()
            if err != nil {
                return nil, err
            }
            if n > 0 {
                if _, err := mp4.Marshal(w, box, h.BoxInfo.Context); err != nil {
                    return nil, err
                }
            }
            if _, err := h.Expand(); err != nil {
                return nil, err
            }
            // 1-a. [only moov box] add udta/meta/ilst if no ilst was seen
            //      while expanding moov's children
            if h.BoxInfo.Type == mp4.BoxTypeMoov() && !ilstExists {
                ctx := h.BoxInfo.Context
                ctx.UnderUdta = true
                if _, err := w.StartBox(&mp4.BoxInfo{Type: mp4.BoxTypeUdta()}); err != nil {
                    return nil, err
                }
                if _, err := w.StartBox(&mp4.BoxInfo{Type: mp4.BoxTypeMeta()}); err != nil {
                    return nil, err
                }
                // meta is a FullBox: write its version/flags (all zero).
                if _, err := mp4.Marshal(w, &mp4.Meta{}, ctx); err != nil {
                    return nil, err
                }
                // The metadata handler box ("mdir") precedes ilst.
                if _, err := w.StartBox(&mp4.BoxInfo{Type: mp4.BoxTypeHdlr()}); err != nil {
                    return nil, err
                }
                hdlr := &mp4.Hdlr{HandlerType: [4]byte{'m', 'd', 'i', 'r'}}
                if _, err := mp4.Marshal(w, hdlr, ctx); err != nil {
                    return nil, err
                }
                if _, err := w.EndBox(); err != nil {
                    return nil, err
                }
                if _, err := w.StartBox(&mp4.BoxInfo{Type: mp4.BoxTypeIlst()}); err != nil {
                    return nil, err
                }
                ctx.UnderIlst = true
                if err := addMeta(w, ctx); err != nil {
                    return nil, err
                }
                // close ilst, meta and udta
                for i := 0; i < 3; i++ {
                    if _, err := w.EndBox(); err != nil {
                        return nil, err
                    }
                }
            }
            // 1-b. [only ilst box] add metadata to the existing ilst
            if h.BoxInfo.Type == mp4.BoxTypeIlst() {
                ctx := h.BoxInfo.Context
                ctx.UnderIlst = true
                if err := addMeta(w, ctx); err != nil {
                    return nil, err
                }
                ilstExists = true
            }
            if _, err := w.EndBox(); err != nil {
                return nil, err
            }
        // 2. otherwise
        default:
            // 2-a. [only stco box] remember where it is written in the output
            if h.BoxInfo.Type == mp4.BoxTypeStco() {
                offset, err := w.Seek(0, io.SeekCurrent)
                if err != nil {
                    return nil, err
                }
                stcoOffsets = append(stcoOffsets, offset)
            }
            // 2-b. [only mdat box] remember how far it moved
            if h.BoxInfo.Type == mp4.BoxTypeMdat() {
                iOffset := int64(h.BoxInfo.Offset)
                oOffset, err := w.Seek(0, io.SeekCurrent)
                if err != nil {
                    return nil, err
                }
                mdatOffsetDiff = oOffset - iOffset
            }
            // copy box without modification
            if err := w.CopyBox(r, &h.BoxInfo); err != nil {
                return nil, err
            }
        }

        return nil, nil
    })
    if err != nil {
        panic(err)
    }

    // if mdat box is moved, update stco box
    if mdatOffsetDiff != 0 {
        for _, stcoOffset := range stcoOffsets {
            // seek to stco box header
            if _, err := output.Seek(stcoOffset, io.SeekStart); err != nil {
                panic(err)
            }
            // read box header
            bi, err := mp4.ReadBoxInfo(output)
            if err != nil {
                panic(err)
            }
            // read stco box payload
            var stco mp4.Stco
            if _, err := mp4.Unmarshal(output, bi.Size-bi.HeaderSize, &stco, bi.Context); err != nil {
                panic(err)
            }
            // update chunk offsets; the uint32 conversion wraps for a
            // negative difference, and the wrapping addition still yields
            // the shifted offset
            for i := range stco.ChunkOffset {
                stco.ChunkOffset[i] += uint32(mdatOffsetDiff)
            }
            // seek to stco box payload
            if _, err := bi.SeekToPayload(output); err != nil {
                panic(err)
            }
            // write stco box payload
            if _, err := mp4.Marshal(output, &stco, bi.Context); err != nil {
                panic(err)
            }
        }
    }
}
sunfish-shogi commented 8 months ago

Sorry, error handling is required for CopyBox.

- w.CopyBox(r, &h.BoxInfo)
+ if err := w.CopyBox(r, &h.BoxInfo); err != nil {
+   return nil, err
+ }
oustn commented 8 months ago

@sunfish-shogi Thanks again for your help! The files play fine, but, unfortunately, Apple Music/Mp3tag can't read the metadata. If this issue is taking up too much of your time, feel free to close it. I think it's beyond my expertise; it seems to require in-depth knowledge of MP4 encoding, which I'm not familiar with at all.

image

Output file:

[ftyp] Size=28 ... (use "-full ftyp" to show all)
[moov] Size=53809
  [mvhd] Size=108 ... (use "-full mvhd" to show all)
  [iods] (unsupported box type) Size=21 Data=[...] (use "-full iods" to show all)
  [trak] Size=53580
    [tkhd] Size=92 ... (use "-full tkhd" to show all)
    [mdia] Size=53480
      [mdhd] Size=32 ... (use "-full mdhd" to show all)
      [hdlr] Size=98 ... (use "-full hdlr" to show all)
      [minf] Size=53342
        [smhd] Size=16 Version=0 Flags=0x000000 Balance=0
        [dinf] Size=36
          [dref] Size=28 Version=0 Flags=0x000000 EntryCount=1
            [url ] Size=12 Version=0 Flags=0x000001
        [stbl] Size=53282
          [stsd] Size=94 Version=0 Flags=0x000000 EntryCount=1
            [mp4a] Size=78 ... (use "-full mp4a" to show all)
              [esds] Size=42 ... (use "-full esds" to show all)
          [stts] Size=24 ... (use "-full stts" to show all)
          [stsc] Size=52 ... (use "-full stsc" to show all)
          [stsz] Size=48264 ... (use "-full stsz" to show all)
          [stco] Size=4840 ... (use "-full stco" to show all)
  [udta] Size=92
    [meta] Size=84 Version=0 Flags=0x000000
      [hdlr] Size=33 Version=0 Flags=0x000000 PreDefined=0 HandlerType="mdir" Name=""
      [ilst] Size=43
        [(c)alb] Size=35
          [data] Size=27 DataType=UTF8 DataLang=0 Data="Hello world"
[mdat] Size=4396475 Data=[...] (use "-full mdat" to show all)
[free] Size=99 Data=[...] (use "-full free" to show all)

After setting the album with Apple Music:

[ftyp] Size=28 ... (use "-full ftyp" to show all)
[moov] Size=55862
  [mvhd] Size=108 ... (use "-full mvhd" to show all)
  [iods] (unsupported box type) Size=21 Data=[...] (use "-full iods" to show all)
  [trak] Size=53580
    [tkhd] Size=92 ... (use "-full tkhd" to show all)
    [mdia] Size=53480
      [mdhd] Size=32 ... (use "-full mdhd" to show all)
      [hdlr] Size=98 ... (use "-full hdlr" to show all)
      [minf] Size=53342
        [smhd] Size=16 Version=0 Flags=0x000000 Balance=0
        [dinf] Size=36
          [dref] Size=28 Version=0 Flags=0x000000 EntryCount=1
            [url ] Size=12 Version=0 Flags=0x000001
        [stbl] Size=53282
          [stsd] Size=94 Version=0 Flags=0x000000 EntryCount=1
            [mp4a] Size=78 ... (use "-full mp4a" to show all)
              [esds] Size=42 ... (use "-full esds" to show all)
          [stts] Size=24 ... (use "-full stts" to show all)
          [stsc] Size=52 ... (use "-full stsc" to show all)
          [stsz] Size=48264 ... (use "-full stsz" to show all)
          [stco] Size=4840 ... (use "-full stco" to show all)
  [udta] Size=2145
    [meta] Size=2137 Version=0 Flags=0x000000
      [hdlr] Size=34 Version=0 Flags=0x000000 PreDefined=0 HandlerType="mdir" Name=""
      [free] Size=2048 Data=[...] (use "-full free" to show all)
      [ilst] Size=43
        [(c)alb] Size=35
          [data] Size=27 DataType=UTF8 DataLang=0 Data="Hello world"
[mdat] Size=4396475 Data=[...] (use "-full mdat" to show all)
[free] Size=99 Data=[...] (use "-full free" to show all)
sunfish-shogi commented 8 months ago

@oustn I updated the script to insert an hdlr box, and the text "Hello, world!" can now be read successfully with the ffprobe command.

script: https://gist.github.com/sunfish-shogi/cccde016a38c66d32c07a0234368804e

ffprobe:

% ffprobe -hide_banner with-udta.mp4 
Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'with-udta.mp4':
  Metadata:
    major_brand     : isom
    minor_version   : 512
    compatible_brands: isomiso2avc1mp41
    album           : Hello, World!
  Duration: 00:00:01.01, start: 0.000000, bitrate: 204 kb/s
  Stream #0:0[0x1](eng): Video: h264 (High) (avc1 / 0x31637661), yuvj420p(pc, progressive), 320x180 [SAR 1:1 DAR 16:9], 139 kb/s, 10 fps, 10 tbr, 10240 tbn (default)
    Metadata:
      handler_name    : VideoHandle
      vendor_id       : [0][0][0][0]
      encoder         : Lavc60.31.102 libx264
  Stream #0:1[0x2](eng): Audio: aac (LC) (mp4a / 0x6134706D), 44100 Hz, stereo, fltp, 49 kb/s (default)
    Metadata:
      handler_name    : SoundHandle
      vendor_id       : [0][0][0][0]

Sorry, I'm not familiar with the specifications of Apple Music and Mp3tag.

But I know that old Apple-style hdlr atoms differ from ISO-14496. That might be related to this issue: https://github.com/abema/go-mp4/issues/7

oustn commented 8 months ago

@sunfish-shogi

Yeah, ffprobe can echo the right metadata, and some players can recognize the correct tags. Thanks for your effective work and patience.

oustn commented 8 months ago

@sunfish-shogi

I found that the file's format may simply not be supported by some players or by macOS: the compatible_brands value is isomM4A mp42. Some players, like VLC, can display the correct info.

Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'in.m4a':
  Metadata:
    major_brand     : M4A
    minor_version   : 1
    compatible_brands: isomM4A mp42
    creation_time   : 2020-09-24T05:46:42.000000Z
  Duration: 00:09:20.11, start: 0.000000, bitrate: 63 kb/s
  Stream #0:0[0x1](und): Audio: aac (HE-AAC) (mp4a / 0x6134706D), 44100 Hz, stereo, fltp, 62 kb/s (default)
    Metadata:
      creation_time   : 2020-09-24T05:46:42.000000Z
      handler_name    : aac@GPAC0.5.2-DEV-revVersion: 0.5.2-426-gc5ad4e4+dfsg5-1ubuntu0.1
      vendor_id       : [0][0][0][0]