JamesHeinrich / getID3

http://www.getid3.org/
Other
1.16k stars 246 forks source link

Fix: ogg file formats wrong duration #450

Closed zaerl closed 3 months ago

zaerl commented 3 months ago

Hi. This is Francesco from Automattic. Thanks for the work done with getID3.

Sometimes, Opus encoders (WhatsApp voice recordings and others) add a special last page header with a granule position of 0xFFFFFFFFFFFFFFFF (all 64 bits set). This value indicates "this is the end," but must be ignored; otherwise, it makes the duration calculations wrong.

You can see this in the following publicly available file: https://ladywoodunite.com/wp-content/uploads/2024/03/whatsapp-audio-2024-02-29-at-19.47.24.ogg

This can be solved here https://github.com/JamesHeinrich/getID3/blob/master/getid3/module.audio.ogg.php#L352 by checking if the granule position is the maximum 64-bit integer, and if it is, perform the scan again. Something like this:

if(substr($LastChunkOfOgg, 13, 8) === "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF") {
    $LastOggSpostion = strpos($LastChunkOfOgg, 'SggO', $LastOggSpostion + 1);
}

See the following JSON, which shows a very long duration.

{
    "GETID3_VERSION": "1.9.23-202310190849",
    "filesize": 772095,
    "filepath": "path",
    "filename": "whatsapp-audio-2024-02-29-at-19.47.24.ogg",
    "filenamepath": "path/whatsapp-audio-2024-02-29-at-19.47.24.ogg",
    "avdataoffset": 99,
    "avdataend": 772068,
    "fileformat": "ogg",
    "audio": {
        "dataformat": "opus",
        "bitrate_mode": "vbr",
        "lossless": false,
        "channels": 1,
        "sample_rate_input": 48000,
        "sample_rate": 48000,
        "bitrate": 1.6069832964316078e-8,
        "encoder": "Recorder",
        "channelmode": "mono",
        "compression_ratio": 2.092426167228656e-14,
        "streams": [
            {
                "dataformat": "opus",
                "bitrate_mode": "vbr",
                "lossless": false,
                "channels": 1,
                "sample_rate_input": 48000,
                "sample_rate": 48000,
                "bitrate": 1.6069832964316078e-8,
                "encoder": "Recorder",
                "channelmode": "mono",
                "compression_ratio": 2.092426167228656e-14
            }
        ]
    },
    "encoding": "UTF-8",
    "mime_type": "audio/ogg; codecs=opus",
    "ogg": {
        "pageheader": {
            "0": {
                "page_start_offset": 0,
                "stream_structver": 0,
                "flags_raw": 2,
                "flags": {
                    "fresh": false,
                    "bos": true,
                    "eos": false
                },
                "pcm_abs_position": 0,
                "stream_serialno": 3942934480,
                "page_seqno": 0,
                "page_checksum": 1863616617,
                "page_segments": 1,
                "page_length": 19,
                "segment_table": [
                    19
                ],
                "header_end_offset": 28,
                "page_end_offset": 47
            },
            "opus": {
                "opus_magic": "OpusHead",
                "version": 1,
                "out_channel_count": 1,
                "pre_skip": 3840,
                "input_sample_rate": 48000
            },
            "1": {
                "page_start_offset": 47,
                "stream_structver": 0,
                "flags_raw": 0,
                "flags": {
                    "fresh": false,
                    "bos": false,
                    "eos": false
                },
                "pcm_abs_position": 0,
                "stream_serialno": 3942934480,
                "page_seqno": 1,
                "page_checksum": 2217457902,
                "page_segments": 1,
                "page_length": 24,
                "segment_table": [
                    24
                ],
                "header_end_offset": 75,
                "page_end_offset": 99,
                "stream_type": "OpusTags"
            },
            "eos": {
                "page_start_offset": 772068,
                "stream_structver": 0,
                "flags_raw": 4,
                "flags": {
                    "fresh": false,
                    "bos": false,
                    "eos": true
                },
                "pcm_abs_position": 1.8446744073709552e+19,
                "stream_serialno": 3942934480,
                "page_seqno": 113,
                "page_checksum": 3093262010,
                "page_segments": 0,
                "page_length": 0,
                "header_end_offset": 772095,
                "page_end_offset": 772095
            }
        },
        "vendor": "Recorder",
        "comments_raw": null,
        "samples": 1.8446744073709552e+19,
        "bitrate_average": 1.6069832964316078e-8,
        "encoding": "UTF-8"
    },
    "opus": {
        "opus_version": 1,
        "sample_rate_input": 48000,
        "out_channel_count": 1
    },
    "playtime_seconds": 384307168202282.3,
    "bitrate": 1.6069832964316078e-8,
    "playtime_string": "106751991167:18:02"
}
JamesHeinrich commented 3 months ago

Thanks, fixed in https://github.com/JamesHeinrich/getID3/commit/3a59f2e0567cd560d5197638f35bc3b544896a70