dbieber / audiorecorder

AudioRecorder: a cross platform javascript utility for recording and playing audio in all major browsers
33 stars 14 forks source link

problems with speex compressor. #11

Open pgee70 opened 9 years ago

pgee70 commented 9 years ago

Hi. Firstly, thanks very much for putting this together; it was very helpful. I noted that in your demo you have a worker to compress the microphone stream into Speex format, but when you export as Speex, you compress the samples again. I found that this caused unnecessary delays in the saving process, so I tried to use the Speex data that was already being compressed by the worker's interval script; however, I ran into issues using it — the code did not work. I also found that the interval set to process the Encoder was too low, resulting in poorly compressed audio, and Chrome at least was running into a problem creating the .ogg file in the .mux function, where it was getting stack overflow errors for larger files caused by String.fromCharCode.apply not working properly. With larger files I found some bits of your code broke, so I rewrote them. I also managed the memory better by not storing the PCM recorded data after it had been compressed. I also added a little more padding at the end of the file to stop clipping of the audio.

In my implementation I used the AudioContext to drive other tools (like a visualiser), so I changed the constructor slightly. Enclosed are the files I modified for your perusal; I hope they are of help to you. Using this code, I have been able to record for 60 minutes to a 17.9MB file that has intelligible audio quality, suitable for transcription/notes.

// in audiorecorder.js
/**
 * Muxes a Speex header, a comment block and encoded audio into an Ogg
 * bitstream, returned as a binary string (one character per byte).
 *
 * @param {Array} d - Up to three entries: [header, comments, data].
 *     `header.raw` and `comments.raw` are binary strings, and
 *     `header.data.frame_size` is the amount the granule position advances
 *     per segment. `data` is a pair [bytes, segments]: `bytes` is a typed
 *     array holding the encoded audio and `segments` is an array with the
 *     byte length of each segment. With one entry only the header page is
 *     produced; with two, the header and comment pages.
 * @param {Object} [o] - Optional overrides for the header and comment pages:
 *     `o.length` replaces the segment length and `o.checksum` the initial
 *     checksum field.
 * @returns {string} The muxed pages, concatenated.
 *
 * Neither `d` nor the arrays inside it are modified, so the same clip can be
 * muxed more than once.
 */
Ogg.prototype.mux = function (d, o) {
    // Number of segments written into each data page.
    var SEGMENTS_PER_PAGE = 100;
    // Bytes converted per String.fromCharCode.apply call; converting the
    // whole recording in one call overflows the stack for large files.
    var BYTES_PER_CHUNK = 1024;

    function OggPageHeader(type, length, checksum, granulePos) {
        return {
            capturePattern: [0x4f, 0x67, 0x67, 0x53],
            version: 0,
            headerType: type,
            granulePos: granulePos || 0,
            serial: 406,
            sequence: 0,
            checksum: checksum || 0,
            pageSegments: 1,
            segments: [ length || 0 ],
            frames: ""
        };
    }

    function OggPageData(segments, granulePos) {
        var p = OggPageHeader(0, null, null, granulePos);
        p.pageSegments = segments.length;
        p.segments = segments;
        return p;
    }

    // Returns `str` with the 32-bit value `c` written little-endian at
    // byte offset 22 (the checksum field of the page header).
    function chksum(str, c) {
        var buf = new ArrayBuffer(str.length);
        var bufView = new Uint8Array(buf);
        for (var i = 0, len = str.length; i < len; i++) {
            bufView[i] = str.charCodeAt(i);
        }
        var dv = new DataView(buf);
        dv.setUint32(22, c, true);

        return String.fromCharCode.apply(null, bufView);
    }

    // Checksums header + content, patches the checksum into the header and
    // returns the finished page.
    function hdrup(hdr, content) {
        var csum = crc32(hdr + content);
        return chksum(hdr, csum) + content;
    }

    // Total number of bytes described by a list of segment lengths.
    function frames(segments) {
        var sum = 0;
        for (var i = 0; i < segments.length; ++i) {
            sum += segments[i];
        }
        return sum;
    }

    o = o || {};

    var header = d[0];
    var hdr = header.raw;
    // header page
    var p = this.createPage(OggPageHeader(2, o.length || hdr.length, o.checksum, 0));
    var str = hdrup(p, hdr);
    if (d.length === 1) {
        return str;
    }

    var comments_raw = d[1].raw;
    // comments page
    p = this.createPage(OggPageHeader(0, o.length || comments_raw.length, o.checksum));
    str += hdrup(p, comments_raw);
    if (d.length === 2) {
        return str;
    }

    // data pages
    var data = d[2];
    var segmentSizes = data[1];
    var pages = [];
    // slice() rather than splice(): the caller's segment table must survive
    // so the clip can be exported again.
    for (var start = 0; start < segmentSizes.length; start += SEGMENTS_PER_PAGE) {
        pages.push(segmentSizes.slice(start, start + SEGMENTS_PER_PAGE));
    }

    var bytes = new Uint8Array(data[0].buffer);
    var stream = '';
    for (var offset = 0; offset < bytes.length; offset += BYTES_PER_CHUNK) {
        var end = Math.min(bytes.length, offset + BYTES_PER_CHUNK);
        stream += String.fromCharCode.apply(null, bytes.subarray(offset, end));
    }

    var from = 0;
    var to = 0;
    var granulePos = 0;
    var frame_size = header.data.frame_size;
    for (var i = 0; i < pages.length; ++i) {
        var segchunk = pages[i];
        to += frames(segchunk);
        granulePos += segchunk.length * frame_size;

        p = this.createPage(OggPageData(segchunk, granulePos));
        str += hdrup(p, stream.substring(from, to));

        from = to;
    }
    return str;
};

... 

// Builds downloadable files from already-encoded clip data.
var FileHandler = {
    /**
     * Wraps Speex data in an Ogg container.
     *
     * @param {Array} data - Pair [bytes, segments]: the encoded bytes
     *     (array-like of numbers) and the list passed through untouched to
     *     the muxer as the second element of the data entry.
     * @returns {*} Whatever `Ogg#mux` returns for the header, comment and
     *     data entries.
     */
    speexFile: function(data) {
        var sampleRate = 44100;
        var isNarrowband = sampleRate < 16000;
        var oggdata = new Ogg(null, {file: true});

        // Copy the bytes into a typed array of their own; declared with
        // `var` so the pair does not leak into the global scope.
        var encodedBytes = new Uint8Array(data[0].length);
        encodedBytes.set(data[0]);
        var spxdata = [encodedBytes, data[1]];

        var spxhdr = new SpeexHeader({
            bitrate: -1,
            extra_headers: 0,
            frame_size: isNarrowband ? 160 : 320,
            frames_per_packet: 1,
            header_size: 80,
            mode: isNarrowband ? 0 : 1,
            mode_bitstream_version: 4,
            nb_channels: 1,
            rate: sampleRate,
            reserved1: 0,
            reserved2: 0,
            speex_string: "Speex   ",
            speex_version_id: 1,
            speex_version_string: "1.2rc1\0\0\0\0\0\0\0\0\0\0\0\0\0\0",
            vbr: 0
        });

        var comment = "Encoded with speex.js";
        var spxcmt = new SpeexComment({
            vendor_string: comment,
            vendor_length: comment.length
        });

        return oggdata.mux([spxhdr, spxcmt, spxdata]);
    }
};

// Defines the Clip API
var Clip = {
    // Creates an empty, unfinalized clip.
    create: function() {
        return {
            samples: [],
            sampleRate: 44100, // TODO(Bieber): Use actual sample rate
            speex: [[],[]],
            startTime: undefined,
            finalized: false
        };
    },

    // Creates a clip from raw samples (also computes its speex data).
    createFromSamples: function(samples) {
        var clip = Clip.create();
        Clip.setSamples(clip, samples);
        return clip;
    },

    // Creates a clip from speex data (also computes its samples).
    createFromSpeex: function(speex) {
        var clip = Clip.create();
        Clip.setSpeex(clip, speex);
        return clip;
    },

    setStartTime: function(clip, time) {
        clip.startTime = time;
    },

    // Replaces the samples and re-encodes them so speex stays in sync.
    setSamples: function(clip, data) {
        clip.samples = data;
        Clip.computeSpeex(clip);
    },

    // Replaces the speex data and decodes it so samples stay in sync.
    setSpeex: function(clip, data) {
        clip.speex = data;
        Clip.computeSamples(clip);
    },

    // Appends every element of the array-like `source` to the array `target`.
    // A plain loop is used instead of Array.prototype.push.apply because
    // apply passes each element as a call argument, and engines limit how
    // many arguments a call may take; long recordings can exceed that limit.
    _appendAll: function(target, source) {
        for (var i = 0, len = source.length; i < len; i++) {
            target.push(source[i]);
        }
    },

    // WARNING: Leaves speex out of date.
    addSamples: function(clip, data) {
        Clip._appendAll(clip.samples, data);
    },

    // WARNING: Leaves samples out of date.
    // `data` is a pair; each half is appended to the matching half of
    // clip.speex.
    addSpeex: function(clip, data) {
        Clip._appendAll(clip.speex[0], data[0]);
        Clip._appendAll(clip.speex[1], data[1]);
    },

    // WARNING: Potentially slow.
    computeSamples: function(clip) {
        // Decodes speex data to get playable samples
        // TODO(Bieber): Make a copy
        clip.samples = Codec.decode(clip.speex);
    },

    // WARNING: Potentially slow.
    computeSpeex: function(clip) {
        // Encodes samples to get smaller speex data
        // TODO(Bieber): Make a copy
        clip.speex = Codec.encode(clip.samples);
    },

    getStartTime: function(clip) {
        return clip.startTime;
    },

    // Start time plus the clip length in milliseconds.
    getEndTime: function(clip) {
        return clip.startTime + Clip.getLength(clip);
    },

    // Returns clip length in milliseconds.
    getLength: function(clip) {
        return 1000 * clip.samples.length / clip.sampleRate;
    },

    // Marks the clip as complete.
    finalize: function(clip) {
        clip.finalized = true;
    }
};
// The HTML5 Audio middleware that does the recording in modern browsers
var Html5Audio = {
    DEFAULT_WORKER_PATH: 'worker.js',
    worker: undefined,

    audioContext: undefined,
    mediaStreamSource: undefined,
    playingSources: [],
    // Length of the most recent buffer handed to the worker; used to trim
    // the same number of PCM samples when the worker reports 'clear-samples'.
    bufferLen:0,
    ready: false,
    recording: false,
    // Callback passed to stopRecording, invoked once the worker reports
    // 'finalized'.
    cb: undefined,

    /**
     * Sets up the audio graph and the encoding worker.
     *
     * @param {Object} [config]
     * @param {Object} [config.audioContext] - Existing audio context to
     *     reuse; when given, `config.stream` is used as the input stream
     *     instead of asking for the microphone.
     * @param {Object} [config.stream] - Stream used with `config.audioContext`.
     * @param {string} [config.worker_path] - Worker script URL; defaults to
     *     DEFAULT_WORKER_PATH.
     */
    init: function(config) {
        var options = config || {};
        // typeof null is 'object', so null must be excluded explicitly.
        var hasContext = typeof options.audioContext === 'object' && options.audioContext !== null;
        if (hasContext)
        {
            Html5Audio.audioContext = options.audioContext;
            Html5Audio._useStream(options.stream);
        }
        else
        {
            Html5Audio.audioContext = new AudioContext();
            navigator.getUserMedia({audio: true}, Html5Audio._useStream, function(err) {
                // Report the failure; `ready` stays false so isRecording()
                // keeps returning false.
                console.error(err);
            });
        }

        var worker_path = options.worker_path || Html5Audio.DEFAULT_WORKER_PATH;
        try {
            Html5Audio.worker = new Worker(worker_path);
            Html5Audio.worker.onmessage = Html5Audio._handleMessage;
        } catch(error) {
            console.error(error);
        }
    },

    // Called by init with a MediaStream object
    _useStream: function(stream) {
        Html5Audio.mediaStreamSource = Html5Audio.audioContext.createMediaStreamSource(stream);
        var context = Html5Audio.mediaStreamSource.context;

        var bufferLen = 4 * 4096;
        var numChannelsIn = 1;
        var numChannelsOut = 1;
        var node = context.createScriptProcessor(bufferLen, numChannelsIn, numChannelsOut);
        node.onaudioprocess = Html5Audio._handleAudio;

        Html5Audio.mediaStreamSource.connect(node);
        node.connect(context.destination);

        Html5Audio.ready = true;
    },

    _handleAudio: function(event) {
        // Buffer has length specified in _useStream
        var buffer = event.inputBuffer.getChannelData(0);
        if (Html5Audio.recording) {
            // Add the samples immediately to the Clip
            Clip.addSamples(AudioRecorder.clip, buffer);

            // Hand the same samples to the worker, which encodes them in the
            // background and posts the speex data back.
            Html5Audio.bufferLen = buffer.length;
            Html5Audio.worker.postMessage({
                command: 'put',
                buffer: buffer
            });
        }
    },

    // Dispatches messages posted by the worker.
    _handleMessage: function(event) {
        switch(event.data.command) {
            case 'speex':
            var data = event.data.data;
            Clip.addSpeex(AudioRecorder.clip, data);
            break;

            case 'finalized':
            Clip.finalize(AudioRecorder.clip);
            if (Html5Audio.cb) Html5Audio.cb(AudioRecorder.clip);
            break;

            case 'cleared':
            Clip.setSamples(AudioRecorder.clip, []);
            break;

            case 'print':
            console.log(event.data.message);
            break;

            case 'clear-samples':
            // remove the PCM samples that have been encoded to save memory.
            AudioRecorder.clip.samples.splice(0,Html5Audio.bufferLen);
            break;
        }
    },

    record: function() {
        Html5Audio.recording = true;
    },

    // Stops recording; `cb` receives the clip once the worker has finalized.
    stopRecording: function(cb) {
        if (Html5Audio.recording) {
            Html5Audio.cb = cb; // TODO(Bieber): Be more robust maybe with ids
            Html5Audio.recording = false;
            Html5Audio.worker.postMessage({
                command: 'finalize'
            });
        }
    },

    // Asks the worker to drop its buffered samples. `cb` is accepted for
    // interface compatibility but is not used.
    clear: function(cb) {
        Html5Audio.worker.postMessage({
            command: 'clear'
        });
    },

    /**
     * Schedules playback of a clip's samples.
     *
     * @param {Object} clip - Clip with `samples` and `sampleRate`.
     * @param {number} inHowLong - Delay added to the context's current time.
     * @param {number} offset - Offset passed to the source's start().
     */
    playClip: function(clip, inHowLong, offset) {
        var when = Html5Audio.audioContext.currentTime + inHowLong;
        var samples = clip.samples;

        var newBuffer = Html5Audio.audioContext.createBuffer(1, samples.length, clip.sampleRate);
        newBuffer.getChannelData(0).set(samples);

        var newSource = Html5Audio.audioContext.createBufferSource();
        newSource.buffer = newBuffer;

        // Forget the source once it finishes so playingSources does not
        // grow with every clip ever played.
        newSource.onended = function() {
            var index = Html5Audio.playingSources.indexOf(newSource);
            if (index !== -1) {
                Html5Audio.playingSources.splice(index, 1);
            }
        };

        newSource.connect(Html5Audio.audioContext.destination);
        newSource.start(when, offset);

        Html5Audio.playingSources.push(newSource);
    },

    stopPlaying: function() {
        // Stops playing all playing sources.
        // The list is swapped out first so that onended handlers fired by
        // stop() cannot disturb the iteration.
        var sources = Html5Audio.playingSources;
        Html5Audio.playingSources = [];
        for (var i = 0; i < sources.length; i++) {
          sources[i].stop(0);
        }
    },

    isRecording: function() {
        return Html5Audio.ready && Html5Audio.recording;
    }
};
....

// Public recorder API; delegates the actual work to a middleware object.
var AudioRecorder = {
    clip: undefined,
    middleware: undefined, // HTML5 or Flash audio

    /**
     * Initializes the AudioRecorder and picks the middleware.
     *
     * @param {Object} [config] - Passed through unchanged to the
     *     middleware's init. When `config.audioContext` is an object the
     *     HTML5 middleware is always used; otherwise HTML5 is used only if
     *     both AudioContext and getUserMedia are available, and Flash is the
     *     fallback.
     */
    init: function(config) {
        // Normalize the vendor-prefixed browser APIs.
        window.AudioContext = window.AudioContext || window.webkitAudioContext || window.mozAudioContext;
        navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia || navigator.mozGetUserMedia || navigator.msGetUserMedia;

        // typeof null is 'object', so null must be excluded explicitly.
        var contextSupplied = !!config && typeof config.audioContext === 'object' && config.audioContext !== null;
        var html5audio = !!window.AudioContext && !!navigator.getUserMedia;

        if (contextSupplied || html5audio) {
            AudioRecorder.middleware = Html5Audio;
        } else {
            AudioRecorder.middleware = FlashAudio;
        }
        AudioRecorder.middleware.init(config);
    },

    record: function() {
        // Starts recording to the current clip
        if (AudioRecorder.isRecording()) return true;

        // If we can't record on the current clip, make a new one
        if (AudioRecorder.clip === undefined || AudioRecorder.clip.finalized) {
            AudioRecorder.newClip();
        }

        return AudioRecorder.middleware.record();
    },

    stopRecording: function(cb) {
        // Stops recording and passes the newly created clip object to the
        // callback function cb
        if (!AudioRecorder.isRecording()) return true;
        return AudioRecorder.middleware.stopRecording(cb);
    },

    // Replaces the current clip with an empty one. Returns false (and
    // changes nothing) while recording, true otherwise.
    newClip: function() {
        if (AudioRecorder.isRecording()) {
            console.warn("Cannot create a new clip while recording");
            return false;
        }
        AudioRecorder.clip = Clip.create();
        return true;
    },

    getClip: function() {
        return AudioRecorder.clip;
    },

    // Makes `clip` the current clip. Returns false (and changes nothing)
    // while recording, true otherwise, matching newClip.
    setClip: function(clip) {
        if (AudioRecorder.isRecording()) {
            console.warn("Cannot set the clip while recording");
            return false;
        }
        AudioRecorder.clip = clip;
        return true;
    },

    clear: function() {
        // Clears the current clip back to empty
        AudioRecorder.middleware.clear();
        return true;
    },

    playClip: function(clip, inHowLong, offset) {
        // Plays clip starting from the appropriate position at the
        // appropriate time; both inHowLong and offset default to 0.
        if (inHowLong === undefined) {
            inHowLong = 0;
        }
        if (offset === undefined) {
            offset = 0;
        }
        AudioRecorder.middleware.playClip(clip, inHowLong, offset);
        return true;
    },

    stopPlaying: function() {
        // Stops all playing clips
        AudioRecorder.middleware.stopPlaying();
        return true;
    },

    isRecording: function() {
        // Returns True if currently recording, False otherwise
        return AudioRecorder.middleware.isRecording();
    }
};

// in worker.js .... // data page var data = d[2]; // this replaces the .chunk which fails in large file sizes. var segments = []; while ( data[1].length > 0 ){ b = Math.min(data[1].length,100); segments.push(data[1].splice(0,b)); } var stream = String.fromCharCode.apply(null, new Uint8Array(data[0].buffer)) , a = 0 , b = 0 , len = segments.length;

    var granulePos = 0;
    var frame_size = header.data.frame_size;
    for (var i = 0; i < len; ++i) {
        var segchunk = segments[i];
        b += frames(segchunk);
        granulePos += segchunk.length * frame_size;

        p = this.createPage(OggPageData(segchunk, granulePos));
        str += hdrup(p, stream.substring(a, b));

        a = b;
    }
    return str;
}

// Returns the entries of `this.data` joined into one string, or null when
// `this.unpacked` is not set.
Ogg.prototype.bitstream = function () {
    return this.unpacked ? this.data.join("") : null;
};
// Thin wrapper around a single shared Speex codec instance.
var Codec = {
    speex: new Speex({
            quality: 4,
            mode:  1,
            bits_size: 70}),

    // Converts float samples to 16-bit integers and encodes them.
    // Samples are clamped to [-1, 1] before scaling by 32767.
    // Callers should pass a multiple of 320 samples to preserve length.
    // TODO(Bieber): See if you need to make a copy before returning the buffer
    encode: function(buffer) {
        var count = buffer.length;
        var pcm = new Int16Array(count);
        var index = 0;
        while (index < count) {
            var clamped = Math.max(-1.0, buffer[index]);
            clamped = Math.min(1.0, clamped);
            pcm[index] = Math.floor(clamped * 32767);
            index += 1;
        }
        return Codec.speex.encode(pcm, true);
    },

    // Hands the buffer straight to the shared codec's decode.
    decode: function(buffer) {
        var decoded = Codec.speex.decode(buffer);
        return decoded;
    }
};

// Builds downloadable files from raw samples.
var FileHandler = {
    /**
     * Encodes float samples with Speex and wraps the result in an Ogg
     * container.
     *
     * @param {ArrayLike<number>} data - Float samples; each is clamped to
     *     [-1, 1] and scaled to a 16-bit integer before encoding.
     * @returns {*} Whatever `Ogg#mux` returns for the header, comment and
     *     encoded data entries.
     */
    speexFile: function(data) {
        var sampleRate = 44100;
        var isNarrowband = sampleRate < 16000;
        var oggdata = new Ogg(null, {file: true});

        var spxcodec = new Speex({
            quality: 10,
            mode: isNarrowband ? 0 : 1,
            bits_size: isNarrowband ? 15 : 70
        });

        var datalen = data.length;
        var shorts = new Int16Array(datalen);
        for(var i = 0; i < datalen; i++) {
            shorts[i] = Math.floor(Math.min(1.0, Math.max(-1.0, data[i])) * 32767);
        }
        // Declared with `var` so the encoded data does not leak into the
        // global scope.
        var spxdata = spxcodec.encode(shorts, true);

        var spxhdr = new SpeexHeader({
            bitrate: -1,
            extra_headers: 0,
            frame_size: isNarrowband ? 160 : 320,
            frames_per_packet: 1,
            header_size: 80,
            mode: isNarrowband ? 0 : 1,
            mode_bitstream_version: 4,
            nb_channels: 1,
            rate: sampleRate,
            reserved1: 0,
            reserved2: 0,
            speex_string: "Speex   ",
            speex_version_id: 1,
            speex_version_string: "1.2rc1\0\0\0\0\0\0\0\0\0\0\0\0\0\0",
            vbr: 0
        });

        var comment = "Encoded with speex.js";
        var spxcmt = new SpeexComment({
            vendor_string: comment,
            vendor_length: comment.length
        });

        // Mux the encoded data, not the raw input samples.
        return oggdata.mux([spxhdr, spxcmt, spxdata]);
    }
};
....
    // Buffers incoming samples and encodes them in whole frames.
    var Encoder = {
    FRAME_SIZE: 320,

    // Samples received but not yet encoded.
    samples: [],

    // Queues the samples and tells the main thread it may drop its copy.
    put: function(buffer) {
        var queue = Encoder.samples;
        queue.push.apply(queue, buffer);
        _this.postMessage({'command': 'clear-samples'});
    },

    // Encodes every complete frame currently queued and posts the result;
    // a trailing partial frame stays queued.
    process: function() {
        var available = Encoder.samples.length;
        var leftover = available % Encoder.FRAME_SIZE;
        var wholeFrames = Encoder.samples.splice(0, available - leftover);

        if (wholeFrames.length === 0) {
            return;
        }
        var payload = Codec.encode(wholeFrames);
        _this.postMessage({
            'command': 'speex',
            'data': payload
        });
    },

    // Appends two frames of silence, pads to a frame boundary, encodes
    // what is left and reports completion.
    finalize: function() {
        var frame = Encoder.FRAME_SIZE;
        var extra = frame * 2;
        while (extra > 0) {
            Encoder.samples.push(0); // more padding....
            extra -= 1;
        }
        while (Encoder.samples.length % frame !== 0) {
            Encoder.samples.push(0);  // pad with silence
        }
        Encoder.process();
        _this.postMessage({'command': 'finalized'});
    },

    // Drops everything queued and reports it.
    clear: function() {
        Encoder.samples = [];
        _this.postMessage({'command': 'cleared'});
    }
};

// Every 5000 ms, encode the complete frames that have been queued so far.
setInterval(Encoder.process, 5000);
pgee70 commented 9 years ago

Hi try this:

use this snippet to construct: AudioRecorderConfig = {worker_path:'assets/js/ogg/worker.min.js'};

AudioRecorderConfig.audioContext = audioCtx; AudioRecorderConfig.stream = stream; AudioRecorder.init(AudioRecorderConfig);

On 17 Mar 2015, at 2:13 am, jhiswin notifications@github.com wrote:

@pgee70 https://github.com/pgee70 Can you post the full source for your modified audiorecorder.js and worker.js? I've tried manually making the changes you posted, and it does not seem to produce working code.

— Reply to this email directly or view it on GitHub https://github.com/dbieber/audiorecorder/issues/11#issuecomment-81726754.

jhiswin commented 9 years ago

@pgee70 Nice, performs much faster and does create smaller files.

Suggestion: Use this for .chunk, performs better:

    // Splits the array into consecutive slices of at most `chunkSize`
    // elements and returns them as a new array; the original array is not
    // modified. Throws a RangeError unless `chunkSize` is a positive number,
    // because a step of zero or less would never reach the end of the array.
    // Defined as non-enumerable so it does not show up in `for...in` loops
    // over arrays.
    Object.defineProperty(Array.prototype, 'chunk', {
        configurable: true,
        writable: true,
        enumerable: false,
        value: function(chunkSize) {
            if (!(chunkSize > 0)) {
                throw new RangeError('chunkSize must be a positive number');
            }
            var arr=this;
            var R = [];
            for (var i=0,len=arr.length; i<len; i+=chunkSize)
                R.push(arr.slice(i,i+chunkSize));
            return R;
        }
    });

I can't seem to extract working .speex data, or a valid speex file. I get garbled cut up audio as a file, and the .speex data doesn't work with .decode()

jhiswin commented 9 years ago

Alright, got it working after modifying worker.js AudioRecorder.playClip(Clip.createFromSamples(Codec.speex.decode(clips[0].speex[0],clips[0].speex[1]))). I've got the source files patched up, so I can probably send a pull request in a bit.

@pgee70 Would you happen to know how to resample and set to narrowband?

pgee70 commented 9 years ago

Thanks for the suggestion. I really should learn to prototype… there is just too much to learn…..

here is some of the code in my code that calls the audiorecorder.js it may help you. please don’t publish this. but you can use snippets to make your examples work.

here is an example of the fourier transformer working + the buttons in the code included.

On 17 Mar 2015, at 7:32 am, jhiswin notifications@github.com wrote:

@pgee70 https://github.com/pgee70 Performs much faster and does create smaller files.

Suggestion: Use this for .chunk, performs better than your custom function and speeds up emscripten code: Array.prototype.chunk = function(chunkSize) { var arr=this; var R = []; for (var i=0,len=arr.length; i<len; i+=chunkSize) R.push(arr.slice(i,i+chunkSize)); return R; }

I can't seem to extract working .speex data, or a valid speex file. I get garbled cut up audio as a file, and the .speex data doesn't work with .decode()

— Reply to this email directly or view it on GitHub https://github.com/dbieber/audiorecorder/issues/11#issuecomment-81922842.

pgee70 commented 9 years ago

ok, i didn’t use the play clip function. I just wanted to make a recorder, not a player…

No, I really don’t understand audio that well. It has been a struggle for me to get working what I have, with your help to guide me.

The other nights when I was testing, it was so frustrating — it would record for 10 minutes but not 15, then 20 but not 30, then 30 but not 60. Each time it was a different error, something failing because it couldn’t handle the amount of data. But after I got it to record for 60 minutes, post the data and download it again, I haven’t thought about it again…..

On 17 Mar 2015, at 8:51 am, jhiswin notifications@github.com wrote:

Alright, got it working after modifying worker.js AudioRecorder.playClip(Clip.createFromSamples(Codec.speex.decode(clips[0].speex[0],clips[0].speex[1]))). I've got the source files patched up, so I can probably send a pull request.

@pgee70 https://github.com/pgee70 Would you happen to know how to resample and set to narrowband?

— Reply to this email directly or view it on GitHub https://github.com/dbieber/audiorecorder/issues/11#issuecomment-81957278.

jhiswin commented 9 years ago

@pgee70 Another tip to speed it up if you target Firefox users. speex.min.js minifies the initializer, which prevents Firefox from recognizing and precompiling as asm.js. Make sure the section between "// EMSCRIPTEN_START_ASM" and "// EMSCRIPTEN_END_ASM" is pasted into the final file unminified.