Vanilagy / webm-muxer

WebM multiplexer in pure TypeScript with support for WebCodecs API, video & audio.
https://vanilagy.github.io/webm-muxer/demo
MIT License

Can microphone audio be synthesized when recording screen? #21

Closed · excing closed this issue 1 year ago

excing commented 1 year ago

Hi there! I'm wondering how to use this library for screen recording since I'm not using Canvas. Also, I'll be speaking into a microphone while recording and I'd like to merge the audio from the microphone with the video. Can you guide me on how to do that? Thanks!

    import { Muxer, ArrayBufferTarget } from 'webm-muxer';

    let audioTrack: MediaStreamTrack;
    let audioTrack1: MediaStreamTrack;
    let audioEncoder: AudioEncoder | null;
    let videoEncoder: VideoEncoder | null;
    let muxer: Muxer<ArrayBufferTarget> | null;

    async function start() {
        let userMedia = await navigator.mediaDevices.getUserMedia({ video: false, audio: true });
        let _audioTrack = userMedia.getAudioTracks()[0];
        let audioSampleRate = _audioTrack?.getCapabilities().sampleRate?.max || 22050;

        let displayMedia = await navigator.mediaDevices.getDisplayMedia({ video: true, audio: true });
        let _audioTrack1 = displayMedia.getAudioTracks()[0];
        let audioSampleRate1 = _audioTrack1?.getCapabilities().sampleRate?.max || audioSampleRate;

        let _muxer = new Muxer({
            target: new ArrayBufferTarget(),
            video: {
                codec: 'V_VP9',
                width: 1280,
                height: 720
            },
            audio: {
                codec: 'A_OPUS',
                sampleRate: audioSampleRate1,
                numberOfChannels: 1
            },
            firstTimestampBehavior: 'offset' // Because we're directly piping a MediaStreamTrack's data into it
        });

        let _videoEncoder = new VideoEncoder({
            output: (chunk, meta) => _muxer.addVideoChunk(chunk, meta),
            error: (e) => console.error(e)
        });
        _videoEncoder.configure({
            codec: 'vp09.00.10.08',
            width: 1280,
            height: 720,
            bitrate: 1e6
        });

        let _audioEncoder = new AudioEncoder({
            output: (chunk, meta) => _muxer.addAudioChunk(chunk, meta),
            error: (e) => console.error(e)
        });
        _audioEncoder.configure({
            codec: 'opus',
            numberOfChannels: 1,
            sampleRate: audioSampleRate1,
            bitrate: 64000
        });

        writeAudioToEncoder(_audioEncoder, _audioTrack);
        writeAudioToEncoder(_audioEncoder, _audioTrack1);

        muxer = _muxer;
        audioEncoder = _audioEncoder;
        audioTrack = _audioTrack;
        audioTrack1 = _audioTrack1;
    }

    function writeAudioToEncoder(audioEncoder: AudioEncoder, audioTrack: MediaStreamTrack) {
        // Create a MediaStreamTrackProcessor to get AudioData chunks from the audio track
        let trackProcessor = new MediaStreamTrackProcessor({ track: audioTrack });
        let consumer = new WritableStream({
            write(audioData) {
                audioEncoder.encode(audioData);
                audioData.close();
            }
        });
        trackProcessor.readable.pipeTo(consumer);
    }

    let frameCounter = 0;
    function encodeVideoFrame(videoEncoder: VideoEncoder) {
        let frame = new VideoFrame(canvas, {
            timestamp: ((frameCounter * 1000) / 30) * 1000
        });

        frameCounter++;

        videoEncoder.encode(frame, { keyFrame: frameCounter % 30 === 0 });
        frame.close();
    }

    const endRecording = async () => {
        audioTrack?.stop();
        audioTrack1?.stop();

        await audioEncoder?.flush();
        await videoEncoder?.flush();
        muxer?.finalize();

        if (muxer) {
            let { buffer } = muxer.target;
            downloadBlob(new Blob([buffer]));
        }

        audioEncoder = null;
        videoEncoder = null;
        muxer = null;
    };

    const downloadBlob = (blob: Blob) => {
        let url = window.URL.createObjectURL(blob);
        let a = document.createElement('a');
        a.style.display = 'none';
        a.href = url;
        a.download = 'picasso.webm';
        document.body.appendChild(a);
        a.click();
        window.URL.revokeObjectURL(url);
    };

I have a couple of questions. Can this library merge two audio segments into one media file? And is it possible to process videos without using Canvas?

Vanilagy commented 1 year ago

Hey!

So what you'll want to do is combine (mix) the two audio signals before encoding and muxing them, rather than muxing them separately and trying to combine them afterwards. We can do this easily with the Web Audio API, by hooking up a couple of MediaStreamAudioSourceNodes to a MediaStreamAudioDestinationNode:

let audioContext = new AudioContext()

// Create MediaStreamSource nodes
let micStream = await navigator.mediaDevices.getUserMedia({ audio: true })
let micSource = audioContext.createMediaStreamSource(micStream)
let micGain = audioContext.createGain()

let displayStream = await navigator.mediaDevices.getDisplayMedia({ video: true, audio: true })
let displaySource = audioContext.createMediaStreamSource(displayStream)
let displayGain = audioContext.createGain()

// Create the MediaStreamDestination
let destination = audioContext.createMediaStreamDestination()

// Connect the microphone source to gain node and destination
micSource.connect(micGain)
micGain.connect(destination)

// Connect the display source to gain node and destination
displaySource.connect(displayGain)
displayGain.connect(destination)

// Set whatever volumes you want
micGain.gain.value = 1
displayGain.gain.value = 0.7

// Create the MediaStreamTrackProcessor
let trackProcessor = new MediaStreamTrackProcessor({ track: destination.stream.getAudioTracks()[0] })
let consumer = new WritableStream({
    write(audioData) {
        // Assuming the AudioEncoder and stuff are set up already
        audioEncoder.encode(audioData);
        audioData.close();
    }
});
trackProcessor.readable.pipeTo(consumer);

I added some GainNodes to also control the volume, which you might wanna do.
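
One thing to watch out for, I believe: the AudioData coming out of the destination node's track will run at the AudioContext's sample rate, so you'll probably want to use audioContext.sampleRate when configuring the AudioEncoder (and the muxer's audio track) instead of the value you read from getCapabilities(). Something like:

// Assumption on my end: the mixed AudioData arrives at the context's sample rate,
// so the encoder (and muxer) config should use that same rate
audioEncoder.configure({
    codec: 'opus',
    numberOfChannels: 1,
    sampleRate: audioContext.sampleRate,
    bitrate: 64000
})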

For your second question, of course it is. You can use a second MediaStreamTrackProcessor to get VideoFrames from a video track, which is likely what you want to do for capturing the recorded screen. Additionally, you can create VideoFrames manually. You're not limited to canvas; that's just something I did in my demo. To my knowledge, you can also draw a MediaStream directly to a canvas by going through a <video> element with its srcObject set, and then use that to generate new VideoFrames.
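
Roughly, the track-processor route could look like this (untested sketch, assuming displayStream and videoEncoder are set up like in your snippet above):

let frameCounter = 0

// Get VideoFrames straight from the captured screen track - no canvas involved
let videoTrack = displayStream.getVideoTracks()[0]
let videoTrackProcessor = new MediaStreamTrackProcessor({ track: videoTrack })

let videoConsumer = new WritableStream({
    write(videoFrame) {
        // The frames already carry their own timestamps, so we can hand them to the
        // encoder as-is; force a key frame every so often so the file stays seekable
        videoEncoder.encode(videoFrame, { keyFrame: frameCounter % 30 === 0 })
        frameCounter++
        videoFrame.close()
    }
})
videoTrackProcessor.readable.pipeTo(videoConsumer)

Since those frame timestamps won't start at zero, the firstTimestampBehavior: 'offset' option you already set on the muxer takes care of that.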

Vanilagy commented 1 year ago

Do you still need help or can I close this issue?