RTCAudioDevice missing delegate and RTCPeerConnectionFactory misses initialisation with audioDevice #94

isandeepj commented 12 months ago

@cloudwebrtc & @hiroshihorie, can you please check? I'm unable to test locally because I'm running into issues generating the framework.


isandeepj commented 12 months ago

@hiroshihorie, if all is good, can you please release a new version with this fix soon? It's a blocker for me!

hiroshihorie commented 12 months ago

Can you explain what you are trying to do in this PR?

hiroshihorie commented 12 months ago

I confirmed this will break, since RTCAudioDeviceModule only gets allocated when bypassVoiceProcessing is true. Can you explain the purpose of the PR so I can make a separate patch to unblock you?

isandeepj commented 12 months ago

@hiroshihorie We want to record the audio of a live stream for a specific duration on the client side, and then merge that audio file with video images to generate "special moment" captures of the live stream for users. Therefore, we use the class below and pass an instance of it to RTCPeerConnectionFactory like this:

internal class Engine....
static private var audioDevice: AVAudioEngineRTCAudioDevice = AVAudioEngineRTCAudioDevice()
return RTCPeerConnectionFactory(encoderFactory: encoderFactory,
                                        decoderFactory: decoderFactory,
                                        audioDevice: audioDevice)


import Foundation
import WebRTC
import AVFoundation
import AudioToolbox
/// Custom WebRTC audio device implemented on top of `AVAudioEngine`.
/// NOTE(review): this listing is abridged — many bodies and closing braces are elided.
final class AVAudioEngineRTCAudioDevice: NSObject {
    // Shared audio session; queried for sample rate, channel counts, latency and I/O buffer duration.
    let audioSession = AVAudioSession.sharedInstance()
    // Observers returned by `subscribeAudioSessionNotifications()`; nil while not subscribed.
    private var subscribtions: [Any]?
    // Queue the `isInterrupted` and `delegate` accessors hop onto to read/write their backing storage.
    private let queue = DispatchQueue(label: "AVAudioEngineRTCAudioDevice")

    // Player node connected to the main mixer to loop `backgroundSound` while playing out.
    private lazy var backgroundPlayer = AVAudioPlayerNode()
    // Optional buffer looped by `backgroundPlayer`; it is never assigned in the visible code.
    private var backgroundSound: AVAudioPCMBuffer?

    // Engine instance; created on demand in `updateEngine()` and cleared in `shutdownEngine()`.
    private var audioEngine: AVAudioEngine?
    // Token of the `AVAudioEngineConfigurationChange` observer registered when the engine is created.
    private var audioEngineObserver: Any?
    // EQ node placed between the engine's input node and the sink node; also tapped for microphone recording.
    private var audioEQ = AVAudioUnitEQ(numberOfBands: 2)

    // Extended Audio File Services handles written by the recording taps:
    // `outref` receives the main mixer output, `outrefMic` receives the EQ (microphone path) output.
    private var outref: ExtAudioFileRef?
    private var outrefMic: ExtAudioFileRef?

    // NOTE(review): not referenced anywhere in the visible code — possibly unused; confirm.
    private var audioConverer: AVAudioConverter?
    // Sink node that delivers recorded audio to WebRTC; present only while recording is set up.
    private var audioSinkNode: AVAudioSinkNode?
    // Source node that pulls playout audio from WebRTC; present only while playout is set up.
    private var audioSourceNode: AVAudioSourceNode?
    // Desired playout / recording state, toggled by start/stopPlayout and start/stopRecording.
    private var shouldPlay = false
    private var shouldRecord = false

    // Format reported to WebRTC for recorded audio: interleaved 16-bit integer PCM.
    // Defaults to the session sample rate and at most two input channels;
    // `updateEngine()` overwrites it with the format derived from the engine's input node.
    private lazy var audioInputFormat = AVAudioFormat(commonFormat: .pcmFormatInt16,
                                                      sampleRate: audioSession.sampleRate,
                                                      channels: AVAudioChannelCount(min(2, audioSession.inputNumberOfChannels)),
                                                      interleaved: true) {
        didSet {
            // Only react to actual changes (the reaction itself is elided in this listing).
            guard oldValue != audioInputFormat else { return }

    // Format reported to WebRTC for playout audio: interleaved 16-bit integer PCM.
    // Defaults to the session sample rate and at most two output channels;
    // `updateEngine()` overwrites it with the format derived from the engine's output node.
    private lazy var audioOutputFormat = AVAudioFormat(commonFormat: .pcmFormatInt16,
                                                       sampleRate: audioSession.sampleRate,
                                                       channels: AVAudioChannelCount(min(2, audioSession.outputNumberOfChannels)),
                                                       interleaved: true) {
        didSet {
            // Only react to actual changes (the reaction itself is elided in this listing).
            guard oldValue != audioOutputFormat else { return }

    // Backing storage for `isInterrupted`; the accessor below reads and writes it via `queue.sync`.
    private var isInterrupted_ = false
    // Whether the audio session is currently interrupted; set by the interruption handlers.
    private var isInterrupted: Bool {
        get { queue.sync { isInterrupted_ } }
        set { queue.sync { isInterrupted_ = newValue } }

    // Backing storage for `delegate`; the accessor below reads and writes it via `queue.sync`.
    // NOTE(review): unlike `isInterrupted_`, this is not `private` — confirm whether that is intentional.
    var delegate_: RTCAudioDeviceDelegate?
    // WebRTC delegate supplied in `initialize(with:)` and cleared in `terminateDevice()`.
    private var delegate: RTCAudioDeviceDelegate? {
        get { queue.sync { delegate_ } }
        set { queue.sync { delegate_ = newValue } }

    // Input latency, initialised from the audio session and refreshed in `updateEngine()`
    // when the recording path is (re)built.
    private (set) lazy var inputLatency = audioSession.inputLatency {
        didSet {
            // Only react to actual changes (the reaction itself is elided in this listing).
            guard oldValue != inputLatency else { return }

    // Output latency, initialised from the audio session and refreshed in `updateEngine()`
    // when the playout path is (re)built.
    private (set) lazy var outputLatency = audioSession.outputLatency {
        didSet {
            // Only react to actual changes (the reaction itself is elided in this listing).
            guard oldValue != outputLatency else { return }

    // Designated initializer (overrides `NSObject.init`); its body is elided in this listing.
    override init() {

    /// Starts capturing audio into two WAV files derived from `filePath`:
    /// `<filePath>.0.wav` for the main mixer output and `<filePath>.1.wav` for the EQ (microphone path) output.
    /// Does nothing when no audio engine exists yet.
    /// - Parameter filePath: Base path; the `.0.wav` / `.1.wav` suffixes are appended to it.
    func startRecordingToFile(_ filePath: String) {
        guard let audioEngine = audioEngine else { return }

        // Each file is written in the output format of the node it is tapped from.
        let format0 = audioEngine.mainMixerNode.outputFormat(forBus: 0)
        let format1 = audioEQ.outputFormat(forBus: 0)

        // Create file to save recording
        // NOTE(review): the OSStatus result is discarded (`_ =`), so a failed file creation goes unnoticed.
        let url0 = URL(fileURLWithPath: filePath.appending(".0.wav"))
        _ = ExtAudioFileCreateWithURL(url0 as CFURL,

        let url1 = URL(fileURLWithPath: filePath.appending(".1.wav"))
        _ = ExtAudioFileCreateWithURL(url1 as CFURL,

        // Requested tap buffer size: 0.4 s worth of frames at the mixer sample rate, but at least 1024 frames.
        let bufferSize = AVAudioFrameCount(max(format0.sampleRate * 0.4, 1024))

        // Tap the mixer output and append each delivered buffer to the first file.
        // NOTE(review): the write uses the requested `bufferSize`, not `buffer.frameLength` — confirm the tap
        // always delivers exactly `bufferSize` frames, otherwise the frame count passed here is wrong.
        audioEngine.mainMixerNode.installTap(onBus: 0, bufferSize: bufferSize, format: format0) { [weak self] (buffer, _) in
            guard let self = self, let outref = self.outref else { return }
            ExtAudioFileWriteAsync(outref, bufferSize, buffer.audioBufferList)

        // Tap the EQ output and append each delivered buffer to the second file (same caveat as above).
        audioEQ.installTap(onBus: 0, bufferSize: bufferSize, format: format1) { [weak self] (buffer, _) in
            guard let self = self, let outrefMic = self.outrefMic else { return }
            ExtAudioFileWriteAsync(outrefMic, bufferSize, buffer.audioBufferList)

    /// Stops file recording: removes both taps and clears the two file handles.
    /// Does nothing when no audio engine exists.
    func stopRecordingToFile() {
        guard let audioEngine = audioEngine else { return }
        // Remove the taps installed by `startRecordingToFile(_:)` on the mixer and on the EQ node.
        audioEngine.mainMixerNode.removeTap(onBus: 0)
        audioEQ.removeTap(onBus: 0)

        // Clear the mixer file handle.
        // NOTE(review): the call that disposes the handle is presumably elided here — confirm it exists.
        if let outref = outref {
            self.outref = nil

        // Clear the microphone file handle (same caveat as above).
        if let outrefMic = outrefMic {
            self.outrefMic = nil

    /// Tears down the audio engine and clears everything attached to it: the configuration-change
    /// observer, the sink and source nodes, and finally the engine reference itself.
    /// NOTE(review): the bodies of most branches below are elided in this listing.
    private func shutdownEngine() {
        // A file recording is still in progress (handling elided).
        if self.outref != nil {
        // Nothing to shut down when there is no engine (early-exit body elided).
        guard let audioEngine = audioEngine else {
        // Running engine (the stop call is presumably elided here).
        if audioEngine.isRunning {
        // Drop the configuration-change observer token.
        if let audioEngineObserver = audioEngineObserver {
            self.audioEngineObserver = nil
        // Drop the recording sink node.
        if let audioSinkNode = self.audioSinkNode {
            self.audioSinkNode = nil
        // Drop the playout source node.
        if let audioSourceNode = audioSourceNode {
            self.audioSourceNode = nil
        self.audioEngine = nil

    /// Reconciles the `AVAudioEngine` graph with the current `shouldPlay` / `shouldRecord` / interruption state.
    ///
    /// Steps visible in this listing:
    /// 1. Bail out (shutting the engine down) when there is no delegate, nothing to play or record, or an interruption is active.
    /// 2. Recreate the engine when the voice-processing setting has to change; otherwise reuse or create it.
    /// 3. Enable/disable input and output on the I/O audio unit to match `shouldRecord` / `shouldPlay`.
    /// 4. Add or remove the sink node (recording path: input -> EQ -> sink -> WebRTC).
    /// 5. Add or remove the source node (playout path: WebRTC -> source -> main mixer).
    /// 6. Start the engine if it is not running and, if configured, start the looping background sound.
    ///
    /// NOTE(review): many closing braces and some statements are elided in this listing.
    private func updateEngine() {
        guard let delegate = delegate,
              shouldPlay || shouldRecord,
              !isInterrupted else {
            printDebug("Audio Engine must be stopped: shouldPla=\(shouldPlay), shouldRecord=\(shouldRecord), isInterrupted=\(isInterrupted)")
            measureTime(label: "Shutdown AVAudioEngine") {

        // An existing engine whose voice-processing state differs from what the session supports
        // has to be shut down (per the log message below) so that it can be recreated.
        let useVoiceProcessingAudioUnit = audioSession.supportsVoiceProcessing
        if let audioEngine = audioEngine, audioEngine.outputNode.isVoiceProcessingEnabled != useVoiceProcessingAudioUnit {
            printDebug("Shutdown AVAudioEngine to toggle usage of Voice Processing I/O")

        // Reuse the current engine when there is one; otherwise build and configure a new one.
        var audioEngine: AVAudioEngine
        if let engine = self.audioEngine {
            audioEngine = engine
        } else {
            audioEngine = AVAudioEngine()
            audioEngine.isAutoShutdownEnabled = true
            // NOTE: Toggle the voice processing state via outputNode, so that inputNode is not created eagerly.
            // Also do it right after creating the AVAudioEngine to avoid random crashes observed when voice processing was changed at later stages.
            if audioEngine.outputNode.isVoiceProcessingEnabled != useVoiceProcessingAudioUnit {
              do {
                // Use VPIO as the I/O audio unit.
                try audioEngine.outputNode.setVoiceProcessingEnabled(useVoiceProcessingAudioUnit)
              } catch let e {
                  printDebug("setVoiceProcessingEnabled error: \(e)")
            // Background sound configured (body elided; presumably attaches `backgroundPlayer` — confirm).
            if backgroundSound != nil {
            audioEngine.connect(audioEngine.mainMixerNode, to: audioEngine.outputNode, format: audioOutputFormat)

            // Observe configuration changes of this engine; the token is kept for later removal.
            audioEngineObserver = NotificationCenter.default.addObserver(forName: NSNotification.Name.AVAudioEngineConfigurationChange,
                                                                         object: audioEngine,
                                                                         queue: nil,
                                                                         using: { [weak self] _ in

            audioEngine.dumpState(label: "State of newly created audio engine")
            self.audioEngine = audioEngine

        // Bring the I/O unit's input/output enablement in line with the desired state.
        // A running engine is handled first (per the label below, it is stopped before the change).
        let ioAudioUnit = audioEngine.outputNode.auAudioUnit
        if ioAudioUnit.isInputEnabled != shouldRecord ||
            ioAudioUnit.isOutputEnabled != shouldPlay {
            if audioEngine.isRunning {
                measureTime(label: "AVAudioEngine stop (to enable/disable AUAudioUnit output/input)") {

            measureTime(label: "Change input/output enabled/disabled") {
                ioAudioUnit.isInputEnabled = shouldRecord
                ioAudioUnit.isOutputEnabled = shouldPlay

        // Recording path: build the sink node once; remove it when recording is no longer wanted.
        if shouldRecord {
            if audioSinkNode == nil {
                measureTime(label: "Add AVAudioSinkNode") {
                    let deliverRecordedData = delegate.deliverRecordedData
                    let inputFormat = audioEngine.inputNode.outputFormat(forBus: 1)
                    guard inputFormat.isSampleRateAndChannelCountValid else {
                        printDebug("Invalid input format: \(inputFormat)")

                    // Format handed to WebRTC: same rate and channel count as the input, but interleaved Int16.
                    guard let rtcRecordFormat = AVAudioFormat(commonFormat: .pcmFormatInt16,
                                                        sampleRate: inputFormat.sampleRate,
                                                        channels: inputFormat.channelCount,
                                                              interleaved: true) else { return }
                    audioEngine.connect(audioEngine.inputNode, to: audioEQ, format: inputFormat)

                    audioInputFormat = rtcRecordFormat
                    inputLatency = audioSession.inputLatency

                    // NOTE: AVAudioSinkNode provides audio data with HW sample rate in 32-bit float format,
                    // WebRTC requires 16-bit int format, so do the conversion
                    guard let converter = SimpleAudioConverter(from: inputFormat, to: rtcRecordFormat) else { return }

                    // Render block invoked by WebRTC: unpacks the (converter, input buffer list) tuple passed
                    // through `renderContext` and converts the captured frames into WebRTC's buffer list.
                    let customRenderBlock: RTCAudioDeviceRenderRecordedDataBlock = { _, _, _, frameCount, abl, renderContext in
                        let (converter, inputData) = renderContext!.assumingMemoryBound(to: (Unmanaged<SimpleAudioConverter>, UnsafeMutablePointer<AudioBufferList>).self).pointee
                        return converter.takeUnretainedValue().convert(framesCount: frameCount, from: inputData, to: abl)

                    // Sink receiver: forwards each captured chunk to WebRTC, passing the converter and the
                    // captured buffer list as the render context consumed by `customRenderBlock`.
                    let audioSink = AVAudioSinkNode(receiverBlock: { (timestamp, framesCount, inputData) -> OSStatus in
                        var flags: AudioUnitRenderActionFlags = []
                        var renderContext = (Unmanaged.passUnretained(converter), inputData)
                        return deliverRecordedData(&flags, timestamp, 1, framesCount, nil, &renderContext, customRenderBlock)

                    measureTime(label: "Attach AVAudioSinkNode") {

                    measureTime(label: "Connect AVAudioSinkNode") {
                        audioEngine.connect(audioEQ, to: audioSink, format: inputFormat)

                    audioSinkNode = audioSink
        } else {
            // Recording not wanted: drop the sink node (detach calls presumably elided).
            if let audioSinkNode = audioSinkNode {
                self.audioSinkNode = nil

        // Playout path: build the source node once; remove it when playout is no longer wanted.
        if shouldPlay {
            if audioSourceNode == nil {
                measureTime(label: "Add AVAudioSourceNode") {
                    let outputFormat = audioEngine.outputNode.outputFormat(forBus: 0)
                    guard outputFormat.isSampleRateAndChannelCountValid else {
                        printDebug("Invalid audio output format detected: \(outputFormat)")

                    // Format requested from WebRTC: same rate and channel count as the output, but interleaved Int16.
                    guard let rtcPlayFormat = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: outputFormat.sampleRate, channels: outputFormat.channelCount, interleaved: true) else {

                    audioOutputFormat = rtcPlayFormat
                    outputLatency = audioSession.outputLatency

                    // Source render block: pulls playout data from WebRTC and reports silence to the engine
                    // when WebRTC sets the "output is silence" flag.
                    let getPlayoutData = delegate.getPlayoutData
                    let audioSource = AVAudioSourceNode(format: rtcPlayFormat, renderBlock: { (isSilence, timestamp, frameCount, outputData) -> OSStatus in
                        var flags: AudioUnitRenderActionFlags = []
                        let res = getPlayoutData(&flags, timestamp, 0, frameCount, outputData)
                        guard noErr == res else {
                            return res
                        isSilence.initialize(to: ObjCBool(flags.contains(AudioUnitRenderActionFlags.unitRenderAction_OutputIsSilence)))
                        return noErr

                    measureTime(label: "Attach AVAudioSourceNode") {

                    measureTime(label: "Connect AVAudioSourceNode") {
                        audioEngine.connect(audioSource, to: audioEngine.mainMixerNode, format: outputFormat)

                    self.audioSourceNode = audioSource
        } else {
            // Playout not wanted: drop the source node (detach calls presumably elided).
            if let audioSourceNode = audioSourceNode {
                self.audioSourceNode = nil

        // Start the engine if it is not already running; a start failure is only logged.
        if !audioEngine.isRunning {
            measureTime(label: "Prepare AVAudioEngine") {

            measureTime(label: "Start AVAudioEngine") {
                do {
                    try audioEngine.start()
                } catch let e {
                    printDebug("Unable to start audio engine: \(e)")

            // With the engine running and playout active, loop the background sound through the main mixer.
            if let backgroundSound = backgroundSound, audioEngine.isRunning, shouldPlay {
                measureTime(label: "Background music") {
                    audioEngine.connect(backgroundPlayer, to: audioEngine.mainMixerNode, format: nil)
                    if !backgroundPlayer.isPlaying {
                        backgroundPlayer.scheduleBuffer(backgroundSound, at: nil, options: [.loops], completionHandler: nil)

        audioEngine.dumpState(label: "After updateEngine")

    /// Reacts to an `AVAudioEngineConfigurationChange` notification by scheduling work through the
    /// delegate's `dispatchAsync`. The scheduled work itself is elided in this listing.
    private func handleAudioEngineConfigurationChanged() {
        // Without a delegate there is nothing to dispatch on (early-exit body elided).
        guard let delegate = delegate else {
        delegate.dispatchAsync { [weak self] in

// MARK: - RTCAudioDevice
/// `RTCAudioDevice` conformance: reports device parameters to WebRTC and handles its lifecycle calls.
/// NOTE(review): several bodies (the `measureTime` closures, `isPlaying`, `isRecording`) are elided in this listing.
extension AVAudioEngineRTCAudioDevice: RTCAudioDevice {

    // Sample rate of the current input format; falls back to the session sample rate
    // when the format is missing or reports a non-positive rate.
    var deviceInputSampleRate: Double {
        guard let sampleRate = audioInputFormat?.sampleRate, sampleRate > 0 else {
            return audioSession.sampleRate
        return sampleRate

    // Sample rate of the current output format, with the same fallback as above.
    var deviceOutputSampleRate: Double {
        guard let sampleRate = audioOutputFormat?.sampleRate, sampleRate > 0 else {
            return audioSession.sampleRate
        return sampleRate

    // Both directions report the session's I/O buffer duration.
    var inputIOBufferDuration: TimeInterval { audioSession.ioBufferDuration }

    var outputIOBufferDuration: TimeInterval { audioSession.ioBufferDuration }

    // Channel count of the current input format; falls back to the session's input channel
    // count capped at 2 when the format is missing or reports zero channels.
    var inputNumberOfChannels: Int {
        guard let channelCount = audioInputFormat?.channelCount, channelCount > 0 else {
            return min(2, audioSession.inputNumberOfChannels)
        return Int(channelCount)

    // Channel count of the current output format, with the analogous fallback.
    var outputNumberOfChannels: Int {
        guard let channelCount = audioOutputFormat?.channelCount, channelCount > 0 else {
            return min(2, audioSession.outputNumberOfChannels)
        return Int(channelCount)

    // The device counts as initialized exactly while a delegate is set.
    var isInitialized: Bool {
        self.delegate != nil

    /// Stores the WebRTC delegate and subscribes to audio-session notifications (once).
    /// - Returns: `false` when a delegate is already set, `true` otherwise.
    func initialize(with delegate: RTCAudioDeviceDelegate) -> Bool {
        guard self.delegate == nil else { return false }

        if subscribtions == nil {
            subscribtions = self.subscribeAudioSessionNotifications()

        self.delegate = delegate
        return true

    /// Unsubscribes from audio-session notifications, resets the play/record flags and clears the delegate.
    /// - Returns: Always `true` in the visible code.
    func terminateDevice() -> Bool {
        if let subscribtions = subscribtions {
            self.unsubscribeAudioSessionNotifications(observers: subscribtions)
        subscribtions = nil

        shouldPlay = false
        shouldRecord = false
        // Body elided; presumably shuts the engine down — confirm.
        measureTime {
        delegate = nil
        return true

    // Playout has no separate initialization step; it mirrors `isInitialized`.
    var isPlayoutInitialized: Bool { isInitialized }

    func initializePlayout() -> Bool {
        return isPlayoutInitialized

    // Body elided in this listing.
    var isPlaying: Bool {

    /// Marks playout as wanted. The `measureTime` body is elided; presumably it calls `updateEngine()` — confirm.
    func startPlayout() -> Bool {
        shouldPlay = true
        measureTime {
        return true

    /// Marks playout as no longer wanted (same elision as in `startPlayout()`).
    func stopPlayout() -> Bool {
        shouldPlay = false
        measureTime {
        return true

    // Recording has no separate initialization step; it mirrors `isInitialized`.
    var isRecordingInitialized: Bool { isInitialized }

    func initializeRecording() -> Bool {
        return isRecordingInitialized

    // Body elided in this listing.
    var isRecording: Bool {

    /// Marks recording as wanted. The `measureTime` body is elided; presumably it calls `updateEngine()` — confirm.
    func startRecording() -> Bool {
        shouldRecord = true
        measureTime {
        return true

    /// Marks recording as no longer wanted (same elision as in `startRecording()`).
    func stopRecording() -> Bool {
        shouldRecord = false
        measureTime {
        return true

/// `AudioSessionHandler` conformance: callbacks for audio-session interruptions, route changes and
/// media-server events. NOTE(review): most bodies are elided in this listing.
extension AVAudioEngineRTCAudioDevice: AudioSessionHandler {
    /// Marks the device as interrupted and schedules follow-up work through the delegate.
    /// - Parameter applicationWasSuspended: When `true`, the event is not treated as a real interruption.
    func handleInterruptionBegan(applicationWasSuspended: Bool) {
        guard !applicationWasSuspended else {
            // NOTE: Not an actual interruption
        isInterrupted = true
        guard let delegate = delegate else { return }
        // Dispatched work elided; presumably re-evaluates the engine state — confirm.
        delegate.dispatchAsync { [weak self] in

    /// Clears the interrupted flag and schedules follow-up work through the delegate.
    /// NOTE(review): `shouldResume` is not used in the visible part of the body.
    func handleInterruptionEnd(shouldResume: Bool) {
        isInterrupted = false
        guard let delegate = delegate else { return }
        // Dispatched work elided; presumably re-evaluates the engine state — confirm.
        delegate.dispatchAsync { [weak self] in

    // Bodies of the remaining handlers are elided in this listing.
    func handleAudioRouteChange() {

    func handleMediaServerWereReset() {

    func handleMediaServerWereLost() {

Reference taken from: https://github.com/mstyura/RTCAudioDevice/tree/main


hiroshihorie commented 12 months ago

OK, I understand now: you want to pass in an RTCAudioDevice delegate, but it's not working. For your use case, though, you don't need a custom AudioDeviceModule just for recording. Since I recently added custom audio processing, you can implement RTCAudioCustomProcessingDelegate and get the buffer in audioProcessingProcess(audioBuffer: RTCAudioBuffer). You will still need to convert the raw buffer to Apple's format.

isandeepj commented 12 months ago

@hiroshihorie, can you please guide me on how to implement RTCAudioCustomProcessingDelegate to get a buffer?

isandeepj commented 11 months ago

I recorded the audio using RTCAudioCustomProcessingDelegate, so no changes are required now!