spatialaudio / python-sounddevice

:sound: Play and Record Sound with Python :snake:
https://python-sounddevice.readthedocs.io/
MIT License
980 stars 145 forks source link

Support automatic sample rate conversion using the PortAudio paWinWasapiAutoConvert flag in shared mode #492

Closed yinkaisheng closed 8 months ago

yinkaisheng commented 9 months ago

The default sample rate of my WASAPI output device is 48000 Hz. When I use it as the output device and play a 44100 Hz WAV file, an error occurs: sounddevice.PortAudioError: Error opening RawOutputStream: Invalid sample rate [PaErrorCode -9997]

I read the PortAudio source code and found that it has a flag, paWinWasapiAutoConvert = 64, that enables automatic sample rate conversion. If we pass this flag to PortAudio in shared mode, we can play audio whose sample rate differs from the WASAPI device's default. I tested it and it worked. The following code works with sounddevice 0.4.6 after setting

exSetting = sd.WasapiSettings()
exSetting._streaminfo.flags = paWinWasapiAutoConvert # set the flag 64, not needed if pull request is merged
#!python3
# -*- coding: utf-8 -*-
import os
import sys
import time
import wave
import queue
import sounddevice as sd

# Report which sounddevice release the script is running against.
print(f'sounddevice.__version__= {sd.__version__}')

# Copy of the PortAudio flag, see include\pa_win_wasapi.h enum PaWasapiFlags
# (bit 6, i.e. 64).
paWinWasapiAutoConvert = 1 << 6
# Bounded queue (at most 10 chunks) carrying raw audio from main() to
# playCallback().
_audioQueue = queue.Queue(10)

class PlaybackInfo:
    """Mutable record describing the file being played and callback progress.

    All fields start at zero and are filled in by the code that opens the
    WAV file and by the stream callback.
    """

    def __init__(self):
        # callbackCount:  number of times the stream callback has run
        # sampleRate:     frame rate of the WAV file
        # channels:       channel count of the WAV file
        # sampleWidth:    bytes per single-channel sample
        # bytesPerSample: sampleWidth * channels (i.e. bytes per frame)
        self.callbackCount = self.sampleRate = self.channels = 0
        self.sampleWidth = self.bytesPerSample = 0

# Shared playback state: written by main(), read and updated by playCallback().
_playInfo = PlaybackInfo()

def playCallback(outdata, sampleCount, timeInfo, status):
    """Stream callback: fill ``outdata`` with the next chunk from ``_audioQueue``.

    A chunk that exactly fills the buffer is copied as-is.  Any other chunk
    (including "no chunk available") is copied, the rest of the buffer is
    zero-filled, and ``sd.CallbackStop`` is raised to end the stream.

    Raises:
        sd.CallbackAbort: If ``status`` reports an output underflow.
        sd.CallbackStop: If the chunk taken from the queue does not fill the
            whole buffer.
    """
    if status.output_underflow:
        print('Output underflow: increase blocksize??')
        raise sd.CallbackAbort

    bufferSize = len(outdata)
    _playInfo.callbackCount += 1
    # Dump the raw callback arguments once, for diagnostics.
    if _playInfo.callbackCount == 1:
        print(outdata, sampleCount, timeInfo, status)

    # Never block inside the callback: an empty queue yields an empty chunk.
    chunk = b'' if _audioQueue.empty() else _audioQueue.get()
    chunkSize = len(chunk)

    # Only the first five callbacks are logged.
    if _playInfo.callbackCount <= 5:
        print(f'buffer len {bufferSize}, get data(len {chunkSize}) from queue, qsize {_audioQueue.qsize()}')

    if chunkSize != bufferSize:
        # Short (or missing) chunk: play what there is, pad with zero bytes
        # and ask for the stream to stop (see sd._wrap_callback).
        padding = bufferSize - chunkSize
        outdata[:chunkSize] = chunk
        outdata[chunkSize:] = b'\x00' * padding
        print(f'buffer len {bufferSize}, get data(len {chunkSize}) from queue, qsize {_audioQueue.qsize()}, stop')
        raise sd.CallbackStop

    outdata[:] = chunk

def main(wavePath: str):
    """Stream a WAV file to the default output device of the selected host API.

    The file's frame rate, channel count and sample width are stored in the
    module-level ``_playInfo``.  Audio is read in 50 ms chunks and passed to
    ``playCallback`` through ``_audioQueue``.  When the host API is WASAPI,
    the ``paWinWasapiAutoConvert`` flag is set on the extra settings before
    the stream is opened.

    Args:
        wavePath: Path of the WAV file to play.

    Raises:
        ValueError: If the file's sample width has no matching stream dtype.
    """
    # Stream dtype for each WAV sample width in bytes.  8-bit WAV PCM is
    # unsigned; 'int24' is the packed 24-bit format of the raw stream classes.
    dtypeBySampleWidth = {1: 'uint8', 2: 'int16', 3: 'int24', 4: 'int32'}
    # Upper bound for one blocking queue operation, so the producer can
    # notice that the stream has stopped.
    pollSeconds = 0.05

    # Open the file in a ``with`` so it is closed even when device lookup or
    # stream creation fails.
    with wave.open(wavePath, 'rb') as fwav:
        _playInfo.sampleRate = fwav.getframerate()
        _playInfo.channels = fwav.getnchannels()
        _playInfo.sampleWidth = fwav.getsampwidth()
        _playInfo.bytesPerSample = _playInfo.sampleWidth * _playInfo.channels

        try:
            dtype = dtypeBySampleWidth[_playInfo.sampleWidth]
        except KeyError:
            raise ValueError(
                f'unsupported sample width: {_playInfo.sampleWidth} bytes') from None

        apiName = 'Windows WASAPI'
        #apiName = 'MME'
        #apiName = 'Windows DirectSound'
        #apiName = 'Windows WDM-KS'
        devices = sd.query_devices()
        # Fall back to the global default output device when the host API
        # named above is not available.
        _, outputDeviceIndex = sd.default.device
        for api in sd.query_hostapis():
            if api['name'] == apiName:
                outputDeviceIndex = api['default_output_device']
                break
        print(f'{apiName} output device: {devices[outputDeviceIndex]}')

        if apiName == 'Windows WASAPI':
            exSetting = sd.WasapiSettings()
            print(f'stream flags {exSetting._streaminfo.flags}')
            # set the flag 64, not needed if pull request is merged;
            # OR it in so flags that are already set are preserved
            exSetting._streaminfo.flags |= paWinWasapiAutoConvert
        else:
            exSetting = None

        sampleCount50ms = _playInfo.sampleRate * 50 // 1000
        # Pre-fill the queue so the very first callback has data.
        audioData = fwav.readframes(sampleCount50ms)
        if audioData:
            print(f'put first {len(audioData)} len data to queue')
            _audioQueue.put(audioData)

        outStream = sd.RawOutputStream(
            device=outputDeviceIndex, samplerate=_playInfo.sampleRate,
            channels=_playInfo.channels, dtype=dtype,
            blocksize=sampleCount50ms, extra_settings=exSetting,
            callback=playCallback)
        with outStream:
            # Feed the queue only while the stream is running: once the
            # callback has raised CallbackStop/CallbackAbort nothing drains
            # the queue and an unbounded put() would block forever.
            while outStream.active:
                audioData = fwav.readframes(sampleCount50ms)
                if not audioData:
                    break
                while outStream.active:
                    try:
                        _audioQueue.put(audioData, timeout=pollSeconds)
                        break
                    except queue.Full:
                        pass
            # Wait for the callback to take the remaining chunks, but give up
            # if the stream stopped early and will never take them.
            while outStream.active and not _audioQueue.empty():
                time.sleep(pollSeconds)
            print('queue empty, stopped')

if __name__ == '__main__':
    # Command-line entry point: play the WAV file given with -f/--file.
    import argparse
    parser = argparse.ArgumentParser()
    # The path is mandatory: without it main() would pass None to wave.open()
    # and fail with an obscure error, so let argparse report the omission.
    parser.add_argument('-f', '--file', required=True, help='wave file path')

    args = parser.parse_args()
    main(args.file)

There is an article about wasapi-sample-rate-conversion: https://markheath.net/post/wasapi-sample-rate-conversion