PaddlePaddle / PaddleSpeech

Easy-to-use Speech Toolkit including Self-Supervised Learning model, SOTA/Streaming ASR with punctuation, Streaming TTS with text frontend, Speaker Verification System, End-to-End Speech Translation and Keyword Spotting. Won NAACL2022 Best Demo Award.
https://paddlespeech.readthedocs.io
Apache License 2.0
10.56k stars 1.81k forks source link

websockets.exceptions.InvalidStatusCode: server rejected WebSocket connection: HTTP 403 #3231

Open monkeycc opened 1 year ago

monkeycc commented 1 year ago
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
record wave from the mic
"""
import asyncio
import json
import logging
import threading
import wave
from signal import SIGINT
from signal import SIGTERM
import signal
import pyaudio
import websockets

class ASRWsAudioHandler(threading.Thread):
    def __init__(self, url="127.0.0.1", port=8090):
        threading.Thread.__init__(self)
        self.url = url
        self.port = port
        self.url = "ws://" + self.url + ":" + str(self.port) + "/ws/asr"
        self.fileName = "./output.wav"
        self.chunk = 5120
        self.format = pyaudio.paInt16
        self.channels = 1
        self.rate = 16000
        self._running = True
        self._frames = []
        self.data_backup = []

    def startrecord(self):
        """
        start a new thread to record wave
        """
        threading._start_new_thread(self.recording, ())

    def recording(self):
        """
        recording wave
        """
        self._running = True
        self._frames = []
        p = pyaudio.PyAudio()
        stream = p.open(
            format=self.format,
            channels=self.channels,
            rate=self.rate,
            input=True,
            frames_per_buffer=self.chunk)
        while (self._running):
            data = stream.read(self.chunk)
            self._frames.append(data)
            self.data_backup.append(data)

        stream.stop_stream()
        stream.close()
        p.terminate()

    def save(self):
        """
        save wave data
        """
        p = pyaudio.PyAudio()
        wf = wave.open(self.fileName, 'wb')
        wf.setnchannels(self.channels)
        wf.setsampwidth(p.get_sample_size(self.format))
        wf.setframerate(self.rate)
        wf.writeframes(b''.join(self.data_backup))
        wf.close()
        p.terminate()

    def stoprecord(self):
        """
        stop recording
        """
        self._running = False

    async def run(self):
        aa = input("是否开始录音?   (y/n)")
        if aa.strip() == "y":
            self.startrecord()
            logging.info("*" * 10 + "开始录音,请输入语音")

            async with websockets.connect(self.url) as ws:
                # 发送开始指令
                audio_info = json.dumps(
                    {
                        "name": "test.wav",
                        "signal": "start",
                        "nbest": 5
                    },
                    sort_keys=True,
                    indent=4,
                    separators=(',', ': '))
                await ws.send(audio_info)
                msg = await ws.recv()
                logging.info("receive msg={}".format(msg))

                # send bytes data
                logging.info("结束录音请: Ctrl + c。继续请按回车。")
                try:
                    while True:
                        while len(self._frames) > 0:
                            await ws.send(self._frames.pop(0))
                            msg = await ws.recv()
                            msg = json.loads(msg)
                            logging.info("receive msg={}".format(msg))
                except asyncio.CancelledError:
                    # quit
                    # send finished 
                    audio_info = json.dumps(
                        {
                            "name": "test.wav",
                            "signal": "end",
                            "nbest": 5
                        },
                        sort_keys=True,
                        indent=4,
                        separators=(',', ': '))
                    await ws.send(audio_info)
                    msg = await ws.recv()
                    logging.info("receive msg={}".format(msg))

                    self.stoprecord()
                    logging.info("*" * 10 + "录音结束")
                    self.save()
        elif aa.strip() == "n":
            exit()
        else:
            print("无效输入!")
            exit()

if __name__ == "__main__":

    logging.basicConfig(level=logging.INFO)
    logging.info("asr websocket client start")

    handler = ASRWsAudioHandler("127.0.0.1", 8090)
    loop = asyncio.get_event_loop()
    main_task = asyncio.ensure_future(handler.run())

    signal.signal(signal.SIGINT, main_task.cancel)
    signal.signal(signal.SIGTERM, main_task.cancel)

    # for signal in [SIGINT, SIGTERM]:
    #     loop.add_signal_handler(signal, main_task.cancel)
    try:
        loop.run_until_complete(main_task)
    finally:
        loop.close()

    logging.info("asr websocket client finished")

https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/paddlespeech/server/tests/asr/online/microphone_client.py


INFO:root:asr websocket client start
是否开始录音?   (y/n)y
INFO:root:**********开始录音,请输入语音
Traceback (most recent call last):
  File "microphone_client.py", line 163, in <module>
    loop.run_until_complete(main_task)
  File "D:\anaconda3\envs\PaddleSpeech\lib\asyncio\base_events.py", line 587, in run_until_complete
    return future.result()
  File "microphone_client.py", line 97, in run
    async with websockets.connect(self.url) as ws:
  File "D:\anaconda3\envs\PaddleSpeech\lib\site-packages\websockets\legacy\client.py", line 637, in __aenter__
    return await self
  File "D:\anaconda3\envs\PaddleSpeech\lib\site-packages\websockets\legacy\client.py", line 655, in __await_impl_timeout__
    return await self.__await_impl__()
  File "D:\anaconda3\envs\PaddleSpeech\lib\site-packages\websockets\legacy\client.py", line 667, in __await_impl__
    extra_headers=protocol.extra_headers,
  File "D:\anaconda3\envs\PaddleSpeech\lib\site-packages\websockets\legacy\client.py", line 329, in handshake
    raise InvalidStatusCode(status_code, response_headers)
websockets.exceptions.InvalidStatusCode: server rejected WebSocket connection: HTTP 403

服务端

2023-05-06 19:26:12.167 | INFO     | paddlespeech.s2t.modules.embedding:__init__:150 - max len: 5000
[2023-05-06 19:26:12,835] [    INFO] - Initialize ASR server engine successfully on device: gpu:0.
INFO:     Started server process [7548]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
INFO:     ('127.0.0.1', 58695) - "WebSocket /ws/asr" 403
INFO:     connection failed (403 Forbidden)
INFO:     connection closed
INFO:     ('127.0.0.1', 58808) - "WebSocket /ws/asr" 403
INFO:     connection failed (403 Forbidden)
INFO:     connection closed
INFO:     ('127.0.0.1', 58819) - "WebSocket /ws/asr" 403
INFO:     connection failed (403 Forbidden)
INFO:     connection closed

-----

paddlespeech_server start --config_file ./conf/ws_conformer_application.yaml
D:\anaconda3\envs\PaddleSpeech\lib\site-packages\pkg_resources\__init__.py:121: DeprecationWarning: pkg_resources is deprecated as an API
  warnings.warn("pkg_resources is deprecated as an API", DeprecationWarning)
D:\anaconda3\envs\PaddleSpeech\lib\site-packages\pkg_resources\__init__.py:2870: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('mpl_toolkits')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
D:\anaconda3\envs\PaddleSpeech\lib\site-packages\pkg_resources\__init__.py:2870: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google')`.
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
[2023-05-06 14:45:10,852] [    INFO] - start to init the engine
[2023-05-06 14:45:10,852] [    INFO] - asr : online engine.
2023-05-06 14:45:24.597 | INFO     | paddlespeech.s2t.modules.ctc:<module>:45 - paddlespeech_ctcdecoders not installed!
2023-05-06 14:45:25.610 | INFO     | paddlespeech.s2t.modules.embedding:__init__:150 - max len: 5000
[2023-05-06 14:45:27,743] [    INFO] - Initialize ASR server engine successfully on device: cpu.
INFO:     Started server process [1252]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
INFO:     ('127.0.0.1', 50247) - "WebSocket /ws/asr" 403
INFO:     connection failed (403 Forbidden)
INFO:     connection closed
Fmaj7 commented 1 year ago

我也遇到相同的问题,有什么头绪吗?

monkeycc commented 1 year ago

不管是win还是linux 都是这样问题

应该是框架问题 没什么头绪 看官方了

qingjiaozyn commented 7 months ago

有解决的吗?请求方法

LDBS666 commented 7 months ago

我也是这个问题,插眼

happywch commented 4 months ago

我通过更改34行代码为 self.url = "ws://" + self.url + ":" + str(self.port) + "/paddlespeech/asr/streaming" 并且在119行增加 msg = json.loads(msg) 完成了麦克风识别 @monkeycc @Fmaj7 @LDBS666 @jzhang533