Hey, I'm trying to write a small Python script that transcribes voice to text with the Hugging Face Inference API, but nothing happens. Can someone help me?
import argparse
import asyncio
from functools import partial
from huggingface_hub import model_info
from wyoming.flycheck_server import AsyncServer
from wyoming.info import AsrModel, AsrProgram, Attribution, Info
import logging
from handler import HuggingfaceWhisper
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
async def main() -> None:
    """Parse the command line and serve Wyoming ASR requests.

    Command-line options:
        --key: Hugging Face API key; passed to every handler through ``args``.
        --uri: address to listen on (``unix://`` or ``tcp://``).
        --debug: log DEBUG messages in addition to INFO and above.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--key", required=True, help="Your Huggingface API Key")
    parser.add_argument("--uri", required=True, help="unix:// or tcp://")
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()

    # Without any logging configuration only WARNING and above reach the
    # console, so "Ready!" and all debug output would be silently dropped.
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    wyoming_info = Info(
        asr=[
            AsrProgram(
                name="Huggingface Whisper",
                description="Faster Whisper transcription with Whisper Large V3",
                attribution=Attribution(
                    name="Bensonheimer992",
                    url="https://github.com/Bensonheimer992",
                ),
                installed=True,
                version="1.0",
                models=[
                    AsrModel(
                        name="Whisper Large V3",
                        description="The Large Whisper Model",
                        attribution=Attribution(
                            name="OpenAI",
                            url="https://huggingface.co/openai",
                        ),
                        installed=True,
                        languages=["de", "en"],
                        version="3.0",
                    )
                ],
            )
        ],
    )

    server = AsyncServer.from_uri(args.uri)
    LOGGER.info("Ready!")

    # One lock shared by all handlers (each handler holds it while transcribing).
    lock = asyncio.Lock()
    await server.run(
        partial(
            HuggingfaceWhisper,
            wyoming_info,
            args,
            lock,
        )
    )
if __name__ == "__main__":
    # Ctrl-C is the normal way to stop the server; exit without a traceback.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        pass
import argparse
import asyncio
import logging
import os.path
import tempfile
import wave
from typing import Optional
import aiohttp
from wyoming.asr import Transcript, Transcribe
from wyoming.audio import AudioChunk, AudioStop
from wyoming.event import Event
from wyoming.info import Info, Describe
from wyoming.server import AsyncEventHandler
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
# URL the recorded audio is POSTed to for transcription.
# NOTE(review): confirm this is still the current Hugging Face inference endpoint.
API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
class HuggingfaceWhisper(AsyncEventHandler):
    """Wyoming event handler that transcribes speech via the Hugging Face API.

    Incoming audio chunks are written to a temporary WAV file. When the audio
    stream stops, the file is POSTed to ``API_URL`` and the returned text is
    sent back to the client as a ``Transcript`` event.
    """

    def __init__(
        self,
        wyoming_info: Info,
        cliargs: argparse.Namespace,
        lock: asyncio.Lock,
        *args,
        **kwargs,
    ) -> None:
        """Set up per-connection state.

        Args:
            wyoming_info: service description sent in reply to ``Describe``.
            cliargs: parsed command-line arguments; ``cliargs.key`` is the API key.
            lock: lock held while a transcription request is in flight.
            *args: forwarded to ``AsyncEventHandler``.
            **kwargs: forwarded to ``AsyncEventHandler``.
        """
        super().__init__(*args, **kwargs)
        self.cliargs = cliargs
        self.lock = lock
        self.wyoming_info_event = wyoming_info.event()
        self.wavdir = tempfile.TemporaryDirectory()
        self.wavpath = os.path.join(self.wavdir.name, "speech.wav")
        # Must start as None: handle_event opens the WAV file lazily on the
        # first chunk, keyed on this attribute being None.
        self.wavfile: Optional[wave.Wave_write] = None

    async def handle_event(self, event: Event) -> bool:
        """Process one client event.

        Returns:
            True to keep the connection open for further events, False once
            the audio stream has ended and the request is finished.
        """
        if AudioChunk.is_type(event.type):
            self._write_chunk(AudioChunk.from_event(event))
            return True

        if AudioStop.is_type(event.type):
            LOGGER.debug("Audio Stopped. Transcribing ...")
            await self._finish_recording()
            # The request is complete whether or not transcription succeeded.
            return False

        if Transcribe.is_type(event.type):
            return True

        if Describe.is_type(event.type):
            await self.write_event(self.wyoming_info_event)
            LOGGER.debug("Sent Info")
            return True

        # Events this handler does not act on must not end the session,
        # otherwise the connection closes before any audio arrives.
        return True

    def _write_chunk(self, chunk: AudioChunk) -> None:
        """Append one audio chunk to the WAV file, opening it on first use."""
        if self.wavfile is None:
            self.wavfile = wave.open(self.wavpath, "wb")
            self.wavfile.setframerate(chunk.rate)
            self.wavfile.setsampwidth(chunk.width)
            self.wavfile.setnchannels(chunk.channels)
        self.wavfile.writeframes(chunk.audio)

    async def _finish_recording(self) -> None:
        """Close the WAV file, transcribe it and send the transcript."""
        if self.wavfile is None:
            LOGGER.warning("Audio stopped before any audio chunk was received")
            return

        self.wavfile.close()
        self.wavfile = None

        async with self.lock:
            try:
                text = await self._transcribe()
                if text is not None:
                    LOGGER.info("Transcription Recieved")
                    await self.write_event(Transcript(text=text).event())
            except Exception as err:
                # Boundary of the request: log with traceback, do not crash.
                LOGGER.exception("Error during Transcription: %s", err)

    async def _transcribe(self) -> Optional[str]:
        """POST the recorded WAV file to ``API_URL``.

        Returns:
            The ``text`` field of the JSON response (empty string if absent),
            or None when the API answers with a non-200 status.
        """
        headers = {
            "Authorization": f"Bearer {self.cliargs.key}",
            # The request body is the raw WAV file.
            "Content-Type": "audio/wav",
        }
        with open(self.wavpath, "rb") as wav:
            data = wav.read()

        async with aiohttp.ClientSession() as session:
            async with session.post(API_URL, headers=headers, data=data) as response:
                if response.status != 200:
                    LOGGER.error(f"Error from Huggingface API: {response.status}")
                    LOGGER.debug("Response body: %s", await response.text())
                    return None
                result = await response.json()
        return result.get("text", "")
Hey, I'm trying to write a small Python script that transcribes voice to text with the Hugging Face Inference API, but nothing happens. Can someone help me?