deepgram / deepgram-go-sdk

Go SDK for Deepgram's automated speech recognition APIs.
https://developers.deepgram.com
MIT License
34 stars 28 forks source link

Help with microphone streaming #32

Closed go-dockly closed 1 year ago

go-dockly commented 1 year ago

Could you provide an example that shows how to use this SDK with microphone streaming through PortAudio? I had something like this in mind, but it does not work as written: it still needs a write seeker (`io.WriteSeeker`), among other things. `

      package main

      import (
          "bytes"
          "fmt"
          "os"
          "os/signal"
          "syscall"
          "time"

          "github.com/deepgram-devs/go-sdk/deepgram"
          "github.com/go-audio/audio"
          "github.com/go-audio/wav"
          "github.com/go-dockly/utility/xerrors/iferr"
          "github.com/gordonklaus/portaudio"
          "github.com/gorilla/websocket"
      )

      // main captures 16 kHz mono audio from the default PortAudio input stream,
      // WAV-encodes each captured buffer, sends the encoded bytes to a Deepgram
      // live-transcription websocket, and prints every message received back.
      // The capture loop runs until SIGINT or SIGTERM is delivered.
      //
      // Errors are passed to iferr.Exit (presumably terminates the process on a
      // non-nil error; verify against that package).
      func main() {
          var (
              inputChannels   = 1
              outputChannels  = 0
              sampleRate      = 16000
              framesPerBuffer = 1024
              // frames is the buffer PortAudio fills on every stream.Read call.
              frames = make([]int16, framesPerBuffer)
              dg     = *deepgram.NewClient("apikey")
          )

          // Buffered so a signal arriving while the loop is busy is not dropped.
          sig := make(chan os.Signal, 1)
          signal.Notify(sig, syscall.SIGINT, syscall.SIGTERM)

          iferr.Exit(portaudio.Initialize())
          defer portaudio.Terminate()

          stream, err := portaudio.OpenDefaultStream(inputChannels, outputChannels, float64(sampleRate), framesPerBuffer, frames)
          iferr.Exit(err)
          defer func() {
              iferr.Exit(stream.Stop())
              iferr.Exit(stream.Close())
          }()

          dgConn, _, err := dg.LiveTranscription(deepgram.LiveTranscriptionOptions{
              Language:  "en-US",
              Punctuate: true,
          })
          iferr.Exit(err)
          iferr.Exit(stream.Start())

          // buf stages the encoder output for one websocket message at a time.
          // NOTE(review): the issue author reports that a write seeker is required
          // here; confirm whether wav.NewEncoder accepts a *bytes.Buffer.
          buf := new(bytes.Buffer)
          encoder := wav.NewEncoder(buf, sampleRate, 16, inputChannels, 1)

          // Print every message the server sends for as long as the process lives.
          go func() {
              for {
                  _, message, err := dgConn.ReadMessage()
                  iferr.Exit(err)
                  fmt.Println(string(message))
              }
          }()

          for {
              select {
              case <-sig:
                  fmt.Println("stopping...")
                  return
              default:
                  iferr.Exit(stream.Read())
                  buffer := &audio.IntBuffer{
                      Format: &audio.Format{
                          NumChannels: inputChannels,
                          SampleRate:  sampleRate,
                      },
                      Data:           int16ToInt(frames),
                      SourceBitDepth: 16,
                  }
                  iferr.Exit(encoder.Write(buffer))
                  iferr.Exit(dgConn.WriteMessage(websocket.BinaryMessage, buf.Bytes()))
                  // Drop what was just sent; otherwise every message would repeat
                  // all audio captured since start and grow without bound.
                  buf.Reset()
                  time.Sleep(10 * time.Millisecond)
              }
          }
      }

      // int16ToInt returns a newly allocated []int of the same length as slice,
      // with each 16-bit sample widened to int. The input is not modified.
      func int16ToInt(slice []int16) []int {
          n := len(slice)
          widened := make([]int, n)
          for idx := 0; idx < n; idx++ {
              widened[idx] = int(slice[idx])
          }
          return widened
      }

`

SandraRodgers commented 1 year ago

Please add this question to our GitHub Discussions forum. You'll have a lot more eyes on it there, and it will help support others who have the same question.