pion / opus

Pure Go implementation of Opus
https://pion.ly/
MIT License
386 stars 16 forks source link

unsupported configuration mode: 2 #36

Open Davincible opened 1 year ago

Davincible commented 1 year ago

Trying to decode Telegram voice messages, get unsupported configuration mode: 2

Sample file: https://file.io/LAv06s4gaw1o

DeryabinSergey commented 1 year ago

I have the same problem. The example file from this repo works fine, but a Telegram voice message does not.

I prepared an example, but the playground does not support loading files.

package main

import (
    "bytes"
    "errors"
    "github.com/pion/opus"
    "github.com/pion/opus/pkg/oggreader"
    "io"
    "log"
    "net/http"
)

// main downloads each sample Ogg file and passes it to OggRead, logging
// either the failure or a "done" line for every file.
func main() {
    files := []string{
        "https://storage.googleapis.com/training-cloud-bucket/tiny.ogg",
        "https://storage.googleapis.com/training-cloud-bucket/telegram.ogg",
    }

    for _, file := range files {
        // A failure on one file (fetch or decode) is reported and the loop
        // moves on, so the remaining files are still tried.
        if err := decodeURL(file); err != nil {
            log.Printf("error: %v\n", err)
            continue
        }

        log.Println(file, "done")
    }
}

// decodeURL fetches url over HTTP and hands the response body to OggRead.
//
// The work lives in its own function so that the deferred Close runs at the
// end of every file rather than piling up until main returns, and so that the
// body is closed exactly once on every path.
//
// It returns the fetch error, an error for any non-200 response, or whatever
// OggRead returns.
func decodeURL(url string) error {
    response, err := http.Get(url)
    if err != nil {
        return err
    }
    defer response.Body.Close()

    // Do not feed an HTML/JSON error page to the Ogg parser.
    if response.StatusCode != http.StatusOK {
        return errors.New("unexpected HTTP status " + response.Status + " for " + url)
    }

    return OggRead(response.Body)
}

// OggRead reads an Ogg stream from input page by page and passes every
// segment of every page to an Opus decoder. The decoded output is written to
// a scratch buffer and discarded; only success or failure is reported.
//
// Pages whose first segment starts with "OpusTags" are skipped.
//
// It returns nil once the reader reports io.EOF, or the first error produced
// by the Ogg reader or the decoder.
func OggRead(input io.Reader) error {
    ogg, _, err := oggreader.NewWith(input)
    if err != nil {
        return err
    }

    // Scratch space handed to the decoder; its contents are never read here.
    out := make([]byte, 1920)
    decoder := opus.NewDecoder()

    for {
        segments, _, err := ogg.ParseNextPage()
        if errors.Is(err, io.EOF) {
            return nil
        }
        // Check the error before touching segments: on failure there may be
        // no segments at all, and indexing segments[0] would panic.
        if err != nil {
            return err
        }

        // Skip the comment/metadata page. The length guard keeps a page with
        // no segments from panicking on the index.
        if len(segments) > 0 && bytes.HasPrefix(segments[0], []byte("OpusTags")) {
            continue
        }

        for _, segment := range segments {
            if _, _, err := decoder.Decode(segment, out); err != nil {
                return err
            }
        }
    }
}

and I get the same problem:

2023/04/09 12:23:10 https://storage.googleapis.com/training-cloud-bucket/tiny.ogg done
2023/04/09 12:23:10 error: unsupported configuration mode: 3

from here https://github.com/pion/opus/blob/master/decoder.go#L42

Davincible commented 1 year ago

I managed to get the following to work to decode to wav

package audio

import (
    "bytes"
    "encoding/binary"
    "errors"
    "io"

    "gopkg.in/hraban/opus.v2"
)

// DecodeOpus decodes an in-memory Opus stream and returns the decoded samples
// as 16-bit little-endian PCM bytes.
//
// data is the complete encoded stream. The function reads from the stream
// until it reports io.EOF or a zero-length read, and returns the first
// non-EOF error it encounters.
func DecodeOpus(data []byte) ([]byte, error) {
    // Wrap the input data in a bytes buffer so the stream can read from it.
    input := bytes.NewBuffer(data)

    stream, err := opus.NewStream(input)
    if err != nil {
        return nil, err
    }
    // Release the stream when decoding is finished so its resources are not
    // leaked. The Close error is ignored on purpose: all data has already
    // been read by then and there is nothing left to recover.
    defer stream.Close()

    // Scratch buffer that receives one chunk of decoded samples per Read.
    pcm := make([]int16, 2880)

    var output []int16
    for {
        n, err := stream.Read(pcm)
        if errors.Is(err, io.EOF) {
            break
        }
        if err != nil {
            return nil, err
        }
        if n == 0 {
            break
        }

        // NOTE(review): presumably n counts samples per channel; if so,
        // pcm[:n] keeps only part of each chunk for multi-channel streams.
        // Verify against the stream's Read documentation.
        output = append(output, pcm[:n]...)
    }

    // Serialize each sample as two little-endian bytes.
    out := make([]byte, len(output)*2)
    for i, v := range output {
        binary.LittleEndian.PutUint16(out[i*2:], uint16(v))
    }

    return out, nil
}

// PCMToWAV prefixes raw PCM bytes with a 44-byte RIFF/WAVE header and returns
// the complete file contents.
//
// pcm is copied verbatim into the data chunk. sampleRate and channels are
// only recorded in the header; the samples are not converted or resampled.
// The header always declares format tag 1 at 16 bits per sample, so the block
// alignment is channels*2 and the byte rate is sampleRate*channels*2.
//
// An error is returned when channels, sampleRate, the derived byte rate, or
// the data length cannot be represented in the fixed-width header fields.
// Without these checks such values would be silently truncated and produce a
// corrupt file.
func PCMToWAV(pcm []byte, sampleRate int, channels int) ([]byte, error) {
    const (
        headerSize     = 44 // RIFF (12) + fmt chunk (24) + data chunk header (8)
        bytesPerSample = 2  // 16-bit samples
        maxUint16      = 1<<16 - 1
        maxUint32      = 1<<32 - 1
    )

    // Block alignment (channels * bytesPerSample) is stored as a uint16.
    if channels < 1 || channels > maxUint16/bytesPerSample {
        return nil, errors.New("wav: channel count out of range")
    }
    if sampleRate < 1 || uint64(sampleRate) > maxUint32 {
        return nil, errors.New("wav: sample rate out of range")
    }
    blockAlign := channels * bytesPerSample
    byteRate := uint64(sampleRate) * uint64(blockAlign)
    if byteRate > maxUint32 {
        return nil, errors.New("wav: byte rate out of range")
    }
    // The RIFF size field holds the data length plus the 36 header bytes
    // that follow it.
    if uint64(len(pcm)) > maxUint32-(headerSize-8) {
        return nil, errors.New("wav: pcm data too large")
    }

    le := binary.LittleEndian
    out := make([]byte, headerSize, headerSize+len(pcm))

    // RIFF header.
    copy(out[0:], "RIFF")
    le.PutUint32(out[4:], uint32(len(pcm))+(headerSize-8))
    copy(out[8:], "WAVE")

    // Format chunk.
    copy(out[12:], "fmt ")
    le.PutUint32(out[16:], 16) // size of the format chunk body
    le.PutUint16(out[20:], 1)  // format tag
    le.PutUint16(out[22:], uint16(channels))
    le.PutUint32(out[24:], uint32(sampleRate))
    le.PutUint32(out[28:], uint32(byteRate))
    le.PutUint16(out[32:], uint16(blockAlign))
    le.PutUint16(out[34:], 16) // bits per sample

    // Data chunk.
    copy(out[36:], "data")
    le.PutUint32(out[40:], uint32(len(pcm)))

    return append(out, pcm...), nil
}
        // Decode the voice payload to PCM bytes; failures are wrapped with a
        // "decode ogg" prefix.
        pcm, err := audio.DecodeOpus(voice)
        if err != nil {
            return "", fmt.Errorf("decode ogg: %w", err)
        }

        // Wrap the PCM bytes in a WAV container whose header declares 48000 Hz
        // and 2 channels; failures are wrapped with an "encode wav" prefix.
        wav, err := audio.PCMToWAV(pcm, 48000, 2)
        if err != nil {
            return "", fmt.Errorf("encode wav: %w", err)
        }
DeryabinSergey commented 1 year ago

@Davincible thanks for the information. I saw and tried this library, but it includes C modules, so this solution did not compile for use in a Google Cloud Function.

Davincible commented 1 year ago

@DeryabinSergey yeah, pure Go would be better. I tried to make it work with other libs but couldn't quickly get it working.

DeryabinSergey commented 1 year ago

@Davincible I think, but I am not sure, that voice messages from Telegram use CELT, and as described in the roadmap, CELT support is not ready at the moment.

linnv commented 4 months ago

I managed to get the following to work to decode to wav

package audio

import (
  "bytes"
  "encoding/binary"
  "errors"
  "io"

  "gopkg.in/hraban/opus.v2"
)

// DecodeOpus decodes an in-memory Opus stream and returns the decoded samples
// as 16-bit little-endian PCM bytes.
//
// data is the complete encoded stream. The function reads from the stream
// until it reports io.EOF or a zero-length read, and returns the first
// non-EOF error it encounters.
func DecodeOpus(data []byte) ([]byte, error) {
  // Wrap the input data in a bytes buffer so the stream can read from it.
  input := bytes.NewBuffer(data)

  stream, err := opus.NewStream(input)
  if err != nil {
    return nil, err
  }
  // Release the stream when decoding is finished so its resources are not
  // leaked. The Close error is ignored on purpose: all data has already been
  // read by then and there is nothing left to recover.
  defer stream.Close()

  // Scratch buffer that receives one chunk of decoded samples per Read.
  pcm := make([]int16, 2880)

  var output []int16
  for {
    n, err := stream.Read(pcm)
    if errors.Is(err, io.EOF) {
      break
    }
    if err != nil {
      return nil, err
    }
    if n == 0 {
      break
    }

    // NOTE(review): presumably n counts samples per channel; if so, pcm[:n]
    // keeps only part of each chunk for multi-channel streams. Verify against
    // the stream's Read documentation.
    output = append(output, pcm[:n]...)
  }

  // Serialize each sample as two little-endian bytes.
  out := make([]byte, len(output)*2)
  for i, v := range output {
    binary.LittleEndian.PutUint16(out[i*2:], uint16(v))
  }

  return out, nil
}

// PCMToWAV prefixes raw PCM bytes with a 44-byte RIFF/WAVE header and returns
// the complete file contents.
//
// pcm is copied verbatim into the data chunk. sampleRate and channels are
// only recorded in the header; the samples are not converted or resampled.
// The header always declares format tag 1 at 16 bits per sample, so the block
// alignment is channels*2 and the byte rate is sampleRate*channels*2.
//
// An error is returned when channels, sampleRate, the derived byte rate, or
// the data length cannot be represented in the fixed-width header fields.
// Without these checks such values would be silently truncated and produce a
// corrupt file.
func PCMToWAV(pcm []byte, sampleRate int, channels int) ([]byte, error) {
  const (
    headerSize     = 44 // RIFF (12) + fmt chunk (24) + data chunk header (8)
    bytesPerSample = 2  // 16-bit samples
    maxUint16      = 1<<16 - 1
    maxUint32      = 1<<32 - 1
  )

  // Block alignment (channels * bytesPerSample) is stored as a uint16.
  if channels < 1 || channels > maxUint16/bytesPerSample {
    return nil, errors.New("wav: channel count out of range")
  }
  if sampleRate < 1 || uint64(sampleRate) > maxUint32 {
    return nil, errors.New("wav: sample rate out of range")
  }
  blockAlign := channels * bytesPerSample
  byteRate := uint64(sampleRate) * uint64(blockAlign)
  if byteRate > maxUint32 {
    return nil, errors.New("wav: byte rate out of range")
  }
  // The RIFF size field holds the data length plus the 36 header bytes that
  // follow it.
  if uint64(len(pcm)) > maxUint32-(headerSize-8) {
    return nil, errors.New("wav: pcm data too large")
  }

  le := binary.LittleEndian
  out := make([]byte, headerSize, headerSize+len(pcm))

  // RIFF header.
  copy(out[0:], "RIFF")
  le.PutUint32(out[4:], uint32(len(pcm))+(headerSize-8))
  copy(out[8:], "WAVE")

  // Format chunk.
  copy(out[12:], "fmt ")
  le.PutUint32(out[16:], 16) // size of the format chunk body
  le.PutUint16(out[20:], 1)  // format tag
  le.PutUint16(out[22:], uint16(channels))
  le.PutUint32(out[24:], uint32(sampleRate))
  le.PutUint32(out[28:], uint32(byteRate))
  le.PutUint16(out[32:], uint16(blockAlign))
  le.PutUint16(out[34:], 16) // bits per sample

  // Data chunk.
  copy(out[36:], "data")
  le.PutUint32(out[40:], uint32(len(pcm)))

  return append(out, pcm...), nil
}
      // Decode the voice payload to PCM bytes; failures are wrapped with a
      // "decode ogg" prefix.
      pcm, err := audio.DecodeOpus(voice)
      if err != nil {
          return "", fmt.Errorf("decode ogg: %w", err)
      }

      // Wrap the PCM bytes in a WAV container whose header declares 48000 Hz
      // and 2 channels; failures are wrapped with an "encode wav" prefix.
      wav, err := audio.PCMToWAV(pcm, 48000, 2)
      if err != nil {
          return "", fmt.Errorf("encode wav: %w", err)
      }

It works, but the WAV file produced by decoding the example Opus file is not what I expected: it sounds abnormal, and it is larger than the file decoded with opusdec: opusdec --rate 16000 ./files/pcm-stereo.opus ./files/pcm-stereo.opus.wav

These are the files I got:

-rw-r--r-- 1 root root 5.7M Apr 19 08:30 pcm-stereo-bygolib.opus.wav
-rw-r--r-- 1 root root 372K Apr 19 08:28 pcm-stereo.opus
-rw-r--r-- 1 root root 3.8M Apr 19 08:30 pcm-stereo.opus.wav

the code

        // Load the encoded file. The read error is discarded here, so a failed
        // read only surfaces later (if at all) through DecodeOpus.
        voice, _ := os.ReadFile("./files/pcm-stereo.opus")
        pcm, err := DecodeOpus(voice)
        if err != nil {
                panic(err)
        }

        // NOTE(review): 16000 and 2 are passed straight to PCMToWAV; nothing in
        // this snippet resamples pcm, so presumably these values must match what
        // DecodeOpus actually produced — confirm.
        wav, err := PCMToWAV(pcm, 16000, 2)
        if err != nil {
                panic(err)
        }
        // The error returned by WriteFile is not checked.
        os.WriteFile("./files/pcm-stereo-bygolib.opus.wav", wav, 0666)

Any suggestions on how to tackle this issue?

files.zip

linnv commented 4 months ago

This is what I found in the source code of opusdec: https://github.com/xiph/opus-tools/blob/master/src/opusdec.c#L516 — resampling may be what is needed.

xypwn commented 3 months ago

The library doesn't seem to support CELT coding yet. https://github.com/pion/opus/blob/867e82f700140532b924bb6fc94523ec9cd412eb/decoder.go#L45

It's on the roadmap, but until then, only SILK coding is supported.