Open Davincible opened 1 year ago
I have the same problem. The example file from this repo works fine, but a Telegram voice message does not.
I prepared an example, but the playground does not support loading files.
package main
import (
"bytes"
"errors"
"github.com/pion/opus"
"github.com/pion/opus/pkg/oggreader"
"io"
"log"
"net/http"
)
// main downloads each sample Ogg file and runs it through OggRead, logging
// either the decode error or "<url> done" for every entry.
func main() {
	files := []string{
		"https://storage.googleapis.com/training-cloud-bucket/tiny.ogg",
		"https://storage.googleapis.com/training-cloud-bucket/telegram.ogg",
	}
	for _, file := range files {
		if err := decodeURL(file); err != nil {
			log.Printf("error: %v\n", err)
			continue
		}
		log.Println(file, "done")
	}
}

// decodeURL fetches url and feeds the response body to OggRead.
//
// It lives in its own function so the deferred Close runs at the end of each
// download instead of piling up until main returns. A failed request still
// panics, as before; a non-200 response or a decode failure is returned as an
// error.
func decodeURL(url string) error {
	response, err := http.Get(url)
	if err != nil {
		panic(err)
	}
	defer response.Body.Close()

	// An error page is not Ogg data; report the status instead of a
	// confusing parse error.
	if response.StatusCode != http.StatusOK {
		return errors.New("unexpected HTTP status " + response.Status)
	}
	return OggRead(response.Body)
}
// OggRead reads an Ogg stream from input page by page and passes every
// segment to a pion/opus decoder. The decoded audio is discarded; the
// function only reports whether the whole stream could be decoded.
//
// It returns nil once ParseNextPage reports io.EOF, and otherwise returns the
// first error from the Ogg reader or from decoder.Decode unchanged.
func OggRead(input io.Reader) error {
	ogg, _, err := oggreader.NewWith(input)
	if err != nil {
		return err
	}

	out := make([]byte, 1920)
	decoder := opus.NewDecoder()
	for {
		segments, _, err := ogg.ParseNextPage()
		if errors.Is(err, io.EOF) {
			return nil
		}
		// Check the error before touching segments: on failure the slice
		// may be nil and indexing it would panic.
		if err != nil {
			return err
		}
		// Skip the comment header page, which carries metadata, not audio.
		if len(segments) > 0 && bytes.HasPrefix(segments[0], []byte("OpusTags")) {
			continue
		}
		for _, segment := range segments {
			if _, _, err := decoder.Decode(segment, out); err != nil {
				return err
			}
		}
	}
}
and I get the same problem:
2023/04/09 12:23:10 https://storage.googleapis.com/training-cloud-bucket/tiny.ogg done
2023/04/09 12:23:10 error: unsupported configuration mode: 3
from here https://github.com/pion/opus/blob/master/decoder.go#L42
I managed to get the following to work to decode to wav
package audio
import (
"bytes"
"encoding/binary"
"errors"
"io"
"gopkg.in/hraban/opus.v2"
)
// DecodeOpus decodes an in-memory Ogg/Opus file and returns the PCM samples
// as 16-bit little-endian bytes.
//
// Decoding stops at io.EOF or when a read yields zero samples; any other
// error from the stream is returned as-is.
//
// NOTE(review): hraban/opus appears to document Stream.Read as returning the
// number of samples *per channel*. If so, frame[:count] keeps only half of a
// stereo read. Confirm against the library docs before relying on stereo
// output.
func DecodeOpus(data []byte) ([]byte, error) {
	stream, err := opus.NewStream(bytes.NewBuffer(data))
	if err != nil {
		return nil, err
	}

	// Scratch buffer reused for every read; samples accumulates the result.
	frame := make([]int16, 2880)
	var samples []int16
	for {
		count, readErr := stream.Read(frame)
		if readErr != nil {
			if errors.Is(readErr, io.EOF) {
				break
			}
			return nil, readErr
		}
		if count == 0 {
			break
		}
		samples = append(samples, frame[:count]...)
	}

	// Serialize each int16 as two bytes, low byte first.
	raw := make([]byte, 2*len(samples))
	for idx, s := range samples {
		raw[2*idx], raw[2*idx+1] = byte(s), byte(s>>8)
	}
	return raw, nil
}
// PCMToWAV prefixes raw 16-bit PCM data with a canonical 44-byte RIFF/WAVE
// header and returns the complete file contents.
//
// sampleRate and channels are only written into the header; the samples in
// pcm are copied through untouched (no resampling or channel conversion).
// The returned error is always nil.
func PCMToWAV(pcm []byte, sampleRate int, channels int) ([]byte, error) {
	const (
		headerLen      = 44
		bytesPerSample = 2
	)
	dataLen := len(pcm)
	le := binary.LittleEndian

	var hdr [headerLen]byte

	// RIFF chunk descriptor; the size field excludes the first 8 bytes.
	copy(hdr[0:4], "RIFF")
	le.PutUint32(hdr[4:8], uint32(dataLen+36))
	copy(hdr[8:12], "WAVE")

	// Format chunk: 16-byte body, format tag 1 (PCM), 16 bits per sample.
	copy(hdr[12:16], "fmt ")
	le.PutUint32(hdr[16:20], 16)
	le.PutUint16(hdr[20:22], 1)
	le.PutUint16(hdr[22:24], uint16(channels))
	le.PutUint32(hdr[24:28], uint32(sampleRate))
	le.PutUint32(hdr[28:32], uint32(sampleRate*channels*bytesPerSample))
	le.PutUint16(hdr[32:34], uint16(channels*bytesPerSample))
	le.PutUint16(hdr[34:36], 16)

	// Data chunk header, followed by the samples themselves.
	copy(hdr[36:40], "data")
	le.PutUint32(hdr[40:44], uint32(dataLen))

	var wav bytes.Buffer
	wav.Grow(headerLen + dataLen)
	wav.Write(hdr[:])
	wav.Write(pcm)
	return wav.Bytes(), nil
}
// Usage fragment from inside a caller that returns (string, error); `voice`
// holds the raw Ogg/Opus bytes and is defined outside this snippet.
// Step 1: decode the Opus data to 16-bit little-endian PCM bytes.
pcm, err := audio.DecodeOpus(voice)
if err != nil {
return "", fmt.Errorf("decode ogg: %w", err)
}
// Step 2: wrap the PCM in a WAV header declaring 48000 Hz and 2 channels.
// NOTE(review): PCMToWAV only writes these values into the header; confirm
// the decoded stream really is 48 kHz stereo.
wav, err := audio.PCMToWAV(pcm, 48000, 2)
if err != nil {
return "", fmt.Errorf("encode wav: %w", err)
}
@Davincible thanks for the information. I saw and tried this library, but it includes C modules, so this solution did not compile for use in a Google Cloud Function.
@DeryabinSergey yeah, pure Go would be better. I tried to make it work with pure-Go libs but couldn't quickly get it working.
@Davincible I think, but I am not sure, that voice messages from Telegram use CELT, and as described in the roadmap, it is not ready at the moment.
I managed to get the following to work to decode to wav
package audio import ( "bytes" "encoding/binary" "errors" "io" "gopkg.in/hraban/opus.v2" ) func DecodeOpus(data []byte) ([]byte, error) { // Wrap the input data in a bytes buffer input := bytes.NewBuffer(data) // Initialize a new stream with the bytes buffer stream, err := opus.NewStream(input) if err != nil { return nil, err } // Create a buffer to store the decoded PCM data pcm := make([]int16, 2880) // Read from the stream and decode until there is no more data var output []int16 for { n, err := stream.Read(pcm) if errors.Is(err, io.EOF) { break } else if err != nil { return nil, err } if n == 0 { break } output = append(output, pcm[:n]...) } // Convert the PCM data to a byte slice out := make([]byte, len(output)*2) for i, v := range output { out[i*2] = byte(v) out[i*2+1] = byte(v >> 8) } return out, nil } func PCMToWAV(pcm []byte, sampleRate int, channels int) ([]byte, error) { // Calculate the size of the audio data and the size of the file audioSize := len(pcm) fileSize := audioSize + 36 // Initialize a buffer to store the WAV audio data buf := new(bytes.Buffer) // Write the RIFF header if err := binary.Write(buf, binary.LittleEndian, []byte("RIFF")); err != nil { return nil, err } if err := binary.Write(buf, binary.LittleEndian, uint32(fileSize)); err != nil { return nil, err } if err := binary.Write(buf, binary.LittleEndian, []byte("WAVE")); err != nil { return nil, err } // Write the format chunk if err := binary.Write(buf, binary.LittleEndian, []byte("fmt ")); err != nil { return nil, err } if err := binary.Write(buf, binary.LittleEndian, uint32(16)); err != nil { return nil, err } if err := binary.Write(buf, binary.LittleEndian, uint16(1)); err != nil { return nil, err } if err := binary.Write(buf, binary.LittleEndian, uint16(channels)); err != nil { return nil, err } if err := binary.Write(buf, binary.LittleEndian, uint32(sampleRate)); err != nil { return nil, err } if err := binary.Write(buf, binary.LittleEndian, uint32(sampleRate*int(channels)*2)); 
err != nil { return nil, err } if err := binary.Write(buf, binary.LittleEndian, uint16(channels*2)); err != nil { return nil, err } if err := binary.Write(buf, binary.LittleEndian, uint16(16)); err != nil { return nil, err } // Write the data chunk if err := binary.Write(buf, binary.LittleEndian, []byte("data")); err != nil { return nil, err } if err := binary.Write(buf, binary.LittleEndian, uint32(audioSize)); err != nil { return nil, err } if _, err := buf.Write(pcm); err != nil { return nil, err } return buf.Bytes(), nil }
// Usage fragment from inside a caller that returns (string, error); `voice`
// holds the raw Ogg/Opus bytes and is defined outside this snippet.
pcm, err := audio.DecodeOpus(voice)
if err != nil {
	return "", fmt.Errorf("decode ogg: %w", err)
}
// The rate and channel count are only written into the WAV header.
wav, err := audio.PCMToWAV(pcm, 48000, 2)
if err != nil {
	return "", fmt.Errorf("encode wav: %w", err)
}
It works, but the WAV file produced by this example from an Opus file is not what I expected: it sounds abnormal, and it is larger than the file decoded by opusdec.
opusdec --rate 16000 ./files/pcm-stereo.opus ./files/pcm-stereo.opus.wav
These are the files I got:
-rw-r--r-- 1 root root 5.7M Apr 19 08:30 pcm-stereo-bygolib.opus.wav
-rw-r--r-- 1 root root 372K Apr 19 08:28 pcm-stereo.opus
-rw-r--r-- 1 root root 3.8M Apr 19 08:30 pcm-stereo.opus.wav
the code
// Read the whole Opus file into memory; stop if it cannot be read rather
// than decoding an empty buffer.
voice, err := os.ReadFile("./files/pcm-stereo.opus")
if err != nil {
	panic(err)
}
pcm, err := DecodeOpus(voice)
if err != nil {
	panic(err)
}
// PCMToWAV does not resample: the rate is only recorded in the header.
// libopusfile decodes at 48 kHz, so the header must say 48000 or playback
// runs at the wrong speed. A 16 kHz file (as with `opusdec --rate 16000`)
// needs an explicit resampling step before this call.
wav, err := PCMToWAV(pcm, 48000, 2)
if err != nil {
	panic(err)
}
if err := os.WriteFile("./files/pcm-stereo-bygolib.opus.wav", wav, 0666); err != nil {
	panic(err)
}
Any suggestions on how to tackle this issue?
Here is what I found in the source code of opusdec: https://github.com/xiph/opus-tools/blob/master/src/opusdec.c#L516 — resampling may work.
The library doesn't seem to support CELT coding yet. https://github.com/pion/opus/blob/867e82f700140532b924bb6fc94523ec9cd412eb/decoder.go#L45
It's on the roadmap, but until then, only SILK coding is supported.
When trying to decode Telegram voice messages, I get:
unsupported configuration mode: 2
Sample file: https://file.io/LAv06s4gaw1o