asticode / go-astiav

Golang ffmpeg and libav C bindings
MIT License
351 stars 38 forks source link

h264 -> h265 #45

Closed mateothegreat closed 7 months ago

mateothegreat commented 7 months ago

Hey there, I want to start off by saying thank you for your hard work, none of this is easy! 💪

I need to go from RTSP (h264) --> h265 and I think I'm close but I'm stumped a error encoding packet: Invalid argument error.

Basically I'm coming from an rtp.Packet from https://github.com/bluenviron/gortsplib/v4 which is h264.

Can I get some pointers on to what I may be missing or how to debug this error (it's super vague).

ffprobe output:

Input #0, rtsp, from 'rtsp://***:***@108.53.***.****12200/Streaming/Channels/101':
  Metadata:
    title           : Media Presentation
  Duration: N/A, start: 0.100000, bitrate: N/A
  Stream #0:0: Video: h264 (Main), yuvj420p(pc, bt709, progressive), 3840x2160 [SAR 1:1 DAR 16:9], 20 fps, 10 tbr, 90k tbn

Work so far:

package clients

import (
    "errors"
    "fmt"
    "log"
    "strings"

    "github.com/asticode/go-astiav"
    "github.com/bluenviron/gortsplib/v4/pkg/format"
    "github.com/nvr-ai/go-rtsp/streams"
)

type H264Client struct {
    DecoderCodec   *astiav.Codec
    DecoderContext *astiav.CodecContext
    EncoderCodec   *astiav.Codec
    EncoderContext *astiav.CodecContext
}

func NewH264Client() (*H264Client, error) {
    astiav.SetLogLevel(astiav.LogLevelVerbose)
    astiav.SetLogCallback(func(l astiav.LogLevel, fmt, msg, parent string) {
        log.Printf("ffmpeg log: %s (level: %d)\n", strings.TrimSpace(msg), l)
    })

    client := &H264Client{}

    // Get the H.264 decoder codec
    client.DecoderCodec = astiav.FindDecoder(astiav.CodecIDH264)
    if client.DecoderCodec == nil {
        return nil, errors.New("H.264 decoder not found")
    }

    // Allocate the decoder context
    client.DecoderContext = astiav.AllocCodecContext(client.DecoderCodec)
    if client.DecoderContext == nil {
        return nil, errors.New("could not allocate decoder context")
    }

    // Open the decoder
    if err := client.DecoderContext.Open(client.DecoderCodec, nil); err != nil {
        return nil, fmt.Errorf("error opening decoder: %w", err)
    }

    // Get the H.265 encoder codec
    client.EncoderCodec = astiav.FindEncoder(astiav.CodecIDHevc)
    if client.EncoderCodec == nil {
        return nil, errors.New("H.265 encoder not found")
    }

    // Allocate the encoder context
    client.EncoderContext = astiav.AllocCodecContext(client.EncoderCodec)
    if client.EncoderContext == nil {
        return nil, errors.New("could not allocate encoder context")
    }

    // Set encoder parameters
    client.EncoderContext.SetTimeBase(astiav.NewRational(1, 25))
    client.EncoderContext.SetBitRate(1000000)
    client.EncoderContext.SetWidth(1280) // Set width of the output frame
    client.EncoderContext.SetHeight(720) // Set height of the output frame
    client.EncoderContext.SetPixelFormat(astiav.PixelFormat(astiav.PixelFormatYuv420P))

    // Open the encoder
    if err := client.EncoderContext.Open(client.EncoderCodec, nil); err != nil {
        return nil, fmt.Errorf("error opening encoder: %w", err)
    }

    return client, nil
}

func (c *H264Client) DecodeVideoFrame(rtspPacket *streams.Frame) error {
    // Parse H.264 NAL units from the RTP payload
    nalUnits, err := parseRTPH264NALUnits(rtspPacket.Packet.Payload)
    if err != nil {
        return fmt.Errorf("error parsing H.264 NAL units: %w", err)
    }

    // Encode each H.264 NAL unit to H.265
    for _, nalUnit := range nalUnits {
        // Convert H.264 NAL unit to H.265 frame with start code, SPS, and PPS
        h265Frame, err := h264ToH265(rtspPacket.Media.Formats[0].(*format.H264).PPS, rtspPacket.Media.Formats[0].(*format.H264).SPS, nalUnit)
        if err != nil {
            return fmt.Errorf("error converting H.264 to H.265: %w", err)
        }

        // Allocate a packet for encoding
        packet := astiav.AllocPacket()

        // Set packet data
        packet.FromData(h265Frame)

        // Encode the packet
        if err := c.EncoderContext.SendPacket(packet); err != nil {
            return fmt.Errorf("error encoding packet: %w", err)
        }
    }

    return nil
}

func h264ToH265(pps []byte, sps []byte, h264Frame []byte) ([]byte, error) {
    // Add start code prefix if necessary
    if len(h264Frame) >= 4 && h264Frame[0] == 0x00 && h264Frame[1] == 0x00 && h264Frame[2] == 0x00 && h264Frame[3] == 0x01 {
        // Start code prefix already exists
        return h264Frame, nil
    }

    // Create H.265 frame with start code prefix and SPS/PPS
    h265Frame := append([]byte{0x00, 0x00, 0x00, 0x01}, sps...)
    h265Frame = append(h265Frame, []byte{0x00, 0x00, 0x00, 0x01}...)
    h265Frame = append(h265Frame, pps...)
    h265Frame = append(h265Frame, []byte{0x00, 0x00, 0x00, 0x01}...)
    h265Frame = append(h265Frame, h264Frame...)

    return h265Frame, nil
}

// parseRTPH264NALUnits parses H264 NAL units from an RTP packet payload.
func parseRTPH264NALUnits(payload []byte) ([][]byte, error) {
    var nalUnits [][]byte

    // Extract other NAL units from the payload
    for i := 0; i < len(payload); {
        // Find start code prefix (0x000001 or 0x00000001)
        start := i
        for i < len(payload)-3 && !(payload[i] == 0 && payload[i+1] == 0 && (payload[i+2] == 1 || (payload[i+2] == 0 && payload[i+3] == 1))) {
            i++
        }

        // Append the NAL unit to the list of NAL units
        nalUnit := payload[start:i]
        if len(nalUnit) > 0 {
            nalUnits = append(nalUnits, nalUnit)
        }

        // Move to the next NAL unit
        if i < len(payload)-3 && payload[i+2] == 0 && payload[i+3] == 1 {
            i += 4 // 0x00000001 prefix
        } else if i < len(payload)-2 && payload[i+2] == 1 {
            i += 3 // 0x000001 prefix
        } else {
            i++
        }
    }

    return nalUnits, nil
}

Thanks again, we appreciate ya!

asticode commented 7 months ago

Looking at your goals, I'd strongly suggest that you take a look at the transcoding example (you can skip the filter part since you don't need it) since that's basically what you're trying to do.

Also, I'd strongly consider replacing gortsplib with demuxing the stream with go-astiav directly, if possible, that would simplify things greatly (but that's not mandatory).

Regarding your code, there seems to be a misunderstanding: data is coming from rtsp as h264 packets (compressed) therefore you first need to decode them (transform them to frames) and encode them using an h265 encoder. Again, understanding the transcoding example is a must in your situation and particularly the decoding part as well as the encoding part.

mateothegreat commented 7 months ago

Ok, roger that.. I'll give it another go. I think I just got lost in the weeds.

Thank you for the quick response @asticode !!