larksuite / rsmpeg

A Rust crate that exposes FFmpeg's power as much as possible.
https://docs.rs/rsmpeg/latest/rsmpeg/

How to convert `rsmpeg::avutil::AVFrame` to `opencv::core::Mat` #194

Closed: phial3 closed this issue 2 months ago

phial3 commented 2 months ago

Here's my code:

use anyhow::{Context, Result};
use opencv::{
    core::Mat,
    prelude::*,
};
use rsmpeg::avcodec::{AVCodec, AVCodecContext};
use rsmpeg::avformat::AVFormatContextInput;
use rsmpeg::avutil::AVFrame;
use rsmpeg::ffi;
use rsmpeg::swscale::SwsContext;
use std::ffi::CString;

/// to mat
fn avframe_to_mat(frame: &AVFrame) -> Result<Mat> {
    let width = frame.width as i32;
    let height = frame.height as i32;
    let data = unsafe { std::slice::from_raw_parts(frame.data[0], (width * height * 3) as usize) };

    Ok(Mat::new_rows_cols_with_data(height, width, data).unwrap().try_clone()?)
}

fn main() -> Result<()> {
    // Path to the image file
    let image_path = "assets/test.jpg";

    // Convert the file path to a CString
    let c_image_path = CString::new(image_path).expect("Failed to create CString");

    // Open the input file
    let mut input_context = AVFormatContextInput::open(&c_image_path, None, &mut None)?;

    // Find the first video stream
    let video_stream_index = input_context
        .streams()
        .iter()
        .position(|stream| stream.codecpar().codec_type == ffi::AVMEDIA_TYPE_VIDEO)
        .expect("No video stream found");

    let stream = &input_context.streams()[video_stream_index];
    let codec_id = stream.codecpar().codec_id;
    println!("Found video stream with index: {}, codec_id: {}", video_stream_index, codec_id);

    let codec = AVCodec::find_decoder(codec_id).expect("Failed to find codec");
    let mut codec_context = AVCodecContext::new(&codec);

    // Open the codec context
    codec_context.open(None)?;

    // Set up the SwsContext for converting the frame to RGB
    let mut sws_context = SwsContext::get_context(
        codec_context.width,
        codec_context.height,
        ffi::AV_PIX_FMT_YUV420P,
        codec_context.width,
        codec_context.height,
        ffi::AV_PIX_FMT_RGB24,
        ffi::SWS_BILINEAR,
        None,
        None,
        None,
    ).context("Failed to create SwsContext")?;

    let mut rgb_frame = AVFrame::new();
    rgb_frame.set_format(ffi::AV_PIX_FMT_RGB24);
    rgb_frame.set_width(codec_context.width);
    rgb_frame.set_height(codec_context.height);
    rgb_frame.alloc_buffer()?;

    // Read frames from the file
    loop {
        let packet = input_context.read_packet()?;
        if let Some(packet) = packet {
            if packet.stream_index as i32 == video_stream_index as i32 {
                // Send the packet to the decoder
                codec_context.send_packet(Some(&packet))?;

                println!("Received packet with size: {}, video_stream_index: {}", packet.size, video_stream_index);

                // Receive the frame from the decoder
                while let Ok(f) = codec_context.receive_frame() {
                    // Convert the frame to RGB format
                    sws_context.scale_frame(&f, rgb_frame.width, rgb_frame.height, &mut rgb_frame)?;

                    // Convert the RGB frame to OpenCV Mat
                    let mat = avframe_to_mat(&rgb_frame)?;
                    println!("Converted frame to OpenCV Mat with size: {:?}", mat.size()?);
                }
            }
        }
    }
}

Error info:

Found video stream with index: 0, codec_id: 7
[swscaler @ 0x150008000] 0x0 -> 0x0 is invalid scaling dimension
thread 'main' panicked at examples/avframe_to_mat.rs:62:7:
Failed to create SwsContext
ldm0 commented 2 months ago

There are several issues in your code:

  1. You haven't set any parameters for codec_context, which is why codec_context.width and codec_context.height are both zero. You need to call codec_context.apply_codecpar(&stream.codecpar())?; before opening the codec_context.
  2. The second parameter of scale_frame() should be zero: it is the starting row of the source slice, not a stride, and you are converting the whole image: https://github.com/FFmpeg/FFmpeg/blob/2eef902d38dded68df7d874bc348aaa42ec87933/libswscale/swscale.h#L235
  3. Make sure to break out of your loop when packet is None.

You can refer to the example here; a minimal sketch applying these three points follows below.
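
For illustration, here is an editor-added sketch of the fixed setup and read loop, reusing the variable names from the snippet above (rgb_frame is assumed to be allocated exactly as in that snippet). It is not tested code from this thread; in particular, reading the decoder's pix_fmt field instead of hard-coding AV_PIX_FMT_YUV420P is assumed to work the same way as reading its width and height.

// Copy the stream parameters into the codec context before opening it (point 1),
// so width, height and pix_fmt are no longer zero.
codec_context.apply_codecpar(&stream.codecpar())?;
codec_context.open(None)?;

// Build the scaler from the decoder's actual dimensions and pixel format.
let mut sws_context = SwsContext::get_context(
    codec_context.width,
    codec_context.height,
    codec_context.pix_fmt, // assumed accessible like width/height
    codec_context.width,
    codec_context.height,
    ffi::AV_PIX_FMT_RGB24,
    ffi::SWS_BILINEAR,
    None,
    None,
    None,
).context("Failed to create SwsContext")?;

// Stop when read_packet() returns None instead of looping forever (point 3).
while let Some(packet) = input_context.read_packet()? {
    if packet.stream_index as usize != video_stream_index {
        continue;
    }
    codec_context.send_packet(Some(&packet))?;
    while let Ok(frame) = codec_context.receive_frame() {
        // The second argument is the source slice's starting row, so pass 0 (point 2).
        sws_context.scale_frame(&frame, 0, frame.height, &mut rgb_frame)?;
        let mat = avframe_to_mat(&rgb_frame)?;
        println!("Converted frame to OpenCV Mat with size: {:?}", mat.size()?);
    }
}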

phial3 commented 2 months ago

Thank you very much for your help. I still need your advice on one more thing.

use anyhow::{Context, Result};
use opencv::core::Vector;
use opencv::{
    core::Mat,
    imgcodecs,
    prelude::*,
};
use rsmpeg::avutil::{AVFrame};
use rsmpeg::ffi;
use rsmpeg::swscale::SwsContext;
use std::ffi::CString;
use std::sync::atomic::Ordering;
use yolo_rsmpeg_opencv::misc::avio;

/// rsmpeg AVFrame converter to OpenCV Mat
fn avframe_to_mat(frame: &AVFrame) -> opencv::Result<Mat> {
    // Only support RGB24
    if frame.format != ffi::AV_PIX_FMT_RGB24 as i32 {
        return Err(opencv::Error::new(opencv::core::StsBadArg, "Unsupported pixel format"));
    }

    let width = frame.width as i32;
    let height = frame.height as i32;
    let linesize = frame.linesize[0] as i32;
    println!("width: {}, height: {}, linesize: {}", width, height, linesize);

    // convert the frame data to a slice of bytes
    let data = unsafe {
        std::slice::from_raw_parts(frame.data[0], (linesize * height) as usize)
    };

    let mat = Mat::from_slice(data)?
        .reshape(3, height)? // 3 channels (RGB)
        .try_clone()?;

    Ok(mat)
}

fn main() -> Result<()> {
    // Path to the image file
    let file_path = CString::new("assets/cat.jpg").unwrap();

    // Open the input file
    // see https://github.com/larksuite/rsmpeg/blob/master/tests/misc/avio_writing.rs
    let (video_stream_index, mut input_format_context, mut decode_context) =
        avio::open_input_file(file_path.as_c_str()).unwrap();

    // Set up the SwsContext for converting the frame to RGB
    let mut sws_context = SwsContext::get_context(
        decode_context.width,
        decode_context.height,
        ffi::AV_PIX_FMT_YUV420P,
        decode_context.width,
        decode_context.height,
        ffi::AV_PIX_FMT_RGB24,
        ffi::SWS_BILINEAR | ffi::SWS_PRINT_INFO,
        None,
        None,
        None,
    ).context("Failed to create SwsContext")?;

    let img_index = std::sync::atomic::AtomicUsize::new(0);

    // Read frames from the file
    while let Some(packet) = input_format_context.read_packet()? {
        if packet.stream_index == video_stream_index as i32 {
            decode_context.send_packet(Some(&packet))?;

            while let Ok(cover_frame) = decode_context.receive_frame() {
                // RGB AVFrame output
                let mut rgb_frame = AVFrame::new();
                rgb_frame.set_format(ffi::AV_PIX_FMT_RGB24);
                rgb_frame.set_width(decode_context.width);
                rgb_frame.set_height(decode_context.height);
                rgb_frame.set_time_base(cover_frame.time_base);
                rgb_frame.set_pict_type(cover_frame.pict_type);
                rgb_frame.alloc_buffer()?;

                sws_context.scale_frame(
                    &cover_frame,
                    0,
                    decode_context.height,
                    &mut rgb_frame,
                )?;

                // Convert
                let mat = avframe_to_mat(&rgb_frame)?;
                println!("Converted AVFrame to Mat successfully.");

                imgcodecs::imwrite(
                    format!("/tmp/save/write_mat_{}.jpg", img_index.fetch_add(1, Ordering::SeqCst)).as_str(),
                    &mat,
                    &Vector::new(),
                ).unwrap();
            }
        }
    }

    Ok(())
}

Here's the original image: 1213770

Result: write_mat_0

The colors come out changed. How can I keep the original colors in the output?

phial3 commented 2 months ago

Sorry, I think I know what the mistake was: the AVFrame pixel format was set to AV_PIX_FMT_RGB24, while by default OpenCV reads and writes images in BGR channel order.
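
For reference, two ways to line the channel orders up (an editor-added sketch, not code from this thread; it assumes ffi::AV_PIX_FMT_BGR24 and the opencv crate's imgproc::cvt_color with COLOR_RGB2BGR are available in the versions used here):

// Option A: have swscale emit BGR24 directly, which is what imwrite expects.
rgb_frame.set_format(ffi::AV_PIX_FMT_BGR24);
// ...and pass ffi::AV_PIX_FMT_BGR24 as the destination format in SwsContext::get_context.

// Option B: keep RGB24 and swap channels on the OpenCV side before writing.
use opencv::imgproc;
let mut bgr_mat = Mat::default();
imgproc::cvt_color(&mat, &mut bgr_mat, imgproc::COLOR_RGB2BGR, 0)?;
imgcodecs::imwrite("/tmp/save/write_mat_bgr.jpg", &bgr_mat, &Vector::new())?;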

Codewithteju commented 2 months ago

Hey guys @phial3 @ldm0, can you help me optimize my function below, which converts an RGB array into a YUV frame? I don't want to do a memory copy or any pixel-modifying operations; without those, can I still convert this array back into a YUV frame?

pub fn rgb_array_to_yuv_frame(rgb_data: &[u8], width: i32, height: i32) -> AVFrame {
    // Check that the RGB buffer has the expected packed size
    assert_eq!(rgb_data.len(), (width * height * 3) as usize, "RGB data size mismatch");

    // Create an AVFrame for the YUV output
    let mut yuv_frame = AVFrame::new();
    yuv_frame.set_format(rsmpeg::ffi::AV_PIX_FMT_YUV420P);
    yuv_frame.set_width(width);
    yuv_frame.set_height(height);
    yuv_frame.alloc_buffer().unwrap();

    // Create an AVFrame for the RGB input
    let mut rgb_frame = AVFrame::new();
    rgb_frame.set_format(rsmpeg::ffi::AV_PIX_FMT_RGB24);
    rgb_frame.set_width(width);
    rgb_frame.set_height(height);
    rgb_frame.alloc_buffer().unwrap();

    // Copy the packed RGB data into the (possibly padded) frame row by row
    unsafe {
        let rgb_stride = rgb_frame.linesize[0];
        let packed_rgb_data: &[u8] = rgb_data;

        for y in 0..height {
            let row_start = rgb_frame.data[0].offset((y * rgb_stride) as isize);
            let row_slice = std::slice::from_raw_parts_mut(row_start, (width * 3) as usize);
            row_slice.copy_from_slice(&packed_rgb_data[(y * width * 3) as usize..((y + 1) * width * 3) as usize]);
        }
    }

    // Create a conversion context
    let mut sws_ctx = SwsContext::get_context(
        width,
        height,
        rsmpeg::ffi::AV_PIX_FMT_RGB24,
        width,
        height,
        rsmpeg::ffi::AV_PIX_FMT_YUV420P,
        rsmpeg::ffi::SWS_BILINEAR,
        None,
        None,
        None,
    ).unwrap();

    // Perform the conversion from RGB to YUV
    sws_ctx.scale_frame(&rgb_frame, 0, height, &mut yuv_frame).unwrap();

    yuv_frame
}

I am a beginner in Rust, so I haven't been able to work it out myself. The problem is that I can't handle the linesize of the array (with its padding) properly, so the output video doesn't come out in the right shape. Kindly help me with this.
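
On the linesize point: after alloc_buffer(), linesize[0] can be larger than width * 3 because FFmpeg pads each row for alignment, so the frame buffer may not be a packed array and cannot always be copied in a single block. Below is an editor-added sketch of a hypothetical padding-aware row copy, using only the frame fields already shown in this thread; a truly copy-free wrap of the caller's buffer would need unsafe manipulation of the raw AVFrame pointers and is not shown here. Note also that the RGB-to-YUV conversion itself (scale_frame) necessarily computes new pixel values, so that step cannot be made free of pixel operations.

/// Copy a packed RGB24 buffer (source stride = width * 3) into an AVFrame whose
/// rows may be padded (destination stride = linesize[0] >= width * 3).
fn copy_packed_rgb_into_frame(rgb_frame: &mut AVFrame, rgb_data: &[u8], width: i32, height: i32) {
    let src_stride = (width * 3) as usize;            // packed input rows
    let dst_stride = rgb_frame.linesize[0] as usize;  // padded frame rows
    assert_eq!(rgb_data.len(), src_stride * height as usize, "RGB data size mismatch");

    for y in 0..height as usize {
        let src_row = &rgb_data[y * src_stride..(y + 1) * src_stride];
        // Only the first width * 3 bytes of each destination row carry pixels;
        // the remaining dst_stride - src_stride bytes are alignment padding.
        let dst_row = unsafe {
            std::slice::from_raw_parts_mut(rgb_frame.data[0].add(y * dst_stride), src_stride)
        };
        dst_row.copy_from_slice(src_row);
    }
}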