IOL0ol1 / EmguFFmpeg

An FFmpeg.AutoGen wrapper library.
MIT License
72 stars 15 forks source link

VideoChromekeyFilter with an image #17

Closed ChristopheI closed 2 years ago

ChristopheI commented 2 years ago

ffmpeg version: 5.1

I tried successfully with basic filter like "scale", "fps", "crop" and even used them in the same filter graph. I'm successfully using VideoChromekeyFilter with 2 video files.

Now, with VideoChromekeyFilter, I'm trying to use an image as "input0", but it didn't work. I tried to change the code to get the MediaFrame from the image and then use it in the filter graph.

I did this:

                    // Get MediaFrame from the image file
                    // Decode the whole image input, keeping only the last decoded frame.
                    MediaFrame srcFrameImage = null;
                    foreach (var srcPacket in reader0.ReadPacket())
                    {
                        foreach (var srcFrame in reader0[videoIndex0].ReadFrame(srcPacket))
                        {
                            // NOTE(review): this stores a reference to the decoder's frame, not a
                            // copy — presumably the decoder reuses this buffer on the next decode;
                            // verify whether a Copy() is needed before the frame is used later.
                            srcFrameImage = srcFrame;
                        }
                    }

                    // Feed each decoded video frame, paired with the (single) image frame,
                    // through the filter graph, then convert and mux every filtered frame.
                    foreach (var srcPacket in reader1.ReadPacket())
                    {
                        foreach (var srcFrame in reader1[videoIndex1].ReadFrame(srcPacket))
                        {
                            // NOTE(review): the very same frame object is re-written to the buffer
                            // source on every iteration with an unchanged Pts — confirm the buffer
                            // source accepts repeated, non-advancing timestamps.
                            filterGraph.Inputs[0].WriteFrame(srcFrameImage); // Use the MediaFrame from the image file

                            filterGraph.Inputs[1].WriteFrame(srcFrame);

                            // Drain the sink: convert to the encoder's pixel format, stamp a
                            // monotonically increasing Pts, encode and write every packet.
                            foreach (var filterFrame in filterGraph.Outputs.First().ReadFrame())
                            {
                                foreach (var dstFrame in pixelConverter.Convert(filterFrame))
                                {
                                    SaveFrame(dstFrame, Path.Combine(dir, $"{pts}.bmp"));
                                    dstFrame.Pts = pts++;
                                    foreach (var dstPacket in writer[0].WriteFrame(dstFrame))
                                    {
                                        writer.WritePacket(dstPacket);
                                    }
                                }
                            }
                        }
                    }

As a result, I have a video file without the overlay, which is the same as the one I set as "input1".

What is wrong with this code? I am missing something, but I don't know what.

Thanks

ChristopheI commented 2 years ago

I made a mistake in my code. I assigned srcFrame directly to srcFrameImage (without making a copy).

So I tried to make a Copy of srcFrame to create srcFrameImage like this:

srcFrameImage = srcFrame.Copy();

But it doesn't work either:

filterGraph.Inputs[0].WriteFrame(srcFrameImage);  // <= failed with -22 as error.

I tried to use WriteFrame with different parameters without success: BufferSrcFlags.Push, BufferSrcFlags.None, ...

IOL0ol1 commented 2 years ago

@ChristopheI Can you provide the most concise code that can be compiled and executed?

ChristopheI commented 2 years ago

With

        /// <summary>
        /// Chroma-keys the picture decoded from <paramref name="input0"/> over the video
        /// from <paramref name="input1"/> and writes the composited result to
        /// <paramref name="output"/>.
        /// </summary>
        /// <param name="input0">file (e.g. a still image) whose keyed color is made transparent</param>
        /// <param name="input1">background video file</param>
        /// <param name="output">output video path; a working directory named after it is also created</param>
        /// <param name="chromakeyOptions">options string passed to the ffmpeg "chromakey" filter</param>
        public VideoChromekeyFilter(string input0, string input1, string output, string chromakeyOptions = "green:0.1:0.0")
        {
            var dir = Directory.CreateDirectory(Path.Combine(Path.GetDirectoryName(output), Path.GetFileNameWithoutExtension(output))).FullName;

            using (MediaReader reader0 = new MediaReader(input0))
            using (MediaReader reader1 = new MediaReader(input1))
            using (MediaWriter writer = new MediaWriter(output))
            {
                var videoIndex0 = reader0.Where(_ => _.Codec.AVCodecContext.codec_type == AVMediaType.AVMEDIA_TYPE_VIDEO).First().Index;
                var videoIndex1 = reader1.Where(_ => _.Codec.AVCodecContext.codec_type == AVMediaType.AVMEDIA_TYPE_VIDEO).First().Index;

                // init complex filter graph: each input gets a "buffer" source configured
                // from its stream's dimensions, pixel format, time base and aspect ratio.
                int height0 = reader0[videoIndex0].Codec.AVCodecContext.height;
                int width0 = reader0[videoIndex0].Codec.AVCodecContext.width;
                int format0 = (int)reader0[videoIndex0].Codec.AVCodecContext.pix_fmt;
                AVRational time_base0 = reader0[videoIndex0].TimeBase;
                AVRational sample_aspect_ratio0 = reader0[videoIndex0].Codec.AVCodecContext.sample_aspect_ratio;

                int height1 = reader1[videoIndex1].Codec.AVCodecContext.height;
                int width1 = reader1[videoIndex1].Codec.AVCodecContext.width;
                int format1 = (int)reader1[videoIndex1].Codec.AVCodecContext.pix_fmt;
                AVRational time_base1 = reader1[videoIndex1].TimeBase;
                AVRational sample_aspect_ratio1 = reader1[videoIndex1].Codec.AVCodecContext.sample_aspect_ratio;

                MediaFilterGraph filterGraph = new MediaFilterGraph();
                var in0 = filterGraph.AddVideoSrcFilter(new MediaFilter(MediaFilter.VideoSources.Buffer), width0, height0, (AVPixelFormat)format0, time_base0, sample_aspect_ratio0);
                var in1 = filterGraph.AddVideoSrcFilter(new MediaFilter(MediaFilter.VideoSources.Buffer), width1, height1, (AVPixelFormat)format1, time_base1, sample_aspect_ratio1);
                var chromakey = filterGraph.AddFilter(new MediaFilter("chromakey"), chromakeyOptions);
                var overlay = filterGraph.AddFilter(new MediaFilter("overlay"));
                var out0 = filterGraph.AddVideoSinkFilter(new MediaFilter(MediaFilter.VideoSinks.Buffersink));
                // overlay pad 1 is the overlaid (keyed) picture, pad 0 the main picture.
                in0.LinkTo(0, chromakey, 0).LinkTo(0, overlay, 1).LinkTo(0, out0, 0);
                in1.LinkTo(0, overlay, 0);
                filterGraph.Initialize();

                // add stream by reader and init writer
                writer.AddStream(reader0[videoIndex0]);
                writer.Initialize();

                // init video frame format converter by dstcodec
                PixelConverter pixelConverter = new PixelConverter(writer[0].Codec);

                long pts = 0;
                // Get MediaFrame from the image file: decode it once and keep a private copy
                // (a plain reference would alias the decoder-owned buffer).
                MediaFrame srcFrameImage = null;
                foreach (var srcPacket in reader0.ReadPacket())
                {
                    foreach (var srcFrame in reader0[videoIndex0].ReadFrame(srcPacket))
                    {
                        srcFrameImage = srcFrame.Copy();
                    }
                }

                long imagePts = 0;
                foreach (var srcPacket in reader1.ReadPacket())
                {
                    foreach (var srcFrame in reader1[videoIndex1].ReadFrame(srcPacket))
                    {
                        // BUGFIX: re-submitting the same frame with an unchanged Pts made the
                        // buffer source reject it with -22 (EINVAL); the timestamp must advance
                        // on every write.
                        srcFrameImage.Pts = imagePts++;
                        filterGraph.Inputs[0].WriteFrame(srcFrameImage);
                        filterGraph.Inputs[1].WriteFrame(srcFrame);

                        foreach (var filterFrame in filterGraph.Outputs.First().ReadFrame())
                        {
                            foreach (var dstFrame in pixelConverter.Convert(filterFrame))
                            {
                                dstFrame.Pts = pts++;
                                foreach (var dstPacket in writer[0].WriteFrame(dstFrame))
                                {
                                    writer.WritePacket(dstPacket);
                                }
                            }
                        }
                    }
                }

                // flush codec cache
                writer.FlushMuxer();
            }
        }
ChristopheI commented 2 years ago

Hi @IOL0ol1 , I notice your fix about AudioFrame.Copy() and VideoFrame.Copy(). It's working great !

So now it's easier to have an overlay using the same MediaFrame based on the image.

In order to display it on each frame, I had to change my sample to set a different value for the "Pts" of this MediaFrame, like this:

        /// <summary>
        /// Chroma-keys the picture from <paramref name="input0"/> over the video from
        /// <paramref name="input1"/> and writes the composited video to <paramref name="output"/>.
        /// </summary>
        /// <param name="input0">file (e.g. a still image) whose keyed color is made transparent</param>
        /// <param name="input1">background video file</param>
        /// <param name="output">output video path; a working directory named after it is also created</param>
        /// <param name="chromakeyOptions">options string for the ffmpeg "chromakey" filter</param>
        public VideoChromekeyFilter(string input0, string input1, string output, string chromakeyOptions = "green:0.1:0.0")
        {
            // Side effect kept from the original: create a working directory named after the output.
            var dir = Directory.CreateDirectory(Path.Combine(Path.GetDirectoryName(output), Path.GetFileNameWithoutExtension(output))).FullName;

            using (MediaReader imageReader = new MediaReader(input0))
            using (MediaReader videoReader = new MediaReader(input1))
            using (MediaWriter writer = new MediaWriter(output))
            {
                var imageIndex = imageReader.Where(_ => _.Codec.AVCodecContext.codec_type == AVMediaType.AVMEDIA_TYPE_VIDEO).First().Index;
                var videoIndex = videoReader.Where(_ => _.Codec.AVCodecContext.codec_type == AVMediaType.AVMEDIA_TYPE_VIDEO).First().Index;

                var imageStream = imageReader[imageIndex];
                var videoStream = videoReader[videoIndex];

                // Build the complex filter graph:
                //   image -> chromakey -> overlay (pad 1, overlaid picture)
                //   video -------------> overlay (pad 0, main picture) -> buffersink
                MediaFilterGraph filterGraph = new MediaFilterGraph();
                var imageSrc = filterGraph.AddVideoSrcFilter(
                    new MediaFilter(MediaFilter.VideoSources.Buffer),
                    imageStream.Codec.AVCodecContext.width,
                    imageStream.Codec.AVCodecContext.height,
                    imageStream.Codec.AVCodecContext.pix_fmt,
                    imageStream.TimeBase,
                    imageStream.Codec.AVCodecContext.sample_aspect_ratio);
                var videoSrc = filterGraph.AddVideoSrcFilter(
                    new MediaFilter(MediaFilter.VideoSources.Buffer),
                    videoStream.Codec.AVCodecContext.width,
                    videoStream.Codec.AVCodecContext.height,
                    videoStream.Codec.AVCodecContext.pix_fmt,
                    videoStream.TimeBase,
                    videoStream.Codec.AVCodecContext.sample_aspect_ratio);
                var chromakey = filterGraph.AddFilter(new MediaFilter("chromakey"), chromakeyOptions);
                var overlay = filterGraph.AddFilter(new MediaFilter("overlay"));
                var sink = filterGraph.AddVideoSinkFilter(new MediaFilter(MediaFilter.VideoSinks.Buffersink));
                imageSrc.LinkTo(0, chromakey, 0).LinkTo(0, overlay, 1).LinkTo(0, sink, 0);
                videoSrc.LinkTo(0, overlay, 0);
                filterGraph.Initialize();

                // The output stream copies the image reader's stream parameters.
                writer.AddStream(imageStream);
                writer.Initialize();

                // Converts filtered frames to the destination codec's pixel format.
                PixelConverter pixelConverter = new PixelConverter(writer[0].Codec);

                long pts = 0;
                // Decode the (single) picture once and keep a private copy; it is re-used
                // for every frame of the background video.
                MediaFrame keyFrame = null;
                foreach (var packet in imageReader.ReadPacket())
                {
                    foreach (var frame in imageStream.ReadFrame(packet))
                    {
                        keyFrame = frame.Copy();
                    }
                }

                long keyPts = 0;
                foreach (var packet in videoReader.ReadPacket())
                {
                    foreach (var frame in videoStream.ReadFrame(packet))
                    {
                        // The buffer source needs an advancing timestamp on each submission
                        // of the re-used image frame.
                        keyFrame.Pts = keyPts++;
                        filterGraph.Inputs[0].WriteFrame(keyFrame);
                        filterGraph.Inputs[1].WriteFrame(frame);

                        // Drain the sink, convert, stamp a monotonic Pts, encode and mux.
                        foreach (var filtered in filterGraph.Outputs.First().ReadFrame())
                        {
                            foreach (var converted in pixelConverter.Convert(filtered))
                            {
                                converted.Pts = pts++;
                                foreach (var outPacket in writer[0].WriteFrame(converted))
                                {
                                    writer.WritePacket(outPacket);
                                }
                            }
                        }
                    }
                }

                // flush codec cache
                writer.FlushMuxer();
            }
        }

I'm wondering now how to deal with Pts if i want to overlay a video of X fps on another video of Y fps. Do you have any hint for this ? Thx

ChristopheI commented 2 years ago

I finally found a way to create a "universal" method to display input0 over input1 in these situations:

I had to detect whether an input is an image or a video (I check the duration for this. Do you know a better way?). When an image is used, I have to use a "loop" filter.

I also need to get the fps of each input (I'm not sure the way I'm doing this is the best one...). If the fps of input1 is less than the fps of input0, I need to use an "fps" filter to increase it. This is not necessary if the fps of input1 is greater than that of input0.

Here is the full code used. If you have any comment about it don't hesitate.

    public class VideoOverlay
    {
        /// <summary>
        /// Displays input0 over input1:
        ///     - input0 must have a smaller size than input1
        ///     - input0 can be an image or a video file (whatever its fps)
        ///     - input1 can be an image or a video file (whatever its fps)
        ///     - input0 and input1 must not both be images
        ///     - output must be a video format
        /// 
        /// The final result has the size of input1 and an fps equal to the max of input0 and input1.
        /// </summary> 
        /// <param name="input0">foreground (this input must have a smaller size than input1)</param>
        /// <param name="input1">background</param>
        /// <param name="output">output</param>
        public VideoOverlay(string input0, string input1, string output)
        {
            using (MediaReader reader0 = new MediaReader(input0))
            using (MediaReader reader1 = new MediaReader(input1))
            using (MediaWriter writer = new MediaWriter(output))
            {
                var videoIndex0 = reader0.Where(_ => _.Codec.AVCodecContext.codec_type == AVMediaType.AVMEDIA_TYPE_VIDEO).First().Index;
                var videoIndex1 = reader1.Where(_ => _.Codec.AVCodecContext.codec_type == AVMediaType.AVMEDIA_TYPE_VIDEO).First().Index;

                var stream0 = reader0[videoIndex0];
                var stream1 = reader1[videoIndex1];

                var fps0 = GetFps(stream0);
                var fps1 = GetFps(stream1);

                // The output always takes the background's dimensions.
                int finalWidth = stream1.Codec.AVCodecContext.width;
                int finalHeight = stream1.Codec.AVCodecContext.height;

                // init complex filter graph
                MediaFilterGraph filterGraph = new MediaFilterGraph();
                var in0 = GetMediaFilterContext(filterGraph, stream0);
                var in1 = GetMediaFilterContext(filterGraph, stream1);

                var overlay = filterGraph.AddFilter(new MediaFilter("overlay"));
                var out0 = filterGraph.AddVideoSinkFilter(new MediaFilter(MediaFilter.VideoSinks.Buffersink));

                // If input0 is an image, loop its single frame so it is displayed over the whole output
                if (IsImage(stream0))
                {
                    var loop0 = filterGraph.AddFilter(new MediaFilter("loop"), "loop=-1:size=1:start=0");
                    in0 = in0.LinkTo(0, loop0, 0);
                }

                // Same for input1
                if (IsImage(stream1))
                {
                    var loop1 = filterGraph.AddFilter(new MediaFilter("loop"), "loop=-1:size=1:start=0");
                    in1 = in1.LinkTo(0, loop1, 0);
                }

                // The final output uses the greatest fps of the two inputs, so when the
                // background is the slower one it is upsampled through an "fps" filter.
                // Nothing to do in the opposite case.
                if (fps1 < fps0)
                {
                    var filterFps = filterGraph.AddFilter(new MediaFilter("fps"), fps0.ToString());
                    in1 = in1.LinkTo(0, filterFps, 0);
                }

                // overlay pad 1 = overlaid picture (foreground), pad 0 = main picture (background)
                in0.LinkTo(0, overlay, 1).LinkTo(0, out0, 0);
                in1.LinkTo(0, overlay, 0);

                filterGraph.Initialize();

                // The encoder must be created with the final width/height and the max fps
                // between both inputs.
                MediaEncoder mediaEncoder = MediaEncoder.CreateVideoEncode(writer.Format, finalWidth, finalHeight, Math.Max(fps0, fps1));
                writer.AddStream(mediaEncoder);
                writer.Initialize();

                // init video frame format converter by dstcodec
                PixelConverter pixelConverter = new PixelConverter(writer[0].Codec);

                long pts = 0;
                // NOTE(review): inputs are drained sequentially (all of reader0, then all of
                // reader1) rather than interleaved — confirm this is intended for long videos.
                MediaReader[] readers = new MediaReader[] { reader0, reader1 };
                int[] index = new int[] { videoIndex0, videoIndex1 };
                for (int i = 0; i < readers.Length; i++)
                {
                    var reader = readers[i];
                    foreach (var srcPacket in reader.ReadPacket())
                    {
                        foreach (var srcFrame in reader[index[i]].ReadFrame(srcPacket))
                        {
                            filterGraph.Inputs[i].WriteFrame(srcFrame);
                            foreach (var filterFrame in filterGraph.Outputs.First().ReadFrame())
                            {
                                foreach (var dstFrame in pixelConverter.Convert(filterFrame))
                                {
                                    dstFrame.Pts = pts++;
                                    foreach (var dstPacket in writer[0].WriteFrame(dstFrame))
                                    {
                                        writer.WritePacket(dstPacket);
                                    }
                                }
                            }
                        }
                    }
                }

                // flush codec cache
                writer.FlushMuxer();

                mediaEncoder.Dispose();
                pixelConverter.Dispose();
                filterGraph.Dispose();
                // reader0, reader1 and writer are disposed by the enclosing using blocks;
                // the original's explicit Dispose() calls on them were redundant double-disposes.
            }
        }

        /// <summary>
        /// Adds a "buffer" video source filter configured from the given stream's
        /// dimensions, pixel format, time base and sample aspect ratio.
        /// </summary>
        private MediaFilterContext GetMediaFilterContext(MediaFilterGraph filterGraph, MediaStream stream0)
        {
            int height = stream0.Codec.AVCodecContext.height;
            int width = stream0.Codec.AVCodecContext.width;
            int format = (int)stream0.Codec.AVCodecContext.pix_fmt;
            AVRational time_base0 = stream0.TimeBase;
            AVRational sample_aspect_ratio0 = stream0.Codec.AVCodecContext.sample_aspect_ratio;
            return filterGraph.AddVideoSrcFilter(new MediaFilter(MediaFilter.VideoSources.Buffer), width, height, (AVPixelFormat)format, time_base0, sample_aspect_ratio0);
        }

        /// <summary>
        /// Heuristic: a stream with a negative duration is treated as a still image.
        /// NOTE(review): presumably an unknown duration maps to a negative value here —
        /// confirm against the MediaStream implementation.
        /// </summary>
        private Boolean IsImage(MediaStream mediaStream)
        {
            return mediaStream.Duration < 0;
        }

        /// <summary>
        /// Returns the stream's frame rate rounded to the nearest integer, preferring
        /// avg_frame_rate and falling back to r_frame_rate when it is unset.
        /// Returns 0 when neither rate is available.
        /// </summary>
        private int GetFps(MediaStream mediaStream)
        {
            // Compute num/den directly instead of the original's ToInvert() + den/num
            // double inversion; also guard the denominator.
            int fps = 0;
            AVRational rate = mediaStream.Stream.avg_frame_rate;
            if (rate.num != 0 && rate.den != 0)
                fps = (int)Math.Round((double)rate.num / rate.den);

            // BUGFIX: the original condition was inverted (`if (originalFps != 0)`), so a
            // valid avg_frame_rate result was always overwritten by r_frame_rate and a
            // missing one was never rescued. Fall back only when no rate was found.
            if (fps == 0)
            {
                rate = mediaStream.Stream.r_frame_rate;
                if (rate.num != 0 && rate.den != 0)
                    fps = (int)Math.Round((double)rate.num / rate.den);
            }
            return fps;
        }
    }

Now I will try to do the same but using video live stream.