intel / libvpl

Intel® Video Processing Library (Intel® VPL) API, dispatcher, and examples
https://intel.github.io/libvpl/
MIT License
262 stars 80 forks source link

LookAheadDepth a valid option for AV1? #79

Closed oviano closed 1 year ago

oviano commented 1 year ago

A cursory look at the code in the oneVPL-intel-gpu suggests this option should work for AV1. At least there are plenty of references to it in apparent AV1 code.

To test this, I modified sample_encoder.cpp to encode AV1, but as soon as I specify the LookAheadDepth option the sync operation gives error -16 (undefined behaviour).

Without this option, the encode works correctly.

Am I doing something wrong?

I also noticed that both FFmpeg and QSVEnc appear to hang when attempting to use the lookahead depth option.

I am using Windows 11 Pro, latest 3802 driver and latest oneVPL release (2022.2.5).

Here is my modified code, which sets up the extra parameters.

//==============================================================================
// Copyright Intel Corporation
//
// SPDX-License-Identifier: MIT
//==============================================================================

///
/// A minimal oneAPI Video Processing Library (oneVPL) encode application,
/// using 2.x API with internal memory management
///
/// @file

#include "util.h"

#define TARGETKBPS                 4000
#define FRAMERATE                  30
#define OUTPUT_FILE                "out.av1"
#define BITSTREAM_BUFFER_SIZE      2000000
#define MAJOR_API_VERSION_REQUIRED 2
#define MINOR_API_VERSION_REQUIRED 2

/// Print command-line help for this sample to stdout.
///
/// The text lists the accepted switches, an example invocation, and the
/// name of the output file (OUTPUT_FILE) that the encoder writes.
void Usage(void) {
    // Invocation syntax and the supported switches (no format arguments).
    printf("\n"
           "   Usage  :  hello-encode\n"
           "     -hw        use hardware implementation\n"
           "     -sw        use software implementation\n"
           "     -i input file name (raw frames)\n"
           "     -w input width\n"
           "     -h input height\n\n"
           "   Example:  hello-encode -i in.i420 -w 320 -h 240\n");

    // The two lines that mention the fixed output file name.
    printf("   To view:  ffplay %s\n\n"
           " * Encode raw frames to AV1 elementary stream in %s\n\n",
           OUTPUT_FILE,
           OUTPUT_FILE);

    // Reminder about the native input color format for each implementation type.
    printf("   CPU native color format is I420/yuv420p.  GPU native color format is NV12\n");
}

int main(int argc, char *argv[]) {
    // Variables used for legacy and 2.x
    bool isDraining                = false;
    bool isStillGoing              = true;
    bool isFailed                  = false;
    FILE *sink                     = NULL;
    FILE *source                   = NULL;
    mfxBitstream bitstream         = {};
    mfxFrameSurface1 *encSurfaceIn = NULL;
    mfxSession session             = NULL;
    mfxSyncPoint syncp             = {};
    mfxU32 framenum                = 0;
    mfxStatus sts                  = MFX_ERR_NONE;
    mfxStatus sts_r                = MFX_ERR_NONE;
    Params cliParams               = {};
    mfxVideoParam encodeParams     = {};
    mfxExtCodingOption extc        = {};
    mfxExtCodingOption2 extco2     = {};
    mfxExtCodingOption3 extco3     = {};

    // variables used only in 2.x version
    mfxConfig cfg[3];
    mfxVariant cfgVal[3];
    mfxLoader loader = NULL;

    // Parse command line args to cliParams
    if (ParseArgsAndValidate(argc, argv, &cliParams, PARAMS_ENCODE) == false) {
        Usage();
        return 1; // return 1 as error code
    }

    source = fopen(cliParams.infileName, "rb");
    VERIFY(source, "Could not open input file");

    sink = fopen(OUTPUT_FILE, "wb");
    VERIFY(sink, "Could not create output file");

    // Initialize VPL session
    loader = MFXLoad();
    VERIFY(NULL != loader, "MFXLoad failed -- is implementation in path?");

    // Implementation used must be the type requested from command line
    cfg[0] = MFXCreateConfig(loader);
    VERIFY(NULL != cfg[0], "MFXCreateConfig failed")

    sts =
        MFXSetConfigFilterProperty(cfg[0], (mfxU8 *)"mfxImplDescription.Impl", cliParams.implValue);
    VERIFY(MFX_ERR_NONE == sts, "MFXSetConfigFilterProperty failed for Impl");

    // Implementation must provide an AV1 encoder
    cfg[1] = MFXCreateConfig(loader);
    VERIFY(NULL != cfg[1], "MFXCreateConfig failed")
    cfgVal[1].Type     = MFX_VARIANT_TYPE_U32;
    cfgVal[1].Data.U32 = MFX_CODEC_AV1;
    sts                = MFXSetConfigFilterProperty(
        cfg[1],
        (mfxU8 *)"mfxImplDescription.mfxEncoderDescription.encoder.CodecID",
        cfgVal[1]);
    VERIFY(MFX_ERR_NONE == sts, "MFXSetConfigFilterProperty failed for encoder CodecID");

    // Implementation used must provide API version 2.2 or newer
    cfg[2] = MFXCreateConfig(loader);
    VERIFY(NULL != cfg[2], "MFXCreateConfig failed")
    cfgVal[2].Type     = MFX_VARIANT_TYPE_U32;
    cfgVal[2].Data.U32 = VPLVERSION(MAJOR_API_VERSION_REQUIRED, MINOR_API_VERSION_REQUIRED);
    sts                = MFXSetConfigFilterProperty(cfg[2],
                                     (mfxU8 *)"mfxImplDescription.ApiVersion.Version",
                                     cfgVal[2]);
    VERIFY(MFX_ERR_NONE == sts, "MFXSetConfigFilterProperty failed for API version");

    sts = MFXCreateSession(loader, 0, &session);
    VERIFY(MFX_ERR_NONE == sts,
           "Cannot create session -- no implementations meet selection criteria");

    // Print info about implementation loaded
    ShowImplementationInfo(loader, 0);

    // Initialize encode parameters
    encodeParams.mfx.CodecId                 = MFX_CODEC_AV1;
    encodeParams.mfx.TargetUsage             = MFX_TARGETUSAGE_BALANCED;
    encodeParams.mfx.TargetKbps              = TARGETKBPS;
    encodeParams.mfx.RateControlMethod       = MFX_RATECONTROL_VBR;
    encodeParams.mfx.FrameInfo.FrameRateExtN = FRAMERATE;
    encodeParams.mfx.FrameInfo.FrameRateExtD = 1;
    if (MFX_IMPL_SOFTWARE == cliParams.impl) {
        encodeParams.mfx.FrameInfo.FourCC = MFX_FOURCC_I420;
    }
    else {
        encodeParams.mfx.FrameInfo.FourCC = MFX_FOURCC_NV12;
    }
    encodeParams.mfx.FrameInfo.ChromaFormat = MFX_CHROMAFORMAT_YUV420;
    encodeParams.mfx.FrameInfo.CropW        = cliParams.srcWidth;
    encodeParams.mfx.FrameInfo.CropH        = cliParams.srcHeight;
    encodeParams.mfx.FrameInfo.Width        = ALIGN16(cliParams.srcWidth);
    encodeParams.mfx.FrameInfo.Height       = ALIGN16(cliParams.srcHeight);

    encodeParams.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY;

    // Initialise look ahead
    mfxExtBuffer* extparam[3] = { nullptr };
    encodeParams.ExtParam = extparam;
    encodeParams.NumExtParam = 0;
    extco2.Header.BufferId = MFX_EXTBUFF_CODING_OPTION2;
    extco2.Header.BufferSz = sizeof(extco2);
    extco2.ExtBRC = MFX_CODINGOPTION_ON;
    extco2.LookAheadDepth = 10;
    encodeParams.ExtParam[encodeParams.NumExtParam++] = (mfxExtBuffer*)&extco2;

    // Initialize encoder
    sts = MFXVideoENCODE_Init(session, &encodeParams);
    VERIFY(MFX_ERR_NONE == sts, "Encode init failed");

    // Prepare output bitstream
    bitstream.MaxLength = BITSTREAM_BUFFER_SIZE;
    bitstream.Data      = (mfxU8 *)calloc(bitstream.MaxLength, sizeof(mfxU8));

    printf("Encoding %s -> %s\n", cliParams.infileName, OUTPUT_FILE);

    printf("Input colorspace: ");
    switch (encodeParams.mfx.FrameInfo.FourCC) {
        case MFX_FOURCC_I420: // CPU input
            printf("I420 (aka yuv420p)\n");
            break;
        case MFX_FOURCC_NV12: // GPU input
            printf("NV12\n");
            break;
        default:
            printf("Unsupported color format\n");
            isFailed = true;
            goto end;
            break;
    }

    mfxU64 next_pts = 0;

    while (isStillGoing == true) {
        // Load a new frame if not draining
        if (isDraining == false) {
            sts = MFXMemory_GetSurfaceForEncode(session, &encSurfaceIn);
            VERIFY(MFX_ERR_NONE == sts, "Could not get encode surface");

            sts = ReadRawFrame_InternalMem(encSurfaceIn, source);
            if (sts != MFX_ERR_NONE)
                isDraining = true;
        }

        encSurfaceIn->Data.TimeStamp = next_pts;
        next_pts += 90000 / FRAMERATE;

        sts = MFXVideoENCODE_EncodeFrameAsync(session,
                                              NULL,
                                              (isDraining == true) ? NULL : encSurfaceIn,
                                              &bitstream,
                                              &syncp);

        if (!isDraining) {
            sts_r = encSurfaceIn->FrameInterface->Release(encSurfaceIn);
            VERIFY(MFX_ERR_NONE == sts_r, "mfxFrameSurfaceInterface->Release failed");
        }
        switch (sts) {
            case MFX_ERR_NONE:
                // MFX_ERR_NONE and syncp indicate output is available
                if (syncp) {
                    // Encode output is not available on CPU until sync operation
                    // completes
                    sts = MFXVideoCORE_SyncOperation(session, syncp, WAIT_100_MILLISECONDS);
                    printf("MFXVideoCORE_SyncOperation returned %d\n", sts);
                    VERIFY(MFX_ERR_NONE == sts, "MFXVideoCORE_SyncOperation error");
                    WriteEncodedStream(bitstream, sink);
                    framenum++;
                }
                break;
            case MFX_ERR_NOT_ENOUGH_BUFFER:
                // This example deliberatly uses a large output buffer with immediate
                // write to disk for simplicity. Handle when frame size exceeds
                // available buffer here
                break;
            case MFX_ERR_MORE_DATA:
                // The function requires more data to generate any output
                if (isDraining == true)
                    isStillGoing = false;
                break;
            case MFX_ERR_DEVICE_LOST:
                // For non-CPU implementations,
                // Cleanup if device is lost
                break;
            case MFX_WRN_DEVICE_BUSY:
                // For non-CPU implementations,
                // Wait a few milliseconds then try again
                break;
            default:
                printf("unknown status %d\n", sts);
                isStillGoing = false;
                break;
        }
    }

end:
    printf("Encoded %d frames\n", framenum);

    // Clean up resources - It is recommended to close components first, before
    // releasing allocated surfaces, since some surfaces may still be locked by
    // internal resources.
    if (source)
        fclose(source);

    if (sink)
        fclose(sink);

    MFXVideoENCODE_Close(session);
    MFXClose(session);

    if (bitstream.Data)
        free(bitstream.Data);

    if (loader)
        MFXUnload(loader);

    if (isFailed) {
        return -1;
    }
    else {
        return 0;
    }
}
oviano commented 1 year ago

Further experimentation suggests that there is some relation to the 'scenario' option.

If I specify a scenario of MFX_SCENARIO_UNKNOWN it gives the error described above. If I choose MFX_SCENARIO_GAME_STREAMING then initialisation fails. If I choose MFX_SCENARIO_ARCHIVE it appears to work.

Is that all working correctly?

oviano commented 1 year ago

Just to add some further information. Whoever did the FFmpeg implementation of QSVenc AV1 clearly assumed that LookAheadDepth is a valid option for both the CBR and VBR rate control methods:

    switch (q->param.mfx.RateControlMethod) {
    case MFX_RATECONTROL_CBR:
    case MFX_RATECONTROL_VBR:
        if (q->extbrc == 1) {
            q->extco2.LookAheadDepth = q->look_ahead_depth;
        }

and

/* Byte offset of member x within QSVAV1EncContext. */
#define OFFSET(x) offsetof(QSVAV1EncContext, x)
/* Flag combination used by every explicit entry below: video + encoding parameter.
 * NOTE(review): the expansion is not parenthesized. */
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/* Option table for the AV1 QSV encoder. The QSV_* lines are macros defined
 * elsewhere (not shown here) that presumably expand to further entries. */
static const AVOption options[] = {
    QSV_COMMON_OPTS
    QSV_OPTION_B_STRATEGY
    QSV_OPTION_ADAPTIVE_I
    QSV_OPTION_ADAPTIVE_B
    QSV_OPTION_EXTBRC
    QSV_OPTION_SCENARIO
    /* "profile" option; the two named constants that follow share its "profile" unit string. */
    { "profile", NULL, OFFSET(qsv.profile), AV_OPT_TYPE_INT, { .i64 = MFX_PROFILE_UNKNOWN }, 0, INT_MAX, VE, "profile" },
        { "unknown" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_UNKNOWN      }, INT_MIN, INT_MAX,     VE, "profile" },
        { "main"    , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_AV1_MAIN     }, INT_MIN, INT_MAX,     VE, "profile" },
    /* Tiling options, stored in qsv.tile_cols / qsv.tile_rows. */
    { "tile_cols",  "Number of columns for tiled encoding",   OFFSET(qsv.tile_cols),    AV_OPT_TYPE_INT, { .i64 = 0 }, 0, UINT16_MAX, VE },
    { "tile_rows",  "Number of rows for tiled encoding",      OFFSET(qsv.tile_rows),    AV_OPT_TYPE_INT, { .i64 = 0 }, 0, UINT16_MAX, VE },
    /* Look-ahead depth, stored in qsv.look_ahead_depth; the help text ties it to the extbrc option. */
    { "look_ahead_depth", "Depth of look ahead in number frames, available when extbrc option is enabled", OFFSET(qsv.look_ahead_depth), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 100, VE },
    /* Sentinel entry terminating the table. */
    { NULL },
};

However, in my tests, regardless of whether I set look_ahead_depth to 10, 50, or 100 it produces an identical file in QSVEnc AV1. And if I turn it off completely, in my tests the file is fractionally larger and with a fractionally higher PSNR, SSIM, VMAF...which doesn't make sense as lookahead should be improving things not degrading them.

So some clarification as to whether this option is supposed to work in AV1 would be the first thing. Maybe the option is not supposed to be available in FFmpeg for AV1. Or maybe it is and it's broken.

dvrogozh commented 1 year ago

Look ahead is a valid option for AV1, but you need other option(s) to enable it. A key one would be -extbrc 1. Read here:

And for the actual command lines which would use Look Ahead with AV1 see below in the same document:

Caveat is that I am pointing you to the stuff which was prepared for Linux. I think it should be applicable for Windows as well though.

oviano commented 1 year ago

Ok that's great info, thank you very much.

I am using -extbrc 1 already, but I'll look into the other options listed, as from my tests so far the combination of -extbrc 1 -look_ahead_depth X isn't making any difference.

oviano commented 1 year ago

Is scenario a valid AV1 option?

dvrogozh commented 1 year ago

Is scenario a valid AV1 option?

That I don't know. I am aware of this option, but we did not use it on our side. I will need to ask around. However, I do not think that scenario option is supported for AV1 encoding on ffmpeg side. Initially it was added only for avc and hevc in this commit:

And latest ffmpeg master still does not have it for AV1:

This code in oneVPL runtime implies that it is supported, but it's just looking into the code, I did not try it out:

When you wrote above about "scenario" affecting AV1 look ahead, you did experiments with your own code or with ffmpeg? /I guess with your own code/

oviano commented 1 year ago

Yes with the modified sample_encode.

I then added the scenario option to my local FFmpeg build so that I could set it to "archive" and get look_ahead_depth working without hanging.

oviano commented 1 year ago

So here are the results of some tests, following the guidance in the links above.

Source file:

https://ovcollyer-colebrooke.dyndns.org:4001/d/s/r5qm4zIURxrogQ9tAkee6Na6kMACl4NH/-YLQRSo2-6BuyfXDxkqoaMQj8nMh95jA-77vgIMDRBAo

This is a 10 minute 1080p50 capture of some live football, which was encoded losslessly using NVENC HEVC.

I am using a custom-built FFmpeg exe based on master/head but patched using the patch I wrote for this issue I logged regarding broken timestamps with the Windows driver:

https://trac.ffmpeg.org/ticket/10062

Here are the commands and the corresponding PSNR, SSIM and VMAF scores when the first 60s of the source are analysed.

I am using the tool FFmetrics:

https://github.com/fifonik/FFMetrics

ffmpeg -init_hw_device qsv=intel,child_device=1 -vcodec hevc_qsv -extra_hw_frames 40 -i football.mp4 -vcodec av1_qsv -profile:v main -level 51 -preset veryslow -scenario archive -vb 3000k -minrate 3000k -maxrate 3000k -bufsize 6000k -rc_init_occupancy 3000k -extbrc 1 -look_ahead_depth 40 -b_strategy 1 -bf 7 -g 256 -adaptive_i 1 -adaptive_b 1 -strict -1 output1.mp4 -y

PSNR 37.1029, SSIM 0.9529, VMAF 89.2757

ffmpeg -init_hw_device qsv=intel,child_device=1 -vcodec hevc_qsv -extra_hw_frames 40 -i football.mp4 -vcodec av1_qsv -profile:v main -level 51 -preset veryslow -scenario archive -vb 3000k -maxrate 6000k -bufsize 12000k -rc_init_occupancy 6000k -extbrc 1 -look_ahead_depth 40 -b_strategy 1 -bf 7 -g 256 -adaptive_i 1 -adaptive_b 1 -strict -1 output2.mp4 -y

PSNR 37.1101, SSIM 0.9529, VMAF 89.2871

ffmpeg -init_hw_device qsv=intel,child_device=1 -vcodec hevc_qsv -i football.mp4 -vcodec av1_qsv -profile:v main -level 51 -preset veryslow -scenario archive -vb 3000k -maxrate 3000k -g 256 output3.mp4 -y

PSNR 37.1428, SSIM 0.9532, VMAF 89.3651

ffmpeg -init_hw_device qsv=intel,child_device=1 -vcodec hevc_qsv -i football.mp4 -vcodec av1_qsv -profile:v main -level 51 -preset veryslow -scenario archive -vb 3000k -maxrate 6000k -g 256 output4.mp4 -y

PSNR 37.1428, SSIM 0.9532, VMAF 89.3651

ffmpeg -init_hw_device qsv=intel,child_device=1 -vcodec hevc_qsv -i football.mp4 -vcodec av1_qsv -profile:v main -level 51 -preset veryslow -vb 3000k -maxrate 3000k -g 256 output5.mp4 -y

PSNR 37.1428, SSIM 0.9532, VMAF 89.3651

ffmpeg -init_hw_device qsv=intel,child_device=1 -vcodec hevc_qsv -i football.mp4 -vcodec av1_qsv -profile:v main -level 51 -preset veryslow -vb 3000k -maxrate 6000k -g 256 output6.mp4 -y

PSNR 37.1428, SSIM 0.9532, VMAF 89.3651

Conclusions:

  1. I don't see any effect from explicitly setting the scenario to "archive" (unless the default is already archive?)
  2. I see a small negative effect in setting look_ahead_depth to 40 vs not specifying it at all. Unless by not specifying it, the preset/scenario defaults already implement look ahead?
  3. Negligible difference between CBR and VBR. I would have expected a larger difference.

Would it be possible for someone to run these commands under Linux so we can at least establish that the Windows driver is comparable?

oviano commented 1 year ago

Adding a couple more tests, with look_ahead_depth explicitly set to zero.

ffmpeg -init_hw_device qsv=intel,child_device=1 -vcodec hevc_qsv -extra_hw_frames 40 -i football.mp4 -vcodec av1_qsv -profile:v main -level 51 -preset veryslow -scenario archive -vb 3000k -minrate 3000k -maxrate 3000k -bufsize 6000k -rc_init_occupancy 3000k -extbrc 1 -look_ahead_depth 0 -b_strategy 1 -bf 7 -g 256 -adaptive_i 1 -adaptive_b 1 -strict -1 output7.mp4 -y

PSNR 37.1428, SSIM 0.9532, VMAF 89.3651

ffmpeg -init_hw_device qsv=intel,child_device=1 -vcodec hevc_qsv -extra_hw_frames 40 -i football.mp4 -vcodec av1_qsv -profile:v main -level 51 -preset veryslow -scenario archive -vb 3000k -maxrate 6000k -bufsize 12000k -rc_init_occupancy 6000k -extbrc 1 -look_ahead_depth 0 -b_strategy 1 -bf 7 -g 256 -adaptive_i 1 -adaptive_b 1 -strict -1 output8.mp4 -y

PSNR 37.1428, SSIM 0.9532, VMAF 89.3651

So at least with this source, setting look_ahead_depth just makes things marginally worse.

oviano commented 1 year ago

Also, just to re-iterate, FFmpeg and QSVEnc hang, and samples_encode fails, under Windows driver 3802, when trying to use the LookAheadDepth option, unless a valid scenario is selected (which wasn't an option in FFmpeg until I added the option myself).

I assume this isn't the case with Linux.

It would be great to get some eyes on the Windows code. I'm ready to help/test.

dvrogozh commented 1 year ago

Negligible difference between CBR and VBR. I would have expected a larger difference.

That's expected for AV1 encoder. AV1 does not support HRD per specification. So, this feature is not implemented for AV1 (since it’s not in the spec). As such CBR and VBR modes for Intel AV1 encoder are basically the same. Now, maxrate currently is not taken into account (and this might change in the future). So, those command line which you specified for CBR/VBR produce almost identical results where differences most likely can be explained by differences in other parameters such as bufsize.

oviano commented 1 year ago

Ok, I see. Thanks.

Returning to LookAheadDepth; I will post full results later but I am finding that setting this to 40 with QSV HEVC results in a significant improvement in PSNR and VMAF vs setting it to zero, all else being equal, whereas the same test conducted with AV1, as shown above, results in a marginal decrease in the measurable quality. On Windows, anyway.

dvrogozh commented 1 year ago

I don't see any effect from explicitly setting the scenario to "archive" I see a small negative effect in setting look_ahead_depth to 40 vs not specifying it all.

Look Ahead and the other adaptive encoding tools described at https://github.com/intel/media-delivery/blob/master/doc/quality.rst#enctools-and-extbrc were designed and tested for Linux. We verified yesterday that they do work for us on Linux (mind: we did not look into the -scenario option yet, i.e. we tried ffmpeg cmdlines w/o specifying this option). For short, let's name these tools "enctools".

Whether enctools are working on Windows is a gray area considering your result showing that it might not work at least for AV1. We need some time to check it out on our side. Which we will try to do next week and give you an answer.

Let me try to give you some hints if you will wish to test LA in a meanwhile. Per internal design of VPL Runtime you might see different behavior depending on scenario being MFX_SCENARIO_GAME_STREAMING and any other scenarios. Reason is that this is a trigger for 2 different implementation paths (you can see that in open source code as well here):

  1. with MFX_SCENARIO_GAME_STREAMING you will get HW implementation of enctools
  2. with other scenarios (or no scenario specified at all) you will get the SW implementation of enctools. Mind that HW enctools don't fully match SW enctools in terms of feature list. https://github.com/intel/media-delivery/blob/master/doc/quality.rst#enctools-and-extbrc discusses SW enctools. We will need to improve the documentation here to discuss the HW/SW enctools differences.

For those who work on Linux, to try SW enctools you need to build https://github.com/oneapi-src/oneVPL-intel-gpu with -DMFX_ENABLE_AENC=ON which is disabled by default. W/o it you will miss advanced encoding tools, but still should be able to use SW BRC itself.

A scope of questions which arises is the following:

  1. For SW enctools (i.e. no scenario on cmdline): does enctools work across AVC/HEVC/AV1 encoders?
  2. For HW enctools (MFX_SCENARIO_GAME_STREAMING): do the options exposed by ffmpeg work correctly and allow triggering the adaptive tools in the same way as they do for SW enctools?
dvrogozh commented 1 year ago

I then added the scenario option to my local FFmpeg build

-scenario is supported by ffmpeg for AVC/HEVC in ffmpeg-qsv, but not for AV1. Do you mean that you've added a patch to ffmpeg to expose this option for AV1 as well?

oviano commented 1 year ago

Yes, I patched my local build. It's a trivial three line change:

diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
index 514a1e8..01cf6ac 100644
--- a/libavcodec/qsvenc.c
+++ b/libavcodec/qsvenc.c
@@ -1090,6 +1090,8 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q)
                 q->extco3.MaxFrameSizeP = q->max_frame_size_p;

             q->extco3.ScenarioInfo = q->scenario;
+        } else if (avctx->codec_id == AV_CODEC_ID_AV1) {
+            q->extco3.ScenarioInfo = q->scenario;
         }

         if (avctx->codec_id == AV_CODEC_ID_HEVC) {
diff --git a/libavcodec/qsvenc_av1.c b/libavcodec/qsvenc_av1.c
index bb9ad16..6a2bcb0 100644
--- a/libavcodec/qsvenc_av1.c
+++ b/libavcodec/qsvenc_av1.c
@@ -110,6 +110,7 @@ static const AVOption options[] = {
     QSV_OPTION_ADAPTIVE_I
     QSV_OPTION_ADAPTIVE_B
     QSV_OPTION_EXTBRC
+    QSV_OPTION_SCENARIO
     { "profile", NULL, OFFSET(qsv.profile), AV_OPT_TYPE_INT, { .i64 = MFX_PROFILE_UNKNOWN }, 0, INT_MAX, VE, "profile" },
         { "unknown" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_UNKNOWN      }, INT_MIN, INT_MAX,     VE, "profile" },
         { "main"    , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_AV1_MAIN     }, INT_MIN, INT_MAX,     VE, "profile" },
oviano commented 1 year ago

So I've been comparing QSV AV1 with HEVC, comparing CBR vs VBR, LA 0 vs LA40. I've more-or-less used the settings in the link you posted earlier.

Windows 11 Pro, ARC 380, latest driver 3802.

The AV1 commands:

ffmpeg -init_hw_device qsv=intel,child_device=1                     -vcodec hevc_qsv -i football.mp4 -vcodec av1_qsv  -profile:v main -level 51            -preset veryslow -scenario archive -vb 3000k -minrate 3000k -maxrate 3000k -bufsize 6000k  -rc_init_occupancy 3000k -extbrc 1 -look_ahead_depth 0  -b_strategy 1 -bf 7         -g 256 -adaptive_i 1 -adaptive_b 1 -strict -1 c:\Users\Oliver\Desktop\av1_cbr_la_0.mp4   -y
ffmpeg -init_hw_device qsv=intel,child_device=1 -extra_hw_frames 40 -vcodec hevc_qsv -i football.mp4 -vcodec av1_qsv  -profile:v main -level 51            -preset veryslow -scenario archive -vb 3000k -minrate 3000k -maxrate 3000k -bufsize 6000k  -rc_init_occupancy 3000k -extbrc 1 -look_ahead_depth 40 -b_strategy 1 -bf 7         -g 256 -adaptive_i 1 -adaptive_b 1 -strict -1 c:\Users\Oliver\Desktop\av1_cbr_la_40.mp4  -y
ffmpeg -init_hw_device qsv=intel,child_device=1                     -vcodec hevc_qsv -i football.mp4 -vcodec av1_qsv  -profile:v main -level 51            -preset veryslow -scenario archive -vb 3000k                -maxrate 6000k -bufsize 12000k -rc_init_occupancy 6000k -extbrc 1 -look_ahead_depth 0  -b_strategy 1 -bf 7         -g 256 -adaptive_i 1 -adaptive_b 1 -strict -1 c:\Users\Oliver\Desktop\av1_vbr_la_0.mp4   -y
ffmpeg -init_hw_device qsv=intel,child_device=1 -extra_hw_frames 40 -vcodec hevc_qsv -i football.mp4 -vcodec av1_qsv  -profile:v main -level 51            -preset veryslow -scenario archive -vb 3000k                -maxrate 6000k -bufsize 12000k -rc_init_occupancy 6000k -extbrc 1 -look_ahead_depth 40 -b_strategy 1 -bf 7         -g 256 -adaptive_i 1 -adaptive_b 1 -strict -1 c:\Users\Oliver\Desktop\av1_vbr_la_40.mp4  -y

The HEVC commands:

ffmpeg -init_hw_device qsv=intel,child_device=1                     -vcodec hevc_qsv -i football.mp4 -vcodec hevc_qsv -profile:v main -level 51 -tier high -preset veryslow                   -vb 3000k -minrate 3000k -maxrate 3000k -bufsize 6000k  -rc_init_occupancy 3000k -extbrc 1 -look_ahead_depth 0  -b_strategy 1 -bf 7 -refs 4 -g 256                             -strict -1 c:\Users\Oliver\Desktop\hevc_cbr_la_0.mp4  -y
ffmpeg -init_hw_device qsv=intel,child_device=1 -extra_hw_frames 40 -vcodec hevc_qsv -i football.mp4 -vcodec hevc_qsv -profile:v main -level 51 -tier high -preset veryslow                   -vb 3000k -minrate 3000k -maxrate 3000k -bufsize 6000k  -rc_init_occupancy 3000k -extbrc 1 -look_ahead_depth 40 -b_strategy 1 -bf 7 -refs 4 -g 256                             -strict -1 c:\Users\Oliver\Desktop\hevc_cbr_la_40.mp4 -y
ffmpeg -init_hw_device qsv=intel,child_device=1                     -vcodec hevc_qsv -i football.mp4 -vcodec hevc_qsv -profile:v main -level 51 -tier high -preset veryslow                   -vb 3000k                -maxrate 6000k -bufsize 12000k -rc_init_occupancy 6000k -extbrc 1 -look_ahead_depth 0  -b_strategy 1 -bf 7 -refs 4 -g 256                             -strict -1 c:\Users\Oliver\Desktop\hevc_vbr_la_0.mp4  -y
ffmpeg -init_hw_device qsv=intel,child_device=1 -extra_hw_frames 40 -vcodec hevc_qsv -i football.mp4 -vcodec hevc_qsv -profile:v main -level 51 -tier high -preset veryslow                   -vb 3000k                -maxrate 6000k -bufsize 12000k -rc_init_occupancy 6000k -extbrc 1 -look_ahead_depth 40 -b_strategy 1 -bf 7 -refs 4 -g 256                             -strict -1 c:\Users\Oliver\Desktop\hevc_vbr_la_40.mp4 -y
QSV stats

Key points that stand out for me:

Next I will look at the impact of scenario on the AV1 LA results.

I guess the problem here is if the Windows driver is not getting updated with fixes/improvements in a timely fashion then I could be testing pretty old code....

PS It would be really useful if you were to run the commands above under Linux to get a baseline for my comparisons. The source file "football.mp4" is here:

https://ovcollyer-colebrooke.dyndns.org:4001/d/s/r5qm4zIURxrogQ9tAkee6Na6kMACl4NH/-YLQRSo2-6BuyfXDxkqoaMQj8nMh95jA-77vgIMDRBAo

dvrogozh commented 1 year ago

HEVC CBR and VBR results are identical.

I think the reason for that for HEVC is -strict -1 on cmdline. This option means to basically ignore HRD compliance which makes a difference between CBR and VBR. Try to remove this option.

HEVC LA has a positive effect, as expected. AV1 LA has a negative effect, which seems wrong.

It might be that AV1 LA has a minor negative effect on the particular stream you've selected, which might make it seem that AV1 LA is not working. Typically quality is evaluated on average over a selection of streams, then outliers are checked for issues. Let me ask around about which streams we have evaluated LA behavior on and what would be a good candidate stream to verify that LA is working as expected.

oviano commented 1 year ago

Could be that, I suppose. Although it seems slightly suspicious that the HEVC benefits from LA and AV1 doesn't. If you have a file and some standard tests you are running on Linux I don't mind running my patched FFmpeg against them.

On that subject, are the encoders on the different platforms validated vs one another at all before releasing drivers? I assume if they are, it's not being done using FFmpeg, since that has been broken for a long time from what I can tell: (1) the timestamp issue making AV1 with non-zero -bf produce broken streams, and (2) look ahead depth relying on scenario, which isn't implemented in public FFmpeg.

I'll re-run without -strict -1 - (you should update the documents then, as this option is listed under the "CBR and VBR" section in the link further up.)

dvrogozh commented 1 year ago

you should update the documents then, as this option is listed under the "CBR and VBR" section in the link further up

Right. I'll be doing an update to the document next week clarifying this and few other things we've discussed above.

oviano commented 1 year ago

So, the HEVC encodes re-done without -strict -1. Also, I ran the analysis for the full 10 minutes of the clip, instead of the first 60s.

ffmpeg -init_hw_device qsv=intel,child_device=1                     -vcodec hevc_qsv -i football.mp4 -vcodec hevc_qsv -profile:v main -level 51 -tier high -preset veryslow                   -vb 3000k -minrate 3000k -maxrate 3000k -bufsize 6000k  -rc_init_occupancy 3000k -extbrc 1 -look_ahead_depth 0  -b_strategy 1 -bf 7 -refs 4 -g 256                             c:\Users\Oliver\Desktop\hevc_cbr_la_0_v2.mp4  -y
ffmpeg -init_hw_device qsv=intel,child_device=1 -extra_hw_frames 40 -vcodec hevc_qsv -i football.mp4 -vcodec hevc_qsv -profile:v main -level 51 -tier high -preset veryslow                   -vb 3000k -minrate 3000k -maxrate 3000k -bufsize 6000k  -rc_init_occupancy 3000k -extbrc 1 -look_ahead_depth 40 -b_strategy 1 -bf 7 -refs 4 -g 256                             c:\Users\Oliver\Desktop\hevc_cbr_la_40_v2.mp4 -y
ffmpeg -init_hw_device qsv=intel,child_device=1                     -vcodec hevc_qsv -i football.mp4 -vcodec hevc_qsv -profile:v main -level 51 -tier high -preset veryslow                   -vb 3000k                -maxrate 6000k -bufsize 12000k -rc_init_occupancy 6000k -extbrc 1 -look_ahead_depth 0  -b_strategy 1 -bf 7 -refs 4 -g 256                             c:\Users\Oliver\Desktop\hevc_vbr_la_0_v2.mp4  -y
ffmpeg -init_hw_device qsv=intel,child_device=1 -extra_hw_frames 40 -vcodec hevc_qsv -i football.mp4 -vcodec hevc_qsv -profile:v main -level 51 -tier high -preset veryslow                   -vb 3000k                -maxrate 6000k -bufsize 12000k -rc_init_occupancy 6000k -extbrc 1 -look_ahead_depth 40 -b_strategy 1 -bf 7 -refs 4 -g 256                             c:\Users\Oliver\Desktop\hevc_vbr_la_40_v2.mp4 -y
Screenshot 2022-12-03 at 08 59 54

Notes:

mikk9 commented 1 year ago

LA AV1 seems to work for me with your patched ffmpeg, VMAF score went 0.5 up. It makes no difference if I set it to 30, 50, or 100. It's like an on/off switch. No idea if it's working as intended.

ffmpeg

oviano commented 1 year ago

Ok good, can you share your FFmpeg command? Also, do you know where I would be able to find that clip?

mikk9 commented 1 year ago

ffmpeg -i Intel_Demo_Clip2.mp4 -vcodec av1_qsv -bf 7 -g: 120 -b:v 2600K -look_ahead_depth 100 -preset veryslow -c:a copy -f mp4 Arc_AV1_LA_2600.mp4

Sample is here: https://drive.google.com/file/d/1YX1V0SeSkYaq6Ui41vv1wcOatbLnuLSL/view?usp=share_link

It's a 8 bit source and ffmpeg uses 8 bit AV1 here, I can see it from the sky banding I don't have with QSVEnc 10 bit. They have to add a 10 bit option.

oviano commented 1 year ago

It looks like the FFmpeg code just needs to fill out

TargetBitDepthLuma TargetBitDepthChroma

So probably we just need to add a "highbitdepth" option as NVENC has, and use that to fill in the above. I'll see if I can make a patch.

oviano commented 1 year ago

ffmpeg -i Intel_Demo_Clip2.mp4 -vcodec av1_qsv -bf 7 -g: 120 -b:v 2600K -look_ahead_depth 100 -preset veryslow -c:a copy -f mp4 Arc_AV1_LA_2600.mp4

Sample is here: https://drive.google.com/file/d/1YX1V0SeSkYaq6Ui41vv1wcOatbLnuLSL/view?usp=share_link

It's a 8 bit source and ffmpeg uses 8 bit AV1 here, I can see it from the sky banding I don't have with QSVEnc 10 bit. They have to add a 10 bit option.

So what is your non-LA command? If I just remove the -look_ahead_depth 100 part, it does drop the measurements but not down to the level in your screenshot.

mikk9 commented 1 year ago

Exactly the same without -look_ahead_depth. You have to adjust the bitrate. With LA the bitrate goes down. On this sample 2600 with LA and 2500 without LA. If you don't adjust the bitrate the scores are more similar of course.

oviano commented 1 year ago

So I added a "highbitdepth" option to FFmpeg QSVENC but it doesn't work. I think it changes the bitstream, so maybe it has actually encoded it in 10-bits, but FFmpeg still treats it as NV12 instead of P010 when it writes the encoded bitstream to the mp4 container.

Maybe the same setting on NVENC has the same problem, or maybe I'm missing something in the NVENC implementation that takes care of this aspect. I can't test as I don't have access to a 4-series NVIDIA card right now.

QSVEnc doesn't appear to use the TargetBitdepthLuma/Chroma options, so maybe that application just does its own conversion of an 8-bit source to 10 bit too.

Anyway, it's trivial to just convert to P010 after decoding the source, as the command below shows, and it does make a positive difference.

Finally, I rolled back to driver 3490 but while it initially seemed more stable, it turned out it wasn't. I gave up last night as I couldn't complete a single encode without it crashing the PC. Also, the results are much worse with this driver as can be seen below.

Oddly, when I tried again today, with the machine having been switched off all night, it completed all encodes straightaway, so I'm wondering if there is a cooling issue with the card or something.

Screenshot 2022-12-04 at 09 32 28

The commands:

ffmpeg -init_hw_device qsv=intel,child_device=1 -i Intel_Demo_Clip2.mp4               -vcodec av1_qsv -bf 7 -g: 120 -b:v 2500K                       -preset veryslow -c:a copy -f mp4 Arc_AV1_2500.mp4 -y
ffmpeg -init_hw_device qsv=intel,child_device=1 -i Intel_Demo_Clip2.mp4               -vcodec av1_qsv -bf 7 -g: 120 -b:v 2600K                       -preset veryslow -c:a copy -f mp4 Arc_AV1_2600.mp4 -y
ffmpeg -init_hw_device qsv=intel,child_device=1 -i Intel_Demo_Clip2.mp4               -vcodec av1_qsv -bf 7 -g: 120 -b:v 2600K -look_ahead_depth 100 -preset veryslow -c:a copy -f mp4 Arc_AV1_LA_2600.mp4 -y
ffmpeg -init_hw_device qsv=intel,child_device=1 -i Intel_Demo_Clip2.mp4 -pix_fmt p010 -vcodec av1_qsv -bf 7 -g: 120 -b:v 2600K -look_ahead_depth 100 -preset veryslow -c:a copy -f mp4 Arc_AV1_LA_10bit_2600.mp4 -y
mikk9 commented 1 year ago

Wow what a difference it makes in the VMAF score. For some reason my VMAF score differs to yours though.

ffmpeg10bit

oviano commented 1 year ago

Your result looks more credible tbh. I will run mine again….

oviano commented 1 year ago

So I ran the 10-bit encode again. It produced a slightly different file and result, but almost the same scores.

I added "-async_depth 1" to the command this time, as I am experimenting to see if this improves the stability of my encodes. I don't know if this can affect the result, I would have assumed not, but who knows! I suppose if having an async_depth of greater than 1 causes some sort of memory corruption it could affect the result? Other than that, I've re-installed the 3802 driver since I ran the 10 bit encode the first time, maybe that has somehow affected things. Not very reassuring if the case!

Here is the result anyway. Maybe the large difference in VMAF scores between my results and yours is down to the version of the VMAF library used?

Mine is this one for reference:

https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-essentials.zip

Here are the results, anyway.

Screenshot 2022-12-04 at 16 13 12
oviano commented 1 year ago

And the command, for clarity.

ffmpeg -init_hw_device qsv=intel,child_device=1 -i Intel_Demo_Clip2.mp4 -pix_fmt p010 -vcodec av1_qsv -async_depth 1 -bf 7 -g: 120 -b:v 2600K -look_ahead_depth 100 -preset veryslow -c:a copy -f mp4 Arc_3802_AV1_LA_10bit_2600_2.mp4 -y
mikk9 commented 1 year ago

I get the same VMAF score after upgrading ffmpeg and FFMetrics, I guess FFMetrics changed something in the final 1.3.1.

oviano commented 1 year ago

Guess so. Did the FFmpeg 10 bit encode also remove the banding you mentioned earlier that wasn't present with QSVEnc?

By the way the slight discrepancy in my two 10 bit encodes was indeed caused by async_depth. Setting it to 1 marginally lowers the scores, leaving it at default (4 I think, in FFmpeg) raises it slightly.

mikk9 commented 1 year ago

Yes it helps decreasing the banding.

https://github.com/intel/media-delivery/commit/3e9825e8e917b5920655add98c0635e73955968a

Some of the recommended EncTools settings are not working for ffmpeg. Extbrc doesn't work when I use it together with look_ahead_depth.

oviano commented 1 year ago

Yes.

-extbrc 1 -lookahead N hangs both FFmpeg and QSVENC for me, unless scenario is specified. In that case I don’t really know whether scenario is applying its own collection of settings and overriding extbrc, and whether that’s why it works, as scenario isn’t mentioned much in the docs.

camthoms commented 1 year ago

Kind of a side note, but since this thread was very helpful and you all seem knowledgeable... Is anyone else not able to use the -crf flag? Without setting the bitrate manually my commands churn out very low bitrate videos

oviano commented 1 year ago

Kind of a side note, but since this thread was very helpful and you all seem knowledgeable... Is anyone else not able to use the -crf flag? Without setting the bitrate manually my commands churn out very low bitrate videos

CRF doesn't seem like an option for this encoder - try looking into using what is referred to in QSV as "intelligent constant quality" which seems to be triggered by the FFmpeg "global_quality" option. Looks like it ranges from 1 to 51.

Looking at the FFmpeg source, CRF only seems to be used by aomenc, svtav1, vpxenc, x264, x265 and xavs.

mikk9 commented 1 year ago

The ICQ factor depends on the mode.

8 bit HEVC: 1-51; 10 bit HEVC: 1-63; AV1: 1-255

oviano commented 1 year ago

FFmpeg is always limiting it to 1-51, according to this code:

    case MFX_RATECONTROL_LA_ICQ:
        q->extco2.LookAheadDepth = q->look_ahead_depth;
    case MFX_RATECONTROL_ICQ:
        q->param.mfx.ICQQuality  = av_clip(avctx->global_quality, 1, 51);
        break;

There is no differentiation between the different codecs/bitdepths in this piece of code, at least.

mikk9 commented 1 year ago

I haven't tried but 10 bit needs +12, otherwise the bitrate is not comparable.

oviano commented 1 year ago

Right. That may be a limitation in the oneVPL/QSV ecosystem then, because in the docs it states:

mfxU16 ICQQuality Used by the Intelligent Constant Quality (ICQ) bitrate control algorithm. Values are in the 1 to 51 range, where 1 corresponds the best quality.

Perhaps they deal with it internally or something...

mikk9 commented 1 year ago

HEVC Lookahead doesn't work for me, only for AV1. I haven't tried H264, I need to test. On my sample HEVC has a poor scene change because of the low bitrate. AV1 is much better there despite the close VMAF scores. AV1 extbrc seems to work with gamestreaming scenario and also I can adjust the LA depth to any value I want up to 100. Overall it's not really better though.

oviano commented 1 year ago

Have you tested your sample with NVENC AV1?

I have access to a 4090 for today only, let me know if you want me to run some commands.

camthoms commented 1 year ago

Oddly enough I'm just getting these errors after a little bit with the global quality flag:

Error allocating the output packet rate=xxxxkbits/s speed = x.xx Error submitting video frame to the encoder

oviano commented 1 year ago

Can you post your command line?

camthoms commented 1 year ago

ffmpeg -init_hw_device qsv=intel,child_device=2 -i "input.mkv" -vcodec av1_qsv -bf 7 -g: 120 -look_ahead_depth 100 -global_quality 12 -preset veryslow -acodec opus -strict -2 "output.mkv" -n

It works without the ICQ flag, and I get the same error with various values

oviano commented 1 year ago

Can you add -loglevel verbose to the start of your command, and then post the output?

camthoms commented 1 year ago

ffmpeg version 5.1.git Copyright (c) 2000-2022 the FFmpeg developers built with msvc configuration: --disable-ffplay --disable-ffprobe --disable-postproc --enable-libdav1d --enable-cuda-nvcc --enable-libvpl --enable-libsvtav1 --enable-libaom --disable-bzlib --disable-iconv --disable-zlib --disable-lzma --disable-sdl2 --prefix=..♀fmpeg-win32-server libavutil 57. 43.100 / 57. 43.100 libavcodec 59. 54.100 / 59. 54.100 libavformat 59. 34.102 / 59. 34.102 libavdevice 59. 8.101 / 59. 8.101 libavfilter 8. 51.100 / 8. 51.100 libswscale 6. 8.112 / 6. 8.112 libswresample 4. 9.100 / 4. 9.100 Routing option strict to both codec and muxer layer [AVHWDeviceContext @ 000001C7849B5EC0] Defaulting child_device_type to AV_HWDEVICE_TYPE_D3D11VA for oneVPL.Please explicitly set child device type via "-init_hw_device" option if needed. [AVHWDeviceContext @ 000001C7849B3780] Using device 8086:56a5 (Intel(R) Arc(TM) A380 Graphics). [AVHWDeviceContext @ 000001C7849B5EC0] Use Intel(R) oneVPL to create MFX session, API version is 2.7, the required implementation version is 1.3 [AVHWDeviceContext @ 000001C7849B5EC0] Initialize MFX session: implementation version is 2.7 [h264 @ 000001C794B6E600] Reinit context to 1920x1088, pix_fmt: yuv420p [h264 @ 000001C794B6E600] Increasing reorder buffer to 1 Input #0, matroska,webm, from 'Atlanta S01E05 Nobody Beats the Biebs 1080p WEB-DL DD5.1 H.264-Oosh.mkv': Metadata: encoder : libebml v1.3.4 + libmatroska v1.4.5 creation_time : 2016-09-28T09:27:31.000000Z Duration: 00:21:49.51, start: 0.032000, bitrate: 5583 kb/s Stream #0:0(eng): Subtitle: subrip Metadata: BPS : 122 BPS-eng : 122 DURATION : 00:21:33.891000000 DURATION-eng : 00:21:33.891000000 NUMBER_OF_FRAMES: 518 NUMBER_OF_FRAMES-eng: 518 NUMBER_OF_BYTES : 19828 NUMBER_OF_BYTES-eng: 19828 _STATISTICS_WRITING_APP: mkvmerge v9.4.2 ('So High') 64bit _STATISTICS_WRITING_APP-eng: mkvmerge v9.4.2 ('So High') 64bit _STATISTICS_WRITING_DATE_UTC: 2016-09-28 09:27:31 _STATISTICS_WRITING_DATE_UTC-eng: 
2016-09-28 09:27:31 _STATISTICS_TAGS: BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES _STATISTICS_TAGS-eng: BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES Stream #0:1: Video: h264 (High), 1 reference frame, yuv420p(tv, bt709, progressive, topleft), 1920x1080 (1920x1088) [SAR 1:1 DAR 16:9], 23.98 fps, 23.98 tbr, 1k tbn (default) Metadata: BPS : 5198282 BPS-eng : 5198282 DURATION : 00:21:49.392000000 DURATION-eng : 00:21:49.392000000 NUMBER_OF_FRAMES: 31394 NUMBER_OF_FRAMES-eng: 31394 NUMBER_OF_BYTES : 850823717 NUMBER_OF_BYTES-eng: 850823717 _STATISTICS_WRITING_APP: mkvmerge v9.4.2 ('So High') 64bit _STATISTICS_WRITING_APP-eng: mkvmerge v9.4.2 ('So High') 64bit _STATISTICS_WRITING_DATE_UTC: 2016-09-28 09:27:31 _STATISTICS_WRITING_DATE_UTC-eng: 2016-09-28 09:27:31 _STATISTICS_TAGS: BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES _STATISTICS_TAGS-eng: BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES Stream #0:2(eng): Audio: ac3, 48000 Hz, 5.1(side), fltp, 384 kb/s (default) Metadata: BPS : 384000 BPS-eng : 384000 DURATION : 00:21:49.344000000 DURATION-eng : 00:21:49.344000000 NUMBER_OF_FRAMES: 40917 NUMBER_OF_FRAMES-eng: 40917 NUMBER_OF_BYTES : 62848512 NUMBER_OF_BYTES-eng: 62848512 _STATISTICS_WRITING_APP: mkvmerge v9.4.2 ('So High') 64bit _STATISTICS_WRITING_APP-eng: mkvmerge v9.4.2 ('So High') 64bit _STATISTICS_WRITING_DATE_UTC: 2016-09-28 09:27:31 _STATISTICS_WRITING_DATE_UTC-eng: 2016-09-28 09:27:31 _STATISTICS_TAGS: BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES _STATISTICS_TAGS-eng: BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES Stream mapping: Stream #0:1 -> #0:0 (h264 (native) -> av1 (av1_qsv)) Stream #0:2 -> #0:1 (ac3 (native) -> opus (native)) Stream #0:0 -> #0:2 (subrip (srt) -> ass (ssa)) Press [q] to stop, [?] 
for help [h264 @ 000001C794246FC0] Reinit context to 1920x1088, pix_fmt: yuv420p [graph_1_in_0_2 @ 000001C79444A140] tb:1/48000 samplefmt:fltp samplerate:48000 chlayout:5.1(side) [format_out_0_1 @ 000001C79444D240] auto-inserting filter 'auto_aresample_0' between the filter 'Parsed_anull_0' and the filter 'format_out_0_1' [auto_aresample_0 @ 000001C79444D340] ch:6 chl:5.1(side) fmt:fltp r:48000Hz -> ch:2 chl:stereo fmt:fltp r:48000Hz [graph 0 input from stream 0:1 @ 000001C79444CB40] w:1920 h:1080 pixfmt:yuv420p tb:1/1000 fr:2997/125 sar:1/1 [auto_scale_0 @ 000001C79444C040] w:iw h:ih flags:'' interl:0 [format @ 000001C79444AA40] auto-inserting filter 'auto_scale_0' between the filter 'Parsed_null_0' and the filter 'format' [auto_scale_0 @ 000001C79444C040] w:1920 h:1080 fmt:yuv420p sar:1/1 -> w:1920 h:1080 fmt:nv12 sar:1/1 flags:0x0 Last message repeated 3 times [av1_qsv @ 000001C7948E3CC0] Using device intel (type qsv) with av1_qsv encoder. [av1_qsv @ 000001C7948E3CC0] Encoder: input is system memory surface [av1_qsv @ 000001C7948E3CC0] Use Intel(R) oneVPL to create MFX session with the specified MFX loader [av1_qsv @ 000001C7948E3CC0] Using the intelligent constant quality (ICQ) ratecontrol method [av1_qsv @ 000001C7948E3CC0] profile: av1 main; level: 40 [av1_qsv @ 000001C7948E3CC0] GopPicSize: 120; GopRefDist: 8; GopOptFlag: strict; IdrInterval: 0 [av1_qsv @ 000001C7948E3CC0] TargetUsage: 1; RateControlMethod: ICQ [av1_qsv @ 000001C7948E3CC0] ICQQuality: 12 [av1_qsv @ 000001C7948E3CC0] NumRefFrame: 4 [av1_qsv @ 000001C7948E3CC0] IntRefType: 0; IntRefCycleSize: 0; IntRefQPDelta: 0; IntRefCycleDist: 0 [av1_qsv @ 000001C7948E3CC0] MaxFrameSize: 0; [av1_qsv @ 000001C7948E3CC0] BitrateLimit: unknown; MBBRC: OFF; ExtBRC: unknown [av1_qsv @ 000001C7948E3CC0] VDENC: ON [av1_qsv @ 000001C7948E3CC0] BRefType: pyramid [av1_qsv @ 000001C7948E3CC0] PRefType: default [av1_qsv @ 000001C7948E3CC0] MinQPI: 0; MaxQPI: 0; MinQPP: 0; MaxQPP: 0; MinQPB: 0; MaxQPB: 0 [av1_qsv @ 
000001C7948E3CC0] FrameRateExtD: 125; FrameRateExtN: 2997 [av1_qsv @ 000001C7948E3CC0] NumTileRows: 1; NumTileColumns: 1; NumTileGroups: 1 [av1_qsv @ 000001C7948E3CC0] WriteIVFHeaders: OFF Output #0, matroska, to 'AtlantaFFmpegTest2.mkv': Metadata: encoder : Lavf59.34.102 Stream #0:0: Video: av1, 1 reference frame (AV01 / 0x31305641), nv12(tv, bt709, progressive, topleft), 1920x1080 (0x0) [SAR 1:1 DAR 16:9], q=2-31, 1000 kb/s, 23.98 fps, 1k tbn (default) Metadata: BPS : 5198282 BPS-eng : 5198282 DURATION : 00:21:49.392000000 DURATION-eng : 00:21:49.392000000 NUMBER_OF_FRAMES: 31394 NUMBER_OF_FRAMES-eng: 31394 NUMBER_OF_BYTES : 850823717 NUMBER_OF_BYTES-eng: 850823717 _STATISTICS_WRITING_APP: mkvmerge v9.4.2 ('So High') 64bit _STATISTICS_WRITING_APP-eng: mkvmerge v9.4.2 ('So High') 64bit _STATISTICS_WRITING_DATE_UTC: 2016-09-28 09:27:31 _STATISTICS_WRITING_DATE_UTC-eng: 2016-09-28 09:27:31 _STATISTICS_TAGS: BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES _STATISTICS_TAGS-eng: BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES encoder : Lavc59.54.100 av1_qsv Stream #0:1(eng): Audio: opus ([255][255][255][255] / 0xFFFFFFFF), 48000 Hz, stereo, fltp, delay 120, 96 kb/s (default) Metadata: BPS : 384000 BPS-eng : 384000 DURATION : 00:21:49.344000000 DURATION-eng : 00:21:49.344000000 NUMBER_OF_FRAMES: 40917 NUMBER_OF_FRAMES-eng: 40917 NUMBER_OF_BYTES : 62848512 NUMBER_OF_BYTES-eng: 62848512 _STATISTICS_WRITING_APP: mkvmerge v9.4.2 ('So High') 64bit _STATISTICS_WRITING_APP-eng: mkvmerge v9.4.2 ('So High') 64bit _STATISTICS_WRITING_DATE_UTC: 2016-09-28 09:27:31 _STATISTICS_WRITING_DATE_UTC-eng: 2016-09-28 09:27:31 _STATISTICS_TAGS: BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES _STATISTICS_TAGS-eng: BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES encoder : Lavc59.54.100 opus Stream #0:2(eng): Subtitle: ass Metadata: BPS : 122 BPS-eng : 122 DURATION : 00:21:33.891000000 DURATION-eng : 00:21:33.891000000 NUMBER_OF_FRAMES: 518 NUMBER_OF_FRAMES-eng: 518 NUMBER_OF_BYTES : 19828 
NUMBER_OF_BYTES-eng: 19828 _STATISTICS_WRITING_APP: mkvmerge v9.4.2 ('So High') 64bit _STATISTICS_WRITING_APP-eng: mkvmerge v9.4.2 ('So High') 64bit _STATISTICS_WRITING_DATE_UTC: 2016-09-28 09:27:31 _STATISTICS_WRITING_DATE_UTC-eng: 2016-09-28 09:27:31 _STATISTICS_TAGS: BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES _STATISTICS_TAGS-eng: BPS DURATION NUMBER_OF_FRAMES NUMBER_OF_BYTES encoder : Lavc59.54.100 ssa [av1_qsv @ 000001C7948E3CC0] Error allocating the output packetrate=4185.4kbits/s speed=1.95x Error submitting video frame to the encoder All streams finished for output file #0 Terminating muxer thread 0 [opus @ 000001C794276880] 141 frames left in the queue on closing [opus @ 000001C794276880] Average Intensity Stereo band: 16.0 [opus @ 000001C794276880] Dual Stereo used: 0.01% [AVIOContext @ 000001C784A51700] Statistics: 282802994 bytes written, 0 seeks, 1079 writeouts Terminating demuxer thread 0 [AVIOContext @ 000001C784A52280] Statistics: 382263472 bytes read, 2 seeks Conversion failed!

oviano commented 1 year ago

It looks like ICQ for AV1 hasn't been tested/isn't supported for FFmpeg then, because that error occurs here:

    ret = av_new_packet(&pkt.pkt, q->packet_size);
    if (ret < 0) {
        av_log(avctx, AV_LOG_ERROR, "Error allocating the output packet\n");
        return ret;
    }

I'm guessing because packet_size is zero. It's calculated like this:

    q->packet_size = q->param.mfx.BufferSizeInKB * q->param.mfx.BRCParamMultiplier * 1000;

This is inside qsv_retrieve_enc_av1_params.

However, it looks like neither param.mfx.BufferSizeInKB nor param.mfx.BRCParamMultiplier gets set when the rate control method is ICQ, and both are thus still zero:

    switch (q->param.mfx.RateControlMethod) {
    case MFX_RATECONTROL_CBR:
    case MFX_RATECONTROL_VBR:
        if (q->extbrc) {
            q->extco2.LookAheadDepth = q->look_ahead_depth;
        }
#if QSV_HAVE_VCM
    case MFX_RATECONTROL_VCM:
#endif
    case MFX_RATECONTROL_QVBR:
        q->param.mfx.BufferSizeInKB   = buffer_size_in_kilobytes / brc_param_multiplier;
        q->param.mfx.InitialDelayInKB = initial_delay_in_kilobytes / brc_param_multiplier;
        q->param.mfx.TargetKbps       = target_bitrate_kbps / brc_param_multiplier;
        q->param.mfx.MaxKbps          = max_bitrate_kbps / brc_param_multiplier;
        q->param.mfx.BRCParamMultiplier = brc_param_multiplier;
        if (q->param.mfx.RateControlMethod == MFX_RATECONTROL_QVBR)
            q->extco3.QVBRQuality = av_clip(avctx->global_quality, 0, 51);
        break;
    case MFX_RATECONTROL_CQP:
        quant = avctx->global_quality / FF_QP2LAMBDA;
        if (avctx->codec_id == AV_CODEC_ID_AV1) {
            q->param.mfx.QPI = av_clip_uintp2(quant * fabs(avctx->i_quant_factor) + avctx->i_quant_offset, 8);
            q->param.mfx.QPP = av_clip_uintp2(quant, 8);
            q->param.mfx.QPB = av_clip_uintp2(quant * fabs(avctx->b_quant_factor) + avctx->b_quant_offset, 8);
        } else {
            q->param.mfx.QPI = av_clip(quant * fabs(avctx->i_quant_factor) + avctx->i_quant_offset, 0, 51);
            q->param.mfx.QPP = av_clip(quant, 0, 51);
            q->param.mfx.QPB = av_clip(quant * fabs(avctx->b_quant_factor) + avctx->b_quant_offset, 0, 51);
        }
        q->old_global_quality = avctx->global_quality;
        q->old_i_quant_factor = avctx->i_quant_factor;
        q->old_i_quant_offset = avctx->i_quant_offset;
        q->old_b_quant_factor = avctx->b_quant_factor;
        q->old_b_quant_offset = avctx->b_quant_offset;

        break;
#if QSV_HAVE_AVBR
    case MFX_RATECONTROL_AVBR:
        q->param.mfx.TargetKbps  = target_bitrate_kbps / brc_param_multiplier;
        q->param.mfx.Convergence = q->avbr_convergence;
        q->param.mfx.Accuracy    = q->avbr_accuracy;
        q->param.mfx.BRCParamMultiplier = brc_param_multiplier;
        break;
#endif
    case MFX_RATECONTROL_LA:
        q->param.mfx.TargetKbps  = target_bitrate_kbps / brc_param_multiplier;
        q->extco2.LookAheadDepth = q->look_ahead_depth;
        q->param.mfx.BRCParamMultiplier = brc_param_multiplier;
        break;
    case MFX_RATECONTROL_LA_ICQ:
        q->extco2.LookAheadDepth = q->look_ahead_depth;
    case MFX_RATECONTROL_ICQ:
        q->param.mfx.ICQQuality  = av_clip(avctx->global_quality, 1, 51);
        break;
    }

My conclusion: either it's not supported/meant to work, or it hasn't been tested yet. It might work if those fields get set, but that's really a Q for @dvrogozh or somebody else from Intel....

I would suggest opening a separate issue for this, and including a link to this issue for reference.

camthoms commented 1 year ago

Will do, thanks!