intel / libyami

Yet Another Media Infrastructure. It is the core part of a media codec with hardware acceleration; it is yummy to your video experience on Linux-like platforms.
Apache License 2.0
146 stars 106 forks source link

Encoder: memory always growing #751

Closed jsorg71 closed 6 years ago

jsorg71 commented 7 years ago

I have a test program here that causes memory to continuously grow. Maybe one of you can take a look to see if I'm doing something wrong.


#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <dlfcn.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include <va/va.h>
#include <va/va_drm.h>

#include <YamiVersion.h>
#include <VideoEncoderCapi.h>

/* File descriptor of the DRM render node opened in main(); -1 until opened. */
static int g_va_fd = -1;
/* VA display obtained from g_va_fd via vaGetDisplayDRM() in main(). */
static VADisplay g_va_display = 0;
/* libva version numbers filled in by vaInitialize() in main(). */
static int g_va_major_version = 0;
static int g_va_minor_version = 0;
/* Input surface; created and destroyed once per loop iteration in main(). */
static VASurfaceID g_va_surface = 0;
/* Staging image used to upload pixel data into g_va_surface each iteration. */
static VAImage g_va_image;
/* Encoder handle; created and released once per loop iteration in main(). */
static EncodeHandler g_handle = 0;

int main(int argc, char** argv)
{
    VAImageFormat va_image_format;
    NativeDisplay nd;
    VideoParamsCommon encVideoParams;
    VideoConfigAVCStreamFormat streamFormat;
    int index;
    int count;
    void* buf_ptr;
    VideoFrame yami_vf;
    VideoEncOutputBuffer outb;
    char *cdata;
    int cdata_max_bytes;
    int noencode;

    noencode = 0;
    count = 100;
    for (index = 0; index < argc; index++)
    {
        if (strcmp(argv[index], "--loop") == 0)
        {
            count = atoi(argv[index + 1]);
            index++;
        }
        if (strcmp(argv[index], "--noencode") == 0)
        {
            noencode = 1;
        }
    }

    cdata_max_bytes = 1024 * 1024;
    cdata = (char *) malloc(cdata_max_bytes);

    g_va_fd = open("/dev/dri/renderD128", O_RDWR);
    printf("g_va_fd %d\n", g_va_fd);
    g_va_display = vaGetDisplayDRM(g_va_fd);
    printf("g_va_display %p\n", g_va_display);
    vaInitialize(g_va_display, &g_va_major_version, &g_va_minor_version);

    for (index = 0; index < count; index++)
    {

        vaCreateSurfaces(g_va_display, VA_RT_FORMAT_YUV420, 1024, 768,
                         &g_va_surface, 1, 0, 0);
        memset(&va_image_format, 0, sizeof(va_image_format));
        va_image_format.fourcc = VA_FOURCC_NV12;
        vaCreateImage(g_va_display, &va_image_format, 1024, 768, &g_va_image);

        g_handle = createEncoder(YAMI_MIME_H264);
        memset(&nd, 0, sizeof(nd));
        nd.handle = (intptr_t) (g_va_display);
        nd.type = NATIVE_DISPLAY_VA;
        encodeSetNativeDisplay(g_handle, &nd);

        memset(&encVideoParams, 0, sizeof(encVideoParams));
        encVideoParams.size = sizeof(VideoParamsCommon);
        encodeGetParameters(g_handle, VideoParamsTypeCommon, &encVideoParams);
        encVideoParams.resolution.width = 1024;
        encVideoParams.resolution.height = 768;
        encVideoParams.rcMode = RATE_CONTROL_CQP;
        encVideoParams.rcParams.initQP = 28;
        encVideoParams.intraPeriod = 16;
        encodeSetParameters(g_handle, VideoParamsTypeCommon, &encVideoParams);
        memset(&streamFormat, 0, sizeof(streamFormat));
        streamFormat.size = sizeof(VideoConfigAVCStreamFormat);
        encodeGetParameters(g_handle, VideoConfigTypeAVCStreamFormat, &streamFormat);
        streamFormat.streamFormat = AVC_STREAM_FORMAT_ANNEXB;
        encodeSetParameters(g_handle, VideoConfigTypeAVCStreamFormat, &streamFormat);
        encodeStart(g_handle);

        vaMapBuffer(g_va_display, g_va_image.buf, &buf_ptr);
        memset(buf_ptr, 0, ((1024 * 768) * 3) / 2);
        vaUnmapBuffer(g_va_display, g_va_image.buf);
        vaPutImage(g_va_display, g_va_surface, g_va_image.image_id,
                   0, 0, 1024, 768, 0, 0, 1024, 768);
        vaSyncSurface(g_va_display, g_va_surface);

        if (noencode == 0)
        {
            memset(&yami_vf, 0, sizeof(yami_vf));
            yami_vf.surface = g_va_surface;
            encodeEncode(g_handle, &yami_vf);

            memset(&outb, 0, sizeof(outb));
            outb.data = (unsigned char *) (cdata);
            outb.bufferSize = cdata_max_bytes;
            outb.format = OUTPUT_EVERYTHING;
            encodeGetOutput(g_handle, &outb, 1);
            printf("compressed to %d\n", outb.dataSize);
        }

        encodeStop(g_handle);
        releaseEncoder(g_handle);

        vaDestroyImage(g_va_display, g_va_image.image_id);
        vaDestroySurfaces(g_va_display, &g_va_surface, 1);

        usleep(10000);
        printf("loop %d\n", index);

    }

    vaTerminate(g_va_display);
    close(g_va_fd);
    free(cdata);
    return 0;
}
xuguangxin commented 7 years ago

@Zhziyao, please use valgrind to check this issue. Thanks.

jsorg71 commented 7 years ago

I ran valgrind already. I think this is a va surface leak or something because vaTerminate cleans it up when the app exits. I can see the leaks when I comment out vaTerminate but I can't figure out what's wrong.

xuguangxin commented 7 years ago

Hi @jsorg71, how do you see the leak? I commented out the vaTerminate in your code and ran it with valgrind --leak-check=full ./encode --loop 50. It does not seem to leak anything. It is always 6,370 bytes for any loop number.

thanks

==5828==
==5828== HEAP SUMMARY:
==5828==     in use at exit: 6,370 bytes in 18 blocks
==5828==   total heap usage: 10,246 allocs, 10,228 frees, 837,807,281 bytes allocated
==5828==
==5828== LEAK SUMMARY:
==5828==    definitely lost: 0 bytes in 0 blocks
==5828==    indirectly lost: 0 bytes in 0 blocks
==5828==      possibly lost: 0 bytes in 0 blocks
==5828==    still reachable: 6,370 bytes in 18 blocks
==5828==         suppressed: 0 bytes in 0 blocks
==5828== Reachable blocks (those to which a pointer was found) are not shown.
==5828== To see them, rerun with: --leak-check=full --show-leak-kinds=all
==5828==
==5828== For counts of detected and suppressed errors, rerun with: -v
jsorg71 commented 7 years ago

Can you run with --loop 10000 and then run top and watch VIRT and RES? What do they grow to? Then compare when adding --noencode.

jsorg71 commented 7 years ago

It looks like Intel changed VAAPI https://github.com/01org/libva/commit/3eb038aa13bdd785808286c0a4995bd7a1ef07e9 Can this change affect yami?

xuguangxin commented 7 years ago

@jsorg71, it does not impact our case. It's just a comment. In practice, for the Intel driver, we never delete the buffer in vaRenderPicture.

xuguangxin commented 6 years ago

Hi @jsorg71, I can reproduce your result. It seems only virtual memory grows; the physical memory is fairly stable. This is why we can't find the issue using valgrind.

Hi @xhaihao, it seems we leak tons of virtual memory. Any idea?

xhaihao commented 6 years ago

@xuguangxin I found that 15 VA surfaces are created but only 14 VA surfaces are destroyed in a single loop.

xhaihao commented 6 years ago

@xuguangxin @jsorg71 could you have a try with https://github.com/xhaihao/intel-vaapi-driver/commit/a6f5a1c0a0aaf4e3dae9b308d86487b18e43d92c ?

jsorg71 commented 6 years ago

It looks like that was it! Thanks @xhaihao @xuguangxin

jsorg71 commented 6 years ago

I may have spoken too soon. I had a workaround in place so it was not growing. I'll do some more testing and reply.

jsorg71 commented 6 years ago

I'm convinced it's fixed. I changed my builder to take intel-vaapi-driver-1.8.2 and patch. I look forward to the next release.

xuguangxin commented 6 years ago

Hi @jsorg71, this issue only happens when you follow this pattern: "create an encoder -> encode a frame -> free the encoder -> create an encoder -> encode a frame....". It's not very efficient to always create an encoder. If you just want every frame to be an I frame, you can set https://github.com/01org/libyami/blob/apache/interface/VideoEncoderDefs.h#L216 to 1.

jsorg71 commented 6 years ago

@xuguangxin It happens as well when you "create an encoder -> encode 2 frames -> free encoder" or "create an encoder -> encode 100 frames -> free encoder" or "create an encoder -> encode 1000000 frames -> free encoder". It's the act of freeing the encoder that leaks. I created the test program to exaggerate the scenario. It's not a real-world thing you should do, and I know there are better ways to get an I frame. A real-world app can do something like create an encoder of size 640x640, encode 4000 frames, free the encoder, then later create an encoder of size 1280x720, encode 200 frames, free the encoder, and so on. The real-world app leaks much more slowly, but after a while it can add up. BTW, this same leak happened when you resized the encoder via encodeStop, encodeGetParameters, encodeSetParameters, encodeStart, but good news: that is fixed now too.

xuguangxin commented 6 years ago

@jsorg71 , You are right, thanks for the clarification.

xuguangxin commented 6 years ago

Closing this since it is fixed. Thanks @jsorg71 for reporting this.