Try whether these changes make the 12-tap version competitive with the mix version

igv commented 1 year ago

//!HOOK NATIVE
//!BIND NATIVE
//!BIND CHROMA
//!WHEN CHROMA.w LUMA.w <
//!OFFSET ALIGN
//!DESC Chroma From Luma Prediction

// Chroma-from-luma prediction.
//
// Samples 13 taps from CHROMA and, at the same coordinates, from NATIVE (whose
// .x channel is used as luma). A least-squares line chroma = alpha * luma + beta
// is fitted over those taps, evaluated at the current pixel's luma, and blended
// with the pixel's existing chroma (NATIVE .yz) using a weight derived from the
// magnitude of the luma/chroma correlation.
//
// Returns vec4(luma, blended chroma.x, blended chroma.y, 1.0).
vec4 hook() {
    // Top-left texel of the 2x2 chroma neighbourhood surrounding this pixel.
    vec2 pp = CHROMA_pos * CHROMA_size - vec2(0.5);
    vec2 fp = floor(pp);

    // Tap offsets relative to fp, in chroma texels. Both textures are sampled
    // at identical coordinates so every chroma tap has a co-sited luma value.
    vec2 tap_pos[13];
    tap_pos[0]  = (fp + vec2( 0.5, -0.5)) * CHROMA_pt;
    tap_pos[1]  = (fp + vec2( 1.5, -0.5)) * CHROMA_pt;
    tap_pos[2]  = (fp + vec2(-0.5,  0.5)) * CHROMA_pt;
    tap_pos[3]  = (fp + vec2( 0.5,  0.5)) * CHROMA_pt;
    tap_pos[4]  = (fp + vec2( 1.5,  0.5)) * CHROMA_pt;
    tap_pos[5]  = (fp + vec2( 2.5,  0.5)) * CHROMA_pt;
    tap_pos[6]  = (fp + vec2(-0.5,  1.5)) * CHROMA_pt;
    tap_pos[7]  = (fp + vec2( 0.5,  1.5)) * CHROMA_pt;
    tap_pos[8]  = (fp + vec2( 1.5,  1.5)) * CHROMA_pt;
    tap_pos[9]  = (fp + vec2(-1.5,  0.5)) * CHROMA_pt;
    tap_pos[10] = (fp + vec2( 0.5,  2.5)) * CHROMA_pt;
    tap_pos[11] = (fp + vec2( 0.5, -1.5)) * CHROMA_pt;
    tap_pos[12] = (fp + vec2(-0.5, -0.5)) * CHROMA_pt;

    // Gather the samples and accumulate the means in the same pass.
    vec2 chroma_pixels[13];
    float luma_pixels[13];
    float luma_avg = 0.0;
    vec2 chroma_avg = vec2(0.0);
    for (int i = 0; i < 13; i++) {
        chroma_pixels[i] = CHROMA_tex(tap_pos[i]).xy;
        luma_pixels[i] = NATIVE_tex(tap_pos[i]).x;
        luma_avg += luma_pixels[i];
        chroma_avg += chroma_pixels[i];
    }
    luma_avg /= 13.0;
    chroma_avg /= 13.0;

    // Sums of squared deviations and cross-deviations (not divided by the tap
    // count; the common factor cancels in both corr and alpha).
    // Squares are computed by multiplication: GLSL's pow() is undefined for a
    // negative base, and roughly half of the deviations are negative.
    float luma_var = 0.0;
    vec2 chroma_var = vec2(0.0);
    vec2 luma_chroma_cov = vec2(0.0);
    for (int i = 0; i < 13; i++) {
        float luma_diff = luma_pixels[i] - luma_avg;
        vec2 chroma_diff = chroma_pixels[i] - chroma_avg;
        luma_var += luma_diff * luma_diff;
        chroma_var += chroma_diff * chroma_diff;
        luma_chroma_cov += luma_diff * chroma_diff;
    }

    // Magnitude of the correlation coefficient per chroma channel. The
    // denominator is floored at 1e-6 so flat regions do not divide by zero.
    vec2 corr = abs(luma_chroma_cov / max(sqrt(luma_var * chroma_var), 1e-6));
    corr = clamp(corr, 0.0, 1.0);

    // Least-squares slope and intercept of chroma as a function of luma.
    // Flooring the denominator keeps alpha bounded when luma is nearly flat.
    vec2 alpha = luma_chroma_cov / max(luma_var, 1e-6);
    vec2 beta = chroma_avg - alpha * luma_avg;

    // Current pixel: .x is luma, .yz is the chroma already present in NATIVE.
    vec4 native_pix = NATIVE_texOff(0.0);
    float luma_native = native_pix.x;
    vec2 chroma_native = native_pix.yz;

    vec2 chroma_pred = alpha * luma_native + beta;
    chroma_pred = clamp(chroma_pred, 0.0, 1.0);

    // Blend towards the prediction by at most 50% (corr is in [0, 1]).
    // No further clamp is applied to the blended result.
    vec4 output_pix = vec4(luma_native, 0.0, 0.0, 1.0);
    output_pix.yz = mix(chroma_native, chroma_pred, corr / 2.0);
    return output_pix;
}

Artoriuz commented 1 year ago

Thanks!

I'm travelling right now and I don't have access to my computer, but I'll check this out as soon as I can.

As you've noticed, the problem with the shader is that the math is very unstable and values tend to explode, and I didn't really want to pursue the idea further because it was looking like too much of a hack.

Artoriuz commented 1 year ago

Ok, I could look at this briefly.

From my admittedly very limited tests, it looks like limiting the denominators essentially solves the "values tend to explode" problem, which in turn makes the AR-style clamping unnecessary. Numbers seem a bit better doing this but I can't say the visual difference is meaningful.

The only thing I don't understand is why you'd include an extra top-left pixel (fp + vec2(-0.5, -0.5)).

Artoriuz commented 1 year ago

The same solution doesn't work nearly as well in the 4-tap variant; values can still easily go in the wrong direction due to the steepness of the slope (red-to-green transitions, for example).

I think these shaders can still be improved in other ways too. I could try adding a luma downscaling step, or figure out a better way to handle the correlation-based mixing (right now it's just corr / 2.0, which probably isn't ideal). This will probably have to wait until I'm back home in ~3 weeks though.

Artoriuz commented 1 year ago

Updated the shaders with some of the suggestions. Thanks again =)

Artoriuz / glsl-chroma-from-luma-prediction

Try whether these changes make the 12-tap version competitive with the mix version #1