Kosmonaut3d / ColorGradingFilter-Sample

A color grading / correction filter for Monogame and XNA.
60 stars 11 forks source link

OpenGL request #1

Open Apostolique opened 7 years ago

Apostolique commented 7 years ago

It would be nice to have an OpenGL version as well (something that works with a MonoGame DesktopGL build). Just changing the shader model version doesn't seem to be enough. (The `Load()` function on `Texture2D` isn't available.)

Apostolique commented 7 years ago

Actually, here is my solution for OpenGL. Not sure if there's any performance loss, but this seems to work.

//Color Grading Shader with LUTs, TheKosmonaut 2017 ( kosmonaut3d@googlemail.com )


// Edge length of one LUT table in texels (e.g. 16 or 32); used both to build and to index the LUT.
float Size = 16;
// Number of tables per row in the LUT atlas (the atlas is SizeRoot * Size texels wide).
float SizeRoot = 4;

// Image to be color graded; bound to InputSampler.
Texture2D InputTexture;
// Look-up table atlas; bound to LutSampler.
Texture2D LUT;

// Only referenced by the commented-out InputTexture.Sample call in PixelShaderApplyLUT.
SamplerState Sampler;

// Sampler for the image being graded: point filtering on all levels, clamped addressing.
SamplerState InputSampler
{
    Texture = ( InputTexture );
    MagFilter = POINT;
    MinFilter = POINT;
    Mipfilter = POINT;
    AddressU = CLAMP;
    AddressV = CLAMP;
};
// Sampler for the LUT atlas. Point filtering is required: the pixel shader interpolates
// between LUT entries by hand, so the hardware must return exactly one texel per fetch.
SamplerState LutSampler
{
    Texture = ( LUT );
    MagFilter = POINT;
    MinFilter = POINT;
    Mipfilter = POINT;
    AddressU = CLAMP;
    AddressV = CLAMP;
};


// Vertex input of the full-screen quad: a 2D position only.
struct VertexShaderFSQInput
{
    float2 Position : POSITION0;
};

// Data passed from the full-screen quad vertex shader to the pixel shaders.
struct VertexShaderFSQOutput
{
    float4 Position : SV_POSITION;
    float2 TexCoord : TEXCOORD0;
};



//Only needs float2 position as input.
//Passes the position through unchanged (z = w = 1) and derives the texture coordinate from it:
//xy * 0.5 + 0.5 (positions in [-1, 1] become [0, 1]), with the y axis flipped afterwards.
VertexShaderFSQOutput VertexShaderFSQFunction(VertexShaderFSQInput input)
{
    VertexShaderFSQOutput output;

    output.Position = float4(input.Position.xy, 1, 1);
    output.TexCoord = input.Position.xy * 0.5f + 0.5f;
    output.TexCoord.y = 1 - output.TexCoord.y;

    return output;
}


//Creating a LookUpTable texture with default values
//The render target is treated as a grid of Size x Size tables, SizeRoot tables per row.
//Inside a table red runs along x and green along y; the table index encodes blue.
float4 PixelShaderCreateLUT(VertexShaderFSQOutput input) : COLOR0
{
    //Size can be 16 or 32 for example
    //16 will yield in 4x4 fields
    //32 in 8x8 etc

    //Note: The sample Location is 0.5, 0.5!
    //So even though a pixel is stored at [0,0] its texCoord is [0.5, 0.5]
    float2 pixel = input.Position.xy; //float2(trunc(input.Position.x), trunc(input.Position.y));

    //Position inside the current table
    float red = (pixel.x % Size) / Size;
    float green = (pixel.y % Size) / Size;

    //Which table the pixel belongs to
    float col = trunc(pixel.x / Size);
    float row = trunc(pixel.y / Size);

    float blue = (row * SizeRoot + col) / Size;

    return float4(red, green, blue, 1);
}

//Applies the LUT to the input image.
//Samples the source pixel, locates the matching entries in the LUT atlas and blends the
//8 neighbouring entries (2 red x 2 green x 2 blue) by hand. Returns the blended LUT color.
float4 PixelShaderApplyLUT(VertexShaderFSQOutput input) : COLOR0
{
    //Our input
    //float4 baseTexture =  InputTexture.Sample(Sampler, input.TexCoord.xy);
    float4 baseTexture =  tex2D(InputSampler, input.TexCoord.xy);

    //Clamp to [0, 1] so out-of-range input (e.g. from a floating point render target)
    //cannot produce an index outside of its table.
    float3 baseColor = saturate(baseTexture.rgb);

    //Size of the LUT atlas in texels, used to turn texel positions into texture coordinates
    int width = SizeRoot * Size;
    int height = (Size / SizeRoot) * Size;
    float2 wh = float2(width, height);

    //Manual trilinear interpolation

    //We need to clamp since our values go, for example, from 0 to 15. But with a red value of 1.0 we would get 16, which is on the next table already.

    //OBSOLETE: We also need to shift half a pixel to the left, since our sampling locations do not match the storage location (see CreateLUT)
    //float halfOffset = 0.5f;

    float red = baseColor.r * (Size - 1);

    float redinterpol = frac(red);

    float green = baseColor.g * (Size - 1);
    float greeninterpol = frac(green);

    float blue = baseColor.b * (Size - 1);
    float blueinterpol = frac(blue);

    //Blue base value

    float row = trunc(blue / SizeRoot);
    float col = trunc(blue % SizeRoot);

    float2 blueBaseTable = float2(trunc(col * Size), trunc(row * Size));

    float4 b0r1g0;
    float4 b0r0g1;
    float4 b0r1g1;
    float4 b1r0g0;
    float4 b1r1g0;
    float4 b1r0g1;
    float4 b1r1g1;

    //We need to read 8 values (like in a 3d LUT) and interpolate between them.
    //This cannot be done with default hardware filtering so I am doing it manually.
    //Note that we must not interpolate when on the borders of tables!
    //On a border the "+1" neighbour is replaced by a copy of an existing sample. Its
    //interpolation weight is frac(Size - 1) = 0, so the copy only has to be defined.

    //Red 0 and 1, Green 0

    float4 b0r0g0 = tex2D(LutSampler, float2(blueBaseTable.x + red, blueBaseTable.y + green) / wh);

    if (red < Size - 1)
    {
        b0r1g0 = tex2D(LutSampler, float2(blueBaseTable.x + red + 1, blueBaseTable.y + green) / wh);
    }
    else
    {
        b0r1g0 = b0r0g0;
    }

    // Green 1

    if (green < Size - 1)
    {
        //Red 0 and 1

        b0r0g1 = tex2D(LutSampler, float2(blueBaseTable.x + red, blueBaseTable.y + green + 1) / wh);

        if (red < Size - 1)
        {
            b0r1g1 = tex2D(LutSampler, float2(blueBaseTable.x + red + 1, blueBaseTable.y + green + 1) / wh);
        }
        else
        {
            b0r1g1 = b0r0g1;
        }
    }
    else
    {
        b0r0g1 = b0r0g0;
        b0r1g1 = b0r1g0;
    }

    if (blue < Size - 1)
    {
        //Move on to the table of the next blue value
        blue += 1;
        row = trunc(blue / SizeRoot);
        col = trunc(blue % SizeRoot);

        blueBaseTable = float2(trunc(col * Size), trunc(row * Size));

        b1r0g0 = tex2D(LutSampler, float2(blueBaseTable.x + red, blueBaseTable.y + green) / wh);

        if (red < Size - 1)
        {
            b1r1g0 = tex2D(LutSampler, float2(blueBaseTable.x + red + 1, blueBaseTable.y + green) / wh);
        }
        else
        {
            b1r1g0 = b1r0g0;
        }

        // Green 1

        if (green < Size - 1)
        {
            //Red 0 and 1

            b1r0g1 = tex2D(LutSampler, float2(blueBaseTable.x + red, blueBaseTable.y + green + 1) / wh);

            if (red < Size - 1)
            {
                b1r1g1 = tex2D(LutSampler, float2(blueBaseTable.x + red + 1, blueBaseTable.y + green + 1) / wh);
            }
            else
            {
                b1r1g1 = b1r0g1;
            }
        }
        else
        {
            b1r0g1 = b1r0g0;
            b1r1g1 = b1r1g0;
        }
    }
    else
    {
        //Last blue table: reuse the samples of the current table
        b1r0g0 = b0r0g0;
        b1r1g0 = b0r1g0;
        b1r0g1 = b0r0g1;
        b1r1g1 = b0r1g1;
    }

    //Blend along red, then green, inside each blue table...
    float4 result = lerp(lerp(b0r0g0, b0r1g0, redinterpol), lerp(b0r0g1, b0r1g1, redinterpol), greeninterpol);
    float4 result2 = lerp(lerp(b1r0g0, b1r1g0, redinterpol), lerp(b1r0g1, b1r1g1, redinterpol), greeninterpol);

    //...and finally between the two blue tables
    result = lerp(result, result2, blueinterpol);

    return result;
}


// Grades the input image with the LUT (full-screen quad + PixelShaderApplyLUT).
technique ApplyLUT
{
    pass Pass1
    {
        VertexShader = compile vs_3_0 VertexShaderFSQFunction();
        PixelShader = compile ps_3_0 PixelShaderApplyLUT();
    }
}

// Renders a LUT with default values (full-screen quad + PixelShaderCreateLUT).
technique CreateLUT
{
    pass Pass1
    {
        VertexShader = compile vs_3_0 VertexShaderFSQFunction();
        PixelShader = compile ps_3_0 PixelShaderCreateLUT();
    }
}
Kosmonaut3d commented 7 years ago


Apostolique commented 6 years ago

In OpenGL, there's still the half pixel offset problem. I see you have some notes about that in the comments but I'm not 100% sure how to add it back in.

Edit: nvm, that wasn't the problem. Turns out I needed to use POINT for the Mag, Min, Mip filters. I edited the post above.

Apostolique commented 4 years ago

I guess I revisited this shader at some point; this is the version I have now:

//Color Grading Shader with LUTs, TheKosmonaut 2017 ( kosmonaut3d@googlemail.com )


// Edge length of one LUT table in texels (e.g. 16 or 32); used both to build and to index the LUT.
float Size = 16;
// Number of tables per row in the LUT atlas (the atlas is SizeRoot * Size texels wide).
float SizeRoot = 4;

// Sampler for the image being graded, taken from sampler register s0.
sampler InputSampler : register(s0);
// Look-up table atlas; bound to LutSampler.
Texture2D LUT;

// Sampler for the LUT atlas. Point filtering is required: the pixel shader interpolates
// between LUT entries by hand, so the hardware must return exactly one texel per fetch.
SamplerState LutSampler
{
    Texture = ( LUT );
    MagFilter = POINT;
    MinFilter = POINT;
    Mipfilter = POINT;
    AddressU = CLAMP;
    AddressV = CLAMP;
};


// Pixel shader input: position, vertex color and texture coordinate.
struct VertexToPixel {
    float4 Position : SV_Position0;
    float4 Color : COLOR0;
    float4 TexCoord : TEXCOORD0;
};



//Creating a LookUpTable texture with default values
//The render target is treated as a grid of Size x Size tables, SizeRoot tables per row.
//Inside a table red runs along x and green along y; the table index encodes blue.
float4 PixelShaderCreateLUT(VertexToPixel input) : COLOR0
{
    //Size can be 16 or 32 for example
    //16 will yield in 4x4 fields
    //32 in 8x8 etc

    //Note: The sample Location is 0.5, 0.5!
    //So even though a pixel is stored at [0,0] its texCoord is [0.5, 0.5]
    float2 pixel = input.Position.xy; //float2(trunc(input.Position.x), trunc(input.Position.y));

    //Position inside the current table
    float red = (pixel.x % Size) / Size;
    float green = (pixel.y % Size) / Size;

    //Which table the pixel belongs to
    float col = trunc(pixel.x / Size);
    float row = trunc(pixel.y / Size);

    float blue = (row * SizeRoot + col) / Size;

    return float4(red, green, blue, 1);
}

//Applies the LUT to the input image.
//Samples the source pixel, locates the matching entries in the LUT atlas and blends the
//8 neighbouring entries (2 red x 2 green x 2 blue) by hand. Returns the blended LUT color.
float4 PixelShaderApplyLUT(VertexToPixel input) : COLOR0
{
    //Our input
    float4 baseTexture =  tex2D(InputSampler, input.TexCoord.xy);

    //Clamp to [0, 1] so out-of-range input (e.g. from a floating point render target)
    //cannot produce an index outside of its table.
    float3 baseColor = saturate(baseTexture.rgb);

    //Size of the LUT atlas in texels, used to turn texel positions into texture coordinates
    int width = SizeRoot * Size;
    int height = (Size / SizeRoot) * Size;
    float2 wh = float2(width, height);

    //Manual trilinear interpolation

    //We need to clamp since our values go, for example, from 0 to 15. But with a red value of 1.0 we would get 16, which is on the next table already.

    float red = baseColor.r * (Size - 1);

    float redinterpol = frac(red);

    float green = baseColor.g * (Size - 1);
    float greeninterpol = frac(green);

    float blue = baseColor.b * (Size - 1);
    float blueinterpol = frac(blue);

    //Blue base value

    float row = trunc(blue / SizeRoot);
    float col = trunc(blue % SizeRoot);

    float2 blueBaseTable = float2(trunc(col * Size), trunc(row * Size));

    float4 b0r0g0;
    float4 b0r1g0;
    float4 b0r0g1;
    float4 b0r1g1;
    float4 b1r0g0;
    float4 b1r1g0;
    float4 b1r0g1;
    float4 b1r1g1;

    //We need to read 8 values (like in a 3d LUT) and interpolate between them.
    //This cannot be done with default hardware filtering so I am doing it manually.
    //Note that we must not interpolate when on the borders of tables!
    //The "+1" neighbours are capped at the last entry (Size - 1) instead of branching.

    //Red 0 and 1, Green 0
    float redI = min(red + 1, Size - 1);
    float greenI = min(green + 1, Size - 1);
    float blueI = min(blue + 1, Size - 1);

    b0r0g0 = tex2D(LutSampler, float2(blueBaseTable.x + red, blueBaseTable.y + green) / wh);
    b0r1g0 = tex2D(LutSampler, float2(blueBaseTable.x + redI, blueBaseTable.y + green) / wh);
    b0r0g1 = tex2D(LutSampler, float2(blueBaseTable.x + red, blueBaseTable.y + greenI) / wh);
    b0r1g1 = tex2D(LutSampler, float2(blueBaseTable.x + redI, blueBaseTable.y + greenI) / wh);

    //Move on to the table of the next blue value
    row = trunc(blueI / SizeRoot);
    col = trunc(blueI % SizeRoot);

    blueBaseTable = float2(trunc(col * Size), trunc(row * Size));

    b1r0g0 = tex2D(LutSampler, float2(blueBaseTable.x + red, blueBaseTable.y + green) / wh);
    b1r1g0 = tex2D(LutSampler, float2(blueBaseTable.x + redI, blueBaseTable.y + green) / wh);
    b1r0g1 = tex2D(LutSampler, float2(blueBaseTable.x + red, blueBaseTable.y + greenI) / wh);
    b1r1g1 = tex2D(LutSampler, float2(blueBaseTable.x + redI, blueBaseTable.y + greenI) / wh);

    //Blend along red, then green, inside each blue table...
    float4 result = lerp(lerp(b0r0g0, b0r1g0, redinterpol), lerp(b0r0g1, b0r1g1, redinterpol), greeninterpol);
    float4 result2 = lerp(lerp(b1r0g0, b1r1g0, redinterpol), lerp(b1r0g1, b1r1g1, redinterpol), greeninterpol);

    //...and finally between the two blue tables
    result = lerp(result, result2, blueinterpol);

    return result;
}


// Grades the input image with the LUT; only a pixel shader is set in this pass.
technique ApplyLUT
{
    pass Pass1
    {
        PixelShader = compile ps_3_0 PixelShaderApplyLUT();
    }
}

// Renders a LUT with default values; only a pixel shader is set in this pass.
technique CreateLUT
{
    pass Pass1
    {
        PixelShader = compile ps_3_0 PixelShaderCreateLUT();
    }
}