yasukichi / testcode

okawa's personal testcode
0 stars 0 forks source link

UE5 Lumenまとめ #23

Open yasukichi opened 1 year ago

yasukichi commented 1 year ago

UE5 Lumenに関するまとめ Lumenの技術的詳細

Ray tracingはRenderDocと互換性がないため、Ray tracingを有効化するにはRenderDoc pluginを無効化する必要がある

yasukichi commented 1 year ago
yasukichi commented 1 year ago

Profile GPU image

Lumen pass structure

yasukichi commented 1 year ago

LumenSceneLighting

image

Lumen pass structure

yasukichi commented 1 year ago

BuildCardUpdateContext

image image

Lumen pass structure

back

yasukichi commented 1 year ago

ClearCardUpdateContext

}

- シェーダー(LumenSceneLighting.usf)
```C++
/**
 * Zeroes the buffers that the card update context passes accumulate into.
 * Every thread clears the element at its own dispatch index in each buffer
 * whose cleared range is larger than that index.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void ClearCardUpdateContextCS(
    uint3 DispatchThreadId : SV_DispatchThreadID)
{
    const uint ClearIndex = DispatchThreadId.x;

    // Number of elements cleared in each per-context buffer
    const uint NumMaxUpdateBucketElements = CARD_UPDATE_CONTEXT_MAX * MAX_UPDATE_BUCKET_STRIDE;
    const uint NumTileAllocatorElements = CARD_UPDATE_CONTEXT_MAX * CARD_PAGE_TILE_ALLOCATOR_STRIDE;
    const uint NumHistogramElements = CARD_UPDATE_CONTEXT_MAX * PRIORITY_HISTOGRAM_SIZE;

    // Only element 0 of the two page index allocators is reset
    if (ClearIndex == 0)
    {
        RWDirectLightingCardPageIndexAllocator[0] = 0;
        RWIndirectLightingCardPageIndexAllocator[0] = 0;
    }

    if (ClearIndex < NumMaxUpdateBucketElements)
    {
        RWMaxUpdateBucket[ClearIndex] = 0;
    }

    if (ClearIndex < NumTileAllocatorElements)
    {
        RWCardPageTileAllocator[ClearIndex] = 0;
    }

    if (ClearIndex < NumHistogramElements)
    {
        RWPriorityHistogram[ClearIndex] = 0;
    }
}
yasukichi commented 1 year ago

BuildPageUpdatePriorityHistogram

}

- シェーダー(LumenSceneLighting.usf)
```C++
/**
 * One thread per card page: adds the page's tile count to the priority histogram
 * of both the direct and the indirect lighting update context.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void BuildPageUpdatePriorityHistogramCS(
    uint3 DispatchThreadId : SV_DispatchThreadID)
{
    const uint CardPageIndex = DispatchThreadId.x;

    // Threads past the last card page have nothing to do
    if (CardPageIndex >= CardPageNum)
    {
        return;
    }

    FLumenCardPageData CardPage = GetLumenCardPageData(CardPageIndex);
    FLumenCardData Card = GetLumenCardData(CardPage.CardIndex);
    const uint NumCardPageTiles = GetNumCardPageTiles(CardPage);

    // Pages without any tiles do not contribute to either histogram
    if (NumCardPageTiles == 0)
    {
        return;
    }

    // Direct lighting histogram
    BuildUpdatePriorityHistogram(Card, CardPage, CardPageIndex, NumCardPageTiles, CARD_UPDATE_CONTEXT_DIRECT_LIGHTING);
    // Indirect lighting histogram
    BuildUpdatePriorityHistogram(Card, CardPage, CardPageIndex, NumCardPageTiles, CARD_UPDATE_CONTEXT_INDIRECT_LIGHTING);
}

// Atomically adds NumCardPageTiles to the histogram bucket that this page falls into
// for the given update context. Each context owns PRIORITY_HISTOGRAM_SIZE consecutive buckets.
void BuildUpdatePriorityHistogram(FLumenCardData Card, FLumenCardPageData CardPage, uint CardPageIndex, uint NumCardPageTiles, uint CardUpdateContext)
{
    const uint ContextHistogramBase = CardUpdateContext * PRIORITY_HISTOGRAM_SIZE;
    const uint BucketInContext = GetPriorityBucketIndex(Card, CardPage, CardPageIndex, CardUpdateContext);

    InterlockedAdd(RWPriorityHistogram[ContextHistogramBase + BucketInContext], NumCardPageTiles);
}

// Computes the priority histogram bucket of a card page; bucket 0 receives the most important pages.
//
// This copy is specialized to the direct lighting case
// (CardUpdateContext == CARD_UPDATE_CONTEXT_DIRECT_LIGHTING): the last update frame index and the
// update factor are taken from the direct lighting values and CardUpdateContext is not consulted.
//
// Card             Card that owns the page (provides the world -> card space transform and extent)
// CardPage         Page to prioritize
// CardPageIndex    Index of the page, used to look up the feedback buffers
// CardUpdateContext Unused in this specialized copy
// Returns a bucket index in [0; PRIORITY_HISTOGRAM_SIZE - 1]
uint GetPriorityBucketIndex(FLumenCardData Card, FLumenCardPageData CardPage, uint CardPageIndex, uint CardUpdateContext)
{
    uint LastLightingUpdateFrameIndex = CardPage.LastDirectLightingUpdateFrameIndex;
    const float UpdateFactor = DirectLightingUpdateFactor;

    // [1;N]
    uint FramesSinceLastUpdated = SurfaceCacheUpdateFrameIndex - LastLightingUpdateFrameIndex;

    // [0;MAX_UPDATE_FREQUENCY]
    float Frequency = 0.0f;
    {
        float DistanceFromViewer = 100000000.0f;

        // Keep the smallest distance between the page bounds and any camera origin
        for (uint ViewIndex = 0; ViewIndex < NumCameraOrigins; ViewIndex++)
        {
            float3 CardSpaceViewPosition = mul(WorldCameraOrigins[ViewIndex].xyz - Card.Origin, Card.WorldToLocalRotation);
            float3 CardPageLocalCenter;
            float3 CardPageLocalExtent;
            // Inlined body of GetCardPageLocalBBox(CardPage, Card, CardPageLocalCenter, CardPageLocalExtent)
            {
                float2 CardUVMin = CardPage.CardUVRect.xw;
                float2 CardUVMax = CardPage.CardUVRect.zy;

                // Inlined GetCardLocalPosition(Card.LocalExtent, CardUVMin, 1.0f):
                // remaps the UV into the [-Card.LocalExtent;+Card.LocalExtent] range
                float3 CardPageLocalBoxMin;
                {
                    CardPageLocalBoxMin.xy = Card.LocalExtent.xy * (1.0f - 2.0f * float2(1.0f - CardUVMin.x, CardUVMin.y));
                    CardPageLocalBoxMin.z = -(2.0f * 1.0f/*=Depth*/ - 1.0f) * Card.LocalExtent.z;
                }
                float3 CardPageLocalBoxMax = GetCardLocalPosition(Card.LocalExtent, CardUVMax, 0.0f);

                CardPageLocalCenter = 0.5f * (CardPageLocalBoxMax + CardPageLocalBoxMin);
                CardPageLocalExtent = 0.5f * (CardPageLocalBoxMax - CardPageLocalBoxMin);
            }
            DistanceFromViewer = min(DistanceFromViewer, sqrt(ComputeSquaredDistanceFromBoxToPoint(CardPageLocalCenter, CardPageLocalExtent, CardSpaceViewPosition)));
        }
        // Closer pages get a higher frequency
        Frequency = MAX_UPDATE_FREQUENCY - clamp(DistanceFromViewer * FirstClipmapWorldExtentRcp, 0.0f, MAX_UPDATE_FREQUENCY);
    }

    // Drive frequency based on the feedback
    #if SURFACE_CACHE_FEEDBACK
    {
        const uint LastUsedFrameIndex = CardPageLastUsedBuffer[CardPageIndex];
        const uint LastUsedHighResFrameIndex = CardPageHighResLastUsedBuffer[CardPageIndex];

        // Frames between the last lighting update and the most recent use (at least 1)
        FramesSinceLastUpdated = max(max(LastUsedFrameIndex, LastUsedHighResFrameIndex), LastLightingUpdateFrameIndex) - LastLightingUpdateFrameIndex + 1;

        Frequency *= 0.5f;
        if (SurfaceCacheUpdateFrameIndex >= LastUsedHighResFrameIndex + 1)
        {
            Frequency += 0.5f * MAX_UPDATE_FREQUENCY * saturate((SurfaceCacheUpdateFrameIndex - (LastUsedHighResFrameIndex + 1)) / 2.0f);
        }
    }
    #endif

    uint BucketIndex = 0;
    if (LastLightingUpdateFrameIndex == 0)
    {
        // Special case where page wasn't ever updated, just place into first 8 most important buckets based on the frequency
        BucketIndex = clamp(MAX_UPDATE_FREQUENCY - Frequency, 0.0f, MAX_UPDATE_FREQUENCY);
    }
    else
    {
        // [0;N]
        float UpdateImportance = FramesSinceLastUpdated * (Frequency + 1.0f);

        // Normalize histogram
        UpdateImportance = (PRIORITY_HISTOGRAM_SIZE * UpdateImportance) / (UpdateFactor * (MAX_UPDATE_FREQUENCY + 1.0f));

        // Offset from [1;N] and invert in order to place most important pages in bucket 0
        BucketIndex = PRIORITY_HISTOGRAM_SIZE - 1 - clamp(UpdateImportance - 1, 0, PRIORITY_HISTOGRAM_SIZE - 1);
    }

    return BucketIndex;
}

// Unpacked data of one card page, as produced by GetLumenCardPageData.
struct FLumenCardPageData
{
    // Index passed to GetLumenCardData to fetch the card that owns this page
    uint CardIndex;
    // Derived on load: true when SizeInTexels.x > 0
    bool bMapped;

    uint ResLevelPageTableOffset;
    uint2 ResLevelSizeInTiles;

    float2 SizeInTexels;
    // Derived on load: PhysicalAtlasUVRect.xy * LumenCardScene.PhysicalAtlasSize
    float2 PhysicalAtlasCoord;

    float4 CardUVRect;
    float4 PhysicalAtlasUVRect;
    float2 CardUVTexelScale;
    // Derived on load: LumenCardScene.InvPhysicalAtlasSize
    float2 PhysicalAtlasUVTexelScale;

    // Set to SurfaceCacheUpdateFrameIndex by BuildCardsUpdateListCS when the page is selected for the respective update
    uint LastDirectLightingUpdateFrameIndex;
    uint LastIndirectLightingUpdateFrameIndex;

    // Increments each time the page has Radiosity updated, needs to be consecutive for the sample pattern
    uint IndirectLightingTemporalIndex;
};

// Writable view of the card page data.
RWStructuredBuffer<float4> RWLumenCardPageDataBuffer;
// LumenCardPageDataBuffer is the name GetLumenCardPageData reads through. It resolves to the
// writable buffer above when USE_RW_LUMEN_CARD_PAGE_DATA_BUFFER is set, otherwise to
// LumenCardScene.CardPageData.
#if USE_RW_LUMEN_CARD_PAGE_DATA_BUFFER
    #define LumenCardPageDataBuffer RWLumenCardPageDataBuffer
#else
    #define LumenCardPageDataBuffer LumenCardScene.CardPageData
#endif

// Loads the LUMEN_CARD_PAGE_DATA_STRIDE float4 rows of page CardPageId from LumenCardPageDataBuffer
// and unpacks them into a FLumenCardPageData.
// Note: layout must match FLumenCardPageData in C++
FLumenCardPageData GetLumenCardPageData(uint CardPageId)
{
    const uint FirstRow = CardPageId * LUMEN_CARD_PAGE_DATA_STRIDE;
    const float4 Row0 = LumenCardPageDataBuffer[FirstRow + 0];
    const float4 Row1 = LumenCardPageDataBuffer[FirstRow + 1];
    const float4 Row2 = LumenCardPageDataBuffer[FirstRow + 2];
    const float4 Row3 = LumenCardPageDataBuffer[FirstRow + 3];
    const float4 Row4 = LumenCardPageDataBuffer[FirstRow + 4];

    FLumenCardPageData Page = (FLumenCardPageData) 0;

    // Row 0: card index, page table offset (both stored as uint bits), size in texels
    Page.CardIndex = asuint(Row0.x);
    Page.ResLevelPageTableOffset = asuint(Row0.y);
    Page.SizeInTexels = Row0.zw;

    // Rows 1-2: UV rectangles
    Page.CardUVRect = Row1;
    Page.PhysicalAtlasUVRect = Row2;

    // Row 3: texel scale and the res level size in tiles (stored as uint bits)
    Page.CardUVTexelScale = Row3.xy;
    Page.ResLevelSizeInTiles = asuint(Row3.zw);

    // Row 4: lighting update bookkeeping (stored as uint bits)
    Page.LastDirectLightingUpdateFrameIndex = asuint(Row4.x);
    Page.LastIndirectLightingUpdateFrameIndex = asuint(Row4.y);
    Page.IndirectLightingTemporalIndex = asuint(Row4.z);

    // Values derived from the loaded fields and the scene constants
    Page.bMapped = Page.SizeInTexels.x > 0;
    Page.PhysicalAtlasCoord = Page.PhysicalAtlasUVRect.xy * LumenCardScene.PhysicalAtlasSize;
    Page.PhysicalAtlasUVTexelScale = LumenCardScene.InvPhysicalAtlasSize;

    return Page;
}

// Unpacked data of one card, as produced by GetLumenCardData.
struct FLumenCardData
{
    // OBB in MeshCards space
    float3x3 MeshCardsToLocalRotation;
    float3 MeshCardsOrigin;
    float3 MeshCardsExtent;

    // OBB in world space
    float3x3 WorldToLocalRotation;
    float3 Origin;
    // Loaded through abs(), so every component is non-negative
    float3 LocalExtent;

    // Each component is unpacked from 16 bits
    uint2 SizeInPages;
    uint PageTableOffset;

    // Each component is unpacked from 16 bits
    uint2 HiResSizeInPages;
    uint HiResPageTableOffset;

    // Convert Card's uint ResLevel (card's resolution) to uint2 ResLevelXY (each side's resolution)
    uint2 ResLevelToResLevelXYBias;

    bool bVisible;
    bool bHeightfield;
    // Unpacked from a 4 bit field
    uint AxisAlignedDirection;
    // Unpacked from a 4 bit field
    uint LightingChannelMask;

    // Average world space texel size of always resident pages
    float TexelSize;
};

// LumenCardDataBuffer is the name GetLumenCardData reads through: either a dedicated
// structured buffer or LumenCardScene.CardData, depending on USE_LUMEN_CARD_DATA_BUFFER.
#if USE_LUMEN_CARD_DATA_BUFFER
    StructuredBuffer<float4> LumenCardDataBuffer;   
#else
    #define LumenCardDataBuffer LumenCardScene.CardData
#endif

// Stride of a single card's data in float4's, must match C++
#define LUMEN_CARD_DATA_STRIDE 9
// Stride of a single card page's data in float4's
#define LUMEN_CARD_PAGE_DATA_STRIDE 5

// Heightfields are a special case and they always have only one card
#define LUMEN_HEIGHTFIELD_LOCAL_CARD_INDEX 0

// Loads the LUMEN_CARD_DATA_STRIDE float4 rows of card CardId from LumenCardDataBuffer
// and unpacks them into a FLumenCardData.
// Note: layout must match FLumenCardData in C++
FLumenCardData GetLumenCardData(uint CardId)
{
    const uint FirstRow = CardId * LUMEN_CARD_DATA_STRIDE;
    const float4 Row0 = LumenCardDataBuffer[FirstRow + 0];
    const float4 Row1 = LumenCardDataBuffer[FirstRow + 1];
    const float4 Row2 = LumenCardDataBuffer[FirstRow + 2];
    const float4 Row3 = LumenCardDataBuffer[FirstRow + 3];
    const float4 Row4 = LumenCardDataBuffer[FirstRow + 4];
    const float4 Row5 = LumenCardDataBuffer[FirstRow + 5];
    const float4 Row6 = LumenCardDataBuffer[FirstRow + 6];
    const float4 Row7 = LumenCardDataBuffer[FirstRow + 7];
    const float4 Row8 = LumenCardDataBuffer[FirstRow + 8];

    FLumenCardData Card = (FLumenCardData)0;

    // Rows 0-2: world space rotation in xyz, origin spread over the w components
    Card.WorldToLocalRotation[0] = Row0.xyz;
    Card.WorldToLocalRotation[1] = Row1.xyz;
    Card.WorldToLocalRotation[2] = Row2.xyz;
    Card.Origin = float3(Row0.w, Row1.w, Row2.w);

    // Row 3: extent in xyz, bit-packed properties in w
    Card.LocalExtent = abs(Row3.xyz);

    const uint PackedProperties = asuint(Row3.w);
    Card.ResLevelToResLevelXYBias.x = (PackedProperties >> 0) & 0xFF;
    Card.ResLevelToResLevelXYBias.y = (PackedProperties >> 8) & 0xFF;
    Card.AxisAlignedDirection = (PackedProperties >> 16) & 0xF;
    Card.LightingChannelMask = (PackedProperties >> 20) & 0xF;
    Card.bVisible = ((PackedProperties >> 24) & 1) != 0;
    Card.bHeightfield = ((PackedProperties >> 25) & 1) != 0;

    // Row 4: page counts packed as two 16 bit halves, followed by their page table offsets
    const uint PackedSizeInPages = asuint(Row4.x);
    const uint PackedHiResSizeInPages = asuint(Row4.z);
    Card.SizeInPages = uint2(PackedSizeInPages & 0xFFFF, (PackedSizeInPages >> 16) & 0xFFFF);
    Card.PageTableOffset = asuint(Row4.y);
    Card.HiResSizeInPages = uint2(PackedHiResSizeInPages & 0xFFFF, (PackedHiResSizeInPages >> 16) & 0xFFFF);
    Card.HiResPageTableOffset = asuint(Row4.w);

    // Rows 5-7: MeshCards space rotation in xyz, origin spread over the w components
    Card.MeshCardsToLocalRotation[0] = Row5.xyz;
    Card.MeshCardsToLocalRotation[1] = Row6.xyz;
    Card.MeshCardsToLocalRotation[2] = Row7.xyz;
    Card.MeshCardsOrigin = float3(Row5.w, Row6.w, Row7.w);

    // Row 8: MeshCards extent and texel size
    Card.MeshCardsExtent = Row8.xyz;
    Card.TexelSize = Row8.w;

    return Card;
}

// Number of tiles in a card page: page area in texels divided by the area of one tile.
uint GetNumCardPageTiles(FLumenCardPageData CardPage)
{
    const float2 PageSizeInTexels = CardPage.SizeInTexels;
    const float PageAreaInTexels = PageSizeInTexels.x * PageSizeInTexels.y;

    return PageAreaInTexels / (CARD_TILE_SIZE * CARD_TILE_SIZE);
}
yasukichi commented 1 year ago

Select max update bucket

}

- シェーダー(LumenSceneLighting.usf)
```C++
/**
 * Selects the max update bucket for both update contexts.
 * The first thread of group 0 handles direct lighting, the first thread of group 1 handles
 * indirect lighting; every other thread does nothing.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void SelectMaxUpdateBucketCS(
    uint3 GroupId : SV_GroupID,
    uint3 GroupThreadId : SV_GroupThreadID,
    uint3 DispatchThreadId : SV_DispatchThreadID)
{
    // Only the first thread of a group does any work
    if (GroupThreadId.x != 0)
    {
        return;
    }

    if (GroupId.x == 0)
    {
        SelectMaxUpdateBucket(CARD_UPDATE_CONTEXT_DIRECT_LIGHTING);
    }
    else if (GroupId.x == 1)
    {
        SelectMaxUpdateBucket(CARD_UPDATE_CONTEXT_INDIRECT_LIGHTING);
    }
}

// Walks the priority histogram of CardUpdateContext from bucket 0 upwards and finds the first
// bucket at which the accumulated tile count reaches the tile budget. Writes to RWMaxUpdateBucket:
//   [stride * context + 0] index of that bucket (PRIORITY_HISTOGRAM_SIZE when the budget is never reached)
//   [stride * context + 1] number of tiles still within budget for that bucket
void SelectMaxUpdateBucket(uint CardUpdateContext)
{
    // Direct lighting case.
    // NOTE(review): the direct lighting budget is used for every CardUpdateContext here - confirm
    // whether the indirect lighting context needs its own budget.
    const uint MaxTilesToUpdate = MaxDirectLightingTilesToUpdate;
    const uint HistogramBase = CardUpdateContext * PRIORITY_HISTOGRAM_SIZE;

    uint AccumulatedTiles = 0;
    uint SelectedBucket = 0;
    uint SelectedBucketMaxTiles = MaxTilesToUpdate;
    bool bBudgetReached = false;

    while (!bBudgetReached && SelectedBucket < PRIORITY_HISTOGRAM_SIZE)
    {
        const uint TilesInBucket = PriorityHistogram[HistogramBase + SelectedBucket];

        if (AccumulatedTiles + TilesInBucket >= MaxTilesToUpdate)
        {
            // This bucket only partially fits: remember how many of its tiles are still allowed
            SelectedBucketMaxTiles = MaxTilesToUpdate - AccumulatedTiles;
            bBudgetReached = true;
        }
        else
        {
            AccumulatedTiles += TilesInBucket;
            ++SelectedBucket;
        }
    }

    const uint OutputBase = MAX_UPDATE_BUCKET_STRIDE * CardUpdateContext;
    RWMaxUpdateBucket[OutputBase + 0] = SelectedBucket;
    RWMaxUpdateBucket[OutputBase + 1] = SelectedBucketMaxTiles;
}
yasukichi commented 1 year ago

Build cards update list

}

- シェーダー(LumenSceneLighting.usf)
```C++
/**
 * One thread per card page: tries to add the page to the direct and to the indirect lighting
 * update list, and writes the page's update bookkeeping back when it was added to either list.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void BuildCardsUpdateListCS(
    uint3 DispatchThreadId : SV_DispatchThreadID)
{
    const uint CardPageIndex = DispatchThreadId.x;

    // Threads past the last card page have nothing to do
    if (CardPageIndex >= CardPageNum)
    {
        return;
    }

    FLumenCardPageData CardPage = GetLumenCardPageData(CardPageIndex);
    FLumenCardData Card = GetLumenCardData(CardPage.CardIndex);
    const uint NumCardPageTiles = GetNumCardPageTiles(CardPage);

    // Pages without any tiles are never scheduled
    if (NumCardPageTiles == 0)
    {
        return;
    }

    const bool bSelectedForDirectLighting = BuildCardsUpdateList(
        Card,
        CardPage,
        CardPageIndex,
        NumCardPageTiles,
        CARD_UPDATE_CONTEXT_DIRECT_LIGHTING,
        RWDirectLightingCardPageIndexAllocator,
        RWDirectLightingCardPageIndexData);

    if (bSelectedForDirectLighting)
    {
        CardPage.LastDirectLightingUpdateFrameIndex = SurfaceCacheUpdateFrameIndex;
    }

    // Evaluated after the direct lighting bookkeeping above has been applied to CardPage
    const bool bSelectedForIndirectLighting = BuildCardsUpdateList(
        Card,
        CardPage,
        CardPageIndex,
        NumCardPageTiles,
        CARD_UPDATE_CONTEXT_INDIRECT_LIGHTING,
        RWIndirectLightingCardPageIndexAllocator,
        RWIndirectLightingCardPageIndexData);

    if (bSelectedForIndirectLighting)
    {
        CardPage.LastIndirectLightingUpdateFrameIndex = SurfaceCacheUpdateFrameIndex;
        CardPage.IndirectLightingTemporalIndex = CardPage.IndirectLightingTemporalIndex + 1;
    }

    // Persist the new bookkeeping unless updates are frozen
    const bool bUpdatedCardPage = bSelectedForDirectLighting || bSelectedForIndirectLighting;
    if (bUpdatedCardPage && FreezeUpdateFrame == 0)
    {
        SetCardPageUpdateData(CardPageIndex, CardPage);
    }
}

// Decides whether a card page is updated this frame for the given update context and, if so,
// appends its index to RWCardPageIndexData.
//
// Card, CardPage, CardPageIndex  Page under consideration and the card that owns it
// NumCardPageTiles               Tile count of the page, charged against the tile budgets
// CardUpdateContext              Selects the slots in MaxUpdateBucket and RWCardPageTileAllocator
// RWCardPageIndexAllocator       Element 0 counts the entries written to RWCardPageIndexData
// RWCardPageIndexData            Output list of selected card page indices
// Returns true only when the page index was written to the list.
bool BuildCardsUpdateList(
    FLumenCardData Card,
    FLumenCardPageData CardPage,
    uint CardPageIndex,
    uint NumCardPageTiles,
    uint CardUpdateContext,
    RWStructuredBuffer<uint> RWCardPageIndexAllocator,
    RWStructuredBuffer<uint> RWCardPageIndexData)
{
    // NOTE(review): the direct lighting budget is used for every CardUpdateContext here - confirm
    // whether the indirect lighting context needs its own budget.
    const uint MaxTilesToUpdate = MaxDirectLightingTilesToUpdate/* direct lighting case */;
    // Values written by SelectMaxUpdateBucket: the last bucket to update and its tile allowance
    const uint MaxUpdateBucketIndex = MaxUpdateBucket[MAX_UPDATE_BUCKET_STRIDE * CardUpdateContext + 0];
    const uint MaxUpdateBucketMaxTiles = MaxUpdateBucket[MAX_UPDATE_BUCKET_STRIDE * CardUpdateContext + 1];

    // Update everything up to the max selected priority bucket
    uint PriorityBucketIndex = GetPriorityBucketIndex(Card, CardPage, CardPageIndex, CardUpdateContext);
    bool bUpdateThisPage = PriorityBucketIndex <= MaxUpdateBucketIndex;

    if (bUpdateThisPage && PriorityBucketIndex == MaxUpdateBucketIndex)
    {
        // Can't update more than MaxUpdateBucketMaxTiles in the max bucket to preserve the general order
        uint NumAllocatedTilesInMaxUpdateBucket = 0;
        // Slot 1 of the tile allocator counts the tiles requested by pages in the max bucket
        InterlockedAdd(RWCardPageTileAllocator[CARD_PAGE_TILE_ALLOCATOR_STRIDE * CardUpdateContext + 1], NumCardPageTiles, NumAllocatedTilesInMaxUpdateBucket);

        if (!(NumAllocatedTilesInMaxUpdateBucket + NumCardPageTiles <= MaxUpdateBucketMaxTiles))
        {
            bUpdateThisPage = false;
        }
    }

    if (bUpdateThisPage)
    {
        // Reset; only set again once the page index has actually been written
        bUpdateThisPage = false;
        uint NumAllocatedTiles = 0;
        // Slot 0 of the tile allocator counts the tiles requested by all candidate pages
        InterlockedAdd(RWCardPageTileAllocator[CARD_PAGE_TILE_ALLOCATOR_STRIDE * CardUpdateContext + 0], NumCardPageTiles, NumAllocatedTiles);

        if (NumAllocatedTiles + NumCardPageTiles <= MaxTilesToUpdate)
        {
            uint NextIndex = 0;
            InterlockedAdd(RWCardPageIndexAllocator[0], 1, NextIndex);

            // Guard the write against indices at or beyond CardPageNum
            if (NextIndex < CardPageNum)
            {
                RWCardPageIndexData[NextIndex] = CardPageIndex;
                bUpdateThisPage = true;
            }
        }
    }

    return bUpdateThisPage;
}
yasukichi commented 1 year ago

SetCardPageIndexIndirectArgs

}

- シェーダー(LumenSceneLighting.usf)
```C++
// Indirect argument outputs, one draw and one dispatch buffer per update context
RWBuffer<uint> RWDirectLightingDrawCardPageIndicesIndirectArgs;
RWBuffer<uint> RWDirectLightingDispatchCardPageIndicesIndirectArgs;
RWBuffer<uint> RWIndirectLightingDrawCardPageIndicesIndirectArgs;
RWBuffer<uint> RWIndirectLightingDispatchCardPageIndicesIndirectArgs;

// Element 0 of each allocator is read as the number of card page indices in the update list
StructuredBuffer<uint> DirectLightingCardPageIndexAllocator;
StructuredBuffer<uint> IndirectLightingCardPageIndexAllocator;
uint VertexCountPerInstanceIndirect;

/**
 * Converts the direct and indirect lighting card page counts into indirect draw and
 * dispatch arguments. Only the first thread of the dispatch writes anything.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void SetCardPageIndexIndirectArgsCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
    if (DispatchThreadId.x != 0)
    {
        return;
    }

    // Direct lighting
    {
        const uint NumDirectPages = DirectLightingCardPageIndexAllocator[0];
        const uint NumDirectPageGroups = (NumDirectPages + 63) / 64;
        const uint NumDirectTileGroups = 4 * NumDirectPages;

        // FRHIDrawIndirectParameters
        RWDirectLightingDrawCardPageIndicesIndirectArgs[0] = VertexCountPerInstanceIndirect;
        RWDirectLightingDrawCardPageIndicesIndirectArgs[1] = NumDirectPages;
        RWDirectLightingDrawCardPageIndicesIndirectArgs[2] = 0;
        RWDirectLightingDrawCardPageIndicesIndirectArgs[3] = 0;

        // Thread per page
        RWDirectLightingDispatchCardPageIndicesIndirectArgs[0] = NumDirectPageGroups;
        RWDirectLightingDispatchCardPageIndicesIndirectArgs[1] = 1;
        RWDirectLightingDispatchCardPageIndicesIndirectArgs[2] = 1;

        // Thread per tile
        RWDirectLightingDispatchCardPageIndicesIndirectArgs[3 + 0] = NumDirectTileGroups;
        RWDirectLightingDispatchCardPageIndicesIndirectArgs[3 + 1] = 1;
        RWDirectLightingDispatchCardPageIndicesIndirectArgs[3 + 2] = 1;
    }

    // Indirect lighting
    {
        const uint NumIndirectPages = IndirectLightingCardPageIndexAllocator[0];
        const uint NumIndirectPageGroups = (NumIndirectPages + 63) / 64;
        const uint NumIndirectTileGroups = 4 * NumIndirectPages;

        // FRHIDrawIndirectParameters
        RWIndirectLightingDrawCardPageIndicesIndirectArgs[0] = VertexCountPerInstanceIndirect;
        RWIndirectLightingDrawCardPageIndicesIndirectArgs[1] = NumIndirectPages;
        RWIndirectLightingDrawCardPageIndicesIndirectArgs[2] = 0;
        RWIndirectLightingDrawCardPageIndicesIndirectArgs[3] = 0;

        // Thread per page
        RWIndirectLightingDispatchCardPageIndicesIndirectArgs[0] = NumIndirectPageGroups;
        RWIndirectLightingDispatchCardPageIndicesIndirectArgs[1] = 1;
        RWIndirectLightingDispatchCardPageIndicesIndirectArgs[2] = 1;

        // Thread per tile
        RWIndirectLightingDispatchCardPageIndicesIndirectArgs[3 + 0] = NumIndirectTileGroups;
        RWIndirectLightingDispatchCardPageIndicesIndirectArgs[3 + 1] = 1;
        RWIndirectLightingDispatchCardPageIndicesIndirectArgs[3 + 2] = 1;
    }
}
yasukichi commented 1 year ago

DirectLighting

image

Lumen pass structure

back

image ライトをGatheredLightsに集める

// Collect every scene light that is rendered view-independently, has a positive indirect
// lighting scale and is rendered by at least one view. Each light is added at most once.
TArray<FLumenGatheredLight, TInlineAllocator<64>> GatheredLights;
bool bHasRectLights = false;

for (auto LightIt = Scene->Lights.CreateConstIterator(); LightIt; ++LightIt)
{
    const FLightSceneInfoCompact& LightSceneInfoCompact = *LightIt;
    const FLightSceneInfo* LightSceneInfo = LightSceneInfoCompact.LightSceneInfo;

    if (!LightSceneInfo->ShouldRenderLightViewIndependent()
        || !(LightSceneInfo->Proxy->GetIndirectLightingScale() > 0.0f))
    {
        continue;
    }

    for (const FViewInfo& View : Views)
    {
        if (!LightSceneInfo->ShouldRenderLight(View, true))
        {
            continue;
        }

        // The light's index is its position in GatheredLights
        const FLumenGatheredLight GatheredLight(Scene, View, LightSceneInfo, /*LightIndex*/ GatheredLights.Num());
        if (GatheredLight.Type == ELumenLightType::Rect)
        {
            bHasRectLights = true;
        }
        GatheredLights.Add(GatheredLight);

        // The first view that renders the light is enough
        break;
    }
}
yasukichi commented 1 year ago

CullTiles 17 lights

void FDeferredShadingSceneRenderer::RenderDirectLightingForLumenScene(...)
{
    :

    FRDGBufferRef LumenPackedLights = CreateLumenLightDataBuffer(GraphBuilder, GatheredLights, MainView.GetLastEyeAdaptationExposure()); // TODO View

    FLightTileCullContext CullContext;
    FLumenCardTileUpdateContext CardTileUpdateContext;
    CullDirectLightingTiles(Views, GraphBuilder, CardUpdateContext, LumenCardSceneUniformBuffer, GatheredLights,    LumenPackedLights, CullContext, CardTileUpdateContext, ComputePassFlags);
    :

}

}

void Lumen::SpliceCardPagesIntoTiles( FRDGBuilder& GraphBuilder, const FGlobalShaderMap GlobalShaderMap, const FLumenCardUpdateContext& CardUpdateContext, const TRDGUniformBufferRef& LumenCardSceneUniformBuffer, FLumenCardTileUpdateContext& OutCardTileUpdateContext, ERDGPassFlags ComputePassFlags) { const uint32 MaxLightTilesTilesX = FMath::DivideAndRoundUp(CardUpdateContext.UpdateAtlasSize.X, Lumen::CardTileSize); const uint32 MaxLightTilesTilesY = FMath::DivideAndRoundUp(CardUpdateContext.UpdateAtlasSize.Y, Lumen::CardTileSize); const uint32 MaxLightTiles = MaxLightTilesTilesX MaxLightTilesTilesY; :

yasukichi commented 1 year ago

SpliceCardPagesIntoTiles

groupshared uint SharedTileAllocator; groupshared uint SharedTiles[THREADGROUP_SIZE * THREADGROUP_SIZE]; groupshared uint SharedGlobalTileOffset;

/**

yasukichi commented 1 year ago

InitializeCardTileIndirectArgs

/**
 * Writes two sets of indirect dispatch arguments derived from the card tile count.
 * Only the first thread of the dispatch writes anything.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void InitializeCardTileIndirectArgsCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
    if (DispatchThreadId.x != 0)
    {
        return;
    }

    const uint NumCardTiles = CardTileAllocator[0];
    const uint NumGroupsOfCardTileThreads = (NumCardTiles + 63) / 64;

    // One thread per card tile
    RWDispatchCardTilesIndirectArgs[0] = NumGroupsOfCardTileThreads;
    RWDispatchCardTilesIndirectArgs[1] = 1;
    RWDispatchCardTilesIndirectArgs[2] = 1;

    // One thread group per card tile
    RWDispatchCardTilesIndirectArgs[3] = NumCardTiles;
    RWDispatchCardTilesIndirectArgs[4] = 1;
    RWDispatchCardTilesIndirectArgs[5] = 1;
}

yasukichi commented 1 year ago

BuildLightTiles

}

- シェーダー(LumenSceneDirectLightingCulling.usf)
![image](https://user-images.githubusercontent.com/14350715/218243071-91eb9df2-e542-4080-8999-4028ae507848.png)
```C++
// Element 0 is read as the number of valid entries in CardTiles
StructuredBuffer<uint> CardTileAllocator;
// Packed card tiles, decoded with UnpackCardTileData
StructuredBuffer<uint> CardTiles;

// Fixed-size set of packed light samples collected for one card tile.
// BuildLightTilesCS counts only entries greater than 0 as samples.
struct FLightSampleAccumulator
{
    uint PackedSamples[MAX_LIGHT_SAMPLES];
};

// Unpacked form of one entry of CardTiles.
struct FCardTileData
{
    // Index passed to GetLumenCardPageData (unpacked from 24 bits)
    uint CardPageIndex;
    // Tile coordinate within the page (each component unpacked from 4 bits)
    uint2 TileCoord;
};

// Decodes a packed card tile:
//   bits [0;24)  card page index
//   bits [24;28) tile coordinate X
//   bits [28;32) tile coordinate Y
FCardTileData UnpackCardTileData(uint PackedTile)
{
    const uint PackedCoordX = (PackedTile >> 24) & 0xF;
    const uint PackedCoordY = (PackedTile >> 28) & 0xF;

    FCardTileData Unpacked;
    Unpacked.CardPageIndex = PackedTile & 0xFFFFFF;
    Unpacked.TileCoord = uint2(PackedCoordX, PackedCoordY);
    return Unpacked;
}

/**
 * Pick N most important lights per tile in page selected to update to update this frame, and output a list of light tiles
 *
 * For every card tile this writes:
 *  - one entry per selected light into RWLightTiles (space reserved through RWLightTileAllocator)
 *  - a per light / per view tile count into RWLightTileAllocatorPerLight
 *  - a packed (count, offset) pair into RWLightTileOffsetNumPerCardTile
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void BuildLightTilesCS(
    uint3 GroupId : SV_GroupID,
    uint3 DispatchThreadId : SV_DispatchThreadID,
    uint3 GroupThreadId : SV_GroupThreadID)
{
    // One thread per tile
    uint CardTileIndex = DispatchThreadId.x;

    FLightSampleAccumulator LightSampleAccumulator = InitLightSampleAccumulator();

    // CardTileAllocator[0] holds the number of valid card tiles
    if (CardTileIndex < CardTileAllocator[0])
    {
        FCardTileData CardTile = UnpackCardTileData(CardTiles[CardTileIndex]);
        FLumenCardPageData CardPage = GetLumenCardPageData(CardTile.CardPageIndex);
        // Stays 0 when no light was selected for this tile
        uint PackedOffsetNum = 0;

        // NOTE(review): CardIndex is declared as uint, so this comparison is always true
        if (CardPage.CardIndex >= 0)
        {
            FLumenCardData Card = GetLumenCardData(CardPage.CardIndex);

            // UV range covered by this tile within its page
            const uint2 SizeInTiles = CardPage.SizeInTexels / CARD_TILE_SIZE;
            float2 UVMin = float2(CardTile.TileCoord) / SizeInTiles;
            float2 UVMax = float2(CardTile.TileCoord + 1) / SizeInTiles;

            // Flip the Y axis of UVMin.y / UVMax.y
            float SwapY = UVMin.y;
            UVMin.y = 1.0f - UVMax.y;
            UVMax.y = 1.0f - SwapY;

            uint ViewIndex = GetCardViewIndex(CardPage, Card, UVMin, UVMax, NumViews, true);

            // Loop over lights to select N most important lights
            for (uint LightIndex = 0; LightIndex < NumLights; ++LightIndex)
            {
                FLumenLight LumenLight = LoadLumenLight(LightIndex, PreViewTranslation[ViewIndex].xyz);

                float3 CardPageWorldCenter = 0.0f; // LWC_TODO:
                bool bLightAffectsCard = DoesLightAffectCardPageUVRange(LumenLight, CardPage, Card, UVMin, UVMax, CardPageWorldCenter);
                if (bLightAffectsCard)
                {
                    // Center of a tile for estimating attenuation
                    // NOTE(review): uses PrimaryView.PreViewTranslation rather than PreViewTranslation[ViewIndex] - confirm this is intended
                    float3 TranslatedWorldPosition = CardPageWorldCenter + LWCHackToFloat(PrimaryView.PreViewTranslation);

                    FLightSample LightSample;
                    LightSample.Weight = GetLightWeight(LumenLight, TranslatedWorldPosition);
                    LightSample.LightIndex = LightIndex;
                    LightSample.bHasShadowMask = LumenLight.bHasShadowMask;
                    AddLightSample(LightSampleAccumulator, LightSample);
                }
            }

            // Count the accumulator slots that hold a sample (non-zero packed value)
            uint NumPackedLightSamples = 0;
            for (uint PackedSampleIndex = 0; PackedSampleIndex < MAX_LIGHT_SAMPLES; ++PackedSampleIndex)
            {
                if (LightSampleAccumulator.PackedSamples[PackedSampleIndex] > 0)
                {
                    ++NumPackedLightSamples;
                }
            }

            // Reserve a contiguous range in the global light tile array
            uint LightTileOffset = 0;
            if (NumPackedLightSamples > 0)
            {
                InterlockedAdd(RWLightTileAllocator[0], NumPackedLightSamples, LightTileOffset);
            }

            // Assumes the non-zero samples occupy the first NumPackedLightSamples slots - TODO confirm against AddLightSample
            for (uint LightSampleIndex = 0; LightSampleIndex < NumPackedLightSamples; ++LightSampleIndex)
            {
                FLightSample LightSample = UnpackLightSample(LightSampleAccumulator.PackedSamples[LightSampleIndex]);

                // Write light tile to global light tile array
                FLightTileForCompactionPass LightTile;
                LightTile.LightIndex = LightSample.LightIndex;
                LightTile.ViewIndex = ViewIndex;
                LightTile.bHasShadowMask = LightSample.bHasShadowMask;
                LightTile.CardTileIndex = CardTileIndex;
                LightTile.CulledLightIndex = LightSampleIndex;
                RWLightTiles[LightTileOffset + LightSampleIndex] = PackLightTileForCompactionPass(LightTile);

                // Per light / per view tile count
                InterlockedAdd(RWLightTileAllocatorPerLight[LightSample.LightIndex * NumViews + ViewIndex], 1);
            }

            if (NumPackedLightSamples > 0)
            {
                uint CardLightTilesOffset;
                InterlockedAdd(RWLightTileAllocatorForPerCardTileDispatch[0], NumPackedLightSamples, CardLightTilesOffset);
                // Sample count from bit 24 upwards, offset in the bits below (assumes the offset fits in 24 bits - TODO confirm)
                PackedOffsetNum = (NumPackedLightSamples << 24) | CardLightTilesOffset;
            }
        }

        RWLightTileOffsetNumPerCardTile[CardTileIndex] = PackedOffsetNum;
    }
}

// Per view transforms; both arrays are indexed by view index (0 or 1)
float4x4 WorldToClip[2];
float4 PreViewTranslation[2];

// Picks the view (0 or 1) that a card page UV range is associated with.
//
// With a single view the result is always 0. With more views, the distance between the center of
// the UV range and each view origin (-PreViewTranslation[i]) is compared and the closer view wins.
// When bPrioritizeWhenInFrustum is set and the center lies inside a view's frustum, that view's
// distance is replaced by half of the center's clip space w.
//
// CardPage, Card            Page and card providing the local bounding box of the UV range
// UVMin, UVMax              UV range within the card page
// NumViews                  Number of active views
// bPrioritizeWhenInFrustum  Enables the in-frustum distance override
uint GetCardViewIndex(FLumenCardPageData CardPage, FLumenCardData Card, float2 UVMin, float2 UVMax, uint NumViews, bool bPrioritizeWhenInFrustum)
{
    float3 CardPageLocalCenter;
    float3 CardPageLocalExtent;
    GetCardLocalBBox(CardPage, Card, UVMin, UVMax, CardPageLocalCenter, CardPageLocalExtent);

    float3 CardPageWorldCenter = mul(Card.WorldToLocalRotation, CardPageLocalCenter) + Card.Origin;

    uint ViewIndex = 0;

    if (NumViews > 1)
    {
        float View0Distance = length(CardPageWorldCenter - -PreViewTranslation[0].xyz);
        float View1Distance = length(CardPageWorldCenter - -PreViewTranslation[1].xyz);

#define IN_FRUSTUM_DISTANCE 1
#if IN_FRUSTUM_DISTANCE
        if (bPrioritizeWhenInFrustum)
        {
            float4 CardOriginClipSpace0 = mul(float4(CardPageWorldCenter, 1.0f), WorldToClip[0]);

            // Is the center inside the frustum of view 0? Clip space xy must lie within [-w;+w]
            if (all(CardOriginClipSpace0.xy >= -CardOriginClipSpace0.w) && all(CardOriginClipSpace0.xy <= CardOriginClipSpace0.w) && CardOriginClipSpace0.z < 1.0f)
            {
                View0Distance = .5f * CardOriginClipSpace0.w;
            }

            float4 CardOriginClipSpace1 = mul(float4(CardPageWorldCenter, 1.0f), WorldToClip[1]);

            // Same test for view 1
            if (all(CardOriginClipSpace1.xy >= -CardOriginClipSpace1.w) && all(CardOriginClipSpace1.xy <= CardOriginClipSpace1.w) && CardOriginClipSpace1.z < 1.0f)
            {
                View1Distance = .5f * CardOriginClipSpace1.w;
            }
        }
#endif
        // 0 or 1: select the view with the smaller distance (ties go to view 1)
        ViewIndex = View0Distance < View1Distance ? 0 : 1;
    }

    return ViewIndex;
}

image

yasukichi commented 1 year ago

ComputeLightTileOffsetsPerLight

}

- シェーダー(LumenSceneDirectLightingCulling.usf)
```C++
// Tile count per (light, view) pair, indexed by LightIndex * NumViews + ViewIndex
StructuredBuffer<uint> LightTileAllocatorPerLight;
// Output: offset of the first tile of each (light, view) pair, same indexing
RWStructuredBuffer<uint> RWLightTileOffsetsPerLight;

/**
 * Prefix sum for card tile array compaction.
 * A single thread walks all (view, light) pairs, view by view, and stores for each pair the
 * sum of the tile counts of all pairs visited before it.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void ComputeLightTileOffsetsPerLightCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
    // The whole sum is computed serially by the first thread
    if (DispatchThreadId.x != 0)
    {
        return;
    }

    uint RunningTileOffset = 0;

    for (uint ViewIndex = 0; ViewIndex < NumViews; ViewIndex++)
    {
        for (uint LightIndex = 0; LightIndex < NumLights; ++LightIndex)
        {
            const uint PerViewLightIndex = LightIndex * NumViews + ViewIndex;

            RWLightTileOffsetsPerLight[PerViewLightIndex] = RunningTileOffset;
            RunningTileOffset += LightTileAllocatorPerLight[PerViewLightIndex];
        }
    }
}

RWLightTileOffsetsPerLight = prefixsum(LightTileAllocatorPerLight)

yasukichi commented 1 year ago

InitializeLightTileIndirectArgs

}

- シェーダー(LumenSceneDirectLightingCulling.usf)
```C++
// Two sets of dispatch arguments - 0: NumTilesDiv1, 1: NumTilesDiv64
RWBuffer<uint> RWDispatchLightTilesIndirectArgs;
// One set of draw arguments (4 uints) per (light, view) pair
RWBuffer<uint> RWDrawTilesPerLightIndirectArgs;
// One set of dispatch arguments (3 uints) per (light, view) pair
RWBuffer<uint> RWDispatchTilesPerLightIndirectArgs;

uint VertexCountPerInstanceIndirect;
uint PerLightDispatchFactor;

/**
 * Writes the indirect arguments for passes that run over light tiles: thread 0 writes the
 * arguments for the global light tile array, and each thread below NumLights * NumViews writes
 * the draw and dispatch arguments of its own (light, view) pair.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void InitializeLightTileIndirectArgsCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
    const uint PerViewLightIndex = DispatchThreadId.x;

    // Global card tile array
    if (PerViewLightIndex == 0)
    {
        const uint NumLightTiles = LightTileAllocator[0];
        const uint NumLightTileGroupsOf64 = (NumLightTiles + 63) / 64;

        // NumTilesDiv1
        RWDispatchLightTilesIndirectArgs[0] = NumLightTiles;
        RWDispatchLightTilesIndirectArgs[1] = 1;
        RWDispatchLightTilesIndirectArgs[2] = 1;

        // NumTilesDiv64
        RWDispatchLightTilesIndirectArgs[3 + 0] = NumLightTileGroupsOf64;
        RWDispatchLightTilesIndirectArgs[3 + 1] = 1;
        RWDispatchLightTilesIndirectArgs[3 + 2] = 1;
    }

    // Per light card tile array
    if (PerViewLightIndex < NumLights * NumViews)
    {
        // Number of tiles of this (view, light) pair
        const uint NumLightTilesPerLight = LightTileAllocatorPerLight[PerViewLightIndex];
        const uint DispatchArgsBase = 3 * PerViewLightIndex;
        const uint DrawArgsBase = 4 * PerViewLightIndex;

        // FRHIDispatchIndirectParameters
        RWDispatchTilesPerLightIndirectArgs[DispatchArgsBase + 0] = PerLightDispatchFactor * NumLightTilesPerLight;
        RWDispatchTilesPerLightIndirectArgs[DispatchArgsBase + 1] = 1;
        RWDispatchTilesPerLightIndirectArgs[DispatchArgsBase + 2] = 1;

        // FRHIDrawIndirectParameters
        RWDrawTilesPerLightIndirectArgs[DrawArgsBase + 0] = VertexCountPerInstanceIndirect;
        RWDrawTilesPerLightIndirectArgs[DrawArgsBase + 1] = NumLightTilesPerLight;
        RWDrawTilesPerLightIndirectArgs[DrawArgsBase + 2] = 0;
        RWDrawTilesPerLightIndirectArgs[DrawArgsBase + 3] = 0;
    }
}
  1. NumTilesDiv1 : NumLightTiles個のthread group(64,1,1)を投入するindirect args
  2. NumTilesDiv64 : NumLightTiles個のthreadを投入するindirect args
yasukichi commented 1 year ago

CompactLightTiles

}

- シェーダー(LumenSceneDirectLightingCulling.usf)
```C++
RWStructuredBuffer<uint2> RWCompactedLightTiles;
RWStructuredBuffer<uint2> RWLightTilesPerCardTile;
RWStructuredBuffer<uint> RWCompactedLightTileAllocatorPerLight;
StructuredBuffer<uint> LightTileAllocator;
StructuredBuffer<uint2> LightTiles;
StructuredBuffer<uint> LightTileOffsetsPerLight;
StructuredBuffer<uint> LightTileOffsetNumPerCardTile;

/**
 * Compact card tile array
 *
 * One thread per entry of LightTiles (bounded by LightTileAllocator[0]).
 * Each thread writes its light tile to two places:
 *  - RWCompactedLightTiles, grouped per (light, view) using the offsets in
 *    LightTileOffsetsPerLight, packed for the shadow mask pass;
 *  - RWLightTilesPerCardTile, grouped per card tile using the offsets in
 *    LightTileOffsetNumPerCardTile, packed for the light pass.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void CompactLightTilesCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
    const uint LightTileIndex = DispatchThreadId.x;

    // Threads beyond the number of allocated light tiles do nothing.
    if (LightTileIndex < LightTileAllocator[0])
    {
        FLightTileForCompactionPass LightTile = UnpackLightTileForCompactionPass(LightTiles[LightTileIndex]);

        // Atomically reserve the next free slot inside this (light, view) bucket,
        // then turn it into a global index by adding the bucket's start offset.
        uint CompactedLightTileIndex = 0;
        InterlockedAdd(RWCompactedLightTileAllocatorPerLight[LightTile.LightIndex * NumViews + LightTile.ViewIndex], 1, CompactedLightTileIndex);
        CompactedLightTileIndex += LightTileOffsetsPerLight[LightTile.LightIndex * NumViews + LightTile.ViewIndex];

        uint CardTileIndex = LightTile.CardTileIndex;
        FCardTileData CardTile = UnpackCardTileData(CardTiles[CardTileIndex]);

        // Entry consumed by the shadow mask pass: light, view, card page and tile coordinate.
        FLightTileForShadowMaskPass TileForLight;
        TileForLight.LightIndex = LightTile.LightIndex;
        TileForLight.ViewIndex = LightTile.ViewIndex;
        TileForLight.CardPageIndex = CardTile.CardPageIndex;
        TileForLight.TileCoord = CardTile.TileCoord;
        uint2 PackedLightTile = PackLightTileForShadowMaskPass(TileForLight);
        RWCompactedLightTiles[CompactedLightTileIndex] = PackedLightTile;

        // The low 24 bits of the packed value are the card tile's start offset;
        // CulledLightIndex selects the slot within that card tile's range.
        uint PackedOffsetNum = LightTileOffsetNumPerCardTile[CardTileIndex];
        uint LightTileOffset = (PackedOffsetNum & 0x00ffffff) + LightTile.CulledLightIndex/* lightSampleIndex: index of this light within the lights gathered for the tile */;
        // Entry consumed by the light pass; 0xffffffff marks "no shadow mask for this light tile".
        FLightTileForLightPass TileForCardTile;
        TileForCardTile.LightIndex = LightTile.LightIndex;
        TileForCardTile.ViewIndex = LightTile.ViewIndex;
        TileForCardTile.ShadowMaskIndex = LightTile.bHasShadowMask ? CompactedLightTileIndex : 0xffffffff;
        PackedLightTile = PackLightTileForLightPass(TileForCardTile);
        RWLightTilesPerCardTile[LightTileOffset] = PackedLightTile;
    }
}
yasukichi commented 1 year ago

non raytraced shadows

NonRayTracedShadowPass L_Expanse.xxx

}

void ComputeNonRayTracedShadows( FRDGBuilder& GraphBuilder, const FScene Scene, const FViewInfo& View, TRDGUniformBufferRef LumenCardSceneUniformBuffer, TArray<FVisibleLightInfo, SceneRenderingAllocator>& VisibleLightInfos, const FVirtualShadowMapArray& VirtualShadowMapArray, const FLumenGatheredLight& Light, const FLumenLightTileScatterParameters& LightTileScatterParameters, int32 ViewIndex, int32 NumViews, FRDGBufferUAVRef ShadowMaskTilesUAV, FRDGBufferUAVRef ShadowTraceAllocatorUAV, FRDGBufferUAVRef ShadowTracesUAV, ERDGPassFlags ComputePassFlags) { FLumenSceneData& LumenSceneData = Scene->GetLumenSceneData(View); check(Light.NeedsShadowMask());

    :

const FMaterialRenderProxy* LightFunctionMaterialProxy = Light.LightSceneInfo->Proxy->GetLightFunctionMaterial();
bool bUseLightFunction = true;

if (!LightFunctionMaterialProxy
    || !LightFunctionMaterialProxy->GetIncompleteMaterialWithFallback(Scene->GetFeatureLevel()).IsLightFunction()
    || !View.Family->EngineShowFlags.LightFunctions)
{
    bUseLightFunction = false;
    LightFunctionMaterialProxy = UMaterial::GetDefaultMaterial(MD_LightFunction)->GetRenderProxy();
}

const uint32 DispatchIndirectArgOffset = (Light.LightIndex * NumViews + ViewIndex) * sizeof(FRHIDispatchIndirectParameters);

if (bUseLightFunction)
{
    :
}
else
{
    FLumenDirectLightingNonRayTracedShadowsCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FLumenDirectLightingNonRayTracedShadowsCS::FParameters>();
    SetCommonParameters(PassParameters->Common);

    FLumenDirectLightingNonRayTracedShadowsCS::FPermutationDomain PermutationVector;
    PermutationVector.Set<FLumenDirectLightingNonRayTracedShadowsCS::FThreadGroupSize32>(Lumen::UseThreadGroupSize32());
    PermutationVector.Set<FLumenDirectLightingNonRayTracedShadowsCS::FCompactShadowTraces>(ShadowTraceAllocatorUAV != nullptr);
    PermutationVector.Set<FLumenDirectLightingNonRayTracedShadowsCS::FLightType>(Light.Type);
    PermutationVector.Set<FLumenDirectLightingNonRayTracedShadowsCS::FVirtualShadowMap>(bUseVirtualShadowMap);
    PermutationVector.Set<FLumenDirectLightingNonRayTracedShadowsCS::FDynamicallyShadowed>(bUseDenseShadowMap);
    PermutationVector.Set<FLumenDirectLightingNonRayTracedShadowsCS::FDenseShadowMap>(bUseDenseShadowMap);
    PermutationVector.Set<FLumenDirectLightingNonRayTracedShadowsCS::FCloudTransmittance>(bUseCloudTransmittance);
    TShaderRef<FLumenDirectLightingNonRayTracedShadowsCS> ComputeShader = View.ShaderMap->GetShader<FLumenDirectLightingNonRayTracedShadowsCS>(PermutationVector);

    FComputeShaderUtils::AddPass(
        GraphBuilder,
        RDG_EVENT_NAME("NonRayTracedShadowPass %s", *Light.Name),
        ComputePassFlags,
        ComputeShader,
        PassParameters,
        LightTileScatterParameters.DispatchIndirectArgs,
        DispatchIndirectArgOffset);
}
- シェーダー(LumenCardCommon.h)
```C++
/**
 * Converts a card UV and a normalized depth into a position in card local space.
 *
 * U is mirrored first, then both UV axes are remapped with (1 - 2 * uv) and scaled
 * by the card's XY extent. Depth is remapped with -(2 * Depth - 1) and scaled by
 * the card's Z extent.
 */
float3 GetCardLocalPosition(float3 CardLocalExtent, float2 CardUV, float Depth)
{
    const float2 MirroredUV = float2(1.0f - CardUV.x, CardUV.y);
    const float2 SignedUV = 1.0f - 2.0f * MirroredUV;
    const float SignedDepth = -(2.0f * Depth - 1.0f);

    return float3(CardLocalExtent.xy * SignedUV, SignedDepth * CardLocalExtent.z);
}

/**
 * Returns the world position of a card texel: the card local position is
 * transformed with mul(Card.WorldToLocalRotation, v) and offset by Card.Origin.
 */
float3 GetCardWorldPosition(FLumenCardData Card, float2 CardUV, float Depth)
{
    const float3 CardSpacePosition = GetCardLocalPosition(Card.LocalExtent, CardUV, Depth);
    return mul(Card.WorldToLocalRotation, CardSpacePosition) + Card.Origin;
}

/**
 * Packs a shadow mask ray and merges it into the SharedShadowMask bit array.
 *
 * The shadow factor is quantized with SHADOW_FACTOR_BITS_MASK and the completion
 * flag is stored in SHADOW_FACTOR_COMPLETE_BITS_MASK. Each texel owns
 * SHADOW_MASK_RAY_BITS bits at a position derived from its coordinate in the tile.
 *
 * @param Ray                 Shadow factor and completion flag to store.
 * @param CoordInCardTile     Texel coordinate used to locate the bits.
 * @param bClearExistingMask  When true, the texel's bits are cleared before the new value is OR-ed in.
 */
void WriteSharedShadowMaskRay(FShadowMaskRay Ray, uint2 CoordInCardTile, const bool bClearExistingMask)
{
    uint PackedRay = uint(Ray.ShadowFactor * SHADOW_FACTOR_BITS_MASK);

    if (Ray.bShadowFactorComplete)
    {
        PackedRay |= SHADOW_FACTOR_COMPLETE_BITS_MASK;
    }

    const uint TexelLinearIndex = CoordInCardTile.x + CoordInCardTile.y * CARD_TILE_SIZE;
    const uint BitOffset = SHADOW_MASK_RAY_BITS * TexelLinearIndex;
    const uint DWordIndex = BitOffset / 32;
    const uint ShiftInDWord = BitOffset % 32;

    if (bClearExistingMask)
    {
        InterlockedAnd(SharedShadowMask[DWordIndex], ~(SHADOW_MASK_RAY_BITS_MASK << ShiftInDWord));
    }

    // A zero value contributes no bits, so the atomic OR can be skipped.
    if (PackedRay != 0)
    {
        InterlockedOr(SharedShadowMask[DWordIndex], PackedRay << ShiftInDWord);
    }
}

- シェーダー(LumenSurfaceCache.ush)
```C++
// Surface cache attributes of a single card texel, as filled in by GetSurfaceCacheData().
struct FLumenSurfaceCacheData
{
    // Result of IsSurfaceCacheDepthValid() on the sampled depth.
    bool bValid;

    // Raw value sampled from the depth atlas.
    float Depth;
    // Decoded albedo; stays zero when the texel is not valid.
    float3 Albedo;
    // Emissive sampled from the emissive atlas; stays zero when the texel is not valid.
    float3 Emissive;

    // Derived
    // World position reconstructed from the card, the card UV and Depth.
    float3 WorldPosition;
    // World normal decoded from the normal atlas (a default encoding is used when the texel is not valid).
    float3 WorldNormal;
};

/**
 * Loads the surface cache attributes of one card texel.
 *
 * Depth is always sampled; albedo, emissive and the encoded normal are only sampled
 * when the depth is valid. Otherwise albedo and emissive stay zero and the normal
 * is decoded from the neutral encoding (0.5, 0.5).
 *
 * @param Card     Card the texel belongs to (used to decode the normal and world position).
 * @param CardUV   UV inside the card, used to reconstruct the world position.
 * @param AtlasUV  UV inside the physical atlases, used for all texture fetches.
 * @return         Filled FLumenSurfaceCacheData; check bValid before using the surface attributes.
 */
FLumenSurfaceCacheData GetSurfaceCacheData(FLumenCardData Card, float2 CardUV, float2 AtlasUV)
{
    float Depth = Texture2DSampleLevel(LumenCardScene.DepthAtlas, GlobalPointClampedSampler, AtlasUV, 0).x;

    FLumenSurfaceCacheData SurfaceCacheData;
    SurfaceCacheData.Depth = Depth;
    SurfaceCacheData.bValid = IsSurfaceCacheDepthValid(Depth); // Depth < 1.0f
    SurfaceCacheData.Albedo = float3(0.0f, 0.0f, 0.0f);
    SurfaceCacheData.Emissive = float3(0.0f, 0.0f, 0.0f);

    float2 NormalXY = float2(0.5f, 0.5f);

    if (SurfaceCacheData.bValid)
    {
        SurfaceCacheData.Albedo = DecodeSurfaceCacheAlbedo(Texture2DSampleLevel(LumenCardScene.AlbedoAtlas, GlobalPointClampedSampler, AtlasUV, 0).xyz);
        // Emissive is a float3: fetch all three channels. The previous `.x` swizzle
        // broadcast the red channel into green and blue.
        SurfaceCacheData.Emissive = Texture2DSampleLevel(LumenCardScene.EmissiveAtlas, GlobalPointClampedSampler, AtlasUV, 0).xyz;
        NormalXY = Texture2DSampleLevel(LumenCardScene.NormalAtlas, GlobalPointClampedSampler, AtlasUV, 0).xy;
    }

    SurfaceCacheData.WorldNormal = DecodeSurfaceCacheNormal(Card, NormalXY); // xy(0-1) -> {xy(-1-+1), sqrt(1-xy^2)}
    SurfaceCacheData.WorldPosition = GetCardWorldPosition(Card, CardUV, SurfaceCacheData.Depth);

    return SurfaceCacheData;
}

/**
 * Computes the non-ray-traced part of the direct lighting shadow mask for one light.
 *
 * For every texel of a light tile this evaluates light attenuation and N.L, then
 * (depending on the permutation) virtual shadow map, dense shadow map, light function,
 * cloud transmittance and IES profile. The resulting shadow factor and a "complete"
 * flag are packed into group shared memory and written to RWShadowMaskTiles.
 * With COMPACT_SHADOW_TRACES, texels whose shadow factor is not complete are appended
 * to RWShadowTraces so that a later pass can trace them.
 */
[numthreads(THREADGROUP_SIZE_X, THREADGROUP_SIZE_Y, 1)]
void LumenSceneDirectLightingNonRayTracedShadowsCS(
    uint3 GroupId : SV_GroupID,
    uint3 DispatchThreadId : SV_DispatchThreadID,
    uint3 GroupThreadId : SV_GroupThreadID)
{
    uint LinearGroupThreadId = GroupThreadId.x + GroupThreadId.y * THREADGROUP_SIZE_X;

#if THREADGROUP_SIZE_32
    // Each thread group covers half of a card tile: two consecutive groups share one
    // light tile, with a row offset of 0 or 4 and half of the shadow mask dwords each.
    uint LocalLightTileIndex = GroupId.x / 2;
    uint2 CoordInCardTile = GroupThreadId.xy + uint2(0, GroupId.x % 2 ? 0 : 4);
    uint ShadowMaskSize = SHADOW_MASK_CARD_TILE_DWORDS / 2;
    uint ShadowMaskOffset = GroupId.x % 2 ? 0 : 1;
#else
    uint LocalLightTileIndex = GroupId.x;
    uint2 CoordInCardTile = GroupThreadId.xy;
    uint ShadowMaskSize = SHADOW_MASK_CARD_TILE_DWORDS;
    uint ShadowMaskOffset = 0;
#endif

    // Clear the group shared state before any thread accumulates into it.
    if (LinearGroupThreadId < ShadowMaskSize)
    {
        SharedShadowMask[LinearGroupThreadId] = 0;
    }

    if (all(GroupThreadId.xy == 0))
    {
        SharedNumShadowTraces = 0;
        SharedGlobalShadowTraceOffset = 0;
        SharedShadowTraces[0] = 0;
    }

    GroupMemoryBarrierWithGroupSync();

    uint LightTileIndex = LocalLightTileIndex + LightTileOffsetsPerLight[LightIndex * NumViews + ViewIndex];
    FLightTileForShadowMaskPass LightTile = UnpackLightTileForShadowMaskPass(LightTiles[LightTileIndex]);
    uint2 TexelInCardPageCoord = LightTile.TileCoord * CARD_TILE_SIZE + CoordInCardTile;

    FLumenCardPageData CardPage = GetLumenCardPageData(LightTile.CardPageIndex + DummyZeroForFixingShaderCompilerBug);
    float2 AtlasUV = CardPage.PhysicalAtlasUVRect.xy + CardPage.PhysicalAtlasUVTexelScale * (TexelInCardPageCoord + 0.5f);
    float2 CardUV = CardPage.CardUVRect.xy + CardPage.CardUVTexelScale * (TexelInCardPageCoord + 0.5f);

    // Default for invalid texels: fully shadowed and nothing left to resolve.
    FShadowMaskRay ShadowMaskRay;
    ShadowMaskRay.ShadowFactor = 0.0f;
    ShadowMaskRay.bShadowFactorComplete = true;

    FLumenCardData Card = GetLumenCardData(CardPage.CardIndex);
    FLumenSurfaceCacheData SurfaceCacheData = GetSurfaceCacheData(Card, CardUV, AtlasUV);

    if (SurfaceCacheData.bValid)
    {
        FDeferredLightData LightData = LoadLightData(LIGHT_TYPE);
        float3 WorldNormal = SurfaceCacheData.WorldNormal;
        float3 WorldPosition = SurfaceCacheData.WorldPosition;
        float3 TranslatedWorldPosition = WorldPosition + LWCHackToFloat(PrimaryView.PreViewTranslation);

        float3 L = LightData.Direction;
        float3 ToLight = L;
        float CombinedAttenuation = 1.0f;
        float Attenuation = 1.0f;
        float LightMask = 1.0f;

#if LIGHT_TYPE != LIGHT_TYPE_DIRECTIONAL
        if (LightData.bRadialLight)
        {
            LightMask = GetLocalLightAttenuation(TranslatedWorldPosition, LightData, ToLight, L);
        }

        if (LightData.bRectLight)
        {
            FRect Rect = GetRect(ToLight, LightData);
            Attenuation = IntegrateLight(Rect);
        }
        else
        {
            FCapsuleLight Capsule = GetCapsule(ToLight, LightData);
            Capsule.DistBiasSqr = 0;
            Attenuation = IntegrateLight(Capsule, LightData.bInverseSquared);
        }

        CombinedAttenuation *= Attenuation * LightMask;
#endif

        CombinedAttenuation *= saturate(dot(WorldNormal, L));

        if (CombinedAttenuation > 0)
        {
            // The texel receives light: start unshadowed and not yet resolved.
            ShadowMaskRay.ShadowFactor = 1.0f;
            ShadowMaskRay.bShadowFactorComplete = false;
            bool bVSMValid = false;

            const float4 PostProjectionPosition = mul(float4(WorldPosition, 1.0), LWCHackToFloat(PrimaryView.WorldToClip));
            float ReceiverBias = 0.0f;
            if (Card.bHeightfield)
            {
                float3 WorldCameraOrigin = LWCHackToFloat(PrimaryView.WorldCameraOrigin);
                ReceiverBias = CalculateDistanceBasedHeightfieldBias(HeightfieldShadowReceiverBias, WorldPosition, WorldCameraOrigin);
            }

            // Shadow maps are culled so only query points inside the view frustum are valid
            if (all(and(PostProjectionPosition.xy < PostProjectionPosition.w, PostProjectionPosition.xy > -PostProjectionPosition.w)))
            {
                #if VIRTUAL_SHADOW_MAP
                {
                    // Bias only ray start to maximize chances of hitting an allocated page
                    FVirtualShadowMapSampleResult VirtualShadowMapSample = SampleVirtualShadowMapLWCHack(VirtualShadowMapId, WorldPosition, VirtualShadowMapSamplingBias + ReceiverBias, WorldNormal);

                    bVSMValid = VirtualShadowMapSample.bValid;
                    ShadowMaskRay.ShadowFactor *= VirtualShadowMapSample.ShadowFactor;

                    // If there's also a dense shadow map present we need to sample both (unless the VSM determines we are fully shadowed anyways)
                    ShadowMaskRay.bShadowFactorComplete = ShadowMaskRay.ShadowFactor < 0.01f;
                    #if !DENSE_SHADOW_MAP
                    ShadowMaskRay.bShadowFactorComplete = ShadowMaskRay.bShadowFactorComplete || VirtualShadowMapSample.bValid;
                    #endif
                }
                #endif

                #if DENSE_SHADOW_MAP
                if (!ShadowMaskRay.bShadowFactorComplete)
                {
                    float3 TranslatedWorldPositionForShadowing = GetCardWorldPositionForShadowing(TranslatedWorldPosition, L, WorldNormal, ShadowMapSamplingBias + ReceiverBias);

                    #if LIGHT_TYPE == LIGHT_TYPE_DIRECTIONAL
                    {
                        #if DYNAMICALLY_SHADOWED
                        {
                            float SceneDepth = dot(TranslatedWorldPositionForShadowing - PrimaryView.TranslatedWorldCameraOrigin, View.ViewForward);

                            bool bShadowingFromValidUVArea = false;
                            float NewShadowFactor = ComputeDirectionalLightDynamicShadowing(TranslatedWorldPositionForShadowing, SceneDepth, bShadowingFromValidUVArea);

                            if (bShadowingFromValidUVArea)
                            {
                                ShadowMaskRay.ShadowFactor *= NewShadowFactor;
                                ShadowMaskRay.bShadowFactorComplete = VIRTUAL_SHADOW_MAP ? bVSMValid : true;
                            }
                        }
                        #endif
                    }
                    #else
                    {
                        bool bShadowingFromValidUVArea = false;
                        float NewShadowFactor = ComputeVolumeShadowing(TranslatedWorldPositionForShadowing, LightData.bRadialLight && !LightData.bSpotLight, LightData.bSpotLight, bShadowingFromValidUVArea);

                        if (bShadowingFromValidUVArea)
                        {
                            ShadowMaskRay.ShadowFactor *= NewShadowFactor;
                            ShadowMaskRay.bShadowFactorComplete = VIRTUAL_SHADOW_MAP ? bVSMValid : true;
                        }
                    }
                    #endif
                }
                #endif
            }

            #if LIGHT_FUNCTION
            if (ShadowMaskRay.ShadowFactor > 0.01f)
            {
                ShadowMaskRay.ShadowFactor *= GetLightFunction(TranslatedWorldPosition);
                ShadowMaskRay.bShadowFactorComplete = ShadowMaskRay.bShadowFactorComplete || ShadowMaskRay.ShadowFactor < 0.01f;
            }
            #endif

            #if USE_CLOUD_TRANSMITTANCE
            if (ShadowMaskRay.ShadowFactor > 0.01f)
            {
                float OutOpticalDepth = 0.0f;
                ShadowMaskRay.ShadowFactor *= lerp(1.0f, GetCloudVolumetricShadow(TranslatedWorldPosition, CloudShadowmapTranslatedWorldToLightClipMatrix, CloudShadowmapFarDepthKm, CloudShadowmapTexture, CloudShadowmapSampler, OutOpticalDepth), CloudShadowmapStrength);
                ShadowMaskRay.bShadowFactorComplete = ShadowMaskRay.bShadowFactorComplete || ShadowMaskRay.ShadowFactor < 0.01f;
            }
            #endif

            if (ShadowMaskRay.ShadowFactor > 0.01f && UseIESProfile > 0)
            {
                ShadowMaskRay.ShadowFactor *= ComputeLightProfileMultiplier(TranslatedWorldPosition, DeferredLightUniforms.TranslatedWorldPosition, -DeferredLightUniforms.Direction, DeferredLightUniforms.Tangent);
                ShadowMaskRay.bShadowFactorComplete = ShadowMaskRay.bShadowFactorComplete || ShadowMaskRay.ShadowFactor < 0.01f;
            }
        }
        else
        {
            // No light reaches the texel (attenuation or N.L is zero): nothing to trace.
            ShadowMaskRay.ShadowFactor = 0.0f;
            ShadowMaskRay.bShadowFactorComplete = true;
        }
    }

    if (ForceShadowMaps != 0)
    {
        ShadowMaskRay.bShadowFactorComplete = true;
    }

    // The shared mask is indexed with the group-local coordinate, not CoordInCardTile.
    WriteSharedShadowMaskRay(ShadowMaskRay, GroupThreadId.xy, false);

    #if COMPACT_SHADOW_TRACES
    {
        if (!ShadowMaskRay.bShadowFactorComplete)
        {
            // Not resolved by a shadow map (including the Virtual Shadow Map): queue the texel for ray tracing
            uint ShadowTraceIndex = 0;
            InterlockedAdd(SharedNumShadowTraces, 1, ShadowTraceIndex);

            FShadowTrace ShadowTrace;
            ShadowTrace.LightTileIndex = LightTileIndex;
            ShadowTrace.LightTileCoord = CoordInCardTile;

            SharedShadowTraces[ShadowTraceIndex] = PackShadowTrace(ShadowTrace);
        }
    }
    #endif

    GroupMemoryBarrierWithGroupSync();

    #if COMPACT_SHADOW_TRACES
    {
        // One thread reserves a contiguous range in the global trace buffer for the whole group.
        if (all(GroupThreadId == 0))
        {
            InterlockedAdd(RWShadowTraceAllocator[0], SharedNumShadowTraces, SharedGlobalShadowTraceOffset);
        }
    }
    #endif

    GroupMemoryBarrierWithGroupSync();

    #if COMPACT_SHADOW_TRACES
    {
        if (LinearGroupThreadId < SharedNumShadowTraces)
        {
            RWShadowTraces[SharedGlobalShadowTraceOffset + LinearGroupThreadId] = SharedShadowTraces[LinearGroupThreadId];
        }
    }
    #endif

    // Flush this group's part of the shadow mask to the per light tile output.
    if (LinearGroupThreadId < ShadowMaskSize)
    {
        RWShadowMaskTiles[SHADOW_MASK_CARD_TILE_DWORDS * LightTileIndex + ShadowMaskSize * ShadowMaskOffset + LinearGroupThreadId] = SharedShadowMask[LinearGroupThreadId];
    }
}


![image](https://user-images.githubusercontent.com/14350715/219937697-85292354-6766-4fd9-af1f-f0fc41967232.png)
yasukichi commented 1 year ago

Offscreen shadows

}

- C++(LumenSceneDirectLightingHardwareRayTracing.cpp)
```C++
void TraceLumenHardwareRayTracedDirectLightingShadows(
    FRDGBuilder& GraphBuilder,
    const FScene* Scene,
    const FViewInfo& View,
    int32 ViewIndex,
    const FLumenCardTracingInputs& TracingInputs,
    FRDGBufferRef ShadowTraceIndirectArgs,
    FRDGBufferRef ShadowTraceAllocator,
    FRDGBufferRef ShadowTraces,
    FRDGBufferRef LightTileAllocator,
    FRDGBufferRef LightTiles,
    FRDGBufferRef LumenPackedLights,
    FRDGBufferUAVRef ShadowMaskTilesUAV)
{
    const bool bInlineRayTracing = Lumen::UseHardwareInlineRayTracing(*View.Family);
    const bool bUseMinimalPayload = true;

    FRDGBufferRef HardwareRayTracingIndirectArgsBuffer = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateIndirectDesc<FRHIDispatchIndirectParameters>(1), TEXT("Lumen.Reflection.CompactTracingIndirectArgs"));
yasukichi commented 1 year ago

FLumenDirectLightingHardwareRayTracingIndirectArgsCS

}

- シェーダー(LumenSceneDirectLightingHardwareRayTracing.usf)
```C++
Buffer<uint> DispatchLightTilesIndirectArgs;
RWBuffer<uint> RWHardwareRayTracingIndirectArgs;
uint2 OutputThreadGroupSize;

/**
 * Builds the indirect dispatch arguments for the hardware ray traced shadow pass.
 * X covers the texels of one card tile and Y covers the light tile count read from
 * DispatchLightTilesIndirectArgs[0], each divided by the matching component of
 * OutputThreadGroupSize and rounded up.
 */
[numthreads(1, 1, 1)]
void LumenDirectLightingHardwareRayTracingIndirectArgsCS()
{
    const uint GroupSizeX = OutputThreadGroupSize.x;
    const uint GroupSizeY = OutputThreadGroupSize.y;
    const uint NumLightTiles = DispatchLightTilesIndirectArgs[0];

    RWHardwareRayTracingIndirectArgs[0] = (CARD_TILE_SIZE * CARD_TILE_SIZE + GroupSizeX - 1) / GroupSizeX;
    RWHardwareRayTracingIndirectArgs[1] = (NumLightTiles + GroupSizeY - 1) / GroupSizeY;
    RWHardwareRayTracingIndirectArgs[2] = 1;
}
yasukichi commented 1 year ago

LumenDirectLightingHardwareRayTracingCS

}

- シェーダー(LumenSceneDirectLightingHardwareRayTracing.usf)
```C++
RaytracingAccelerationStructure TLAS;

StructuredBuffer<FHitGroupRootConstants> HitGroupData;
StructuredBuffer<FRayTracingSceneMetadataRecord> RayTracingSceneMetadata;

Buffer<uint2> ShadowTraceTileData;
Buffer<int> VirtualShadowMapIds;

uint MaxTranslucentSkipCount;
uint MaxTraversalIterations;
uint GroupCount;
uint ViewIndex;

uint LumenLightType;
float MaxTraceDistance;
float FarFieldMaxTraceDistance;
float3 FarFieldReferencePos;

float HardwareRayTracingShadowRayBias;
float HeightfieldShadowReceiverBias;
float HeightfieldProjectionBiasSearchRadius;

StructuredBuffer<uint> LightTileAllocator;
StructuredBuffer<uint2> LightTiles;

// A minimal ray counts as occluded when it hit something or when its traversal did not complete.
bool IsRayOccluded(FLumenMinimalRayResult RayResult)
{
    const bool bUnoccluded = !RayResult.bHit && RayResult.bCompleted;
    return !bUnoccluded;
}

// A lighting ray counts as occluded when it hit something or when its traversal did not complete.
bool IsRayOccluded(FRayTracedLightingResult RayResult)
{
    const bool bUnoccluded = !RayResult.bIsHit && RayResult.bIsCompleted;
    return !bUnoccluded;
}

StructuredBuffer<uint> ShadowTraceAllocator;
StructuredBuffer<uint> ShadowTraces;

void LumenSceneDirectLightingHardwareRayTracing_INTERNAL(
                uint3 DispatchThreadIndex,
                uint3 DispatchGroupId,
                uint DispatchGroupIndex);

/**
 * Compute shader entry point for the hardware ray traced direct lighting shadow
 * pass; forwards its system values unchanged to the shared _INTERNAL implementation.
 */
// The group size values noted by the author are kept as comments so the attribute stays valid HLSL.
[numthreads(INLINE_RAY_TRACING_THREAD_GROUP_SIZE_X /* =8 */, INLINE_RAY_TRACING_THREAD_GROUP_SIZE_Y /* =4 */, 1)]
void LumenSceneDirectLightingHardwareRayTracingCS(
                uint3 DispatchThreadIndex : SV_DispatchThreadID,
                uint3 DispatchGroupId : SV_GroupID,
                uint DispatchGroupIndex : SV_GroupIndex)
{
    LumenSceneDirectLightingHardwareRayTracing_INTERNAL(DispatchThreadIndex, DispatchGroupId, DispatchGroupIndex);
}

/**
 * Traces the shadow rays queued in ShadowTraces and writes the result back into
 * the shadow mask.
 *
 * Each thread handles the trace at ShadowTraceGroup * 64 + (DispatchThreadIndex.x % 64).
 * With DIM_INDIRECT_DISPATCH a thread handles exactly one group; otherwise it loops
 * over groups in steps of GroupCount until ShadowTraceAllocator[0] is covered.
 * Only traces whose light tile belongs to ViewIndex are processed. For those, an
 * optional far field ray, an optional heightfield projection ray and the near field
 * ray are traced; the texel's shadow factor is zeroed when any of them is occluded
 * and the texel is then marked complete.
 */
void LumenSceneDirectLightingHardwareRayTracing_INTERNAL(
                uint3 DispatchThreadIndex,
                uint3 DispatchGroupId,
                uint DispatchGroupIndex)
{
    uint ShadowTraceIndexOffset = DispatchThreadIndex.x % 64;

#if DIM_INDIRECT_DISPATCH
    uint ShadowTraceGroup = DispatchThreadIndex.y;
#else
    for (uint ShadowTraceGroup = DispatchThreadIndex.y; ShadowTraceGroup * 64 < ShadowTraceAllocator[0]; ShadowTraceGroup += GroupCount)
#endif
    {
        uint ShadowTraceIndex = ShadowTraceGroup * 64 + ShadowTraceIndexOffset;
        FShadowTrace ShadowTrace = UnpackShadowTrace(ShadowTraces[ShadowTraceIndex]);
        uint2 CoordInCardTile = ShadowTrace.LightTileCoord; 
        const FLightTileForShadowMaskPass LightTile = UnpackLightTileForShadowMaskPass(LightTiles[ShadowTrace.LightTileIndex]);

        // Skip traces that belong to another view. This is a guard rather than a
        // `return`: in the looping (non-indirect) path a `return` would make this
        // thread abandon every later trace group it is responsible for.
        const bool bBelongsToThisView = LightTile.ViewIndex == ViewIndex;

        if (bBelongsToThisView && all(CoordInCardTile < CARD_TILE_SIZE))
        {
            uint2 TexelInCardPageCoord = LightTile.TileCoord * CARD_TILE_SIZE + CoordInCardTile;

            const FLumenLight LumenLight = LoadLumenLight(LightTile.LightIndex, LWCHackToFloat(PrimaryView.PreViewTranslation));

            FShadowMaskRay ShadowMaskRay;
            ReadShadowMaskRayRW(ShadowTrace.LightTileIndex, CoordInCardTile, ShadowMaskRay);

            // Trace visibility ray
            if (!ShadowMaskRay.bShadowFactorComplete)
            {
                FLumenCardPageData CardPage = GetLumenCardPageData(LightTile.CardPageIndex);
                FLumenCardData Card = GetLumenCardData(CardPage.CardIndex);
                float2 AtlasUV = CardPage.PhysicalAtlasUVRect.xy + CardPage.PhysicalAtlasUVTexelScale * (TexelInCardPageCoord + 0.5f);
                float2 CardUV = CardPage.CardUVRect.xy + CardPage.CardUVTexelScale * (TexelInCardPageCoord + 0.5f);

                FLumenSurfaceCacheData SurfaceCacheData = GetSurfaceCacheData(Card, CardUV, AtlasUV);

                float3 WorldPosition = SurfaceCacheData.WorldPosition;
                float3 WorldNormal = SurfaceCacheData.WorldNormal;
                float3 TranslatedWorldPosition = WorldPosition + LWCHackToFloat(PrimaryView.PreViewTranslation); // LUMEN_LWC_TODO

                float3 L = LumenLight.DeferredLightData.Direction;
                float3 ToLight = L;
                float NearFieldTMax = MaxTraceDistance;
                float FarFieldTMax = FarFieldMaxTraceDistance;

                // Non-directional lights: aim at the light position and never trace past it.
                if (LumenLight.Type != LIGHT_TYPE_DIRECTIONAL)
                {
                    ToLight = LumenLight.DeferredLightData.TranslatedWorldPosition - TranslatedWorldPosition;
                    float LengthToLight = length(ToLight);
                    NearFieldTMax = min(NearFieldTMax, LengthToLight);
                    FarFieldTMax = min(FarFieldTMax, LengthToLight);
                    L = normalize(ToLight);
                }

                FRayDesc Ray;
                const float2 RandSample = 0.5;

#if 0
                bool bIsValid = GenerateOcclusionRay(LumenLight.Type, LightParameters, WorldPosition, WorldNormal, RandSample,
                    Ray.Origin, Ray.Direction, Ray.TMin, Ray.TMax);
#else

                float ReceiverBias = 0.0f;
                #if !ENABLE_HEIGHTFIELD_PROJECTION_BIAS
                if (Card.bHeightfield)
                {
                    float3 WorldCameraOrigin = LWCHackToFloat(PrimaryView.WorldCameraOrigin);
                    ReceiverBias = CalculateDistanceBasedHeightfieldBias(HeightfieldShadowReceiverBias, WorldPosition, WorldCameraOrigin);
                }
                #endif

                Ray.Origin = GetCardWorldPositionForShadowing(WorldPosition, L, WorldNormal, HardwareRayTracingShadowRayBias + ReceiverBias) + LWCHackToFloat(PrimaryView.PreViewTranslation); // LUMEN_LWC_TODO
                Ray.Direction = L;
#endif

                {
                    FRayCone RayCone = (FRayCone)0;
                    uint CullingMode = RAY_FLAG_CULL_FRONT_FACING_TRIANGLES;
                    FRayTracedLightingContext Context = CreateRayTracedLightingContext(
                        TLAS,
                        RayCone,
                        CoordInCardTile,
                        CoordInCardTile.x, // dummy coordinate
                        CullingMode,
                        MaxTranslucentSkipCount,
                        MaxTraversalIterations);

                    // Shadows don't need closest hit distance
                    Context.bAcceptFirstHitAndEndSearch = true;

                    Context.HitGroupData = HitGroupData;
                    Context.RayTracingSceneMetadata = RayTracingSceneMetadata;

                    bool bRayOccluded = false;

#if ENABLE_FAR_FIELD_TRACING
                    {
                        // Rebase origin and point-source direction for far-field rays
                        FRayDesc FarFieldRay;
                        FarFieldRay.Origin = TranslatedWorldPosition;
                        FarFieldRay.Direction = Ray.Direction;
                        FarFieldRay.TMin = NearFieldTMax;
                        FarFieldRay.TMax = FarFieldTMax;

                        Context.InstanceMask = RAY_TRACING_MASK_FAR_FIELD;
                        Context.bSkipClosestHitShader = true;
                        Context.TranslucentSkipCountMax = 0;
                        Context.FarFieldReferencePos = FarFieldReferencePos;
                        Context.FarFieldMaxTraceDistance = FarFieldTMax;

                        FRayTracedLightingResult RayResult = TraceSurfaceCacheFarFieldRay(FarFieldRay, Context);
                        bRayOccluded = IsRayOccluded(RayResult);

                        Context.bSkipClosestHitShader = false;
                    }
#endif // ENABLE_FAR_FIELD_TRACING

#if ENABLE_HEIGHTFIELD_PROJECTION_BIAS
                    // Find the heightfield intersection that corresponds to the given card position.
                    if (Card.bHeightfield && !bRayOccluded)
                    {
                        float SearchRadius = HeightfieldProjectionBiasSearchRadius;
                        float3 SearchDirection = float3(0.0, 0.0, 1.0);

                        FRayDesc ProjectedRay;
                        ProjectedRay.Origin = Ray.Origin - SearchDirection * SearchRadius;
                        ProjectedRay.Direction = SearchDirection;
                        ProjectedRay.TMin = 0.0f;
                        ProjectedRay.TMax = 2.0f * SearchRadius;
                        Context.CullingMode = RAY_FLAG_CULL_FRONT_FACING_TRIANGLES;

                        FLumenMinimalRayResult SearchResult = TraceLumenMinimalRay(ProjectedRay, Context);

                        if (IsRayOccluded(SearchResult))
                        {
                            float Epsilon = 0.01;
                            Ray.Origin = ProjectedRay.Origin + ProjectedRay.Direction * SearchResult.HitT + SearchResult.HitNormal * Epsilon;
                        }
                    }
#endif // ENABLE_HEIGHTFIELD_PROJECTION_BIAS

                    // Near field ray, only needed when the far field ray did not already find an occluder.
                    if (!bRayOccluded)
                    {
                        Ray.TMin = 0;
                        Ray.TMax = NearFieldTMax;
                        Context.InstanceMask = RAY_TRACING_MASK_OPAQUE;
                        Context.TranslucentSkipCountMax = MaxTranslucentSkipCount;

                        FRayTracedLightingResult RayResult = TraceSurfaceCacheRay(Ray, Context);
                        bRayOccluded = IsRayOccluded(RayResult);
                    }

                    ShadowMaskRay.ShadowFactor *= bRayOccluded ? 0.0f : 1.0f;
                }

                ShadowMaskRay.bShadowFactorComplete = true;

                WriteShadowMaskRay(ShadowMaskRay, ShadowTrace.LightTileIndex, CoordInCardTile, true);
            }
        }
    }
}
yasukichi commented 1 year ago

Lights

}

yasukichi commented 1 year ago

Batched lights

/**
 * Accumulates the direct lighting of all lights affecting a card tile.
 *
 * One thread group per card tile and one thread per texel. The packed value from
 * LightTileOffsetNumPerCardTile gives the tile's range inside LightTilesPerCardTile
 * (offset in the low 24 bits, count in the high 8 bits). Each light's irradiance is
 * added unless its shadow factor is zero, and the sum is written to RWDirectLightingAtlas.
 */
[numthreads(CARD_TILE_SIZE, CARD_TILE_SIZE, 1)]
void LumenCardBatchDirectLightingCS(
    uint3 GroupId : SV_GroupID,
    uint3 GroupThreadId : SV_GroupThreadID)
{
    const uint CardTileIndex = GroupId.x;
    const uint2 TexelCoordInTile = GroupThreadId.xy;

    // Unpack the card tile's light list range.
    const uint PackedRange = LightTileOffsetNumPerCardTile[CardTileIndex];
    const uint FirstLightTile = BitFieldExtractU32(PackedRange, 24, 0);
    const uint LightTileCount = BitFieldExtractU32(PackedRange, 8, 24);

    const FCardTileData CardTile = UnpackCardTileData(CardTiles[CardTileIndex]);
    const FLumenCardPageData CardPage = GetLumenCardPageData(CardTile.CardPageIndex);

    const uint2 CoordInCardPage = CARD_TILE_SIZE * CardTile.TileCoord + TexelCoordInTile;
    const uint2 AtlasCoord = CardPage.PhysicalAtlasCoord + CoordInCardPage;

    const float2 AtlasUV = CardPage.PhysicalAtlasUVRect.xy + CardPage.PhysicalAtlasUVTexelScale * (CoordInCardPage + 0.5);
    const float2 CardUV = CardPage.CardUVRect.xy + CardPage.CardUVTexelScale * (CoordInCardPage + 0.5);

    const FLumenCardData Card = GetLumenCardData(CardPage.CardIndex);
    const FLumenSurfaceCacheData SurfaceCacheData = GetSurfaceCacheData(Card, CardUV, AtlasUV);

    // No lights on this tile, or no valid surface at this texel: output black.
    if (LightTileCount == 0 || !SurfaceCacheData.bValid)
    {
        RWDirectLightingAtlas[AtlasCoord] = 0;
        return;
    }

    float3 Irradiance = 0.0f;

    for (uint LightSlot = 0; LightSlot < LightTileCount; ++LightSlot)
    {
        const FLightTileForLightPass LightTile = UnpackLightTileForLightPass(LightTilesPerCardTile[FirstLightTile + LightSlot]);

        // Lights without a shadow mask (index 0xffffffff) are treated as unshadowed.
        FShadowMaskRay ShadowMaskRay;
        ShadowMaskRay.bShadowFactorComplete = true;
        ShadowMaskRay.ShadowFactor = 1.0f;

        if (LightTile.ShadowMaskIndex != 0xffffffff)
        {
            ReadShadowMaskRay(LightTile.ShadowMaskIndex, TexelCoordInTile, ShadowMaskRay);
        }

        if (ShadowMaskRay.ShadowFactor > 0.0f)
        {
            const uint LightViewIndex = HAS_MULTIPLE_VIEWS ? LightTile.ViewIndex : 0;
            Irradiance += GetIrradianceForLight(LightTile.LightIndex, SurfaceCacheData, LWCHackToFloat(PreViewTranslation[LightViewIndex]).xyz, ShadowMaskRay.ShadowFactor);

            //Irradiance += bShadowFactorValid ? float3(0, 1, 0) : float3(0.2f, 0.0f, 0.0f);
        }
    }

    RWDirectLightingAtlas[AtlasCoord] = Irradiance;
}

- シェーダー(LumenSceneDirectLighting.ush)
```C++
StructuredBuffer<uint> ShadowMaskTiles;

/**
 * Decodes the packed shadow mask entry of one texel inside a card tile.
 *
 * @param CardTileIndex    Index of the shadow mask tile to read from.
 * @param CoordInCardTile  Texel coordinate inside the tile.
 * @param ShadowMaskRay    Receives the normalized shadow factor and the "complete" flag.
 */
void ReadShadowMaskRay(uint CardTileIndex, uint2 CoordInCardTile, inout FShadowMaskRay ShadowMaskRay)
{
    // Texels are stored row by row, SHADOW_MASK_RAY_BITS bits each
    const uint TexelLinearIndex = CoordInCardTile.x + CoordInCardTile.y * CARD_TILE_SIZE;
    const uint TexelBitOffset = SHADOW_MASK_RAY_BITS * TexelLinearIndex;

    // Pick the dword holding this texel, then shift the texel's bits down to bit 0
    const uint DwordIndex = SHADOW_MASK_CARD_TILE_DWORDS * CardTileIndex + TexelBitOffset / 32;
    const uint TexelBits = ShadowMaskTiles[DwordIndex] >> (TexelBitOffset % 32);

    ShadowMaskRay.bShadowFactorComplete = (TexelBits & SHADOW_FACTOR_COMPLETE_BITS_MASK) != 0;
    ShadowMaskRay.ShadowFactor = float(TexelBits & SHADOW_FACTOR_BITS_MASK) / SHADOW_FACTOR_BITS_MASK;
}

if USE_LIGHT_UNIFORM_BUFFER

Out = LoadLightData(LIGHT_TYPE);

else

FLumenLight LumenLight = LoadLumenLight(LightIndex, PreViewTranslation);
Out = LumenLight.DeferredLightData;
Out.bRectLight = HAS_RECT_LIGHTS && Out.bRectLight;

endif

return Out;

}

// Computes the irradiance that one light contributes to a surface cache sample.
//   LightIndex         - index passed to GetLumenDirectLightingLightData to fetch the light
//   SurfaceCacheData   - supplies the world position and normal of the sample
//   PreViewTranslation - added to the world position to obtain the translated world position
//   ShadowFactor       - multiplier applied to the final result
// Returns LightColor * AreaLightFalloffColor * (CombinedAttenuation * NoL * ShadowFactor).
float3 GetIrradianceForLight( uint LightIndex, FLumenSurfaceCacheData SurfaceCacheData, float3 PreViewTranslation, float ShadowFactor) { FDeferredLightData LightData = GetLumenDirectLightingLightData(LightIndex, PreViewTranslation);

float3 WorldNormal = SurfaceCacheData.WorldNormal;
float3 WorldPosition = SurfaceCacheData.WorldPosition;
float3 TranslatedWorldPosition = WorldPosition + PreViewTranslation;

// Defaults used when the light is not radial: no attenuation and a plain clamped N.L
float3 LightColor = LightData.Color;
float3 L = LightData.Direction;
float3 ToLight = L;
float3 AreaLightFalloffColor = 1;
float CombinedAttenuation = 1;
float NoL = saturate(dot(WorldNormal, L));

if (LightData.bRadialLight)
{
    FAreaLightIntegrateContext Context = (FAreaLightIntegrateContext) 0;
    // ToLight and L are passed on to the shape helpers below, so this call must come first
    // NOTE(review): presumably GetLocalLightAttenuation overwrites ToLight and L - confirm against its signature
    float LightMask = GetLocalLightAttenuation(TranslatedWorldPosition, LightData, ToLight, L);

    float Attenuation = 0.0f;
    // NOTE(review): Roughness and V look like placeholder inputs for the integrate context - confirm
    float Roughness = 1;
    float3 V = float3(1, 0, 0);

    if (LightData.bRectLight)
    {
        FRect Rect = GetRect(ToLight, LightData);
        Attenuation = IntegrateLight(Rect);

        // When the rect is not visible Context stays zero-initialized, so NoL below becomes 0
        if (IsRectVisible(Rect))
        {
            const FRectTexture SourceTexture = InitRectTexture(LightData);
            Context = CreateRectIntegrateContext(Roughness, WorldNormal, V, Rect, SourceTexture);
        }
    }
    else
    {
        FCapsuleLight Capsule = GetCapsule(ToLight, LightData);
        Capsule.DistBiasSqr = 0;
        Context = CreateCapsuleIntegrateContext(Roughness, WorldNormal, V, Capsule, LightData.bInverseSquared);
        Attenuation = IntegrateLight(Capsule, LightData.bInverseSquared);
    }

    // Replace the defaults with the area light results
    CombinedAttenuation = Attenuation * LightMask;
    AreaLightFalloffColor = Context.AreaLight.FalloffColor;
    NoL = Context.NoL;
}

float3 Irradiance = LightColor * AreaLightFalloffColor * (CombinedAttenuation * NoL * ShadowFactor);

return Irradiance;

}

yasukichi commented 1 year ago

CombineLighting CS

/**
 * Combines albedo, emissive, direct and indirect lighting of every texel of a card tile
 * and writes the result into the final lighting atlas.
 * One thread group per card tile (GroupId.x), one thread per texel of the tile (GroupThreadId.xy).
 */
[numthreads(CARD_TILE_SIZE, CARD_TILE_SIZE, 1)]
void CombineLumenSceneLightingCS(
    uint3 GroupId : SV_GroupID,
    uint3 GroupThreadId : SV_GroupThreadID)
{
    uint CardTileIndex = GroupId.x;
    uint2 TexelCoordInTile = GroupThreadId.xy;

    FCardTileData CardTile = UnpackCardTileData(CardTiles[CardTileIndex]);

    FLumenCardPageData CardPage = GetLumenCardPageData(CardTile.CardPageIndex);
    FLumenCardData Card = GetLumenCardData(CardPage.CardIndex);

    // Texel position inside the card page, then inside the physical atlas (integer coord and texel-center UV)
    uint2 CoordInCardPage = CARD_TILE_SIZE * CardTile.TileCoord + TexelCoordInTile;
    uint2 AtlasCoord = CardPage.PhysicalAtlasCoord + CoordInCardPage;
    float2 AtlasUV = CardPage.PhysicalAtlasUVRect.xy + CardPage.PhysicalAtlasUVTexelScale * (CoordInCardPage + 0.5);

#if RADIOSITY_ATLAS_DOWNSAMPLE_FACTOR == 1
    float2 IndirectLightingAtlasUV = AtlasUV;
#else
    // When sampling from a downsampled Indirect Lighting atlas we need to appropriately clamp input UVs to prevent bilinear reading outside of the valid area
    float2 IndirectLightingAtlasUV = clamp(AtlasUV, CardPage.PhysicalAtlasUVRect.xy + IndirectLightingAtlasHalfTexelSize, CardPage.PhysicalAtlasUVRect.zw - IndirectLightingAtlasHalfTexelSize);
#endif

    float3 Albedo = Texture2DSampleLevel(AlbedoAtlas, BilinearClampedSampler, AtlasUV, 0).xyz;
    float3 Emissive = Texture2DSampleLevel(EmissiveAtlas, BilinearClampedSampler, AtlasUV, 0).xyz;
    float3 DirectLighting = Texture2DSampleLevel(DirectLightingAtlas, BilinearClampedSampler, AtlasUV, 0).xyz;
    float3 IndirectLighting = Texture2DSampleLevel(IndirectLightingAtlas, BilinearClampedSampler, IndirectLightingAtlasUV, 0).xyz;

    float3 FinalLighting = CombineFinalLighting(Albedo, Emissive, DirectLighting, IndirectLighting);
    RWFinalLightingAtlas[AtlasCoord] = FinalLighting;
}

/**
 * Produces the final surface cache lighting for one texel:
 * (direct + indirect) * Lambert(decoded albedo) + emissive, sanitized to finite, non-negative values.
 */
float3 CombineFinalLighting(float3 Albedo, float3 Emissive, float3 DirectLighting, float3 IndirectLighting)
{
    const float3 DecodedAlbedo = DecodeSurfaceCacheAlbedo(Albedo);
    const float3 IncomingLighting = DirectLighting + IndirectLighting;
    const float3 UnclampedLighting = IncomingLighting * Diffuse_Lambert(DecodedAlbedo) + Emissive;

    // The result goes into a persistent atlas that feeds back into itself,
    // so non-finite or negative values must not be allowed through
    return max(MakeFinite(UnclampedLighting), float3(0.0f, 0.0f, 0.0f));
}

yasukichi commented 1 year ago

Radiosity

image

Lumen pass structure

back

yasukichi commented 1 year ago

BuildRadiosityTiles

}

- シェーダー(LumenRadiosityCulling.usf)
```C++
StructuredBuffer<uint> CardPageIndexAllocator;
StructuredBuffer<uint> CardPageIndexData;

RWStructuredBuffer<uint> RWCardTileAllocator;
RWStructuredBuffer<uint> RWCardTileData;

uint NumViews;
uint MaxCardTiles;

/**
 * Build a list of radiosity tiles
 *
 * Each card page listed in CardPageIndexData is covered by 4 thread groups arranged 2x2;
 * every thread handles one card tile and, if the tile lies inside the page, appends it to
 * the per-view tile list in RWCardTileData.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void BuildRadiosityTilesCS(
    uint3 GroupId : SV_GroupID,
    uint3 DispatchThreadId : SV_DispatchThreadID,
    uint3 GroupThreadId : SV_GroupThreadID)
{
    // One thread per tile
    // GroupId.x encodes both the page (GroupId.x / 4) and which of its 4 quadrants this group covers (GroupId.x % 4)
    uint LinearLightTileOffset = (GroupId.x % 4);
    uint IndexInIndexBuffer = GroupId.x / 4;

    // Quadrants are laid out 2x2, each 8 tiles wide
    // NOTE(review): the literal 8 presumably has to match THREADGROUP_SIZE - confirm
    uint2 TileCoord;
    TileCoord.x = (LinearLightTileOffset % 2) * 8 + GroupThreadId.x;
    TileCoord.y = (LinearLightTileOffset / 2) * 8 + GroupThreadId.y;

    if (IndexInIndexBuffer < CardPageIndexAllocator[0])
    {
        uint CardPageIndex = CardPageIndexData[IndexInIndexBuffer];
        FLumenCardPageData CardPage = GetLumenCardPageData(CardPageIndex);
        // NOTE(review): if CardIndex is an unsigned field this test is always true - confirm its type
        if (CardPage.CardIndex >= 0)
        {
            FLumenCardData Card = GetLumenCardData(CardPage.CardIndex);

            const uint2 SizeInTiles = CardPage.SizeInTexels / CARD_TILE_SIZE;

            // Threads beyond the page extent have nothing to emit
            if (all(TileCoord < SizeInTiles))
            {
                float2 UVMin = float2(TileCoord) / SizeInTiles;
                float2 UVMax = float2(TileCoord + 1) / SizeInTiles;

                // Flip the tile's UV range vertically (y -> 1 - y), swapping min and max so that UVMin.y <= UVMax.y
                float SwapY = UVMin.y;
                UVMin.y = 1.0f - UVMax.y;
                UVMax.y = 1.0f - SwapY;

                uint ViewIndex = GetCardViewIndex(CardPage, Card, UVMin, UVMax, NumViews, false);

                FCardTileData CardTile;
                CardTile.CardPageIndex = CardPageIndex;
                CardTile.TileCoord = TileCoord;

                // Atomically reserve the next slot in this view's tile list; each view owns MaxCardTiles entries
                // NOTE(review): NextTileIndex is not checked against MaxCardTiles here
                uint NextTileIndex = 0;
                InterlockedAdd(RWCardTileAllocator[ViewIndex], 1, NextTileIndex);
                RWCardTileData[ViewIndex * MaxCardTiles + NextTileIndex] = PackCardTileData(CardTile);
            }
        }
    }
}

image

yasukichi commented 1 year ago

IndirectArgs

}

- C++(LumenRadiosity.cpp)
```C++
/**
 * Returns the spacing between radiosity probes for the given view.
 *
 * Starts from GLumenRadiosityProbeSpacing, halves it when the view's LumenSceneLightingQuality
 * is 6 or higher, clamps the result to [1, Lumen::CardTileSize] and rounds it up to a power of two.
 */
uint32 GetRadiosityProbeSpacing(const FViewInfo& View)
{
    int32 RadiosityProbeSpacing = GLumenRadiosityProbeSpacing/*=4*/;

    if (View.FinalPostProcessSettings.LumenSceneLightingQuality >= 6)
    {
        RadiosityProbeSpacing /= 2;
    }

    return FMath::RoundUpToPowerOfTwo(FMath::Clamp<uint32>(RadiosityProbeSpacing, 1, Lumen::CardTileSize/*=8*/));
}
const int32 ProbeSpacing = LumenRadiosity::GetRadiosityProbeSpacing(FirstView);

/**
 * Returns the per-axis resolution of a radiosity hemisphere probe for the given view.
 *
 * The base resolution is scaled by the square root of the view's LumenSceneLightingQuality
 * (clamped to [0.5, 4]) and the result is clamped to [1, 16].
 */
int32 GetHemisphereProbeResolution(const FViewInfo& View)
{
    const float ClampedQuality = FMath::Clamp<float>(View.FinalPostProcessSettings.LumenSceneLightingQuality, .5f, 4.0f);
    const float ScaledResolution = GLumenRadiosityHemisphereProbeResolution/*=4*/ * FMath::Sqrt(ClampedQuality);
    return FMath::Clamp<int32>(ScaledResolution, 1, 16);
}
const int32 HemisphereProbeResolution = LumenRadiosity::GetHemisphereProbeResolution(FirstView);

/**
 * Writes one 1D indirect dispatch argument triple (X, 1, 1) into RWIndirectArgs.
 * X is NumThreads divided by ThreadGroupSize, rounded up.
 */
void SetIndirectArgs(uint ArgIndex, uint NumThreads, uint ThreadGroupSize)
{
    // Each argument slot occupies three consecutive uints
    const uint FirstElement = ArgIndex * 3;
    const uint GroupCountX = (NumThreads + ThreadGroupSize - 1) / ThreadGroupSize;

    RWIndirectArgs[FirstElement + 0] = GroupCountX;
    RWIndirectArgs[FirstElement + 1] = 1;
    RWIndirectArgs[FirstElement + 2] = 1;
}

/**
 * Writes an indirect dispatch argument triple for a hardware ray tracing pass.
 *
 * The group count is derived from HardwareRayTracingThreadGroupSize (rounded up), not from
 * ThreadGroupSize; ThreadGroupSize is only forwarded to GetRayTracingThreadCountWrapped,
 * which returns the three dispatch dimensions that are stored.
 */
void SetHardwareRayTracingIndirectArgs(uint ArgIndex, uint NumThreads, uint ThreadGroupSize)
{
    const uint GroupCount = (NumThreads + HardwareRayTracingThreadGroupSize - 1) / HardwareRayTracingThreadGroupSize;
    const int3 WrappedDimension = GetRayTracingThreadCountWrapped(GroupCount, ThreadGroupSize);

    // Each argument slot occupies three consecutive uints
    const uint FirstElement = ArgIndex * 3;
    RWIndirectArgs[FirstElement + 0] = WrappedDimension.x;
    RWIndirectArgs[FirstElement + 1] = WrappedDimension.y;
    RWIndirectArgs[FirstElement + 2] = WrappedDimension.z;
}

/**
 * Fills the indirect dispatch arguments of the radiosity passes, one thread per view.
 * Every view owns five consecutive argument slots, written in the order of the
 * ERadiosityIndirectArgs entries named in the comments below.
 */
[numthreads(THREADGROUP_SIZE/*=1*/, 1, 1)]
void LumenRadiosityIndirectArgsCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
    uint ViewIndex = DispatchThreadId.x;

    if (ViewIndex < NumViews)
    {
        // View offset
        uint BaseOffset = ViewIndex * 5;

        // C++ side: const uint32 RadiosityTileSize = Lumen::CardTileSize(=8) / ProbeSpacing;
        // Total traces = tiles * probes per tile * traces per probe
        uint NumTraces = CardTileAllocator[ViewIndex] * RadiosityTileSize * RadiosityTileSize * NumTracesPerProbe/*=HemisphereProbeResolution * HemisphereProbeResolution*/;

        // ERadiosityIndirectArgs::NumTracesDiv64
        SetIndirectArgs(BaseOffset + 0, NumTraces, /*ThreadGroupSize*/ 64);

        // ERadiosityIndirectArgs::NumTracesDiv32
        SetIndirectArgs(BaseOffset + 1, NumTraces, /*ThreadGroupSize*/ 32);

        // ERadiosityIndirectArgs::ThreadPerProbe
        SetIndirectArgs(BaseOffset + 2, CardTileAllocator[ViewIndex] * RadiosityTileSize * RadiosityTileSize, /*ThreadGroupSize*/ 32);

        // ERadiosityIndirectArgs::ThreadPerRadiosityTexel
        SetIndirectArgs(BaseOffset + 3, CardTileAllocator[ViewIndex] * CARD_TILE_SIZE/*=8*/ * CARD_TILE_SIZE, /*ThreadGroupSize*/ 32);

        // ERadiosityIndirectArgs::HardwareRayTracingThreadPerTrace
        SetHardwareRayTracingIndirectArgs(BaseOffset + 4, NumTraces, /*ThreadGroupSize*/ 64);
    }
}

yasukichi commented 1 year ago

HardwareRayTracing (inline)

/**
 * Loads and unpacks a card tile of the current view.
 * Each view owns a contiguous range of MaxCardTiles entries in CardTileData.
 */
FCardTileData GetCardTile(uint CardTileIndex)
{
    const uint PackedTileIndex = ViewIndex * MaxCardTiles + CardTileIndex;
    return UnpackCardTileData(CardTileData[PackedTileIndex]);
}

/**
 * Converts a linear radiosity probe index into the card tile that owns the probe and
 * the texel coordinate inside that tile at which the probe is placed.
 *
 * @param RadiosityProbeIndex  Linear probe index; probes of one tile are consecutive.
 * @param CardTileIndex        Receives the index of the owning card tile.
 * @param CoordInCardTile      Receives the probe's texel coordinate inside the tile
 *                             (probe grid position scaled by the probe spacing, plus the jitter
 *                             returned by GetProbeJitter for the page's temporal index).
 */
void UnswizzleCardTileIndex(
    uint RadiosityProbeIndex,
    inout uint CardTileIndex,
    inout uint2 CoordInCardTile)
{
    uint NumProbesPerTile = RadiosityTileSize * RadiosityTileSize;
    CardTileIndex = RadiosityProbeIndex / NumProbesPerTile;
    uint LinearIndexInCardTile = RadiosityProbeIndex - CardTileIndex * NumProbesPerTile;
    uint2 ProbeCoord = uint2(LinearIndexInCardTile % RadiosityTileSize, LinearIndexInCardTile / RadiosityTileSize);

    FCardTileData CardTile = GetCardTile(CardTileIndex);
    FLumenCardPageData CardPage = GetLumenCardPageData(CardTile.CardPageIndex);
    CoordInCardTile = ProbeCoord * ProbeSpacingInRadiosityTexels/*=ProbeSpacing*/ + GetProbeJitter(CardPage.IndirectLightingTemporalIndex);
}

/**
 * Converts a linear trace index into the owning card tile, the probe's texel coordinate
 * inside that tile and the trace's texel coordinate inside the probe.
 * Traces of one probe are consecutive, NumTracesPerProbe per probe.
 */
void UnswizzleTexelTraceCoords(
    uint DispatchThreadId,
    inout uint CardTileIndex,
    inout uint2 CoordInCardTile,
    inout uint2 TraceTexelCoord)
{
    const uint ProbeIndex = DispatchThreadId / NumTracesPerProbe;
    const uint TraceIndexInProbe = DispatchThreadId - ProbeIndex * NumTracesPerProbe;

    // Trace texels are stored row by row inside the probe
    TraceTexelCoord = uint2(TraceIndexInProbe % HemisphereProbeResolution, TraceIndexInProbe / HemisphereProbeResolution);

    UnswizzleCardTileIndex(ProbeIndex, CardTileIndex, CoordInCardTile);
}

![image](https://user-images.githubusercontent.com/14350715/224533719-e0b6bcfd-38fa-46a9-b1c3-71854095d054.png)
- シェーダー(LumenRadiosity.ush)
```C++
#define PROBE_HEMISPHERE_HEMI_OCTAHEDRON    0
#define PROBE_HEMISPHERE_UNIFORM            1
#define PROBE_HEMISPHERE_COSINE             2

#define RADIOSITY_PROBE_MAPPING             PROBE_HEMISPHERE_UNIFORM

// Generates the world-space direction, cone half angle and PDF of one radiosity trace.
//   RadiosityTexel    - supplies the temporal index used for jitter and the world normal the hemisphere is built around
//   ProbeCoord        - probe coordinate, used to seed the per-probe jitter
//   TracingTexelCoord - texel of the trace inside the probe
// The /* ... */ blocks below are reference listings of the helpers being called; they are not compiled.
void GetRadiosityRay(FRadiosityTexel RadiosityTexel, uint2 ProbeCoord, uint2 TracingTexelCoord, out float3 WorldRayDirection, out float ConeHalfAngle, out float PDF)
{
    // Jittered sample position inside the trace texel
    float2 ProbeTexelCenter = GetProbeTexelCenter(RadiosityTexel.IndirectLightingTemporalIndex, ProbeCoord);
/*
float2 GetProbeTexelCenter(uint IndirectLightingTemporalIndex, uint2 ProbeTileCoord)
{
    uint2 RandomSeed = Rand3DPCG16(int3(ProbeTileCoord, 0)).xy;
    uint TemporalIndex = (FixedJitterIndex < 0 ? IndirectLightingTemporalIndex : FixedJitterIndex);
    return Hammersley16(TemporalIndex % MaxFramesAccumulated, MaxFramesAccumulated, RandomSeed);
}

float2 Hammersley16( uint Index, uint NumSamples, uint2 Random )
{
    float E1 = frac( (float)Index / NumSamples + float( Random.x ) * (1.0 / 65536.0) );
    float E2 = float( ( reversebits(Index) >> 16 ) ^ Random.y ) * (1.0 / 65536.0);
    return float2( E1, E2 );
}
*/
    // Normalize to the probe's UV range
    float2 ProbeUV = (TracingTexelCoord + ProbeTexelCenter) / float(HemisphereProbeResolution);

    float3 LocalRayDirection;

    // RADIOSITY_PROBE_MAPPING is defined as PROBE_HEMISPHERE_UNIFORM above, so the uniform branch is the one taken
    uint RadiosityProbeHemisphereMapping = RADIOSITY_PROBE_MAPPING;
    // Sample generation must match probe occlusion
    if (RadiosityProbeHemisphereMapping == PROBE_HEMISPHERE_HEMI_OCTAHEDRON)
    {
        LocalRayDirection = HemiOctahedronToUnitVector(ProbeUV * 2 - 1);
/*
float3 HemiOctahedronToUnitVector( float2 Oct )
{
    Oct = float2( Oct.x + Oct.y, Oct.x - Oct.y );
    float3 N = float3( Oct, 2.0 - dot( 1, abs(Oct) ) );
    return normalize(N);
}
*/
        //@todo - hemi octahedron solid angle
        PDF = 1.0 / (2 * PI);
    }
    else if (RadiosityProbeHemisphereMapping == PROBE_HEMISPHERE_UNIFORM)
    {
        float4 Sample = UniformSampleHemisphere(ProbeUV);
/*
// PDF = 1 / (2 * PI)
float4 UniformSampleHemisphere( float2 E )
{
    float Phi = 2 * PI * E.x;
    float CosTheta = E.y;
    float SinTheta = sqrt( 1 - CosTheta * CosTheta );

    float3 H;
    H.x = SinTheta * cos( Phi );
    H.y = SinTheta * sin( Phi );
    H.z = CosTheta;

    float PDF = 1.0 / (2 * PI);

    return float4( H, PDF );
}
*/
        LocalRayDirection = Sample.xyz;
        PDF = Sample.w;
    }
    else
    {
        float4 Sample = CosineSampleHemisphere(ProbeUV);
/*
// PDF = NoL / PI
float4 CosineSampleHemisphere( float2 E )
{
    float Phi = 2 * PI * E.x;
    float CosTheta = sqrt(E.y);
    float SinTheta = sqrt(1 - CosTheta * CosTheta);

    float3 H;
    H.x = SinTheta * cos(Phi);
    H.y = SinTheta * sin(Phi);
    H.z = CosTheta;

    float PDF = CosTheta * (1.0 / PI);

    return float4(H, PDF);
}
*/
        LocalRayDirection = Sample.xyz;
        PDF = Sample.w;
    }

    // Rotate the local hemisphere direction (+Z up) into world space around the texel's normal
    float3x3 TangentBasis = GetTangentBasisFrisvad(RadiosityTexel.WorldNormal);
/*
// [Frisvad 2012, "Building an Orthonormal Basis from a 3D Unit Vector Without Normalization"]
// Discontinuity at TangentZ.z < -0.9999999f
float3x3 GetTangentBasisFrisvad(float3 TangentZ)
{
    float3 TangentX;
    float3 TangentY;

    if (TangentZ.z < -0.9999999f)
    {
        TangentX = float3(0, -1, 0);
        TangentY = float3(-1, 0, 0);
    }
    else
    {
        float A = 1.0f / (1.0f + TangentZ.z);
        float B = -TangentZ.x * TangentZ.y * A;
        TangentX = float3(1.0f - TangentZ.x * TangentZ.x * A, B, -TangentZ.x);
        TangentY = float3(B, 1.0f - TangentZ.y * TangentZ.y * A, -TangentZ.y);
    }

    return float3x3( TangentX, TangentY, TangentZ );
}
*/
    WorldRayDirection = mul(LocalRayDirection, TangentBasis);

    // cos(half angle) = 1 - 1/N is the cone whose solid angle 2*PI*(1 - cos) equals 1/N of the hemisphere (2*PI / N)
    ConeHalfAngle = acosFast(1.0f - 1.0f / (float)(NumTracesPerProbe));
}

// Top level acceleration structure traced against
RaytracingAccelerationStructure TLAS;

#if LUMEN_HARDWARE_INLINE_RAYTRACING
// NOTE(review): the element types of these two buffers (StructuredBuffer<...>) were lost when this
// listing was extracted - restore them from the engine source before compiling
StructuredBuffer HitGroupData;
StructuredBuffer RayTracingSceneMetadata;
#endif // LUMEN_HARDWARE_INLINE_RAYTRACING

// Pass parameters
float MinTraceDistance;
float MaxTraceDistance;
float MinTraceDistanceToSampleSurface;
float SurfaceBias;
float HeightfieldSurfaceBias;
float AvoidSelfIntersectionTraceDistance;
float MaxRayIntensity;
uint NumThreadsToDispatch;
float TanRadiosityRayConeHalfAngle;
uint MaxTranslucentSkipCount;
uint MaxTraversalIterations;

/**
 * Compute shader entry point of the inline hardware ray tracing radiosity pass.
 * Only forwards its system values to LumenRadiosityHardwareRayTracing_INTERNAL.
 */
[numthreads(INLINE_RAY_TRACING_THREAD_GROUP_SIZE_X/*=32*/, INLINE_RAY_TRACING_THREAD_GROUP_SIZE_Y/*=1*/, 1)]
void LumenRadiosityHardwareRayTracingCS(
    uint3 DispatchThreadIndex : SV_DispatchThreadID,
    uint3 DispatchGroupId : SV_GroupID,
    uint DispatchGroupIndex : SV_GroupIndex)
{
    LumenRadiosityHardwareRayTracing_INTERNAL(DispatchThreadIndex, DispatchGroupId, DispatchGroupIndex);
}

/**
 * Traces one radiosity ray per trace texel and stores the resulting radiance (and, when
 * UseProbeOcclusion is set, the hit distance) into the trace atlases.
 *
 * With DIM_INDIRECT_DISPATCH every thread handles exactly one trace and exits when its index is
 * out of range; otherwise each thread loops over traces with a stride of NumThreadsToDispatch.
 * The /* ... *​/ listings inside are reference copies of helpers and are not compiled.
 */
void LumenRadiosityHardwareRayTracing_INTERNAL(
    uint3 DispatchThreadIndex,
    uint3 DispatchGroupId,
    uint DispatchGroupIndex)
{
#if DIM_INDIRECT_DISPATCH
    uint GlobalThreadIndex = GetUnWrappedRayTracingDispatchThreadId(DispatchGroupId, THREADGROUP_SIZE/*=64*/);
/*
uint GetUnWrappedRayTracingDispatchThreadId(int3 DispatchThreadId, uint ThreadGroupSize)
{
    return DispatchThreadId.x + (DispatchThreadId.z * ThreadGroupSize + DispatchThreadId.y) * ThreadGroupSize;
}
*/
    // When running with indirect inline RT, ThreadIndex is actually GroupIndex, so we need to account for that.
#if LUMEN_HARDWARE_INLINE_RAYTRACING
    GlobalThreadIndex = GlobalThreadIndex * INLINE_RAY_TRACING_THREAD_GROUP_SIZE_X + DispatchGroupIndex;
#endif

    if (GlobalThreadIndex >= CardTileAllocator[ViewIndex] * NumTracesPerProbe * RadiosityTileSize * RadiosityTileSize)
    {
        return;
    }
#else
    for (uint GlobalThreadIndex = DispatchThreadIndex.x;
        GlobalThreadIndex < CardTileAllocator[ViewIndex] * NumTracesPerProbe * RadiosityTileSize * RadiosityTileSize;
        GlobalThreadIndex += NumThreadsToDispatch)
#endif
    {
        uint CardTileIndex;
        uint2 CoordInCardTile;
        uint2 TraceTexelCoord;
        UnswizzleTexelTraceCoords(GlobalThreadIndex, CardTileIndex, CoordInCardTile, TraceTexelCoord);

        FRadiosityTexel RadiosityTexel = GetRadiosityTexelFromCardTile(CardTileIndex, CoordInCardTile);

        if (RadiosityTexel.bInsideAtlas)
        {
            // Defaults written for texels that are inside the atlas but not valid
            float3 Radiance = 0.0f;
            float TraceHitDistance = MaxTraceDistance;

            if (RadiosityTexel.bValid)
            {
                float3 TranslatedWorldPosition = RadiosityTexel.WorldPosition + LWCHackToFloat(PrimaryView.PreViewTranslation); // LUMEN_LWC_TODO
                float3 WorldNormal = RadiosityTexel.WorldNormal;

                float3 WorldRayDirection;
                float ConeHalfAngle;
                float PDF;
                GetRadiosityRay(RadiosityTexel, RadiosityTexel.CardCoord / ProbeSpacingInRadiosityTexels, TraceTexelCoord, WorldRayDirection, ConeHalfAngle, PDF);

                float ReceiverBias = 0.0f;
                if (RadiosityTexel.bHeightfield)
                {
                    float3 TranslatedWorldCameraOrigin = PrimaryView.TranslatedWorldCameraOrigin;
                    ReceiverBias = CalculateDistanceBasedHeightfieldBias(HeightfieldSurfaceBias, TranslatedWorldPosition, TranslatedWorldCameraOrigin);
                }

                // Start the ray slightly off the surface along the normal
                FRayDesc Ray;
                Ray.Origin = TranslatedWorldPosition + WorldNormal * (SurfaceBias + ReceiverBias);
                Ray.Direction = WorldRayDirection;
                Ray.TMin = MinTraceDistance;

                FRayCone RayCone = (FRayCone)0;
                RayCone = PropagateRayCone(RayCone, ConeHalfAngle, 0.0);
/*
FRayCone PropagateRayCone(in FRayCone Cone, in float SurfaceSpreadAngle, in float HitT)
{
    FRayCone NewCone;
    NewCone.Width = Cone.SpreadAngle * HitT + Cone.Width;
    NewCone.SpreadAngle = Cone.SpreadAngle + SurfaceSpreadAngle;
    return NewCone;
}
*/

                const uint LinearCoord = CoordInCardTile.y * CARD_TILE_SIZE + CoordInCardTile.x;
                const uint OriginalCullingMode = 0;
                FRayTracedLightingContext Context = CreateRayTracedLightingContext(TLAS, RayCone, CoordInCardTile, LinearCoord, OriginalCullingMode, MaxTranslucentSkipCount, MaxTraversalIterations);

#if LUMEN_HARDWARE_INLINE_RAYTRACING
                Context.HitGroupData = HitGroupData;
                Context.RayTracingSceneMetadata = RayTracingSceneMetadata;
#endif

#if DIM_AVOID_SELF_INTERSECTION_TRACE
                // First trace - short distance with backface culling enabled to avoid self intersecting in cases where the traced geometry doesn't match what's in the GBuffer (Nanite, Ray Tracing LODs, etc)
                Ray.TMax = AvoidSelfIntersectionTraceDistance;
                FRayTracedLightingResult RayResult = CreateRayTracedLightingResult();

                if (Ray.TMax > Ray.TMin)
                {
                    Context.CullingMode = RAY_FLAG_CULL_BACK_FACING_TRIANGLES;
                    RayResult = TraceAndCalculateRayTracedLightingFromSurfaceCache(Ray, Context);
                }

                // Second trace - remaining ray with backface culling enabled to reduce leaking when starting from a point inside the mesh
                if (!RayResult.bIsHit)
                {
                    Ray.TMin = max(Ray.TMin, AvoidSelfIntersectionTraceDistance);
                    Ray.TMax = MaxTraceDistance;

                    Context.CullingMode = OriginalCullingMode;
                    RayResult = TraceAndCalculateRayTracedLightingFromSurfaceCache(Ray, Context);
                }
#else
                Ray.TMax = MaxTraceDistance;
                FRayTracedLightingResult RayResult = TraceAndCalculateRayTracedLightingFromSurfaceCache(Ray, Context);
#endif

                // Hits closer than MinTraceDistanceToSampleSurface contribute no radiance
                if (RayResult.TraceHitDistance < MinTraceDistanceToSampleSurface)
                {
                    RayResult.Radiance = 0.0f;
                }

                if (RayResult.bIsHit)
                {
                    Radiance = RayResult.Radiance;

                    // Recalculate TraceHitDistance to incorporate biases
                    float3 HitPosition = Ray.Origin + Ray.Direction * RayResult.TraceHitDistance;
                    TraceHitDistance = length(TranslatedWorldPosition - HitPosition);
                }
                else
                {
                    Radiance = EvaluateSkyRadiance(WorldRayDirection);
                }

                // Scale down rays brighter than MaxRayIntensity (in pre-exposed units), preserving their color
                float MaxLighting = max3(Radiance.x, Radiance.y, Radiance.z);

                if (MaxLighting > MaxRayIntensity * View.OneOverPreExposure)
                {
                    Radiance *= MaxRayIntensity * View.OneOverPreExposure / MaxLighting;
                }
            }

            FCardTileData CardTile = GetCardTile(CardTileIndex);
            FLumenCardPageData CardPage = GetLumenCardPageData(CardTile.CardPageIndex);
            uint2 RadiosityProbeTracingAtlasCoord = GetRadiosityProbeAtlasCoord(CardPage, CardTile, CoordInCardTile) * HemisphereProbeResolution + TraceTexelCoord;
            RWTraceRadianceAtlas[RadiosityProbeTracingAtlasCoord] = Radiance;

            if (UseProbeOcclusion > 0)
            {
                RWTraceHitDistanceAtlas[RadiosityProbeTracingAtlasCoord] = TraceHitDistance;
            }
        }
    }
}



- [TraceAndCalculateRayTracedLightingFromSurfaceCache()](https://github.com/yasukichi/testcode/issues/23#issuecomment-1665455910)
yasukichi commented 1 year ago

SpatialFilterProbes

}

- シェーダー(LumenRadiosity.ush)
```C++
// Coord in persistent radiosity probe atlas
/**
 * Maps a texel of a card tile to the coordinate of its probe in the persistent radiosity probe atlas.
 * The texel's physical atlas coordinate is shifted right by ProbeSpacingInRadiosityTexelsDivideShift.
 */
uint2 GetRadiosityProbeAtlasCoord(FLumenCardPageData CardPage, FCardTileData CardTile, uint2 CoordInCardTile)
{
    const uint2 TileSizeInTexels = uint2(CARD_TILE_SIZE, CARD_TILE_SIZE);
    uint2 TexelAtlasCoord = CardPage.PhysicalAtlasCoord + CardTile.TileCoord * TileSizeInTexels + CoordInCardTile;
    return TexelAtlasCoord >> ProbeSpacingInRadiosityTexelsDivideShift;
}

if FILTERING_KERNEL_SIZE == 1

        const uint NumSamples = 4;
        int2 NeighborOffsets[NumSamples];
        NeighborOffsets[0] = int2(0, 1);
        NeighborOffsets[1] = int2(1, 0);
        NeighborOffsets[2] = int2(0, -1);
        NeighborOffsets[3] = int2(-1, 0);

else

        const uint NumSamples = 13;
        int2 NeighborOffsets[NumSamples];
        NeighborOffsets[0] = int2(0, 2);
        NeighborOffsets[1] = int2(-1, 1);
        NeighborOffsets[2] = int2(0, 1);
        NeighborOffsets[3] = int2(1, 1);
        NeighborOffsets[4] = int2(-2, 0);
        NeighborOffsets[5] = int2(-1, 0);
        NeighborOffsets[6] = int2(0, 0);
        NeighborOffsets[7] = int2(1, 0);
        NeighborOffsets[8] = int2(2, 0);
        NeighborOffsets[9] = int2(-1, -1);
        NeighborOffsets[10] = int2(0, -1);
        NeighborOffsets[11] = int2(1, -1);
        NeighborOffsets[12] = int2(0, -2);

endif

        UNROLL
        for (uint i = 0; i < NumSamples; i++)
        {
            SampleTraceRadianceAtlas(
                RadiosityTexel,
                ProbeAtlasCoord,
                CardPage.ResLevelPageTableOffset, 
                ResLevelSizeInProbes, 
                ProbeCoordInCard + NeighborOffsets[i], 
                TraceTexelCoord,
                1.0f,
                Radiance,
                TotalWeight);
        }
    }

    RWFilteredTraceRadianceAtlas[RadiosityProbeTracingAtlasCoord] = Radiance / TotalWeight;
}

}

/**
 * Accumulates the traced radiance of one neighboring probe into a gather probe's filtered result.
 *
 * @param GatherProbeTexel         Texel of the probe being filtered (position/normal used for weighting).
 * @param GatherProbeAtlasCoord    Probe atlas coordinate of the probe being filtered.
 * @param ResLevelPageTableOffset  First page table entry of the card's resolution level.
 * @param ResLevelSizeInProbes     Size of the resolution level in probes.
 * @param ProbeCoordInCard         Coordinate of the neighbor probe; negative coordinates are skipped.
 * @param TraceTexelCoord          Trace texel inside the probe to read.
 * @param InterpolationWeight      Base weight of this neighbor.
 * @param Radiance                 Running weighted radiance sum.
 * @param WeightSum                Running weight sum.
 */
void SampleTraceRadianceAtlas(
    FRadiosityTexel GatherProbeTexel,
    uint2 GatherProbeAtlasCoord,
    uint ResLevelPageTableOffset,
    uint2 ResLevelSizeInProbes,
    int2 ProbeCoordInCard,
    uint2 TraceTexelCoord,
    float InterpolationWeight,
    inout float3 Radiance,
    inout float WeightSum)
{
    if (all(ProbeCoordInCard >= 0))
    {
        uint2 ResLevelSizeInPages = (ResLevelSizeInProbes * ProbeSpacingInRadiosityTexels) / PHYSICAL_PAGE_SIZE;
        uint2 CoordInCard = ProbeCoordInCard * ProbeSpacingInRadiosityTexels;
        // First find page to sample from
        uint2 PageCoordInCard = CoordInCard / PHYSICAL_PAGE_SIZE;

        //@todo - breaks SW tracing
        //if (all(PageCoordInCard < ResLevelSizeInPages))
        {
            uint LinearCardPageIndex = PageCoordInCard.x + PageCoordInCard.y * ResLevelSizeInPages.x;
            FLumenCardPageData CardPage = GetLumenCardPageData(ResLevelPageTableOffset + LinearCardPageIndex);

            // Don't sample if page doesn't have a valid probe
            if (CardPage.bMapped && CardPage.LastIndirectLightingUpdateFrameIndex != 0)
            {
                // Then tile and probe coordinates
                uint2 CoordInCardPage = CoordInCard - (PageCoordInCard * PHYSICAL_PAGE_SIZE);
                uint2 ProbeAtlasCoord = ((uint2)CardPage.PhysicalAtlasCoord + CoordInCardPage) >> ProbeSpacingInRadiosityTexelsDivideShift;

                FRadiosityTexel ProbeTexel = GetRadiosityTexel(CardPage, CoordInCardPage);

                if (ProbeTexel.bInsideAtlas)
                {
                    // Each enabled heuristic can only lower the weight
                    float Weight = ProbeTexel.bValid ? InterpolationWeight : 0.0f;

                    #if FILTERING_PLANE_WEIGHTING
                    {
                        float PlaneWeight = CalculatePlaneWeight(GatherProbeTexel.WorldPosition, GatherProbeTexel.WorldNormal, ProbeTexel.WorldPosition);
                        Weight = min(Weight, PlaneWeight);
                    }
                    #endif

                    #if FILTERING_PROBE_OCCLUSION
                    {
                        // Visibility is tested in both directions between the two probes
                        float VisibilityWeight = CalculateProbeVisibility(GatherProbeTexel.WorldPosition, ProbeTexel, ProbeAtlasCoord);
                        Weight = min(Weight, VisibilityWeight);

                        float VisibilityWeight2 = CalculateProbeVisibility(ProbeTexel.WorldPosition, GatherProbeTexel, GatherProbeAtlasCoord);
                        Weight = min(Weight, VisibilityWeight2);
                    }
                    #endif

                    if (Weight > 0)
                    {
                        Radiance += TraceRadianceAtlas[ProbeAtlasCoord * HemisphereProbeResolution + TraceTexelCoord] * Weight;
                        WeightSum += Weight;
                    }
                }
            }
        }
    }
}


![image](https://user-images.githubusercontent.com/14350715/224553603-92d56f82-a97c-4e3c-bf46-91380d93596f.png)
yasukichi commented 1 year ago

ConvertToSH

}

- シェーダー(LumenRadiosity.usf)
```C++
RWTexture2D<float4> RWRadiosityProbeSHRedAtlas;
RWTexture2D<float4> RWRadiosityProbeSHGreenAtlas;
RWTexture2D<float4> RWRadiosityProbeSHBlueAtlas;

// Projects the traced radiance of each radiosity probe onto two-band spherical harmonics.
// One thread per probe: the thread averages all HemisphereProbeResolution^2 traces of its probe
// and writes the red, green and blue coefficient vectors into the three SH atlases.
[numthreads(THREADGROUP_SIZE, 1, 1)]
void LumenRadiosityConvertToSH(
    uint DispatchThreadId : SV_DispatchThreadID)
{
    uint CardTileIndex;
    uint2 CoordInCardTile;
    UnswizzleCardTileIndex(DispatchThreadId, CardTileIndex, CoordInCardTile);

    // Threads past the last allocated card tile of this view do nothing
    if (CardTileIndex < CardTileAllocator[ViewIndex])
    {
        FRadiosityTexel RadiosityTexel = GetRadiosityTexelFromCardTile(CardTileIndex, CoordInCardTile);

        FCardTileData CardTile = GetCardTile(CardTileIndex);
        FLumenCardPageData CardPage = GetLumenCardPageData(CardTile.CardPageIndex);

        uint2 RadiosityProbeAtlasCoord = GetRadiosityProbeAtlasCoord(CardPage, CardTile, CoordInCardTile);

        FTwoBandSHVectorRGB IrradianceSH = (FTwoBandSHVectorRGB)0;
        float NumValidSamples = 0.0f;

        if (RadiosityTexel.bInsideAtlas && RadiosityTexel.bValid)
        {
            for (uint TraceY = 0; TraceY < HemisphereProbeResolution; ++TraceY)
            {
                for (uint TraceX = 0; TraceX < HemisphereProbeResolution; ++TraceX)
                {
                    uint2 TraceTexelCoord = uint2(TraceX, TraceY);
                    float3 TraceRadiance = TraceRadianceAtlas[RadiosityProbeAtlasCoord * HemisphereProbeResolution + TraceTexelCoord];

                    // Regenerate the direction and PDF this trace texel was traced with
                    float3 WorldRayDirection;
                    float ConeHalfAngle;
                    float PDF;
                    GetRadiosityRay(RadiosityTexel, RadiosityTexel.CardCoord >> ProbeSpacingInRadiosityTexelsDivideShift, TraceTexelCoord, WorldRayDirection, ConeHalfAngle, PDF);

                    // Accumulate radiance / PDF projected onto the SH basis of the ray direction
                    IrradianceSH = AddSH(IrradianceSH, MulSH(SHBasisFunction(WorldRayDirection), TraceRadiance / PDF));
                    NumValidSamples += 1.0f;
                }
            }
        }

        // Average over the sample count
        if (NumValidSamples > 0.0f)
        {
            IrradianceSH = MulSH(IrradianceSH, 1.0f / NumValidSamples);
        }

        // Written unconditionally: probes that are outside the atlas or invalid store all-zero coefficients
        RWRadiosityProbeSHRedAtlas[RadiosityProbeAtlasCoord] = IrradianceSH.R.V;
        RWRadiosityProbeSHGreenAtlas[RadiosityProbeAtlasCoord] = IrradianceSH.G.V;
        RWRadiosityProbeSHBlueAtlas[RadiosityProbeAtlasCoord] = IrradianceSH.B.V;
    }
}

image

yasukichi commented 1 year ago

Integrate

}

- シェーダー(LumenRadiosity.usf)
```C++
RWTexture2D<float3> RWRadiosityAtlas;
RWTexture2D<UNORM float> RWRadiosityNumFramesAccumulatedAtlas;

// Integrates the probe SH into per-texel radiosity, one thread per radiosity texel.
// Each texel interpolates the four surrounding probes, evaluates the diffuse transfer against
// its own normal, optionally blends with history, and writes the radiosity atlas together with
// the accumulated frame count.
[numthreads(THREADGROUP_SIZE, 1, 1)]
void LumenRadiosityIntegrateCS(
    uint3 DispatchThreadId : SV_DispatchThreadID)
{
    uint LinearTexelIndex = DispatchThreadId.x;

    // Decode the linear index into a card tile and a texel coordinate inside it (row by row)
    uint NumTexelsPerTile = CARD_TILE_SIZE/*=8*/ * CARD_TILE_SIZE;
    uint CardTileIndex = LinearTexelIndex / NumTexelsPerTile;
    const uint LinearIndexInCardTile = LinearTexelIndex - CardTileIndex * NumTexelsPerTile;
    uint2 CoordInCardTile = uint2(LinearIndexInCardTile % CARD_TILE_SIZE, LinearIndexInCardTile / CARD_TILE_SIZE);

    float3 TexelRadiance = float3(0.0f, 0.0f, 0.0f);
    float3 Debug = 0;
    float WeightSum = 0.0f;
    float NewNumFramesAccumulated = 0;

    FRadiosityTexel RadiosityTexel = GetRadiosityTexelFromCardTile(CardTileIndex, CoordInCardTile);

    if (RadiosityTexel.bInsideAtlas && RadiosityTexel.bValid)
    {
        FCardTileData CardTile = GetCardTile(CardTileIndex);
        FLumenCardPageData CardPage = GetLumenCardPageData(CardTile.CardPageIndex);
        FLumenCardData Card = GetLumenCardData(CardPage.CardIndex);

        //@todo - seam if CardPage.IndirectLightingTemporalIndex different
        uint2 CoordInCard = (CardPage.CardUVRect.xy * CardPage.ResLevelSizeInTiles + CardTile.TileCoord) * CARD_TILE_SIZE + CoordInCardTile;

        // Remove the probe jitter, then locate the 2x2 block of probes surrounding this texel
        uint2 ProbeFullResCoord = max((float2)CoordInCard - GetProbeJitter(CardPage.IndirectLightingTemporalIndex), 0.0f);
        uint2 ProbeCoord00 = ProbeFullResCoord >> ProbeSpacingInRadiosityTexelsDivideShift;
        uint2 ProbeCoord10 = ProbeCoord00 + uint2(1, 0);
        uint2 ProbeCoord01 = ProbeCoord00 + uint2(0, 1);
        uint2 ProbeCoord11 = ProbeCoord00 + uint2(1, 1);

        // Guarantee that no probe will have a bilinear weight of zero, when other probes might be discarded due to occlusion
        float BilinearExpand = 1;
        float2 BilinearWeights = ((float2)ProbeFullResCoord - ProbeCoord00 * ProbeSpacingInRadiosityTexels + BilinearExpand) / (float)(ProbeSpacingInRadiosityTexels + 2 * BilinearExpand);

        // Weights for probes 00, 10, 01, 11 in that order
        float4 Weights = float4(
            (1.0f - BilinearWeights.x) * (1.0f - BilinearWeights.y),
            BilinearWeights.x * (1.0f - BilinearWeights.y),
            (1.0f - BilinearWeights.x) * BilinearWeights.y,
            BilinearWeights.x * BilinearWeights.y);

        FTwoBandSHVectorRGB IrradianceSHWeighted = (FTwoBandSHVectorRGB)0;

        uint2 ResLevelSizeInProbes = CardPage.ResLevelSizeInTiles * RadiosityTileSize;
        SampleRadiositySH(CoordInCard, RadiosityTexel.WorldPosition, RadiosityTexel.WorldNormal, CardPage.ResLevelPageTableOffset, ResLevelSizeInProbes, ProbeCoord00, Weights.x, IrradianceSHWeighted, WeightSum, Debug);
        SampleRadiositySH(CoordInCard, RadiosityTexel.WorldPosition, RadiosityTexel.WorldNormal, CardPage.ResLevelPageTableOffset, ResLevelSizeInProbes, ProbeCoord10, Weights.y, IrradianceSHWeighted, WeightSum, Debug);
        SampleRadiositySH(CoordInCard, RadiosityTexel.WorldPosition, RadiosityTexel.WorldNormal, CardPage.ResLevelPageTableOffset, ResLevelSizeInProbes, ProbeCoord01, Weights.z, IrradianceSHWeighted, WeightSum, Debug);
        SampleRadiositySH(CoordInCard, RadiosityTexel.WorldPosition, RadiosityTexel.WorldNormal, CardPage.ResLevelPageTableOffset, ResLevelSizeInProbes, ProbeCoord11, Weights.w, IrradianceSHWeighted, WeightSum, Debug);

        // Evaluate the weighted SH against the diffuse transfer of the texel's normal, clamped to non-negative
        FTwoBandSHVector DiffuseTransferSH = CalcDiffuseTransferSH(RadiosityTexel.WorldNormal, 1.0f);
        TexelRadiance = max(float3(0.0f, 0.0f, 0.0f), DotSH(IrradianceSHWeighted, DiffuseTransferSH));

        // Normalize by the weight of the probes that actually contributed
        if (WeightSum > 0.0f)
        {
            TexelRadiance /= WeightSum;
        }

#if TEMPORAL_ACCUMULATION
        // The frame count is stored divided by 255 (see the write at the end), so scale it back up
        float NumFramesAccumulated = RWRadiosityNumFramesAccumulatedAtlas[RadiosityTexel.AtlasCoord] * 255.0f;
        NewNumFramesAccumulated = min(NumFramesAccumulated + 1, (float)MaxFramesAccumulated);

        // Running average: the new sample gets weight 1 / (1 + frames accumulated so far)
        float Alpha = 1.0f / (1.0f + NumFramesAccumulated);

        float3 HistoryRadiosity = RWRadiosityAtlas[RadiosityTexel.AtlasCoord];
        TexelRadiance = lerp(HistoryRadiosity, TexelRadiance, Alpha);
#endif

        ////////////////////////
        //TexelRadiance = Debug;
        //TexelRadiance = WeightSum > 0 ? 1 : 0;

#define DEBUG_VISUALIZE_PROBE_PLACEMENT 0
#if DEBUG_VISUALIZE_PROBE_PLACEMENT

        // Debug only: paint the texel on which a probe is placed
        if (all(CoordInCard == ProbeCoord00 * ProbeSpacingInRadiosityTexels + GetProbeJitter(CardPage.IndirectLightingTemporalIndex)))
        {
            TexelRadiance = float3(10, 0, 10);
        }
#endif

    }

    // Written unconditionally: texels that failed the test above store zero radiance and a zero frame count
    // NOTE(review): this also writes when bInsideAtlas is false - confirm AtlasCoord is safe to write in that case
    RWRadiosityNumFramesAccumulatedAtlas[RadiosityTexel.AtlasCoord] = NewNumFramesAccumulated / 255.0f;
    RWRadiosityAtlas[RadiosityTexel.AtlasCoord] = TexelRadiance;
}

/**
 * Adds the SH of one radiosity probe, scaled by its final weight, to IrradianceSH and adds
 * that weight to WeightSum.
 *
 * The probe contributes nothing when it lies outside of the res level, when its card page is
 * not mapped or has LastIndirectLightingUpdateFrameIndex == 0, when its texel lies outside of
 * the page, or when its final weight is not positive.
 *
 * TexelCoordInCard and Debug are part of the signature but are not touched by this function.
 */
void SampleRadiositySH(
    uint2 TexelCoordInCard,
    float3 TexelWorldPosition,
    float3 TexelNormal,
    uint ResLevelPageTableOffset, 
    uint2 ResLevelSizeInProbes, 
    uint2 ProbeCoordInCard, 
    float InterpolationWeight,
    inout FTwoBandSHVectorRGB IrradianceSH,
    inout float WeightSum,
    inout float3 Debug)
{
    // Probe outside of the res level
    if (!all(ProbeCoordInCard < ResLevelSizeInProbes))
    {
        return;
    }

    // Find the card page which holds the probe
    uint2 PageCountInResLevel = (ResLevelSizeInProbes * ProbeSpacingInRadiosityTexels) / PHYSICAL_PAGE_SIZE;
    uint2 ProbePageCoord = (ProbeCoordInCard * ProbeSpacingInRadiosityTexels) / PHYSICAL_PAGE_SIZE;
    uint ProbePageIndex = ProbePageCoord.y * PageCountInResLevel.x + ProbePageCoord.x;
    FLumenCardPageData CardPage = GetLumenCardPageData(ResLevelPageTableOffset + ProbePageIndex);

    // The page doesn't have a valid probe
    if (!CardPage.bMapped || CardPage.LastIndirectLightingUpdateFrameIndex == 0)
    {
        return;
    }

    // Probe texel relative to the page origin
    uint2 CoordInCardPage = ProbeCoordInCard * ProbeSpacingInRadiosityTexels - (ProbePageCoord * PHYSICAL_PAGE_SIZE);

    if (!all(CoordInCardPage < CardPage.SizeInTexels))
    {
        return;
    }

    uint2 ProbeAtlasCoord = ((uint2)CardPage.PhysicalAtlasCoord + CoordInCardPage) >> ProbeSpacingInRadiosityTexelsDivideShift;
    FRadiosityTexel NeighborProbeTexel = GetRadiosityTexel(CardPage, CoordInCardPage);

    // Start from the bilinear weight; an invalid probe texel starts at zero
    float Weight = 0.0f;
    if (NeighborProbeTexel.bValid)
    {
        Weight = InterpolationWeight;
    }

    #if INTERPOLATION_PLANE_WEIGHTING
    {
        Weight = min(Weight, CalculatePlaneWeight(TexelWorldPosition, TexelNormal, NeighborProbeTexel.WorldPosition));
    }
    #endif

    #if INTERPOLATION_PROBE_OCCLUSION
    {
        Weight = min(Weight, CalculateProbeVisibility(TexelWorldPosition, NeighborProbeTexel, ProbeAtlasCoord));
    }
    #endif

    if (Weight > 0.0f)
    {
        FTwoBandSHVectorRGB ProbeSH = GetRadiosityProbeSH(ProbeAtlasCoord);
        IrradianceSH = AddSH(IrradianceSH, MulSH(ProbeSH, Weight));
        WeightSum += Weight;
    }
}
yasukichi commented 1 year ago

CombiningLighting CS

// Resources of the lighting combine pass.
// NOTE(review): the template arguments of CardTiles and RWFinalLightingAtlas were stripped when this page was extracted.
// <float3> matches the float3 value CombineLumenSceneLightingCS stores into the atlas; <uint> for CardTiles is an
// assumption - confirm both against LumenSceneLighting.usf.
SamplerState BilinearClampedSampler;
StructuredBuffer<uint> CardTiles;
RWTexture2D<float3> RWFinalLightingAtlas;
float2 IndirectLightingAtlasHalfTexelSize;

/**
 * Combines albedo, emissive, direct lighting and indirect lighting of a card texel and writes
 * the result to RWFinalLightingAtlas.
 *
 * GroupId.x selects the card tile and GroupThreadId.xy selects the texel inside that tile.
 */
[numthreads(CARD_TILE_SIZE, CARD_TILE_SIZE, 1)]
void CombineLumenSceneLightingCS(
    uint3 GroupId : SV_GroupID,
    uint3 GroupThreadId : SV_GroupThreadID)
{
    uint CardTileIndex = GroupId.x;
    uint2 TexelCoordInTile = GroupThreadId.xy;

    FCardTileData CardTile = UnpackCardTileData(CardTiles[CardTileIndex]);

    FLumenCardPageData CardPage = GetLumenCardPageData(CardTile.CardPageIndex);
    FLumenCardData Card = GetLumenCardData(CardPage.CardIndex);

    uint2 CoordInCardPage = CARD_TILE_SIZE * CardTile.TileCoord + TexelCoordInTile;
    uint2 AtlasCoord = CardPage.PhysicalAtlasCoord + CoordInCardPage;
    // UV of the texel center inside the physical atlas
    float2 AtlasUV = CardPage.PhysicalAtlasUVRect.xy + CardPage.PhysicalAtlasUVTexelScale * (CoordInCardPage + 0.5);

#if RADIOSITY_ATLAS_DOWNSAMPLE_FACTOR == 1
    float2 IndirectLightingAtlasUV = AtlasUV;
#else
    // When sampling from a downsampled Indirect Lighting atlas we need to appropriately clamp input UVs to prevent bilinear reading outside of the valid area
    float2 IndirectLightingAtlasUV = clamp(AtlasUV, CardPage.PhysicalAtlasUVRect.xy + IndirectLightingAtlasHalfTexelSize, CardPage.PhysicalAtlasUVRect.zw - IndirectLightingAtlasHalfTexelSize);
#endif

    float3 Albedo = Texture2DSampleLevel(AlbedoAtlas, BilinearClampedSampler, AtlasUV, 0).xyz;
    float3 Emissive = Texture2DSampleLevel(EmissiveAtlas, BilinearClampedSampler, AtlasUV, 0).xyz;
    float3 DirectLighting = Texture2DSampleLevel(DirectLightingAtlas, BilinearClampedSampler, AtlasUV, 0).xyz;
    float3 IndirectLighting = Texture2DSampleLevel(IndirectLightingAtlas, BilinearClampedSampler, IndirectLightingAtlasUV, 0).xyz;

    float3 FinalLighting = CombineFinalLighting(Albedo, Emissive, DirectLighting, IndirectLighting);
    RWFinalLightingAtlas[AtlasCoord] = FinalLighting;
}

yasukichi commented 1 year ago

DiffuseIndirectAndAO

image

Lumen pass structure

back

yasukichi commented 1 year ago

LumenScreenProbeGather

image

Lumen pass structure

back

yasukichi commented 1 year ago

UniformPlacement DownsampleFactor=16

}

- シェーダー(LumenScreenProbeGather.usf)
```C++
/**
 * Writes the downsampled material of the uniformly placed screen probes.
 * Each thread handles the probe at atlas coordinate DispatchThreadId.xy.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void ScreenProbeDownsampleDepthUniformCS(
    uint3 GroupId : SV_GroupID,
    uint3 DispatchThreadId : SV_DispatchThreadID,
    uint3 GroupThreadId : SV_GroupThreadID)
{
    const uint2 ProbeAtlasCoord = DispatchThreadId.xy;

    // Threads outside of the atlas view have no probe to write
    if (!all(ProbeAtlasCoord < ScreenProbeAtlasViewSize))
    {
        return;
    }

    const uint2 ProbePixelPos = GetUniformScreenProbeScreenPosition(ProbeAtlasCoord);
    // UV of the center of the probe's pixel
    const float2 ProbeScreenUV = (ProbePixelPos + .5f) * View.BufferSizeAndInvSize.zw;

    WriteDownsampledProbeMaterial(ProbeScreenUV, ProbeAtlasCoord, GetScreenProbeMaterial(ProbePixelPos));
}

/**
 * Returns the screen position of the uniformly placed probe of a screen tile:
 * view rect min + tile coordinate * ScreenProbeDownsampleFactor + the current screen tile jitter,
 * limited to the last pixel of the view rect.
 */
uint2 GetUniformScreenProbeScreenPosition(uint2 ScreenTileCoord)
{
    const uint2 TileJitter = GetScreenTileJitter(SCREEN_TEMPORAL_INDEX);
    const uint2 JitteredPosition = (uint2)(View.ViewRectMinAndSize.xy + ScreenTileCoord * ScreenProbeDownsampleFactor + TileJitter);
    const uint2 LastViewPixel = (uint2)(View.ViewRectMinAndSize.xy + View.ViewRectMinAndSize.zw) - 1;

    return min(JitteredPosition, LastViewPixel);
}

/**
 * Reads the material data of a pixel and copies the fields a screen probe needs:
 * world normal, scene depth, validity and the foliage flag.
 */
FScreenProbeMaterial GetScreenProbeMaterial(uint2 PixelPos)
{
    const FLumenMaterialData PixelMaterial = ReadMaterialData(PixelPos);

    FScreenProbeMaterial ProbeMaterial;
    ProbeMaterial.bIsValid = IsValid(PixelMaterial);
    ProbeMaterial.bTwoSidedFoliage = IsFoliage(PixelMaterial);
    ProbeMaterial.SceneDepth = PixelMaterial.SceneDepth;
    ProbeMaterial.WorldNormal = PixelMaterial.WorldNormal;
    return ProbeMaterial;
}

/**
 * Reads the material data of a pixel through the reader selected at compile time:
 * front layer translucency when FRONT_LAYER_TRANSLUCENCY is set, otherwise Strata when
 * STRATA_ENABLED is set, otherwise the GBuffer.
 */
FLumenMaterialData ReadMaterialData(uint2 InPixelPos)
{
#if FRONT_LAYER_TRANSLUCENCY
    const FLumenMaterialData MaterialData = InternalReadMaterialData_FrontLayerTranslucency(InPixelPos);
#elif STRATA_ENABLED
    const FLumenMaterialData MaterialData = InternalReadMaterialData_Strata(InPixelPos);
#else
    const FLumenMaterialData MaterialData = InternalReadMaterialData_GBuffer(InPixelPos);
#endif

    return MaterialData;
}

/**
 * Stores the per-probe data of one screen probe at ScreenProbeAtlasCoord:
 * scene depth (negated for invalid probes), octahedron encoded world normal,
 * encoded world speed and translated world position.
 */
void WriteDownsampledProbeMaterial(float2 ScreenUV, uint2 ScreenProbeAtlasCoord, FScreenProbeMaterial ProbeMaterial)
{
    // Store unlit in sign bit
    const float SignedDepth = ProbeMaterial.bIsValid ? ProbeMaterial.SceneDepth : ProbeMaterial.SceneDepth * -1.0f;
    RWScreenProbeSceneDepth[ScreenProbeAtlasCoord] = asuint(SignedDepth);

    // Octahedron encoding remapped with * 0.5 + 0.5
    RWScreenProbeWorldNormal[ScreenProbeAtlasCoord] = UnitVectorToOctahedron(ProbeMaterial.WorldNormal) * 0.5 + 0.5;

    const float2 ScreenPosition = (ScreenUV - View.ScreenPositionScaleBias.wz) / View.ScreenPositionScaleBias.xy;
    const float3 TranslatedWorldPosition = mul(float4(ScreenPosition * ProbeMaterial.SceneDepth, ProbeMaterial.SceneDepth, 1), View.ScreenToTranslatedWorld).xyz;

    // Velocity = current position minus the position reconstructed from the history screen position
    const float DeviceZ = ConvertToDeviceZ(ProbeMaterial.SceneDepth);
    const float3 HistoryScreenPosition = GetHistoryScreenPositionIncludingTAAJitter(ScreenPosition, ScreenUV, DeviceZ);
    const float3 WorldVelocity = TranslatedWorldPosition - GetPrevTranslatedWorldPosition(HistoryScreenPosition);

    RWScreenProbeWorldSpeed[ScreenProbeAtlasCoord] = EncodeScreenProbeSpeed(length(WorldVelocity), ProbeMaterial.bTwoSidedFoliage);
    RWScreenProbeTranslatedWorldPosition[ScreenProbeAtlasCoord] = TranslatedWorldPosition;
}

image

image

yasukichi commented 1 year ago

AdaptivePlacement DownsampleFactor=8,4

}

- シェーダー(LumenScreenProbeGather.usf)
```C++
/** Reads the depth of a screen probe from the RWScreenProbeSceneDepth UAV and reinterprets its bits as float. */
float GetScreenProbeDepthFromUAV(uint2 ScreenProbeAtlasCoord)
{
    const uint DepthBits = RWScreenProbeSceneDepth[ScreenProbeAtlasCoord];
    return asfloat(DepthBits);
}

/**
 * Computes the interpolation weights of the 2x2 uniformly placed screen probes around a pixel:
 * bilinear weights multiplied by a per-probe depth weight.
 *
 * @param ScreenCoord           Pixel position; the view rect min and the screen tile jitter are subtracted from it.
 * @param NoiseOffset           Offset added to the pixel position before it is clamped to the view.
 * @param WorldPosition         Position of the pixel; together with WorldNormal it defines the plane used for plane weighting.
 * @param SceneDepth            Depth of the pixel; distances are divided by it to get relative differences.
 * @param WorldNormal           Normal of the pixel.
 * @param bIsUpsamplePass       true reads probe depths with GetScreenProbeDepth, false reads them with GetScreenProbeDepthFromUAV.
 * @param ScreenTileCoord00     Out: coordinate of probe 00; the other three probes are at +(1,0), +(0,1) and +(1,1).
 * @param InterpolationWeights  Out: weights of the probes 00, 10, 01, 11 in x, y, z, w. The weights are not normalized here.
 */
void CalculateUniformUpsampleInterpolationWeights(
    float2 ScreenCoord, 
    float2 NoiseOffset, 
    float3 WorldPosition, 
    float SceneDepth, 
    float3 WorldNormal, 
    uniform bool bIsUpsamplePass/*=false*/,
    out uint2 ScreenTileCoord00, 
    out float4 InterpolationWeights)
{
    uint2 ScreenProbeFullResScreenCoord = clamp(ScreenCoord.xy - View.ViewRectMin.xy - GetScreenTileJitter(SCREEN_TEMPORAL_INDEX) + NoiseOffset, 0.0f, View.ViewSizeAndInvSize.xy - 1.0f);
    // Limited to ScreenProbeViewSize - 2 so that the +1 neighbors stay below ScreenProbeViewSize
    ScreenTileCoord00 = min(ScreenProbeFullResScreenCoord / ScreenProbeDownsampleFactor, (uint2)ScreenProbeViewSize - 2);

    // The bilinear footprint is expanded by BilinearExpand texels on each side
    uint BilinearExpand = 1;
    float2 BilinearWeights = (ScreenProbeFullResScreenCoord - ScreenTileCoord00 * ScreenProbeDownsampleFactor + BilinearExpand) / (float)(ScreenProbeDownsampleFactor + 2 * BilinearExpand);

    float4 CornerDepths;
    CornerDepths.x = bIsUpsamplePass ? GetScreenProbeDepth(ScreenTileCoord00) : GetScreenProbeDepthFromUAV(ScreenTileCoord00);
    CornerDepths.y = bIsUpsamplePass ? GetScreenProbeDepth(ScreenTileCoord00 + int2(1, 0)) : GetScreenProbeDepthFromUAV(ScreenTileCoord00 + int2(1, 0));
    CornerDepths.z = bIsUpsamplePass ? GetScreenProbeDepth(ScreenTileCoord00 + int2(0, 1)) : GetScreenProbeDepthFromUAV(ScreenTileCoord00 + int2(0, 1));
    CornerDepths.w = bIsUpsamplePass ? GetScreenProbeDepth(ScreenTileCoord00 + int2(1, 1)) : GetScreenProbeDepthFromUAV(ScreenTileCoord00 + int2(1, 1));

    InterpolationWeights = float4(
        (1 - BilinearWeights.y) * (1 - BilinearWeights.x),
        (1 - BilinearWeights.y) * BilinearWeights.x,
        BilinearWeights.y * (1 - BilinearWeights.x),
        BilinearWeights.y * BilinearWeights.x);

    float4 DepthWeights;

#define PLANE_WEIGHTING 1
#if PLANE_WEIGHTING
    {
        // Weight each probe by its distance to the plane through the pixel, relative to the pixel depth
        float4 ScenePlane = float4(WorldNormal, dot(WorldPosition, WorldNormal));

        float3 Position00 = GetWorldPositionFromScreenUV(GetScreenUVFromScreenTileCoord(ScreenTileCoord00), CornerDepths.x);
        float3 Position10 = GetWorldPositionFromScreenUV(GetScreenUVFromScreenTileCoord(ScreenTileCoord00 + uint2(1, 0)), CornerDepths.y);
        float3 Position01 = GetWorldPositionFromScreenUV(GetScreenUVFromScreenTileCoord(ScreenTileCoord00 + uint2(0, 1)), CornerDepths.z);
        float3 Position11 = GetWorldPositionFromScreenUV(GetScreenUVFromScreenTileCoord(ScreenTileCoord00 + uint2(1, 1)), CornerDepths.w);

        float4 PlaneDistances;
        PlaneDistances.x = abs(dot(float4(Position00, -1), ScenePlane));
        PlaneDistances.y = abs(dot(float4(Position10, -1), ScenePlane));
        PlaneDistances.z = abs(dot(float4(Position01, -1), ScenePlane));
        PlaneDistances.w = abs(dot(float4(Position11, -1), ScenePlane));

        float4 RelativeDepthDifference = PlaneDistances / SceneDepth;

        // Probes with a depth <= 0 get a weight of zero
        DepthWeights = select(CornerDepths > 0, exp2(-10000.0f * (RelativeDepthDifference * RelativeDepthDifference)), 0.0);
    }
#else
    {
        float4 DepthDifference = abs(CornerDepths - SceneDepth.xxxx);
        float4 RelativeDepthDifference = DepthDifference / SceneDepth;
        // select() instead of a vector ternary, matching the plane weighting path above
        DepthWeights = select(CornerDepths > 0, exp2(-100.0f * (RelativeDepthDifference * RelativeDepthDifference)), 0.0);
    }
#endif

    InterpolationWeights *= DepthWeights;
}

// UAVs describing the adaptively placed screen probes.
// Per screen tile: number of adaptive probes of that tile (read as NumAdaptiveProbes, incremented with InterlockedAdd on placement)
RWTexture2D<uint> RWScreenTileAdaptiveProbeHeader;
// Per adaptive probe coordinate (see GetAdaptiveProbeCoord): the adaptive probe index stored for that list entry
RWTexture2D<uint> RWScreenTileAdaptiveProbeIndices;
// Per adaptive probe index: the probe's screen position, written with EncodeScreenProbeData and read with DecodeScreenProbeData
RWStructuredBuffer<uint> RWAdaptiveScreenProbeData;

/** The four screen probes selected for interpolation at a pixel, with one weight per probe. */
struct FScreenProbeSample
{
    // Atlas coordinates of the probes; CalculateUpsampleInterpolationWeights fills them in the order 00, 10, 01, 11
    uint2 AtlasCoord[4];
    // Interpolation weight of each probe, Weights[i] belongs to AtlasCoord[i]
    float4 Weights;
};

/**
 * Selects the four screen probes to interpolate at a pixel and computes their weights.
 *
 * Starts from the 2x2 uniform probes (CalculateUniformUpsampleInterpolationWeights). For every corner
 * whose weight is <= Epsilon, the adaptive probes listed for that corner's screen tile are searched and
 * the corner is replaced by the adaptive probe with the largest weight, if that weight is larger than
 * the corner's current one.
 *
 * bIsUpsamplePass selects the resources the probe data is read from: the non-RW resources when true,
 * the RW (UAV) resources when false.
 */
void CalculateUpsampleInterpolationWeights(
    float2 ScreenCoord,
    float2 NoiseOffset,
    float3 WorldPosition,
    float SceneDepth,
    float3 WorldNormal,
    uniform bool bIsUpsamplePass,
    out FScreenProbeSample ScreenProbeSample)
{
    uint2 ScreenTileCoord00;
    CalculateUniformUpsampleInterpolationWeights(ScreenCoord, NoiseOffset, WorldPosition, SceneDepth, WorldNormal, bIsUpsamplePass, ScreenTileCoord00, ScreenProbeSample.Weights);

    // Default to the uniform probes, in the order 00, 10, 01, 11
    ScreenProbeSample.AtlasCoord[0] = ScreenTileCoord00;
    ScreenProbeSample.AtlasCoord[1] = ScreenTileCoord00 + uint2(1, 0);
    ScreenProbeSample.AtlasCoord[2] = ScreenTileCoord00 + uint2(0, 1);
    ScreenProbeSample.AtlasCoord[3] = ScreenTileCoord00 + uint2(1, 1);

    // Constant true here, so the block below always runs
    bool bUseAdaptiveProbesForUpsample = true;

    if (bUseAdaptiveProbesForUpsample || !bIsUpsamplePass)
    {       
        float Epsilon = .01f;
        float4 ScenePlane = float4(WorldNormal, dot(WorldPosition, WorldNormal));

        UNROLL
        for (uint CornerIndex = 0; CornerIndex < 4; CornerIndex++)
        {
            // Only corners whose uniform probe was (almost) rejected look for an adaptive replacement
            if (ScreenProbeSample.Weights[CornerIndex] <= Epsilon)
            {
                uint2 ScreenTileCoord = ScreenTileCoord00 + uint2(CornerIndex % 2, CornerIndex / 2);
                uint NumAdaptiveProbes = bIsUpsamplePass ? ScreenTileAdaptiveProbeHeader[ScreenTileCoord] : RWScreenTileAdaptiveProbeHeader[ScreenTileCoord];

                for (uint AdaptiveProbeListIndex = 0; AdaptiveProbeListIndex < NumAdaptiveProbes; AdaptiveProbeListIndex++)
                {
                    uint2 AdaptiveProbeCoord = GetAdaptiveProbeCoord(ScreenTileCoord, AdaptiveProbeListIndex);
                    uint AdaptiveProbeIndex = bIsUpsamplePass ? ScreenTileAdaptiveProbeIndices[AdaptiveProbeCoord] : RWScreenTileAdaptiveProbeIndices[AdaptiveProbeCoord];
                    // Adaptive probes are indexed after the NumUniformScreenProbes uniform probes
                    uint ScreenProbeIndex = AdaptiveProbeIndex + NumUniformScreenProbes;

                    uint2 ScreenProbeScreenPosition = bIsUpsamplePass ? GetScreenProbeScreenPosition(ScreenProbeIndex) : DecodeScreenProbeData(RWAdaptiveScreenProbeData[AdaptiveProbeIndex]);
                    uint2 ScreenProbeAtlasCoord = uint2(ScreenProbeIndex % ScreenProbeAtlasViewSize.x, ScreenProbeIndex / ScreenProbeAtlasViewSize.x);
                    float ProbeDepth = bIsUpsamplePass ? GetScreenProbeDepth(ScreenProbeAtlasCoord) : GetScreenProbeDepthFromUAV(ScreenProbeAtlasCoord);

                    float NewDepthWeight = 0;
                    // Constant true here, so the else branch below never executes
                    bool bPlaneWeighting = true;
                    if (bPlaneWeighting)
                    {
                        // Distance of the probe to the plane through the pixel, relative to the pixel depth
                        float3 ProbePosition = GetWorldPositionFromScreenUV(GetScreenUVFromScreenProbePosition(ScreenProbeScreenPosition), ProbeDepth);
                        float PlaneDistance = abs(dot(float4(ProbePosition, -1), ScenePlane));
                        float RelativeDepthDifference = PlaneDistance / SceneDepth;
                        NewDepthWeight = exp2(-10000.0f * (RelativeDepthDifference * RelativeDepthDifference));
                    }
                    else
                    {
                        float DepthDifference = abs(ProbeDepth - SceneDepth);
                        float RelativeDepthDifference = DepthDifference / SceneDepth;
                        NewDepthWeight = ProbeDepth > 0 ? exp2(-100.0f * (RelativeDepthDifference * RelativeDepthDifference)) : 0;
                    }

                    // Screen-space falloff: 1 - saturate(min(|dx|, |dy|) / ScreenProbeDownsampleFactor)
                    float2 DistanceToScreenProbe = abs(ScreenProbeScreenPosition - ScreenCoord);
                    float NewCornerWeight = 1.0f - saturate(min(DistanceToScreenProbe.x, DistanceToScreenProbe.y) / (float)ScreenProbeDownsampleFactor);
                    float NewInterpolationWeight = NewDepthWeight * NewCornerWeight;

                    // Keep the best adaptive probe found so far for this corner
                    if (NewInterpolationWeight > ScreenProbeSample.Weights[CornerIndex])
                    {
                        ScreenProbeSample.Weights[CornerIndex] = NewInterpolationWeight;
                        ScreenProbeSample.AtlasCoord[CornerIndex] = ScreenProbeAtlasCoord;
                    }
                }
            }
        }
    }
}

image image image

// Group-shared state of ScreenProbeAdaptivePlacementCS.
// Number of probes the threads of this group want to allocate
groupshared uint SharedNumProbesToAllocate;
// Value of the global adaptive probe counter before this group's probes were added
groupshared uint SharedAdaptiveProbeBaseIndex;
// One slot per thread of the THREADGROUP_SIZE x THREADGROUP_SIZE group (the '*' was lost when this page was extracted)
groupshared uint2 SharedProbeScreenPositionsToAllocate[THREADGROUP_SIZE * THREADGROUP_SIZE];
groupshared FScreenProbeMaterial SharedScreenProbeMaterial[THREADGROUP_SIZE * THREADGROUP_SIZE];

// Spacing in pixels between the candidate positions tested by ScreenProbeAdaptivePlacementCS
uint PlacementDownsampleFactor;

/**
 * Places adaptive screen probes where the existing probes cannot be interpolated.
 *
 * Each thread tests one candidate position (DispatchThreadId.xy * PlacementDownsampleFactor + jitter + view rect min).
 * When the interpolation weights of the existing probes do not sum to (almost) one after normalization, the candidate
 * is queued in group-shared memory. One thread then reserves a range of adaptive probe indices for the whole group,
 * and the first SharedNumProbesToAllocate threads write out one queued probe each.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)] void ScreenProbeAdaptivePlacementCS( uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_DispatchThreadID, uint3 GroupThreadId : SV_GroupThreadID) { uint ThreadIndex = GroupThreadId.y * THREADGROUP_SIZE + GroupThreadId.x;

// Reset the group's allocation counter before any thread adds to it
if (ThreadIndex == 0)
{
    SharedNumProbesToAllocate = 0;
}

GroupMemoryBarrierWithGroupSync();

{
    uint2 ScreenProbeScreenPosition = DispatchThreadId.xy * PlacementDownsampleFactor + GetScreenTileJitter(SCREEN_TEMPORAL_INDEX) + View.ViewRectMinAndSize.xy;

    // Only positions inside the view rect are tested, and threads whose x and y are both even are skipped
    // NOTE(review): presumably those positions are already covered by the previous, coarser placement - confirm
    if (all(ScreenProbeScreenPosition < View.ViewRectMinAndSize.xy + View.ViewRectMinAndSize.zw) && any((DispatchThreadId.xy & 0x1) != 0))
    {
        const FScreenProbeMaterial ScreenProbeMaterial = GetScreenProbeMaterial(ScreenProbeScreenPosition);
        if (ScreenProbeMaterial.bIsValid)
        {
            float2 ScreenUV = (ScreenProbeScreenPosition + .5f) * View.BufferSizeAndInvSize.zw;
            float3 WorldPosition = GetWorldPositionFromScreenUV(ScreenUV, ScreenProbeMaterial.SceneDepth);
            float2 NoiseOffset = 0.0f;

            FScreenProbeSample ScreenProbeSample = (FScreenProbeSample)0;

            // bIsUpsamplePass = false: probe data is read from the UAVs written by the placement passes
            CalculateUpsampleInterpolationWeights(
                ScreenProbeScreenPosition,
                NoiseOffset,
                WorldPosition,
                ScreenProbeMaterial.SceneDepth,
                ScreenProbeMaterial.WorldNormal,
                false,
                ScreenProbeSample);

            // Normalize with a lower bound on the divisor: when the raw weights sum to less than Epsilon
            // the normalized sum stays below one, which marks the lighting as invalid below
            float Epsilon = .01f;
            ScreenProbeSample.Weights /= max(dot(ScreenProbeSample.Weights, 1), Epsilon);

            float LightingIsValid = (dot(ScreenProbeSample.Weights, 1) < 1.0f - Epsilon) ? 0.0f : 1.0f;

            if (!LightingIsValid)
            {
                // No usable probe was found: queue the screen position and the material in group-shared memory (SharedProbeScreenPositionsToAllocate, SharedScreenProbeMaterial)
                uint SharedListIndex;
                InterlockedAdd(SharedNumProbesToAllocate, 1, SharedListIndex);
                SharedProbeScreenPositionsToAllocate[SharedListIndex] = ScreenProbeScreenPosition;
                SharedScreenProbeMaterial[SharedListIndex] = ScreenProbeMaterial;
            }
        }
    }
}

GroupMemoryBarrierWithGroupSync();

if (ThreadIndex == 0)
{
    // Add this group's probe count to the global counter RWNumAdaptiveScreenProbes; the previous counter value becomes the group's base index
    InterlockedAdd(RWNumAdaptiveScreenProbes[0], SharedNumProbesToAllocate, SharedAdaptiveProbeBaseIndex);
}

GroupMemoryBarrierWithGroupSync();

uint AdaptiveProbeIndex = ThreadIndex + SharedAdaptiveProbeBaseIndex;

// One thread per queued probe; probes beyond MaxNumAdaptiveProbes are dropped
if (ThreadIndex < SharedNumProbesToAllocate && AdaptiveProbeIndex < MaxNumAdaptiveProbes)
{
    uint2 ScreenProbeScreenPosition = SharedProbeScreenPositionsToAllocate[ThreadIndex];
    // Store the encoded screen position of the new probe in RWAdaptiveScreenProbeData
    RWAdaptiveScreenProbeData[AdaptiveProbeIndex] = EncodeScreenProbeData(ScreenProbeScreenPosition);
    uint2 ScreenTileCoord = GetScreenTileCoord(ScreenProbeScreenPosition);

    uint TileProbeIndex;
    // Increment the probe count of this screen tile in RWScreenTileAdaptiveProbeHeader (RWTexture2D<uint>); the previous count is the probe's slot in the tile's list
    InterlockedAdd(RWScreenTileAdaptiveProbeHeader[ScreenTileCoord], 1, TileProbeIndex);
    uint2 AdaptiveProbeCoord = GetAdaptiveProbeCoord(ScreenTileCoord, TileProbeIndex);
    // Store the adaptive probe index of the new probe in RWScreenTileAdaptiveProbeIndices (RWTexture2D<uint>)
    RWScreenTileAdaptiveProbeIndices[AdaptiveProbeCoord] = AdaptiveProbeIndex;

    float2 ScreenUV = (ScreenProbeScreenPosition + .5f) * View.BufferSizeAndInvSize.zw;
    // Adaptive probes are placed in the atlas after the NumUniformScreenProbes uniform probes
    uint ScreenProbeIndex = NumUniformScreenProbes + AdaptiveProbeIndex;
    uint2 ScreenProbeAtlasCoord = uint2(ScreenProbeIndex % ScreenProbeAtlasViewSize.x, ScreenProbeIndex / ScreenProbeAtlasViewSize.x);
    WriteDownsampledProbeMaterial(ScreenUV, ScreenProbeAtlasCoord, SharedScreenProbeMaterial[ThreadIndex]);
}

}

/**
 * Stores the per-probe data of one screen probe at ScreenProbeAtlasCoord:
 * scene depth (negated for invalid probes), octahedron encoded world normal,
 * encoded world speed and translated world position.
 */
void WriteDownsampledProbeMaterial(float2 ScreenUV, uint2 ScreenProbeAtlasCoord, FScreenProbeMaterial ProbeMaterial)
{
    // Store unlit in sign bit
    const float SignedDepth = ProbeMaterial.bIsValid ? ProbeMaterial.SceneDepth : ProbeMaterial.SceneDepth * -1.0f;
    RWScreenProbeSceneDepth[ScreenProbeAtlasCoord] = asuint(SignedDepth);

    // Octahedron encoding remapped with * 0.5 + 0.5
    RWScreenProbeWorldNormal[ScreenProbeAtlasCoord] = UnitVectorToOctahedron(ProbeMaterial.WorldNormal) * 0.5 + 0.5;

    const float2 ScreenPosition = (ScreenUV - View.ScreenPositionScaleBias.wz) / View.ScreenPositionScaleBias.xy;
    const float3 TranslatedWorldPosition = mul(float4(ScreenPosition * ProbeMaterial.SceneDepth, ProbeMaterial.SceneDepth, 1), View.ScreenToTranslatedWorld).xyz;

    // Velocity = current position minus the position reconstructed from the history screen position
    const float DeviceZ = ConvertToDeviceZ(ProbeMaterial.SceneDepth);
    const float3 HistoryScreenPosition = GetHistoryScreenPositionIncludingTAAJitter(ScreenPosition, ScreenUV, DeviceZ);
    const float3 WorldVelocity = TranslatedWorldPosition - GetPrevTranslatedWorldPosition(HistoryScreenPosition);

    RWScreenProbeWorldSpeed[ScreenProbeAtlasCoord] = EncodeScreenProbeSpeed(length(WorldVelocity), ProbeMaterial.bTwoSidedFoliage);
    RWScreenProbeTranslatedWorldPosition[ScreenProbeAtlasCoord] = TranslatedWorldPosition;
}

yasukichi commented 1 year ago

SetupAdaptiveProbeIndirectArgs

}

- シェーダー(LumenScreenProbeGather.usf)
```C++
// Indirect dispatch arguments, three uints (group counts in X, Y, Z) per entry
RWBuffer<uint> RWScreenProbeIndirectArgs;

/**
 * Writes entry Index of RWScreenProbeIndirectArgs: ThreadCount divided by PROBE_THREADGROUP_SIZE_2D
 * and rounded up in X and Y, and 1 in Z.
 */
void WriteArgs2D(uint Index, uint2 ThreadCount)
{
    const uint FirstArg = Index * 3;
    const uint2 GroupCountXY = (ThreadCount + PROBE_THREADGROUP_SIZE_2D - 1) / PROBE_THREADGROUP_SIZE_2D;

    RWScreenProbeIndirectArgs[FirstArg + 0] = GroupCountXY.x;
    RWScreenProbeIndirectArgs[FirstArg + 1] = GroupCountXY.y;
    RWScreenProbeIndirectArgs[FirstArg + 2] = 1;
}

/**
 * Fills the six indirect dispatch argument entries from the current number of screen probes.
 * Runs as a single thread.
 */
[numthreads(1, 1, 1)]
void SetupAdaptiveProbeIndirectArgsCS(
    uint3 GroupId : SV_GroupID,
    uint3 DispatchThreadId : SV_DispatchThreadID,
    uint3 GroupThreadId : SV_GroupThreadID)
{
    // Number of atlas rows needed to hold all probes, rounded up
    const uint AtlasRowCount = (GetNumScreenProbes() + ScreenProbeAtlasViewSize.x - 1) / ScreenProbeAtlasViewSize.x;
    const uint2 AtlasSizeInProbes = uint2(ScreenProbeAtlasViewSize.x, AtlasRowCount);

    // Must match EScreenProbeIndirectArgs in C++
    WriteArgs2D(0, AtlasSizeInProbes * PROBE_THREADGROUP_SIZE_2D);
    WriteArgs2D(1, AtlasSizeInProbes);
    WriteArgs2D(2, AtlasSizeInProbes * ScreenProbeTracingOctahedronResolution);
    WriteArgs2D(3, AtlasSizeInProbes * ScreenProbeGatherOctahedronResolution);
    WriteArgs2D(4, AtlasSizeInProbes * ScreenProbeGatherOctahedronResolutionWithBorder);
    WriteArgs2D(5, AtlasSizeInProbes * ScreenProbeLightSampleResolutionXY);
}
yasukichi commented 1 year ago

ComputeBRDF_PDF

/** Loads the nine SH coefficients stored at PDF_SphericalHarmonic[ThreadIndex] into a FThreeBandSHVector. */
FThreeBandSHVector GetGroupSharedSH(uint ThreadIndex)
{
    FThreeBandSHVector SH;

    // Coefficients 0-3
    SH.V0.x = PDF_SphericalHarmonic[ThreadIndex][0];
    SH.V0.y = PDF_SphericalHarmonic[ThreadIndex][1];
    SH.V0.z = PDF_SphericalHarmonic[ThreadIndex][2];
    SH.V0.w = PDF_SphericalHarmonic[ThreadIndex][3];

    // Coefficients 4-7
    SH.V1.x = PDF_SphericalHarmonic[ThreadIndex][4];
    SH.V1.y = PDF_SphericalHarmonic[ThreadIndex][5];
    SH.V1.z = PDF_SphericalHarmonic[ThreadIndex][6];
    SH.V1.w = PDF_SphericalHarmonic[ThreadIndex][7];

    // Coefficient 8
    SH.V2.x = PDF_SphericalHarmonic[ThreadIndex][8];

    return SH;
}

/** Stores the nine coefficients of SH at PDF_SphericalHarmonic[ThreadIndex]. */
void WriteGroupSharedSH(FThreeBandSHVector SH, uint ThreadIndex)
{
    // Coefficients 0-3
    PDF_SphericalHarmonic[ThreadIndex][0] = SH.V0.x;
    PDF_SphericalHarmonic[ThreadIndex][1] = SH.V0.y;
    PDF_SphericalHarmonic[ThreadIndex][2] = SH.V0.z;
    PDF_SphericalHarmonic[ThreadIndex][3] = SH.V0.w;

    // Coefficients 4-7
    PDF_SphericalHarmonic[ThreadIndex][4] = SH.V1.x;
    PDF_SphericalHarmonic[ThreadIndex][5] = SH.V1.y;
    PDF_SphericalHarmonic[ThreadIndex][6] = SH.V1.z;
    PDF_SphericalHarmonic[ThreadIndex][7] = SH.V1.w;

    // Coefficient 8
    PDF_SphericalHarmonic[ThreadIndex][8] = SH.V2.x;
}

/**
 * Computes the BRDF probability density function of one screen probe as spherical harmonics.
 *
 * One thread group handles one probe (GroupId.xy is the probe's atlas coordinate). Every thread reads
 * the material of one pixel around the probe and, when the pixel passes the plane-distance test,
 * appends its diffuse transfer SH to group-shared memory. The appended SH are then summed with a
 * 4-to-1 reduction and the average is written to RWBRDFProbabilityDensityFunctionSH.
 */
[numthreads(PROBE_THREADGROUP_SIZE_2D, PROBE_THREADGROUP_SIZE_2D, 1)] // PROBE_THREADGROUP_SIZE_2D = 8 according to the author's note
void ScreenProbeComputeBRDFProbabilityDensityFunctionCS(
    uint3 GroupId : SV_GroupID,
    uint3 DispatchThreadId : SV_DispatchThreadID,
    uint3 GroupThreadId : SV_GroupThreadID)
{
    uint2 ScreenProbeAtlasCoord = GroupId.xy;
    uint ScreenProbeIndex = ScreenProbeAtlasCoord.y * ScreenProbeAtlasViewSize.x + ScreenProbeAtlasCoord.x;

    uint2 ScreenProbeScreenPosition = GetScreenProbeScreenPosition(ScreenProbeIndex);

    if (ScreenProbeIndex < GetNumScreenProbes() && ScreenProbeAtlasCoord.x < ScreenProbeAtlasViewSize.x)
    {
        float ProbeSceneDepth = GetScreenProbeDepth(ScreenProbeAtlasCoord);

        // Probes with a depth <= 0 are skipped. The condition only depends on GroupId,
        // so all threads of the group take the same branch around the barriers below.
        if (ProbeSceneDepth > 0)
        {
            uint ThreadIndex = GroupThreadId.y * PROBE_THREADGROUP_SIZE_2D + GroupThreadId.x;

            if (ThreadIndex == 0)
            {
                NumSphericalHarmonics = 0;
            }

            GroupMemoryBarrierWithGroupSync();

            float2 ScreenUV = GetScreenUVFromScreenProbePosition(ScreenProbeScreenPosition);

            {
                float2 ThreadOffset = GroupThreadId.xy / (float)PROBE_THREADGROUP_SIZE_2D * ScreenProbeDownsampleFactor * 2.0f - 1.0f;

                bool bCenterSample = all(GroupThreadId.xy == PROBE_THREADGROUP_SIZE_2D / 2);
                if (bCenterSample)
                {
                    // Make sure we have at least one pixel that won't be rejected by the depth weight
                    ThreadOffset = 0;
                }

                // Pixel sampled by this thread, clamped to the view rect
                float2 PixelScreenUV = ScreenUV + ThreadOffset * View.BufferSizeAndInvSize.zw;
                PixelScreenUV = clamp(PixelScreenUV, View.ViewRectMin.xy * View.BufferSizeAndInvSize.zw, (View.ViewRectMin.xy + View.ViewSizeAndInvSize.xy - 1) * View.BufferSizeAndInvSize.zw);
                const uint2 PixelPos = PixelScreenUV * View.BufferSizeAndInvSize.xy;

                const FLumenMaterialData Material = ReadMaterialData(PixelPos, PixelScreenUV);

                // Distance of the probe to the plane through the pixel, relative to the probe depth
                float3 PixelPosition = GetWorldPositionFromScreenUV(PixelScreenUV, Material.SceneDepth);
                float4 PixelPlane = float4(Material.WorldNormal, dot(Material.WorldNormal, PixelPosition));
                float3 ProbeWorldPosition = GetWorldPositionFromScreenUV(ScreenUV, ProbeSceneDepth);

                float PlaneDistance = abs(dot(float4(ProbeWorldPosition, -1), PixelPlane));
                float RelativeDepthDifference = PlaneDistance / ProbeSceneDepth;
                float DepthWeight = exp2(-10000.0f * (RelativeDepthDifference * RelativeDepthDifference));

                if (DepthWeight > .1f || bCenterSample)
                {
                    // Reserve a slot in the group-shared SH list
                    uint Index;
                    InterlockedAdd(NumSphericalHarmonics, 1, Index);

                    FThreeBandSHVector BRDF;

                    if (HasSphericalVisibility(Material)/* IsFoliage(Material) || IsHair(Material) */)
                    {
                        // Avoid culling directions that the shading models will sample
                        BRDF = (FThreeBandSHVector)0;
                        BRDF.V0.x = 1.0f;
                    }
                    else
                    {
                        BRDF = CalcDiffuseTransferSH3(Material.WorldNormal, 1.0f);
                    }

                    WriteGroupSharedSH(BRDF, Index);
                }
            }

            GroupMemoryBarrierWithGroupSync();

            // 4-to-1 reduction: each pass reads NumSHToAccumulate entries starting at Offset and
            // writes the partial sums right behind them, then continues on the partial sums
            uint NumSHToAccumulate = NumSphericalHarmonics;
            uint Offset = 0;

            while (NumSHToAccumulate > 1)
            {
                uint ThreadBaseIndex = ThreadIndex * 4;

                if (ThreadBaseIndex < NumSHToAccumulate)
                {
                    FThreeBandSHVector PDF = GetGroupSharedSH(ThreadBaseIndex + Offset);

                    if (ThreadBaseIndex + 1 < NumSHToAccumulate)
                    {
                        PDF = AddSH(PDF, GetGroupSharedSH(ThreadBaseIndex + 1 + Offset));
                    }

                    if (ThreadBaseIndex + 2 < NumSHToAccumulate)
                    {
                        PDF = AddSH(PDF, GetGroupSharedSH(ThreadBaseIndex + 2 + Offset));
                    }

                    if (ThreadBaseIndex + 3 < NumSHToAccumulate)
                    {
                        PDF = AddSH(PDF, GetGroupSharedSH(ThreadBaseIndex + 3 + Offset));
                    }

                    WriteGroupSharedSH(PDF, ThreadIndex + Offset + NumSHToAccumulate);
                }

                Offset += NumSHToAccumulate;
                NumSHToAccumulate = (NumSHToAccumulate + 3) / 4;

                GroupMemoryBarrierWithGroupSync();
            }

            // The total is at PDF_SphericalHarmonic[Offset]; one thread per coefficient writes the average
            if (ThreadIndex < NUM_PDF_SH_COEFFICIENTS)
            {
                uint WriteIndex = (ScreenProbeAtlasCoord.y * ScreenProbeAtlasViewSize.x + ScreenProbeAtlasCoord.x) * NUM_PDF_SH_COEFFICIENTS + ThreadIndex;
                float NormalizeWeight = 1.0f / (float)(NumSphericalHarmonics);
                RWBRDFProbabilityDensityFunctionSH[WriteIndex] = PDF_SphericalHarmonic[Offset][ThreadIndex] * NormalizeWeight;
            }
        }
    }
}


<img width="555" alt="image" src="https://github.com/yasukichi/testcode/assets/14350715/2595174a-ac24-44b6-b097-355f79b808d0">
yasukichi commented 1 year ago

UpdateRadianceCaches

image

Lumen pass structure

back

yasukichi commented 1 year ago

UpdateRadianceCacheState

yasukichi commented 1 year ago

ClearProbeIndirectionCS

/** Resets the texel of RWRadianceProbeIndirectionTexture addressed by DispatchThreadId to INVALID_PROBE_INDEX. */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
void ClearProbeIndirectionCS(
    uint3 GroupId : SV_GroupID,
    uint3 DispatchThreadId : SV_DispatchThreadID,
    uint3 GroupThreadId : SV_GroupThreadID)
{
    const uint3 TexelCoord = DispatchThreadId;
    RWRadianceProbeIndirectionTexture[TexelCoord] = INVALID_PROBE_INDEX;
}

yasukichi commented 1 year ago
TranslucentSurfacesMarkPass

}


- C++(LumenTranslucentRadianceCache.cpp)
```C++
/**
 * Adds the "TranslucentSurfacesMarkPass" raster pass, which draws the
 * EMeshPass::LumenTranslucencyRadianceCacheMark mesh pass into a downsampled viewport.
 *
 * @param GraphBuilder                 Render graph the pass and its parameters are allocated on.
 * @param SceneRenderer                Provides the scene (GPUScene) and the stereo viewport setup.
 * @param View                         View whose mesh draw commands are dispatched; mutated via BeginRenderView().
 * @param SceneTextures                Supplies the buffer extent and the scene texture uniform parameters.
 * @param RadianceCacheMarkParameters  Mark parameters copied into the pass uniform buffer
 *                                     (InvClipmapFadeSizeForMark is overridden below).
 */
void LumenTranslucencyReflectionsMarkUsedProbes(
    FRDGBuilder& GraphBuilder,
    const FSceneRenderer& SceneRenderer,
    FViewInfo& View,
    const FSceneTextures& SceneTextures,
    const LumenRadianceCache::FRadianceCacheMarkParameters& RadianceCacheMarkParameters)
{
    // Callers must only invoke this when translucency radiance cache reflections are enabled.
    check(GLumenTranslucencyRadianceCacheReflections != 0);

    const EMeshPass::Type MeshPass = EMeshPass::LumenTranslucencyRadianceCacheMark;
    // The mark pass renders at 1 / DownsampleFactor of the view resolution.
    const float ViewportScale = 1.0f / GLumenTranslucencyReflectionsMarkDownsampleFactor;
    FIntRect DownsampledViewRect = GetScaledRect(View.ViewRect, ViewportScale);

    View.BeginRenderView();

    FLumenTranslucencyRadianceCacheMarkParameters* PassParameters = GraphBuilder.AllocParameters<FLumenTranslucencyRadianceCacheMarkParameters>();

    {
        // Start from a copy of the cached view uniforms so only the rect-dependent members need patching.
        FViewUniformShaderParameters DownsampledTranslucencyViewParameters = *View.CachedViewUniformShaderParameters;

        FViewMatrices ViewMatrices = View.ViewMatrices;
        FViewMatrices PrevViewMatrices = View.PrevViewInfo.ViewMatrices;

        // Update the parts of DownsampledTranslucencyParameters which are dependent on the buffer size and view rect
        View.SetupViewRectUniformBufferParameters(
            DownsampledTranslucencyViewParameters,
            SceneTextures.Config.Extent,
            DownsampledViewRect,
            ViewMatrices,
            PrevViewMatrices);

        PassParameters->View.View = TUniformBufferRef<FViewUniformShaderParameters>::CreateUniformBufferImmediate(DownsampledTranslucencyViewParameters, UniformBuffer_SingleFrame);

        if (const FViewInfo* InstancedView = View.GetInstancedView())
        {
            // Same patch-up for the instanced view, using its own scaled rect.
            // Note that the matrices passed here are still the ones copied from the primary View above.
            InstancedView->SetupViewRectUniformBufferParameters(
                DownsampledTranslucencyViewParameters,
                SceneTextures.Config.Extent,
                GetScaledRect(InstancedView->ViewRect, ViewportScale),
                ViewMatrices,
                PrevViewMatrices);

            // The full view parameter struct is reinterpreted as the instanced-view parameter struct.
            PassParameters->View.InstancedView = TUniformBufferRef<FInstancedViewUniformShaderParameters>::CreateUniformBufferImmediate(
                reinterpret_cast<const FInstancedViewUniformShaderParameters&>(DownsampledTranslucencyViewParameters),
                UniformBuffer_SingleFrame);
        }
    }

    {
        FLumenTranslucencyRadianceCacheMarkPassUniformParameters& MarkPassParameters = *GraphBuilder.AllocParameters<FLumenTranslucencyRadianceCacheMarkPassUniformParameters>();
        SetupSceneTextureUniformParameters(GraphBuilder, &SceneTextures, View.FeatureLevel, ESceneTextureSetupMode::All, MarkPassParameters.SceneTextures);
        MarkPassParameters.RadianceCacheMarkParameters = RadianceCacheMarkParameters;
        // Override the fade size of the incoming mark parameters; the cvar is clamped to [.001, 16] before inverting.
        MarkPassParameters.RadianceCacheMarkParameters.InvClipmapFadeSizeForMark = 1.0f / FMath::Clamp(GLumenTranslucencyVolumeRadianceCacheClipmapFadeSize, .001f, 16.0f);

        // HZB inputs for the pixel shader.
        MarkPassParameters.FurthestHZBTexture = View.HZB;
        // Scale from viewport UV to HZB UV: view rect size over twice the HZB mip 0 size.
        // NOTE(review): presumably HZB mip 0 is half the view resolution - confirm.
        MarkPassParameters.ViewportUVToHZBBufferUV = FVector2f(
                float(View.ViewRect.Width()) / float(2 * View.HZBMipmap0Size.X),
                float(View.ViewRect.Height()) / float(2 * View.HZBMipmap0Size.Y));
        // HZB mip = max(floor(log2(DownsampleFactor)) - 1, 0).
        MarkPassParameters.HZBMipLevel = FMath::Max<float>((int32)FMath::FloorLog2((float)GLumenTranslucencyReflectionsMarkDownsampleFactor) - 1, 0.0f);

        PassParameters->MarkPass = GraphBuilder.CreateUniformBuffer(&MarkPassParameters);
    }

    View.ParallelMeshDrawCommandPasses[MeshPass].BuildRenderingCommands(GraphBuilder, SceneRenderer.Scene->GPUScene, PassParameters->InstanceCullingDrawParams);

    // The pass is flagged SkipRenderPass, so the lambda begins and ends the RHI render pass itself.
    // View and SceneRenderer are captured by reference and must stay alive until the pass executes.
    GraphBuilder.AddPass(
        RDG_EVENT_NAME("TranslucentSurfacesMarkPass"),
        PassParameters,
        ERDGPassFlags::Raster | ERDGPassFlags::SkipRenderPass,
        [&View, &SceneRenderer, MeshPass, PassParameters, ViewportScale, DownsampledViewRect](FRHICommandList& RHICmdList)
    {
        // No render targets are assigned to RPInfo here; only the resolve rect is set.
        FRHIRenderPassInfo RPInfo;
        RPInfo.ResolveRect = FResolveRect(DownsampledViewRect);
        RHICmdList.BeginRenderPass(RPInfo, TEXT("LumenTranslucencyRadianceCacheMark"));

        SceneRenderer.SetStereoViewport(RHICmdList, View, ViewportScale);
        View.ParallelMeshDrawCommandPasses[MeshPass].DispatchDraw(nullptr, RHICmdList, &PassParameters->InstanceCullingDrawParams);

        RHICmdList.EndRenderPass();
    });
}

/**
 * Pixel shader of the translucency radiance cache mark pass.
 * For every covered pixel it selects a radiance cache clipmap for the pixel's world position and,
 * if the pixel is closer than the furthest depth stored in the HZB, marks the surrounding
 * probes as used in the indirection texture.
 */
EARLYDEPTHSTENCIL
void MainPS(
    FVertexFactoryInterpolantsVSToPS Interpolants,
    FLumenTranslucencyRadianceCacheMarkInterpolantsVSToPS PassInterpolants,
    in INPUT_POSITION_QUALIFIERS float4 SvPosition : SV_Position
    OPTIONAL_IsFrontFace)
{
    ResolvedView = ResolveView();
    FMaterialPixelParameters MaterialParameters = GetMaterialPixelParameters(Interpolants, SvPosition);

    FPixelMaterialInputs PixelMaterialInputs;

    {
        float4 ScreenPosition = SvPositionToResolvedScreenPosition(SvPosition);
        float3 TranslatedWorldPosition = SvPositionToResolvedTranslatedWorld(SvPosition);
        CalcMaterialParametersEx(MaterialParameters, PixelMaterialInputs, SvPosition, ScreenPosition, bIsFrontFace, TranslatedWorldPosition, TranslatedWorldPosition);
    }

    GetMaterialCoverageAndClipping(MaterialParameters, PixelMaterialInputs);

    //@todo - conservative coverage
    // The clipmap choice is dithered with per-pixel interleaved gradient noise.
    uint ClipmapIndex = GetRadianceProbeClipmapForMark(LWCHackToFloat(MaterialParameters.AbsoluteWorldPosition), InterleavedGradientNoise(MaterialParameters.SvPosition.xy, View.StateFrameIndexMod8));

    if (IsValidRadianceCacheClipmapForMark(ClipmapIndex))
    {
        // Fallback when HZB culling is compiled out: effectively no depth limit.
        float MaxDepth = 1000000.0f;

#define CULL_TO_HZB 1
#if CULL_TO_HZB
        // Convert the pixel position to viewport UV, then to HZB UV, and fetch the stored depth at the configured mip.
        float2 HZBScreenUV = (SvPosition.xy - View.ViewRectMin.xy) * View.ViewSizeAndInvSize.zw * LumenTranslucencyRadianceCacheMarkPass.ViewportUVToHZBBufferUV;

        MaxDepth = ConvertFromDeviceZ(LumenTranslucencyRadianceCacheMarkPass.FurthestHZBTexture.SampleLevel(GlobalPointClampedSampler, HZBScreenUV, LumenTranslucencyRadianceCacheMarkPass.HZBMipLevel).x);
#endif

        // Only pixels in front of the HZB depth mark probes.
        if (MaterialParameters.ScreenPosition.w < MaxDepth)
        {
            MarkPositionUsedInIndirectionTexture(LWCHackToFloat(MaterialParameters.AbsoluteWorldPosition), ClipmapIndex);
        }
    }
}

#### MarkRadianceProbes(ScreenProbes) 87x81
- C++(LumenScreenProbeGather.cpp)
```C++
/**
 * Adds an indirect compute pass running FMarkRadianceProbesUsedByScreenProbesCS.
 *
 * @param GraphBuilder                 Render graph receiving the pass.
 * @param View                         Supplies the view uniform buffer and the shader map.
 * @param SceneTextures                Supplies the scene texture uniform buffer.
 * @param ScreenProbeParameters        Copied into the pass parameters; its ProbeIndirectArgs buffer drives the dispatch.
 * @param RadianceCacheMarkParameters  Copied into the pass parameters.
 * @param ComputePassFlags             Pass flags forwarded to the compute pass.
 */
static void ScreenGatherMarkUsedProbes(
    FRDGBuilder& GraphBuilder,
    const FViewInfo& View,
    const FSceneTextures& SceneTextures,
    FScreenProbeParameters& ScreenProbeParameters,
    const LumenRadianceCache::FRadianceCacheMarkParameters& RadianceCacheMarkParameters,
    ERDGPassFlags ComputePassFlags)
{
    using FMarkShader = FMarkRadianceProbesUsedByScreenProbesCS;

    FMarkShader::FParameters* MarkParameters = GraphBuilder.AllocParameters<FMarkShader::FParameters>();
    MarkParameters->View = View.ViewUniformBuffer;
    MarkParameters->SceneTexturesStruct = SceneTextures.UniformBuffer;
    MarkParameters->ScreenProbeParameters = ScreenProbeParameters;
    MarkParameters->RadianceCacheMarkParameters = RadianceCacheMarkParameters;

    // Permutation id 0 is requested explicitly.
    auto MarkShader = View.ShaderMap->GetShader<FMarkShader>(0);

    // Byte offset of the ThreadPerProbe entry inside the indirect argument buffer.
    const uint32 IndirectArgsOffset = static_cast<uint32>(EScreenProbeIndirectArgs::ThreadPerProbe) * sizeof(FRHIDispatchIndirectParameters);

    FComputeShaderUtils::AddPass(
        GraphBuilder,
        RDG_EVENT_NAME("MarkRadianceProbes(ScreenProbes) %ux%u", MarkParameters->ScreenProbeParameters.ScreenProbeAtlasViewSize.X, MarkParameters->ScreenProbeParameters.ScreenProbeAtlasViewSize.Y),
        ComputePassFlags,
        MarkShader,
        MarkParameters,
        MarkParameters->ScreenProbeParameters.ProbeIndirectArgs,
        IndirectArgsOffset);
}

/**
 * Writes USED_PROBE_INDEX into the indirection texture for one probe of one clipmap.
 * Coordinates outside the clipmap, or an out-of-range clipmap index, are ignored.
 */
void MarkProbeIndirectionTextureCoord(int3 ProbeCoord, uint ClipmapIndex)
{
    const bool bInsideClipmap = all(ProbeCoord >= 0) && all(ProbeCoord < (int3)RadianceProbeClipmapResolutionForMark);
    const bool bValidClipmap = ClipmapIndex < NumRadianceProbeClipmapsForMark;

    if (!(bInsideClipmap && bValidClipmap))
    {
        return;
    }

    // Clipmaps are laid out side by side along X in the indirection texture.
    const int3 ClipmapTexelOffset = int3(ClipmapIndex * RadianceProbeClipmapResolutionForMark, 0, 0);
    RWRadianceProbeIndirectionTexture[ProbeCoord + ClipmapTexelOffset] = USED_PROBE_INDEX;
}

/**
 * Marks the 2x2x2 block of probes surrounding a world position as used in the given clipmap.
 */
void MarkPositionUsedInIndirectionTexture(float3 WorldPosition, uint ClipmapIndex)
{
    const float3 ProbeCoordFloat = WorldPosition * WorldPositionToRadianceProbeCoordScaleForMark(ClipmapIndex) + WorldPositionToRadianceProbeCoordBiasForMark(ClipmapIndex);
    const int3 BottomCornerProbeCoord = floor(ProbeCoordFloat - 0.5f);

    // Visit the eight corners with Z varying fastest, then Y, then X.
    for (int CornerIndex = 0; CornerIndex < 8; ++CornerIndex)
    {
        const int3 CornerOffset = int3((CornerIndex >> 2) & 1, (CornerIndex >> 1) & 1, CornerIndex & 1);
        MarkProbeIndirectionTextureCoord(BottomCornerProbeCoord + CornerOffset, ClipmapIndex);
    }
}

#### MarkRadianceProbesUsedByTranslucencyVolume
- C++(LumenTranslucencyVolumeLighting.cpp)
```C++
/**
 * Adds a compute pass running FMarkRadianceProbesUsedByTranslucencyVolumeCS over the translucency GI grid.
 *
 * @param GraphBuilder                 Render graph receiving the pass.
 * @param View                         Supplies the view uniform buffer and the shader map.
 * @param VolumeParameters             Translucency lighting volume parameters; TranslucencyGIGridSize sizes the dispatch.
 * @param RadianceCacheMarkParameters  Copied into the pass parameters.
 * @param ComputePassFlags             Pass flags forwarded to the compute pass.
 */
static void MarkRadianceProbesUsedByTranslucencyVolume(
    FRDGBuilder& GraphBuilder,
    const FViewInfo& View,
    FLumenTranslucencyLightingVolumeParameters VolumeParameters,
    const LumenRadianceCache::FRadianceCacheMarkParameters& RadianceCacheMarkParameters,
    ERDGPassFlags ComputePassFlags)
{
    using FMarkVolumeCS = FMarkRadianceProbesUsedByTranslucencyVolumeCS;

    // Number of thread groups derived from the grid size and the shader's group size.
    const FIntVector GroupCount = FComputeShaderUtils::GetGroupCount(VolumeParameters.TranslucencyGIGridSize, FMarkVolumeCS::GetGroupSize());

    FMarkVolumeCS::FParameters* MarkParameters = GraphBuilder.AllocParameters<FMarkVolumeCS::FParameters>();
    MarkParameters->View = View.ViewUniformBuffer;
    MarkParameters->RadianceCacheMarkParameters = RadianceCacheMarkParameters;
    MarkParameters->VolumeParameters = VolumeParameters;

    // NOTE(review): the permutation vector is declared but GetShader is called without it.
    FMarkVolumeCS::FPermutationDomain PermutationVector;
    auto MarkShader = View.ShaderMap->GetShader<FMarkVolumeCS>();

    FComputeShaderUtils::AddPass(
        GraphBuilder,
        RDG_EVENT_NAME("MarkRadianceProbesUsedByTranslucencyVolume"),
        ComputePassFlags,
        MarkShader,
        MarkParameters,
        GroupCount);
}

/**
 * Convenience overload: forwards to the three-argument ComputeCellTranslatedWorldPosition
 * and discards its third (output) argument.
 */
float3 ComputeCellTranslatedWorldPosition(uint3 GridCoordinate, float3 CellOffset)
{
    float IgnoredOutput;
    return ComputeCellTranslatedWorldPosition(GridCoordinate, CellOffset, IgnoredOutput);
}

/**
 * Returns the cell position with the primary view's pre-view translation subtracted
 * from the translated world position.
 */
float3 ComputeCellWorldPosition(uint3 GridCoordinate, float3 CellOffset)
{
    const float3 TranslatedWorldPosition = ComputeCellTranslatedWorldPosition(GridCoordinate, CellOffset);
    const float3 PreViewTranslation = LWCHackToFloat(PrimaryView.PreViewTranslation);
    return TranslatedWorldPosition - PreViewTranslation;
}

// HZB inputs sampled by GetMaxVisibleDepth.
// HZB texture whose .x channel is converted with ConvertFromDeviceZ.
Texture2D FurthestHZBTexture;
// Base mip level used when sampling the HZB.
float HZBMipLevel;
// Scale applied to viewport UV to obtain HZB UV.
float2 ViewportUVToHZBBufferUV;

/**
 * Returns the scene depth stored in the furthest-depth HZB at the centre of a froxel column.
 *
 * @param GridCoordinate  XY coordinate of the froxel in the translucency GI grid.
 * @return Depth obtained by converting the sampled HZB device Z with ConvertFromDeviceZ.
 */
float GetMaxVisibleDepth(uint2 GridCoordinate)
{
    // Froxel centre -> pixel position (grid cell is 2^TranslucencyGIGridPixelSizeShift pixels wide) -> viewport UV -> HZB UV.
    float2 HZBScreenUV = (GridCoordinate.xy + .5f) * (1U << TranslucencyGIGridPixelSizeShift) * View.ViewSizeAndInvSize.zw * ViewportUVToHZBBufferUV;
    // Sample one mip coarser than HZBMipLevel.
    float TrilinearFootprintMipBias = 1.0f;
    return ConvertFromDeviceZ(FurthestHZBTexture.SampleLevel(GlobalPointClampedSampler, HZBScreenUV, HZBMipLevel + TrilinearFootprintMipBias).x);
}

/**
 * Tests whether a froxel starts in front of the maximum visible depth of its screen column.
 * The froxel's Z slice is biased one slice towards the camera (clamped at slice 0) before the comparison.
 */
bool IsFroxelVisible(uint3 GridCoordinate)
{
    const float TrilinearFootprintBias = -1.0f;
    const float BiasedZSlice = max((float)GridCoordinate.z + TrilinearFootprintBias, 0.0f);
    const float FroxelMinSceneDepth = ComputeDepthFromZSlice(BiasedZSlice);

    return FroxelMinSceneDepth < GetMaxVisibleDepth(GridCoordinate.xy);
}

- シェーダー(LumenRadianceCacheMarkCommon.ush)
```C++
/**
 * Finds the first clipmap whose edge fade at the given position exceeds the dither value.
 *
 * @param WorldSpacePosition   Position to look up.
 * @param ClipmapDitherRandom  Threshold compared against the edge fade of each clipmap.
 * @return The selected clipmap index with the min/max corner probe coordinates around the position.
 *         If no clipmap qualifies, ClipmapIndex equals NumRadianceProbeClipmapsForMark and the
 *         coordinates stay zero-initialized.
 */
FRadianceProbeCoord GetRadianceProbeCoord(float3 WorldSpacePosition, float ClipmapDitherRandom)
{
    FRadianceProbeCoord Result = (FRadianceProbeCoord)0;
    Result.ClipmapIndex = NumRadianceProbeClipmapsForMark;

    for (uint CandidateClipmap = 0; CandidateClipmap < NumRadianceProbeClipmapsForMark; ++CandidateClipmap)
    {
        const float3 CoordInClipmap = WorldSpacePosition * WorldPositionToRadianceProbeCoordScaleForMark(CandidateClipmap) + WorldPositionToRadianceProbeCoordBiasForMark(CandidateClipmap);

        // Per-axis fade towards the lower and upper faces of the clipmap, each saturated to [0, 1].
        const float3 FadeFromMin = saturate((CoordInClipmap - .5f) * InvClipmapFadeSizeForMark);
        const float3 FadeFromMax = saturate(((float3)RadianceProbeClipmapResolutionForMark - .5f - CoordInClipmap) * InvClipmapFadeSizeForMark);
        const float EdgeFade = min(min3(FadeFromMin.x, FadeFromMin.y, FadeFromMin.z), min3(FadeFromMax.x, FadeFromMax.y, FadeFromMax.z));

        if (EdgeFade > ClipmapDitherRandom)
        {
            const int3 MinCornerCoord = floor(CoordInClipmap - 0.5f);

            Result.ProbeMinCoord = MinCornerCoord;
            Result.ProbeMaxCoord = MinCornerCoord + 1;
            Result.ClipmapIndex  = CandidateClipmap;
            return Result;
        }
    }

    return Result;
}

/**
 * Returns only the clipmap index selected by GetRadianceProbeCoord for the given position and dither value.
 */
uint GetRadianceProbeClipmapForMark(float3 WorldSpacePosition, float ClipmapDitherRandom)
{
    return GetRadianceProbeCoord(WorldSpacePosition, ClipmapDitherRandom).ClipmapIndex;
}

/**
 * Overload without dithering input: uses a fixed threshold of .01 for the clipmap selection.
 */
uint GetRadianceProbeClipmapForMark(float3 WorldSpacePosition)
{
    const float FixedDitherThreshold = .01f;
    return GetRadianceProbeClipmapForMark(WorldSpacePosition, FixedDitherThreshold);
}
yasukichi commented 1 year ago

UpdateCacheForUsedProbes

/**
 * Walks last frame's indirection texture (one thread per texel) and decides, for every probe that
 * existed last frame, whether it is carried over to this frame's indirection texture or its index
 * is pushed onto the free list.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
void UpdateCacheForUsedProbesCS(
    uint3 GroupId : SV_GroupID,
    uint3 DispatchThreadId : SV_DispatchThreadID,
    uint3 GroupThreadId : SV_GroupThreadID)
{
    // Clipmaps are laid out side by side along X; split the X coordinate into clipmap index and local coordinate.
    const uint ClipmapIndex = DispatchThreadId.x / RadianceProbeClipmapResolution;
    const uint3 LastFrameProbeCoord = uint3(DispatchThreadId.x - ClipmapIndex * RadianceProbeClipmapResolution, DispatchThreadId.yz);

    if (ClipmapIndex >= NumRadianceProbeClipmaps || !all(LastFrameProbeCoord < RadianceProbeClipmapResolution))
    {
        return;
    }

    const uint3 LastFrameTexelCoord = uint3(LastFrameProbeCoord.x + ClipmapIndex * RadianceProbeClipmapResolution, LastFrameProbeCoord.yz);
    const uint LastFrameProbeIndex = LastFrameRadianceProbeIndirectionTexture.Load(uint4(LastFrameTexelCoord, 0));

    if (LastFrameProbeIndex == INVALID_PROBE_INDEX)
    {
        // No probe lived in this texel last frame.
        return;
    }

    // Reproject: last frame's probe coordinate -> world position -> this frame's probe coordinate.
    const float3 ProbeWorldPosition = LastFrameProbeCoord * PackedLastFrameRadianceProbeCoordToWorldPosition[ClipmapIndex].w + PackedLastFrameRadianceProbeCoordToWorldPosition[ClipmapIndex].xyz;
    const int3 ProbeCoord = GetRadianceProbeCoord(ProbeWorldPosition, ClipmapIndex)/*=floor(ProbeWorldPosition * GetWorldPositionToRadianceProbeCoordScale(ClipmapIndex) + GetWorldPositionToRadianceProbeCoordBias(ClipmapIndex))*/;

    bool bKeepProbe = false;

    if (all(ProbeCoord >= 0) && all(ProbeCoord < (int3)RadianceProbeClipmapResolution))
    {
        const uint3 TexelCoord = uint3(ProbeCoord.x + ClipmapIndex * RadianceProbeClipmapResolution, ProbeCoord.yz);
        const uint CurrentMarker = RWRadianceProbeIndirectionTexture[TexelCoord];
        const uint LastUsedFrameNumber = RWProbeLastUsedFrame[LastFrameProbeIndex];

        const bool bMarkedThisFrame = CurrentMarker == USED_PROBE_INDEX;
        const bool bUsedRecently = FrameNumber - LastUsedFrameNumber < NumFramesToKeepCachedProbes;

        if (bMarkedThisFrame || bUsedRecently)
        {
            bKeepProbe = true;

            // Only probes marked this frame refresh their last-used frame.
            if (bMarkedThisFrame)
            {
                RWProbeLastUsedFrame[LastFrameProbeIndex] = FrameNumber;
            }

            RWRadianceProbeIndirectionTexture[TexelCoord] = LastFrameProbeIndex;
        }
    }

    if (!bKeepProbe)
    {
        // Probe left the clipmap or expired: append its index to the free list.
        int FreeSlot;
        InterlockedAdd(RWProbeFreeListAllocator[0], 1, FreeSlot);
        RWProbeFreeList[FreeSlot] = LastFrameProbeIndex;
    }
}


<img width="783" alt="image" src="https://github.com/yasukichi/testcode/assets/14350715/c0c89321-0897-47bf-82ab-47e2b4a81fbc">
yasukichi commented 1 year ago

AllocateUsedProbes

}

- シェーダー(LumenRadianceCacheMarkCommon.ush)
```C++
    RWTexture3D<uint> RWRadianceProbeIndirectionTexture;

// Probe allocation counter; element 0 is incremented atomically when a new probe index is allocated.
// The element type is uint, as required by the InterlockedAdd calls that use it.
RWBuffer<uint> RWProbeAllocator;

/**
 * Resolves every non-empty texel of the indirection texture to a concrete probe index
 * (one thread per texel). Texels holding USED_PROBE_INDEX get a newly allocated probe,
 * texels already holding an index keep it. Each resolved probe adds its trace cost to
 * the priority histogram; probes whose index does not fit in MaxNumProbes are invalidated.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
void AllocateUsedProbesCS(
    uint3 GroupId : SV_GroupID,
    uint3 DispatchThreadId : SV_DispatchThreadID,
    uint3 GroupThreadId : SV_GroupThreadID)
{
    // Clipmaps are laid out side by side along X.
    const uint ClipmapIndex = DispatchThreadId.x / RadianceProbeClipmapResolution;
    const uint3 ProbeCoord = uint3(DispatchThreadId.x - ClipmapIndex * RadianceProbeClipmapResolution, DispatchThreadId.y, DispatchThreadId.z);

    if (ClipmapIndex >= NumRadianceProbeClipmaps || !all(ProbeCoord < RadianceProbeClipmapResolution))
    {
        return;
    }

    const uint IndirectionEntry = RWRadianceProbeIndirectionTexture[DispatchThreadId];
    const float3 ProbeWorldPosition = ProbeCoord * GetRadianceProbeCoordToWorldPositionScale(ClipmapIndex) + GetRadianceProbeCoordToWorldPositionBias(ClipmapIndex);

    if (IndirectionEntry == INVALID_PROBE_INDEX)
    {
        // Nothing marked or cached at this texel.
        return;
    }

    uint ProbeIndex = INVALID_PROBE_INDEX;
    uint LastTracedFrameIndex = 0;
    uint LastUsedFrameIndex = 0;

    if (IndirectionEntry != USED_PROBE_INDEX)
    {
        // The texel already refers to an existing probe: reuse it and read its frame bookkeeping.
        ProbeIndex = IndirectionEntry;
        LastTracedFrameIndex = RWProbeLastTracedFrame[ProbeIndex];
        LastUsedFrameIndex = RWProbeLastUsedFrame[ProbeIndex];
    }
    else
    {
        // The texel was marked this frame and has no probe yet: allocate one.
        #if PERSISTENT_CACHE
            // Try to pop an index from the free list first, otherwise grow the allocator.
            int NumFree;
            InterlockedAdd(RWProbeFreeListAllocator[0], -1, NumFree);

            if (NumFree > 0)
            {
                ProbeIndex = ProbeFreeList[NumFree - 1];
            }
            else
            {
                InterlockedAdd(RWProbeAllocator[0], 1, ProbeIndex);
            }
        #else
            InterlockedAdd(RWProbeAllocator[0], 1, ProbeIndex);
        #endif

        LastTracedFrameIndex = PROBE_FRAME_INDEX_NEVER_TRACED;
        LastUsedFrameIndex = FrameNumber;

        RWProbeLastTracedFrame[ProbeIndex] = LastTracedFrameIndex;
        RWProbeLastUsedFrame[ProbeIndex] = LastUsedFrameIndex;
    }

    uint ResolvedEntry = INVALID_PROBE_INDEX;

    if (ProbeIndex < MaxNumProbes)
    {
        // Update histogram
        const uint PriorityBucketIndex = GetPriorityBucketIndex(LastTracedFrameIndex, LastUsedFrameIndex, ProbeWorldPosition);
        const uint ProbeTraceCost = GetProbeTraceCost(ProbeWorldPosition);
        InterlockedAdd(RWPriorityHistogram[PriorityBucketIndex], ProbeTraceCost);

        ResolvedEntry = ProbeIndex;
    }

    // Either the final probe index, or INVALID_PROBE_INDEX when the index is out of range.
    RWRadianceProbeIndirectionTexture[DispatchThreadId] = ResolvedEntry;
}

yasukichi commented 1 year ago

AllocateProbeTraces

}

- シェーダー(LumenRadianceCacheUpdate.usf)
```C++
/**
 * Iterate again over all probes and update them based on the histogram priority max update bucket
 *
 * One thread per indirection texel. A probe is traced when its priority bucket is at or below
 * MaxUpdateBucket[0]; probes that fall exactly into the max bucket are additionally limited by
 * MaxTracesFromMaxUpdateBucket[0]. Each traced probe appends one entry to RWProbeTraceData and
 * has its last-traced frame set to FrameNumber.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
void AllocateProbeTracesCS(
    uint3 GroupId : SV_GroupID,
    uint3 DispatchThreadId : SV_DispatchThreadID,
    uint3 GroupThreadId : SV_GroupThreadID)
{
    // Clipmaps are laid out side by side along X; split X into clipmap index and local probe coordinate.
    uint ClipmapIndex = DispatchThreadId.x / RadianceProbeClipmapResolution;
    uint3 ProbeCoord = uint3(DispatchThreadId.x - ClipmapIndex * RadianceProbeClipmapResolution, DispatchThreadId.y, DispatchThreadId.z);

    if (ClipmapIndex < NumRadianceProbeClipmaps && all(ProbeCoord < RadianceProbeClipmapResolution))
    {
        const uint ProbeIndex = RadianceProbeIndirectionTexture[DispatchThreadId];
        float3 ProbeWorldPosition = ProbeCoord * GetRadianceProbeCoordToWorldPositionScale(ClipmapIndex) + GetRadianceProbeCoordToWorldPositionBias(ClipmapIndex);

        // Texels without a probe index below MaxNumProbes are skipped.
        if (ProbeIndex < MaxNumProbes)
        {
            const uint MaxUpdateBucketIndex = MaxUpdateBucket[0];
            const uint LocalMaxTracesFromMaxUpdateBucket = MaxTracesFromMaxUpdateBucket[0];

            const uint ProbeTraceCost = GetProbeTraceCost(ProbeWorldPosition);
/*
Reference copy of the helper called above: the cost is 16, 4 or 1 depending on the squared distance to the camera.

uint GetProbeTraceCost(float3 ProbeWorldPosition)
{
    float3 CameraVector = LWCHackToFloat(PrimaryView.WorldCameraOrigin) - ProbeWorldPosition;
    float DistanceFromCameraSq = dot(CameraVector, CameraVector);

    if (DistanceFromCameraSq < SupersampleDistanceFromCameraSq)
    {
        return 16;
    }

    if (DistanceFromCameraSq < DownsampleDistanceFromCameraSq)
    {
        return 4;
    }

    return 1;
}
*/
            const uint LastTracedFrameIndex = RWProbeLastTracedFrame[ProbeIndex];
            const uint LastUsedFrameIndex = ProbeLastUsedFrame[ProbeIndex];

            // Update everything up to the max selected priority bucket
            const uint PriorityBucketIndex = GetPriorityBucketIndex(LastTracedFrameIndex, LastUsedFrameIndex, ProbeWorldPosition);
            bool bTraceProbe = PriorityBucketIndex <= MaxUpdateBucketIndex;

            // Can't trace more than MaxTracesFromMaxUpdateBucket from the last bucket
            // Make an exception for the first bucket, as it contains probes without valid data
            if (bTraceProbe && PriorityBucketIndex == MaxUpdateBucketIndex && MaxUpdateBucketIndex > 0)
            {
                // Accumulate allocated probe trace cost for the last bucket
                // Element [1] counts cost claimed from the max bucket; the cost stays added even if this probe is rejected below.
                uint ProbeTraceAllocatedFromMaxUpdateBucket = 0;
                InterlockedAdd(RWProbesToUpdateTraceCost[1], ProbeTraceCost, ProbeTraceAllocatedFromMaxUpdateBucket);

                if (ProbeTraceAllocatedFromMaxUpdateBucket + ProbeTraceCost > LocalMaxTracesFromMaxUpdateBucket)
                {
                    bTraceProbe = false;
                }
            }

            if (bTraceProbe)
            {
                // Element [0] accumulates the total cost of all probes selected for tracing.
                InterlockedAdd(RWProbesToUpdateTraceCost[0], ProbeTraceCost);

                uint TraceIndex;
                InterlockedAdd(RWProbeTraceAllocator[0], 1, TraceIndex);

                // xyz = probe world position; w = bit pattern with the clipmap index shifted into bits 24+ and ORed with the probe index.
                RWProbeTraceData[TraceIndex] = float4(ProbeWorldPosition, asfloat((ClipmapIndex << 24) | ProbeIndex));
                RWProbeLastTracedFrame[ProbeIndex] = FrameNumber;
            }
        }
    }
}

/**
 * Maps a probe to a bucket of the update priority histogram. Lower bucket indices are more urgent.
 *
 * @param LastTracedFrameIndex  Frame the probe was last traced, or PROBE_FRAME_INDEX_NEVER_TRACED.
 * @param LastUsedFrameIndex    Frame the probe was last used.
 * @param ProbeWorldPosition    Probe position, used for the distance to the camera.
 * @return 0 for probes that were never traced, otherwise a value in [1, PRIORITY_HISTOGRAM_SIZE - 1].
 */
uint GetPriorityBucketIndex(uint LastTracedFrameIndex, uint LastUsedFrameIndex, float3 ProbeWorldPosition)
{
    // Probes without any traced data always go to the most important bucket.
    if (LastTracedFrameIndex == PROBE_FRAME_INDEX_NEVER_TRACED)
    {
        return 0;
    }

    const uint FramesBetweenTracedAndUsed = LastUsedFrameIndex - LastTracedFrameIndex;

    // Update frequency in [0, MAX_UPDATE_FREQUENCY]; it shrinks with the distance from the camera.
    const float3 ToCamera = LWCHackToFloat(PrimaryView.WorldCameraOrigin) - ProbeWorldPosition;
    const float DistanceFromViewer = sqrt(dot(ToCamera, ToCamera));
    const float Frequency = MAX_UPDATE_FREQUENCY - clamp(DistanceFromViewer * FirstClipmapWorldExtentRcp, 0.0f, MAX_UPDATE_FREQUENCY);

    // Importance grows with the traced-to-used frame gap and with the frequency.
    const float UpdateImportance = FramesBetweenTracedAndUsed * (Frequency + 1.0f);

    // Normalize histogram based on the expected range
    const float HistogramScale = (PRIORITY_HISTOGRAM_SIZE * NumProbesToTraceBudget) / (MaxNumProbes * (MAX_UPDATE_FREQUENCY + 1.0f));

    // Higher importance gives a lower bucket index; the clamp keeps bucket 0 for never-traced probes.
    const uint BucketIndex = PRIORITY_HISTOGRAM_SIZE - 1 - clamp((UpdateImportance - 1.0f) * HistogramScale, 0, PRIORITY_HISTOGRAM_SIZE - 2);
    return BucketIndex;
}
yasukichi commented 1 year ago

ComputeProbeWorldOffsets

}

- シェーダー(LumenRadianceCache.usf)
```C++
// Scratch storage for the two-stage reduction below: xyz = candidate offset, w = distance to the nearest surface at that offset.
groupshared float4 SharedBestOffset[THREADGROUP_SIZE];
groupshared float4 SharedBestOffset2[THREADGROUP_SIZE];

/**
 * One thread group per probe trace (GroupId.z). If the probe centre is closer to a surface than
 * 5% of the clipmap's probe spacing scale, the group evaluates a 4x4x4 grid of candidate offsets,
 * picks the one furthest from any surface and stores it in RWProbeWorldOffset (w = 1).
 * Otherwise, or when no candidate reaches the threshold, the stored offset is 0.
 * NOTE(review): the indexing below assumes THREADGROUP_SIZE is 64 - confirm.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void ComputeProbeWorldOffsetsCS(
    uint3 GroupId : SV_GroupID,
    uint GroupThreadId : SV_GroupThreadID)
{
    uint ProbeTraceIndex = GroupId.z;
    float3 ProbeWorldCenter;
    uint ClipmapIndex;
    uint ProbeIndex;
    GetProbeTraceDataNoOffset(ProbeTraceIndex, ProbeWorldCenter, ClipmapIndex, ProbeIndex);
/*
Reference copy of the helper called above.

void GetProbeTraceDataNoOffset(uint ProbeTraceIndex, out float3 ProbeWorldCenter, out uint ClipmapIndex, out uint ProbeIndex)
{
    ProbeWorldCenter = ProbeTraceData[ProbeTraceIndex].xyz;
    uint PackedW = asuint(ProbeTraceData[ProbeTraceIndex].w);
    ClipmapIndex = PackedW >> 24;
    ProbeIndex = PackedW & 0xFFFFFF;
}
*/

    float DistanceToSurface = GetDistanceToNearestSurfaceGlobal(ProbeWorldCenter);
    float TooCloseThreshold = .05f * GetRadianceProbeCoordToWorldPositionScale(ClipmapIndex);

    // The condition depends only on per-group data (GroupId.z), so the barriers inside the branch
    // are reached by all threads of the group or by none.
    if (DistanceToSurface < TooCloseThreshold)
    {
        float MaxVoxelOffset = .25f;

        // Decode the thread index into a 4x4x4 grid coordinate.
        uint X = GroupThreadId % 4;
        uint Y = (GroupThreadId % 16) / 4;
        uint Z = GroupThreadId / 16;
        {
            // Map grid coordinates 0..3 to [-1, 1], then scale by MaxVoxelOffset and the clipmap's probe spacing scale.
            float3 Offset = (float3(X, Y, Z) * 2.0f / 3.0f - 1.0f) * MaxVoxelOffset * GetRadianceProbeCoordToWorldPositionScale(ClipmapIndex);
            float SampleDistanceToSurface = GetDistanceToNearestSurfaceGlobal(ProbeWorldCenter + Offset);
            SharedBestOffset[GroupThreadId] = float4(Offset, SampleDistanceToSurface);
        }

        GroupMemoryBarrierWithGroupSync();

        // Stage 1: the first 8 threads each reduce 8 consecutive candidates, keeping the largest distance.
        if (GroupThreadId < 8)
        {
            float4 BestOffset = SharedBestOffset[GroupThreadId * 8];

            for (uint i = 1; i < 8; i++)
            {
                if (SharedBestOffset[GroupThreadId * 8 + i].w > BestOffset.w)
                {
                    BestOffset = SharedBestOffset[GroupThreadId * 8 + i];
                }
            }

            SharedBestOffset2[GroupThreadId] = BestOffset;
        }

        GroupMemoryBarrierWithGroupSync();

        // Stage 2: thread 0 reduces the 8 partial results and writes the final offset.
        if (GroupThreadId == 0)
        {
            float4 BestOffset = SharedBestOffset2[0];

            for (uint i = 1; i < 8; i++)
            {
                if (SharedBestOffset2[i].w > BestOffset.w)
                {
                    BestOffset = SharedBestOffset2[i];
                }
            }

            // Accept the best candidate only if it clears the threshold; w = 1 flags a valid offset.
            if (BestOffset.w >= TooCloseThreshold)
            {
                RWProbeWorldOffset[ProbeIndex] = float4(BestOffset.xyz, 1);
            }
            else
            {
                RWProbeWorldOffset[ProbeIndex] = 0;
            }
        }
    }
    else
    {
        // Probe centre is far enough from geometry: no offset. Every thread of the group writes the same value.
        RWProbeWorldOffset[ProbeIndex] = 0;
    }
}
image

ScatterScreenProbeBRDFToRadianceProbes

}

- シェーダー(LumenRadianceCache.usf)
```C++
/**
 * One thread group per screen probe (GroupId.xy is the screen probe atlas coordinate).
 * Adds the screen probe's BRDF PDF spherical harmonic coefficients, quantized to integers,
 * into the PDF buffers of the radiance probes surrounding the screen probe's world position.
 * GroupThreadId.x selects the radiance probe corner, GroupThreadId.y strides over the coefficients.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void ScatterScreenProbeBRDFToRadianceProbesCS(
    uint3 GroupId : SV_GroupID,
    uint3 GroupThreadId : SV_GroupThreadID)
{
    // PDF pass: Accumulate BRDF SH from screen probes
    //  Scatter: Scatter BRDF SH into 8 radiance probes using atomics to buffer
    //      Threadgroup per probe

    uint2 ScreenProbeAtlasCoord = GroupId.xy;

    uint ScreenProbeIndex = ScreenProbeAtlasCoord.y * ScreenProbeAtlasViewSize.x + ScreenProbeAtlasCoord.x;
    uint2 ScreenProbeScreenPosition = GetScreenProbeScreenPosition(ScreenProbeIndex);

    if (ScreenProbeIndex < GetNumScreenProbes() && ScreenProbeAtlasCoord.x < ScreenProbeAtlasViewSize.x)
    {
        float2 ScreenUV = GetScreenUVFromScreenProbePosition(ScreenProbeScreenPosition);
        float SceneDepth = GetScreenProbeDepth(ScreenProbeAtlasCoord);

        // Screen probes with a non-positive depth are skipped.
        if (SceneDepth > 0)
        {
            float3 WorldPosition = GetWorldPositionFromScreenUV(ScreenUV, SceneDepth);
            uint2 ScreenTileCoord = GetScreenTileCoord(ScreenProbeScreenPosition);
            // The clipmap selection is dithered per screen tile with interleaved gradient noise.
            uint ClipmapIndex = GetRadianceProbeClipmap(WorldPosition, InterleavedGradientNoise(ScreenTileCoord, View.StateFrameIndexMod8));
/*
Reference copy of the noise helper used above.

float InterleavedGradientNoise( float2 uv, float FrameId )
{
    // magic values are found by experimentation
    uv += FrameId * (float2(47, 17) * 0.695f);

    const float3 magic = float3( 0.06711056f, 0.00583715f, 52.9829189f );
    return frac(magic.z * frac(dot(uv, magic.xy)));
}
*/

            if (ClipmapIndex < NumRadianceProbeClipmaps)
            {
                int3 BottomCornerProbeCoord = GetRadianceProbeBottomCornerCoord(WorldPosition, ClipmapIndex);
                int3 IndirectionTextureCoord = BottomCornerProbeCoord + int3(ClipmapIndex * RadianceProbeClipmapResolution, 0, 0);
                // Bits 0..2 of GroupThreadId.x select the X/Y/Z corner offset (0 or 1) of the surrounding probe.
                // NOTE(review): this covers each of the 8 corners exactly once only if THREADGROUP_SIZE is 8 - confirm.
                uint ProbeIndex = RadianceProbeIndirectionTexture[IndirectionTextureCoord + int3(GroupThreadId.x & 0x1, (GroupThreadId.x & 0x2) >> 1, (GroupThreadId.x & 0x4) >> 2)];
                bool bTwoSidedFoliage = GetScreenProbeIsTwoSidedFoliage(ScreenProbeAtlasCoord);
/*
Reference copy of the helper called above.

bool GetScreenProbeIsTwoSidedFoliage(uint2 ScreenProbeAtlasCoord)
{
    uint Encoded = ScreenProbeWorldSpeed.Load(int3(ScreenProbeAtlasCoord, 0));
    return (Encoded & 0x8000) != 0;
}
*/

                //@todo - skip probes which are cached this frame
                if (ProbeIndex != INVALID_PROBE_INDEX)
                {
                    uint SHBaseIndex = (ScreenProbeAtlasCoord.y * ScreenProbeAtlasViewSize.x + ScreenProbeAtlasCoord.x) * NUM_PDF_SH_COEFFICIENTS/*=9*/;
                    uint ProbeSHBaseCoord = ProbeIndex * NUM_RADIANCE_PROBE_PDF_COEFFICIENTS/*NUM_PDF_SH_COEFFICIENTS+1*/;

                    for (uint CoefficientIndex = GroupThreadId.y; CoefficientIndex < NUM_RADIANCE_PROBE_PDF_COEFFICIENTS; CoefficientIndex += THREADGROUP_SIZE)
                    {
                        // Defaults apply to slots past the SH coefficients: such a slot receives +1 per contributing screen probe.
                        // NOTE(review): presumably that slot is a sample count used for normalization - confirm.
                        float Coefficient = 1.0f;
                        float MaxValuePerThread = 1.0f;

                        if (CoefficientIndex < NUM_PDF_SH_COEFFICIENTS)
                        {
                            // The Radiance Cache over-samples with high depth complexity caused by foliage, attempt to offset that by keeping at the lowest trace resolution
                            Coefficient = bTwoSidedFoliage ? 0.0f : BRDFProbabilityDensityFunctionSH[SHBaseIndex + CoefficientIndex];
                            // Fixed-point scale applied before the float-to-int conversion below.
                            MaxValuePerThread = (float)0xFFFFFFFF / 100000.0f;
                        }

                        // Convert to integer so the value can be accumulated with an atomic add.
                        int QuantizedCoefficient = Coefficient * MaxValuePerThread;
                        InterlockedAdd(RWRadianceProbeSH_PDF[ProbeSHBaseCoord + CoefficientIndex], QuantizedCoefficient);
                    }
                }
            }
        }
    }
}
image

GenerateProbeTraceTiles

}

- シェーダー(LumenRadianceCache.usf)
```C++
groupshared uint SharedNumPendingTraceTiles;
groupshared uint2 PendingTraceTileList[THREADGROUP_SIZE * THREADGROUP_SIZE * 4];

groupshared uint SharedNumCompletedTraceTiles;
groupshared uint2 CompletedTraceTileList[THREADGROUP_SIZE * THREADGROUP_SIZE * 4];

StructuredBuffer<uint> ProbesToUpdateTraceCost;

/**
 * Builds the trace tile list for one radiance cache probe and appends it to RWProbeTraceTileData.
 *
 * The probe is selected by GroupId.z. The number of subdivision levels (1 to 3) is chosen from the
 * probe's squared distance to the camera and is forced back to 1 when ProbesToUpdateTraceCost[0]
 * exceeds twice the trace cost budget. Level-0 tiles are either completed directly or split into four
 * level-1 tiles; the remaining levels are handled by SubdivideTraceTileTree. Finally the completed
 * tiles are copied from group-shared memory into a range reserved through RWProbeTraceTileAllocator[0].
 *
 * SharedNumCompletedTraceTiles, SharedNumPendingTraceTiles, PendingTraceTileList and GlobalTraceOffset
 * are declared outside this excerpt.
 *
 * @param GroupId        Thread group id; only .z (the probe trace index) is read.
 * @param GroupThreadId  Thread position inside the THREADGROUP_SIZE x THREADGROUP_SIZE group.
 */
[numthreads(THREADGROUP_SIZE/*=8*/, THREADGROUP_SIZE, 1)]
void GenerateProbeTraceTilesCS(
    uint3 GroupId : SV_GroupID,
    uint2 GroupThreadId : SV_GroupThreadID)
{
    uint ProbeTraceIndex = GroupId.z;
    float3 ProbeWorldCenter;
    uint Unused;
    uint ProbeIndex;
    GetProbeTraceData(ProbeTraceIndex, ProbeWorldCenter, Unused, ProbeIndex);

    // Squared distance between the camera origin and the probe center, used to pick the level count below
    float DistanceFromCameraSq = dot(LWCHackToFloat(PrimaryView.WorldCameraOrigin) - ProbeWorldCenter, LWCHackToFloat(PrimaryView.WorldCameraOrigin) - ProbeWorldCenter);

    // Ray gen pass:
    //  Clear trace tile list
    //  For each level [0 - 2] test PDF at tile center, issue trace tile if below threshold, otherwise subdivide and queue for next level
    //  Write out all trace tiles for indirect dispatch

    // A single thread resets the group-shared counters
    if (all(GroupThreadId.xy == 0))
    {
        SharedNumCompletedTraceTiles = 0;
        SharedNumPendingTraceTiles = 0;
    }

    // Make the reset visible to the whole group before any thread allocates list entries
    GroupMemoryBarrierWithGroupSync();

    uint NumLevels = 1;

    // Calculate subdivision level for the probe
    // Level 0 is half of RadianceProbeResolution
    if (DistanceFromCameraSq < DownsampleDistanceFromCameraSq)
    {
        NumLevels = DistanceFromCameraSq < SupersampleDistanceFromCameraSq ? 3 : 2;
    }

    // Force downsampling if we have too many probes to update
    if (ProbesToUpdateTraceCost[0] > 2 * GetProbeTraceCostBudget()/*=NumProbesToTraceBudget * 4*/)
    {
        NumLevels = 1;
    }

    FThreeBandSHVector BRDF = GetBRDF_PDF(ProbeIndex);

    // Tiles per side at level 0: half of RadianceProbeResolution divided by THREADGROUP_SIZE
    uint BaseTraceTileResolution = RadianceProbeResolution / THREADGROUP_SIZE / 2;

    // Queue trace tiles for level 0
    if (all(GroupThreadId.xy < BaseTraceTileResolution))
    {
        uint2 TraceTileCoord = GroupThreadId.xy;
        uint Level = 0;

        // Refine only when a finer level exists and the PDF at the tile center is above the threshold (0 here)
        if (Level < (NumLevels - 1) && ShouldRefineTraceTile(TraceTileCoord, BaseTraceTileResolution, /*pdf threshold=*/0.0f, BRDF))
/*
Reference - helper called in the condition above:
bool ShouldRefineTraceTile(uint2 TraceTileCoord, uint TraceTileResolution, float LevelPDFThreshold, FThreeBandSHVector BRDF)
{
    float2 ProbeUV = (TraceTileCoord + float2(.5f, .5f)) / float(TraceTileResolution);
    float3 WorldConeDirection = OctahedronToUnitVector(ProbeUV * 2.0 - 1.0);
    FThreeBandSHVector DirectionSH = SHBasisFunction3(WorldConeDirection);
    float PDF = max(DotSH3(BRDF, DirectionSH), 0);

    bool bRefineTraceTile = PDF > LevelPDFThreshold;
    return bRefineTraceTile;
}
*/
        {
            // Reserve four consecutive pending slots; TileBaseIndex receives the counter value before the add
            uint TileBaseIndex;
            InterlockedAdd(SharedNumPendingTraceTiles, 4, TileBaseIndex);

            // Queue the four level-1 children (2x2) of this tile
            PendingTraceTileList[TileBaseIndex + 0] = PackTraceTileInfo(TraceTileCoord * 2 + uint2(0, 0), /*Level=*/1, ProbeTraceIndex);
            PendingTraceTileList[TileBaseIndex + 1] = PackTraceTileInfo(TraceTileCoord * 2 + uint2(1, 0), /*Level=*/1, ProbeTraceIndex);
            PendingTraceTileList[TileBaseIndex + 2] = PackTraceTileInfo(TraceTileCoord * 2 + uint2(0, 1), /*Level=*/1, ProbeTraceIndex);
            PendingTraceTileList[TileBaseIndex + 3] = PackTraceTileInfo(TraceTileCoord * 2 + uint2(1, 1), /*Level=*/1, ProbeTraceIndex);
/*
Reference - packing helper used above (8 bits each for x, y and level; probe trace index in .y):
uint2 PackTraceTileInfo(uint2 TraceTileCoord, uint Level, uint ProbeTraceIndex)
{
    return uint2((TraceTileCoord.x & 0xFF) | ((TraceTileCoord.y & 0xFF) << 8) | ((Level & 0xFF) << 16), ProbeTraceIndex);
}
*/
        }
        else
        {
            // No refinement: the level-0 tile itself is emitted
            uint TileIndex;
            InterlockedAdd(SharedNumCompletedTraceTiles, 1, TileIndex);
            CompletedTraceTileList[TileIndex] = PackTraceTileInfo(TraceTileCoord, 0, ProbeTraceIndex);
        }
    }

    // Level-0 results must be complete before the pending list is consumed
    GroupMemoryBarrierWithGroupSync();

    // Flattened thread index inside the group
    uint ThreadIndex = GroupThreadId.y * THREADGROUP_SIZE + GroupThreadId.x;

    // Queue trace tiles for remaining levels
    // (the level count is passed as a literal in each branch; see the note inside SubdivideTraceTileTree)
    if (NumLevels == 3)
    {
        SubdivideTraceTileTree(ThreadIndex, BaseTraceTileResolution, 3, ProbeTraceIndex, BRDF);
    }
    else if (NumLevels == 2)
    {
        SubdivideTraceTileTree(ThreadIndex, BaseTraceTileResolution, 2, ProbeTraceIndex, BRDF);
    }

    // One thread reserves a contiguous output range for the whole group;
    // GlobalTraceOffset receives the allocator value before the add
    if (ThreadIndex == 0)
    {
        InterlockedAdd(RWProbeTraceTileAllocator[0], SharedNumCompletedTraceTiles, GlobalTraceOffset);
    }

    // All threads read GlobalTraceOffset below, so wait for the reservation
    GroupMemoryBarrierWithGroupSync();

    // Copy the completed tiles to the global list, one entry per thread per iteration
    for (uint TraceTileIndex = ThreadIndex; TraceTileIndex < SharedNumCompletedTraceTiles; TraceTileIndex += THREADGROUP_SIZE * THREADGROUP_SIZE)
    {
        RWProbeTraceTileData[GlobalTraceOffset + TraceTileIndex] = CompletedTraceTileList[TraceTileIndex];
    }
}

/**
 * Processes the pending trace tiles of levels 1 .. NumLevels - 1 for one probe.
 *
 * For every level, the tiles queued by the previous level are distributed over the threads of the
 * group. A tile is either split into four children that are appended to PendingTraceTileList for the
 * next level (only when a finer level exists and ShouldRefineTraceTile passes), or written to
 * CompletedTraceTileList tagged with the current level.
 *
 * Must be called from uniform control flow by every thread of the group because it contains group syncs.
 *
 * @param ThreadIndex              Flattened thread index inside the group.
 * @param BaseTraceTileResolution  Tiles per side at level 0; shifted left by Level for finer levels.
 * @param NumLevels                Total number of levels; must be a literal (see note below).
 * @param ProbeTraceIndex          Probe trace index stored in every packed tile.
 * @param BRDF                     SH vector passed to ShouldRefineTraceTile.
 */
void SubdivideTraceTileTree(
    uint ThreadIndex,
    uint BaseTraceTileResolution,
    uint NumLevels,
    uint ProbeTraceIndex,
    FThreeBandSHVector BRDF)
{
    // First entry of PendingTraceTileList that belongs to the level currently being processed
    uint PendingTraceListStartIndex = 0;

    // NumLevels must be a literal to allow the loop to unroll, otherwise we get this incorrect compile error from the DXC compiler:
    // error X3663: thread sync operation found in varying flow control, consider reformulating your algorithm so all threads will hit the sync simultaneously

    UNROLL
    for (uint Level = 1; Level < NumLevels; Level++)
    {
        uint TraceTileResolution = BaseTraceTileResolution << Level;

        // Snapshot of the list size: entries [PendingTraceListStartIndex, NumPendingTraceTiles) belong to this level
        uint NumPendingTraceTiles = SharedNumPendingTraceTiles;

        // Every thread must take the snapshot before any thread appends next-level tiles below.
        // Without this sync a thread in another wave could read a counter that already includes
        // next-level entries, giving threads different ranges for the same level.
        GroupMemoryBarrierWithGroupSync();

        for (uint PendingTraceTileIndex = PendingTraceListStartIndex + ThreadIndex; PendingTraceTileIndex < NumPendingTraceTiles; PendingTraceTileIndex += THREADGROUP_SIZE * THREADGROUP_SIZE)
        {
            uint2 TraceTileCoord = UnpackTraceTileInfo(PendingTraceTileList[PendingTraceTileIndex]);

            if (Level < (NumLevels - 1) && ShouldRefineTraceTile(TraceTileCoord, TraceTileResolution, SupersampleTileBRDFThreshold, BRDF))
            {
                // Append the 2x2 children to the pending list for the next level
                uint TileBaseIndex;
                InterlockedAdd(SharedNumPendingTraceTiles, 4, TileBaseIndex);
                PendingTraceTileList[TileBaseIndex + 0] = PackTraceTileInfo(TraceTileCoord * 2 + uint2(0, 0), Level + 1, ProbeTraceIndex);
                PendingTraceTileList[TileBaseIndex + 1] = PackTraceTileInfo(TraceTileCoord * 2 + uint2(1, 0), Level + 1, ProbeTraceIndex);
                PendingTraceTileList[TileBaseIndex + 2] = PackTraceTileInfo(TraceTileCoord * 2 + uint2(0, 1), Level + 1, ProbeTraceIndex);
                PendingTraceTileList[TileBaseIndex + 3] = PackTraceTileInfo(TraceTileCoord * 2 + uint2(1, 1), Level + 1, ProbeTraceIndex);
            }
            else
            {
                // Emit the tile at the current level
                uint TileIndex;
                InterlockedAdd(SharedNumCompletedTraceTiles, 1, TileIndex);
                CompletedTraceTileList[TileIndex] = PackTraceTileInfo(TraceTileCoord, Level, ProbeTraceIndex);
            }
        }

        // Appends of this level must be finished before the next level (or the caller) reads the lists
        GroupMemoryBarrierWithGroupSync();

        // The next level starts where this level's range ended
        PendingTraceListStartIndex = NumPendingTraceTiles;
    }
}
image

SetupTraceFromProbesCS

}

- シェーダー(LumenRadianceCache.usf)
```C++
// Outputs of SetupTraceFromProbesCS: three indirect-argument buffers (x, y, z written per buffer)
// and the ray allocator, which receives the total ray count in element [0].
RWBuffer<uint> RWTraceProbesIndirectArgs;
RWBuffer<uint> RWSortProbeTraceTilesIndirectArgs;
RWBuffer<uint> RWRadianceCacheHardwareRayTracingIndirectArgs;
RWBuffer<uint> RWHardwareRayTracingRayAllocatorBuffer;
// Divisor used to turn the trace tile count into a group count for the sort dispatch
uint SortTraceTilesGroupSize;

/**
 * Single-thread pass that converts the trace tile count stored in ProbeTraceTileAllocator[0] into
 * the indirect arguments of the tile trace, tile sort and hardware ray tracing dispatches, and
 * writes the total ray count into RWHardwareRayTracingRayAllocatorBuffer[0].
 */
[numthreads(1, 1, 1)]
void SetupTraceFromProbesCS()
{
    const uint TraceTileCount = ProbeTraceTileAllocator[0];

    // A 1d group layout can exceed D3D11_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION (65k), which manifests as
    // flickering during Force Full Update. The trace dispatch is therefore decomposed into 2d:
    // a fixed stride along X and as many rows along Y as are needed to cover every tile.
    const uint TraceTileGroupRows = (TraceTileCount + TRACE_TILE_GROUP_STRIDE - 1) / TRACE_TILE_GROUP_STRIDE;
    RWTraceProbesIndirectArgs[0] = TRACE_TILE_GROUP_STRIDE/*=128*/;
    RWTraceProbesIndirectArgs[1] = TraceTileGroupRows;
    RWTraceProbesIndirectArgs[2] = 1;

    // Sort dispatch: 1d, one group per SortTraceTilesGroupSize tiles (rounded up)
    const uint SortGroupCount = (TraceTileCount + SortTraceTilesGroupSize - 1) / SortTraceTilesGroupSize;
    RWSortProbeTraceTilesIndirectArgs[0] = SortGroupCount;
    RWSortProbeTraceTilesIndirectArgs[1] = 1;
    RWSortProbeTraceTilesIndirectArgs[2] = 1;

    // Hardware ray tracing dispatch: X covers the texels of one tile, Y covers the tiles
    RWRadianceCacheHardwareRayTracingIndirectArgs[0] = RADIANCE_CACHE_TRACE_TILE_SIZE_1D/*=(RADIANCE_CACHE_TRACE_TILE_SIZE_2D(=8) * RADIANCE_CACHE_TRACE_TILE_SIZE_2D)*/;
    RWRadianceCacheHardwareRayTracingIndirectArgs[1] = TraceTileCount;
    RWRadianceCacheHardwareRayTracingIndirectArgs[2] = 1;

    // Total number of rays: one per texel of every trace tile
    RWHardwareRayTracingRayAllocatorBuffer[0] = TraceTileCount * RADIANCE_CACHE_TRACE_TILE_SIZE_1D;
}

SortTraceTiles

}

- シェーダー(LumenRadianceCache.usf)
```C++
// Fallback group size used when SORT_TILES_THREADGROUP_SIZE is not already defined
#ifndef SORT_TILES_THREADGROUP_SIZE
#define SORT_TILES_THREADGROUP_SIZE 1
#endif

// Trace tiles are binned by direction on an 8 x 8 grid (64 bins in total)
#define NUM_DIRECTION_BINS_2D 8
#define NUM_DIRECTION_BINS_1D (NUM_DIRECTION_BINS_2D * NUM_DIRECTION_BINS_2D)
// Per-bin tile count, filled in the counting phase of SortProbeTraceTilesCS
groupshared uint SharedNumTraceTileBins[NUM_DIRECTION_BINS_1D];
// Per-bin running counter used to hand out a slot inside the bin in the scatter phase
groupshared uint SharedTraceTileBinOffset[NUM_DIRECTION_BINS_1D];

/**
 * Reorders the trace tiles handled by one thread group so that tiles falling into the same
 * direction bin are contiguous. Each thread handles at most one tile. The sort is local to the
 * group: the output index is the group's base index plus the offset computed inside the group.
 *
 * Phases: clear the bins, count tiles per bin, then compute each tile's destination from the
 * sizes of all preceding bins plus a slot inside its own bin, and write it to RWProbeTraceTileData.
 *
 * @param GroupId        Thread group index.
 * @param GroupThreadId  Thread index inside the group.
 */
[numthreads(SORT_TILES_THREADGROUP_SIZE, 1, 1)]
void SortProbeTraceTilesCS(
    uint GroupId : SV_GroupID,
    uint GroupThreadId : SV_GroupThreadID)
{
    // Clear bins to 0
    for (uint BinIndex = GroupThreadId; BinIndex < NUM_DIRECTION_BINS_1D; BinIndex += SORT_TILES_THREADGROUP_SIZE)
    {
        SharedNumTraceTileBins[BinIndex] = 0; 
        SharedTraceTileBinOffset[BinIndex] = 0;
    }

    // Bins must be cleared before any thread increments them
    GroupMemoryBarrierWithGroupSync();

    uint TraceTileIndex = GroupId * SORT_TILES_THREADGROUP_SIZE + GroupThreadId;

    // Count how many trace tiles in each direction bin
    if (TraceTileIndex < ProbeTraceTileAllocator[0])
    {
        uint2 TraceTileCoord;
        uint TraceTileLevel;
        uint ProbeTraceIndex;
        UnpackTraceTileInfo(ProbeTraceTileData[TraceTileIndex], TraceTileCoord, TraceTileLevel, ProbeTraceIndex);

        // Map the tile's first texel to a bin coordinate on the NUM_DIRECTION_BINS_2D grid
        uint TraceResolution = (RadianceProbeResolution / 2) << TraceTileLevel;
        uint2 ProbeTexelCoord = TraceTileCoord * RADIANCE_CACHE_TRACE_TILE_SIZE_2D;
        uint2 DirectionalBin = ProbeTexelCoord * NUM_DIRECTION_BINS_2D / TraceResolution;

        //@todo - also bin by Morton encoded position
        uint FinalBinIndex = DirectionalBin.y * NUM_DIRECTION_BINS_2D + DirectionalBin.x;

        InterlockedAdd(SharedNumTraceTileBins[FinalBinIndex], 1);
    }

    // All counts must be final before offsets are derived from them
    GroupMemoryBarrierWithGroupSync();

    if (TraceTileIndex < ProbeTraceTileAllocator[0])
    {
        uint2 TraceTileData = ProbeTraceTileData[TraceTileIndex];

        uint2 TraceTileCoord;
        uint TraceTileLevel;
        uint ProbeTraceIndex;
        UnpackTraceTileInfo(TraceTileData, TraceTileCoord, TraceTileLevel, ProbeTraceIndex);

        // Same bin computation as in the counting phase above
        uint TraceResolution = (RadianceProbeResolution / 2) << TraceTileLevel;
        uint2 ProbeTexelCoord = TraceTileCoord * RADIANCE_CACHE_TRACE_TILE_SIZE_2D;
        uint2 DirectionalBin = ProbeTexelCoord * NUM_DIRECTION_BINS_2D / TraceResolution;

        uint FinalBinIndex = DirectionalBin.y * NUM_DIRECTION_BINS_2D + DirectionalBin.x;

        uint SortedTraceTileOffset;

        // Calculate our sorted offset by adding up all the bins before us
        {
            // Slot inside this tile's own bin (counter value before the add)
            InterlockedAdd(SharedTraceTileBinOffset[FinalBinIndex], 1, SortedTraceTileOffset);

            for (uint BinIndex = 0; BinIndex < FinalBinIndex; BinIndex++)
            {
                SortedTraceTileOffset += SharedNumTraceTileBins[BinIndex];
            }
        }

        // Write out to the sorted position
        RWProbeTraceTileData[GroupId * SORT_TILES_THREADGROUP_SIZE + SortedTraceTileOffset] = TraceTileData;
    }
}
yasukichi commented 1 year ago

HardwareRayTracing (inline)

}

- C++(LumenRadianceCacheHardwareRayTracing.cpp)
```C++
// Excerpt of the two-pass hardware ray tracing path of the radiance cache.
// The visible part sizes and creates the temporary per-ray buffers and issues the first (near-field,
// surface cache) tracing pass through DispatchRayGenOrComputeShader. The rest of the function is
// omitted from this excerpt (see the ':' marker near the end).
void RenderLumenHardwareRayTracingRadianceCacheTwoPass(
    FRDGBuilder& GraphBuilder,
    const FScene* Scene,
    const FSceneTextureParameters& SceneTextures,
    const FViewInfo& View,
    const FLumenCardTracingInputs& TracingInputs,
    const LumenRadianceCache::FRadianceCacheInterpolationParameters& RadianceCacheParameters,
    FRadianceCacheConfiguration Configuration,
    float DiffuseConeHalfAngle,
    int32 MaxNumProbes,
    int32 MaxProbeTraceTileResolution,
    FRDGBufferRef ProbeTraceData,
    FRDGBufferRef ProbeTraceTileData,
    FRDGBufferRef ProbeTraceTileAllocator,
    FRDGBufferRef TraceProbesIndirectArgs,
    FRDGBufferRef HardwareRayTracingRayAllocatorBuffer,
    FRDGBufferRef RadianceCacheHardwareRayTracingIndirectArgs,
    FRDGTextureUAVRef RadianceProbeAtlasTextureUAV,
    FRDGTextureUAVRef DepthProbeTextureUAV
)
{
#if RHI_RAYTRACING
    // Must match usf
    const int32 TempAtlasTraceTileStride = 1024;
    extern int32 GRadianceCacheForceFullUpdate;
    // Overflow is possible however unlikely - only nearby probes trace at max resolution
    const int32 TemporaryBufferAllocationDownsampleFactor = GRadianceCacheForceFullUpdate ? 4 : CVarLumenRadianceCacheTemporaryBufferAllocationDownsampleFactor.GetValueOnRenderThread();
    // Trace tiles budgeted per probe: the maximum tile count divided (rounding up) by the downsample factor
    const int32 TempAtlasNumTraceTiles = FMath::DivideAndRoundUp(MaxProbeTraceTileResolution * MaxProbeTraceTileResolution, TemporaryBufferAllocationDownsampleFactor);
    // Not referenced in the visible part of this excerpt
    const FIntPoint WrappedTraceTileLayout(
        TempAtlasTraceTileStride,
        FMath::DivideAndRoundUp(MaxNumProbes * TempAtlasNumTraceTiles, TempAtlasTraceTileStride));

    // One element per ray: probes * tiles per probe * GetGroupSize()^2
    uint32 TraceTileResultPackedBufferElementCount = MaxNumProbes * TempAtlasNumTraceTiles * FLumenRadianceCacheHardwareRayTracingRGS::GetGroupSize() * FLumenRadianceCacheHardwareRayTracingRGS::GetGroupSize();
    FRDGBufferRef TraceTileResultPackedBuffer = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(FTraceTileResultPacked), TraceTileResultPackedBufferElementCount), TEXT("Lumen.RadianceCache.HardwareRayTracing.TraceTileResultPackedBuffer"));
    FRDGBufferRef RetraceDataPackedBuffer = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(LumenHWRTPipeline::FTraceDataPacked), TraceTileResultPackedBufferElementCount), TEXT("Lumen.RadianceCache.HardwareRayTracing.RetraceTilePackedBuffer"));
    // Not referenced in the visible part of this excerpt
    uint32 MaxRayCount = TraceTileResultPackedBufferElementCount;

    const bool bInlineRayTracing = Lumen::UseHardwareInlineRayTracing(*View.Family);
    const bool bUseFarField = UseFarFieldForRadianceCache(*View.Family) && Configuration.bFarField;

    // Default tracing of near-field, extract surface cache and material-id
    {
        // Sky light is applied in this pass only when far field is not used
        bool bApplySkyLight = !bUseFarField;

        FLumenRadianceCacheHardwareRayTracing::FPermutationDomain PermutationVector;
        PermutationVector.Set<FLumenRadianceCacheHardwareRayTracing::FLightingModeDim>(LumenHWRTPipeline::ELightingMode::SurfaceCache);
        PermutationVector.Set<FLumenRadianceCacheHardwareRayTracing::FEnableNearFieldTracing>(true);
        PermutationVector.Set<FLumenRadianceCacheHardwareRayTracing::FEnableFarFieldTracing>(false);
        PermutationVector.Set<FLumenRadianceCacheHardwareRayTracing::FIndirectDispatchDim>(IsHardwareRayTracingRadianceCacheIndirectDispatch());
        PermutationVector.Set<FLumenRadianceCacheHardwareRayTracing::FSpecularOcclusionDim>(false);
        // Trace data is packed only when far field is used
        // NOTE(review): presumably so a later far-field pass can continue these rays - confirm against the omitted code
        PermutationVector.Set<FLumenRadianceCacheHardwareRayTracing::FPackTraceDataDim>(bUseFarField);
        PermutationVector.Set<FLumenRadianceCacheHardwareRayTracing::FClipRayDim>(GetRayTracingCulling() != 0);

        DispatchRayGenOrComputeShader(GraphBuilder, Scene, View, SceneTextures, TracingInputs, RadianceCacheParameters, PermutationVector,
            DiffuseConeHalfAngle, MaxNumProbes, MaxProbeTraceTileResolution, bApplySkyLight, bInlineRayTracing,
            ProbeTraceTileAllocator, ProbeTraceTileData, ProbeTraceData,
            HardwareRayTracingRayAllocatorBuffer, RetraceDataPackedBuffer, TraceTileResultPackedBuffer);
    }
        // (the remainder of the function is omitted from this excerpt)
        :

}

// Excerpt: issues one radiance cache hardware ray tracing pass for the given shader permutation.
// Visible steps:
//  1. Create an indirect-args buffer and, when indirect dispatch is enabled, fill it with a
//     single-group compute pass.
//  2. Fill the shared pass parameters through SetLumenHardwareRayTracingRadianceCacheParameters.
//  3. For inline ray tracing with indirect dispatch, add the tracing compute pass.
// The non-indirect and ray generation shader branches are omitted (':' markers).
void DispatchRayGenOrComputeShader(
    FRDGBuilder& GraphBuilder,
    const FScene* Scene,
    const FViewInfo& View,
    const FSceneTextureParameters& SceneTextures,
    const FLumenCardTracingInputs& TracingInputs,
    const LumenRadianceCache::FRadianceCacheInterpolationParameters& RadianceCacheParameters,
    const FLumenRadianceCacheHardwareRayTracing::FPermutationDomain& PermutationVector,
    float DiffuseConeHalfAngle,
    int32 MaxNumProbes,
    int32 MaxProbeTraceTileResolution,
    bool bApplySkyLight,
    bool bInlineRayTracing,
    FRDGBufferRef ProbeTraceTileAllocator,
    FRDGBufferRef ProbeTraceTileData,
    FRDGBufferRef ProbeTraceData,
    FRDGBufferRef RayAllocatorBuffer,
    FRDGBufferRef RetraceDataPackedBuffer,
    FRDGBufferRef TraceTileResultPackedBuffer
)
{
    FRDGBufferRef HardwareRayTracingIndirectArgsBuffer = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateIndirectDesc<FRHIDispatchIndirectParameters>(1), TEXT("Lumen.RadianceCache.HardwareRayTracing.IndirectArgsBuffer"));
    if (IsHardwareRayTracingRadianceCacheIndirectDispatch())
    {
        // Pass that derives the dispatch arguments from the ray count in RayAllocatorBuffer
        FLumenRadianceCacheHardwareRayTracingIndirectArgsCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FLumenRadianceCacheHardwareRayTracingIndirectArgsCS::FParameters>();
        {
            PassParameters->RayAllocatorBuffer = GraphBuilder.CreateSRV(FRDGBufferSRVDesc(RayAllocatorBuffer, PF_R32_UINT));
            PassParameters->RWHardwareRayTracingIndirectArgs = GraphBuilder.CreateUAV(HardwareRayTracingIndirectArgsBuffer, PF_R32_UINT);
            // Group size of whichever shader type (inline compute or ray generation) will consume the arguments
            PassParameters->OutputThreadGroupSize = bInlineRayTracing ? FLumenRadianceCacheHardwareRayTracingCS::GetThreadGroupSize() : FLumenRadianceCacheHardwareRayTracingRGS::GetThreadGroupSize();
        }

        TShaderRef<FLumenRadianceCacheHardwareRayTracingIndirectArgsCS> ComputeShader = View.ShaderMap->GetShader<FLumenRadianceCacheHardwareRayTracingIndirectArgsCS>();
        FComputeShaderUtils::AddPass(
            GraphBuilder,
            RDG_EVENT_NAME("HardwareRayTracingIndirectArgsCS"),
            ComputeShader,
            PassParameters,
            FIntVector(1, 1, 1));
    }

    // Read back the permutation settings needed for parameter setup and for the event name
    bool bEnableHitLighting = PermutationVector.Get<FLumenRadianceCacheHardwareRayTracingRGS::FLightingModeDim>() == LumenHWRTPipeline::ELightingMode::HitLighting;
    bool bEnableFarFieldTracing = PermutationVector.Get<FLumenRadianceCacheHardwareRayTracingRGS::FEnableFarFieldTracing>();

    FLumenRadianceCacheHardwareRayTracing::FParameters* PassParameters = GraphBuilder.AllocParameters<FLumenRadianceCacheHardwareRayTracing::FParameters>();
    SetLumenHardwareRayTracingRadianceCacheParameters(
        GraphBuilder,
        View,
        SceneTextures,
        TracingInputs,
        RadianceCacheParameters,
        DiffuseConeHalfAngle,
        bApplySkyLight,
        bEnableHitLighting,
        bEnableFarFieldTracing,
        ProbeTraceTileAllocator,
        ProbeTraceTileData,
        ProbeTraceData,
        RayAllocatorBuffer,
        RetraceDataPackedBuffer,
        TraceTileResultPackedBuffer,
        HardwareRayTracingIndirectArgsBuffer,
        PassParameters
    );

    // In the visible code DispatchResolution is only used to build the event name
    uint32 PersistentTracingGroupCount = CVarLumenRadianceCacheHardwareRayTracingPersistentTracingGroupCount.GetValueOnRenderThread();
    FIntPoint DispatchResolution(FLumenRadianceCacheHardwareRayTracingRGS::GetGroupSize() * FLumenRadianceCacheHardwareRayTracingRGS::GetGroupSize(), PersistentTracingGroupCount);

    if (bInlineRayTracing)
    {
        TShaderRef<FLumenRadianceCacheHardwareRayTracingCS> ComputeShader = View.ShaderMap->GetShader<FLumenRadianceCacheHardwareRayTracingCS>(PermutationVector);
        if (IsHardwareRayTracingRadianceCacheIndirectDispatch())
        {           
            // Indirect compute dispatch; arguments are read from HardwareRayTracingIndirectArgs at offset 0
            FComputeShaderUtils::AddPass(
                GraphBuilder,
                RDG_EVENT_NAME("HardwareRayTracing (inline) %s %s", *LumenRadianceCache::GenerateModeString(bEnableHitLighting, bEnableFarFieldTracing), *LumenRadianceCache::GenerateResolutionString(DispatchResolution)),
                ComputeShader,
                PassParameters,
                PassParameters->HardwareRayTracingIndirectArgs,
                0);
        }
        else
        {
                      // (non-indirect inline dispatch omitted from this excerpt)
                      :
        }
    }
    else
    {
                 // (ray generation shader dispatch omitted from this excerpt)
                 :
    }   
}

/**
 * Wraps a 1d count into up to three dimensions so that no dimension exceeds
 * MAX_DISPATCH_THREAD_PER_DIMENSION.
 *
 * When X is too large it is replaced by ThreadGroupSize and the number of such rows (rounded up)
 * moves to Y; the same step is then applied from Y to Z.
 *
 * @param TargetThreadCount  Count to distribute.
 * @param ThreadGroupSize    Row length used when wrapping.
 * @return                   The wrapped (x, y, z) count.
 */
int3 GetRayTracingThreadCountWrapped(uint TargetThreadCount, uint ThreadGroupSize)
{
    int3 Wrapped = int3(TargetThreadCount, 1, 1);

    // Spill X into Y
    if (Wrapped.x > MAX_DISPATCH_THREAD_PER_DIMENSION)
    {
        Wrapped.y = (Wrapped.x + ThreadGroupSize - 1) / ThreadGroupSize;
        Wrapped.x = ThreadGroupSize;
    }

    // Spill Y into Z
    if (Wrapped.y > MAX_DISPATCH_THREAD_PER_DIMENSION)
    {
        Wrapped.z = (Wrapped.y + ThreadGroupSize - 1) / ThreadGroupSize;
        Wrapped.y = ThreadGroupSize;
    }

    return Wrapped;
}

- シェーダー(LumenRadianceCacheHardwareRayTracing.usf)
```C++
// Element [0] holds the number of rays to trace
Buffer<uint> RayAllocatorBuffer;
// Receives the three indirect dispatch arguments
RWBuffer<uint> RWHardwareRayTracingIndirectArgs;
// Thread group size of the consuming shader; only .x is read in the code shown here
uint2 OutputThreadGroupSize;

/**
 * Single-thread pass that converts the ray count in RayAllocatorBuffer[0] into indirect dispatch
 * arguments: the count is divided (rounding up) by OutputThreadGroupSize.x and the resulting group
 * count is wrapped across dimensions by GetRayTracingThreadCountWrapped.
 */
[numthreads(1, 1, 1)]
void LumenRadianceCacheHardwareRayTracingIndirectArgsCS()
{
    const uint RayCount = RayAllocatorBuffer[0];
    const uint GroupCount = (RayCount + OutputThreadGroupSize.x - 1) / OutputThreadGroupSize.x;

    const int3 WrappedGroupCount = GetRayTracingThreadCountWrapped(GroupCount, RADIANCE_CACHE_TRACE_TILE_SIZE_1D/*=RADIANCE_CACHE_TRACE_TILE_SIZE_2D(=8) * RADIANCE_CACHE_TRACE_TILE_SIZE_2D*/);

    RWHardwareRayTracingIndirectArgs[0] = WrappedGroupCount.x;
    RWHardwareRayTracingIndirectArgs[1] = WrappedGroupCount.y;
    RWHardwareRayTracingIndirectArgs[2] = WrappedGroupCount.z;
}

/**
 * Inline ray tracing entry point. Recovers the linear group index from the wrapped group id,
 * turns it into a flat thread index and forwards it to the shared tracing routine.
 *
 * @param GroupId     Wrapped thread group id of the indirect dispatch.
 * @param GroupIndex  Flattened thread index inside the group.
 */
[numthreads(INLINE_RAY_TRACING_THREAD_GROUP_SIZE_X, INLINE_RAY_TRACING_THREAD_GROUP_SIZE_Y, 1)]
void LumenRadianceCacheHardwareRayTracingCS(
    uint3 GroupId : SV_GroupID,
    uint GroupIndex : SV_GroupIndex
)
{
    const uint UnwrappedGroupIndex = GetUnWrappedRayTracingDispatchThreadId(GroupId, RADIANCE_CACHE_TRACE_TILE_SIZE_1D);
    const uint FlatThreadIndex = UnwrappedGroupIndex * INLINE_RAY_TRACING_THREAD_GROUP_SIZE_X + GroupIndex;

    LumenRadianceCacheHardwareRayTracingCommon(FlatThreadIndex);
}

// Shader parameter; not referenced by the code shown in this excerpt
uint PersistentTracingGroupCount;

/**
 * Traces one radiance cache ray and stores its radiance and hit distance.
 *
 * ThreadIndex is used as the ray index. Without ENABLE_FAR_FIELD_TRACING the trace tile is
 * ThreadIndex / RADIANCE_CACHE_TRACE_TILE_SIZE_1D and the texel inside the tile comes from the
 * remainder; with ENABLE_FAR_FIELD_TRACING both are unpacked from RWRetraceDataPackedBuffer.
 * The ray starts at the probe center, points along the octahedral direction of the probe texel and
 * is traced against the surface cache. When ApplySkyLight is set and nothing is hit, the sky light
 * is used instead. The result is written to RWTraceTileResultPackedBuffer and, with
 * DIM_PACK_TRACE_DATA, continuation data is written to RWRetraceDataPackedBuffer.
 *
 * @param ThreadIndex  Flat ray index; rays at or beyond RayAllocatorBuffer[0] return early.
 */
void LumenRadianceCacheHardwareRayTracingCommon(uint ThreadIndex)
{
    uint TexelLinearCoord = ThreadIndex % RADIANCE_CACHE_TRACE_TILE_SIZE_1D/* = (RADIANCE_CACHE_TRACE_TILE_SIZE_2D * RADIANCE_CACHE_TRACE_TILE_SIZE_2D)=(8*8)*/;
    uint GlobalGroupIndex = ThreadIndex / RADIANCE_CACHE_TRACE_TILE_SIZE_1D;

    {
        uint RayIndex = ThreadIndex;

#if ENABLE_FAR_FIELD_TRACING
        // Far-field pass: tile and texel come from the packed data of the ray being continued
        FTraceData TraceData = UnpackTraceData(RWRetraceDataPackedBuffer[RayIndex]);
        FRayIdPacked RayIdPacked;
        RayIdPacked.PackedData = TraceData.RayId;

        FRayId RayId = UnpackRayId(RayIdPacked);
        uint TraceTileIndex = RayId.TraceTileIndex;
        uint2 TexelCoord = RayId.TexelCoord;

#else
        // One group of RADIANCE_CACHE_TRACE_TILE_SIZE_1D consecutive rays per trace tile
        uint TraceTileIndex = GlobalGroupIndex;
        uint2 TexelCoord = uint2(TexelLinearCoord % RADIANCE_CACHE_TRACE_TILE_SIZE_2D, TexelLinearCoord / RADIANCE_CACHE_TRACE_TILE_SIZE_2D);
#endif
        // Skip threads beyond the number of allocated rays
        if (RayIndex >= RayAllocatorBuffer[0])
        {
            return;
        }

        uint2 TraceTileCoord;
        uint TraceTileLevel;
        uint ProbeTraceIndex;
        UnpackTraceTileInfo(ProbeTraceTileData[TraceTileIndex], TraceTileCoord, TraceTileLevel, ProbeTraceIndex);
/*
Reference - unpacking helper used above:
void UnpackTraceTileInfo(uint2 TraceTileInfo, out uint2 TraceTileCoord, out uint Level, out uint ProbeTraceIndex)
{
    TraceTileCoord = UnpackTraceTileInfo(TraceTileInfo);
    Level = (TraceTileInfo.x >> 16) & 0xFF;
    ProbeTraceIndex = TraceTileInfo.y;
}
*/

        float3 ProbeWorldCenter;
        uint ClipmapIndex;
        uint ProbeIndex;
        GetProbeTraceData(ProbeTraceIndex, ProbeWorldCenter, ClipmapIndex, ProbeIndex);
/*
Reference - probe data helpers used above:
void GetProbeTraceData(uint ProbeTraceIndex, out float3 ProbeWorldCenter, out uint ClipmapIndex, out uint ProbeIndex)
{
    float3 ProbeWorldCenterNoOffset;
    GetProbeTraceDataNoOffset(ProbeTraceIndex, ProbeWorldCenterNoOffset, ClipmapIndex, ProbeIndex);
    ProbeWorldCenter = ProbeWorldCenterNoOffset + ProbeWorldOffset[ProbeIndex].xyz;
}
void GetProbeTraceDataNoOffset(uint ProbeTraceIndex, out float3 ProbeWorldCenter, out uint ClipmapIndex, out uint ProbeIndex)
{
    ProbeWorldCenter = ProbeTraceData[ProbeTraceIndex].xyz;
    uint PackedW = asuint(ProbeTraceData[ProbeTraceIndex].w);
    ClipmapIndex = PackedW >> 24;
    ProbeIndex = PackedW & 0xFFFFFF;
}
*/

        // Trace resolution doubles with each tile level, starting from half of RadianceProbeResolution
        uint TraceResolution = (RadianceProbeResolution / 2) << TraceTileLevel;
        uint2 ProbeTexelCoord = TraceTileCoord * RADIANCE_CACHE_TRACE_TILE_SIZE_2D + TexelCoord;

        // Texels outside the trace resolution produce no ray
        if (all(ProbeTexelCoord < TraceResolution))
        {
            float2 ProbeTexelCenter = float2(0.5, 0.5);
            float2 ProbeUV = (ProbeTexelCoord + ProbeTexelCenter) / float(TraceResolution);
            float3 WorldConeDirection = OctahedronToUnitVector(ProbeUV * 2.0 - 1.0);
/*
Reference - octahedral decoding helper used above:
float3 OctahedronToUnitVector( float2 Oct )
{
    float3 N = float3( Oct, 1 - dot( 1, abs(Oct) ) );
    float t = max( -N.z, 0 );
    N.xy += select(N.xy >= 0, float2(-t, -t), float2(t, t));
    return normalize(N);
}
*/
            float FinalMinTraceDistance = max(MinTraceDistance, GetRadianceProbeTMin(ClipmapIndex));
            float FinalMaxTraceDistance = MaxTraceDistance;

            // Evenly distributing the sphere solid angle among all cones instead of based on Octahedron distortion
            float ConeHalfAngle = acosFast(1.0f - 1.0f / (float)(TraceResolution * TraceResolution));

            FRayDesc Ray;
            Ray.Origin = ProbeWorldCenter + LWCHackToFloat(PrimaryView.PreViewTranslation); // LUMEN_LWC_TODO
            Ray.Direction = WorldConeDirection;
            Ray.TMin = FinalMinTraceDistance;
            Ray.TMax = FinalMaxTraceDistance;
#if ENABLE_FAR_FIELD_TRACING
            Ray.TMin = max(Ray.TMin, FarFieldBias);
#endif

#if DIM_CLIP_RAY
            // Cull TMax by the bounding sphere of the near-field
            // (when the origin is not inside the sphere, TMax is set to TMin)
            float2 RaySphereHit = RayIntersectSphere(Ray.Origin, Ray.Direction, float4(PrimaryView.TranslatedWorldCameraOrigin, RayTracingCullingRadius));
            bool bInsideCullingSphere = RaySphereHit.x < 0 && RaySphereHit.y > 0;
            Ray.TMax = bInsideCullingSphere ? RaySphereHit.y : Ray.TMin;
#endif // DIM_CLIP_RAY

            FRayCone RayCone = (FRayCone)0;
            RayCone = PropagateRayCone(RayCone, ConeHalfAngle, 0.0);
/*
Reference - ray cone type and helper used above:
struct FRayCone
{
    float Width;
    float SpreadAngle;
};
FRayCone PropagateRayCone(in FRayCone Cone, in float SurfaceSpreadAngle, in float  HitT)
{
    FRayCone NewCone;
    NewCone.Width = Cone.SpreadAngle * HitT + Cone.Width;
    NewCone.SpreadAngle = Cone.SpreadAngle + SurfaceSpreadAngle;
    return NewCone;
}
*/
            const uint LinearCoord = ProbeTexelCoord.y * RADIANCE_CACHE_TRACE_TILE_SIZE_2D + ProbeTexelCoord.x;
            const uint CullingMode = 0;
            FRayTracedLightingContext Context = CreateRayTracedLightingContext(TLAS, RayCone, ProbeTexelCoord, LinearCoord, CullingMode, MaxTranslucentSkipCount, MaxTraversalIterations);

            // Set far-field context specialization
            Context.FarFieldMaxTraceDistance = FarFieldMaxTraceDistance;
            Context.FarFieldReferencePos = FarFieldReferencePos;
#if DIM_SPECULAR_OCCLUSION
            Context.bAcceptFirstHitAndEndSearch = true;
            Context.bSkipClosestHitShader = true;
#endif // DIM_SPECULAR_OCCLUSION

            Context.HitGroupData = HitGroupData;
            Context.RayTracingSceneMetadata = RayTracingSceneMetadata;
            FRayTracedLightingResult Result = TraceAndCalculateRayTracedLightingFromSurfaceCache(Ray, Context);

            // Miss with sky light enabled: take the radiance from the sky light and report the maximum trace distance
            if ((ApplySkyLight != 0) && !Result.bIsHit)
            {
                FConeTraceResult TraceResult = (FConeTraceResult)0;
                TraceResult.Transparency = 1;
                ApplySkylightToTraceResult(Ray.Direction, TraceResult);
/*
Reference - sky light helpers used above:
void ApplySkylightToTraceResult(float3 ConeDirection, inout FConeTraceResult TraceResult)
{ 
    if (ReflectionStruct.SkyLightParameters.y > 0)
    {
        float SkyAverageBrightness = 1.0f;
        float TanConeAngle = 0.0f;
        float Roughness = TanConeAngleToRoughness(TanConeAngle);

        TraceResult.Lighting += GetSkyLightReflection(ConeDirection, Roughness, SkyAverageBrightness) * TraceResult.Transparency;
        TraceResult.Transparency = 0;
    }
}

ReflectionStruct.SkyLightParameters: X = max mip, Y = 1 if sky light should be rendered, 0 otherwise, Z = 1 if sky light is dynamic, 0 otherwise, W = blend fraction. 

float3 GetSkyLightReflection(float3 ReflectionVector, float Roughness, out float OutSkyAverageBrightness)
{
    float AbsoluteSpecularMip = ComputeReflectionCaptureMipFromRoughness(Roughness, ReflectionStruct.SkyLightParameters.x);
    float3 Reflection = TextureCubeSampleLevel(ReflectionStruct.SkyLightCubemap, ReflectionStruct.SkyLightCubemapSampler, ReflectionVector, AbsoluteSpecularMip).rgb;

    OutSkyAverageBrightness = GetSkyLightCubemapBrightness()(=SkyIrradianceEnvironmentMap[7].x Refer to FSceneRenderer::UpdateSkyIrradianceGpuBuffer for more details.) * Luminance(View.SkyLightColor.rgb);
    return Reflection * View.SkyLightColor.rgb;
}
*/
                Result.Radiance = TraceResult.Lighting;
                Result.TraceHitDistance = MaxTraceDistance;
            }

            float SampleHitDistance = Result.TraceHitDistance;
            float3 SampleRadiance = Result.Radiance;

            // Write continuation data
#if DIM_PACK_TRACE_DATA
            FRayIdPacked RayIdPacked = PackRayId(CreateRayId(TraceTileIndex, TexelCoord));
            RWRetraceDataPackedBuffer[RayIndex] = PackTraceData(CreateTraceData(
                RayIdPacked.PackedData,
                Result.MaterialShaderIndex,
                Result.Bookmark,
                Result.TraceHitDistance,
                Result.bIsHit,
                Result.bIsRadianceCompleted,
                Result.bIsFarField));
#endif // DIM_PACK_TRACE_DATA

            // Results are stored tile by tile, row-major inside each tile; the hit distance is clamped to MaxHalfFloat
            uint OutputIndex = TraceTileIndex * RADIANCE_CACHE_TRACE_TILE_SIZE_1D + TexelCoord.y * RADIANCE_CACHE_TRACE_TILE_SIZE_2D + TexelCoord.x;
            RWTraceTileResultPackedBuffer[OutputIndex] = PackTraceTileResult(CreateTraceTileResult(SampleRadiance, min(SampleHitDistance, MaxHalfFloat)));
/*
Reference - result constructor used above:
FTraceTileResult CreateTraceTileResult(float3 Radiance, float HitDistance)
{
    FTraceTileResult TraceTileResult;
    TraceTileResult.Radiance = Radiance;
    TraceTileResult.HitDistance = HitDistance;

    return TraceTileResult;
}
*/
        }
    }
}
image
yasukichi commented 1 year ago

Far field rays(CompactRays + dispatch rayGen again)

}

- C++(LumenHardwareRayTracingCommon.cpp)
```C++
// Adds the two RDG compute passes that compact a packed ray list:
//  1. "CompactRaysIndirectArgs" - a single-group pass that fills the indirect dispatch arguments
//     from the ray count in RayAllocatorBuffer.
//  2. "CompactRays" - an indirect pass (permuted by CompactMode) that reads RayAllocatorBuffer and
//     TraceDataPackedBuffer and writes OutputRayAllocatorBuffer and OutputTraceDataPackedBuffer.
// Scene and RayCount are not used by this function.
void LumenHWRTCompactRays(
    FRDGBuilder& GraphBuilder,
    const FScene* Scene,
    const FViewInfo& View,
    int32 RayCount,
    LumenHWRTPipeline::ECompactMode CompactMode,
    const FRDGBufferRef& RayAllocatorBuffer,
    const FRDGBufferRef& TraceDataPackedBuffer,
    FRDGBufferRef& OutputRayAllocatorBuffer,
    FRDGBufferRef& OutputTraceDataPackedBuffer
)
{
    FRDGBufferRef CompactRaysIndirectArgsBuffer = GraphBuilder.CreateBuffer(
        FRDGBufferDesc::CreateIndirectDesc<FRHIDispatchIndirectParameters>(1),
        TEXT("Lumen.HWRT.CompactTracingIndirectArgs"));

    // Pass 1: build the indirect arguments
    {
        using FIndirectArgsCS = FLumenHWRTCompactRaysIndirectArgsCS;

        FIndirectArgsCS::FParameters* IndirectArgsParameters = GraphBuilder.AllocParameters<FIndirectArgsCS::FParameters>();
        IndirectArgsParameters->RayAllocator = GraphBuilder.CreateSRV(RayAllocatorBuffer, PF_R32_UINT);
        IndirectArgsParameters->RWCompactRaysIndirectArgs = GraphBuilder.CreateUAV(CompactRaysIndirectArgsBuffer, PF_R32_UINT);

        TShaderRef<FIndirectArgsCS> IndirectArgsShader = View.ShaderMap->GetShader<FIndirectArgsCS>();
        FComputeShaderUtils::AddPass(
            GraphBuilder,
            RDG_EVENT_NAME("CompactRaysIndirectArgs"),
            IndirectArgsShader,
            IndirectArgsParameters,
            FIntVector(1, 1, 1));
    }

    // Pass 2: compact the rays
    {
        using FCompactCS = FLumenHWRTCompactRaysCS;

        FCompactCS::FParameters* CompactParameters = GraphBuilder.AllocParameters<FCompactCS::FParameters>();

        // Inputs
        CompactParameters->RayAllocator = GraphBuilder.CreateSRV(RayAllocatorBuffer, PF_R32_UINT);
        CompactParameters->TraceDataPacked = GraphBuilder.CreateSRV(FRDGBufferSRVDesc(TraceDataPackedBuffer));

        // Outputs
        CompactParameters->RWRayAllocator = GraphBuilder.CreateUAV(OutputRayAllocatorBuffer, PF_R32_UINT);
        CompactParameters->RWTraceDataPacked = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(OutputTraceDataPackedBuffer));

        // Indirect dispatch arguments produced by pass 1
        CompactParameters->CompactRaysIndirectArgs = CompactRaysIndirectArgsBuffer;

        FCompactCS::FPermutationDomain CompactPermutation;
        CompactPermutation.Set<FCompactCS::FCompactModeDim>(CompactMode);

        TShaderRef<FCompactCS> CompactShader = View.ShaderMap->GetShader<FCompactCS>(CompactPermutation);
        FComputeShaderUtils::AddPass(
            GraphBuilder,
            RDG_EVENT_NAME("CompactRays"),
            CompactShader,
            CompactParameters,
            CompactParameters->CompactRaysIndirectArgs,
            0);
    }
}

/**
 * Single-thread pass that writes the indirect arguments for the ray compaction dispatch:
 * one group per THREADGROUP_SIZE_1D rays (rounded up) along X, and 1 along Y and Z.
 */
[numthreads(1, 1, 1)]
void FLumenHWRTCompactRaysIndirectArgsCS()
{
    const uint RayCount = RayAllocator[0];

    RWCompactRaysIndirectArgs[0] = (RayCount + THREADGROUP_SIZE_1D - 1) / THREADGROUP_SIZE_1D;
    RWCompactRaysIndirectArgs[1] = 1;
    RWCompactRaysIndirectArgs[2] = 1;
}

// Resources and constants of the ray compaction shader.
// The template arguments and the leading '#' of the defines were lost when this snippet was pasted
// as markdown; they are restored here from how the resources are used by the compaction shaders.

// Output ray counter; element [0] is advanced with InterlockedAdd by one thread per group
RWBuffer<uint> RWRayAllocator;

// Element type inferred from the matching group-shared array (uint2) declared below
Buffer<uint2> TraceTexelDataPacked;
RWBuffer<uint2> RWTraceTexelDataPacked;

// Packed per-ray trace data: input list and compacted output list
StructuredBuffer<FTraceDataPacked> TraceDataPacked;
RWStructuredBuffer<FTraceDataPacked> RWTraceDataPacked;

// Number of rays kept by the current group
groupshared uint SharedRayAllocator;
// Start of the group's range in the output list
groupshared uint SharedGroupOffset;
// Group-local staging for the kept rays
groupshared uint2 SharedTraceTexelDataPacked[THREADGROUP_SIZE_1D];
groupshared FTraceDataPacked SharedTraceDataPacked[THREADGROUP_SIZE_1D];

// Compaction mode identifiers
#define COMPACT_MODE_HIT_LIGHTING_RETRACE 0

#define COMPACT_MODE_FAR_FIELD_RETRACE 1

#define COMPACT_MODE_FORCE_HIT_LIGHTING 2

#define COMPACT_MODE_APPEND_RAYS 3

/**
 * Compacts the packed ray list: every ray whose trace data is not marked as a hit and whose index
 * is below RayAllocator[0] is kept. Kept rays are first gathered in group-shared memory, then one
 * thread reserves a contiguous range in RWRayAllocator[0] and the group copies its rays into
 * RWTraceDataPacked at that range.
 *
 * @param GroupThreadId     Thread index inside the group.
 * @param DispatchThreadId  Global thread index, used as the ray index.
 */
[numthreads(THREADGROUP_SIZE_1D, 1, 1)]
void FLumenHWRTCompactRaysCS(
    uint GroupThreadId : SV_GroupThreadID,
    uint DispatchThreadId : SV_DispatchThreadID)
{
    // Reset the group-local counter
    SharedRayAllocator = 0;
    GroupMemoryBarrierWithGroupSync();

    const uint RayIndex = DispatchThreadId;
    const FTraceData TraceData = UnpackTraceData(TraceDataPacked[RayIndex]);

    const bool bKeepRay = !TraceData.bIsHit && (RayIndex < RayAllocator[0]);
    if (bKeepRay)
    {
        // Take the next free slot of the group-local list
        uint LocalSlot;
        InterlockedAdd(SharedRayAllocator, 1, LocalSlot);

        SharedTraceDataPacked[LocalSlot] = TraceDataPacked[RayIndex];
    }
    GroupMemoryBarrierWithGroupSync();

    // One thread reserves the output range for the whole group
    if (GroupThreadId == 0)
    {
        InterlockedAdd(RWRayAllocator[0], SharedRayAllocator, SharedGroupOffset);
    }
    GroupMemoryBarrierWithGroupSync();

    // Copy the kept rays to the reserved range
    if (GroupThreadId < SharedRayAllocator)
    {
        RWTraceDataPacked[SharedGroupOffset + GroupThreadId] = SharedTraceDataPacked[GroupThreadId];
    }
}


<img width="627" alt="image" src="https://github.com/yasukichi/testcode/assets/14350715/9085751e-a348-43ce-9a26-e0fa3d456721">
yasukichi commented 1 year ago

CompositeTracesIntoAtlas

/**
 * Writes the results of one radiance cache trace tile into the probe radiance
 * and depth atlases. One thread group handles one trace tile.
 *
 * When the tile was traced at a lower resolution than the probe, each result
 * is replicated over an UpsampleFactor x UpsampleFactor footprint. Otherwise
 * DownsampleFactor x DownsampleFactor results are combined per atlas texel:
 * radiance is averaged and hit distance takes the minimum.
 */
[numthreads(RADIANCE_CACHE_TRACE_TILE_SIZE_2D, RADIANCE_CACHE_TRACE_TILE_SIZE_2D, 1)]
void SplatRadianceCacheIntoAtlasCS(
    uint3 GroupId : SV_GroupID,
    uint3 GroupThreadId : SV_GroupThreadID
)
{
    // Linearize the 2D group grid into a trace tile index
    uint TraceTileIndex = GroupId.y * TRACE_TILE_GROUP_STRIDE/*=128*/ + GroupId.x;
    if (TraceTileIndex >= ProbeTraceTileAllocator[0])
    {
        return;
    }

    uint2 TraceTileCoord;
    uint TraceTileLevel;
    uint ProbeTraceIndex;
    UnpackTraceTileInfo(ProbeTraceTileData[TraceTileIndex], TraceTileCoord, TraceTileLevel, ProbeTraceIndex);

    uint TraceResolution = (RadianceProbeResolution / 2) << TraceTileLevel;
    uint2 WrappedTileCoord = uint2(TraceTileIndex % TEMP_ATLAS_TRACE_TILE_STRIDE/*=1024*/, TraceTileIndex / TEMP_ATLAS_TRACE_TILE_STRIDE/*=1024*/);

    float3 ProbeWorldCenter;
    uint ClipmapIndex;
    uint ProbeIndex;
    GetProbeTraceData(ProbeTraceIndex, ProbeWorldCenter, ClipmapIndex, ProbeIndex);

    // Top-left atlas texel of this probe
    uint2 ProbeAtlasBaseCoord = RadianceProbeResolution * uint2(ProbeIndex & ProbeAtlasResolutionModuloMask, ProbeIndex >> ProbeAtlasResolutionDivideShift);

    if (TraceResolution < RadianceProbeResolution)
    {
        // Traced coarser than the probe: replicate each result over its footprint
        uint UpsampleFactor = RadianceProbeResolution / TraceResolution;
        ProbeAtlasBaseCoord += (RADIANCE_CACHE_TRACE_TILE_SIZE_2D * TraceTileCoord + GroupThreadId.xy) * UpsampleFactor;

        uint TraceTileResultIndex = TraceTileIndex * RADIANCE_CACHE_TRACE_TILE_SIZE_1D + GroupThreadId.y * RADIANCE_CACHE_TRACE_TILE_SIZE_2D + GroupThreadId.x;
        FTraceTileResult TraceTileResult = UnpackTraceTileResult(TraceTileResultPackedBuffer[TraceTileResultIndex]);
        float3 Lighting = TraceTileResult.Radiance;

        if (TraceTileResultIndex >= TraceTileResultPackedBufferElementCount)
        {
            // Visual assert on overflow due to r.Lumen.RadianceCache.HardwareRayTracing.TemporaryBufferAllocationDownsampleFactor
            Lighting = float3(0, 10000, 0) * View.PreExposure;
        }

        float HitDistance = TraceTileResult.HitDistance;

        for (uint Y = 0; Y < UpsampleFactor; Y++)
        {
            for (uint X = 0; X < UpsampleFactor; X++)
            {
                RWRadianceProbeAtlasTexture[ProbeAtlasBaseCoord + uint2(X, Y)] = Lighting;
                RWDepthProbeAtlasTexture[ProbeAtlasBaseCoord + uint2(X, Y)] = HitDistance;
            }
        }
    }
    else
    {
        // Traced at or above probe resolution: combine a block of results per atlas texel
        uint DownsampleFactor = TraceResolution / RadianceProbeResolution;
        uint WriteTileSize = RADIANCE_CACHE_TRACE_TILE_SIZE_2D / DownsampleFactor;

        // Only the threads covering the (smaller) written tile produce output
        if (all(GroupThreadId.xy < WriteTileSize))
        {
            float3 Lighting = 0;
            float HitDistance = MaxHalfFloat;

            for (uint Y = 0; Y < DownsampleFactor; Y++)
            {
                for (uint X = 0; X < DownsampleFactor; X++)
                {
                    uint TraceTileResultIndex = TraceTileIndex * RADIANCE_CACHE_TRACE_TILE_SIZE_1D + (GroupThreadId.y * DownsampleFactor + Y) * RADIANCE_CACHE_TRACE_TILE_SIZE_2D + (GroupThreadId.x * DownsampleFactor + X);
                    FTraceTileResult TraceTileResult = UnpackTraceTileResult(TraceTileResultPackedBuffer[TraceTileResultIndex]);
                    Lighting += TraceTileResult.Radiance;
                    HitDistance = min(HitDistance, TraceTileResult.HitDistance);

                    if (TraceTileResultIndex >= TraceTileResultPackedBufferElementCount)
                    {
                        // Visual assert on overflow due to r.Lumen.RadianceCache.HardwareRayTracing.TemporaryBufferAllocationDownsampleFactor
                        Lighting += float3(0, 10000, 0) * View.PreExposure;
                    }
                }
            }

            uint2 ProbeAtlasCoord = ProbeAtlasBaseCoord + WriteTileSize * TraceTileCoord + GroupThreadId.xy;
            RWRadianceProbeAtlasTexture[ProbeAtlasCoord] = Lighting / (float)(DownsampleFactor * DownsampleFactor);
            RWDepthProbeAtlasTexture[ProbeAtlasCoord] = HitDistance;
        }
    }
}


<img width="853" alt="image" src="https://github.com/yasukichi/testcode/assets/14350715/7aa3b63a-1a7a-463a-ada6-dd5a67a87b1c">
yasukichi commented 1 year ago

FilterProbeRadiance

- シェーダー(LumenRadianceCache.usf)
```C++
// Probe radiance atlas read by the filter kernel below (own texel and neighbor probes' texels)
Texture2D<float3> RadianceProbeAtlasTexture;
// Probe hit-distance atlas, sampled for the occlusion test and for clamping neighbor hit distances
Texture2D<float> DepthProbeAtlasTexture;
// Angle that normalizes NeighborAngle when computing AngleWeight; the weight falls to zero at this angle
float SpatialFilterMaxRadianceHitAngle;

// NOTE(review): not referenced by FilterProbeRadianceWithGatherCS below - presumably used by another kernel; confirm
groupshared uint SharedRadiance[4][THREADGROUP_SIZE][THREADGROUP_SIZE];

/**
 * Spatially filters one probe's radiance by gathering from its six
 * axis-aligned neighbor probes in the same clipmap.
 *
 * GroupId.z selects the probe trace and DispatchThreadId.xy selects the texel
 * inside the probe. Each valid neighbor contributes its radiance for the same
 * texel, weighted by:
 *   - an occlusion test against this probe's own depth (rejects leaking), and
 *   - the angle between this texel's direction and the direction to the
 *     neighbor's hit point.
 * The result is the weight-normalized sum, written to RWRadianceProbeAtlasTexture.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void FilterProbeRadianceWithGatherCS(
    uint3 GroupId : SV_GroupID,
    uint3 GroupThreadId : SV_GroupThreadID,
    uint3 DispatchThreadId : SV_DispatchThreadID)
{
    uint ProbeTraceIndex = GroupId.z;
    uint2 ProbeTexelCoord = DispatchThreadId.xy;

    if (all(ProbeTexelCoord < RadianceProbeResolution))
    {
        float3 ProbeWorldCenter;
        uint ClipmapIndex;
        uint ProbeIndex;
        GetProbeTraceData(ProbeTraceIndex, ProbeWorldCenter, ClipmapIndex, ProbeIndex);

        // Top-left atlas texel of this probe
        uint2 ProbeAtlasBaseCoord = RadianceProbeResolution * uint2(ProbeIndex & ProbeAtlasResolutionModuloMask, ProbeIndex >> ProbeAtlasResolutionDivideShift);

        // Start from this probe's own sample with weight 1
        float3 Lighting = RadianceProbeAtlasTexture[ProbeTexelCoord + ProbeAtlasBaseCoord].xyz;
        float HitDistance = DepthProbeAtlasTexture[ProbeTexelCoord + ProbeAtlasBaseCoord];
        float TotalWeight = 1.0f;

        // World-space direction represented by this octahedral texel
        float2 ProbeTexelCenter = float2(0.5, 0.5);
        float2 ProbeUV = (ProbeTexelCoord + ProbeTexelCenter) / (float)RadianceProbeResolution;
        float3 WorldConeDirection = OctahedronToUnitVector(ProbeUV * 2.0 - 1.0);

        int3 ProbeCoord = GetRadianceProbeCoord(ProbeWorldCenter, ClipmapIndex);

        // The six face-adjacent neighbors in the probe grid
        int3 Offsets[6];
        Offsets[0] = int3(-1, 0, 0);
        Offsets[1] = int3(1, 0, 0);
        Offsets[2] = int3(0, -1, 0);
        Offsets[3] = int3(0, 1, 0);
        Offsets[4] = int3(0, 0, -1);
        Offsets[5] = int3(0, 0, 1);

        for (uint OffsetIndex = 0; OffsetIndex < 6; OffsetIndex++)
        {
            int3 NeighborCoord = ProbeCoord + Offsets[OffsetIndex];

            // Skip neighbors that fall outside the clipmap grid
            if (all(NeighborCoord >= 0) && all(NeighborCoord < (int3)RadianceProbeClipmapResolution))
            {
                uint NeighborProbeIndex = GetProbeIndexFromIndirectionTexture(NeighborCoord, ClipmapIndex);

                if (NeighborProbeIndex != INVALID_PROBE_INDEX)
                {
                    uint2 NeighborProbeAtlasBaseCoord = RadianceProbeResolution * uint2(NeighborProbeIndex & ProbeAtlasResolutionModuloMask, NeighborProbeIndex >> ProbeAtlasResolutionDivideShift);
                    float NeighborRadianceDepth = DepthProbeAtlasTexture[ProbeTexelCoord + NeighborProbeAtlasBaseCoord];
                    float3 NeighborWorldPosition = GetProbeWorldPosition(NeighborCoord, ClipmapIndex, NeighborProbeIndex);

                    float OcclusionWeight = 1.0f;

                    {
                        // Test a nearby position along the neighbor ray's path, if occluded then discard the neighbor radiance to reduce leaking
                        // We can't test occlusion of the neighbor probe, because it is closer than GetRadianceProbeTMin
                        float VoxelRadius = sqrt(3.0f) / GetWorldPositionToRadianceProbeCoordScale(ClipmapIndex);
                        float OcclusionDistance = max(GetRadianceProbeTMin(ClipmapIndex), VoxelRadius);
                        float3 NeighborOcclusionTestPosition = NeighborWorldPosition + 2 * OcclusionDistance * WorldConeDirection;
                        float3 ToNeighborOcclusionPosition = NeighborOcclusionTestPosition - ProbeWorldCenter;
                        uint2 ProbeTexelCoordForNeighborOcclusionPosition = ( UnitVectorToOctahedron(ToNeighborOcclusionPosition) * 0.5 + 0.5 ) * RadianceProbeResolution;
                        float ProbeDepthForNeighborOcclusionPosition = DepthProbeAtlasTexture[ProbeTexelCoordForNeighborOcclusionPosition + ProbeAtlasBaseCoord];

                        // Compare squared distances: this probe hit something before reaching the test position
                        if (ProbeDepthForNeighborOcclusionPosition * ProbeDepthForNeighborOcclusionPosition < dot(ToNeighborOcclusionPosition, ToNeighborOcclusionPosition))
                        {
                            OcclusionWeight = 0.0f;
                        }
                    }

                    // Clamp neighbor's hit distance to our own.  This helps preserve contact shadows, as a long neighbor hit distance will cause a small NeighborAngle and bias toward distant lighting.
                    if (HitDistance >= 0)
                    {
                        NeighborRadianceDepth = min(NeighborRadianceDepth, HitDistance);
                    }

                    // Angle between our texel direction and the direction from our probe to the neighbor's hit point
                    float3 NeighborHitPosition = NeighborWorldPosition + WorldConeDirection * NeighborRadianceDepth;
                    float3 ToNeighborHit = NeighborHitPosition - ProbeWorldCenter;
                    float NeighborAngle = acosFast(dot(ToNeighborHit, WorldConeDirection) / length(ToNeighborHit));
                    float AngleWeight = 1.0f - saturate(NeighborAngle / SpatialFilterMaxRadianceHitAngle);

                    float Weight = AngleWeight * OcclusionWeight;
                    Lighting += RadianceProbeAtlasTexture[ProbeTexelCoord + NeighborProbeAtlasBaseCoord].xyz * Weight;
                    TotalWeight += Weight;
                }
            }
        }

        RWRadianceProbeAtlasTexture[ProbeTexelCoord + ProbeAtlasBaseCoord] = Lighting / TotalWeight;
    }
}