Open yasukichi opened 1 year ago
FDeferredShadingSceneRenderer::RenderLumenSceneLighting()(LumenSceneLighting.cpp)
C++(LumenSceneLighting.cpp)
// Excerpt of Lumen::BuildCardUpdateContext: schedules the compute pass that zeroes the
// buffers used by the card update context passes.
// A lone ':' line marks code that was left out of this excerpt.
void Lumen::BuildCardUpdateContext(...)
{
:
// Batch clear all resources required for the subsequent card context update pass
{
FClearCardUpdateContextCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FClearCardUpdateContextCS::FParameters>();
// Every buffer cleared by the shader is bound as a UAV.
PassParameters->RWDirectLightingCardPageIndexAllocator = DirectCardPageIndexAllocatorUAV;
PassParameters->RWIndirectLightingCardPageIndexAllocator = IndirectCardPageIndexAllocatorUAV;
PassParameters->RWMaxUpdateBucket = MaxUpdateBucketUAV;
PassParameters->RWCardPageTileAllocator = CardPageTileAllocatorUAV;
PassParameters->RWPriorityHistogram = PriorityHistogramUAV;
auto ComputeShader = Views[0].ShaderMap->GetShader<FClearCardUpdateContextCS>();
// Despite its name this is the number of thread groups: enough threads to cover one
// histogram entry per update context.
// NOTE(review): the shader clears the other buffers with the same thread index, so this
// presumably relies on the histogram being the largest buffer - confirm.
const FIntVector GroupSize(FMath::DivideAndRoundUp<int32>(LumenCardUpdateContext::CARD_UPDATE_CONTEXT_MAX * LumenCardUpdateContext::PRIORITY_HISTOGRAM_SIZE, FClearCardUpdateContextCS::GetGroupSize()), 1, 1);
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("ClearCardUpdateContext"),
ComputePassFlags,
ComputeShader,
PassParameters,
GroupSize);
}
:
}
- シェーダー(LumenSceneLighting.usf)
```C++
/**
 * Zeroes, in a single dispatch, every buffer used by the card context update passes.
 * Each thread clears at most one element per buffer.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void ClearCardUpdateContextCS(
    uint3 DispatchThreadId : SV_DispatchThreadID)
{
    const uint ClearIndex = DispatchThreadId.x;

    // The buffers have different lengths, so every write is range-checked on its own.
    const bool bInAllocatorRange = ClearIndex < 1;
    const bool bInMaxUpdateBucketRange = ClearIndex < CARD_UPDATE_CONTEXT_MAX * MAX_UPDATE_BUCKET_STRIDE;
    const bool bInTileAllocatorRange = ClearIndex < CARD_UPDATE_CONTEXT_MAX * CARD_PAGE_TILE_ALLOCATOR_STRIDE;
    const bool bInHistogramRange = ClearIndex < CARD_UPDATE_CONTEXT_MAX * PRIORITY_HISTOGRAM_SIZE;

    if (bInAllocatorRange)
    {
        // Single-element page index allocators (direct and indirect lighting).
        RWDirectLightingCardPageIndexAllocator[ClearIndex] = 0;
        RWIndirectLightingCardPageIndexAllocator[ClearIndex] = 0;
    }

    if (bInMaxUpdateBucketRange)
    {
        RWMaxUpdateBucket[ClearIndex] = 0;
    }

    if (bInTileAllocatorRange)
    {
        RWCardPageTileAllocator[ClearIndex] = 0;
    }

    if (bInHistogramRange)
    {
        RWPriorityHistogram[ClearIndex] = 0;
    }
}
C++(LumenSceneLighting.cpp)
// Excerpt of Lumen::BuildCardUpdateContext: schedules the compute pass that builds the
// per-context update priority histogram over all card pages.
// A lone ':' line marks code that was left out of this excerpt.
void Lumen::BuildCardUpdateContext(...)
{
:
// Prepare update priority histogram
{
FBuildPageUpdatePriorityHistogramCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FBuildPageUpdatePriorityHistogramCS::FParameters>();
PassParameters->RWPriorityHistogram = PriorityHistogramUAV;
PassParameters->LumenCardScene = FrameTemporaries.LumenCardSceneUniformBuffer;
PassParameters->CardPageLastUsedBuffer = CardPageLastUsedBufferSRV;
PassParameters->CardPageHighResLastUsedBuffer = CardPageHighResLastUsedBufferSRV;
PassParameters->CardPageNum = NumCardPages;
PassParameters->SurfaceCacheUpdateFrameIndex = UpdateFrameIndex;
PassParameters->FreezeUpdateFrame = FreezeUpdateFrame;
PassParameters->FirstClipmapWorldExtentRcp = FirstClipmapWorldExtentRcp;
// One camera origin per view; the check guards the fixed-size shader array.
PassParameters->NumCameraOrigins = Views.Num();
check(Views.Num() <= PassParameters->WorldCameraOrigins.Num());
for (int32 i = 0; i < Views.Num(); i++)
{
PassParameters->WorldCameraOrigins[i] = FVector4f((FVector3f)Views[i].ViewMatrices.GetViewOrigin(), 0.0f);
}
PassParameters->DirectLightingUpdateFactor = DirectLightingCardUpdateContext.UpdateFactor;
PassParameters->IndirectLightingUpdateFactor = IndirectLightingCardUpdateContext.UpdateFactor;
// The shader permutation is selected by whether surface cache feedback is in use.
FBuildPageUpdatePriorityHistogramCS::FPermutationDomain PermutationVector;
PermutationVector.Set<FBuildPageUpdatePriorityHistogramCS::FSurfaceCacheFeedback>(bUseFeedback);
auto ComputeShader = Views[0].ShaderMap->GetShader<FBuildPageUpdatePriorityHistogramCS>(PermutationVector);
// Number of thread groups needed for one thread per card page.
// NOTE(review): the count comes from LumenSceneData.GetNumCardPages() while the shader bound
// is NumCardPages - presumably the same value; confirm.
const FIntVector GroupSize(FMath::DivideAndRoundUp<int32>(LumenSceneData.GetNumCardPages(), FBuildPageUpdatePriorityHistogramCS::GetGroupSize()), 1, 1);
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("BuildPageUpdatePriorityHistogram"),
ComputePassFlags,
ComputeShader,
PassParameters,
GroupSize);
}
:
}
- シェーダー(LumenSceneLighting.usf)
```C++
/**
 * Visits every card page (one thread per page) and accumulates its tile count into the
 * update priority histograms of both lighting contexts.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void BuildPageUpdatePriorityHistogramCS(
    uint3 DispatchThreadId : SV_DispatchThreadID)
{
    const uint CardPageIndex = DispatchThreadId.x;

    // Threads past the end of the page list have nothing to do.
    if (CardPageIndex >= CardPageNum)
    {
        return;
    }

    FLumenCardPageData CardPage = GetLumenCardPageData(CardPageIndex);
    FLumenCardData Card = GetLumenCardData(CardPage.CardIndex);

    // Pages without tiles contribute nothing to the histogram.
    const uint NumCardPageTiles = GetNumCardPageTiles(CardPage);
    if (NumCardPageTiles == 0)
    {
        return;
    }

    // Build the direct lighting histogram.
    BuildUpdatePriorityHistogram(Card, CardPage, CardPageIndex, NumCardPageTiles, CARD_UPDATE_CONTEXT_DIRECT_LIGHTING);
    // Build the indirect lighting histogram.
    BuildUpdatePriorityHistogram(Card, CardPage, CardPageIndex, NumCardPageTiles, CARD_UPDATE_CONTEXT_INDIRECT_LIGHTING);
}
// Adds the page's tile count to the histogram bucket chosen by GetPriorityBucketIndex.
// Each update context owns PRIORITY_HISTOGRAM_SIZE consecutive entries of RWPriorityHistogram.
void BuildUpdatePriorityHistogram(FLumenCardData Card, FLumenCardPageData CardPage, uint CardPageIndex, uint NumCardPageTiles, uint CardUpdateContext)
{
    const uint BucketInContext = GetPriorityBucketIndex(Card, CardPage, CardPageIndex, CardUpdateContext);
    const uint HistogramEntry = CardUpdateContext * PRIORITY_HISTOGRAM_SIZE + BucketInContext;

    // Atomic because many pages may land in the same bucket.
    InterlockedAdd(RWPriorityHistogram[HistogramEntry], NumCardPageTiles);
}
// Maps a card page to a bucket of the update priority histogram for the given update
// context. Bucket 0 holds the most important pages.
//
// Card              Card owning the page (provides the world-space OBB).
// CardPage          Page whose priority is evaluated.
// CardPageIndex     Index of the page, used to look up the feedback buffers.
// CardUpdateContext CARD_UPDATE_CONTEXT_DIRECT_LIGHTING or CARD_UPDATE_CONTEXT_INDIRECT_LIGHTING;
//                   selects which last-update frame and update factor are used.
// Returns a bucket index in [0; PRIORITY_HISTOGRAM_SIZE - 1].
uint GetPriorityBucketIndex(FLumenCardData Card, FLumenCardPageData CardPage, uint CardPageIndex, uint CardUpdateContext)
{
    // Both contexts call this function, so pick the per-context inputs instead of
    // always reading the direct lighting ones.
    const bool bDirectLighting = (CardUpdateContext == CARD_UPDATE_CONTEXT_DIRECT_LIGHTING);
    uint LastLightingUpdateFrameIndex = bDirectLighting ? CardPage.LastDirectLightingUpdateFrameIndex : CardPage.LastIndirectLightingUpdateFrameIndex;
    const float UpdateFactor = bDirectLighting ? DirectLightingUpdateFactor : IndirectLightingUpdateFactor;

    // [1;N]
    uint FramesSinceLastUpdated = SurfaceCacheUpdateFrameIndex - LastLightingUpdateFrameIndex;

    // [0;MAX_UPDATE_FREQUENCY]
    float Frequency = 0.0f;
    {
        // Distance from the closest camera to the page's local bounding box.
        float DistanceFromViewer = 100000000.0f;
        for (uint ViewIndex = 0; ViewIndex < NumCameraOrigins; ViewIndex++)
        {
            float3 CardSpaceViewPosition = mul(WorldCameraOrigins[ViewIndex].xyz - Card.Origin, Card.WorldToLocalRotation);
            float3 CardPageLocalCenter;
            float3 CardPageLocalExtent;
            // Inlined: GetCardPageLocalBBox(CardPage, Card, CardPageLocalCenter, CardPageLocalExtent);
            {
                float2 CardUVMin = CardPage.CardUVRect.xw;
                float2 CardUVMax = CardPage.CardUVRect.zy;
                // Inlined: float3 CardPageLocalBoxMin = GetCardLocalPosition(Card.LocalExtent, CardUVMin, 1.0f);
                float3 CardPageLocalBoxMin;
                { // Remap UV to the range [-Card.LocalExtent; +Card.LocalExtent]
                    CardPageLocalBoxMin.xy = Card.LocalExtent.xy * (1.0f - 2.0f * float2(1.0f - CardUVMin.x, CardUVMin.y));
                    CardPageLocalBoxMin.z = -(2.0f * 1.0f/*=Depth*/ - 1.0f) * Card.LocalExtent.z;
                }
                float3 CardPageLocalBoxMax = GetCardLocalPosition(Card.LocalExtent, CardUVMax, 0.0f);
                CardPageLocalCenter = 0.5f * (CardPageLocalBoxMax + CardPageLocalBoxMin);
                CardPageLocalExtent = 0.5f * (CardPageLocalBoxMax - CardPageLocalBoxMin);
            }
            DistanceFromViewer = min(DistanceFromViewer, sqrt(ComputeSquaredDistanceFromBoxToPoint(CardPageLocalCenter, CardPageLocalExtent, CardSpaceViewPosition)));
        }
        // Closer pages get a higher update frequency.
        Frequency = MAX_UPDATE_FREQUENCY - clamp(DistanceFromViewer * FirstClipmapWorldExtentRcp, 0.0f, MAX_UPDATE_FREQUENCY);
    }

    // Drive frequency based on the feedback
#if SURFACE_CACHE_FEEDBACK
    {
        const uint LastUsedFrameIndex = CardPageLastUsedBuffer[CardPageIndex];
        const uint LastUsedHighResFrameIndex = CardPageHighResLastUsedBuffer[CardPageIndex];
        FramesSinceLastUpdated = max(max(LastUsedFrameIndex, LastUsedHighResFrameIndex), LastLightingUpdateFrameIndex) - LastLightingUpdateFrameIndex + 1;
        Frequency *= 0.5f;
        if (SurfaceCacheUpdateFrameIndex >= LastUsedHighResFrameIndex + 1)
        {
            Frequency += 0.5f * MAX_UPDATE_FREQUENCY * saturate((SurfaceCacheUpdateFrameIndex - (LastUsedHighResFrameIndex + 1)) / 2.0f);
        }
    }
#endif

    uint BucketIndex = 0;
    if (LastLightingUpdateFrameIndex == 0)
    {
        // Special case where page wasn't ever updated, just place into first 8 most important buckets based on the frequency
        BucketIndex = clamp(MAX_UPDATE_FREQUENCY - Frequency, 0.0f, MAX_UPDATE_FREQUENCY);
    }
    else
    {
        // [0;N]
        float UpdateImportance = FramesSinceLastUpdated * (Frequency + 1.0f);
        // Normalize histogram
        UpdateImportance = (PRIORITY_HISTOGRAM_SIZE * UpdateImportance) / (UpdateFactor * (MAX_UPDATE_FREQUENCY + 1.0f));
        // Offset from [1;N] and invert in order to place most important pages in bucket 0
        BucketIndex = PRIORITY_HISTOGRAM_SIZE - 1 - clamp(UpdateImportance - 1, 0, PRIORITY_HISTOGRAM_SIZE - 1);
    }
    return BucketIndex;
}
// Unpacked per-page surface cache data, filled in by GetLumenCardPageData.
struct FLumenCardPageData
{
// Index of the owning card (passed to GetLumenCardData).
uint CardIndex;
// Derived: true when SizeInTexels.x > 0.
bool bMapped;
uint ResLevelPageTableOffset;
uint2 ResLevelSizeInTiles;
float2 SizeInTexels;
// Derived: PhysicalAtlasUVRect.xy scaled by the physical atlas size.
float2 PhysicalAtlasCoord;
float4 CardUVRect;
float4 PhysicalAtlasUVRect;
float2 CardUVTexelScale;
// Derived: inverse of the physical atlas size.
float2 PhysicalAtlasUVTexelScale;
// Frame indices of the last lighting update, one per update context.
uint LastDirectLightingUpdateFrameIndex;
uint LastIndirectLightingUpdateFrameIndex;
// Increments each time the page has Radiosity updated, needs to be consecutive for the sample pattern
uint IndirectLightingTemporalIndex;
};
// Writable view of the card page data, stored as raw float4 rows.
RWStructuredBuffer<float4> RWLumenCardPageDataBuffer;
// LumenCardPageDataBuffer resolves to the writable buffer above when
// USE_RW_LUMEN_CARD_PAGE_DATA_BUFFER is set, otherwise to the LumenCardScene card page data.
#if USE_RW_LUMEN_CARD_PAGE_DATA_BUFFER
#define LumenCardPageDataBuffer RWLumenCardPageDataBuffer
#else
#define LumenCardPageDataBuffer LumenCardScene.CardPageData
#endif
// Reads and unpacks one card page from LumenCardPageDataBuffer.
// Note: layout must match FLumenCardPageData in C++
FLumenCardPageData GetLumenCardPageData(uint CardPageId)
{
    FLumenCardPageData Page = (FLumenCardPageData) 0;

    // A page occupies LUMEN_CARD_PAGE_DATA_STRIDE consecutive float4 rows.
    const uint FirstRow = CardPageId * LUMEN_CARD_PAGE_DATA_STRIDE;
    const float4 Row0 = LumenCardPageDataBuffer[FirstRow + 0];
    const float4 Row1 = LumenCardPageDataBuffer[FirstRow + 1];
    const float4 Row2 = LumenCardPageDataBuffer[FirstRow + 2];
    const float4 Row3 = LumenCardPageDataBuffer[FirstRow + 3];
    const float4 Row4 = LumenCardPageDataBuffer[FirstRow + 4];

    // Row 0: owning card, page table offset, size.
    Page.CardIndex = asuint(Row0.x);
    Page.ResLevelPageTableOffset = asuint(Row0.y);
    Page.SizeInTexels = Row0.zw;

    // Rows 1-2: UV rectangles.
    Page.CardUVRect = Row1;
    Page.PhysicalAtlasUVRect = Row2;

    // Row 3: texel scale and size in tiles.
    Page.CardUVTexelScale = Row3.xy;
    Page.ResLevelSizeInTiles = asuint(Row3.zw);

    // Row 4: lighting update bookkeeping.
    Page.LastDirectLightingUpdateFrameIndex = asuint(Row4.x);
    Page.LastIndirectLightingUpdateFrameIndex = asuint(Row4.y);
    Page.IndirectLightingTemporalIndex = asuint(Row4.z);

    // Values derived from the unpacked fields and the card scene constants.
    Page.bMapped = Page.SizeInTexels.x > 0;
    Page.PhysicalAtlasCoord = Page.PhysicalAtlasUVRect.xy * LumenCardScene.PhysicalAtlasSize;
    Page.PhysicalAtlasUVTexelScale = LumenCardScene.InvPhysicalAtlasSize;

    return Page;
}
// Unpacked per-card data, filled in by GetLumenCardData.
struct FLumenCardData
{
// OBB in MeshCards space
float3x3 MeshCardsToLocalRotation;
float3 MeshCardsOrigin;
float3 MeshCardsExtent;
// OBB in world space
float3x3 WorldToLocalRotation;
float3 Origin;
float3 LocalExtent;
// Page grid size and page table offset (each size component is stored as 16 bits).
uint2 SizeInPages;
uint PageTableOffset;
uint2 HiResSizeInPages;
uint HiResPageTableOffset;
// Convert Card's uint ResLevel (card's resolution) to uint2 ResLevelXY (each side's resolution)
uint2 ResLevelToResLevelXYBias;
// The flags and small fields below are unpacked from a single packed uint.
bool bVisible;
bool bHeightfield;
uint AxisAlignedDirection;
uint LightingChannelMask;
// Average world space texel size of always resident pages
float TexelSize;
};
// LumenCardDataBuffer is either a standalone structured buffer or the card data of LumenCardScene.
#if USE_LUMEN_CARD_DATA_BUFFER
StructuredBuffer<float4> LumenCardDataBuffer;
#else
#define LumenCardDataBuffer LumenCardScene.CardData
#endif
// Stride of a single card's (and a single card page's) data in float4's, must match C++
#define LUMEN_CARD_DATA_STRIDE 9
#define LUMEN_CARD_PAGE_DATA_STRIDE 5
// Heightfields are a special case and they always have only one card
#define LUMEN_HEIGHTFIELD_LOCAL_CARD_INDEX 0
// Reads and unpacks one card from the scene card buffer.
// Note: layout must match FLumenCardData in C++
FLumenCardData GetLumenCardData(uint CardId)
{
    FLumenCardData Result = (FLumenCardData)0;

    // A card occupies LUMEN_CARD_DATA_STRIDE consecutive float4 rows.
    const uint FirstRow = CardId * LUMEN_CARD_DATA_STRIDE;
    const float4 Row0 = LumenCardDataBuffer[FirstRow + 0];
    const float4 Row1 = LumenCardDataBuffer[FirstRow + 1];
    const float4 Row2 = LumenCardDataBuffer[FirstRow + 2];
    const float4 Row3 = LumenCardDataBuffer[FirstRow + 3];
    const float4 Row4 = LumenCardDataBuffer[FirstRow + 4];
    const float4 Row5 = LumenCardDataBuffer[FirstRow + 5];
    const float4 Row6 = LumenCardDataBuffer[FirstRow + 6];
    const float4 Row7 = LumenCardDataBuffer[FirstRow + 7];
    const float4 Row8 = LumenCardDataBuffer[FirstRow + 8];

    // Rows 0-2: world-space rotation in xyz, origin in the w components.
    Result.WorldToLocalRotation[0] = Row0.xyz;
    Result.WorldToLocalRotation[1] = Row1.xyz;
    Result.WorldToLocalRotation[2] = Row2.xyz;
    Result.Origin = float3(Row0.w, Row1.w, Row2.w);

    // Row 3: extent in xyz, bit-packed flags in w.
    Result.LocalExtent = abs(Row3.xyz);
    const uint PackedFlags = asuint(Row3.w);
    Result.ResLevelToResLevelXYBias.x = (PackedFlags >> 0) & 0xFF;
    Result.ResLevelToResLevelXYBias.y = (PackedFlags >> 8) & 0xFF;
    Result.AxisAlignedDirection = (PackedFlags >> 16) & 0xF;
    Result.LightingChannelMask = (PackedFlags >> 20) & 0xF;
    Result.bVisible = (PackedFlags >> 24) & 1;
    Result.bHeightfield = (PackedFlags >> 25) & 1;

    // Row 4: page grid sizes (16 bits per axis) and page table offsets.
    const uint PackedSizeInPages = asuint(Row4.x);
    const uint PackedHiResSizeInPages = asuint(Row4.z);
    Result.SizeInPages.x = (PackedSizeInPages >> 0) & 0xFFFF;
    Result.SizeInPages.y = (PackedSizeInPages >> 16) & 0xFFFF;
    Result.PageTableOffset = asuint(Row4.y);
    Result.HiResSizeInPages.x = (PackedHiResSizeInPages >> 0) & 0xFFFF;
    Result.HiResSizeInPages.y = (PackedHiResSizeInPages >> 16) & 0xFFFF;
    Result.HiResPageTableOffset = asuint(Row4.w);

    // Rows 5-7: MeshCards-space rotation in xyz, origin in the w components.
    Result.MeshCardsToLocalRotation[0] = Row5.xyz;
    Result.MeshCardsToLocalRotation[1] = Row6.xyz;
    Result.MeshCardsToLocalRotation[2] = Row7.xyz;
    Result.MeshCardsOrigin = float3(Row5.w, Row6.w, Row7.w);

    // Row 8: MeshCards extent and texel size.
    Result.MeshCardsExtent = Row8.xyz;
    Result.TexelSize = Row8.w;

    return Result;
}
// Number of CARD_TILE_SIZE x CARD_TILE_SIZE tiles covered by the page's texel area.
uint GetNumCardPageTiles(FLumenCardPageData CardPage)
{
    const float PageTexelArea = CardPage.SizeInTexels.x * CardPage.SizeInTexels.y;
    // The quotient is converted to uint by the return.
    return PageTexelArea / (CARD_TILE_SIZE * CARD_TILE_SIZE);
}
C++(LumenSceneLighting.cpp)
// Excerpt of Lumen::BuildCardUpdateContext: schedules the compute pass that walks the
// priority histogram and selects the last bucket to update for each lighting context.
// A lone ':' line marks code that was left out of this excerpt.
void Lumen::BuildCardUpdateContext(...)
{
:
// Compute prefix sum and pick max bucket
{
FSelectMaxUpdateBucketCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FSelectMaxUpdateBucketCS::FParameters>();
PassParameters->RWMaxUpdateBucket = MaxUpdateBucketUAV;
// The histogram is read-only here, so it is bound through an SRV.
PassParameters->PriorityHistogram = PriorityHistogramSRV;
// Per-context tile budgets.
PassParameters->MaxDirectLightingTilesToUpdate = DirectLightingCardUpdateContext.MaxUpdateTiles;
PassParameters->MaxIndirectLightingTilesToUpdate = IndirectLightingCardUpdateContext.MaxUpdateTiles;
PassParameters->SurfaceCacheUpdateFrameIndex = UpdateFrameIndex;
PassParameters->FreezeUpdateFrame = FreezeUpdateFrame;
auto ComputeShader = Views[0].ShaderMap->GetShader<FSelectMaxUpdateBucketCS>();
// Two thread groups: the shader uses group 0 for direct and group 1 for indirect lighting.
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("Select max update bucket"),
ComputePassFlags,
ComputeShader,
PassParameters,
FIntVector(2, 1, 1));
}
:
}
- シェーダー(LumenSceneLighting.usf)
```C++
/**
 * Picks, for each lighting context, the last histogram bucket to update and how many
 * tiles may be updated inside that bucket.
 * The first thread of group 0 handles direct lighting, the first thread of group 1
 * handles indirect lighting; all other threads are idle.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void SelectMaxUpdateBucketCS(
    uint3 GroupId : SV_GroupID,
    uint3 GroupThreadId : SV_GroupThreadID,
    uint3 DispatchThreadId : SV_DispatchThreadID)
{
    // Only one thread per group does any work.
    if (GroupThreadId.x != 0)
    {
        return;
    }

    if (GroupId.x == 0)
    {
        SelectMaxUpdateBucket(CARD_UPDATE_CONTEXT_DIRECT_LIGHTING);
    }
    else if (GroupId.x == 1)
    {
        SelectMaxUpdateBucket(CARD_UPDATE_CONTEXT_INDIRECT_LIGHTING);
    }
}
// Walks the priority histogram of one update context from the most important bucket
// and finds the bucket at which the context's tile budget is reached.
//
// Writes two values to RWMaxUpdateBucket for the context:
//   [+0] index of the last bucket to update (PRIORITY_HISTOGRAM_SIZE if the budget is never reached)
//   [+1] number of tiles that may still be updated inside that bucket
void SelectMaxUpdateBucket(uint CardUpdateContext)
{
    // Each context has its own budget; this function is called for both contexts,
    // so the direct lighting budget must not be used for indirect lighting.
    const uint MaxTilesToUpdate = (CardUpdateContext == CARD_UPDATE_CONTEXT_DIRECT_LIGHTING)
        ? MaxDirectLightingTilesToUpdate
        : MaxIndirectLightingTilesToUpdate;

    uint UpdateTileSum = 0;
    uint PriorityBucketIndex = 0;
    uint PriorityBucketMaxTiles = MaxTilesToUpdate;

    for (; PriorityBucketIndex < PRIORITY_HISTOGRAM_SIZE; ++PriorityBucketIndex)
    {
        uint TilesPerBucket = PriorityHistogram[CardUpdateContext * PRIORITY_HISTOGRAM_SIZE + PriorityBucketIndex];

        if (UpdateTileSum + TilesPerBucket >= MaxTilesToUpdate)
        {
            // This bucket would reach the budget: only the remainder may be taken from it.
            PriorityBucketMaxTiles = MaxTilesToUpdate - UpdateTileSum;
            break;
        }

        UpdateTileSum += TilesPerBucket;
    }

    RWMaxUpdateBucket[MAX_UPDATE_BUCKET_STRIDE * CardUpdateContext + 0] = PriorityBucketIndex;
    RWMaxUpdateBucket[MAX_UPDATE_BUCKET_STRIDE * CardUpdateContext + 1] = PriorityBucketMaxTiles;
}
C++(LumenSceneLighting.cpp)
// Excerpt of Lumen::BuildCardUpdateContext: schedules the compute pass that builds the
// per-context lists of card pages to update this frame.
// A lone ':' line marks code that was left out of this excerpt.
void Lumen::BuildCardUpdateContext(...)
{
:
// Build list of tiles to update in this frame
{
FBuildCardsUpdateListCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FBuildCardsUpdateListCS::FParameters>();
// Outputs: a page index allocator and a page index list per lighting context.
PassParameters->RWDirectLightingCardPageIndexAllocator = DirectCardPageIndexAllocatorUAV;
PassParameters->RWDirectLightingCardPageIndexData = GraphBuilder.CreateUAV(DirectLightingCardUpdateContext.CardPageIndexData);
PassParameters->RWIndirectLightingCardPageIndexAllocator = IndirectCardPageIndexAllocatorUAV;
PassParameters->RWIndirectLightingCardPageIndexData = GraphBuilder.CreateUAV(IndirectLightingCardUpdateContext.CardPageIndexData);
PassParameters->RWCardPageTileAllocator = CardPageTileAllocatorUAV;
// Result of the "Select max update bucket" pass, read-only here.
PassParameters->MaxUpdateBucket = MaxUpdateBucketSRV;
PassParameters->LumenCardDataBuffer = FrameTemporaries.CardBufferSRV;
// The card page buffer is bound as a UAV because the shader writes updated page data back.
PassParameters->RWLumenCardPageDataBuffer = FrameTemporaries.CardPageBufferUAV;
PassParameters->CardPageLastUsedBuffer = CardPageLastUsedBufferSRV;
PassParameters->CardPageHighResLastUsedBuffer = CardPageHighResLastUsedBufferSRV;
PassParameters->CardPageNum = NumCardPages;
PassParameters->SurfaceCacheUpdateFrameIndex = UpdateFrameIndex;
PassParameters->FreezeUpdateFrame = FreezeUpdateFrame;
PassParameters->FirstClipmapWorldExtentRcp = FirstClipmapWorldExtentRcp;
// One camera origin per view; the check guards the fixed-size shader array.
PassParameters->NumCameraOrigins = Views.Num();
check(Views.Num() <= PassParameters->WorldCameraOrigins.Num());
for (int32 i = 0; i < Views.Num(); i++)
{
PassParameters->WorldCameraOrigins[i] = FVector4f((FVector3f)Views[i].ViewMatrices.GetViewOrigin(), 0.0f);
}
PassParameters->MaxDirectLightingTilesToUpdate = DirectLightingCardUpdateContext.MaxUpdateTiles;
PassParameters->MaxIndirectLightingTilesToUpdate = IndirectLightingCardUpdateContext.MaxUpdateTiles;
PassParameters->DirectLightingUpdateFactor = DirectLightingCardUpdateContext.UpdateFactor;
PassParameters->IndirectLightingUpdateFactor = IndirectLightingCardUpdateContext.UpdateFactor;
// The shader permutation is selected by whether surface cache feedback is in use.
FBuildCardsUpdateListCS::FPermutationDomain PermutationVector;
PermutationVector.Set<FBuildCardsUpdateListCS::FSurfaceCacheFeedback>(bUseFeedback);
auto ComputeShader = Views[0].ShaderMap->GetShader<FBuildCardsUpdateListCS>(PermutationVector);
// Number of thread groups needed for one thread per card page.
const FIntVector GroupSize(FMath::DivideAndRoundUp<int32>(LumenSceneData.GetNumCardPages(), FBuildCardsUpdateListCS::GetGroupSize()), 1, 1);
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("Build cards update list"),
ComputePassFlags,
ComputeShader,
PassParameters,
GroupSize);
}
:
}
- シェーダー(LumenSceneLighting.usf)
```C++
/**
 * Visits every card page (one thread per page) and decides, per lighting context,
 * whether the page is updated this frame, based on the selected max update bucket.
 * Pages picked for an update get their bookkeeping fields refreshed and written back.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void BuildCardsUpdateListCS(
    uint3 DispatchThreadId : SV_DispatchThreadID)
{
    const uint CardPageIndex = DispatchThreadId.x;

    // Threads past the end of the page list have nothing to do.
    if (CardPageIndex >= CardPageNum)
    {
        return;
    }

    FLumenCardPageData CardPage = GetLumenCardPageData(CardPageIndex);
    FLumenCardData Card = GetLumenCardData(CardPage.CardIndex);

    // Pages without tiles are never scheduled.
    const uint NumCardPageTiles = GetNumCardPageTiles(CardPage);
    if (NumCardPageTiles == 0)
    {
        return;
    }

    // Direct lighting context.
    const bool bDirectScheduled = BuildCardsUpdateList(
        Card,
        CardPage,
        CardPageIndex,
        NumCardPageTiles,
        CARD_UPDATE_CONTEXT_DIRECT_LIGHTING,
        RWDirectLightingCardPageIndexAllocator,
        RWDirectLightingCardPageIndexData);
    if (bDirectScheduled)
    {
        CardPage.LastDirectLightingUpdateFrameIndex = SurfaceCacheUpdateFrameIndex;
    }

    // Indirect lighting context; evaluated after the direct lighting bookkeeping above.
    const bool bIndirectScheduled = BuildCardsUpdateList(
        Card,
        CardPage,
        CardPageIndex,
        NumCardPageTiles,
        CARD_UPDATE_CONTEXT_INDIRECT_LIGHTING,
        RWIndirectLightingCardPageIndexAllocator,
        RWIndirectLightingCardPageIndexData);
    if (bIndirectScheduled)
    {
        CardPage.LastIndirectLightingUpdateFrameIndex = SurfaceCacheUpdateFrameIndex;
        CardPage.IndirectLightingTemporalIndex = CardPage.IndirectLightingTemporalIndex + 1;
    }

    // Persist the new bookkeeping unless updates are frozen.
    const bool bUpdatedCardPage = bDirectScheduled || bIndirectScheduled;
    if (bUpdatedCardPage && FreezeUpdateFrame == 0)
    {
        SetCardPageUpdateData(CardPageIndex, CardPage);
    }
}
// Decides whether a card page is updated this frame for one update context and, if so,
// appends its index to the context's page index list.
//
// Card, CardPage, CardPageIndex  Page being considered and its owning card.
// NumCardPageTiles               Tile count of the page, charged against the budgets.
// CardUpdateContext              CARD_UPDATE_CONTEXT_DIRECT_LIGHTING or CARD_UPDATE_CONTEXT_INDIRECT_LIGHTING.
// RWCardPageIndexAllocator       Single-element counter of entries in RWCardPageIndexData.
// RWCardPageIndexData            Output list of page indices to update.
// Returns true when the page was appended to the list.
bool BuildCardsUpdateList(
    FLumenCardData Card,
    FLumenCardPageData CardPage,
    uint CardPageIndex,
    uint NumCardPageTiles,
    uint CardUpdateContext,
    RWStructuredBuffer<uint> RWCardPageIndexAllocator,
    RWStructuredBuffer<uint> RWCardPageIndexData)
{
    // Each context has its own budget; this function is called for both contexts,
    // so the direct lighting budget must not be used for indirect lighting.
    const uint MaxTilesToUpdate = (CardUpdateContext == CARD_UPDATE_CONTEXT_DIRECT_LIGHTING)
        ? MaxDirectLightingTilesToUpdate
        : MaxIndirectLightingTilesToUpdate;
    const uint MaxUpdateBucketIndex = MaxUpdateBucket[MAX_UPDATE_BUCKET_STRIDE * CardUpdateContext + 0];
    const uint MaxUpdateBucketMaxTiles = MaxUpdateBucket[MAX_UPDATE_BUCKET_STRIDE * CardUpdateContext + 1];

    // Update everything up to the max selected priority bucket
    uint PriorityBucketIndex = GetPriorityBucketIndex(Card, CardPage, CardPageIndex, CardUpdateContext);
    bool bUpdateThisPage = PriorityBucketIndex <= MaxUpdateBucketIndex;

    if (bUpdateThisPage && PriorityBucketIndex == MaxUpdateBucketIndex)
    {
        // Can't update more than MaxUpdateBucketMaxTiles in the max bucket to preserve the general order
        uint NumAllocatedTilesInMaxUpdateBucket = 0;
        InterlockedAdd(RWCardPageTileAllocator[CARD_PAGE_TILE_ALLOCATOR_STRIDE * CardUpdateContext + 1], NumCardPageTiles, NumAllocatedTilesInMaxUpdateBucket);

        if (NumAllocatedTilesInMaxUpdateBucket + NumCardPageTiles > MaxUpdateBucketMaxTiles)
        {
            bUpdateThisPage = false;
        }
    }

    if (bUpdateThisPage)
    {
        // Only report success once the page actually made it into the list.
        bUpdateThisPage = false;

        // Charge the page against the context's overall tile budget.
        uint NumAllocatedTiles = 0;
        InterlockedAdd(RWCardPageTileAllocator[CARD_PAGE_TILE_ALLOCATOR_STRIDE * CardUpdateContext + 0], NumCardPageTiles, NumAllocatedTiles);

        if (NumAllocatedTiles + NumCardPageTiles <= MaxTilesToUpdate)
        {
            uint NextIndex = 0;
            InterlockedAdd(RWCardPageIndexAllocator[0], 1, NextIndex);

            // Guard against writing past CardPageNum entries of the index list.
            if (NextIndex < CardPageNum)
            {
                RWCardPageIndexData[NextIndex] = CardPageIndex;
                bUpdateThisPage = true;
            }
        }
    }

    return bUpdateThisPage;
}
C++(LumenSceneLighting.cpp)
// Excerpt of Lumen::BuildCardUpdateContext: schedules the compute pass that converts the
// page index allocator counts into indirect draw/dispatch arguments.
// A lone ':' line marks code that was left out of this excerpt.
void Lumen::BuildCardUpdateContext(...)
{
:
// Setup indirect args
{
FSetCardPageIndexIndirectArgsCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FSetCardPageIndexIndirectArgsCS::FParameters>();
// Outputs: draw and dispatch indirect argument buffers for each lighting context.
PassParameters->RWDirectLightingDrawCardPageIndicesIndirectArgs = GraphBuilder.CreateUAV(DirectLightingCardUpdateContext.DrawCardPageIndicesIndirectArgs);
PassParameters->RWDirectLightingDispatchCardPageIndicesIndirectArgs = GraphBuilder.CreateUAV(DirectLightingCardUpdateContext.DispatchCardPageIndicesIndirectArgs);
PassParameters->RWIndirectLightingDrawCardPageIndicesIndirectArgs = GraphBuilder.CreateUAV(IndirectLightingCardUpdateContext.DrawCardPageIndicesIndirectArgs);
PassParameters->RWIndirectLightingDispatchCardPageIndicesIndirectArgs = GraphBuilder.CreateUAV(IndirectLightingCardUpdateContext.DispatchCardPageIndicesIndirectArgs);
// Inputs: the allocators are read-only here.
PassParameters->DirectLightingCardPageIndexAllocator = DirectCardPageIndexAllocatorSRV;
PassParameters->IndirectLightingCardPageIndexAllocator = IndirectCardPageIndexAllocatorSRV;
// 3 vertices per instance when rect topology is supported, otherwise 6.
PassParameters->VertexCountPerInstanceIndirect = GRHISupportsRectTopology ? 3 : 6;
auto ComputeShader = Views[0].ShaderMap->GetShader<FSetCardPageIndexIndirectArgsCS>();
// A single thread group is enough: the shader only acts on thread 0.
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("SetCardPageIndexIndirectArgs"),
ComputePassFlags,
ComputeShader,
PassParameters,
FIntVector(1, 1, 1));
}
:
}
- シェーダー(LumenSceneLighting.usf)
```C++
// Indirect argument outputs, one draw and one dispatch buffer per lighting context.
RWBuffer<uint> RWDirectLightingDrawCardPageIndicesIndirectArgs;
RWBuffer<uint> RWDirectLightingDispatchCardPageIndicesIndirectArgs;
RWBuffer<uint> RWIndirectLightingDrawCardPageIndicesIndirectArgs;
RWBuffer<uint> RWIndirectLightingDispatchCardPageIndicesIndirectArgs;
// Number of page indices gathered for each lighting context (element 0).
StructuredBuffer<uint> DirectLightingCardPageIndexAllocator;
StructuredBuffer<uint> IndirectLightingCardPageIndexAllocator;
uint VertexCountPerInstanceIndirect;

/**
 * Fills the indirect draw and dispatch arguments of both lighting contexts from the
 * number of card pages selected for update. Only thread 0 writes.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void SetCardPageIndexIndirectArgsCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
    if (DispatchThreadId.x != 0)
    {
        return;
    }

    // Direct lighting
    {
        const uint NumDirectPages = DirectLightingCardPageIndexAllocator[0];

        // FRHIDrawIndirectParameters
        RWDirectLightingDrawCardPageIndicesIndirectArgs[0] = VertexCountPerInstanceIndirect;
        RWDirectLightingDrawCardPageIndicesIndirectArgs[1] = NumDirectPages;
        RWDirectLightingDrawCardPageIndicesIndirectArgs[2] = 0;
        RWDirectLightingDrawCardPageIndicesIndirectArgs[3] = 0;

        // Thread per page
        RWDirectLightingDispatchCardPageIndicesIndirectArgs[0] = (NumDirectPages + 63) / 64;
        RWDirectLightingDispatchCardPageIndicesIndirectArgs[1] = 1;
        RWDirectLightingDispatchCardPageIndicesIndirectArgs[2] = 1;

        // Thread per tile
        RWDirectLightingDispatchCardPageIndicesIndirectArgs[3 + 0] = 4 * NumDirectPages;
        RWDirectLightingDispatchCardPageIndicesIndirectArgs[3 + 1] = 1;
        RWDirectLightingDispatchCardPageIndicesIndirectArgs[3 + 2] = 1;
    }

    // Indirect lighting
    {
        const uint NumIndirectPages = IndirectLightingCardPageIndexAllocator[0];

        // FRHIDrawIndirectParameters
        RWIndirectLightingDrawCardPageIndicesIndirectArgs[0] = VertexCountPerInstanceIndirect;
        RWIndirectLightingDrawCardPageIndicesIndirectArgs[1] = NumIndirectPages;
        RWIndirectLightingDrawCardPageIndicesIndirectArgs[2] = 0;
        RWIndirectLightingDrawCardPageIndicesIndirectArgs[3] = 0;

        // Thread per page
        RWIndirectLightingDispatchCardPageIndicesIndirectArgs[0] = (NumIndirectPages + 63) / 64;
        RWIndirectLightingDispatchCardPageIndicesIndirectArgs[1] = 1;
        RWIndirectLightingDispatchCardPageIndicesIndirectArgs[2] = 1;

        // Thread per tile
        RWIndirectLightingDispatchCardPageIndicesIndirectArgs[3 + 0] = 4 * NumIndirectPages;
        RWIndirectLightingDispatchCardPageIndicesIndirectArgs[3 + 1] = 1;
        RWIndirectLightingDispatchCardPageIndicesIndirectArgs[3 + 2] = 1;
    }
}
ライトをGatheredLightsに集める
// Collect every scene light that contributes to Lumen direct lighting.
// A light is added once, for the first view that renders it.
TArray<FLumenGatheredLight, TInlineAllocator<64>> GatheredLights;
bool bHasRectLights = false;
for (auto LightIt = Scene->Lights.CreateConstIterator(); LightIt; ++LightIt)
{
    const FLightSceneInfo* LightSceneInfo = (*LightIt).LightSceneInfo;

    // Skip lights that are not rendered at all or have no indirect lighting contribution.
    if (!LightSceneInfo->ShouldRenderLightViewIndependent()
        || !(LightSceneInfo->Proxy->GetIndirectLightingScale() > 0.0f))
    {
        continue;
    }

    for (const FViewInfo& View : Views)
    {
        if (!LightSceneInfo->ShouldRenderLight(View, true))
        {
            continue;
        }

        // The light's index is its position in the gathered list.
        const FLumenGatheredLight GatheredLight(Scene, View, LightSceneInfo, /*LightIndex*/ GatheredLights.Num());
        if (GatheredLight.Type == ELumenLightType::Rect)
        {
            bHasRectLights = true;
        }
        GatheredLights.Add(GatheredLight);
        break;
    }
}
// Excerpt of RenderDirectLightingForLumenScene: uploads the gathered lights and builds the
// per-light surface cache tile lists via CullDirectLightingTiles.
// A lone ':' line marks code that was left out of this excerpt.
void FDeferredShadingSceneRenderer::RenderDirectLightingForLumenScene(...)
{
:
// Pack the gathered lights into a buffer; the exposure is taken from the main view.
FRDGBufferRef LumenPackedLights = CreateLumenLightDataBuffer(GraphBuilder, GatheredLights, MainView.GetLastEyeAdaptationExposure()); // TODO View
// Both contexts are filled in by CullDirectLightingTiles.
FLightTileCullContext CullContext;
FLumenCardTileUpdateContext CardTileUpdateContext;
CullDirectLightingTiles(Views, GraphBuilder, CardUpdateContext, LumenCardSceneUniformBuffer, GatheredLights, LumenPackedLights, CullContext, CardTileUpdateContext, ComputePassFlags);
:
}
C++(LumenSceneDirectLighting.cpp)
// Build list of surface cache tiles per light for future processing
void CullDirectLightingTiles(
const TArray<FViewInfo>& Views,
FRDGBuilder& GraphBuilder,
const FLumenCardUpdateContext& CardUpdateContext,
TRDGUniformBufferRef<FLumenCardScene> LumenCardSceneUniformBuffer,
const TArray<FLumenGatheredLight, TInlineAllocator<64>>& GatheredLights,
FRDGBufferRef LumenPackedLights,
FLightTileCullContext& CullContext,
FLumenCardTileUpdateContext& CardTileUpdateCotnext,
ERDGPassFlags ComputePassFlags)
{
RDG_EVENT_SCOPE(GraphBuilder, "CullTiles %d lights", GatheredLights.Num());
const FGlobalShaderMap* GlobalShaderMap = Views[0].ShaderMap;
const uint32 MaxLightTiles = CardUpdateContext.MaxUpdateTiles;;
const uint32 NumLightsRoundedUp = FMath::RoundUpToPowerOfTwo(FMath::Max(GatheredLights.Num(), 1)) * Views.Num();
const uint32 MaxLightsPerTile = FMath::RoundUpToPowerOfTwo(FMath::Clamp(CVarLumenDirectLightingMaxLightsPerTile.GetValueOnRenderThread(), 1, 32));
const uint32 MaxCulledCardTiles = MaxLightsPerTile * MaxLightTiles;
Lumen::SpliceCardPagesIntoTiles(GraphBuilder, GlobalShaderMap, CardUpdateContext, LumenCardSceneUniformBuffer, CardTileUpdateCotnext, ComputePassFlags);
:
}
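void Lumen::SpliceCardPagesIntoTiles(
FRDGBuilder& GraphBuilder,
const FGlobalShaderMap* GlobalShaderMap,
const FLumenCardUpdateContext& CardUpdateContext,
const TRDGUniformBufferRef<FLumenCardScene>& LumenCardSceneUniformBuffer,
FLumenCardTileUpdateContext& OutCardTileUpdateContext,
ERDGPassFlags ComputePassFlags)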
C++(LumenSceneDirectLighting.cpp)
// Excerpt of Lumen::SpliceCardPagesIntoTiles: creates the card tile buffers and schedules
// the indirect compute pass that splits the selected card pages into card tiles.
// A lone ':' line marks code that was left out of this excerpt.
void Lumen::SpliceCardPagesIntoTiles(...)
{
:
// Single-element counter and the list of card tiles (one uint per tile).
FRDGBufferRef CardTileAllocator = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(uint32), 1), TEXT("Lumen.CardTileAllocator"));
FRDGBufferRef CardTiles = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(uint32), MaxLightTiles), TEXT("Lumen.CardTiles"));
// The counter must start at zero before the shader allocates from it.
AddClearUAVPass(GraphBuilder, GraphBuilder.CreateUAV(CardTileAllocator), 0, ComputePassFlags);
// Splice card pages into card tiles
{
FSpliceCardPagesIntoTilesCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FSpliceCardPagesIntoTilesCS::FParameters>();
PassParameters->IndirectArgBuffer = CardUpdateContext.DispatchCardPageIndicesIndirectArgs;
PassParameters->LumenCardScene = LumenCardSceneUniformBuffer;
PassParameters->RWCardTileAllocator = GraphBuilder.CreateUAV(CardTileAllocator);
PassParameters->RWCardTiles = GraphBuilder.CreateUAV(CardTiles);
PassParameters->CardPageIndexAllocator = GraphBuilder.CreateSRV(CardUpdateContext.CardPageIndexAllocator);
PassParameters->CardPageIndexData = GraphBuilder.CreateSRV(CardUpdateContext.CardPageIndexData);
auto ComputeShader = GlobalShaderMap->GetShader<FSpliceCardPagesIntoTilesCS>();
// Indirect dispatch using the "thread per tile" arguments of the card update context.
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("SpliceCardPagesIntoTiles"),
ComputePassFlags,
ComputeShader,
PassParameters,
CardUpdateContext.DispatchCardPageIndicesIndirectArgs,
FLumenCardUpdateContext::EIndirectArgOffset::ThreadPerTile);
}
:
}
// Inputs: number of card pages selected for update (element 0) and their indices.
StructuredBuffer<uint> CardPageIndexAllocator;
StructuredBuffer<uint> CardPageIndexData;
// Outputs: global card tile counter (element 0) and the packed card tiles.
RWStructuredBuffer<uint> RWCardTileAllocator;
RWStructuredBuffer<uint> RWCardTiles;

// Per-group staging: tiles are collected in group-shared memory first so that only one
// global atomic per group is needed.
groupshared uint SharedTileAllocator;
groupshared uint SharedTiles[THREADGROUP_SIZE * THREADGROUP_SIZE];
groupshared uint SharedGlobalTileOffset;

/**
 * Splice card pages into N card tiles
 *
 * Four thread groups are dispatched per card page (see the "thread per tile" indirect
 * arguments); each group covers one quadrant of the page, one thread per tile.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void SpliceCardPagesIntoTilesCS(
    uint3 GroupId : SV_GroupID,
    uint3 DispatchThreadId : SV_DispatchThreadID,
    uint3 GroupThreadId : SV_GroupThreadID)
{
    uint LinearThreadIndex = GroupThreadId.x + GroupThreadId.y * THREADGROUP_SIZE;

    if (all(GroupThreadId == 0))
    {
        SharedTileAllocator = 0;
        SharedGlobalTileOffset = 0;
        SharedTiles[0] = 0;
    }

    GroupMemoryBarrierWithGroupSync();

    // One thread per tile
    uint LinearLightTileOffset = (GroupId.x % 4);
    uint IndexInIndexBuffer = GroupId.x / 4;

    // Improve tile coherency
    uint2 TileOffset = ZOrder2D(LinearThreadIndex, log2(8));

    // Quadrant offset (8 tiles per quadrant side) plus the thread's tile within the quadrant.
    uint2 TileCoord;
    TileCoord.x = (LinearLightTileOffset % 2) * 8 + TileOffset.x;
    TileCoord.y = (LinearLightTileOffset / 2) * 8 + TileOffset.y;

    if (IndexInIndexBuffer < CardPageIndexAllocator[0])
    {
        uint CardPageIndex = CardPageIndexData[IndexInIndexBuffer];
        FLumenCardPageData CardPage = GetLumenCardPageData(CardPageIndex);

        // NOTE(review): CardIndex is declared uint in FLumenCardPageData, so this test is always true.
        if (CardPage.CardIndex >= 0)
        {
            FLumenCardData Card = GetLumenCardData(CardPage.CardIndex);

            const uint2 SizeInTiles = CardPage.SizeInTexels / CARD_TILE_SIZE;

            // Only tiles inside the page are emitted.
            if (all(TileCoord < SizeInTiles))
            {
                FCardTileData CardTile;
                CardTile.CardPageIndex = CardPageIndex;
                CardTile.TileCoord = TileCoord;

                uint CardTileIndex = 0;
                InterlockedAdd(SharedTileAllocator, 1, CardTileIndex);
                SharedTiles[CardTileIndex] = PackCardTileData(CardTile);
            }
        }
    }

    GroupMemoryBarrierWithGroupSync();

    // Reserve a contiguous range in the global tile list for the whole group.
    if (all(GroupThreadId == 0) && SharedTileAllocator > 0)
    {
        InterlockedAdd(RWCardTileAllocator[0], SharedTileAllocator, SharedGlobalTileOffset);
    }

    GroupMemoryBarrierWithGroupSync();

    // Copy the staged tiles into the reserved range.
    if (LinearThreadIndex < SharedTileAllocator)
    {
        RWCardTiles[SharedGlobalTileOffset + LinearThreadIndex] = SharedTiles[LinearThreadIndex];
    }
}
![image](https://user-images.githubusercontent.com/14350715/218236468-b6e5f5a4-c67a-4fba-a7e7-24078ec0791a.png)
C++(LumenSceneDirectLighting.cpp)
// Excerpt of Lumen::SpliceCardPagesIntoTiles: builds the indirect dispatch arguments for
// card tile processing and publishes the card tile buffers to the caller.
// A lone ':' line marks code that was left out of this excerpt.
void Lumen::SpliceCardPagesIntoTiles(...)
{
:
// Setup indirect args for card tile processing
FRDGBufferRef DispatchCardTilesIndirectArgs = GraphBuilder.CreateBuffer(
FRDGBufferDesc::CreateIndirectDesc<FRHIDispatchIndirectParameters>((uint32)ELumenDispatchCardTilesIndirectArgsOffset::Num),
TEXT("Lumen.DispatchCardTilesIndirectArgs"));
{
FInitializeCardTileIndirectArgsCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FInitializeCardTileIndirectArgsCS::FParameters>();
PassParameters->RWDispatchCardTilesIndirectArgs = GraphBuilder.CreateUAV(DispatchCardTilesIndirectArgs);
// The tile counter is read-only here.
PassParameters->CardTileAllocator = GraphBuilder.CreateSRV(CardTileAllocator);
auto ComputeShader = GlobalShaderMap->GetShader<FInitializeCardTileIndirectArgsCS>();
// A single thread group is enough: the shader only acts on thread 0.
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("InitializeCardTileIndirectArgs"),
ComputePassFlags,
ComputeShader,
PassParameters,
FIntVector(1, 1, 1));
}
// Hand the buffers created by this function back to the caller.
OutCardTileUpdateContext.CardTileAllocator = CardTileAllocator;
OutCardTileUpdateContext.CardTiles = CardTiles;
OutCardTileUpdateContext.DispatchCardTilesIndirectArgs = DispatchCardTilesIndirectArgs;
}
StructuredBuffer<uint> CardTileAllocator;
RWBuffer<uint> RWDispatchCardTilesIndirectArgs;

/**
 * Writes two sets of dispatch arguments derived from the card tile count stored in CardTileAllocator[0]:
 * dwords 0..2 launch one thread per card tile (in groups of 64), dwords 3..5 launch one thread group per card tile.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void InitializeCardTileIndirectArgsCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
	// Only the first thread writes the argument buffer
	if (DispatchThreadId.x != 0)
	{
		return;
	}

	const uint TileCount = CardTileAllocator[0];
	const uint GroupsOf64 = (TileCount + 63) / 64;

	// One thread per card tile
	RWDispatchCardTilesIndirectArgs[0] = GroupsOf64;
	RWDispatchCardTilesIndirectArgs[1] = 1;
	RWDispatchCardTilesIndirectArgs[2] = 1;

	// One thread group per card tile
	RWDispatchCardTilesIndirectArgs[3] = TileCount;
	RWDispatchCardTilesIndirectArgs[4] = 1;
	RWDispatchCardTilesIndirectArgs[5] = 1;
}
C++(LumenSceneDirectLighting.cpp)
// Excerpt of CullDirectLightingTiles; the parameter list and surrounding statements are elided ("(...)" and ":").
// The visible part creates the per-card-tile offset buffers and adds the BuildLightTiles compute pass.
void CullDirectLightingTiles(...)
{
:
// Used to figure out the offset to store light tiles for each card tile
FRDGBufferRef LightTileAllocatorForPerCardTileDispatch = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(uint32), 1), TEXT("Lumen.DirectLighting.LightTileAllocatorForPerCardTileDispatch"));
FRDGBufferRef LightTileOffsetNumPerCardTile = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(uint32), MaxLightTiles), TEXT("Lumen.DirectLighting.LightTileOffsetNumPerCardTile"));
// The single-element allocator is cleared to zero before the pass accumulates into it.
AddClearUAVPass(GraphBuilder, GraphBuilder.CreateUAV(LightTileAllocatorForPerCardTileDispatch), 0, ComputePassFlags);
// Build a list of light tiles for future processing
{
FBuildLightTilesCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FBuildLightTilesCS::FParameters>();
PassParameters->IndirectArgBuffer = DispatchCardTilesIndirectArgs;
PassParameters->View = Views[0].ViewUniformBuffer;
PassParameters->LumenCardScene = LumenCardSceneUniformBuffer;
PassParameters->LumenPackedLights = GraphBuilder.CreateSRV(LumenPackedLights);
PassParameters->RWLightTileAllocator = GraphBuilder.CreateUAV(LightTileAllocator);
PassParameters->RWLightTileAllocatorForPerCardTileDispatch = GraphBuilder.CreateUAV(LightTileAllocatorForPerCardTileDispatch);
PassParameters->RWLightTiles = GraphBuilder.CreateUAV(LightTiles);
PassParameters->RWLightTileAllocatorPerLight = GraphBuilder.CreateUAV(LightTileAllocatorPerLight);
PassParameters->RWLightTileOffsetNumPerCardTile = GraphBuilder.CreateUAV(LightTileOffsetNumPerCardTile);
PassParameters->CardTileAllocator = GraphBuilder.CreateSRV(CardTileAllocator);
PassParameters->CardTiles = GraphBuilder.CreateSRV(CardTiles);
PassParameters->MaxLightsPerTile = MaxLightsPerTile;
PassParameters->NumLights = GatheredLights.Num();
PassParameters->NumViews = Views.Num();
// The per-view arrays in the parameter struct have a fixed size; make sure every view fits.
check(Views.Num() <= PassParameters->WorldToClip.Num());
for (int32 ViewIndex = 0; ViewIndex < Views.Num(); ViewIndex++)
{
PassParameters->WorldToClip[ViewIndex] = FMatrix44f(Views[ViewIndex].ViewMatrices.GetViewProjectionMatrix());
PassParameters->PreViewTranslation[ViewIndex] = FVector4f((FVector3f)Views[ViewIndex].ViewMatrices.GetPreViewTranslation(), 0.0f);
}
// The shader permutation is selected by the maximum number of lights kept per tile.
FBuildLightTilesCS::FPermutationDomain PermutationVector;
PermutationVector.Set<FBuildLightTilesCS::FMaxLightSamples>(MaxLightsPerTile);
auto ComputeShader = GlobalShaderMap->GetShader<FBuildLightTilesCS>(PermutationVector);
// Indirect dispatch using the OneThreadPerCardTile entry of DispatchCardTilesIndirectArgs.
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("BuildLightTiles"),
ComputePassFlags,
ComputeShader,
PassParameters,
DispatchCardTilesIndirectArgs,
(uint32)ELumenDispatchCardTilesIndirectArgsOffset::OneThreadPerCardTile);
}
:
}
- シェーダー(LumenSceneDirectLightingCulling.usf)
![image](https://user-images.githubusercontent.com/14350715/218243071-91eb9df2-e542-4080-8999-4028ae507848.png)
```C++
// Element 0 is read as the number of valid entries in CardTiles.
StructuredBuffer<uint> CardTileAllocator;
// Packed card tiles; decoded with UnpackCardTileData.
StructuredBuffer<uint> CardTiles;
// Fixed-size array of packed light samples gathered for one card tile (MAX_LIGHT_SAMPLES slots).
struct FLightSampleAccumulator
{
uint PackedSamples[MAX_LIGHT_SAMPLES];
};
// Unpacked card tile: index of the card page it belongs to and the tile coordinate inside that page.
struct FCardTileData
{
uint CardPageIndex;
uint2 TileCoord;
};
/**
 * Decodes a packed card tile.
 * Bit layout: [0..23] card page index, [24..27] tile X, [28..31] tile Y.
 */
FCardTileData UnpackCardTileData(uint PackedTile)
{
	const uint TileX = (PackedTile >> 24) & 0xF;
	const uint TileY = (PackedTile >> 28) & 0xF;

	FCardTileData Unpacked;
	Unpacked.CardPageIndex = PackedTile & 0xFFFFFF;
	Unpacked.TileCoord = uint2(TileX, TileY);
	return Unpacked;
}
/**
* Pick the N most important lights per card tile in the pages selected to update this frame, and output a list of light tiles.
* Each thread handles one card tile: it gathers light samples, reserves a range in the global light tile array,
* writes one light tile per kept sample and records a packed (count, offset) value for the card tile.
*/
[numthreads(THREADGROUP_SIZE, 1, 1)]
void BuildLightTilesCS(
uint3 GroupId : SV_GroupID,
uint3 DispatchThreadId : SV_DispatchThreadID,
uint3 GroupThreadId : SV_GroupThreadID)
{
// One thread per tile
uint CardTileIndex = DispatchThreadId.x;
FLightSampleAccumulator LightSampleAccumulator = InitLightSampleAccumulator();
// Threads past the number of allocated card tiles do nothing.
if (CardTileIndex < CardTileAllocator[0])
{
FCardTileData CardTile = UnpackCardTileData(CardTiles[CardTileIndex]);
FLumenCardPageData CardPage = GetLumenCardPageData(CardTile.CardPageIndex);
// Stays 0 when the page has no card or no light sample was kept for this tile.
uint PackedOffsetNum = 0;
if (CardPage.CardIndex >= 0)
{
FLumenCardData Card = GetLumenCardData(CardPage.CardIndex);
// UV range covered by this tile inside the card page
const uint2 SizeInTiles = CardPage.SizeInTexels / CARD_TILE_SIZE;
float2 UVMin = float2(CardTile.TileCoord) / SizeInTiles;
float2 UVMax = float2(CardTile.TileCoord + 1) / SizeInTiles;
// Flip the Y axis of the UV range (UVMin.y / UVMax.y)
float SwapY = UVMin.y;
UVMin.y = 1.0f - UVMax.y;
UVMax.y = 1.0f - SwapY;
uint ViewIndex = GetCardViewIndex(CardPage, Card, UVMin, UVMax, NumViews, true);
// Loop over lights to select N most important lights
for (uint LightIndex = 0; LightIndex < NumLights; ++LightIndex)
{
FLumenLight LumenLight = LoadLumenLight(LightIndex, PreViewTranslation[ViewIndex].xyz);
float3 CardPageWorldCenter = 0.0f; // LWC_TODO:
bool bLightAffectsCard = DoesLightAffectCardPageUVRange(LumenLight, CardPage, Card, UVMin, UVMax, CardPageWorldCenter);
if (bLightAffectsCard)
{
// Center of a tile for estimating attenuation
float3 TranslatedWorldPosition = CardPageWorldCenter + LWCHackToFloat(PrimaryView.PreViewTranslation);
FLightSample LightSample;
LightSample.Weight = GetLightWeight(LumenLight, TranslatedWorldPosition);
LightSample.LightIndex = LightIndex;
LightSample.bHasShadowMask = LumenLight.bHasShadowMask;
AddLightSample(LightSampleAccumulator, LightSample);
}
}
// Count the accumulator slots that hold a sample (non-zero packed value)
uint NumPackedLightSamples = 0;
for (uint PackedSampleIndex = 0; PackedSampleIndex < MAX_LIGHT_SAMPLES; ++PackedSampleIndex)
{
if (LightSampleAccumulator.PackedSamples[PackedSampleIndex] > 0)
{
++NumPackedLightSamples;
}
}
// Reserve a contiguous range in the global light tile array
uint LightTileOffset = 0;
if (NumPackedLightSamples > 0)
{
InterlockedAdd(RWLightTileAllocator[0], NumPackedLightSamples, LightTileOffset);
}
// NOTE(review): reads the first NumPackedLightSamples slots, so this presumably relies on AddLightSample keeping valid samples at the front - confirm
for (uint LightSampleIndex = 0; LightSampleIndex < NumPackedLightSamples; ++LightSampleIndex)
{
FLightSample LightSample = UnpackLightSample(LightSampleAccumulator.PackedSamples[LightSampleIndex]);
// Write light tile to global light tile array
FLightTileForCompactionPass LightTile;
LightTile.LightIndex = LightSample.LightIndex;
LightTile.ViewIndex = ViewIndex;
LightTile.bHasShadowMask = LightSample.bHasShadowMask;
LightTile.CardTileIndex = CardTileIndex;
LightTile.CulledLightIndex = LightSampleIndex;
RWLightTiles[LightTileOffset + LightSampleIndex] = PackLightTileForCompactionPass(LightTile);
// Per (light, view) tile counter
InterlockedAdd(RWLightTileAllocatorPerLight[LightSample.LightIndex * NumViews + ViewIndex], 1);
}
if (NumPackedLightSamples > 0)
{
// Reserve the range used by the per-card-tile light list and pack it as (count << 24) | offset
uint CardLightTilesOffset;
InterlockedAdd(RWLightTileAllocatorForPerCardTileDispatch[0], NumPackedLightSamples, CardLightTilesOffset);
PackedOffsetNum = (NumPackedLightSamples << 24) | CardLightTilesOffset;
}
}
RWLightTileOffsetNumPerCardTile[CardTileIndex] = PackedOffsetNum;
}
}
// Per-view world-to-clip matrices and pre-view translations (at most two views)
float4x4 WorldToClip[2];
float4 PreViewTranslation[2];

/**
 * Selects the view (0 or 1) that a card page UV range is assigned to.
 *
 * With a single view the result is always 0. With two views, the view with the smaller distance to the
 * world-space center of the UV range wins; -PreViewTranslation[i].xyz is used as the position of view i.
 * When bPrioritizeWhenInFrustum is set and the center lies inside a view's frustum, that view's distance is
 * replaced by half of the clip-space w of the center.
 *
 * @param CardPage, Card            Card page and card that own the UV range
 * @param UVMin, UVMax              UV range inside the card page
 * @param NumViews                  Number of active views
 * @param bPrioritizeWhenInFrustum  Whether to apply the in-frustum distance override
 * @return                          Index of the selected view
 */
uint GetCardViewIndex(FLumenCardPageData CardPage, FLumenCardData Card, float2 UVMin, float2 UVMax, uint NumViews, bool bPrioritizeWhenInFrustum)
{
	float3 CardPageLocalCenter;
	float3 CardPageLocalExtent;
	GetCardLocalBBox(CardPage, Card, UVMin, UVMax, CardPageLocalCenter, CardPageLocalExtent);

	float3 CardPageWorldCenter = mul(Card.WorldToLocalRotation, CardPageLocalCenter) + Card.Origin;

	uint ViewIndex = 0;

	if (NumViews > 1)
	{
		float View0Distance = length(CardPageWorldCenter - -PreViewTranslation[0].xyz);
		float View1Distance = length(CardPageWorldCenter - -PreViewTranslation[1].xyz);

#define IN_FRUSTUM_DISTANCE 1
#if IN_FRUSTUM_DISTANCE
		if (bPrioritizeWhenInFrustum)
		{
			// Is the card page center inside the frustum of view 0?
			// The lower bound must be -w: testing xy >= w together with xy <= w only passes when xy == w.
			float4 CardOriginClipSpace0 = mul(float4(CardPageWorldCenter, 1.0f), WorldToClip[0]);
			if (all(CardOriginClipSpace0.xy >= -CardOriginClipSpace0.w) && all(CardOriginClipSpace0.xy <= CardOriginClipSpace0.w) && CardOriginClipSpace0.z < 1.0f)
			{
				View0Distance = .5f * CardOriginClipSpace0.w;
			}

			// Same test for view 1
			float4 CardOriginClipSpace1 = mul(float4(CardPageWorldCenter, 1.0f), WorldToClip[1]);
			if (all(CardOriginClipSpace1.xy >= -CardOriginClipSpace1.w) && all(CardOriginClipSpace1.xy <= CardOriginClipSpace1.w) && CardOriginClipSpace1.z < 1.0f)
			{
				View1Distance = .5f * CardOriginClipSpace1.w;
			}
		}
#endif

		// Pick the view with the smaller distance to the card page center
		ViewIndex = View0Distance < View1Distance ? 0 : 1;
	}

	return ViewIndex;
}
C++(LumenSceneDirectLighting.cpp)
// Excerpt of CullDirectLightingTiles; the parameter list and surrounding statements are elided ("(...)" and ":").
// The visible part adds the pass that turns per (light, view) tile counts into offsets.
void CullDirectLightingTiles(...)
{
:
// Compute prefix sum for card tile array
{
FComputeLightTileOffsetsPerLightCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FComputeLightTileOffsetsPerLightCS::FParameters>();
// Output: offsets; input: the tile counts accumulated in LightTileAllocatorPerLight
PassParameters->RWLightTileOffsetsPerLight = GraphBuilder.CreateUAV(LightTileOffsetsPerLight);
PassParameters->LightTileAllocatorPerLight = GraphBuilder.CreateSRV(LightTileAllocatorPerLight);
PassParameters->NumLights = GatheredLights.Num();
PassParameters->NumViews = Views.Num();
auto ComputeShader = GlobalShaderMap->GetShader<FComputeLightTileOffsetsPerLightCS>();
// A single thread group is dispatched for this pass.
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("ComputeLightTileOffsetsPerLight"),
ComputePassFlags,
ComputeShader,
PassParameters,
FIntVector(1, 1, 1));
}
:
}
- シェーダー(LumenSceneDirectLightingCulling.usf)
```C++
StructuredBuffer<uint> LightTileAllocatorPerLight;
RWStructuredBuffer<uint> RWLightTileOffsetsPerLight;

/**
 * Exclusive prefix sum over the per (light, view) tile counts, used for card tile array compaction.
 * Runs serially on a single thread; slots are visited view by view, and light by light within a view.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void ComputeLightTileOffsetsPerLightCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
	// Every thread except the first one is idle
	if (DispatchThreadId.x != 0)
	{
		return;
	}

	uint RunningOffset = 0;

	for (uint ViewIndex = 0; ViewIndex < NumViews; ViewIndex++)
	{
		for (uint LightIndex = 0; LightIndex < NumLights; ++LightIndex)
		{
			const uint Slot = LightIndex * NumViews + ViewIndex;

			// Store the offset before adding this slot's own count
			RWLightTileOffsetsPerLight[Slot] = RunningOffset;
			RunningOffset += LightTileAllocatorPerLight[Slot];
		}
	}
}
RWLightTileOffsetsPerLight = prefixsum(LightTileAllocatorPerLight)
C++(LumenSceneDirectLighting.cpp)
// Excerpt of CullDirectLightingTiles; the parameter list and surrounding statements are elided ("(...)" and ":").
// The visible part creates the indirect argument buffers for the culled light tiles and adds the pass that fills them.
void CullDirectLightingTiles(...)
{
	:
	// NumTilesDiv1 / NumTilesDiv64 are byte offsets into DispatchLightTilesIndirectArgs;
	// MAX is the number of FRHIDispatchIndirectParameters entries in that buffer.
	enum class EDispatchTilesIndirectArgOffset
	{
		NumTilesDiv1 = 0 * sizeof(FRHIDispatchIndirectParameters),
		NumTilesDiv64 = 1 * sizeof(FRHIDispatchIndirectParameters),
		MAX = 2,
	};

	// Initialize indirect args for culled tiles
	FRDGBufferRef DispatchLightTilesIndirectArgs = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateIndirectDesc<FRHIDispatchIndirectParameters>((int32)EDispatchTilesIndirectArgOffset::MAX), TEXT("Lumen.DirectLighting.DispatchLightTilesIndirectArgs"));
	// The shader writes four dwords per entry into this buffer (draw arguments), so it has to be sized with
	// FRHIDrawIndirectParameters; sizing it with the three-dword dispatch struct under-allocates it.
	FRDGBufferRef DrawTilesPerLightIndirectArgs = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateIndirectDesc<FRHIDrawIndirectParameters>(NumLightsRoundedUp), TEXT("Lumen.DirectLighting.DrawTilesPerLightIndirectArgs"));
	FRDGBufferRef DispatchTilesPerLightIndirectArgs = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateIndirectDesc<FRHIDispatchIndirectParameters>(NumLightsRoundedUp), TEXT("Lumen.DirectLighting.DispatchTilesPerLightIndirectArgs"));
	{
		FInitializeLightTileIndirectArgsCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FInitializeLightTileIndirectArgsCS::FParameters>();
		PassParameters->RWDispatchLightTilesIndirectArgs = GraphBuilder.CreateUAV(DispatchLightTilesIndirectArgs);
		PassParameters->RWDrawTilesPerLightIndirectArgs = GraphBuilder.CreateUAV(DrawTilesPerLightIndirectArgs);
		PassParameters->RWDispatchTilesPerLightIndirectArgs = GraphBuilder.CreateUAV(DispatchTilesPerLightIndirectArgs);
		PassParameters->LightTileAllocator = GraphBuilder.CreateSRV(LightTileAllocator);
		PassParameters->LightTileAllocatorPerLight = GraphBuilder.CreateSRV(LightTileAllocatorPerLight);
		PassParameters->VertexCountPerInstanceIndirect = GRHISupportsRectTopology ? 3 : 6; // Vertex count written into the draw indirect args
		PassParameters->PerLightDispatchFactor = Lumen::UseThreadGroupSize32() ? 2 : 1; // Multiplier applied to the per-light dispatch indirect args
		PassParameters->NumLights = GatheredLights.Num();
		PassParameters->NumViews = Views.Num();

		auto ComputeShader = GlobalShaderMap->GetShader<FInitializeLightTileIndirectArgsCS>();

		// One thread per (light, view) pair
		const FIntVector GroupSize = FComputeShaderUtils::GetGroupCount(GatheredLights.Num() * Views.Num(), FInitializeLightTileIndirectArgsCS::GetGroupSize());

		FComputeShaderUtils::AddPass(
			GraphBuilder,
			RDG_EVENT_NAME("InitializeLightTileIndirectArgs"),
			ComputePassFlags,
			ComputeShader,
			PassParameters,
			GroupSize);
	}
	:
}
- シェーダー(LumenSceneDirectLightingCulling.usf)
```C++
RWBuffer<uint> RWDispatchLightTilesIndirectArgs; // 0: NumTilesDiv1, 1: NumTilesDiv64
RWBuffer<uint> RWDrawTilesPerLightIndirectArgs;
RWBuffer<uint> RWDispatchTilesPerLightIndirectArgs;
uint VertexCountPerInstanceIndirect;
uint PerLightDispatchFactor;

/**
 * Fills the indirect argument buffers for the light tile passes.
 * Thread 0 writes the two global dispatch entries; every thread below NumLights * NumViews also writes
 * the dispatch and draw arguments of its own (light, view) slot.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void InitializeLightTileIndirectArgsCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
	const uint SlotIndex = DispatchThreadId.x;

	// Global card tile array
	if (SlotIndex == 0)
	{
		const uint TotalLightTiles = LightTileAllocator[0];
		const uint GroupsOf64 = (TotalLightTiles + 63) / 64;

		// NumTilesDiv1
		RWDispatchLightTilesIndirectArgs[0] = TotalLightTiles;
		RWDispatchLightTilesIndirectArgs[1] = 1;
		RWDispatchLightTilesIndirectArgs[2] = 1;

		// NumTilesDiv64
		RWDispatchLightTilesIndirectArgs[3 + 0] = GroupsOf64;
		RWDispatchLightTilesIndirectArgs[3 + 1] = 1;
		RWDispatchLightTilesIndirectArgs[3 + 2] = 1;
	}

	// Per light card tile array
	if (SlotIndex < NumLights * NumViews)
	{
		// Tile count of this (light, view) slot
		const uint TilesInSlot = LightTileAllocatorPerLight[SlotIndex];

		// FRHIDispatchIndirectParameters (3 dwords per slot)
		const uint DispatchBase = 3 * SlotIndex;
		RWDispatchTilesPerLightIndirectArgs[DispatchBase + 0] = PerLightDispatchFactor * TilesInSlot;
		RWDispatchTilesPerLightIndirectArgs[DispatchBase + 1] = 1;
		RWDispatchTilesPerLightIndirectArgs[DispatchBase + 2] = 1;

		// FRHIDrawIndirectParameters (4 dwords per slot)
		const uint DrawBase = 4 * SlotIndex;
		RWDrawTilesPerLightIndirectArgs[DrawBase + 0] = VertexCountPerInstanceIndirect;
		RWDrawTilesPerLightIndirectArgs[DrawBase + 1] = TilesInSlot;
		RWDrawTilesPerLightIndirectArgs[DrawBase + 2] = 0;
		RWDrawTilesPerLightIndirectArgs[DrawBase + 3] = 0;
	}
}
- NumTilesDiv1: NumLightTiles 個の thread group (64,1,1) を投入する indirect args
- NumTilesDiv64: NumLightTiles 個の thread を投入する indirect args

C++(LumenSceneDirectLighting.cpp)
// Excerpt of CullDirectLightingTiles; the parameter list and surrounding statements are elided ("(...)" and ":").
// The visible part adds the CompactLightTiles pass and replaces LightTiles with its compacted output.
void CullDirectLightingTiles(...)
{
:
// Per-card-tile light list written by the compaction pass (two uint32 per element)
FRDGBufferRef LightTilesPerCardTile = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(2 * sizeof(uint32), MaxCulledCardTiles), TEXT("Lumen.DirectLighting.LightTilesPerCardTile"));
// Compact card tile array
{
FRDGBufferRef CompactedLightTiles = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(2 * sizeof(uint32), MaxCulledCardTiles), TEXT("Lumen.DirectLighting.CompactedLightTiles"));
FRDGBufferRef CompactedLightTileAllocatorPerLight = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(uint32), NumLightsRoundedUp), TEXT("Lumen.DirectLighting.CompactedLightTileAllocatorPerLight"));
// The per-light counters are cleared to zero; the shader increments them with InterlockedAdd.
AddClearUAVPass(GraphBuilder, GraphBuilder.CreateUAV(CompactedLightTileAllocatorPerLight), 0, ComputePassFlags);
FCompactLightTilesCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FCompactLightTilesCS::FParameters>();
PassParameters->IndirectArgBuffer = DispatchLightTilesIndirectArgs;
PassParameters->RWCompactedLightTiles = GraphBuilder.CreateUAV(CompactedLightTiles);
PassParameters->RWCompactedLightTileAllocatorPerLight = GraphBuilder.CreateUAV(CompactedLightTileAllocatorPerLight);
PassParameters->RWLightTilesPerCardTile = GraphBuilder.CreateUAV(LightTilesPerCardTile);
PassParameters->LightTileAllocator = GraphBuilder.CreateSRV(LightTileAllocator);
PassParameters->LightTiles = GraphBuilder.CreateSRV(LightTiles);
PassParameters->LightTileOffsetsPerLight = GraphBuilder.CreateSRV(LightTileOffsetsPerLight);
PassParameters->CardTiles = GraphBuilder.CreateSRV(CardTiles);
PassParameters->LightTileOffsetNumPerCardTile = GraphBuilder.CreateSRV(LightTileOffsetNumPerCardTile);
PassParameters->NumLights = GatheredLights.Num();
PassParameters->NumViews = Views.Num();
auto ComputeShader = GlobalShaderMap->GetShader<FCompactLightTilesCS>();
// Indirect dispatch using the NumTilesDiv64 entry of DispatchLightTilesIndirectArgs.
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("CompactLightTiles"),
ComputePassFlags,
ComputeShader,
PassParameters,
DispatchLightTilesIndirectArgs,
(int32)EDispatchTilesIndirectArgOffset::NumTilesDiv64);
// From here on LightTiles refers to the compacted buffer.
LightTiles = CompactedLightTiles;
}
:
}
- シェーダー(LumenSceneDirectLightingCulling.usf)
```C++
RWStructuredBuffer<uint2> RWCompactedLightTiles;
RWStructuredBuffer<uint2> RWLightTilesPerCardTile;
RWStructuredBuffer<uint> RWCompactedLightTileAllocatorPerLight;
StructuredBuffer<uint> LightTileAllocator;
StructuredBuffer<uint2> LightTiles;
StructuredBuffer<uint> LightTileOffsetsPerLight;
StructuredBuffer<uint> LightTileOffsetNumPerCardTile;

/**
 * Compact card tile array.
 * Each thread takes one light tile and writes it twice: into the range reserved for its (light, view) pair
 * in RWCompactedLightTiles, and into the light list of its card tile in RWLightTilesPerCardTile.
 */
[numthreads(THREADGROUP_SIZE, 1, 1)]
void CompactLightTilesCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
	const uint LightTileIndex = DispatchThreadId.x;

	// Nothing to do for threads past the last allocated light tile
	if (LightTileIndex >= LightTileAllocator[0])
	{
		return;
	}

	FLightTileForCompactionPass SourceTile = UnpackLightTileForCompactionPass(LightTiles[LightTileIndex]);
	const uint PerLightViewSlot = SourceTile.LightIndex * NumViews + SourceTile.ViewIndex;

	// Allocate a slot inside this (light, view) range, then shift it by the start of the range
	uint CompactedIndex = 0;
	InterlockedAdd(RWCompactedLightTileAllocatorPerLight[PerLightViewSlot], 1, CompactedIndex);
	CompactedIndex += LightTileOffsetsPerLight[PerLightViewSlot];

	const uint SourceCardTileIndex = SourceTile.CardTileIndex;
	FCardTileData SourceCardTile = UnpackCardTileData(CardTiles[SourceCardTileIndex]);

	// Entry consumed per light (shadow mask pass)
	FLightTileForShadowMaskPass ShadowMaskTile;
	ShadowMaskTile.LightIndex = SourceTile.LightIndex;
	ShadowMaskTile.ViewIndex = SourceTile.ViewIndex;
	ShadowMaskTile.CardPageIndex = SourceCardTile.CardPageIndex;
	ShadowMaskTile.TileCoord = SourceCardTile.TileCoord;
	RWCompactedLightTiles[CompactedIndex] = PackLightTileForShadowMaskPass(ShadowMaskTile);

	// The low 24 bits hold the start of this card tile's light list;
	// CulledLightIndex is the index of the light among the lights gathered for the tile
	const uint PackedOffsetNum = LightTileOffsetNumPerCardTile[SourceCardTileIndex];
	const uint PerCardTileSlot = (PackedOffsetNum & 0x00ffffff) + SourceTile.CulledLightIndex;

	// Entry consumed per card tile (light pass)
	FLightTileForLightPass LightPassTile;
	LightPassTile.LightIndex = SourceTile.LightIndex;
	LightPassTile.ViewIndex = SourceTile.ViewIndex;
	LightPassTile.ShadowMaskIndex = SourceTile.bHasShadowMask ? CompactedIndex : 0xffffffff;
	RWLightTilesPerCardTile[PerCardTileSlot] = PackLightTileForLightPass(LightPassTile);
}
// Global shader parameter struct FLumenCardScene: scalar counts and settings, the card / card page /
// mesh cards / heightfield data buffers, the page table buffers and the surface cache atlas textures.
BEGIN_GLOBAL_SHADER_PARAMETER_STRUCT(FLumenCardScene, )
SHADER_PARAMETER(uint32, NumCards)
SHADER_PARAMETER(uint32, NumMeshCards)
SHADER_PARAMETER(uint32, NumCardPages)
SHADER_PARAMETER(uint32, NumHeightfields)
SHADER_PARAMETER(uint32, MaxConeSteps)
SHADER_PARAMETER(FVector2f, PhysicalAtlasSize)
SHADER_PARAMETER(FVector2f, InvPhysicalAtlasSize)
SHADER_PARAMETER(float, IndirectLightingAtlasDownsampleFactor)
SHADER_PARAMETER(uint32, NumDistantCards)
SHADER_PARAMETER(float, DistantSceneMaxTraceDistance)
SHADER_PARAMETER(FVector3f, DistantSceneDirection)
SHADER_PARAMETER_SCALAR_ARRAY(uint32, DistantCardIndices, [MaxDistantCards])
// Structured data buffers, all declared as StructuredBuffer<float4>
SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer<float4>, CardData)
SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer<float4>, CardPageData)
SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer<float4>, MeshCardsData)
SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer<float4>, HeightfieldData)
SHADER_PARAMETER_RDG_BUFFER_SRV(ByteAddressBuffer, PageTableBuffer)
SHADER_PARAMETER_RDG_BUFFER_SRV(ByteAddressBuffer, SceneInstanceIndexToMeshCardsIndexBuffer)
// Surface cache atlases
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, AlbedoAtlas)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, OpacityAtlas)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, NormalAtlas)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, EmissiveAtlas)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, DepthAtlas)
END_GLOBAL_SHADER_PARAMETER_STRUCT()
C++(LumenSceneDirectLighting.cpp)
// Parameter struct for the non ray traced shadow passes of Lumen direct lighting.
BEGIN_SHADER_PARAMETER_STRUCT(FLumenDirectLightingNonRayTracedShadowsParameters, )
// Buffer accessed as indirect dispatch arguments
RDG_BUFFER_ACCESS(IndirectArgBuffer, ERHIAccess::IndirectArgs)
// Outputs
SHADER_PARAMETER_RDG_BUFFER_UAV(RWStructuredBuffer<uint>, RWShadowMaskTiles)
SHADER_PARAMETER_RDG_BUFFER_UAV(RWStructuredBuffer<uint>, RWShadowTraceAllocator)
SHADER_PARAMETER_RDG_BUFFER_UAV(RWStructuredBuffer<uint>, RWShadowTraces)
// View, card scene and light tile inputs
SHADER_PARAMETER_STRUCT_REF(FViewUniformShaderParameters, View)
SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FLumenCardScene, LumenCardScene)
SHADER_PARAMETER_STRUCT_INCLUDE(FLumenLightTileScatterParameters, LightTileScatterParameters)
SHADER_PARAMETER(uint32, CardScatterInstanceIndex)
// Light and view handled by the pass
SHADER_PARAMETER(uint32, LightIndex)
SHADER_PARAMETER(uint32, ViewIndex)
SHADER_PARAMETER(uint32, NumViews)
// NOTE(review): per its name, a zero value used to work around a shader compiler bug - confirm against the caller
SHADER_PARAMETER(uint32, DummyZeroForFixingShaderCompilerBug)
SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FForwardLightData, ForwardLightData)
SHADER_PARAMETER_STRUCT_REF(FDeferredLightUniformStruct, DeferredLightUniforms)
// Shadow sampling inputs
SHADER_PARAMETER_STRUCT_INCLUDE(FVirtualShadowMapSamplingParameters, VirtualShadowMapSamplingParameters)
SHADER_PARAMETER_STRUCT_INCLUDE(FVolumeShadowingShaderParameters, VolumeShadowingShaderParameters)
SHADER_PARAMETER_STRUCT_INCLUDE(FLightCloudTransmittanceParameters, LightCloudTransmittanceParameters)
SHADER_PARAMETER(float, ShadowMapSamplingBias)
SHADER_PARAMETER(float, VirtualShadowMapSamplingBias)
SHADER_PARAMETER(float, HeightfieldShadowReceiverBias)
SHADER_PARAMETER(float, StepFactor)
SHADER_PARAMETER(float, TanLightSourceAngle)
SHADER_PARAMETER(float, MaxTraceDistance)
SHADER_PARAMETER(int32, VirtualShadowMapId)
SHADER_PARAMETER(uint32, SampleDenseShadowMap)
SHADER_PARAMETER(uint32, ForceShadowMaps)
// IES profile
SHADER_PARAMETER(uint32, UseIESProfile)
SHADER_PARAMETER_TEXTURE(Texture2D, IESTexture)
SHADER_PARAMETER_SAMPLER(SamplerState, IESTextureSampler)
END_SHADER_PARAMETER_STRUCT()
// Excerpt of RenderDirectLightingForLumenScene; the parameter list and surrounding statements are elided ("(...)" and ":").
// The visible part adds one non ray traced shadow pass per view and per gathered light that needs a shadow mask.
void FDeferredShadingSceneRenderer::RenderDirectLightingForLumenScene(...)
{
:
// Apply shadow map
{
RDG_EVENT_SCOPE(GraphBuilder, "Non raytraced shadows");
// All three UAVs are created with the SkipBarrier flag and shared by every pass added below.
// ShadowTraceAllocator and ShadowTraces may be null, in which case no UAV is created for them.
FRDGBufferUAVRef ShadowMaskTilesUAV = GraphBuilder.CreateUAV(ShadowMaskTiles, ERDGUnorderedAccessViewFlags::SkipBarrier);
FRDGBufferUAVRef ShadowTraceAllocatorUAV = ShadowTraceAllocator ? GraphBuilder.CreateUAV(ShadowTraceAllocator, ERDGUnorderedAccessViewFlags::SkipBarrier) : nullptr;
FRDGBufferUAVRef ShadowTracesUAV = ShadowTraces ? GraphBuilder.CreateUAV(ShadowTraces, ERDGUnorderedAccessViewFlags::SkipBarrier) : nullptr;
// Number of passes added below (counted per view and per light)
int32 NumShadowedLights = 0;
for (int32 ViewIndex = 0; ViewIndex < Views.Num(); ViewIndex++)
{
const FViewInfo& View = Views[ViewIndex];
for (int32 LightIndex = 0; LightIndex < GatheredLights.Num(); ++LightIndex)
{
const FLumenGatheredLight& GatheredLight = GatheredLights[LightIndex];
// Lights that do not need a shadow mask are skipped.
if (GatheredLight.NeedsShadowMask())
{
ComputeNonRayTracedShadows(
GraphBuilder,
Scene,
View,
LumenCardSceneUniformBuffer,
VisibleLightInfos,
VirtualShadowMapArray,
GatheredLight,
CullContext.LightTileScatterParameters,
ViewIndex,
Views.Num(),
ShadowMaskTilesUAV,
ShadowTraceAllocatorUAV,
ShadowTracesUAV,
ComputePassFlags);
++NumShadowedLights;
}
}
}
// Clear to mark resource as used if it wasn't ever written to
if (ShadowTracesUAV && NumShadowedLights == 0)
{
AddClearUAVPass(GraphBuilder, ShadowTracesUAV, 0);
}
}
:
}
// Excerpt of ComputeNonRayTracedShadows; the rest of the signature and parts of the body are elided (":").
// The visible part selects between the light function path and the plain compute path, and dispatches the
// non ray traced shadow pass for one (light, view) pair through indirect arguments.
void ComputeNonRayTracedShadows(
	FRDGBuilder& GraphBuilder,
	// Scene is a pointer: it is dereferenced with Scene->GetFeatureLevel() below.
	const FScene* Scene,
	const FViewInfo& View,
	TRDGUniformBufferRef
	:
	const FMaterialRenderProxy* LightFunctionMaterialProxy = Light.LightSceneInfo->Proxy->GetLightFunctionMaterial();
	bool bUseLightFunction = true;

	// Fall back to the default light function material when the light has none, when the material is not a
	// light function, or when light functions are disabled by the view's show flags.
	if (!LightFunctionMaterialProxy
		|| !LightFunctionMaterialProxy->GetIncompleteMaterialWithFallback(Scene->GetFeatureLevel()).IsLightFunction()
		|| !View.Family->EngineShowFlags.LightFunctions)
	{
		bUseLightFunction = false;
		LightFunctionMaterialProxy = UMaterial::GetDefaultMaterial(MD_LightFunction)->GetRenderProxy();
	}

	// Byte offset of this (light, view) entry inside the per-light indirect dispatch argument buffer
	const uint32 DispatchIndirectArgOffset = (Light.LightIndex * NumViews + ViewIndex) * sizeof(FRHIDispatchIndirectParameters);

	if (bUseLightFunction)
	{
		:
	}
	else
	{
		FLumenDirectLightingNonRayTracedShadowsCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FLumenDirectLightingNonRayTracedShadowsCS::FParameters>();
		SetCommonParameters(PassParameters->Common);

		// Shader permutation: thread group size, shadow trace compaction, light type and shadowing methods
		FLumenDirectLightingNonRayTracedShadowsCS::FPermutationDomain PermutationVector;
		PermutationVector.Set<FLumenDirectLightingNonRayTracedShadowsCS::FThreadGroupSize32>(Lumen::UseThreadGroupSize32());
		PermutationVector.Set<FLumenDirectLightingNonRayTracedShadowsCS::FCompactShadowTraces>(ShadowTraceAllocatorUAV != nullptr);
		PermutationVector.Set<FLumenDirectLightingNonRayTracedShadowsCS::FLightType>(Light.Type);
		PermutationVector.Set<FLumenDirectLightingNonRayTracedShadowsCS::FVirtualShadowMap>(bUseVirtualShadowMap);
		PermutationVector.Set<FLumenDirectLightingNonRayTracedShadowsCS::FDynamicallyShadowed>(bUseDenseShadowMap);
		PermutationVector.Set<FLumenDirectLightingNonRayTracedShadowsCS::FDenseShadowMap>(bUseDenseShadowMap);
		PermutationVector.Set<FLumenDirectLightingNonRayTracedShadowsCS::FCloudTransmittance>(bUseCloudTransmittance);
		TShaderRef<FLumenDirectLightingNonRayTracedShadowsCS> ComputeShader = View.ShaderMap->GetShader<FLumenDirectLightingNonRayTracedShadowsCS>(PermutationVector);

		FComputeShaderUtils::AddPass(
			GraphBuilder,
			RDG_EVENT_NAME("NonRayTracedShadowPass %s", *Light.Name),
			ComputePassFlags,
			ComputeShader,
			PassParameters,
			LightTileScatterParameters.DispatchIndirectArgs,
			DispatchIndirectArgOffset);
	}
- シェーダー(LumenCardCommon.h)
```C++
/**
 * Converts a card UV and a depth value into a position in card local space.
 * U is mirrored first; both UV axes are then mapped from [0,1] to [+extent,-extent],
 * and Depth is mapped from [0,1] to [+extent.z,-extent.z].
 */
float3 GetCardLocalPosition(float3 CardLocalExtent, float2 CardUV, float Depth)
{
	const float2 MirroredUV = float2(1.0f - CardUV.x, CardUV.y);

	float3 PositionInCard;
	PositionInCard.xy = CardLocalExtent.xy * (1.0f - 2.0f * MirroredUV);
	PositionInCard.z = -(2.0f * Depth - 1.0f) * CardLocalExtent.z;
	return PositionInCard;
}
/**
 * Converts a card UV and a depth value into a world position:
 * the card local position is rotated by Card.WorldToLocalRotation and offset by Card.Origin.
 */
float3 GetCardWorldPosition(FLumenCardData Card, float2 CardUV, float Depth)
{
	const float3 PositionInCard = GetCardLocalPosition(Card.LocalExtent, CardUV, Depth);
	return mul(Card.WorldToLocalRotation, PositionInCard) + Card.Origin;
}
// Card tiles are CARD_TILE_SIZE x CARD_TILE_SIZE texels
#define CARD_TILE_SIZE 8
// Each texel stores SHADOW_MASK_RAY_BITS bits in the shadow mask
#define SHADOW_MASK_RAY_BITS 8
#define SHADOW_MASK_RAY_BITS_MASK ((1u << SHADOW_MASK_RAY_BITS) - 1)
// Number of 32-bit words needed for one full card tile (8 * 8 * 8 / 32 = 16)
#define SHADOW_MASK_CARD_TILE_DWORDS (SHADOW_MASK_RAY_BITS * CARD_TILE_SIZE * CARD_TILE_SIZE / 32)
// The low SHADOW_FACTOR_BITS bits of a texel hold the quantized shadow factor (mask 0x7F)
#define SHADOW_FACTOR_BITS 7
#define SHADOW_FACTOR_BITS_MASK ((1u << SHADOW_FACTOR_BITS) - 1)
// The bit above the shadow factor is the "shadow factor complete" flag (mask 0x80)
#define SHADOW_FACTOR_COMPLETE_BITS 1
#define SHADOW_FACTOR_COMPLETE_BITS_MASK (((1u << SHADOW_FACTOR_COMPLETE_BITS) - 1) << SHADOW_FACTOR_BITS)
// 8 bits per texel
groupshared uint SharedShadowMask[(SHADOW_MASK_RAY_BITS * THREADGROUP_SIZE_X * THREADGROUP_SIZE_Y) / 32];
/**
 * Packs a shadow mask ray into SHADOW_MASK_RAY_BITS bits and merges it into the group-shared shadow mask
 * at the texel given by CoordInCardTile.
 *
 * @param Ray                 Shadow factor and completion flag to store
 * @param CoordInCardTile     Texel coordinate inside the card tile
 * @param bClearExistingMask  When true, the texel's previous bits are cleared before the new value is OR-ed in
 */
void WriteSharedShadowMaskRay(FShadowMaskRay Ray, uint2 CoordInCardTile, const bool bClearExistingMask)
{
	// Quantized shadow factor in the low bits, completion flag in the bit above
	uint PackedRay = uint(Ray.ShadowFactor * SHADOW_FACTOR_BITS_MASK);
	if (Ray.bShadowFactorComplete)
	{
		PackedRay |= SHADOW_FACTOR_COMPLETE_BITS_MASK;
	}

	// Locate the texel inside the packed mask
	const uint TexelBitOffset = SHADOW_MASK_RAY_BITS * (CoordInCardTile.x + CoordInCardTile.y * CARD_TILE_SIZE);
	const uint DwordIndex = TexelBitOffset / 32;
	const uint ShiftInDword = TexelBitOffset % 32;

	if (bClearExistingMask)
	{
		InterlockedAnd(SharedShadowMask[DwordIndex], ~(SHADOW_MASK_RAY_BITS_MASK << ShiftInDword));
	}

	// A zero value has no bits to set, so the atomic is skipped
	if (PackedRay != 0)
	{
		InterlockedOr(SharedShadowMask[DwordIndex], PackedRay << ShiftInDword);
	}
}
- シェーダー(LumenSurfaceCache.ush)
```C++
// Surface cache sample for one texel, as filled in by GetSurfaceCacheData.
struct FLumenSurfaceCacheData
{
// Result of IsSurfaceCacheDepthValid for the sampled depth
bool bValid;
// Value sampled from the depth atlas
float Depth;
// Albedo and emissive; left at zero when the texel is not valid
float3 Albedo;
float3 Emissive;
// Derived
float3 WorldPosition;
float3 WorldNormal;
};
/**
 * Samples the surface cache atlases for one texel and derives its world position and normal.
 *
 * @param Card     Card that owns the texel
 * @param CardUV   UV of the texel inside the card, used to reconstruct the world position
 * @param AtlasUV  UV of the texel inside the atlas textures
 * @return         Surface cache data; Albedo and Emissive stay zero and the normal is decoded from (0.5, 0.5)
 *                 when the sampled depth is not valid
 */
FLumenSurfaceCacheData GetSurfaceCacheData(FLumenCardData Card, float2 CardUV, float2 AtlasUV)
{
	float Depth = Texture2DSampleLevel(LumenCardScene.DepthAtlas, GlobalPointClampedSampler, AtlasUV, 0).x;

	FLumenSurfaceCacheData SurfaceCacheData;
	SurfaceCacheData.Depth = Depth;
	SurfaceCacheData.bValid = IsSurfaceCacheDepthValid(Depth); // Depth < 1.0f
	SurfaceCacheData.Albedo = float3(0.0f, 0.0f, 0.0f);
	SurfaceCacheData.Emissive = float3(0.0f, 0.0f, 0.0f);

	float2 NormalXY = float2(0.5f, 0.5f);

	// The remaining atlases are only sampled for valid texels
	if (SurfaceCacheData.bValid)
	{
		SurfaceCacheData.Albedo = DecodeSurfaceCacheAlbedo(Texture2DSampleLevel(LumenCardScene.AlbedoAtlas, GlobalPointClampedSampler, AtlasUV, 0).xyz);
		// Read all three channels: a single-channel swizzle would broadcast red into the float3 and drop the emissive color
		SurfaceCacheData.Emissive = Texture2DSampleLevel(LumenCardScene.EmissiveAtlas, GlobalPointClampedSampler, AtlasUV, 0).xyz;
		NormalXY = Texture2DSampleLevel(LumenCardScene.NormalAtlas, GlobalPointClampedSampler, AtlasUV, 0).xy;
	}

	SurfaceCacheData.WorldNormal = DecodeSurfaceCacheNormal(Card, NormalXY); // xy(0-1) -> {xy(-1-+1), sqrt(1-xy^2)}
	SurfaceCacheData.WorldPosition = GetCardWorldPosition(Card, CardUV, SurfaceCacheData.Depth);

	return SurfaceCacheData;
}
// Group-shared state of LumenSceneDirectLightingNonRayTracedShadowsCS; thread (0,0) resets the counter,
// the global offset and SharedShadowTraces[0] to 0 at the start of the kernel.
// NOTE(review): presumably used to compact shadow traces per thread group before a global write - confirm
groupshared uint SharedNumShadowTraces;
groupshared uint SharedShadowTraces[THREADGROUP_SIZE_X * THREADGROUP_SIZE_Y];
groupshared uint SharedGlobalShadowTraceOffset;
[numthreads(THREADGROUP_SIZE_X, THREADGROUP_SIZE_Y, 1)] void LumenSceneDirectLightingNonRayTracedShadowsCS( uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_DispatchThreadID, uint3 GroupThreadId : SV_GroupThreadID) { uint LinearGroupThreadId = GroupThreadId.x + GroupThreadId.y * THREADGROUP_SIZE_X;
uint LocalLightTileIndex = GroupId.x / 2;
uint2 CoordInCardTile = GroupThreadId.xy + uint2(0, GroupId.x % 2 ? 0 : 4);
uint ShadowMaskSize = SHADOW_MASK_CARD_TILE_DWORDS / 2;
uint ShadowMaskOffset = GroupId.x % 2 ? 0 : 1;
uint LocalLightTileIndex = GroupId.x;
uint2 CoordInCardTile = GroupThreadId.xy;
uint ShadowMaskSize = SHADOW_MASK_CARD_TILE_DWORDS;
uint ShadowMaskOffset = 0;
if (LinearGroupThreadId < ShadowMaskSize)
{
SharedShadowMask[LinearGroupThreadId] = 0;
}
if (all(GroupThreadId.xy == 0))
{
SharedNumShadowTraces = 0;
SharedGlobalShadowTraceOffset = 0;
SharedShadowTraces[0] = 0;
}
GroupMemoryBarrierWithGroupSync();
uint LightTileIndex = LocalLightTileIndex + LightTileOffsetsPerLight[LightIndex * NumViews + ViewIndex];
FLightTileForShadowMaskPass LightTile = UnpackLightTileForShadowMaskPass(LightTiles[LightTileIndex]);
uint2 TexelInCardPageCoord = LightTile.TileCoord * CARD_TILE_SIZE + CoordInCardTile;
FLumenCardPageData CardPage = GetLumenCardPageData(LightTile.CardPageIndex + DummyZeroForFixingShaderCompilerBug);
float2 AtlasUV = CardPage.PhysicalAtlasUVRect.xy + CardPage.PhysicalAtlasUVTexelScale * (TexelInCardPageCoord + 0.5f);
float2 CardUV = CardPage.CardUVRect.xy + CardPage.CardUVTexelScale * (TexelInCardPageCoord + 0.5f);
FShadowMaskRay ShadowMaskRay;
ShadowMaskRay.ShadowFactor = 0.0f;
ShadowMaskRay.bShadowFactorComplete = true;
FLumenCardData Card = GetLumenCardData(CardPage.CardIndex);
FLumenSurfaceCacheData SurfaceCacheData = GetSurfaceCacheData(Card, CardUV, AtlasUV);
if (SurfaceCacheData.bValid)
{
FDeferredLightData LightData = LoadLightData(LIGHT_TYPE);
float3 WorldNormal = SurfaceCacheData.WorldNormal;
float3 WorldPosition = SurfaceCacheData.WorldPosition;
float3 TranslatedWorldPosition = WorldPosition + LWCHackToFloat(PrimaryView.PreViewTranslation);
float3 L = LightData.Direction;
float3 ToLight = L;
float CombinedAttenuation = 1.0f;
float Attenuation = 1.0f;
float LightMask = 1.0f;
if (LightData.bRadialLight)
{
LightMask = GetLocalLightAttenuation(TranslatedWorldPosition, LightData, ToLight, L);
}
if (LightData.bRectLight)
{
FRect Rect = GetRect(ToLight, LightData);
Attenuation = IntegrateLight(Rect);
}
else
{
FCapsuleLight Capsule = GetCapsule(ToLight, LightData);
Capsule.DistBiasSqr = 0;
Attenuation = IntegrateLight(Capsule, LightData.bInverseSquared);
}
CombinedAttenuation *= Attenuation * LightMask;
CombinedAttenuation *= saturate(dot(WorldNormal, L));
if (CombinedAttenuation > 0)
{
ShadowMaskRay.ShadowFactor = 1.0f;
ShadowMaskRay.bShadowFactorComplete = false;
bool bVSMValid = false;
const float4 PostProjectionPosition = mul(float4(WorldPosition, 1.0), LWCHackToFloat(PrimaryView.WorldToClip));
float ReceiverBias = 0.0f;
if (Card.bHeightfield)
{
float3 WorldCameraOrigin = LWCHackToFloat(PrimaryView.WorldCameraOrigin);
ReceiverBias = CalculateDistanceBasedHeightfieldBias(HeightfieldShadowReceiverBias, WorldPosition, WorldCameraOrigin);
}
// Shadow maps are culled so only query points inside the view frustum are valid
if (all(and(PostProjectionPosition.xy < PostProjectionPosition.w, PostProjectionPosition.xy > -PostProjectionPosition.w)))
{
#if VIRTUAL_SHADOW_MAP
{
// Bias only ray start to maximize chances of hitting an allocated page
FVirtualShadowMapSampleResult VirtualShadowMapSample = SampleVirtualShadowMapLWCHack(VirtualShadowMapId, WorldPosition, VirtualShadowMapSamplingBias + ReceiverBias, WorldNormal);
bVSMValid = VirtualShadowMapSample.bValid;
ShadowMaskRay.ShadowFactor *= VirtualShadowMapSample.ShadowFactor;
// If there's also a dense shadow map present we need to sample both (unless the VSM determines we are fully shadowed anyways)
ShadowMaskRay.bShadowFactorComplete = ShadowMaskRay.ShadowFactor < 0.01f;
#if !DENSE_SHADOW_MAP
ShadowMaskRay.bShadowFactorComplete = ShadowMaskRay.bShadowFactorComplete || VirtualShadowMapSample.bValid;
#endif
}
#endif
#if DENSE_SHADOW_MAP
if (!ShadowMaskRay.bShadowFactorComplete)
{
float3 TranslatedWorldPositionForShadowing = GetCardWorldPositionForShadowing(TranslatedWorldPosition, L, WorldNormal, ShadowMapSamplingBias + ReceiverBias);
#if LIGHT_TYPE == LIGHT_TYPE_DIRECTIONAL
{
#if DYNAMICALLY_SHADOWED
{
float SceneDepth = dot(TranslatedWorldPositionForShadowing - PrimaryView.TranslatedWorldCameraOrigin, View.ViewForward);
bool bShadowingFromValidUVArea = false;
float NewShadowFactor = ComputeDirectionalLightDynamicShadowing(TranslatedWorldPositionForShadowing, SceneDepth, bShadowingFromValidUVArea);
if (bShadowingFromValidUVArea)
{
ShadowMaskRay.ShadowFactor *= NewShadowFactor;
ShadowMaskRay.bShadowFactorComplete = VIRTUAL_SHADOW_MAP ? bVSMValid : true;
}
}
#endif
}
#else
{
bool bShadowingFromValidUVArea = false;
float NewShadowFactor = ComputeVolumeShadowing(TranslatedWorldPositionForShadowing, LightData.bRadialLight && !LightData.bSpotLight, LightData.bSpotLight, bShadowingFromValidUVArea);
if (bShadowingFromValidUVArea)
{
ShadowMaskRay.ShadowFactor *= NewShadowFactor;
ShadowMaskRay.bShadowFactorComplete = VIRTUAL_SHADOW_MAP ? bVSMValid : true;
}
}
#endif
}
#endif
}
#if LIGHT_FUNCTION
if (ShadowMaskRay.ShadowFactor > 0.01f)
{
ShadowMaskRay.ShadowFactor *= GetLightFunction(TranslatedWorldPosition);
ShadowMaskRay.bShadowFactorComplete = ShadowMaskRay.bShadowFactorComplete || ShadowMaskRay.ShadowFactor < 0.01f;
}
#endif
#if USE_CLOUD_TRANSMITTANCE
if (ShadowMaskRay.ShadowFactor > 0.01f)
{
float OutOpticalDepth = 0.0f;
ShadowMaskRay.ShadowFactor *= lerp(1.0f, GetCloudVolumetricShadow(TranslatedWorldPosition, CloudShadowmapTranslatedWorldToLightClipMatrix, CloudShadowmapFarDepthKm, CloudShadowmapTexture, CloudShadowmapSampler, OutOpticalDepth), CloudShadowmapStrength);
ShadowMaskRay.bShadowFactorComplete = ShadowMaskRay.bShadowFactorComplete || ShadowMaskRay.ShadowFactor < 0.01f;
}
#endif
if (ShadowMaskRay.ShadowFactor > 0.01f && UseIESProfile > 0)
{
ShadowMaskRay.ShadowFactor *= ComputeLightProfileMultiplier(TranslatedWorldPosition, DeferredLightUniforms.TranslatedWorldPosition, -DeferredLightUniforms.Direction, DeferredLightUniforms.Tangent);
ShadowMaskRay.bShadowFactorComplete = ShadowMaskRay.bShadowFactorComplete || ShadowMaskRay.ShadowFactor < 0.01f;
}
}
else
{
ShadowMaskRay.ShadowFactor = 0.0f;
ShadowMaskRay.bShadowFactorComplete = true;
}
}
if (ForceShadowMaps != 0)
{
ShadowMaskRay.bShadowFactorComplete = true;
}
WriteSharedShadowMaskRay(ShadowMaskRay, GroupThreadId.xy, false);
#if COMPACT_SHADOW_TRACES
{
if (!ShadowMaskRay.bShadowFactorComplete)
{
// シャドウマップがなければ(Virtual Shadow Mapも含む)、レイトレーシングへ
uint ShadowTraceIndex = 0;
InterlockedAdd(SharedNumShadowTraces, 1, ShadowTraceIndex);
FShadowTrace ShadowTrace;
ShadowTrace.LightTileIndex = LightTileIndex;
ShadowTrace.LightTileCoord = CoordInCardTile;
SharedShadowTraces[ShadowTraceIndex] = PackShadowTrace(ShadowTrace);
}
}
#endif
GroupMemoryBarrierWithGroupSync();
#if COMPACT_SHADOW_TRACES
{
if (all(GroupThreadId == 0))
{
InterlockedAdd(RWShadowTraceAllocator[0], SharedNumShadowTraces, SharedGlobalShadowTraceOffset);
}
}
#endif
GroupMemoryBarrierWithGroupSync();
#if COMPACT_SHADOW_TRACES
{
if (LinearGroupThreadId < SharedNumShadowTraces)
{
RWShadowTraces[SharedGlobalShadowTraceOffset + LinearGroupThreadId] = SharedShadowTraces[LinearGroupThreadId];
}
}
#endif
if (LinearGroupThreadId < ShadowMaskSize)
{
RWShadowMaskTiles[SHADOW_MASK_CARD_TILE_DWORDS * LightTileIndex + ShadowMaskSize * ShadowMaskOffset + LinearGroupThreadId] = SharedShadowMask[LinearGroupThreadId];
}
}
![image](https://user-images.githubusercontent.com/14350715/219937697-85292354-6766-4fd9-af1f-f0fc41967232.png)
C++(LumenSceneDirectLighting.cpp)
// Excerpt of the Lumen scene direct lighting entry point (offscreen shadowing part).
// Lines consisting of a single ':' mark code that was elided from this excerpt.
void FDeferredShadingSceneRenderer::RenderDirectLightingForLumenScene(...)
{
:
// Offscreen shadowing
{
RDG_EVENT_SCOPE(GraphBuilder, "Offscreen shadows");
// A single UAV over the shadow mask tile buffer, created with the SkipBarrier flag, is reused by every per-view pass below.
FRDGBufferUAVRef ShadowMaskTilesUAV = GraphBuilder.CreateUAV(ShadowMaskTiles, ERDGUnorderedAccessViewFlags::SkipBarrier);
for (int32 ViewIndex = 0; ViewIndex < Views.Num(); ViewIndex++)
{
const FViewInfo& View = Views[ViewIndex];
// Hardware ray tracing path: the pass receives the shadow trace buffers, the culled light tiles and the shadow mask UAV.
if (Lumen::UseHardwareRayTracedDirectLighting(ViewFamily))
{
TraceLumenHardwareRayTracedDirectLightingShadows(
GraphBuilder,
Scene,
View,
ViewIndex,
TracingInputs,
ShadowTraceIndirectArgs,
ShadowTraceAllocator,
ShadowTraces,
CullContext.LightTileAllocator,
CullContext.LightTiles,
LumenPackedLights,
ShadowMaskTilesUAV);
}
else
{
// Path used when hardware ray traced direct lighting is disabled (elided from this excerpt).
:
}
}
}
:
}
- C++(LumenSceneDirectLightingHardwareRayTracing.cpp)
```C++
// Signature and first statements of the hardware ray traced shadow pass.
// The excerpt stops before the rest of the body, so only the setup is visible here.
void TraceLumenHardwareRayTracedDirectLightingShadows(
FRDGBuilder& GraphBuilder,
const FScene* Scene,
const FViewInfo& View,
int32 ViewIndex,
const FLumenCardTracingInputs& TracingInputs,
FRDGBufferRef ShadowTraceIndirectArgs,
FRDGBufferRef ShadowTraceAllocator,
FRDGBufferRef ShadowTraces,
FRDGBufferRef LightTileAllocator,
FRDGBufferRef LightTiles,
FRDGBufferRef LumenPackedLights,
FRDGBufferUAVRef ShadowMaskTilesUAV)
{
// Whether the inline ray tracing variant is used for this view's family.
const bool bInlineRayTracing = Lumen::UseHardwareInlineRayTracing(*View.Family);
const bool bUseMinimalPayload = true;
// Buffer holding one set of indirect dispatch parameters for the tracing pass.
// NOTE(review): the debug name says "Lumen.Reflection" although this is the direct lighting pass; looks like a copy/paste leftover, confirm before renaming.
FRDGBufferRef HardwareRayTracingIndirectArgsBuffer = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateIndirectDesc<FRHIDispatchIndirectParameters>(1), TEXT("Lumen.Reflection.CompactTracingIndirectArgs"));
C++(LumenSceneDirectLightingHardwareRayTracing.cpp)
// Excerpt: preparation of the indirect dispatch arguments for the tracing pass.
// Lines consisting of a single ':' mark code that was elided from this excerpt.
void TraceLumenHardwareRayTracedDirectLightingShadows(...)
{
:
// Only needed when the tracing pass is dispatched indirectly.
if (IsHardwareRayTracedDirectLightingIndirectDispatch())
{
FLumenDirectLightingHardwareRayTracingIndirectArgsCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FLumenDirectLightingHardwareRayTracingIndirectArgsCS::FParameters>();
{
// Input: the shadow trace indirect args. Output: the args buffer consumed by the tracing pass.
PassParameters->DispatchLightTilesIndirectArgs = GraphBuilder.CreateSRV(ShadowTraceIndirectArgs, PF_R32_UINT);
PassParameters->RWHardwareRayTracingIndirectArgs = GraphBuilder.CreateUAV(HardwareRayTracingIndirectArgsBuffer, PF_R32_UINT);
// The group size depends on which tracing shader variant (inline compute or ray generation) will run.
PassParameters->OutputThreadGroupSize = bInlineRayTracing ? FLumenDirectLightingHardwareRayTracingBatchedCS::GetThreadGroupSize() : FLumenDirectLightingHardwareRayTracingBatchedRGS::GetThreadGroupSize();
}
TShaderRef<FLumenDirectLightingHardwareRayTracingIndirectArgsCS> ComputeShader = View.ShaderMap->GetShader<FLumenDirectLightingHardwareRayTracingIndirectArgsCS>();
// A single thread group is enough: the shader only writes three values.
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("FLumenDirectLightingHardwareRayTracingIndirectArgsCS"),
ComputeShader,
PassParameters,
FIntVector(1, 1, 1));
}
:
}
- シェーダー(LumenSceneDirectLightingHardwareRayTracing.usf)
```C++
// Source indirect arguments; only element 0 is read by the shader that follows.
Buffer<uint> DispatchLightTilesIndirectArgs;
// Destination: the three group counts (x, y, z) for the hardware ray tracing dispatch.
RWBuffer<uint> RWHardwareRayTracingIndirectArgs;
// Thread group size (x, y) of the shader that will consume the generated arguments.
uint2 OutputThreadGroupSize;
/**
 * Single-thread pass that fills the indirect dispatch arguments of the hardware ray tracing pass.
 * Both dimensions are rounded up to whole thread groups of OutputThreadGroupSize.
 */
[numthreads(1, 1, 1)]
void LumenDirectLightingHardwareRayTracingIndirectArgsCS()
{
	// X dimension: the texels of one card tile.
	const uint GroupCountX = (CARD_TILE_SIZE * CARD_TILE_SIZE + OutputThreadGroupSize.x - 1) / OutputThreadGroupSize.x;

	// Y dimension: the count stored in slot 0 of the source arguments.
	const uint GroupCountY = (DispatchLightTilesIndirectArgs[0] + OutputThreadGroupSize.y - 1) / OutputThreadGroupSize.y;

	RWHardwareRayTracingIndirectArgs[0] = GroupCountX;
	RWHardwareRayTracingIndirectArgs[1] = GroupCountY;
	RWHardwareRayTracingIndirectArgs[2] = 1;
}
C++(LumenSceneDirectLightingHardwareRayTracing.cpp)
// Excerpt: dispatch of the inline (compute shader) tracing pass.
// Lines consisting of a single ':' mark code that was elided from this excerpt.
void TraceLumenHardwareRayTracedDirectLightingShadows(...)
{
:
TShaderRef<FLumenDirectLightingHardwareRayTracingBatchedCS> ComputeShader = View.ShaderMap->GetShader<FLumenDirectLightingHardwareRayTracingBatchedCS>(PermutationVector);
// Indirect path: group counts are read from HardwareRayTracingIndirectArgs at offset 0.
if (IsHardwareRayTracedDirectLightingIndirectDispatch())
{
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("LumenDirectLightingHardwareRayTracingCS %s", *Resolution),
ComputeShader,
PassParameters,
PassParameters->HardwareRayTracingIndirectArgs,
0);
}
else
{
// Direct path: the group count is computed on the CPU from DispatchResolution and the shader's thread group size.
const FIntVector GroupCount = FComputeShaderUtils::GetGroupCount(DispatchResolution, FLumenDirectLightingHardwareRayTracingBatchedCS::GetThreadGroupSize());
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("LumenDirectLightingHardwareRayTracingCS %s", *Resolution),
ComputeShader,
PassParameters,
GroupCount);
}
:
}
- シェーダー(LumenSceneDirectLightingHardwareRayTracing.usf)
```C++
// Acceleration structure and per-hit data handed to the ray tracing context in the tracing function below.
RaytracingAccelerationStructure TLAS;
StructuredBuffer<FHitGroupRootConstants> HitGroupData;
StructuredBuffer<FRayTracingSceneMetadataRecord> RayTracingSceneMetadata;
// Not referenced by the code visible in this excerpt.
Buffer<uint2> ShadowTraceTileData;
Buffer<int> VirtualShadowMapIds;
// Limits passed to the ray tracing context.
uint MaxTranslucentSkipCount;
uint MaxTraversalIterations;
// Stride of the trace-group loop used when the pass is not dispatched indirectly.
uint GroupCount;
// View this dispatch is processing; traces whose light tile belongs to another view are rejected.
uint ViewIndex;
// Not referenced by the code visible in this excerpt.
uint LumenLightType;
// Upper ray distance for the near-field trace and for the far-field trace respectively.
float MaxTraceDistance;
float FarFieldMaxTraceDistance;
// Copied into the context for far-field tracing.
float3 FarFieldReferencePos;
// Bias applied to the ray origin through GetCardWorldPositionForShadowing.
float HardwareRayTracingShadowRayBias;
// Input of the distance-based bias that is added for heightfield cards.
float HeightfieldShadowReceiverBias;
// Half length of the vertical search ray used by the heightfield projection bias.
float HeightfieldProjectionBiasSearchRadius;
// LightTiles holds the packed light tiles that shadow traces index into; LightTileAllocator is not referenced by the visible code.
StructuredBuffer<uint> LightTileAllocator;
StructuredBuffer<uint2> LightTiles;
// A minimal-payload ray is unoccluded only when it completed without hitting anything.
bool IsRayOccluded(FLumenMinimalRayResult RayResult)
{
	const bool bReachedLight = RayResult.bCompleted && !RayResult.bHit;
	return !bReachedLight;
}

// Same rule for the full ray traced lighting result.
bool IsRayOccluded(FRayTracedLightingResult RayResult)
{
	const bool bReachedLight = RayResult.bIsCompleted && !RayResult.bIsHit;
	return !bReachedLight;
}
// Element 0 is the number of allocated shadow traces; it bounds the trace-group loop of the non-indirect path.
StructuredBuffer<uint> ShadowTraceAllocator;
// Packed shadow traces, decoded with UnpackShadowTrace.
StructuredBuffer<uint> ShadowTraces;
// Forward declaration: the entry point that follows calls this function before its definition appears.
void LumenSceneDirectLightingHardwareRayTracing_INTERNAL(
uint3 DispatchThreadIndex,
uint3 DispatchGroupId,
uint DispatchGroupIndex);
// Compute shader entry point of the shadow ray pass; it only forwards its system values
// to the shared implementation.
// The values in the inline comments are the group sizes annotated by the author of these notes.
[numthreads(INLINE_RAY_TRACING_THREAD_GROUP_SIZE_X /* = 8 */, INLINE_RAY_TRACING_THREAD_GROUP_SIZE_Y /* = 4 */, 1)]
void LumenSceneDirectLightingHardwareRayTracingCS(
	uint3 DispatchThreadIndex : SV_DispatchThreadID,
	uint3 DispatchGroupId : SV_GroupID,
	uint DispatchGroupIndex : SV_GroupIndex)
{
	LumenSceneDirectLightingHardwareRayTracing_INTERNAL(DispatchThreadIndex, DispatchGroupId, DispatchGroupIndex);
}
// Shared implementation of the shadow ray pass. Each thread takes one packed shadow trace,
// traces a visibility ray from the card texel towards the light (unless the stored shadow factor
// is already complete) and writes the updated shadow mask ray back.
void LumenSceneDirectLightingHardwareRayTracing_INTERNAL(
uint3 DispatchThreadIndex,
uint3 DispatchGroupId,
uint DispatchGroupIndex)
{
// Traces are addressed in groups of 64: x selects the trace inside a group, y selects the group.
uint ShadowTraceIndexOffset = DispatchThreadIndex.x % 64;
#if DIM_INDIRECT_DISPATCH
uint ShadowTraceGroup = DispatchThreadIndex.y;
#else
// Without indirect dispatch each thread strides over the allocated trace groups in steps of GroupCount.
for (uint ShadowTraceGroup = DispatchThreadIndex.y; ShadowTraceGroup * 64 < ShadowTraceAllocator[0]; ShadowTraceGroup += GroupCount)
#endif
{
uint ShadowTraceIndex = ShadowTraceGroup * 64 + ShadowTraceIndexOffset;
FShadowTrace ShadowTrace = UnpackShadowTrace(ShadowTraces[ShadowTraceIndex]);
uint2 CoordInCardTile = ShadowTrace.LightTileCoord;
const FLightTileForShadowMaskPass LightTile = UnpackLightTileForShadowMaskPass(LightTiles[ShadowTrace.LightTileIndex]);
// Only traces that belong to the view of this dispatch are processed.
// NOTE(review): in the looping (#else) variant this `return` also abandons every later trace group of this thread; confirm that `continue` is not what was intended.
if (LightTile.ViewIndex != ViewIndex)
{
return;
}
uint2 TexelInCardPageCoord = LightTile.TileCoord * CARD_TILE_SIZE + CoordInCardTile;
const FLumenLight LumenLight = LoadLumenLight(LightTile.LightIndex, LWCHackToFloat(PrimaryView.PreViewTranslation));
// Skip traces whose texel coordinate does not lie inside a card tile.
if (all(CoordInCardTile < CARD_TILE_SIZE))
{
FShadowMaskRay ShadowMaskRay;
ReadShadowMaskRayRW(ShadowTrace.LightTileIndex, CoordInCardTile, ShadowMaskRay);
// Trace visibility ray
if (!ShadowMaskRay.bShadowFactorComplete)
{
// Reconstruct the surface cache sample (position and normal) of the texel being shadowed.
FLumenCardPageData CardPage = GetLumenCardPageData(LightTile.CardPageIndex);
FLumenCardData Card = GetLumenCardData(CardPage.CardIndex);
float2 AtlasUV = CardPage.PhysicalAtlasUVRect.xy + CardPage.PhysicalAtlasUVTexelScale * (TexelInCardPageCoord + 0.5f);
float2 CardUV = CardPage.CardUVRect.xy + CardPage.CardUVTexelScale * (TexelInCardPageCoord + 0.5f);
FLumenSurfaceCacheData SurfaceCacheData = GetSurfaceCacheData(Card, CardUV, AtlasUV);
float3 WorldPosition = SurfaceCacheData.WorldPosition;
float3 WorldNormal = SurfaceCacheData.WorldNormal;
float3 TranslatedWorldPosition = WorldPosition + LWCHackToFloat(PrimaryView.PreViewTranslation); // LUMEN_LWC_TODO
// Directional lights use the light direction and the configured trace distances as-is.
float3 L = LumenLight.DeferredLightData.Direction;
float3 ToLight = L;
float NearFieldTMax = MaxTraceDistance;
float FarFieldTMax = FarFieldMaxTraceDistance;
// Other light types aim at the light position and never trace further than the light itself.
if (LumenLight.Type != LIGHT_TYPE_DIRECTIONAL)
{
ToLight = LumenLight.DeferredLightData.TranslatedWorldPosition - TranslatedWorldPosition;
float LengthToLight = length(ToLight);
NearFieldTMax = min(NearFieldTMax, LengthToLight);
FarFieldTMax = min(FarFieldTMax, LengthToLight);
L = normalize(ToLight);
}
FRayDesc Ray;
const float2 RandSample = 0.5;
// The first branch is compiled out; the #else branch builds the ray origin and direction by hand.
#if 0
bool bIsValid = GenerateOcclusionRay(LumenLight.Type, LightParameters, WorldPosition, WorldNormal, RandSample,
Ray.Origin, Ray.Direction, Ray.TMin, Ray.TMax);
#else
float ReceiverBias = 0.0f;
// The distance-based receiver bias is only used when the heightfield projection bias is disabled.
#if !ENABLE_HEIGHTFIELD_PROJECTION_BIAS
if (Card.bHeightfield)
{
float3 WorldCameraOrigin = LWCHackToFloat(PrimaryView.WorldCameraOrigin);
ReceiverBias = CalculateDistanceBasedHeightfieldBias(HeightfieldShadowReceiverBias, WorldPosition, WorldCameraOrigin);
}
#endif
Ray.Origin = GetCardWorldPositionForShadowing(WorldPosition, L, WorldNormal, HardwareRayTracingShadowRayBias + ReceiverBias) + LWCHackToFloat(PrimaryView.PreViewTranslation); // LUMEN_LWC_TODO
Ray.Direction = L;
#endif
{
FRayCone RayCone = (FRayCone)0;
uint CullingMode = RAY_FLAG_CULL_FRONT_FACING_TRIANGLES;
FRayTracedLightingContext Context = CreateRayTracedLightingContext(
TLAS,
RayCone,
CoordInCardTile,
CoordInCardTile.x, // dummy coordinate
CullingMode,
MaxTranslucentSkipCount,
MaxTraversalIterations);
// Shadows don't need closest hit distance
Context.bAcceptFirstHitAndEndSearch = true;
Context.HitGroupData = HitGroupData;
Context.RayTracingSceneMetadata = RayTracingSceneMetadata;
bool bRayOccluded = false;
// The far-field segment [NearFieldTMax, FarFieldTMax] is traced first; a hit there makes the near-field trace unnecessary.
#if ENABLE_FAR_FIELD_TRACING
{
// Rebase origin and point-source direction for far-field rays
FRayDesc FarFieldRay;
FarFieldRay.Origin = TranslatedWorldPosition;
FarFieldRay.Direction = Ray.Direction;
FarFieldRay.TMin = NearFieldTMax;
FarFieldRay.TMax = FarFieldTMax;
Context.InstanceMask = RAY_TRACING_MASK_FAR_FIELD;
Context.bSkipClosestHitShader = true;
Context.TranslucentSkipCountMax = 0;
Context.FarFieldReferencePos = FarFieldReferencePos;
Context.FarFieldMaxTraceDistance = FarFieldTMax;
FRayTracedLightingResult RayResult = TraceSurfaceCacheFarFieldRay(FarFieldRay, Context);
bRayOccluded = IsRayOccluded(RayResult);
Context.bSkipClosestHitShader = false;
}
#endif // ENABLE_FAR_FIELD_TRACING
#if ENABLE_HEIGHTFIELD_PROJECTION_BIAS
// Find the heightfield intersection that corresponds to the given card position.
if (Card.bHeightfield && !bRayOccluded)
{
// Vertical search ray centred on the current origin, spanning SearchRadius below and above it.
float SearchRadius = HeightfieldProjectionBiasSearchRadius;
float3 SearchDirection = float3(0.0, 0.0, 1.0);
FRayDesc ProjectedRay;
ProjectedRay.Origin = Ray.Origin - SearchDirection * SearchRadius;
ProjectedRay.Direction = SearchDirection;
ProjectedRay.TMin = 0.0f;
ProjectedRay.TMax = 2.0f * SearchRadius;
Context.CullingMode = RAY_FLAG_CULL_FRONT_FACING_TRIANGLES;
FLumenMinimalRayResult SearchResult = TraceLumenMinimalRay(ProjectedRay, Context);
// On a hit, restart the shadow ray from the hit point, pushed out slightly along the hit normal.
if (IsRayOccluded(SearchResult))
{
float Epsilon = 0.01;
Ray.Origin = ProjectedRay.Origin + ProjectedRay.Direction * SearchResult.HitT + SearchResult.HitNormal * Epsilon;
}
}
#endif // ENABLE_HEIGHTFIELD_PROJECTION_BIAS
// Near-field trace over [0, NearFieldTMax] against the opaque instance mask.
if (!bRayOccluded)
{
Ray.TMin = 0;
Ray.TMax = NearFieldTMax;
Context.InstanceMask = RAY_TRACING_MASK_OPAQUE;
Context.TranslucentSkipCountMax = MaxTranslucentSkipCount;
FRayTracedLightingResult RayResult = TraceSurfaceCacheRay(Ray, Context);
bRayOccluded = IsRayOccluded(RayResult);
}
// An occluded ray zeroes the shadow factor; otherwise the stored factor is kept.
ShadowMaskRay.ShadowFactor *= bRayOccluded ? 0.0f : 1.0f;
}
// The texel is now fully resolved; write it back to the shadow mask.
ShadowMaskRay.bShadowFactorComplete = true;
WriteShadowMaskRay(ShadowMaskRay, ShadowTrace.LightTileIndex, CoordInCardTile, true);
}
}
}
}
C++(LumenSceneDirectLighting.cpp)
// Excerpt of the Lumen scene direct lighting entry point (light application part).
// Lines consisting of a single ':' mark code that was elided from this excerpt.
void FDeferredShadingSceneRenderer::RenderDirectLightingForLumenScene(...)
{
:
// Apply lights
{
RDG_EVENT_SCOPE(GraphBuilder, "Lights");
// If no pass produced the shadow mask tiles, a default structured buffer is bound instead.
FRDGBufferSRVRef ShadowMaskTilesSRV = GraphBuilder.CreateSRV(ShadowMaskTiles->HasBeenProduced() ? ShadowMaskTiles : GSystemTextures.GetDefaultStructuredBuffer(GraphBuilder, sizeof(uint32)));
FRDGBufferSRVRef LumenPackedLightsSRV = GraphBuilder.CreateSRV(LumenPackedLights);
FRDGBufferSRVRef CardTilesSRV = GraphBuilder.CreateSRV(CardTileUpdateContext.CardTiles);
FRDGBufferSRVRef LightTileOffsetNumPerCardTileSRV = GraphBuilder.CreateSRV(CullContext.LightTileOffsetNumPerCardTile);
FRDGBufferSRVRef LightTilesPerCardTileSRV = GraphBuilder.CreateSRV(CullContext.LightTilesPerCardTile);
// The direct lighting atlas is the output of the batched light pass.
FRDGTextureUAVRef DirectLightingAtlasUAV = GraphBuilder.CreateUAV(TracingInputs.DirectLightingAtlas);
RenderDirectLightIntoLumenCardsBatched(
GraphBuilder,
Views,
LumenCardSceneUniformBuffer,
LumenPackedLightsSRV,
ShadowMaskTilesSRV,
CardTilesSRV,
LightTileOffsetNumPerCardTileSRV,
LightTilesPerCardTileSRV,
DirectLightingAtlasUAV,
CardTileUpdateContext.DispatchCardTilesIndirectArgs,
bHasRectLights,
ComputePassFlags);
}
:
}
C++(LumenSceneDirectLighting.cpp)
/**
 * Adds the "Batched lights" compute pass of the Lumen card direct lighting.
 *
 * Fills the FLumenCardBatchDirectLightingCS parameters from the given resources, picks the shader
 * permutation from the number of views and bHasRectLights, and dispatches indirectly from
 * IndirectArgBuffer using the OneGroupPerCardTile argument slot.
 */
void RenderDirectLightIntoLumenCardsBatched(
	FRDGBuilder& GraphBuilder,
	const TArray<FViewInfo>& Views,
	TRDGUniformBufferRef<FLumenCardScene> LumenCardSceneUniformBuffer,
	FRDGBufferSRVRef LumenPackedLightsSRV,
	FRDGBufferSRVRef ShadowMaskTilesSRV,
	FRDGBufferSRVRef CardTilesSRV,
	FRDGBufferSRVRef LightTileOffsetNumPerCardTileSRV,
	FRDGBufferSRVRef LightTilesPerCardTileSRV,
	FRDGTextureUAVRef DirectLightingAtlasUAV,
	FRDGBufferRef IndirectArgBuffer,
	bool bHasRectLights,
	ERDGPassFlags ComputePassFlags)
{
	using FBatchedLightingCS = FLumenCardBatchDirectLightingCS;

	FBatchedLightingCS::FParameters* Params = GraphBuilder.AllocParameters<FBatchedLightingCS::FParameters>();

	// Dispatch arguments and scene-wide uniform buffers.
	Params->IndirectArgBuffer = IndirectArgBuffer;
	Params->View = Views[0].ViewUniformBuffer;
	Params->LumenCardScene = LumenCardSceneUniformBuffer;

	// Read-only inputs of the lighting shader.
	Params->LumenPackedLights = LumenPackedLightsSRV;
	Params->ShadowMaskTiles = ShadowMaskTilesSRV;
	Params->CardTiles = CardTilesSRV;
	Params->LightTileOffsetNumPerCardTile = LightTileOffsetNumPerCardTileSRV;
	Params->LightTilesPerCardTile = LightTilesPerCardTileSRV;

	// Output atlas.
	Params->RWDirectLightingAtlas = DirectLightingAtlasUAV;

	// One pre-view translation per view, stored with a zero w component.
	const int32 NumViews = Views.Num();
	for (int32 Index = 0; Index < NumViews; ++Index)
	{
		const FVector3f Translation = (FVector3f)Views[Index].ViewMatrices.GetPreViewTranslation();
		Params->PreViewTranslation[Index] = FVector4f(Translation, 0.0f);
	}

	FBatchedLightingCS::FPermutationDomain Permutation;
	Permutation.Set<FBatchedLightingCS::FMultiView>(NumViews > 1);
	Permutation.Set<FBatchedLightingCS::FHasRectLights>(bHasRectLights);
	auto Shader = Views[0].ShaderMap->GetShader<FBatchedLightingCS>(Permutation);

	const uint32 IndirectArgsOffset = (uint32)ELumenDispatchCardTilesIndirectArgsOffset::OneGroupPerCardTile;
	FComputeShaderUtils::AddPass(
		GraphBuilder,
		RDG_EVENT_NAME("Batched lights"),
		ComputePassFlags,
		Shader,
		Params,
		IndirectArgBuffer,
		IndirectArgsOffset);
}
// Output atlas written by the batched direct lighting shader.
RWTexture2D<float3> RWDirectLightingAtlas;
// Packed card tiles, decoded with UnpackCardTileData.
StructuredBuffer<uint> CardTiles;
// Per card tile: packed offset and count of its entries in LightTilesPerCardTile.
StructuredBuffer<uint> LightTileOffsetNumPerCardTile;
// Packed light tiles, decoded with UnpackLightTileForLightPass.
StructuredBuffer<uint2> LightTilesPerCardTile;
/**
 * One thread group per card tile, one thread per texel of that tile.
 * Sums the irradiance of every light tile assigned to the card tile (scaled by its shadow factor)
 * and stores the result in the direct lighting atlas. Texels without lights or without valid
 * surface cache data are written as zero.
 */
[numthreads(CARD_TILE_SIZE, CARD_TILE_SIZE, 1)]
void LumenCardBatchDirectLightingCS(
	uint3 GroupId : SV_GroupID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	const uint CardTileIndex = GroupId.x;
	const uint2 TexelCoordInTile = GroupThreadId.xy;

	// Offset and count of this tile's light tiles share one dword (24-bit and 8-bit fields).
	const uint PackedOffsetNum = LightTileOffsetNumPerCardTile[CardTileIndex];
	const uint FirstLightTile = BitFieldExtractU32(PackedOffsetNum, 24, 0);
	const uint LightTileCount = BitFieldExtractU32(PackedOffsetNum, 8, 24);

	// Locate the texel inside its card page and inside the physical atlas.
	FCardTileData CardTile = UnpackCardTileData(CardTiles[CardTileIndex]);
	FLumenCardPageData CardPage = GetLumenCardPageData(CardTile.CardPageIndex);
	const uint2 CoordInCardPage = CARD_TILE_SIZE * CardTile.TileCoord + TexelCoordInTile;
	const uint2 AtlasCoord = CardPage.PhysicalAtlasCoord + CoordInCardPage;
	float2 AtlasUV = CardPage.PhysicalAtlasUVRect.xy + CardPage.PhysicalAtlasUVTexelScale * (CoordInCardPage + 0.5);
	float2 CardUV = CardPage.CardUVRect.xy + CardPage.CardUVTexelScale * (CoordInCardPage + 0.5);

	FLumenCardData Card = GetLumenCardData(CardPage.CardIndex);
	FLumenSurfaceCacheData SurfaceCacheData = GetSurfaceCacheData(Card, CardUV, AtlasUV);

	float3 Irradiance = 0.0f;

	if (LightTileCount != 0 && SurfaceCacheData.bValid)
	{
		for (uint TileOffset = 0; TileOffset < LightTileCount; ++TileOffset)
		{
			FLightTileForLightPass LightTile = UnpackLightTileForLightPass(LightTilesPerCardTile[FirstLightTile + TileOffset]);

			// Lights without a shadow mask are treated as fully lit.
			FShadowMaskRay ShadowMaskRay;
			ShadowMaskRay.ShadowFactor = 1.0f;
			ShadowMaskRay.bShadowFactorComplete = true;
			if (LightTile.ShadowMaskIndex != 0xffffffff)
			{
				ReadShadowMaskRay(LightTile.ShadowMaskIndex, TexelCoordInTile, ShadowMaskRay);
			}

			// Fully shadowed texels contribute nothing for this light.
			if (!(ShadowMaskRay.ShadowFactor > 0.0f))
			{
				continue;
			}

			uint ViewIndex = HAS_MULTIPLE_VIEWS ? LightTile.ViewIndex : 0;
			Irradiance += GetIrradianceForLight(LightTile.LightIndex, SurfaceCacheData, LWCHackToFloat(PreViewTranslation[ViewIndex]).xyz, ShadowMaskRay.ShadowFactor);
		}
	}

	RWDirectLightingAtlas[AtlasCoord] = Irradiance;
}
- シェーダー(LumenSceneDirectLighting.ush)
```C++
// Bit-packed shadow mask: SHADOW_MASK_CARD_TILE_DWORDS dwords per tile, SHADOW_MASK_RAY_BITS bits per texel.
StructuredBuffer<uint> ShadowMaskTiles;
/**
 * Decodes the shadow mask entry of one texel of a tile.
 *
 * CardTileIndex    tile whose dwords are read from ShadowMaskTiles
 * CoordInCardTile  texel coordinate inside the tile
 * ShadowMaskRay    receives the normalized shadow factor and the "complete" flag
 */
void ReadShadowMaskRay(uint CardTileIndex, uint2 CoordInCardTile, inout FShadowMaskRay ShadowMaskRay)
{
	// Texels are laid out row by row, SHADOW_MASK_RAY_BITS bits each.
	const uint TexelIndex = CoordInCardTile.x + CoordInCardTile.y * CARD_TILE_SIZE;
	const uint BitOffset = SHADOW_MASK_RAY_BITS * TexelIndex;

	const uint DwordIndex = SHADOW_MASK_CARD_TILE_DWORDS * CardTileIndex + BitOffset / 32;
	const uint RayBits = ShadowMaskTiles[DwordIndex] >> (BitOffset % 32);

	ShadowMaskRay.bShadowFactorComplete = (RayBits & SHADOW_FACTOR_COMPLETE_BITS_MASK) != 0;
	ShadowMaskRay.ShadowFactor = float(RayBits & SHADOW_FACTOR_BITS_MASK) / SHADOW_FACTOR_BITS_MASK;
}
/**
 * Returns the deferred light data of the Lumen light at LightIndex, loaded with the given
 * PreViewTranslation. The rect light flag is kept only when HAS_RECT_LIGHTS is set.
 */
FDeferredLightData GetLumenDirectLightingLightData(uint LightIndex, float3 PreViewTranslation)
{
	// The result comes solely from the per-light record; nothing is loaded through LoadLightData / LIGHT_TYPE here.
	FLumenLight LumenLight = LoadLumenLight(LightIndex, PreViewTranslation);
	FDeferredLightData Out = LumenLight.DeferredLightData;
	Out.bRectLight = HAS_RECT_LIGHTS && Out.bRectLight;
	return Out;
}
/**
 * Evaluates the irradiance one light contributes to a surface cache sample.
 *
 * LightIndex          index of the light to load
 * SurfaceCacheData    supplies the world position and normal of the sample
 * PreViewTranslation  added to the world position to obtain the translated world position
 * ShadowFactor        scalar multiplied into the result
 */
float3 GetIrradianceForLight(
	uint LightIndex,
	FLumenSurfaceCacheData SurfaceCacheData,
	float3 PreViewTranslation,
	float ShadowFactor)
{
	FDeferredLightData LightData = GetLumenDirectLightingLightData(LightIndex, PreViewTranslation);

	float3 WorldNormal = SurfaceCacheData.WorldNormal;
	float3 TranslatedWorldPosition = SurfaceCacheData.WorldPosition + PreViewTranslation;

	float3 L = LightData.Direction;
	float3 ToLight = L;

	float3 FalloffColor;
	float Attenuation;
	float NoL;

	if (LightData.bRadialLight)
	{
		// Local lights: distance/cone mask times the integrated area light term.
		FAreaLightIntegrateContext Context = (FAreaLightIntegrateContext) 0;
		float LightMask = GetLocalLightAttenuation(TranslatedWorldPosition, LightData, ToLight, L);
		float AreaAttenuation = 0.0f;

		// Fixed roughness and view vector used only to build the integrate context.
		float Roughness = 1;
		float3 V = float3(1, 0, 0);

		if (LightData.bRectLight)
		{
			FRect Rect = GetRect(ToLight, LightData);
			AreaAttenuation = IntegrateLight(Rect);

			// The context stays zeroed when the rect is not visible from the sample.
			if (IsRectVisible(Rect))
			{
				const FRectTexture SourceTexture = InitRectTexture(LightData);
				Context = CreateRectIntegrateContext(Roughness, WorldNormal, V, Rect, SourceTexture);
			}
		}
		else
		{
			FCapsuleLight Capsule = GetCapsule(ToLight, LightData);
			Capsule.DistBiasSqr = 0;
			Context = CreateCapsuleIntegrateContext(Roughness, WorldNormal, V, Capsule, LightData.bInverseSquared);
			AreaAttenuation = IntegrateLight(Capsule, LightData.bInverseSquared);
		}

		Attenuation = AreaAttenuation * LightMask;
		FalloffColor = Context.AreaLight.FalloffColor;
		NoL = Context.NoL;
	}
	else
	{
		// Non-radial lights: no attenuation, plain clamped cosine term.
		Attenuation = 1;
		FalloffColor = 1;
		NoL = saturate(dot(WorldNormal, L));
	}

	const float ScalarWeight = Attenuation * NoL * ShadowFactor;
	return LightData.Color * FalloffColor * ScalarWeight;
}
C++(LumenSceneDirectLighting.cpp)
// Excerpt of the Lumen scene direct lighting entry point (final combine step).
// Lines consisting of a single ':' mark code that was elided from this excerpt;
// MainView and CardTileUpdateContext are set up in that elided part.
void FDeferredShadingSceneRenderer::RenderDirectLightingForLumenScene(
FRDGBuilder& GraphBuilder,
const FLumenCardTracingInputs& TracingInputs,
const FLumenCardUpdateContext& CardUpdateContext,
ERDGPassFlags ComputePassFlags)
{
LLM_SCOPE_BYTAG(Lumen);
// Nothing is done when direct lighting is disabled through GLumenDirectLighting.
if (GLumenDirectLighting)
{
:
// Update Final Lighting
Lumen::CombineLumenSceneLighting(
Scene,
MainView,
GraphBuilder,
TracingInputs,
CardUpdateContext,
CardTileUpdateContext,
ComputePassFlags);
}
}
C++(LumenSceneLighting.cpp)
/**
 * Adds the "CombineLighting CS" pass, which reads the albedo, opacity, emissive, direct and indirect
 * lighting atlases and writes the final lighting atlas for the card tiles of CardTileUpdateContext.
 * The pass is dispatched indirectly using the OneGroupPerCardTile argument slot.
 */
void Lumen::CombineLumenSceneLighting(
	FScene* Scene,
	const FViewInfo& View,
	FRDGBuilder& GraphBuilder,
	const FLumenCardTracingInputs& TracingInputs,
	const FLumenCardUpdateContext& CardUpdateContext,
	const FLumenCardTileUpdateContext& CardTileUpdateContext,
	ERDGPassFlags ComputePassFlags)
{
	LLM_SCOPE_BYTAG(Lumen);

	FLumenSceneData& LumenSceneData = *Scene->GetLumenSceneData(View);
	FRDGBufferRef IndirectArgs = CardTileUpdateContext.DispatchCardTilesIndirectArgs;

	FLumenCardCombineLightingCS::FParameters* Params = GraphBuilder.AllocParameters<FLumenCardCombineLightingCS::FParameters>();

	// Dispatch arguments and uniform buffers.
	Params->IndirectArgsBuffer = IndirectArgs;
	Params->View = View.ViewUniformBuffer;
	Params->LumenCardScene = TracingInputs.LumenCardSceneUniformBuffer;

	// The boost setting is clamped to at least 1 and passed as its reciprocal.
	const float ClampedDiffuseBoost = FMath::Max(View.FinalPostProcessSettings.LumenDiffuseColorBoost, 1.0f);
	Params->DiffuseColorBoost = 1.0f / ClampedDiffuseBoost;

	// Input atlases and their sampler.
	Params->AlbedoAtlas = TracingInputs.AlbedoAtlas;
	Params->OpacityAtlas = TracingInputs.OpacityAtlas;
	Params->EmissiveAtlas = TracingInputs.EmissiveAtlas;
	Params->DirectLightingAtlas = TracingInputs.DirectLightingAtlas;
	Params->IndirectLightingAtlas = TracingInputs.IndirectLightingAtlas;
	Params->BilinearClampedSampler = TStaticSamplerState<SF_Bilinear, AM_Clamp, AM_Clamp, AM_Clamp>::GetRHI();

	// Tiles to process and the output atlas.
	Params->CardTiles = GraphBuilder.CreateSRV(CardTileUpdateContext.CardTiles);
	Params->RWFinalLightingAtlas = GraphBuilder.CreateUAV(TracingInputs.FinalLightingAtlas);

	// Half a texel of the radiosity atlas in UV units; the shader uses it to clamp indirect lighting UVs.
	const FIntPoint RadiosityAtlasSize = LumenSceneData.GetRadiosityAtlasSize();
	Params->IndirectLightingAtlasHalfTexelSize = FVector2f(0.5f / RadiosityAtlasSize.X, 0.5f / RadiosityAtlasSize.Y);

	auto Shader = View.ShaderMap->GetShader<FLumenCardCombineLightingCS>();
	const uint32 IndirectArgsOffset = (uint32)ELumenDispatchCardTilesIndirectArgsOffset::OneGroupPerCardTile;

	FComputeShaderUtils::AddPass(
		GraphBuilder,
		RDG_EVENT_NAME("CombineLighting CS"),
		ComputePassFlags,
		Shader,
		Params,
		IndirectArgs,
		IndirectArgsOffset);
}
// Sampler used for every atlas read of the combine shader.
SamplerState BilinearClampedSampler;
// Packed card tiles, decoded with UnpackCardTileData.
StructuredBuffer<uint> CardTiles;
// Output atlas receiving the combined lighting.
RWTexture2D<float3> RWFinalLightingAtlas;
// Half a texel of the indirect lighting atlas in UV units; used to clamp its sampling UVs.
float2 IndirectLightingAtlasHalfTexelSize;
/**
 * One thread group per card tile, one thread per texel of that tile.
 * Samples albedo, emissive, direct and indirect lighting at the texel and writes the combined
 * result into the final lighting atlas.
 */
[numthreads(CARD_TILE_SIZE, CARD_TILE_SIZE, 1)]
void CombineLumenSceneLightingCS(
	uint3 GroupId : SV_GroupID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint CardTileIndex = GroupId.x;
	uint2 TexelCoordInTile = GroupThreadId.xy;

	FCardTileData CardTile = UnpackCardTileData(CardTiles[CardTileIndex]);
	FLumenCardPageData CardPage = GetLumenCardPageData(CardTile.CardPageIndex);

	// Texel position inside the card page, in the physical atlas, and as atlas UV (texel center).
	uint2 CoordInCardPage = CARD_TILE_SIZE * CardTile.TileCoord + TexelCoordInTile;
	uint2 AtlasCoord = CardPage.PhysicalAtlasCoord + CoordInCardPage;
	float2 AtlasUV = CardPage.PhysicalAtlasUVRect.xy + CardPage.PhysicalAtlasUVTexelScale * (CoordInCardPage + 0.5);

	// When sampling from a downsampled Indirect Lighting atlas we need to appropriately clamp input UVs to prevent bilinear reading outside of the valid area
	// The variable is declared exactly once; declaring it twice in this scope is a redefinition error.
	float2 IndirectLightingAtlasUV = clamp(AtlasUV, CardPage.PhysicalAtlasUVRect.xy + IndirectLightingAtlasHalfTexelSize, CardPage.PhysicalAtlasUVRect.zw - IndirectLightingAtlasHalfTexelSize);

	float3 Albedo = Texture2DSampleLevel(AlbedoAtlas, BilinearClampedSampler, AtlasUV, 0).xyz;
	float3 Emissive = Texture2DSampleLevel(EmissiveAtlas, BilinearClampedSampler, AtlasUV, 0).xyz;
	float3 DirectLighting = Texture2DSampleLevel(DirectLightingAtlas, BilinearClampedSampler, AtlasUV, 0).xyz;
	float3 IndirectLighting = Texture2DSampleLevel(IndirectLightingAtlas, BilinearClampedSampler, IndirectLightingAtlasUV, 0).xyz;

	float3 FinalLighting = CombineFinalLighting(Albedo, Emissive, DirectLighting, IndirectLighting);
	RWFinalLightingAtlas[AtlasCoord] = FinalLighting;
}
/**
 * Combines the lighting terms of one texel: (direct + indirect) * Lambert diffuse of the decoded
 * albedo, plus emissive. The result is made finite and clamped to be non-negative.
 */
float3 CombineFinalLighting(float3 Albedo, float3 Emissive, float3 DirectLighting, float3 IndirectLighting)
{
	const float3 DecodedAlbedo = DecodeSurfaceCacheAlbedo(Albedo);
	const float3 Unclamped = (DirectLighting + IndirectLighting) * Diffuse_Lambert(DecodedAlbedo) + Emissive;

	// Secure against strange values, as the result is written to a persistent atlas with a feedback loop
	return max(MakeFinite(Unclamped), float3(0.0f, 0.0f, 0.0f));
}
C++(LumenRadiosity.cpp)
/**
 * Runs the radiosity update of the Lumen scene.
 *
 * When radiosity is enabled and the final lighting atlas contents are valid, the card pages are
 * spliced into tiles, the radiosity pass is added and the final lighting is recombined.
 * Otherwise the radiosity atlas is cleared.
 */
void FDeferredShadingSceneRenderer::RenderRadiosityForLumenScene(
	FRDGBuilder& GraphBuilder,
	const FLumenSceneFrameTemporaries& FrameTemporaries,
	const FLumenCardTracingInputs& TracingInputs,
	FRDGTextureRef RadiosityAtlas,
	FRDGTextureRef RadiosityNumFramesAccumulatedAtlas,
	const FLumenCardUpdateContext& CardUpdateContext,
	ERDGPassFlags ComputePassFlags)
{
	LLM_SCOPE_BYTAG(Lumen);

	FLumenSceneData& LumenSceneData = *Scene->GetLumenSceneData(Views[0]);
	extern int32 GLumenSceneRecaptureLumenSceneEveryFrame;

	const bool bUpdateRadiosity = Lumen::IsRadiosityEnabled(ViewFamily)
		&& LumenSceneData.bFinalLightingAtlasContentsValid;

	if (!bUpdateRadiosity)
	{
		// Nothing to gather: leave a cleared atlas behind.
		AddClearRenderTargetPass(GraphBuilder, RadiosityAtlas);
		return;
	}

	RDG_EVENT_SCOPE(GraphBuilder, "Radiosity");

	// Tile list consumed by the final combine pass below.
	FLumenCardTileUpdateContext CardTileUpdateContext;
	Lumen::SpliceCardPagesIntoTiles(GraphBuilder, Views[0].ShaderMap, CardUpdateContext, TracingInputs.LumenCardSceneUniformBuffer, CardTileUpdateContext, ComputePassFlags);

	const bool bRenderSkylight = Lumen::ShouldHandleSkyLight(Scene, ViewFamily);

	LumenRadiosity::AddRadiosityPass(
		GraphBuilder,
		Scene,
		Views,
		bRenderSkylight,
		LumenSceneData,
		RadiosityAtlas,
		RadiosityNumFramesAccumulatedAtlas,
		TracingInputs,
		CardUpdateContext,
		ComputePassFlags);

	// Update Final Lighting
	Lumen::CombineLumenSceneLighting(
		Scene,
		Views[0],
		GraphBuilder,
		TracingInputs,
		CardUpdateContext,
		CardTileUpdateContext,
		ComputePassFlags);
}
C++(LumenRadiosity.cpp)
// Excerpt of the radiosity pass setup: building the list of radiosity tiles.
// Lines consisting of a single ':' mark code that was elided from this excerpt.
void LumenRadiosity::AddRadiosityPass(...)
{
:
const FGlobalShaderMap* GlobalShaderMap = FirstView.ShaderMap;
// Build a list of radiosity tiles for future processing
{
FBuildRadiosityTilesCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FBuildRadiosityTilesCS::FParameters>();
PassParameters->IndirectArgBuffer = CardUpdateContext.DispatchCardPageIndicesIndirectArgs;
PassParameters->LumenCardScene = TracingInputs.LumenCardSceneUniformBuffer;
// Outputs: the tile counter(s) and the packed tile data.
PassParameters->RWCardTileAllocator = GraphBuilder.CreateUAV(CardTileAllocator);
PassParameters->RWCardTileData = GraphBuilder.CreateUAV(CardTiles);
// Inputs: the list of card pages selected for update.
PassParameters->CardPageIndexAllocator = GraphBuilder.CreateSRV(CardUpdateContext.CardPageIndexAllocator);
PassParameters->CardPageIndexData = GraphBuilder.CreateSRV(CardUpdateContext.CardPageIndexData);
PassParameters->NumViews = Views.Num();
PassParameters->MaxCardTiles = MaxCardTiles;
// The per-view parameter arrays have a fixed size; make sure every view fits.
check(Views.Num() <= PassParameters->WorldToClip.Num());
for (int32 ViewIndex = 0; ViewIndex < Views.Num(); ViewIndex++)
{
PassParameters->WorldToClip[ViewIndex] = FMatrix44f(Views[ViewIndex].ViewMatrices.GetViewProjectionMatrix());
PassParameters->PreViewTranslation[ViewIndex] = FVector4f((FVector3f)Views[ViewIndex].ViewMatrices.GetPreViewTranslation(), 0.0f);
}
auto ComputeShader = GlobalShaderMap->GetShader<FBuildRadiosityTilesCS>();
// Dispatched indirectly from the card page indirect args, using the ThreadPerTile argument slot.
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("BuildRadiosityTiles"),
ComputePassFlags,
ComputeShader,
PassParameters,
CardUpdateContext.DispatchCardPageIndicesIndirectArgs,
FLumenCardUpdateContext::EIndirectArgOffset::ThreadPerTile);
}
:
}
- シェーダー(LumenRadiosityCulling.usf)
```C++
// Card page list consumed by this pass: CardPageIndexAllocator[0] is used as the number of valid entries in CardPageIndexData
StructuredBuffer<uint> CardPageIndexAllocator;
StructuredBuffer<uint> CardPageIndexData;
// Per-view tile counters (indexed by view) and the packed tile list, addressed as ViewIndex * MaxCardTiles + tile slot
RWStructuredBuffer<uint> RWCardTileAllocator;
RWStructuredBuffer<uint> RWCardTileData;
uint NumViews;
uint MaxCardTiles;
/**
* Build a list of radiosity tiles
*
* Every 4 consecutive thread groups handle one entry of CardPageIndexData, and each thread handles one
* candidate tile of that card page. Tiles that fall inside the page are appended to the tile list of
* the view returned by GetCardViewIndex().
*/
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void BuildRadiosityTilesCS(
uint3 GroupId : SV_GroupID,
uint3 DispatchThreadId : SV_DispatchThreadID,
uint3 GroupThreadId : SV_GroupThreadID)
{
// One thread per tile
// GroupId.x encodes both the card page entry (GroupId.x / 4) and one of 4 quadrants of that page (GroupId.x % 4)
uint LinearLightTileOffset = (GroupId.x % 4);
uint IndexInIndexBuffer = GroupId.x / 4;
// Quadrants are arranged 2x2, each offset by 8 tiles
// NOTE(review): assumes THREADGROUP_SIZE == 8 so the quadrants cover the page without gaps - confirm
uint2 TileCoord;
TileCoord.x = (LinearLightTileOffset % 2) * 8 + GroupThreadId.x;
TileCoord.y = (LinearLightTileOffset / 2) * 8 + GroupThreadId.y;
if (IndexInIndexBuffer < CardPageIndexAllocator[0])
{
uint CardPageIndex = CardPageIndexData[IndexInIndexBuffer];
FLumenCardPageData CardPage = GetLumenCardPageData(CardPageIndex);
if (CardPage.CardIndex >= 0)
{
FLumenCardData Card = GetLumenCardData(CardPage.CardIndex);
const uint2 SizeInTiles = CardPage.SizeInTexels / CARD_TILE_SIZE;
// Threads whose tile lies outside the page do nothing
if (all(TileCoord < SizeInTiles))
{
float2 UVMin = float2(TileCoord) / SizeInTiles;
float2 UVMax = float2(TileCoord + 1) / SizeInTiles;
// Flip the V range (V -> 1 - V); min and max are swapped so that UVMin.y <= UVMax.y still holds
float SwapY = UVMin.y;
UVMin.y = 1.0f - UVMax.y;
UVMax.y = 1.0f - SwapY;
uint ViewIndex = GetCardViewIndex(CardPage, Card, UVMin, UVMax, NumViews, false);
FCardTileData CardTile;
CardTile.CardPageIndex = CardPageIndex;
CardTile.TileCoord = TileCoord;
// Atomically reserve the next free slot in this view's tile list
uint NextTileIndex = 0;
InterlockedAdd(RWCardTileAllocator[ViewIndex], 1, NextTileIndex);
// NOTE(review): no visible check that NextTileIndex < MaxCardTiles - presumably the buffer is sized for the worst case; confirm
RWCardTileData[ViewIndex * MaxCardTiles + NextTileIndex] = PackCardTileData(CardTile);
}
}
}
}
C++(LumenRadiosity.cpp)
void LumenRadiosity::AddRadiosityPass(...)
{
:
FRDGBufferRef RadiosityIndirectArgs = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateIndirectDesc<FRHIDispatchIndirectParameters>((uint32)ERadiosityIndirectArgs::MAX * Views.Num()), TEXT("Lumen.RadiosityIndirectArgs"));
// Setup indirect args for future passes
{
FLumenRadiosityIndirectArgsCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FLumenRadiosityIndirectArgsCS::FParameters>();
PassParameters->RWIndirectArgs = GraphBuilder.CreateUAV(RadiosityIndirectArgs);
PassParameters->RadiosityTexelTraceParameters = RadiosityTexelTraceParameters;
#if RHI_RAYTRACING
PassParameters->HardwareRayTracingThreadGroupSize = Lumen::UseHardwareInlineRayTracing(*FirstView.Family) ?
FLumenRadiosityHardwareRayTracingCS::GetThreadGroupSize().X :
FLumenRadiosityHardwareRayTracingRGS::GetThreadGroupSize().X;
#else
PassParameters->HardwareRayTracingThreadGroupSize = 1;
#endif
auto ComputeShader = GlobalShaderMap->GetShader<FLumenRadiosityIndirectArgsCS>();
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("IndirectArgs"),
ComputePassFlags,
ComputeShader,
PassParameters,
FIntVector(1, 1, 1));
}
:
}
- C++(LumenRadiosity.cpp)
```C++
// Returns the radiosity probe spacing for the view: starts from GLumenRadiosityProbeSpacing, is halved when
// LumenSceneLightingQuality >= 6, then is clamped to [1, Lumen::CardTileSize] and rounded up to a power of two.
// The "(=N)" and "/*=N*/" markers are the note author's annotations of the default values.
uint32 GetRadiosityProbeSpacing(const FViewInfo& View)
{
int32 RadiosityProbeSpacing = GLumenRadiosityProbeSpacing/*=4*/;
if (View.FinalPostProcessSettings.LumenSceneLightingQuality >= 6)
{
// Higher lighting quality halves the spacing, i.e. places probes more densely
RadiosityProbeSpacing /= 2;
}
return FMath::RoundUpToPowerOfTwo(FMath::Clamp<uint32>(RadiosityProbeSpacing, 1, Lumen::CardTileSize(=8)));
}
const int32 ProbeSpacing = LumenRadiosity::GetRadiosityProbeSpacing(FirstView);
// Returns the per-axis resolution of a radiosity probe's trace grid for the view:
// GLumenRadiosityHemisphereProbeResolution scaled by sqrt(LumenSceneLightingQuality), with the quality
// clamped to [0.5, 4] and the result clamped to [1, 16].
int32 GetHemisphereProbeResolution(const FViewInfo& View)
{
const float LumenSceneLightingQuality = FMath::Clamp<float>(View.FinalPostProcessSettings.LumenSceneLightingQuality, .5f, 4.0f);
// Scaling by the square root makes the trace count (resolution squared, see NumTracesPerProbe) scale linearly with quality
return FMath::Clamp<int32>(GLumenRadiosityHemisphereProbeResolution/*=4*/ * FMath::Sqrt(LumenSceneLightingQuality), 1, 16);
}
const int32 HemisphereProbeResolution = LumenRadiosity::GetHemisphereProbeResolution(FirstView);
RWBuffer<uint> RWIndirectArgs;
uint HardwareRayTracingThreadGroupSize;
/**
 * Writes a 1D dispatch into indirect-args slot ArgIndex:
 * X = NumThreads divided by ThreadGroupSize (rounded up), Y = Z = 1.
 */
void SetIndirectArgs(uint ArgIndex, uint NumThreads, uint ThreadGroupSize)
{
	// Each slot occupies three consecutive uints (X, Y, Z group counts)
	const uint FirstElement = ArgIndex * 3;
	const uint GroupCountX = (NumThreads + ThreadGroupSize - 1) / ThreadGroupSize;

	RWIndirectArgs[FirstElement + 0] = GroupCountX;
	RWIndirectArgs[FirstElement + 1] = 1;
	RWIndirectArgs[FirstElement + 2] = 1;
}
/**
 * Writes the dispatch dimensions for a hardware ray tracing pass into indirect-args slot ArgIndex.
 * The group count is derived from HardwareRayTracingThreadGroupSize (a shader parameter), while
 * ThreadGroupSize is only forwarded to GetRayTracingThreadCountWrapped().
 */
void SetHardwareRayTracingIndirectArgs(uint ArgIndex, uint NumThreads, uint ThreadGroupSize)
{
	// Round up so that a partially filled last group is still dispatched
	const uint GroupCount = (NumThreads + HardwareRayTracingThreadGroupSize - 1) / HardwareRayTracingThreadGroupSize;
	const int3 WrappedDispatch = GetRayTracingThreadCountWrapped(GroupCount, ThreadGroupSize);

	// Each slot occupies three consecutive uints (X, Y, Z)
	const uint FirstElement = ArgIndex * 3;
	RWIndirectArgs[FirstElement + 0] = WrappedDispatch.x;
	RWIndirectArgs[FirstElement + 1] = WrappedDispatch.y;
	RWIndirectArgs[FirstElement + 2] = WrappedDispatch.z;
}
/**
 * Fills the indirect dispatch arguments consumed by the later radiosity passes.
 * One thread per view; each view owns 5 consecutive argument slots starting at ViewIndex * 5.
 * The "(=N)" markers are the note author's annotations of the values in effect.
 */
[numthreads(THREADGROUP_SIZE(=1), 1, 1)]
void LumenRadiosityIndirectArgsCS(uint3 DispatchThreadId : SV_DispatchThreadID)
{
	uint ViewIndex = DispatchThreadId.x;
	if (ViewIndex < NumViews)
	{
		// View offset
		uint BaseOffset = ViewIndex * 5;
		/* const uint32 RadiosityTileSize = Lumen::CardTileSize(=8) / ProbeSpacing; */
		// Total traces = tiles * probes per tile * traces per probe
		uint NumTraces = CardTileAllocator[ViewIndex] * RadiosityTileSize * RadiosityTileSize * NumTracesPerProbe(=HemisphereProbeResolution * HemisphereProbeResolution);
		// ERadiosityIndirectArgs::NumTracesDiv64
		SetIndirectArgs(BaseOffset + 0, NumTraces, /*ThreadGroupSize*/ 64);
		// ERadiosityIndirectArgs::NumTracesDiv32
		SetIndirectArgs(BaseOffset + 1, NumTraces, /*ThreadGroupSize*/ 32);
		// ERadiosityIndirectArgs::ThreadPerProbe
		SetIndirectArgs(BaseOffset + 2, CardTileAllocator[ViewIndex] * RadiosityTileSize * RadiosityTileSize, /*ThreadGroupSize*/ 32);
		// ERadiosityIndirectArgs::ThreadPerRadiosityTexel
		SetIndirectArgs(BaseOffset + 3, CardTileAllocator[ViewIndex] * CARD_TILE_SIZE(=8) * CARD_TILE_SIZE, /*ThreadGroupSize*/ 32);
		// ERadiosityIndirectArgs::HardwareRayTracingThreadPerTrace
		SetHardwareRayTracingIndirectArgs(BaseOffset + 4, NumTraces, /*ThreadGroupSize*/ 64);
	}
}
C++(LumenRadiosity.cpp)
void LumenRadiosity::AddRadiosityPass(...)
{
:
// Trace rays from surface cache texels
if (Lumen::UseHardwareRayTracedRadiosity(*FirstView.Family))
{
#if RHI_RAYTRACING
const bool bUseMinimalPayload = true;
const bool bInlineRayTracing = Lumen::UseHardwareInlineRayTracing(*FirstView.Family);
checkf((Views.Num() == 1 || IStereoRendering::IsStereoEyeView(FirstView)), TEXT("Radiosity HW tracing needs to be updated for splitscreen support"));
uint32 ViewIndex = 0;
const FViewInfo& View = Views[ViewIndex];
FLumenRadiosityHardwareRayTracingRGS::FParameters* PassParameters = GraphBuilder.AllocParameters<FLumenRadiosityHardwareRayTracingRGS::FParameters>();
SetLumenHardwareRayTracingSharedParameters(
GraphBuilder,
GetSceneTextureParameters(GraphBuilder, View),
View,
TracingInputs,
&PassParameters->SharedParameters
);
PassParameters->HardwareRayTracingIndirectArgs = RadiosityIndirectArgs;
PassParameters->RadiosityTexelTraceParameters = RadiosityTexelTraceParameters;
PassParameters->RadiosityTexelTraceParameters.ViewIndex = 0;
PassParameters->RWTraceRadianceAtlas = GraphBuilder.CreateUAV(TraceRadianceAtlas);
PassParameters->RWTraceHitDistanceAtlas = GraphBuilder.CreateUAV(TraceHitDistanceAtlas);
const uint32 NumThreadsToDispatch = GRHIPersistentThreadGroupCount * FLumenRadiosityHardwareRayTracingRGS::GetGroupSize();
PassParameters->NumThreadsToDispatch = NumThreadsToDispatch;
PassParameters->SurfaceBias = FMath::Clamp(GLumenRadiosityHardwareRayTracingSurfaceSlopeBias, 0.0f, 1000.0f);
PassParameters->HeightfieldSurfaceBias = Lumen::GetHeightfieldReceiverBias();
PassParameters->AvoidSelfIntersectionTraceDistance = FMath::Clamp(GLumenRadiosityAvoidSelfIntersectionTraceDistance, 0.0f, 1000000.0f);
PassParameters->MaxRayIntensity = FMath::Clamp(GLumenRadiosityMaxRayIntensity, 0.0f, 1000000.0f);
PassParameters->MinTraceDistance = FMath::Clamp(GLumenRadiosityHardwareRayTracingSurfaceBias, 0.0f, 1000.0f);
PassParameters->MaxTraceDistance = Lumen::GetMaxTraceDistance(View);
PassParameters->MinTraceDistanceToSampleSurface = GLumenRadiosityMinTraceDistanceToSampleSurface;
PassParameters->MaxTranslucentSkipCount = Lumen::GetMaxTranslucentSkipCount();
PassParameters->MaxTraversalIterations = LumenHardwareRayTracing::GetMaxTraversalIterations();
FLumenRadiosityHardwareRayTracingRGS::FPermutationDomain PermutationVector;
PermutationVector.Set<FLumenRadiosityHardwareRayTracingRGS::FIndirectDispatchDim>(IsHardwareRayTracingRadiosityIndirectDispatch());
PermutationVector.Set<FLumenRadiosityHardwareRayTracingRGS::FAvoidSelfIntersectionTrace>(GLumenRadiosityAvoidSelfIntersectionTraceDistance > 0.0f);
const FIntPoint DispatchResolution = FIntPoint(NumThreadsToDispatch, 1);
FString Resolution = FString::Printf(TEXT("%ux%u"), DispatchResolution.X, DispatchResolution.Y);
if (IsHardwareRayTracingRadiosityIndirectDispatch())
{
Resolution = FString::Printf(TEXT("<indirect>"));
}
if (bInlineRayTracing)
{
TShaderRef<FLumenRadiosityHardwareRayTracingCS> ComputeShader = GlobalShaderMap->GetShader<FLumenRadiosityHardwareRayTracingCS>(PermutationVector);
if (IsHardwareRayTracingRadiosityIndirectDispatch())
{
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("HardwareRayTracing (inline) %s %ux%u probes at %u spacing", *Resolution, HemisphereProbeResolution, HemisphereProbeResolution, ProbeSpacing),
ComputeShader,
PassParameters,
PassParameters->HardwareRayTracingIndirectArgs,
(uint32)ERadiosityIndirectArgs::HardwareRayTracingThreadPerTrace + ViewIndex * (uint32)ERadiosityIndirectArgs::MAX * sizeof(FRHIDispatchIndirectParameters));
}
:
:
}
uint RadiosityTileSize = Lumen::CardTileSize(=8) / ProbeSpacing;
uint NumTracesPerProbe = HemisphereProbeResolution * HemisphereProbeResolution;
/** Fetches and unpacks tile CardTileIndex from the ViewIndex-th section of CardTileData (MaxCardTiles entries per view). */
FCardTileData GetCardTile(uint CardTileIndex)
{
	const uint PackedTileOffset = ViewIndex * MaxCardTiles + CardTileIndex;
	return UnpackCardTileData(CardTileData[PackedTileOffset]);
}
/**
 * Converts a linear probe index into the card tile that owns the probe and the probe's texel coordinate
 * inside that tile. Probes are numbered tile by tile, RadiosityTileSize * RadiosityTileSize per tile,
 * row-major within a tile.
 */
void UnswizzleCardTileIndex(
	uint RadiosityProbeIndex,
	inout uint CardTileIndex,
	inout uint2 CoordInCardTile)
{
	uint NumProbesPerTile = RadiosityTileSize * RadiosityTileSize;
	CardTileIndex = RadiosityProbeIndex / NumProbesPerTile;
	uint LinearIndexInCardTile = RadiosityProbeIndex - CardTileIndex * NumProbesPerTile;
	uint2 ProbeCoord = uint2(LinearIndexInCardTile % RadiosityTileSize, LinearIndexInCardTile / RadiosityTileSize);
	FCardTileData CardTile = GetCardTile(CardTileIndex);
	FLumenCardPageData CardPage = GetLumenCardPageData(CardTile.CardPageIndex);
	// Probe grid position scaled to texels, plus a jitter that depends on the page's temporal index
	CoordInCardTile = ProbeCoord * ProbeSpacingInRadiosityTexels/*=ProbeSpacing*/ + GetProbeJitter(CardPage.IndirectLightingTemporalIndex);
}
/**
 * Splits a linear trace index into its probe (resolved to card tile + texel coordinate through
 * UnswizzleCardTileIndex) and the trace's 2D coordinate inside the probe's trace grid.
 */
void UnswizzleTexelTraceCoords(
	uint DispatchThreadId,
	inout uint CardTileIndex,
	inout uint2 CoordInCardTile,
	inout uint2 TraceTexelCoord)
{
	// Traces are numbered probe by probe, NumTracesPerProbe per probe
	const uint ProbeIndex = DispatchThreadId / NumTracesPerProbe;
	const uint TraceIndexInProbe = DispatchThreadId % NumTracesPerProbe;

	UnswizzleCardTileIndex(ProbeIndex, CardTileIndex, CoordInCardTile);

	// Row-major layout inside the HemisphereProbeResolution-wide trace grid
	TraceTexelCoord = uint2(TraceIndexInProbe % HemisphereProbeResolution, TraceIndexInProbe / HemisphereProbeResolution);
}
![image](https://user-images.githubusercontent.com/14350715/224533719-e0b6bcfd-38fa-46a9-b1c3-71854095d054.png)
- シェーダー(LumenRadiosity.ush)
```C++
#define PROBE_HEMISPHERE_HEMI_OCTAHEDRON 0
#define PROBE_HEMISPHERE_UNIFORM 1
#define PROBE_HEMISPHERE_COSINE 2
#define RADIOSITY_PROBE_MAPPING PROBE_HEMISPHERE_UNIFORM
/**
* Computes the world-space direction, cone half angle and sampling PDF of one trace of a radiosity probe.
* RadiosityTexel supplies the temporal index and the world normal, ProbeCoord seeds the per-probe jitter,
* and TracingTexelCoord selects the trace inside the probe's trace grid.
* The commented-out functions below are helper bodies quoted inline for reference.
*/
void GetRadiosityRay(FRadiosityTexel RadiosityTexel, uint2 ProbeCoord, uint2 TracingTexelCoord, out float3 WorldRayDirection, out float ConeHalfAngle, out float PDF)
{
// Jittered sample offset inside the trace texel
float2 ProbeTexelCenter = GetProbeTexelCenter(RadiosityTexel.IndirectLightingTemporalIndex, ProbeCoord);
/*
float2 GetProbeTexelCenter(uint IndirectLightingTemporalIndex, uint2 ProbeTileCoord)
{
uint2 RandomSeed = Rand3DPCG16(int3(ProbeTileCoord, 0)).xy;
uint TemporalIndex = (FixedJitterIndex < 0 ? IndirectLightingTemporalIndex : FixedJitterIndex);
return Hammersley16(TemporalIndex % MaxFramesAccumulated, MaxFramesAccumulated, RandomSeed);
}
float2 Hammersley16( uint Index, uint NumSamples, uint2 Random )
{
float E1 = frac( (float)Index / NumSamples + float( Random.x ) * (1.0 / 65536.0) );
float E2 = float( ( reversebits(Index) >> 16 ) ^ Random.y ) * (1.0 / 65536.0);
return float2( E1, E2 );
}
*/
// Trace position normalized by the probe resolution; this is the 2D input of the hemisphere mapping
float2 ProbeUV = (TracingTexelCoord + ProbeTexelCenter) / float(HemisphereProbeResolution);
float3 LocalRayDirection;
// RADIOSITY_PROBE_MAPPING is defined as PROBE_HEMISPHERE_UNIFORM in this file, so the uniform branch is the one taken
uint RadiosityProbeHemisphereMapping = RADIOSITY_PROBE_MAPPING;
// Sample generation must match probe occlusion
if (RadiosityProbeHemisphereMapping == PROBE_HEMISPHERE_HEMI_OCTAHEDRON)
{
LocalRayDirection = HemiOctahedronToUnitVector(ProbeUV * 2 - 1);
/*
float3 HemiOctahedronToUnitVector( float2 Oct )
{
Oct = float2( Oct.x + Oct.y, Oct.x - Oct.y );
float3 N = float3( Oct, 2.0 - dot( 1, abs(Oct) ) );
return normalize(N);
}
*/
//@todo - hemi octahedron solid angle
PDF = 1.0 / (2 * PI);
}
else if (RadiosityProbeHemisphereMapping == PROBE_HEMISPHERE_UNIFORM)
{
float4 Sample = UniformSampleHemisphere(ProbeUV);
/*
// PDF = 1 / (2 * PI)
float4 UniformSampleHemisphere( float2 E )
{
float Phi = 2 * PI * E.x;
float CosTheta = E.y;
float SinTheta = sqrt( 1 - CosTheta * CosTheta );
float3 H;
H.x = SinTheta * cos( Phi );
H.y = SinTheta * sin( Phi );
H.z = CosTheta;
float PDF = 1.0 / (2 * PI);
return float4( H, PDF );
}
*/
LocalRayDirection = Sample.xyz;
PDF = Sample.w;
}
else
{
float4 Sample = CosineSampleHemisphere(ProbeUV);
/*
// PDF = NoL / PI
float4 CosineSampleHemisphere( float2 E )
{
float Phi = 2 * PI * E.x;
float CosTheta = sqrt(E.y);
float SinTheta = sqrt(1 - CosTheta * CosTheta);
float3 H;
H.x = SinTheta * cos(Phi);
H.y = SinTheta * sin(Phi);
H.z = CosTheta;
float PDF = CosTheta * (1.0 / PI);
return float4(H, PDF);
}
*/
LocalRayDirection = Sample.xyz;
PDF = Sample.w;
}
// Basis whose third row is the texel's world normal
float3x3 TangentBasis = GetTangentBasisFrisvad(RadiosityTexel.WorldNormal);
/*
// [Frisvad 2012, "Building an Orthonormal Basis from a 3D Unit Vector Without Normalization"]
// Discontinuity at TangentZ.z < -0.9999999f
float3x3 GetTangentBasisFrisvad(float3 TangentZ)
{
float3 TangentX;
float3 TangentY;
if (TangentZ.z < -0.9999999f)
{
TangentX = float3(0, -1, 0);
TangentY = float3(-1, 0, 0);
}
else
{
float A = 1.0f / (1.0f + TangentZ.z);
float B = -TangentZ.x * TangentZ.y * A;
TangentX = float3(1.0f - TangentZ.x * TangentZ.x * A, B, -TangentZ.x);
TangentY = float3(B, 1.0f - TangentZ.y * TangentZ.y * A, -TangentZ.y);
}
return float3x3( TangentX, TangentY, TangentZ );
}
*/
// Rotate the local sample (Z along the normal) into world space
WorldRayDirection = mul(LocalRayDirection, TangentBasis);
// Cone whose solid angle 2*PI*(1 - cos(HalfAngle)) equals 2*PI / NumTracesPerProbe, i.e. the hemisphere split evenly among the traces
ConeHalfAngle = acosFast(1.0f - 1.0f / (float)(NumTracesPerProbe));
}
RWTexture2D<float3> RWTraceRadianceAtlas;
RWTexture2D<float> RWTraceHitDistanceAtlas;
RaytracingAccelerationStructure TLAS;
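// NOTE(review): the angle-bracket contents of these declarations were stripped from the excerpt. The buffer names
// match the Context.HitGroupData / Context.RayTracingSceneMetadata assignments below; the element types are
// restored from the engine source and should be confirmed.
StructuredBuffer<FHitGroupRootConstants> HitGroupData;
StructuredBuffer<FRayTracingSceneMetadataRecord> RayTracingSceneMetadata;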
float MinTraceDistance; float MaxTraceDistance; float MinTraceDistanceToSampleSurface; float SurfaceBias; float HeightfieldSurfaceBias; float AvoidSelfIntersectionTraceDistance; float MaxRayIntensity; uint NumThreadsToDispatch; float TanRadiosityRayConeHalfAngle; uint MaxTranslucentSkipCount; uint MaxTraversalIterations;
/** Compute shader entry point of the radiosity trace; forwards its system values unchanged to the shared implementation. */
[numthreads(INLINE_RAY_TRACING_THREAD_GROUP_SIZE_X(=32), INLINE_RAY_TRACING_THREAD_GROUP_SIZE_Y(=1), 1)]
void LumenRadiosityHardwareRayTracingCS(
	uint3 DispatchThreadIndex : SV_DispatchThreadID,
	uint3 DispatchGroupId : SV_GroupID,
	uint DispatchGroupIndex : SV_GroupIndex)
{
	LumenRadiosityHardwareRayTracing_INTERNAL(DispatchThreadIndex, DispatchGroupId, DispatchGroupIndex);
}
/**
 * Shared body of the radiosity hardware ray tracing shader.
 * For each trace it resolves the card tile / texel / trace coordinate, builds the ray from the radiosity
 * texel, traces it against the surface cache (falling back to sky radiance on a miss), clamps the result
 * to MaxRayIntensity and writes radiance (and optionally hit distance) to the trace atlases.
 */
void LumenRadiosityHardwareRayTracing_INTERNAL(
	uint3 DispatchThreadIndex,
	uint3 DispatchGroupId,
	uint DispatchGroupIndex)
{
	// NOTE(review): the early-out path directly below and the strided for-loop after it look like two alternative
	// variants whose preprocessor guards were dropped from this excerpt - presumably selected by the
	// FIndirectDispatchDim permutation set in the C++ pass setup; confirm against the engine source.
	uint GlobalThreadIndex = GetUnWrappedRayTracingDispatchThreadId(DispatchGroupId, THREADGROUP_SIZE(=64));
	/*
	uint GetUnWrappedRayTracingDispatchThreadId(int3 DispatchThreadId, uint ThreadGroupSize)
	{
		return DispatchThreadId.x + (DispatchThreadId.z * ThreadGroupSize + DispatchThreadId.y) * ThreadGroupSize;
	}
	*/
	// When running with indirect inline RT, ThreadIndex is actually GroupIndex, so we need to account for that.
	GlobalThreadIndex = GlobalThreadIndex * INLINE_RAY_TRACING_THREAD_GROUP_SIZE_X + DispatchGroupIndex;
	if (GlobalThreadIndex >= CardTileAllocator[ViewIndex] * NumTracesPerProbe * RadiosityTileSize * RadiosityTileSize)
	{
		return;
	}
	// Each thread starts at its own index and advances by NumThreadsToDispatch until all traces are covered
	for (uint GlobalThreadIndex = DispatchThreadIndex.x;
		GlobalThreadIndex < CardTileAllocator[ViewIndex] * NumTracesPerProbe * RadiosityTileSize * RadiosityTileSize;
		GlobalThreadIndex += NumThreadsToDispatch)
	{
		uint CardTileIndex;
		uint2 CoordInCardTile;
		uint2 TraceTexelCoord;
		UnswizzleTexelTraceCoords(GlobalThreadIndex, CardTileIndex, CoordInCardTile, TraceTexelCoord);
		FRadiosityTexel RadiosityTexel = GetRadiosityTexelFromCardTile(CardTileIndex, CoordInCardTile);
		if (RadiosityTexel.bInsideAtlas)
		{
			// Defaults written for texels that are inside the atlas but not valid
			float3 Radiance = 0.0f;
			float TraceHitDistance = MaxTraceDistance;
			if (RadiosityTexel.bValid)
			{
				float3 TranslatedWorldPosition = RadiosityTexel.WorldPosition + LWCHackToFloat(PrimaryView.PreViewTranslation); // LUMEN_LWC_TODO
				float3 WorldNormal = RadiosityTexel.WorldNormal;
				float3 WorldRayDirection;
				float ConeHalfAngle;
				float PDF;
				GetRadiosityRay(RadiosityTexel, RadiosityTexel.CardCoord / ProbeSpacingInRadiosityTexels, TraceTexelCoord, WorldRayDirection, ConeHalfAngle, PDF);
				float ReceiverBias = 0.0f;
				if (RadiosityTexel.bHeightfield)
				{
					float3 TranslatedWorldCameraOrigin = PrimaryView.TranslatedWorldCameraOrigin;
					ReceiverBias = CalculateDistanceBasedHeightfieldBias(HeightfieldSurfaceBias, TranslatedWorldPosition, TranslatedWorldCameraOrigin);
				}
				FRayDesc Ray;
				// Push the origin off the surface along the normal
				Ray.Origin = TranslatedWorldPosition + WorldNormal * (SurfaceBias + ReceiverBias);
				Ray.Direction = WorldRayDirection;
				Ray.TMin = MinTraceDistance;
				FRayCone RayCone = (FRayCone)0;
				RayCone = PropagateRayCone(RayCone, ConeHalfAngle, 0.0);
				/*
				FRayCone PropagateRayCone(in FRayCone Cone, in float SurfaceSpreadAngle, in float HitT)
				{
					FRayCone NewCone;
					NewCone.Width = Cone.SpreadAngle * HitT + Cone.Width;
					NewCone.SpreadAngle = Cone.SpreadAngle + SurfaceSpreadAngle;
					return NewCone;
				}
				*/
				const uint LinearCoord = CoordInCardTile.y * CARD_TILE_SIZE + CoordInCardTile.x;
				const uint OriginalCullingMode = 0;
				FRayTracedLightingContext Context = CreateRayTracedLightingContext(TLAS, RayCone, CoordInCardTile, LinearCoord, OriginalCullingMode, MaxTranslucentSkipCount, MaxTraversalIterations);
				Context.HitGroupData = HitGroupData;
				Context.RayTracingSceneMetadata = RayTracingSceneMetadata;
				// First trace - short distance with backface culling enabled to avoid self intersecting in cases where the traced geometry doesn't match what's in the GBuffer (Nanite, Ray Tracing LODs, etc)
				Ray.TMax = AvoidSelfIntersectionTraceDistance;
				FRayTracedLightingResult RayResult = CreateRayTracedLightingResult();
				if (Ray.TMax > Ray.TMin)
				{
					Context.CullingMode = RAY_FLAG_CULL_BACK_FACING_TRIANGLES;
					RayResult = TraceAndCalculateRayTracedLightingFromSurfaceCache(Ray, Context);
				}
				// Second trace - remaining ray with backface culling enabled to reduce leaking when starting from a point inside the mesh
				if (!RayResult.bIsHit)
				{
					Ray.TMin = max(Ray.TMin, AvoidSelfIntersectionTraceDistance);
					Ray.TMax = MaxTraceDistance;
					Context.CullingMode = OriginalCullingMode;
					RayResult = TraceAndCalculateRayTracedLightingFromSurfaceCache(Ray, Context);
				}
				// NOTE(review): the two-stage trace above and the single trace below both declare RayResult, so they cannot
				// coexist in one compiled variant - presumably they are the two sides of the FAvoidSelfIntersectionTrace
				// permutation whose preprocessor guards were dropped from this excerpt; confirm against the engine source.
				Ray.TMax = MaxTraceDistance;
				FRayTracedLightingResult RayResult = TraceAndCalculateRayTracedLightingFromSurfaceCache(Ray, Context);
				// Hits closer than MinTraceDistanceToSampleSurface contribute no radiance
				if (RayResult.TraceHitDistance < MinTraceDistanceToSampleSurface)
				{
					RayResult.Radiance = 0.0f;
				}
				if (RayResult.bIsHit)
				{
					Radiance = RayResult.Radiance;
					// Recalculate TraceHitDistance to incorporate biases
					float3 HitPosition = Ray.Origin + Ray.Direction * RayResult.TraceHitDistance;
					TraceHitDistance = length(TranslatedWorldPosition - HitPosition);
				}
				else
				{
					Radiance = EvaluateSkyRadiance(WorldRayDirection);
				}
				// Rescale so that the brightest channel does not exceed MaxRayIntensity * View.OneOverPreExposure
				float MaxLighting = max3(Radiance.x, Radiance.y, Radiance.z);
				if (MaxLighting > MaxRayIntensity * View.OneOverPreExposure)
				{
					Radiance *= MaxRayIntensity * View.OneOverPreExposure / MaxLighting;
				}
			}
			FCardTileData CardTile = GetCardTile(CardTileIndex);
			FLumenCardPageData CardPage = GetLumenCardPageData(CardTile.CardPageIndex);
			// Each probe owns a HemisphereProbeResolution-wide square of texels in the tracing atlas
			uint2 RadiosityProbeTracingAtlasCoord = GetRadiosityProbeAtlasCoord(CardPage, CardTile, CoordInCardTile) * HemisphereProbeResolution + TraceTexelCoord;
			RWTraceRadianceAtlas[RadiosityProbeTracingAtlasCoord] = Radiance;
			if (UseProbeOcclusion > 0)
			{
				RWTraceHitDistanceAtlas[RadiosityProbeTracingAtlasCoord] = TraceHitDistance;
			}
		}
	}
}
- [TraceAndCalculateRayTracedLightingFromSurfaceCache()](https://github.com/yasukichi/testcode/issues/23#issuecomment-1665455910)
C++(LumenRadiosity.cpp)
void LumenRadiosity::AddRadiosityPass(...)
{
:
if (GLumenRadiositySpatialFilterProbes && GLumenRadiositySpatialFilterProbesKernelSize > 0)
{
//@todo - use temporary buffer based off of CardUpdateContext.UpdateAtlasSize which is smaller
FRDGTextureRef FilteredTraceRadianceAtlas = GraphBuilder.CreateTexture(
FRDGTextureDesc::Create2D(RadiosityProbeTracingAtlasSize, PF_FloatRGB, FClearValueBinding::Black, TexCreate_ShaderResource | TexCreate_UAV),
TEXT("Lumen.Radiosity.FilteredTraceRadianceAtlas"));
FRDGTextureUAVRef FilteredTraceRadianceAtlasUAV = GraphBuilder.CreateUAV(FilteredTraceRadianceAtlas, ERDGUnorderedAccessViewFlags::SkipBarrier);
for (int32 ViewIndex = 0; ViewIndex < Views.Num(); ViewIndex++)
{
const FViewInfo& View = Views[ViewIndex];
FLumenRadiositySpatialFilterProbeRadiance::FParameters* PassParameters = GraphBuilder.AllocParameters<FLumenRadiositySpatialFilterProbeRadiance::FParameters>();
PassParameters->RWFilteredTraceRadianceAtlas = FilteredTraceRadianceAtlasUAV;
PassParameters->IndirectArgs = RadiosityIndirectArgs;
PassParameters->View = View.ViewUniformBuffer;
PassParameters->LumenCardScene = TracingInputs.LumenCardSceneUniformBuffer;
PassParameters->RadiosityTexelTraceParameters = RadiosityTexelTraceParameters;
PassParameters->RadiosityTexelTraceParameters.ViewIndex = ViewIndex;
PassParameters->ProbePlaneWeightingDepthScale = GRadiosityProbePlaneWeightingDepthScale;
FLumenRadiositySpatialFilterProbeRadiance::FPermutationDomain PermutationVector;
PermutationVector.Set<FLumenRadiositySpatialFilterProbeRadiance::FPlaneWeighting>(GRadiosityFilteringProbePlaneWeighting != 0);
PermutationVector.Set<FLumenRadiositySpatialFilterProbeRadiance::FProbeOcclusion>(bUseProbeOcclusion);
PermutationVector.Set<FLumenRadiositySpatialFilterProbeRadiance::FKernelSize>(FMath::Clamp<int32>(GLumenRadiositySpatialFilterProbesKernelSize, 0, 2));
auto ComputeShader = GlobalShaderMap->GetShader<FLumenRadiositySpatialFilterProbeRadiance>(PermutationVector);
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("SpatialFilterProbes"),
ComputePassFlags,
ComputeShader,
PassParameters,
RadiosityIndirectArgs,
(uint32)ERadiosityIndirectArgs::NumTracesDiv64 + ViewIndex * (uint32)ERadiosityIndirectArgs::MAX * sizeof(FRHIDispatchIndirectParameters));
}
RadiosityTexelTraceParameters.TraceRadianceAtlas = FilteredTraceRadianceAtlas;
}
:
}
- シェーダー(LumenRadiosity.ush)
```C++
// Coord in persistent radiosity probe atlas
// Builds the texel coordinate (page origin + tile origin + coordinate inside the tile), then shifts it right by
// ProbeSpacingInRadiosityTexelsDivideShift to convert it to probe units.
uint2 GetRadiosityProbeAtlasCoord(FLumenCardPageData CardPage, FCardTileData CardTile, uint2 CoordInCardTile)
{
uint2 AtlasCoord = CardPage.PhysicalAtlasCoord + CardTile.TileCoord * uint2(CARD_TILE_SIZE, CARD_TILE_SIZE) + CoordInCardTile;
return AtlasCoord >> ProbeSpacingInRadiosityTexelsDivideShift;
}
シェーダー(LumenRadiosity.usf)
/**
* Spatial filter over probe trace radiance. One thread per trace: the trace's own radiance (weight 2) is
* blended with the same trace texel of neighboring probes (base weight 1 each, reduced or rejected inside
* SampleTraceRadianceAtlas) and the normalized result is written to RWFilteredTraceRadianceAtlas.
*/
[numthreads(THREADGROUP_SIZE, 1, 1)]
void LumenRadiositySpatialFilterProbeRadiance(
uint DispatchThreadId : SV_DispatchThreadID)
{
uint CardTileIndex;
uint2 CoordInCardTile;
uint2 TraceTexelCoord;
UnswizzleTexelTraceCoords(DispatchThreadId, CardTileIndex, CoordInCardTile, TraceTexelCoord);
FRadiosityTexel RadiosityTexel = GetRadiosityTexelFromCardTile(CardTileIndex, CoordInCardTile);
if (RadiosityTexel.bInsideAtlas)
{
FCardTileData CardTile = GetCardTile(CardTileIndex);
FLumenCardPageData CardPage = GetLumenCardPageData(CardTile.CardPageIndex);
uint2 ProbeAtlasCoord = GetRadiosityProbeAtlasCoord(CardPage, CardTile, CoordInCardTile);
uint2 RadiosityProbeTracingAtlasCoord = ProbeAtlasCoord * HemisphereProbeResolution + TraceTexelCoord;
// The center sample counts twice as much as a fully weighted neighbor
float CenterWeight = 2.0f;
float3 Radiance = TraceRadianceAtlas[RadiosityProbeTracingAtlasCoord] * CenterWeight;
float TotalWeight = CenterWeight;
// Invalid texels skip the neighbor gather and just pass their own radiance through
if (RadiosityTexel.bValid)
{
// Probe coordinate relative to the whole card, so neighbors can be looked up across page boundaries
uint2 ResLevelSizeInProbes = CardPage.ResLevelSizeInTiles * RadiosityTileSize;
uint2 CardPageProbeCoord = CardPage.CardUVRect.xy * ResLevelSizeInProbes;
int2 ProbeCoordInCard = CardPageProbeCoord + CardTile.TileCoord * RadiosityTileSize + (CoordInCardTile >> ProbeSpacingInRadiosityTexelsDivideShift);
// NOTE(review): the 4-sample and 13-sample tables below both declare NumSamples / NeighborOffsets, so they cannot
// coexist in one compiled variant - presumably they are alternatives selected by the FKernelSize permutation whose
// preprocessor guards were dropped from this excerpt; confirm against the engine source.
// Variant A: the 4 axis-aligned neighbors
const uint NumSamples = 4;
int2 NeighborOffsets[NumSamples];
NeighborOffsets[0] = int2(0, 1);
NeighborOffsets[1] = int2(1, 0);
NeighborOffsets[2] = int2(0, -1);
NeighborOffsets[3] = int2(-1, 0);
// Variant B: every offset with |x| + |y| <= 2 (13 entries, including the center itself)
const uint NumSamples = 13;
int2 NeighborOffsets[NumSamples];
NeighborOffsets[0] = int2(0, 2);
NeighborOffsets[1] = int2(-1, 1);
NeighborOffsets[2] = int2(0, 1);
NeighborOffsets[3] = int2(1, 1);
NeighborOffsets[4] = int2(-2, 0);
NeighborOffsets[5] = int2(-1, 0);
NeighborOffsets[6] = int2(0, 0);
NeighborOffsets[7] = int2(1, 0);
NeighborOffsets[8] = int2(2, 0);
NeighborOffsets[9] = int2(-1, -1);
NeighborOffsets[10] = int2(0, -1);
NeighborOffsets[11] = int2(1, -1);
NeighborOffsets[12] = int2(0, -2);
UNROLL
for (uint i = 0; i < NumSamples; i++)
{
// Accumulates into Radiance / TotalWeight when the neighbor probe is usable
SampleTraceRadianceAtlas(
RadiosityTexel,
ProbeAtlasCoord,
CardPage.ResLevelPageTableOffset,
ResLevelSizeInProbes,
ProbeCoordInCard + NeighborOffsets[i],
TraceTexelCoord,
1.0f,
Radiance,
TotalWeight);
}
}
// TotalWeight is at least CenterWeight, so the division is always well defined
RWFilteredTraceRadianceAtlas[RadiosityProbeTracingAtlasCoord] = Radiance / TotalWeight;
}
}
/**
 * Accumulates one neighbor probe's trace radiance into Radiance / WeightSum.
 * ProbeCoordInCard is the neighbor's probe coordinate relative to the card; the function finds the card
 * page that contains it, rejects unmapped or never-updated pages and texels outside the atlas, and
 * reduces InterpolationWeight by the optional plane and probe-occlusion weights before accumulating.
 */
void SampleTraceRadianceAtlas(
	FRadiosityTexel GatherProbeTexel,
	uint2 GatherProbeAtlasCoord,
	uint ResLevelPageTableOffset,
	uint2 ResLevelSizeInProbes,
	int2 ProbeCoordInCard,
	uint2 TraceTexelCoord,
	float InterpolationWeight,
	inout float3 Radiance,
	inout float WeightSum)
{
	// Neighbors left of / below the card origin are skipped
	if (all(ProbeCoordInCard >= 0))
	{
		uint2 ResLevelSizeInPages = (ResLevelSizeInProbes * ProbeSpacingInRadiosityTexels) / PHYSICAL_PAGE_SIZE;
		uint2 CoordInCard = ProbeCoordInCard * ProbeSpacingInRadiosityTexels;
		// First find page to sample from
		uint2 PageCoordInCard = CoordInCard / PHYSICAL_PAGE_SIZE;
		//@todo - breaks SW tracing
		//if (all(PageCoordInCard < ResLevelSizeInPages))
		{
			uint LinearCardPageIndex = PageCoordInCard.x + PageCoordInCard.y * ResLevelSizeInPages.x;
			FLumenCardPageData CardPage = GetLumenCardPageData(ResLevelPageTableOffset + LinearCardPageIndex);
			// Don't sample if page doesn't have a valid probe
			if (CardPage.bMapped && CardPage.LastIndirectLightingUpdateFrameIndex != 0)
			{
				// Then tile and probe coordinates
				uint2 CoordInCardPage = CoordInCard - (PageCoordInCard * PHYSICAL_PAGE_SIZE);
				uint2 ProbeAtlasCoord = ((uint2)CardPage.PhysicalAtlasCoord + CoordInCardPage) >> ProbeSpacingInRadiosityTexelsDivideShift;
				FRadiosityTexel ProbeTexel = GetRadiosityTexel(CardPage, CoordInCardPage);
				if (ProbeTexel.bInsideAtlas)
				{
					float Weight = ProbeTexel.bValid ? InterpolationWeight : 0.0f;
					#if FILTERING_PLANE_WEIGHTING
					{
						float PlaneWeight = CalculatePlaneWeight(GatherProbeTexel.WorldPosition, GatherProbeTexel.WorldNormal, ProbeTexel.WorldPosition);
						Weight = min(Weight, PlaneWeight);
					}
					#endif
					#if FILTERING_PROBE_OCCLUSION
					{
						// Visibility is evaluated in both directions and the smaller weight wins
						float VisibilityWeight = CalculateProbeVisibility(GatherProbeTexel.WorldPosition, ProbeTexel, ProbeAtlasCoord);
						Weight = min(Weight, VisibilityWeight);
						float VisibilityWeight2 = CalculateProbeVisibility(ProbeTexel.WorldPosition, GatherProbeTexel, GatherProbeAtlasCoord);
						Weight = min(Weight, VisibilityWeight2);
					}
					#endif
					if (Weight > 0)
					{
						Radiance += TraceRadianceAtlas[ProbeAtlasCoord * HemisphereProbeResolution + TraceTexelCoord] * Weight;
						WeightSum += Weight;
					}
				}
			}
		}
	}
}
![image](https://user-images.githubusercontent.com/14350715/224553603-92d56f82-a97c-4e3c-bf46-91380d93596f.png)
C++(LumenRadiosity.cpp)
void LumenRadiosity::AddRadiosityPass(...)
{
:
// Convert traces to SH and store in persistent SH atlas
for (int32 ViewIndex = 0; ViewIndex < Views.Num(); ViewIndex++)
{
const FViewInfo& View = Views[ViewIndex];
FLumenRadiosityConvertToSH::FParameters* PassParameters = GraphBuilder.AllocParameters<FLumenRadiosityConvertToSH::FParameters>();
PassParameters->RWRadiosityProbeSHRedAtlas = RadiosityProbeSHRedAtlasUAV;
PassParameters->RWRadiosityProbeSHGreenAtlas = RadiosityProbeSHGreenAtlasUAV;
PassParameters->RWRadiosityProbeSHBlueAtlas = RadiosityProbeSHBlueAtlasUAV;
PassParameters->IndirectArgs = RadiosityIndirectArgs;
PassParameters->View = View.ViewUniformBuffer;
PassParameters->LumenCardScene = TracingInputs.LumenCardSceneUniformBuffer;
PassParameters->RadiosityTexelTraceParameters = RadiosityTexelTraceParameters;
PassParameters->RadiosityTexelTraceParameters.ViewIndex = ViewIndex;
auto ComputeShader = GlobalShaderMap->GetShader<FLumenRadiosityConvertToSH>();
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("ConvertToSH"),
ComputePassFlags,
ComputeShader,
PassParameters,
RadiosityIndirectArgs,
(uint32)ERadiosityIndirectArgs::ThreadPerProbe + ViewIndex * (uint32)ERadiosityIndirectArgs::MAX * sizeof(FRHIDispatchIndirectParameters));
}
:
}
- シェーダー(LumenRadiosity.usf)
```C++
// Output SH atlases, one float4 coefficient vector per probe for each color channel
RWTexture2D<float4> RWRadiosityProbeSHRedAtlas;
RWTexture2D<float4> RWRadiosityProbeSHGreenAtlas;
RWTexture2D<float4> RWRadiosityProbeSHBlueAtlas;
/**
* Converts a probe's traces into spherical harmonics. One thread per probe: every trace of the probe is
* read from TraceRadianceAtlas, weighted by 1 / PDF, projected onto the SH basis along its ray direction
* and averaged; the result is written to the three per-channel SH atlases.
*/
[numthreads(THREADGROUP_SIZE, 1, 1)]
void LumenRadiosityConvertToSH(
uint DispatchThreadId : SV_DispatchThreadID)
{
uint CardTileIndex;
uint2 CoordInCardTile;
UnswizzleCardTileIndex(DispatchThreadId, CardTileIndex, CoordInCardTile);
// Threads beyond the last allocated tile of this view do nothing
if (CardTileIndex < CardTileAllocator[ViewIndex])
{
FRadiosityTexel RadiosityTexel = GetRadiosityTexelFromCardTile(CardTileIndex, CoordInCardTile);
FCardTileData CardTile = GetCardTile(CardTileIndex);
FLumenCardPageData CardPage = GetLumenCardPageData(CardTile.CardPageIndex);
uint2 RadiosityProbeAtlasCoord = GetRadiosityProbeAtlasCoord(CardPage, CardTile, CoordInCardTile);
FTwoBandSHVectorRGB IrradianceSH = (FTwoBandSHVectorRGB)0;
float NumValidSamples = 0.0f;
if (RadiosityTexel.bInsideAtlas && RadiosityTexel.bValid)
{
for (uint TraceY = 0; TraceY < HemisphereProbeResolution; ++TraceY)
{
for (uint TraceX = 0; TraceX < HemisphereProbeResolution; ++TraceX)
{
uint2 TraceTexelCoord = uint2(TraceX, TraceY);
float3 TraceRadiance = TraceRadianceAtlas[RadiosityProbeAtlasCoord * HemisphereProbeResolution + TraceTexelCoord];
float3 WorldRayDirection;
float ConeHalfAngle;
float PDF;
// Regenerate the ray direction and PDF for this trace texel (ConeHalfAngle is not used here)
GetRadiosityRay(RadiosityTexel, RadiosityTexel.CardCoord >> ProbeSpacingInRadiosityTexelsDivideShift, TraceTexelCoord, WorldRayDirection, ConeHalfAngle, PDF);
IrradianceSH = AddSH(IrradianceSH, MulSH(SHBasisFunction(WorldRayDirection), TraceRadiance / PDF));
NumValidSamples += 1.0f;
}
}
}
// Average over the accumulated traces
if (NumValidSamples > 0.0f)
{
IrradianceSH = MulSH(IrradianceSH, 1.0f / NumValidSamples);
}
// Written even for invalid texels, in which case the SH stays zero
RWRadiosityProbeSHRedAtlas[RadiosityProbeAtlasCoord] = IrradianceSH.R.V;
RWRadiosityProbeSHGreenAtlas[RadiosityProbeAtlasCoord] = IrradianceSH.G.V;
RWRadiosityProbeSHBlueAtlas[RadiosityProbeAtlasCoord] = IrradianceSH.B.V;
}
}
C++(LumenRadiosity.cpp)
// Excerpt of LumenRadiosity::AddRadiosityPass; the parameter list ("...") and the code before and
// after the loop (":") are elided. For every view, the visible part queues one indirect compute
// dispatch of FLumenRadiosityIntegrateCS on the render graph.
void LumenRadiosity::AddRadiosityPass(...)
{
:
for (int32 ViewIndex = 0; ViewIndex < Views.Num(); ViewIndex++)
{
const FViewInfo& View = Views[ViewIndex];
// Parameter struct for this view's dispatch, allocated through the graph builder.
FLumenRadiosityIntegrateCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FLumenRadiosityIntegrateCS::FParameters>();
PassParameters->IndirectArgs = RadiosityIndirectArgs;
PassParameters->View = View.ViewUniformBuffer;
PassParameters->LumenCardScene = TracingInputs.LumenCardSceneUniformBuffer;
// Start from the shared trace parameters, then override the view index for this iteration.
PassParameters->RadiosityTexelTraceParameters = RadiosityTexelTraceParameters;
PassParameters->RadiosityTexelTraceParameters.ViewIndex = ViewIndex;
// UAVs bound to the shader: the radiosity atlas and its per-texel accumulated-frame counter.
PassParameters->RWRadiosityAtlas = RadiosityAtlasUAV;
PassParameters->RWRadiosityNumFramesAccumulatedAtlas = RadiosityNumFramesAccumulatedAtlasUAV;
// Probe SH atlases, one per colour channel.
PassParameters->RadiosityProbeSHRedAtlas = RadiosityProbeSHRedAtlas;
PassParameters->RadiosityProbeSHGreenAtlas = RadiosityProbeSHGreenAtlas;
PassParameters->RadiosityProbeSHBlueAtlas = RadiosityProbeSHBlueAtlas;
PassParameters->ProbePlaneWeightingDepthScale = GRadiosityProbePlaneWeightingDepthScale;
// Select the shader permutation: plane weighting, probe occlusion and temporal accumulation.
FLumenRadiosityIntegrateCS::FPermutationDomain PermutationVector;
PermutationVector.Set<FLumenRadiosityIntegrateCS::FPlaneWeighting>(GRadiosityFilteringProbePlaneWeighting != 0);
PermutationVector.Set<FLumenRadiosityIntegrateCS::FProbeOcclusion>(bUseProbeOcclusion);
PermutationVector.Set<FLumenRadiosityIntegrateCS::FTemporalAccumulation>(LumenRadiosity::UseTemporalAccumulation());
auto ComputeShader = GlobalShaderMap->GetShader<FLumenRadiosityIntegrateCS>(PermutationVector);
// Indirect dispatch: the group count is read from RadiosityIndirectArgs at the offset below.
// NOTE(review): ThreadPerRadiosityTexel is added unscaled while the per-view stride is
// MAX * sizeof(FRHIDispatchIndirectParameters); this presumes the enum value is already a
// byte offset — confirm against the ERadiosityIndirectArgs definition.
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("Integrate"),
ComputePassFlags,
ComputeShader,
PassParameters,
RadiosityIndirectArgs,
(uint32)ERadiosityIndirectArgs::ThreadPerRadiosityTexel + ViewIndex * (uint32)ERadiosityIndirectArgs::MAX * sizeof(FRHIDispatchIndirectParameters));
}
:
}
- シェーダー(LumenRadiosity.usf)
```C++
// Outputs of the integrate pass: per-texel radiosity and the number of frames accumulated so far
// (written as frames / 255 into a UNORM texture).
RWTexture2D<float3> RWRadiosityAtlas;
RWTexture2D<UNORM float> RWRadiosityNumFramesAccumulatedAtlas;
// Integrates probe irradiance into one radiosity texel per thread.
// DispatchThreadId.x is a linear texel index: it is split into a card tile index and a 2D
// coordinate inside that CARD_TILE_SIZE x CARD_TILE_SIZE tile. For texels that are inside the
// atlas and valid, the four surrounding probes are sampled through SampleRadiositySH, blended,
// and optionally accumulated with the previous frame's value.
[numthreads(THREADGROUP_SIZE, 1, 1)]
void LumenRadiosityIntegrateCS(
uint3 DispatchThreadId : SV_DispatchThreadID)
{
uint LinearTexelIndex = DispatchThreadId.x;
uint NumTexelsPerTile = CARD_TILE_SIZE/*=8*/ * CARD_TILE_SIZE;
uint CardTileIndex = LinearTexelIndex / NumTexelsPerTile;
const uint LinearIndexInCardTile = LinearTexelIndex - CardTileIndex * NumTexelsPerTile;
uint2 CoordInCardTile = uint2(LinearIndexInCardTile % CARD_TILE_SIZE, LinearIndexInCardTile / CARD_TILE_SIZE);
// Defaults that are written out unchanged when the texel fails the validity test below.
float3 TexelRadiance = float3(0.0f, 0.0f, 0.0f);
float3 Debug = 0;
float WeightSum = 0.0f;
float NewNumFramesAccumulated = 0;
FRadiosityTexel RadiosityTexel = GetRadiosityTexelFromCardTile(CardTileIndex, CoordInCardTile);
if (RadiosityTexel.bInsideAtlas && RadiosityTexel.bValid)
{
FCardTileData CardTile = GetCardTile(CardTileIndex);
FLumenCardPageData CardPage = GetLumenCardPageData(CardTile.CardPageIndex);
// Card is not referenced again in this excerpt.
FLumenCardData Card = GetLumenCardData(CardPage.CardIndex);
//@todo - seam if CardPage.IndirectLightingTemporalIndex different
// Texel coordinate within the whole card at this resolution level.
uint2 CoordInCard = (CardPage.CardUVRect.xy * CardPage.ResLevelSizeInTiles + CardTile.TileCoord) * CARD_TILE_SIZE + CoordInCardTile;
// Remove the probe jitter (clamped at zero) before locating the probe cell.
uint2 ProbeFullResCoord = max((float2)CoordInCard - GetProbeJitter(CardPage.IndirectLightingTemporalIndex), 0.0f);
// The four probes surrounding the texel.
uint2 ProbeCoord00 = ProbeFullResCoord >> ProbeSpacingInRadiosityTexelsDivideShift;
uint2 ProbeCoord10 = ProbeCoord00 + uint2(1, 0);
uint2 ProbeCoord01 = ProbeCoord00 + uint2(0, 1);
uint2 ProbeCoord11 = ProbeCoord00 + uint2(1, 1);
// Guarantee that no probe will have a bilinear weight of zero, when other probes might be discarded due to occlusion
float BilinearExpand = 1;
// Bilinear fraction inside the probe cell, with the cell widened by BilinearExpand on each side.
float2 BilinearWeights = ((float2)ProbeFullResCoord - ProbeCoord00 * ProbeSpacingInRadiosityTexels + BilinearExpand) / (float)(ProbeSpacingInRadiosityTexels + 2 * BilinearExpand);
// Per-probe weights in the order 00, 10, 01, 11.
float4 Weights = float4(
(1.0f - BilinearWeights.x) * (1.0f - BilinearWeights.y),
BilinearWeights.x * (1.0f - BilinearWeights.y),
(1.0f - BilinearWeights.x) * BilinearWeights.y,
BilinearWeights.x * BilinearWeights.y);
FTwoBandSHVectorRGB IrradianceSHWeighted = (FTwoBandSHVectorRGB)0;
uint2 ResLevelSizeInProbes = CardPage.ResLevelSizeInTiles * RadiosityTileSize;
// Accumulate the weighted SH of each of the four probes; WeightSum collects the weights actually used.
SampleRadiositySH(CoordInCard, RadiosityTexel.WorldPosition, RadiosityTexel.WorldNormal, CardPage.ResLevelPageTableOffset, ResLevelSizeInProbes, ProbeCoord00, Weights.x, IrradianceSHWeighted, WeightSum, Debug);
SampleRadiositySH(CoordInCard, RadiosityTexel.WorldPosition, RadiosityTexel.WorldNormal, CardPage.ResLevelPageTableOffset, ResLevelSizeInProbes, ProbeCoord10, Weights.y, IrradianceSHWeighted, WeightSum, Debug);
SampleRadiositySH(CoordInCard, RadiosityTexel.WorldPosition, RadiosityTexel.WorldNormal, CardPage.ResLevelPageTableOffset, ResLevelSizeInProbes, ProbeCoord01, Weights.z, IrradianceSHWeighted, WeightSum, Debug);
SampleRadiositySH(CoordInCard, RadiosityTexel.WorldPosition, RadiosityTexel.WorldNormal, CardPage.ResLevelPageTableOffset, ResLevelSizeInProbes, ProbeCoord11, Weights.w, IrradianceSHWeighted, WeightSum, Debug);
// Evaluate the accumulated SH against the diffuse transfer for the texel normal, clamped to non-negative.
FTwoBandSHVector DiffuseTransferSH = CalcDiffuseTransferSH(RadiosityTexel.WorldNormal, 1.0f);
TexelRadiance = max(float3(0.0f, 0.0f, 0.0f), DotSH(IrradianceSHWeighted, DiffuseTransferSH));
// Normalize by the weights that contributed; skipped when no probe contributed.
if (WeightSum > 0.0f)
{
TexelRadiance /= WeightSum;
}
#if TEMPORAL_ACCUMULATION
// The stored counter is frames / 255; decode it, then blend so that the new sample gets
// weight 1 / (1 + frames accumulated so far). The counter saturates at MaxFramesAccumulated.
float NumFramesAccumulated = RWRadiosityNumFramesAccumulatedAtlas[RadiosityTexel.AtlasCoord] * 255.0f;
NewNumFramesAccumulated = min(NumFramesAccumulated + 1, (float)MaxFramesAccumulated);
float Alpha = 1.0f / (1.0f + NumFramesAccumulated);
float3 HistoryRadiosity = RWRadiosityAtlas[RadiosityTexel.AtlasCoord];
TexelRadiance = lerp(HistoryRadiosity, TexelRadiance, Alpha);
#endif
////////////////////////
//TexelRadiance = Debug;
//TexelRadiance = WeightSum > 0 ? 1 : 0;
// Debug aid, disabled by default: highlights the texels on which probes are placed.
#define DEBUG_VISUALIZE_PROBE_PLACEMENT 0
#if DEBUG_VISUALIZE_PROBE_PLACEMENT
if (all(CoordInCard == ProbeCoord00 * ProbeSpacingInRadiosityTexels + GetProbeJitter(CardPage.IndirectLightingTemporalIndex)))
{
TexelRadiance = float3(10, 0, 10);
}
#endif
}
// These writes are outside the validity test: texels that failed it store zero radiance and a zero frame count.
RWRadiosityNumFramesAccumulatedAtlas[RadiosityTexel.AtlasCoord] = NewNumFramesAccumulated / 255.0f;
RWRadiosityAtlas[RadiosityTexel.AtlasCoord] = TexelRadiance;
}
// Adds one probe's SH irradiance, scaled by its final weight, to IrradianceSH and adds that
// weight to WeightSum. Nothing is accumulated when the probe lies outside the resolution level,
// its page is unmapped or has never had indirect lighting updated, its texel is outside the page,
// or its final weight is not positive.
//
// TexelWorldPosition / TexelNormal: the texel being shaded; used by the optional plane and occlusion weights.
// ResLevelPageTableOffset: first page-table entry of the card's resolution level.
// ResLevelSizeInProbes: size of that resolution level in probes.
// ProbeCoordInCard: probe to sample, in probe units within the card.
// InterpolationWeight: bilinear weight supplied by the caller.
// TexelCoordInCard and Debug are not referenced in the body shown here.
void SampleRadiositySH(
uint2 TexelCoordInCard,
float3 TexelWorldPosition,
float3 TexelNormal,
uint ResLevelPageTableOffset,
uint2 ResLevelSizeInProbes,
uint2 ProbeCoordInCard,
float InterpolationWeight,
inout FTwoBandSHVectorRGB IrradianceSH,
inout float WeightSum,
inout float3 Debug)
{
// Reject probe coordinates beyond the resolution level.
if (all(ProbeCoordInCard < ResLevelSizeInProbes))
{
uint2 ResLevelSizeInPages = (ResLevelSizeInProbes * ProbeSpacingInRadiosityTexels) / PHYSICAL_PAGE_SIZE;
// First find page to sample from
uint2 PageCoordInCard = (ProbeCoordInCard * ProbeSpacingInRadiosityTexels) / PHYSICAL_PAGE_SIZE;
uint LinearCardPageIndex = PageCoordInCard.x + PageCoordInCard.y * ResLevelSizeInPages.x;
FLumenCardPageData CardPage = GetLumenCardPageData(ResLevelPageTableOffset + LinearCardPageIndex);
// Don't sample if page doesn't have a valid probe
if (CardPage.bMapped && CardPage.LastIndirectLightingUpdateFrameIndex != 0)
{
// Then tile and probe coordinates
uint2 CoordInCardPage = ProbeCoordInCard * ProbeSpacingInRadiosityTexels - (PageCoordInCard * PHYSICAL_PAGE_SIZE);
uint2 ProbeAtlasCoord = ((uint2)CardPage.PhysicalAtlasCoord + CoordInCardPage) >> ProbeSpacingInRadiosityTexelsDivideShift;
if (all(CoordInCardPage < CardPage.SizeInTexels))
{
FRadiosityTexel NeighborProbeTexel = GetRadiosityTexel(CardPage, CoordInCardPage);
// An invalid probe texel contributes nothing; the optional weights below can only lower the weight further (min).
float Weight = NeighborProbeTexel.bValid ? InterpolationWeight : 0.0f;
#if INTERPOLATION_PLANE_WEIGHTING
{
float PlaneWeight = CalculatePlaneWeight(TexelWorldPosition, TexelNormal, NeighborProbeTexel.WorldPosition);
Weight = min(Weight, PlaneWeight);
}
#endif
#if INTERPOLATION_PROBE_OCCLUSION
{
float VisibilityWeight = CalculateProbeVisibility(TexelWorldPosition, NeighborProbeTexel, ProbeAtlasCoord);
Weight = min(Weight, VisibilityWeight);
}
#endif
// Only fetch the probe SH when it will actually contribute.
if (Weight > 0.0f)
{
FTwoBandSHVectorRGB ProbeSH = GetRadiosityProbeSH(ProbeAtlasCoord);
IrradianceSH = AddSH(IrradianceSH, MulSH(ProbeSH, Weight));
WeightSum += Weight;
}
}
}
}
}
C++(LumenSceneLighting.cpp)
// Queues the FLumenCardCombineLightingCS compute pass, which reads the albedo, opacity, emissive,
// direct lighting and indirect lighting atlases and writes TracingInputs.FinalLightingAtlas.
// The pass is dispatched indirectly from CardTileUpdateContext.DispatchCardTilesIndirectArgs.
// CardUpdateContext is not referenced in this function body.
void Lumen::CombineLumenSceneLighting(
FScene* Scene,
const FViewInfo& View,
FRDGBuilder& GraphBuilder,
const FLumenCardTracingInputs& TracingInputs,
const FLumenCardUpdateContext& CardUpdateContext,
const FLumenCardTileUpdateContext& CardTileUpdateContext,
ERDGPassFlags ComputePassFlags)
{
LLM_SCOPE_BYTAG(Lumen);
FLumenSceneData& LumenSceneData = *Scene->GetLumenSceneData(View);
FLumenCardCombineLightingCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FLumenCardCombineLightingCS::FParameters>();
PassParameters->IndirectArgsBuffer = CardTileUpdateContext.DispatchCardTilesIndirectArgs;
PassParameters->View = View.ViewUniformBuffer;
PassParameters->LumenCardScene = TracingInputs.LumenCardSceneUniformBuffer;
// Reciprocal of the post-process diffuse colour boost; the boost is floored at 1, so the value passed never exceeds 1.
PassParameters->DiffuseColorBoost = 1.0f / FMath::Max(View.FinalPostProcessSettings.LumenDiffuseColorBoost, 1.0f);
PassParameters->AlbedoAtlas = TracingInputs.AlbedoAtlas;
PassParameters->OpacityAtlas = TracingInputs.OpacityAtlas;
PassParameters->EmissiveAtlas = TracingInputs.EmissiveAtlas;
PassParameters->DirectLightingAtlas = TracingInputs.DirectLightingAtlas;
PassParameters->IndirectLightingAtlas = TracingInputs.IndirectLightingAtlas;
PassParameters->BilinearClampedSampler = TStaticSamplerState<SF_Bilinear, AM_Clamp, AM_Clamp, AM_Clamp>::GetRHI();
// List of card tiles to process, and the atlas the shader writes.
PassParameters->CardTiles = GraphBuilder.CreateSRV(CardTileUpdateContext.CardTiles);
PassParameters->RWFinalLightingAtlas = GraphBuilder.CreateUAV(TracingInputs.FinalLightingAtlas);
// Half a texel of the radiosity atlas in UV units; the shader uses it to clamp indirect lighting UVs.
const FIntPoint IndirectLightingAtlasSize = LumenSceneData.GetRadiosityAtlasSize();
PassParameters->IndirectLightingAtlasHalfTexelSize = FVector2f(0.5f / IndirectLightingAtlasSize.X, 0.5f / IndirectLightingAtlasSize.Y);
auto ComputeShader = View.ShaderMap->GetShader<FLumenCardCombineLightingCS>();
// Indirect dispatch using the OneGroupPerCardTile entry of the card-tile indirect args.
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("CombineLighting CS"),
ComputePassFlags,
ComputeShader,
PassParameters,
CardTileUpdateContext.DispatchCardTilesIndirectArgs,
(uint32)ELumenDispatchCardTilesIndirectArgsOffset::OneGroupPerCardTile);
}
シェーダー(LumenSceneLighting.usf)
// Combines the cached lighting terms of one surface-cache texel into its final lighting value:
// (direct + indirect) * Lambert diffuse of the decoded albedo, plus emissive.
// Albedo is passed in its surface-cache encoding and decoded here.
float3 CombineFinalLighting(float3 Albedo, float3 Emissive, float3 DirectLighting, float3 IndirectLighting)
{
	const float3 DecodedAlbedo = DecodeSurfaceCacheAlbedo(Albedo);
	const float3 IncidentLighting = DirectLighting + IndirectLighting;
	const float3 OutgoingLighting = IncidentLighting * Diffuse_Lambert(DecodedAlbedo) + Emissive;

	// The result is written to a persistent atlas with a feedback loop, so strip
	// non-finite values and clamp negatives before returning it.
	return max(MakeFinite(OutgoingLighting), float3(0.0f, 0.0f, 0.0f));
}
SamplerState BilinearClampedSampler;
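StructuredBuffer<uint> CardTiles; // template argument and name were lost in extraction; restored from the CardTiles usage below — confirm against LumenSceneLighting.usf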
// Writes the final lighting for every texel of the card tiles listed in CardTiles.
// One thread group processes one card tile (GroupId.x is the index into CardTiles) and
// one thread processes one texel of that tile (GroupThreadId.xy).
// Each thread samples the albedo, emissive, direct and indirect lighting atlases at the texel,
// combines them with CombineFinalLighting and stores the result in RWFinalLightingAtlas.
[numthreads(CARD_TILE_SIZE, CARD_TILE_SIZE, 1)]
void CombineLumenSceneLightingCS(
	uint3 GroupId : SV_GroupID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint CardTileIndex = GroupId.x;
	uint2 TexelCoordInTile = GroupThreadId.xy;

	FCardTileData CardTile = UnpackCardTileData(CardTiles[CardTileIndex]);
	FLumenCardPageData CardPage = GetLumenCardPageData(CardTile.CardPageIndex);
	FLumenCardData Card = GetLumenCardData(CardPage.CardIndex);

	// Texel position inside the card page, then inside the physical atlas (integer coordinate and UV at the texel centre).
	uint2 CoordInCardPage = CARD_TILE_SIZE * CardTile.TileCoord + TexelCoordInTile;
	uint2 AtlasCoord = CardPage.PhysicalAtlasCoord + CoordInCardPage;
	float2 AtlasUV = CardPage.PhysicalAtlasUVRect.xy + CardPage.PhysicalAtlasUVTexelScale * (CoordInCardPage + 0.5);

	// When sampling from a downsampled Indirect Lighting atlas we need to appropriately clamp input UVs to prevent bilinear reading outside of the valid area
	// (Declared once: the excerpt previously declared IndirectLightingAtlasUV twice in this scope,
	// first as a plain copy of AtlasUV and then as the clamped value, which is a redefinition.)
	float2 IndirectLightingAtlasUV = clamp(AtlasUV, CardPage.PhysicalAtlasUVRect.xy + IndirectLightingAtlasHalfTexelSize, CardPage.PhysicalAtlasUVRect.zw - IndirectLightingAtlasHalfTexelSize);

	float3 Albedo = Texture2DSampleLevel(AlbedoAtlas, BilinearClampedSampler, AtlasUV, 0).xyz;
	float3 Emissive = Texture2DSampleLevel(EmissiveAtlas, BilinearClampedSampler, AtlasUV, 0).xyz;
	float3 DirectLighting = Texture2DSampleLevel(DirectLightingAtlas, BilinearClampedSampler, AtlasUV, 0).xyz;
	float3 IndirectLighting = Texture2DSampleLevel(IndirectLightingAtlas, BilinearClampedSampler, IndirectLightingAtlasUV, 0).xyz;

	float3 FinalLighting = CombineFinalLighting(Albedo, Emissive, DirectLighting, IndirectLighting);
	RWFinalLightingAtlas[AtlasCoord] = FinalLighting;
}
C++(LumenSceneProbeGather.cpp)
// Excerpt of RenderLumenScreenProbeGather (parameter list and surrounding code elided by "..."
// and ":"): queues the uniform screen-probe placement pass, FScreenProbeDownsampleDepthUniformCS.
FSSDSignalTextures FDeferredShadingSceneRenderer::RenderLumenScreenProbeGather(...)
{
:
FScreenProbeDownsampleDepthUniformCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FScreenProbeDownsampleDepthUniformCS::FParameters>();
// Per-probe outputs written by the shader: scene depth, world normal, world speed and translated world position.
PassParameters->RWScreenProbeSceneDepth = GraphBuilder.CreateUAV(FRDGTextureUAVDesc(ScreenProbeParameters.ScreenProbeSceneDepth));
PassParameters->RWScreenProbeWorldNormal = GraphBuilder.CreateUAV(FRDGTextureUAVDesc(ScreenProbeParameters.ScreenProbeWorldNormal));
PassParameters->RWScreenProbeWorldSpeed = GraphBuilder.CreateUAV(FRDGTextureUAVDesc(ScreenProbeParameters.ScreenProbeWorldSpeed));
PassParameters->RWScreenProbeTranslatedWorldPosition = GraphBuilder.CreateUAV(FRDGTextureUAVDesc(ScreenProbeParameters.ScreenProbeTranslatedWorldPosition));
// View, scene texture and screen-probe inputs.
PassParameters->View = View.ViewUniformBuffer;
PassParameters->SceneTexturesStruct = SceneTextures.UniformBuffer;
PassParameters->Strata = Strata::BindStrataGlobalUniformParameters(View);
PassParameters->SceneTextures = SceneTextureParameters;
PassParameters->ScreenProbeParameters = ScreenProbeParameters;
auto ComputeShader = View.ShaderMap->GetShader<FScreenProbeDownsampleDepthUniformCS>(0);
// Direct dispatch with enough groups to cover ScreenProbeViewSize (one thread per uniform probe in the shader).
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("UniformPlacement DownsampleFactor=%u", ScreenProbeParameters.ScreenProbeDownsampleFactor),
ComputePassFlags,
ComputeShader,
PassParameters,
FComputeShaderUtils::GetGroupCount(ScreenProbeParameters.ScreenProbeViewSize, FScreenProbeDownsampleDepthUniformCS::GetGroupSize()));
:
}
- シェーダー(LumenScreenProbeGather.usf)
```C++
// Uniform probe placement: each thread owns one probe of the uniform grid
// (DispatchThreadId.xy is the probe's atlas coordinate), reads the material at the
// probe's screen pixel and writes the downsampled probe data.
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void ScreenProbeDownsampleDepthUniformCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	const uint2 ProbeAtlasCoord = DispatchThreadId.xy;

	// Threads that fall outside the probe atlas view have nothing to write.
	if (!all(ProbeAtlasCoord < ScreenProbeAtlasViewSize))
	{
		return;
	}

	const uint2 ProbePixel = GetUniformScreenProbeScreenPosition(ProbeAtlasCoord);
	// UV at the centre of the probe's pixel.
	const float2 ProbeUV = (ProbePixel + .5f) * View.BufferSizeAndInvSize.zw;
	WriteDownsampledProbeMaterial(ProbeUV, ProbeAtlasCoord, GetScreenProbeMaterial(ProbePixel));
}
// Returns the screen pixel of the uniform probe for the given screen tile: the view rect origin
// plus the tile offset and the temporal tile jitter, clamped to the last pixel of the view rect.
uint2 GetUniformScreenProbeScreenPosition(uint2 ScreenTileCoord)
{
	const uint2 TileJitter = GetScreenTileJitter(SCREEN_TEMPORAL_INDEX);
	const uint2 JitteredPixel = (uint2)(View.ViewRectMinAndSize.xy + ScreenTileCoord * ScreenProbeDownsampleFactor + TileJitter);
	const uint2 LastViewPixel = (uint2)(View.ViewRectMinAndSize.xy + View.ViewRectMinAndSize.zw) - 1;
	return min(JitteredPixel, LastViewPixel);
}
// Reads the material at PixelPos and reduces it to the fields a screen probe keeps:
// world normal, scene depth, a validity flag and a two-sided-foliage flag.
FScreenProbeMaterial GetScreenProbeMaterial(uint2 PixelPos)
{
	const FLumenMaterialData PixelMaterial = ReadMaterialData(PixelPos);

	FScreenProbeMaterial ProbeMaterial;
	ProbeMaterial.bIsValid = IsValid(PixelMaterial);
	ProbeMaterial.bTwoSidedFoliage = IsFoliage(PixelMaterial);
	ProbeMaterial.SceneDepth = PixelMaterial.SceneDepth;
	ProbeMaterial.WorldNormal = PixelMaterial.WorldNormal;
	return ProbeMaterial;
}
// Reads the material data for a pixel from whichever source this shader permutation is compiled
// for: front-layer translucency, Strata, or the GBuffer (in that order of precedence).
FLumenMaterialData ReadMaterialData(uint2 InPixelPos)
{
	FLumenMaterialData MaterialData;
#if FRONT_LAYER_TRANSLUCENCY
	MaterialData = InternalReadMaterialData_FrontLayerTranslucency(InPixelPos);
#elif STRATA_ENABLED
	MaterialData = InternalReadMaterialData_Strata(InPixelPos);
#else
	MaterialData = InternalReadMaterialData_GBuffer(InPixelPos);
#endif
	return MaterialData;
}
// Writes one probe's data into the screen-probe UAVs at ScreenProbeAtlasCoord: encoded scene
// depth, octahedron-encoded world normal, encoded world speed and translated world position.
// ScreenUV is the UV of the probe's pixel; ProbeMaterial is the material read at that pixel.
void WriteDownsampledProbeMaterial(float2 ScreenUV, uint2 ScreenProbeAtlasCoord, FScreenProbeMaterial ProbeMaterial)
{
float EncodedDepth = ProbeMaterial.SceneDepth;
if (!ProbeMaterial.bIsValid)
{
// Store unlit in sign bit
EncodedDepth *= -1.0f;
}
// The depth is stored as raw float bits in a uint texture.
RWScreenProbeSceneDepth[ScreenProbeAtlasCoord] = asuint(EncodedDepth);
// Octahedron encoding remapped with * 0.5 + 0.5.
RWScreenProbeWorldNormal[ScreenProbeAtlasCoord] = UnitVectorToOctahedron(ProbeMaterial.WorldNormal) * 0.5 + 0.5;
float3 ProbeWorldVelocity;
float3 ProbeTranslatedWorldPosition;
{
// Convert the UV to a screen position, reconstruct the current translated world position,
// and subtract the previous-frame position to get the probe's world-space velocity.
float2 ProbeScreenPosition = (ScreenUV - View.ScreenPositionScaleBias.wz) / View.ScreenPositionScaleBias.xy;
float ProbeDeviceZ = ConvertToDeviceZ(ProbeMaterial.SceneDepth);
float3 ProbeHistoryScreenPosition = GetHistoryScreenPositionIncludingTAAJitter(ProbeScreenPosition, ScreenUV, ProbeDeviceZ);
ProbeTranslatedWorldPosition = mul(float4(ProbeScreenPosition * ProbeMaterial.SceneDepth, ProbeMaterial.SceneDepth, 1), View.ScreenToTranslatedWorld).xyz;
ProbeWorldVelocity = ProbeTranslatedWorldPosition - GetPrevTranslatedWorldPosition(ProbeHistoryScreenPosition);
}
// Only the magnitude of the velocity is kept, encoded together with the two-sided-foliage flag.
RWScreenProbeWorldSpeed[ScreenProbeAtlasCoord] = EncodeScreenProbeSpeed(length(ProbeWorldVelocity), ProbeMaterial.bTwoSidedFoliage);
RWScreenProbeTranslatedWorldPosition[ScreenProbeAtlasCoord] = ProbeTranslatedWorldPosition;
}
C++(LumenSceneProbeGather.cpp)
// Excerpt of RenderLumenScreenProbeGather (parameter list and surrounding code elided by "..."
// and ":"): creates the adaptive screen-probe resources and queues the adaptive placement passes.
//
// Resources created here:
//  - NumAdaptiveScreenProbes: single uint counter of allocated adaptive probes.
//  - AdaptiveScreenProbeData: one uint per adaptive probe (at least one element).
//  - ScreenTileAdaptiveProbeHeader: per-screen-tile uint, cleared to zero.
//  - ScreenTileAdaptiveProbeIndices: per-tile list of adaptive probe indices.
//
// When adaptive probes are enabled and the minimum adaptive downsample factor is below the
// uniform one, FScreenProbeAdaptivePlacementCS is dispatched repeatedly, halving the placement
// downsample factor each time, until it is no longer above the minimum. Otherwise the adaptive
// data and index resources are just cleared.
FSSDSignalTextures FDeferredShadingSceneRenderer::RenderLumenScreenProbeGather(...)
{
:
	FRDGBufferRef NumAdaptiveScreenProbes = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(uint32), 1), TEXT("Lumen.ScreenProbeGather.NumAdaptiveScreenProbes"));
	// Debug name fixed: it previously read "daptiveScreenProbeData" (missing the leading 'A').
	FRDGBufferRef AdaptiveScreenProbeData = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(uint32), FMath::Max<uint32>(ScreenProbeParameters.MaxNumAdaptiveProbes, 1)), TEXT("Lumen.ScreenProbeGather.AdaptiveScreenProbeData"));
	ScreenProbeParameters.NumAdaptiveScreenProbes = GraphBuilder.CreateSRV(FRDGBufferSRVDesc(NumAdaptiveScreenProbes, PF_R32_UINT));
	ScreenProbeParameters.AdaptiveScreenProbeData = GraphBuilder.CreateSRV(FRDGBufferSRVDesc(AdaptiveScreenProbeData, PF_R32_UINT));

	// One header texel per screen tile; the index texture is ScreenProbeDownsampleFactor times larger in each dimension.
	const FIntPoint ScreenProbeViewportBufferSize = FIntPoint::DivideAndRoundUp(SceneTextures.Config.Extent, (int32)ScreenProbeParameters.ScreenProbeDownsampleFactor);
	FRDGTextureDesc ScreenTileAdaptiveProbeHeaderDesc(FRDGTextureDesc::Create2D(ScreenProbeViewportBufferSize, PF_R32_UINT, FClearValueBinding::Black, TexCreate_ShaderResource | TexCreate_UAV | TexCreate_AtomicCompatible));
	FIntPoint ScreenTileAdaptiveProbeIndicesBufferSize = FIntPoint(ScreenProbeViewportBufferSize.X * ScreenProbeParameters.ScreenProbeDownsampleFactor, ScreenProbeViewportBufferSize.Y * ScreenProbeParameters.ScreenProbeDownsampleFactor);
	FRDGTextureDesc ScreenTileAdaptiveProbeIndicesDesc(FRDGTextureDesc::Create2D(ScreenTileAdaptiveProbeIndicesBufferSize, PF_R16_UINT, FClearValueBinding::Black, TexCreate_ShaderResource | TexCreate_UAV));
	ScreenProbeParameters.ScreenTileAdaptiveProbeHeader = GraphBuilder.CreateTexture(ScreenTileAdaptiveProbeHeaderDesc, TEXT("Lumen.ScreenProbeGather.ScreenTileAdaptiveProbeHeader"));
	ScreenProbeParameters.ScreenTileAdaptiveProbeIndices = GraphBuilder.CreateTexture(ScreenTileAdaptiveProbeIndicesDesc, TEXT("Lumen.ScreenProbeGather.ScreenTileAdaptiveProbeIndices"));

	// The per-tile probe counts and the global probe counter always start at zero.
	FUintVector4 ClearValues(0, 0, 0, 0);
	AddClearUAVPass(GraphBuilder, GraphBuilder.CreateUAV(FRDGTextureUAVDesc(ScreenProbeParameters.ScreenTileAdaptiveProbeHeader)), ClearValues, ComputePassFlags);
	AddClearUAVPass(GraphBuilder, GraphBuilder.CreateUAV(NumAdaptiveScreenProbes), 0, ComputePassFlags);

	const uint32 AdaptiveProbeMinDownsampleFactor = FMath::Clamp(GLumenScreenProbeGatherAdaptiveProbeMinDownsampleFactor, 1, 64);
	if (ScreenProbeParameters.MaxNumAdaptiveProbes > 0 && AdaptiveProbeMinDownsampleFactor < ScreenProbeParameters.ScreenProbeDownsampleFactor)
	{
		uint32 PlacementDownsampleFactor = ScreenProbeParameters.ScreenProbeDownsampleFactor;
		do
		{
			// Each iteration places probes at half the spacing of the previous one.
			PlacementDownsampleFactor /= 2;

			FScreenProbeAdaptivePlacementCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FScreenProbeAdaptivePlacementCS::FParameters>();
			PassParameters->RWScreenProbeSceneDepth = GraphBuilder.CreateUAV(FRDGTextureUAVDesc(ScreenProbeParameters.ScreenProbeSceneDepth));
			PassParameters->RWScreenProbeWorldNormal = GraphBuilder.CreateUAV(FRDGTextureUAVDesc(ScreenProbeParameters.ScreenProbeWorldNormal));
			PassParameters->RWScreenProbeWorldSpeed = GraphBuilder.CreateUAV(FRDGTextureUAVDesc(ScreenProbeParameters.ScreenProbeWorldSpeed));
			PassParameters->RWScreenProbeTranslatedWorldPosition = GraphBuilder.CreateUAV(FRDGTextureUAVDesc(ScreenProbeParameters.ScreenProbeTranslatedWorldPosition));
			PassParameters->RWNumAdaptiveScreenProbes = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(NumAdaptiveScreenProbes, PF_R32_UINT));
			PassParameters->RWAdaptiveScreenProbeData = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(AdaptiveScreenProbeData, PF_R32_UINT));
			PassParameters->RWScreenTileAdaptiveProbeHeader = GraphBuilder.CreateUAV(FRDGTextureUAVDesc(ScreenProbeParameters.ScreenTileAdaptiveProbeHeader));
			PassParameters->RWScreenTileAdaptiveProbeIndices = GraphBuilder.CreateUAV(FRDGTextureUAVDesc(ScreenProbeParameters.ScreenTileAdaptiveProbeIndices));
			PassParameters->View = View.ViewUniformBuffer;
			PassParameters->SceneTexturesStruct = SceneTextures.UniformBuffer;
			PassParameters->SceneTextures = SceneTextureParameters;
			PassParameters->Strata = Strata::BindStrataGlobalUniformParameters(View);
			PassParameters->ScreenProbeParameters = ScreenProbeParameters;
			PassParameters->PlacementDownsampleFactor = PlacementDownsampleFactor;

			auto ComputeShader = View.ShaderMap->GetShader<FScreenProbeAdaptivePlacementCS>(0);

			// One thread per candidate position: the view rect size divided (rounding down) by the placement factor.
			FComputeShaderUtils::AddPass(
				GraphBuilder,
				RDG_EVENT_NAME("AdaptivePlacement DownsampleFactor=%u", PlacementDownsampleFactor),
				ComputePassFlags,
				ComputeShader,
				PassParameters,
				FComputeShaderUtils::GetGroupCount(FIntPoint::DivideAndRoundDown(View.ViewRect.Size(), (int32)PlacementDownsampleFactor), FScreenProbeAdaptivePlacementCS::GetGroupSize()));
		}
		while (PlacementDownsampleFactor > AdaptiveProbeMinDownsampleFactor);
	}
	else
	{
		// No adaptive placement this frame: clear the resources the placement shader would have written.
		AddClearUAVPass(GraphBuilder, GraphBuilder.CreateUAV(AdaptiveScreenProbeData), 0, ComputePassFlags);
		AddClearUAVPass(GraphBuilder, GraphBuilder.CreateUAV(FRDGTextureUAVDesc(ScreenProbeParameters.ScreenTileAdaptiveProbeIndices)), ClearValues, ComputePassFlags);
	}
:
}
- シェーダー(LumenScreenProbeGather.usf)
```C++
// Reads a probe's encoded scene depth straight from the RWScreenProbeSceneDepth UAV and
// reinterprets the stored bits as a float.
float GetScreenProbeDepthFromUAV(uint2 ScreenProbeAtlasCoord)
{
	const uint EncodedDepthBits = RWScreenProbeSceneDepth[ScreenProbeAtlasCoord];
	return asfloat(EncodedDepthBits);
}
// Computes interpolation weights for the four uniform screen probes surrounding a pixel.
//
// ScreenCoord / NoiseOffset: pixel position and an offset added to it before locating the probe cell.
// WorldPosition / SceneDepth / WorldNormal: surface at the pixel, used to build the depth weights.
// bIsUpsamplePass: selects where probe depths are read from — GetScreenProbeDepth when true,
//                  GetScreenProbeDepthFromUAV when false.
// ScreenTileCoord00 (out): coordinate of the top-left probe of the 2x2 neighbourhood.
// InterpolationWeights (out): bilinear weight times depth weight, in the order 00, 10, 01, 11.
void CalculateUniformUpsampleInterpolationWeights(
	float2 ScreenCoord,
	float2 NoiseOffset,
	float3 WorldPosition,
	float SceneDepth,
	float3 WorldNormal,
	uniform bool bIsUpsamplePass/*=false*/,
	out uint2 ScreenTileCoord00,
	out float4 InterpolationWeights)
{
	// Pixel position relative to the view rect with the tile jitter removed, clamped to the view.
	uint2 ScreenProbeFullResScreenCoord = clamp(ScreenCoord.xy - View.ViewRectMin.xy - GetScreenTileJitter(SCREEN_TEMPORAL_INDEX) + NoiseOffset, 0.0f, View.ViewSizeAndInvSize.xy - 1.0f);
	// Clamp so that the +1 neighbours used below stay inside the probe grid.
	ScreenTileCoord00 = min(ScreenProbeFullResScreenCoord / ScreenProbeDownsampleFactor, (uint2)ScreenProbeViewSize - 2);

	// Bilinear fraction inside the probe cell, with the cell widened by BilinearExpand on each side.
	uint BilinearExpand = 1;
	float2 BilinearWeights = (ScreenProbeFullResScreenCoord - ScreenTileCoord00 * ScreenProbeDownsampleFactor + BilinearExpand) / (float)(ScreenProbeDownsampleFactor + 2 * BilinearExpand);

	float4 CornerDepths;
	CornerDepths.x = bIsUpsamplePass ? GetScreenProbeDepth(ScreenTileCoord00) : GetScreenProbeDepthFromUAV(ScreenTileCoord00);
	CornerDepths.y = bIsUpsamplePass ? GetScreenProbeDepth(ScreenTileCoord00 + int2(1, 0)) : GetScreenProbeDepthFromUAV(ScreenTileCoord00 + int2(1, 0));
	CornerDepths.z = bIsUpsamplePass ? GetScreenProbeDepth(ScreenTileCoord00 + int2(0, 1)) : GetScreenProbeDepthFromUAV(ScreenTileCoord00 + int2(0, 1));
	CornerDepths.w = bIsUpsamplePass ? GetScreenProbeDepth(ScreenTileCoord00 + int2(1, 1)) : GetScreenProbeDepthFromUAV(ScreenTileCoord00 + int2(1, 1));

	InterpolationWeights = float4(
		(1 - BilinearWeights.y) * (1 - BilinearWeights.x),
		(1 - BilinearWeights.y) * BilinearWeights.x,
		BilinearWeights.y * (1 - BilinearWeights.x),
		BilinearWeights.y * BilinearWeights.x);

	float4 DepthWeights;

	// Corners with a non-positive depth get a depth weight of zero in both variants below.
#define PLANE_WEIGHTING 1
#if PLANE_WEIGHTING
	{
		// Weight by each probe's distance to the pixel's surface plane, relative to the pixel depth.
		float4 ScenePlane = float4(WorldNormal, dot(WorldPosition, WorldNormal));

		float3 Position00 = GetWorldPositionFromScreenUV(GetScreenUVFromScreenTileCoord(ScreenTileCoord00), CornerDepths.x);
		float3 Position10 = GetWorldPositionFromScreenUV(GetScreenUVFromScreenTileCoord(ScreenTileCoord00 + uint2(1, 0)), CornerDepths.y);
		float3 Position01 = GetWorldPositionFromScreenUV(GetScreenUVFromScreenTileCoord(ScreenTileCoord00 + uint2(0, 1)), CornerDepths.z);
		float3 Position11 = GetWorldPositionFromScreenUV(GetScreenUVFromScreenTileCoord(ScreenTileCoord00 + uint2(1, 1)), CornerDepths.w);

		float4 PlaneDistances;
		PlaneDistances.x = abs(dot(float4(Position00, -1), ScenePlane));
		PlaneDistances.y = abs(dot(float4(Position10, -1), ScenePlane));
		PlaneDistances.z = abs(dot(float4(Position01, -1), ScenePlane));
		PlaneDistances.w = abs(dot(float4(Position11, -1), ScenePlane));

		float4 RelativeDepthDifference = PlaneDistances / SceneDepth;

		DepthWeights = select(CornerDepths > 0, exp2(-10000.0f * (RelativeDepthDifference * RelativeDepthDifference)), 0.0);
	}
#else
	{
		// Fallback: weight by the relative depth difference only.
		float4 DepthDifference = abs(CornerDepths - SceneDepth.xxxx);
		float4 RelativeDepthDifference = DepthDifference / SceneDepth;
		// Uses select() like the plane-weighted branch above: a vector condition with ?: is not
		// accepted under HLSL 2021, so the two branches now compile under the same rules.
		DepthWeights = select(CornerDepths > 0, exp2(-100.0f * (RelativeDepthDifference * RelativeDepthDifference)), 0.0);
	}
#endif

	InterpolationWeights *= DepthWeights;
}
// Writable adaptive-probe resources used during placement.
// Per screen tile: the number of adaptive probes in that tile.
RWTexture2D<uint> RWScreenTileAdaptiveProbeHeader;
// Per tile list slot: the adaptive probe index stored in that slot.
RWTexture2D<uint> RWScreenTileAdaptiveProbeIndices;
// Per adaptive probe: its encoded screen position.
RWStructuredBuffer<uint> RWAdaptiveScreenProbeData;
// The four screen probes chosen to interpolate lighting for a pixel.
struct FScreenProbeSample
{
// Atlas coordinate of each of the four probes.
uint2 AtlasCoord[4];
// Interpolation weight of each probe, in the same order as AtlasCoord.
float4 Weights;
};
// Picks the four probes and weights used to interpolate lighting at a pixel.
// It starts from the four uniform probes around the pixel, then, for every corner whose weight is
// at most Epsilon, searches the adaptive probes of that corner's screen tile and swaps in the one
// with the highest weight, if that weight is higher than the corner's current one.
// bIsUpsamplePass selects the read-only resources (true) or the placement UAVs (false) as the data source.
void CalculateUpsampleInterpolationWeights(
float2 ScreenCoord,
float2 NoiseOffset,
float3 WorldPosition,
float SceneDepth,
float3 WorldNormal,
uniform bool bIsUpsamplePass,
out FScreenProbeSample ScreenProbeSample)
{
uint2 ScreenTileCoord00;
CalculateUniformUpsampleInterpolationWeights(ScreenCoord, NoiseOffset, WorldPosition, SceneDepth, WorldNormal, bIsUpsamplePass, ScreenTileCoord00, ScreenProbeSample.Weights);
// Default to the 2x2 uniform probes, in the order 00, 10, 01, 11.
ScreenProbeSample.AtlasCoord[0] = ScreenTileCoord00;
ScreenProbeSample.AtlasCoord[1] = ScreenTileCoord00 + uint2(1, 0);
ScreenProbeSample.AtlasCoord[2] = ScreenTileCoord00 + uint2(0, 1);
ScreenProbeSample.AtlasCoord[3] = ScreenTileCoord00 + uint2(1, 1);
// Hard-coded to true, so the adaptive search below runs in both passes.
bool bUseAdaptiveProbesForUpsample = true;
if (bUseAdaptiveProbesForUpsample || !bIsUpsamplePass)
{
float Epsilon = .01f;
float4 ScenePlane = float4(WorldNormal, dot(WorldPosition, WorldNormal));
UNROLL
for (uint CornerIndex = 0; CornerIndex < 4; CornerIndex++)
{
// Only corners whose uniform probe is (nearly) unusable look for an adaptive replacement.
if (ScreenProbeSample.Weights[CornerIndex] <= Epsilon)
{
uint2 ScreenTileCoord = ScreenTileCoord00 + uint2(CornerIndex % 2, CornerIndex / 2);
uint NumAdaptiveProbes = bIsUpsamplePass ? ScreenTileAdaptiveProbeHeader[ScreenTileCoord] : RWScreenTileAdaptiveProbeHeader[ScreenTileCoord];
for (uint AdaptiveProbeListIndex = 0; AdaptiveProbeListIndex < NumAdaptiveProbes; AdaptiveProbeListIndex++)
{
uint2 AdaptiveProbeCoord = GetAdaptiveProbeCoord(ScreenTileCoord, AdaptiveProbeListIndex);
uint AdaptiveProbeIndex = bIsUpsamplePass ? ScreenTileAdaptiveProbeIndices[AdaptiveProbeCoord] : RWScreenTileAdaptiveProbeIndices[AdaptiveProbeCoord];
// Adaptive probes are indexed after the uniform probes.
uint ScreenProbeIndex = AdaptiveProbeIndex + NumUniformScreenProbes;
uint2 ScreenProbeScreenPosition = bIsUpsamplePass ? GetScreenProbeScreenPosition(ScreenProbeIndex) : DecodeScreenProbeData(RWAdaptiveScreenProbeData[AdaptiveProbeIndex]);
uint2 ScreenProbeAtlasCoord = uint2(ScreenProbeIndex % ScreenProbeAtlasViewSize.x, ScreenProbeIndex / ScreenProbeAtlasViewSize.x);
float ProbeDepth = bIsUpsamplePass ? GetScreenProbeDepth(ScreenProbeAtlasCoord) : GetScreenProbeDepthFromUAV(ScreenProbeAtlasCoord);
float NewDepthWeight = 0;
// Hard-coded to true, so the else branch below is never taken.
bool bPlaneWeighting = true;
if (bPlaneWeighting)
{
// Weight by the probe's distance to the pixel's surface plane, relative to the pixel depth.
float3 ProbePosition = GetWorldPositionFromScreenUV(GetScreenUVFromScreenProbePosition(ScreenProbeScreenPosition), ProbeDepth);
float PlaneDistance = abs(dot(float4(ProbePosition, -1), ScenePlane));
float RelativeDepthDifference = PlaneDistance / SceneDepth;
NewDepthWeight = exp2(-10000.0f * (RelativeDepthDifference * RelativeDepthDifference));
}
else
{
float DepthDifference = abs(ProbeDepth - SceneDepth);
float RelativeDepthDifference = DepthDifference / SceneDepth;
NewDepthWeight = ProbeDepth > 0 ? exp2(-100.0f * (RelativeDepthDifference * RelativeDepthDifference)) : 0;
}
// Screen-space falloff: based on the smaller of the x and y distances to the probe, reaching zero at one downsample factor.
float2 DistanceToScreenProbe = abs(ScreenProbeScreenPosition - ScreenCoord);
float NewCornerWeight = 1.0f - saturate(min(DistanceToScreenProbe.x, DistanceToScreenProbe.y) / (float)ScreenProbeDownsampleFactor);
float NewInterpolationWeight = NewDepthWeight * NewCornerWeight;
// Keep the best candidate seen so far for this corner.
if (NewInterpolationWeight > ScreenProbeSample.Weights[CornerIndex])
{
ScreenProbeSample.Weights[CornerIndex] = NewInterpolationWeight;
ScreenProbeSample.AtlasCoord[CornerIndex] = ScreenProbeAtlasCoord;
}
}
}
}
}
}
// Global counter of adaptive probes allocated so far (element 0).
RWStructuredBuffer<uint> RWNumAdaptiveScreenProbes;

// Per-group staging area: how many probes this group wants, where its range starts in the
// global list, and the screen position / material of each requested probe.
// The arrays hold one slot per thread of the group (the '*' in the sizes had been lost from the excerpt).
groupshared uint SharedNumProbesToAllocate;
groupshared uint SharedAdaptiveProbeBaseIndex;
groupshared uint2 SharedProbeScreenPositionsToAllocate[THREADGROUP_SIZE * THREADGROUP_SIZE];
groupshared FScreenProbeMaterial SharedScreenProbeMaterial[THREADGROUP_SIZE * THREADGROUP_SIZE];

// Spacing, in pixels, of the candidate positions tested by this pass.
uint PlacementDownsampleFactor;

// Adaptive probe placement. Each thread tests one candidate screen position; if the probes that
// already exist cannot provide valid interpolated lighting there, a new adaptive probe is requested.
// Requests are gathered in groupshared memory, the group reserves a contiguous range in the global
// probe list with one atomic add, and then one thread per request writes the probe out.
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void ScreenProbeAdaptivePlacementCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint ThreadIndex = GroupThreadId.y * THREADGROUP_SIZE + GroupThreadId.x;

	if (ThreadIndex == 0)
	{
		SharedNumProbesToAllocate = 0;
	}

	GroupMemoryBarrierWithGroupSync();

	{
		uint2 ScreenProbeScreenPosition = DispatchThreadId.xy * PlacementDownsampleFactor + GetScreenTileJitter(SCREEN_TEMPORAL_INDEX) + View.ViewRectMinAndSize.xy;

		// Skip positions outside the view rect, and threads whose x and y are both even.
		// NOTE(review): presumably the even/even positions coincide with the grid handled at twice this spacing — confirm.
		if (all(ScreenProbeScreenPosition < View.ViewRectMinAndSize.xy + View.ViewRectMinAndSize.zw) && any((DispatchThreadId.xy & 0x1) != 0))
		{
			const FScreenProbeMaterial ScreenProbeMaterial = GetScreenProbeMaterial(ScreenProbeScreenPosition);

			if (ScreenProbeMaterial.bIsValid)
			{
				float2 ScreenUV = (ScreenProbeScreenPosition + .5f) * View.BufferSizeAndInvSize.zw;
				float3 WorldPosition = GetWorldPositionFromScreenUV(ScreenUV, ScreenProbeMaterial.SceneDepth);
				float2 NoiseOffset = 0.0f;

				// Evaluate the interpolation that existing probes (uniform and already placed adaptive ones)
				// would give at this position; bIsUpsamplePass = false reads the placement UAVs.
				FScreenProbeSample ScreenProbeSample = (FScreenProbeSample)0;
				CalculateUpsampleInterpolationWeights(
					ScreenProbeScreenPosition,
					NoiseOffset,
					WorldPosition,
					ScreenProbeMaterial.SceneDepth,
					ScreenProbeMaterial.WorldNormal,
					false,
					ScreenProbeSample);

				// After normalising by max(sum, Epsilon), the weights only sum to less than 1 when the
				// raw sum was below Epsilon, i.e. when no existing probe meaningfully covers this position.
				float Epsilon = .01f;
				ScreenProbeSample.Weights /= max(dot(ScreenProbeSample.Weights, 1), Epsilon);
				float LightingIsValid = (dot(ScreenProbeSample.Weights, 1) < 1.0f - Epsilon) ? 0.0f : 1.0f;

				if (!LightingIsValid)
				{
					// No usable probe was found: queue this position and its material in groupshared
					// memory (SharedProbeScreenPositionsToAllocate, SharedScreenProbeMaterial).
					// Each thread queues at most one entry, so the index stays within the arrays.
					uint SharedListIndex;
					InterlockedAdd(SharedNumProbesToAllocate, 1, SharedListIndex);
					SharedProbeScreenPositionsToAllocate[SharedListIndex] = ScreenProbeScreenPosition;
					SharedScreenProbeMaterial[SharedListIndex] = ScreenProbeMaterial;
				}
			}
		}
	}

	GroupMemoryBarrierWithGroupSync();

	if (ThreadIndex == 0)
	{
		// Add this group's request count to the global counter; the value before the add is the
		// group's first index in the adaptive probe list.
		InterlockedAdd(RWNumAdaptiveScreenProbes[0], SharedNumProbesToAllocate, SharedAdaptiveProbeBaseIndex);
	}

	GroupMemoryBarrierWithGroupSync();

	uint AdaptiveProbeIndex = ThreadIndex + SharedAdaptiveProbeBaseIndex;

	// One thread per queued request. Requests whose index falls at or beyond MaxNumAdaptiveProbes
	// are dropped here, although the global counter above has already been advanced for them.
	if (ThreadIndex < SharedNumProbesToAllocate && AdaptiveProbeIndex < MaxNumAdaptiveProbes)
	{
		uint2 ScreenProbeScreenPosition = SharedProbeScreenPositionsToAllocate[ThreadIndex];
		// Store the new probe's encoded screen position in RWAdaptiveScreenProbeData.
		RWAdaptiveScreenProbeData[AdaptiveProbeIndex] = EncodeScreenProbeData(ScreenProbeScreenPosition);

		uint2 ScreenTileCoord = GetScreenTileCoord(ScreenProbeScreenPosition);

		uint TileProbeIndex;
		// Increment the probe count of the probe's screen tile in RWScreenTileAdaptiveProbeHeader;
		// the previous count is this probe's slot in the tile's list.
		InterlockedAdd(RWScreenTileAdaptiveProbeHeader[ScreenTileCoord], 1, TileProbeIndex);
		uint2 AdaptiveProbeCoord = GetAdaptiveProbeCoord(ScreenTileCoord, TileProbeIndex);
		// Store the adaptive probe index in that slot of RWScreenTileAdaptiveProbeIndices.
		RWScreenTileAdaptiveProbeIndices[AdaptiveProbeCoord] = AdaptiveProbeIndex;

		// Adaptive probes are laid out after the uniform probes in the probe atlas.
		float2 ScreenUV = (ScreenProbeScreenPosition + .5f) * View.BufferSizeAndInvSize.zw;
		uint ScreenProbeIndex = NumUniformScreenProbes + AdaptiveProbeIndex;
		uint2 ScreenProbeAtlasCoord = uint2(ScreenProbeIndex % ScreenProbeAtlasViewSize.x, ScreenProbeIndex / ScreenProbeAtlasViewSize.x);
		WriteDownsampledProbeMaterial(ScreenUV, ScreenProbeAtlasCoord, SharedScreenProbeMaterial[ThreadIndex]);
	}
}
// Writes one probe's data into the screen-probe UAVs at ScreenProbeAtlasCoord: encoded scene
// depth, octahedron-encoded world normal, encoded world speed and translated world position.
// ScreenUV is the UV of the probe's pixel; ProbeMaterial is the material read at that pixel.
void WriteDownsampledProbeMaterial(float2 ScreenUV, uint2 ScreenProbeAtlasCoord, FScreenProbeMaterial ProbeMaterial) { float EncodedDepth = ProbeMaterial.SceneDepth;
if (!ProbeMaterial.bIsValid)
{
// Store unlit in sign bit
EncodedDepth *= -1.0f;
}
// The depth is stored as raw float bits in a uint texture.
RWScreenProbeSceneDepth[ScreenProbeAtlasCoord] = asuint(EncodedDepth);
// Octahedron encoding remapped with * 0.5 + 0.5.
RWScreenProbeWorldNormal[ScreenProbeAtlasCoord] = UnitVectorToOctahedron(ProbeMaterial.WorldNormal) * 0.5 + 0.5;
float3 ProbeWorldVelocity;
float3 ProbeTranslatedWorldPosition;
{
// Convert the UV to a screen position, reconstruct the current translated world position,
// and subtract the previous-frame position to get the probe's world-space velocity.
float2 ProbeScreenPosition = (ScreenUV - View.ScreenPositionScaleBias.wz) / View.ScreenPositionScaleBias.xy;
float ProbeDeviceZ = ConvertToDeviceZ(ProbeMaterial.SceneDepth);
float3 ProbeHistoryScreenPosition = GetHistoryScreenPositionIncludingTAAJitter(ProbeScreenPosition, ScreenUV, ProbeDeviceZ);
ProbeTranslatedWorldPosition = mul(float4(ProbeScreenPosition * ProbeMaterial.SceneDepth, ProbeMaterial.SceneDepth, 1), View.ScreenToTranslatedWorld).xyz;
ProbeWorldVelocity = ProbeTranslatedWorldPosition - GetPrevTranslatedWorldPosition(ProbeHistoryScreenPosition);
}
// Only the magnitude of the velocity is kept, encoded together with the two-sided-foliage flag.
RWScreenProbeWorldSpeed[ScreenProbeAtlasCoord] = EncodeScreenProbeSpeed(length(ProbeWorldVelocity), ProbeMaterial.bTwoSidedFoliage);
RWScreenProbeTranslatedWorldPosition[ScreenProbeAtlasCoord] = ProbeTranslatedWorldPosition;
}
C++(LumenScreenProbeGather.cpp)
// Excerpt of RenderLumenScreenProbeGather: schedules the SetupAdaptiveProbeIndirectArgs compute pass.
// The lone ':' lines mark code that was elided from this excerpt.
FSSDSignalTextures FDeferredShadingSceneRenderer::RenderLumenScreenProbeGather(...)
{
:
// Pass parameters: a PF_R32_UINT UAV over the indirect-args buffer plus the shared screen probe parameters.
FSetupAdaptiveProbeIndirectArgsCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FSetupAdaptiveProbeIndirectArgsCS::FParameters>();
PassParameters->RWScreenProbeIndirectArgs = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(ScreenProbeIndirectArgs, PF_R32_UINT));
PassParameters->ScreenProbeParameters = ScreenProbeParameters;
auto ComputeShader = View.ShaderMap->GetShader<FSetupAdaptiveProbeIndirectArgsCS>(0);
// A single thread group is dispatched; the shader itself is declared with numthreads(1, 1, 1).
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("SetupAdaptiveProbeIndirectArgs"),
ComputePassFlags,
ComputeShader,
PassParameters,
FIntVector(1, 1, 1));
:
}
- シェーダー(LumenScreenProbeGather.usf)
```C++
RWBuffer<uint> RWScreenProbeIndirectArgs;
/**
 * Writes one 3-component indirect dispatch entry into RWScreenProbeIndirectArgs.
 *
 * Index       - entry slot; each entry occupies three consecutive uints.
 * ThreadCount - number of threads wanted in X and Y; rounded up to whole groups of
 *               PROBE_THREADGROUP_SIZE_2D threads. The Z group count is always 1.
 */
void WriteArgs2D(uint Index, uint2 ThreadCount)
{
    const uint FirstElement = Index * 3;
    const uint2 GroupCount = (ThreadCount + PROBE_THREADGROUP_SIZE_2D - 1) / PROBE_THREADGROUP_SIZE_2D;

    RWScreenProbeIndirectArgs[FirstElement + 0] = GroupCount.x;
    RWScreenProbeIndirectArgs[FirstElement + 1] = GroupCount.y;
    RWScreenProbeIndirectArgs[FirstElement + 2] = 1;
}
/**
 * Single-thread shader that fills the indirect dispatch arguments used by later screen probe passes.
 * The atlas is ScreenProbeAtlasViewSize.x probes wide and as many rows tall as needed to hold
 * GetNumScreenProbes() probes.
 */
[numthreads(1, 1, 1)]
void SetupAdaptiveProbeIndirectArgsCS(
    uint3 GroupId : SV_GroupID,
    uint3 DispatchThreadId : SV_DispatchThreadID,
    uint3 GroupThreadId : SV_GroupThreadID)
{
    const uint AtlasRowCount = (GetNumScreenProbes() + ScreenProbeAtlasViewSize.x - 1) / ScreenProbeAtlasViewSize.x;
    const uint2 AtlasSizeInProbes = uint2(ScreenProbeAtlasViewSize.x, AtlasRowCount);

    // Must match EScreenProbeIndirectArgs in C++
    // Slot 0: one full thread group per probe.
    WriteArgs2D(0, AtlasSizeInProbes * PROBE_THREADGROUP_SIZE_2D);
    // Slot 1: one thread per probe.
    WriteArgs2D(1, AtlasSizeInProbes);
    // Slots 2-5: one thread per probe texel at the respective per-probe resolution.
    WriteArgs2D(2, AtlasSizeInProbes * ScreenProbeTracingOctahedronResolution);
    WriteArgs2D(3, AtlasSizeInProbes * ScreenProbeGatherOctahedronResolution);
    WriteArgs2D(4, AtlasSizeInProbes * ScreenProbeGatherOctahedronResolutionWithBorder);
    WriteArgs2D(5, AtlasSizeInProbes * ScreenProbeLightSampleResolutionXY);
}
C++(LumenScreenProbeImportanceSampling.cpp)
/**
 * Allocates the per-probe BRDF PDF resources and schedules the compute pass that fills them.
 *
 * Outputs:
 *  - BRDFProbabilityDensityFunction:   PF_R16F texture, atlas buffer size times the BRDF octahedron resolution.
 *  - BRDFProbabilityDensityFunctionSH: PF_R16F SRV over a buffer holding 9 values per atlas texel.
 *  - ScreenProbeParameters.ImportanceSampling.ScreenProbeBRDFOctahedronResolution is updated
 *    before the parameters are copied into the pass.
 *
 * The pass is dispatched indirectly through the GroupPerProbe entry of ProbeIndirectArgs.
 */
void GenerateBRDF_PDF(
    FRDGBuilder& GraphBuilder,
    const FViewInfo& View,
    const FSceneTextures& SceneTextures,
    FRDGTextureRef& BRDFProbabilityDensityFunction,
    FRDGBufferSRVRef& BRDFProbabilityDensityFunctionSH,
    FScreenProbeParameters& ScreenProbeParameters,
    ERDGPassFlags ComputePassFlags)
{
    using FPassShader = FScreenProbeComputeBRDFProbabilityDensityFunctionCS;

    // Kept from the original; the reference is not read anywhere below.
    const FRDGSystemTextures& SystemTextures = FRDGSystemTextures::Get(GraphBuilder);

    const uint32 OctahedronResolution = GLumenScreenProbeBRDFOctahedronResolution;
    ScreenProbeParameters.ImportanceSampling.ScreenProbeBRDFOctahedronResolution = OctahedronResolution;

    // Octahedral PDF texture.
    const FIntPoint PDFTextureExtent = ScreenProbeParameters.ScreenProbeAtlasBufferSize * OctahedronResolution;
    const FRDGTextureDesc PDFTextureDesc(FRDGTextureDesc::Create2D(PDFTextureExtent, PF_R16F, FClearValueBinding::Black, TexCreate_ShaderResource | TexCreate_UAV));
    BRDFProbabilityDensityFunction = GraphBuilder.CreateTexture(PDFTextureDesc, TEXT("Lumen.ScreenProbeGather.BRDFProbabilityDensityFunction"));

    // Spherical harmonic PDF buffer: 9 half-float values for every atlas texel.
    const int32 SHElementCount = ScreenProbeParameters.ScreenProbeAtlasBufferSize.X * ScreenProbeParameters.ScreenProbeAtlasBufferSize.Y * 9;
    const FRDGBufferDesc SHBufferDesc = FRDGBufferDesc::CreateBufferDesc(sizeof(FFloat16), SHElementCount);
    FRDGBufferRef SHBuffer = GraphBuilder.CreateBuffer(SHBufferDesc, TEXT("Lumen.ScreenProbeGather.BRDFProbabilityDensityFunctionSH"));

    FPassShader::FParameters* Parameters = GraphBuilder.AllocParameters<FPassShader::FParameters>();
    Parameters->Strata = Strata::BindStrataGlobalUniformParameters(View);
    Parameters->RWBRDFProbabilityDensityFunction = GraphBuilder.CreateUAV(FRDGTextureUAVDesc(BRDFProbabilityDensityFunction));
    Parameters->RWBRDFProbabilityDensityFunctionSH = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(SHBuffer, PF_R16F));
    Parameters->View = View.ViewUniformBuffer;
    Parameters->SceneTexturesStruct = SceneTextures.UniformBuffer;
    Parameters->ScreenProbeParameters = ScreenProbeParameters;

    auto Shader = View.ShaderMap->GetShader<FPassShader>(0);

    const uint32 IndirectArgsOffset = (uint32)EScreenProbeIndirectArgs::GroupPerProbe * sizeof(FRHIDispatchIndirectParameters);
    FComputeShaderUtils::AddPass(
        GraphBuilder,
        RDG_EVENT_NAME("ComputeBRDF_PDF"),
        ComputePassFlags,
        Shader,
        Parameters,
        ScreenProbeParameters.ProbeIndirectArgs,
        IndirectArgsOffset);

    BRDFProbabilityDensityFunctionSH = GraphBuilder.CreateSRV(FRDGBufferSRVDesc(SHBuffer, PF_R16F));
}
groupshared float4 PixelPlanes[PROBE_THREADGROUP_SIZE_2D][PROBE_THREADGROUP_SIZE_2D];
groupshared float PDF_SphericalHarmonic[PROBE_THREADGROUP_SIZE_2D * PROBE_THREADGROUP_SIZE_2D * 2][NUM_PDF_SH_COEFFICIENTS];
groupshared uint NumSphericalHarmonics;
/** Rebuilds a three-band SH vector from the nine coefficients stored in row ThreadIndex of PDF_SphericalHarmonic. */
FThreeBandSHVector GetGroupSharedSH(uint ThreadIndex)
{
    FThreeBandSHVector BRDF;
    BRDF.V0 = float4(
        PDF_SphericalHarmonic[ThreadIndex][0],
        PDF_SphericalHarmonic[ThreadIndex][1],
        PDF_SphericalHarmonic[ThreadIndex][2],
        PDF_SphericalHarmonic[ThreadIndex][3]);
    BRDF.V1 = float4(
        PDF_SphericalHarmonic[ThreadIndex][4],
        PDF_SphericalHarmonic[ThreadIndex][5],
        PDF_SphericalHarmonic[ThreadIndex][6],
        PDF_SphericalHarmonic[ThreadIndex][7]);
    BRDF.V2.x = PDF_SphericalHarmonic[ThreadIndex][8];
    return BRDF;
}
/** Stores the nine coefficients of SH (V0.xyzw, V1.xyzw, V2.x) into row ThreadIndex of PDF_SphericalHarmonic. */
void WriteGroupSharedSH(FThreeBandSHVector SH, uint ThreadIndex)
{
    // Coefficients 0-3
    PDF_SphericalHarmonic[ThreadIndex][0] = SH.V0.x;
    PDF_SphericalHarmonic[ThreadIndex][1] = SH.V0.y;
    PDF_SphericalHarmonic[ThreadIndex][2] = SH.V0.z;
    PDF_SphericalHarmonic[ThreadIndex][3] = SH.V0.w;

    // Coefficients 4-7
    PDF_SphericalHarmonic[ThreadIndex][4] = SH.V1.x;
    PDF_SphericalHarmonic[ThreadIndex][5] = SH.V1.y;
    PDF_SphericalHarmonic[ThreadIndex][6] = SH.V1.z;
    PDF_SphericalHarmonic[ThreadIndex][7] = SH.V1.w;

    // Coefficient 8
    PDF_SphericalHarmonic[ThreadIndex][8] = SH.V2.x;
}
/**
 * Computes the averaged BRDF spherical-harmonic PDF for one screen probe.
 *
 * One thread group handles one probe (the atlas coordinate is the group id). Each thread samples a
 * pixel around the probe, and samples lying close to the probe's plane append a three-band SH of
 * their BRDF to groupshared memory. The appended SH vectors are then summed with a 4:1 parallel
 * reduction, and the first NUM_PDF_SH_COEFFICIENTS threads write the sum divided by the sample
 * count to RWBRDFProbabilityDensityFunctionSH.
 *
 * All branches that enclose a group barrier depend only on per-group values (group id / probe depth),
 * so every thread of a group takes the same path through them.
 */
// Note: the original excerpt annotated the first argument as "(=8)", i.e. PROBE_THREADGROUP_SIZE_2D is 8 here.
[numthreads(PROBE_THREADGROUP_SIZE_2D, PROBE_THREADGROUP_SIZE_2D, 1)]
void ScreenProbeComputeBRDFProbabilityDensityFunctionCS(
    uint3 GroupId : SV_GroupID,
    uint3 DispatchThreadId : SV_DispatchThreadID,
    uint3 GroupThreadId : SV_GroupThreadID)
{
    uint2 ScreenProbeAtlasCoord = GroupId.xy;
    uint ScreenProbeIndex = ScreenProbeAtlasCoord.y * ScreenProbeAtlasViewSize.x + ScreenProbeAtlasCoord.x;

    uint2 ScreenProbeScreenPosition = GetScreenProbeScreenPosition(ScreenProbeIndex);

    if (ScreenProbeIndex < GetNumScreenProbes() && ScreenProbeAtlasCoord.x < ScreenProbeAtlasViewSize.x)
    {
        float ProbeSceneDepth = GetScreenProbeDepth(ScreenProbeAtlasCoord);

        if (ProbeSceneDepth > 0)
        {
            uint ThreadIndex = GroupThreadId.y * PROBE_THREADGROUP_SIZE_2D + GroupThreadId.x;

            // Reset the shared append counter before any thread appends.
            if (ThreadIndex == 0)
            {
                NumSphericalHarmonics = 0;
            }

            GroupMemoryBarrierWithGroupSync();

            float2 ScreenUV = GetScreenUVFromScreenProbePosition(ScreenProbeScreenPosition);

            {
                // Per-thread pixel offset around the probe, in pixels.
                float2 ThreadOffset = GroupThreadId.xy / (float)PROBE_THREADGROUP_SIZE_2D * ScreenProbeDownsampleFactor * 2.0f - 1.0f;
                bool bCenterSample = all(GroupThreadId.xy == PROBE_THREADGROUP_SIZE_2D / 2);

                if (bCenterSample)
                {
                    // Make sure we have at least one pixel that won't be rejected by the depth weight
                    ThreadOffset = 0;
                }

                float2 PixelScreenUV = ScreenUV + ThreadOffset * View.BufferSizeAndInvSize.zw;
                // Keep the sample inside the view rect.
                PixelScreenUV = clamp(PixelScreenUV, View.ViewRectMin.xy * View.BufferSizeAndInvSize.zw, (View.ViewRectMin.xy + View.ViewSizeAndInvSize.xy - 1) * View.BufferSizeAndInvSize.zw);
                const uint2 PixelPos = PixelScreenUV * View.BufferSizeAndInvSize.xy;

                const FLumenMaterialData Material = ReadMaterialData(PixelPos, PixelScreenUV);
                float3 PixelPosition = GetWorldPositionFromScreenUV(PixelScreenUV, Material.SceneDepth);
                float4 PixelPlane = float4(Material.WorldNormal, dot(Material.WorldNormal, PixelPosition));

                // Weight the sample by the probe's distance to the pixel's plane, relative to the probe depth.
                float3 ProbeWorldPosition = GetWorldPositionFromScreenUV(ScreenUV, ProbeSceneDepth);
                float PlaneDistance = abs(dot(float4(ProbeWorldPosition, -1), PixelPlane));
                float RelativeDepthDifference = PlaneDistance / ProbeSceneDepth;
                float DepthWeight = exp2(-10000.0f * (RelativeDepthDifference * RelativeDepthDifference));

                if (DepthWeight > .1f || bCenterSample)
                {
                    // Reserve a slot in the shared SH list.
                    uint Index;
                    InterlockedAdd(NumSphericalHarmonics, 1, Index);

                    FThreeBandSHVector BRDF;

                    if (HasSphericalVisibility(Material)/* IsFoliage(Material) || IsHair(Material) */)
                    {
                        // Avoid culling directions that the shading models will sample
                        BRDF = (FThreeBandSHVector)0;
                        BRDF.V0.x = 1.0f;
                    }
                    else
                    {
                        BRDF = CalcDiffuseTransferSH3(Material.WorldNormal, 1.0f);
                    }

                    WriteGroupSharedSH(BRDF, Index);
                }
            }

            GroupMemoryBarrierWithGroupSync();

            // Parallel reduction: every pass sums groups of up to four SH vectors and appends the
            // partial sums after the current range, until a single vector remains at index Offset.
            uint NumSHToAccumulate = NumSphericalHarmonics;
            uint Offset = 0;

            while (NumSHToAccumulate > 1)
            {
                uint ThreadBaseIndex = ThreadIndex * 4;

                if (ThreadBaseIndex < NumSHToAccumulate)
                {
                    FThreeBandSHVector PDF = GetGroupSharedSH(ThreadBaseIndex + Offset);

                    if (ThreadBaseIndex + 1 < NumSHToAccumulate)
                    {
                        PDF = AddSH(PDF, GetGroupSharedSH(ThreadBaseIndex + 1 + Offset));
                    }

                    if (ThreadBaseIndex + 2 < NumSHToAccumulate)
                    {
                        PDF = AddSH(PDF, GetGroupSharedSH(ThreadBaseIndex + 2 + Offset));
                    }

                    if (ThreadBaseIndex + 3 < NumSHToAccumulate)
                    {
                        PDF = AddSH(PDF, GetGroupSharedSH(ThreadBaseIndex + 3 + Offset));
                    }

                    WriteGroupSharedSH(PDF, ThreadIndex + Offset + NumSHToAccumulate);
                }

                Offset += NumSHToAccumulate;
                NumSHToAccumulate = (NumSHToAccumulate + 3) / 4;
                GroupMemoryBarrierWithGroupSync();
            }

            // One thread per coefficient writes the normalized result for this probe.
            if (ThreadIndex < NUM_PDF_SH_COEFFICIENTS)
            {
                uint WriteIndex = (ScreenProbeAtlasCoord.y * ScreenProbeAtlasViewSize.x + ScreenProbeAtlasCoord.x) * NUM_PDF_SH_COEFFICIENTS + ThreadIndex;
                // The center sample is always accepted, so the count is at least one.
                float NormalizeWeight = 1.0f / (float)(NumSphericalHarmonics);
                RWBRDFProbabilityDensityFunctionSH[WriteIndex] = PDF_SphericalHarmonic[Offset][ThreadIndex] * NormalizeWeight;
            }
        }
    }
}
<img width="555" alt="image" src="https://github.com/yasukichi/testcode/assets/14350715/2595174a-ac24-44b6-b097-355f79b808d0">
C++(LumenRadianceCache.cpp)
void UpdateRadianceCaches(
FRDGBuilder& GraphBuilder,
const TInlineArray<FUpdateInputs>& InputArray,
TInlineArray<FUpdateOutputs>& OutputArray,
const FScene* Scene,
const FEngineShowFlags& EngineShowFlags,
bool bPropagateGlobalLightingChange,
ERDGPassFlags ComputePassFlags)
{
if (GRadianceCacheUpdate != 0)
{
RDG_EVENT_SCOPE(GraphBuilder, "UpdateRadianceCaches");
check(InputArray.Num() == OutputArray.Num());
TInlineArray<FRadianceCacheSetup> SetupOutputArray(InputArray.Num());
for (int32 RadianceCacheIndex = 0; RadianceCacheIndex < InputArray.Num(); RadianceCacheIndex++)
{
const FUpdateInputs& Inputs = InputArray[RadianceCacheIndex];
const FRadianceCacheInputs& RadianceCacheInputs = Inputs.RadianceCacheInputs;
const FViewInfo& View = Inputs.View;
FUpdateOutputs& Outputs = OutputArray[RadianceCacheIndex];
FRadianceCacheState& RadianceCacheState = Outputs.RadianceCacheState;
FRadianceCacheInterpolationParameters& RadianceCacheParameters = Outputs.RadianceCacheParameters;
FRadianceCacheSetup& SetupOutputs = SetupOutputArray[RadianceCacheIndex];
SetupOutputs.LastFrameClipmaps = RadianceCacheState.Clipmaps;
bool bResizedHistoryState = UpdateRadianceCacheState(GraphBuilder, View, RadianceCacheInputs, RadianceCacheState);
/**
 * Refreshes the cached clipmap layout of a radiance cache for the current view.
 *
 * Every clipmap is re-centered on the view origin snapped to that clipmap's cell grid, and the
 * world <-> probe-coordinate transforms are recomputed from the new center.
 *
 * @return true when ClipmapWorldExtent or ClipmapDistributionBase differ from the values that
 *         were stored in CacheState before this call.
 */
bool UpdateRadianceCacheState(FRDGBuilder& GraphBuilder, const FViewInfo& View, const LumenRadianceCache::FRadianceCacheInputs& RadianceCacheInputs, FRadianceCacheState& CacheState)
{
    // Compare against the previously stored settings before overwriting them.
    const bool bSettingsChanged =
        CacheState.ClipmapWorldExtent != RadianceCacheInputs.ClipmapWorldExtent
        || CacheState.ClipmapDistributionBase != RadianceCacheInputs.ClipmapDistributionBase;

    CacheState.ClipmapWorldExtent = RadianceCacheInputs.ClipmapWorldExtent;
    CacheState.ClipmapDistributionBase = RadianceCacheInputs.ClipmapDistributionBase;

    const int32 ProbesPerAxis = RadianceCacheInputs.RadianceProbeClipmapResolution;
    const int32 ClipmapCount = RadianceCacheInputs.NumRadianceProbeClipmaps;
    const FVector ViewOrigin = View.ViewMatrices.GetViewOrigin();

    CacheState.Clipmaps.SetNum(ClipmapCount);

    for (int32 Level = 0; Level < ClipmapCount; ++Level)
    {
        // Extent grows geometrically with the clipmap level.
        const float LevelExtent = RadianceCacheInputs.ClipmapWorldExtent * FMath::Pow(RadianceCacheInputs.ClipmapDistributionBase, Level);
        const float LevelCellSize = (2.0f * LevelExtent) / ProbesPerAxis;

        // Snap the view origin to this level's cell grid.
        FIntVector SnappedCell;
        SnappedCell.X = FMath::FloorToInt(ViewOrigin.X / LevelCellSize);
        SnappedCell.Y = FMath::FloorToInt(ViewOrigin.Y / LevelCellSize);
        SnappedCell.Z = FMath::FloorToInt(ViewOrigin.Z / LevelCellSize);

        FRadianceCacheClipmap& Clipmap = CacheState.Clipmaps[Level];
        Clipmap.Center = FVector(SnappedCell) * LevelCellSize;
        Clipmap.Extent = LevelExtent;
        Clipmap.VolumeUVOffset = FVector(0.0f, 0.0f, 0.0f);
        Clipmap.CellSize = LevelCellSize;

        // Shift the clipmap grid down so that probes align with other clipmaps
        const FVector ClipmapMin = Clipmap.Center - Clipmap.Extent - 0.5f * Clipmap.CellSize;

        Clipmap.ProbeCoordToWorldCenterBias = ClipmapMin + 0.5f * Clipmap.CellSize;
        Clipmap.ProbeCoordToWorldCenterScale = Clipmap.CellSize;

        Clipmap.WorldPositionToProbeCoordScale = 1.0f / LevelCellSize;
        Clipmap.WorldPositionToProbeCoordBias = -ClipmapMin / LevelCellSize;

        // Irradiance probes start tracing at 0; otherwise the start distance is one cell diagonal.
        Clipmap.ProbeTMin = RadianceCacheInputs.CalculateIrradiance ? 0.0f : FVector(LevelCellSize, LevelCellSize, LevelCellSize).Size();
    }

    return bSettingsChanged;
}
C++(LumenRadianceCache.cpp)
// Excerpt of UpdateRadianceCaches: schedules one ClearProbeIndirectionCS pass per radiance cache.
// The lone ':' lines mark code that was elided from this excerpt; the closing braces of the
// enclosing 'if' and of the function are not part of the excerpt either.
void UpdateRadianceCaches(...)
{
:
if (GRadianceCacheUpdate != 0)
{
RDG_EVENT_SCOPE(GraphBuilder, "UpdateRadianceCaches");
// Inputs and outputs are paired by index.
check(InputArray.Num() == OutputArray.Num());
:
// Clear each clipmap indirection entry to invalid probe index
for (int32 RadianceCacheIndex = 0; RadianceCacheIndex < InputArray.Num(); RadianceCacheIndex++)
{
const FUpdateInputs& Inputs = InputArray[RadianceCacheIndex];
FUpdateOutputs& Outputs = OutputArray[RadianceCacheIndex];
FClearProbeIndirectionCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FClearProbeIndirectionCS::FParameters>();
PassParameters->RWRadianceProbeIndirectionTexture = GraphBuilder.CreateUAV(FRDGTextureUAVDesc(Outputs.RadianceCacheParameters.RadianceProbeIndirectionTexture));
auto ComputeShader = Inputs.View.ShaderMap->GetShader<FClearProbeIndirectionCS>(0);
// Group count covers the whole indirection texture.
const FIntVector GroupSize = FComputeShaderUtils::GetGroupCount(Outputs.RadianceCacheParameters.RadianceProbeIndirectionTexture->Desc.GetSize(), FClearProbeIndirectionCS::GetGroupSize());
// Do clear on graphics if there is any graphics mark pass and LumenSceneLighting is async so the mark pass is not blocked.
// If LumenSceneLighting isn't async, it will block graphics mark passes anyway. May as well finish the clear early on the compute pipe.
// TODO: Is it possible to move graphics mark passes and their clears before LumenSceneLighting without heavy code change?
// NOTE(review): bLumenSceneLightingAsync is defined in the elided part of the function - confirm its source.
const ERDGPassFlags ClearPassFlags = Inputs.GraphicsMarkUsedRadianceCacheProbes.IsBound() && bLumenSceneLightingAsync ? ERDGPassFlags::Compute : ComputePassFlags;
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("ClearProbeIndirectionCS"),
ClearPassFlags,
ComputeShader,
PassParameters,
GroupSize);
}
#ifndef THREADGROUP_SIZE
#define THREADGROUP_SIZE 1
#endif
/** Resets the indirection texture: every thread writes INVALID_PROBE_INDEX to the texel matching its dispatch thread id. */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
void ClearProbeIndirectionCS(
    uint3 GroupId : SV_GroupID,
    uint3 DispatchThreadId : SV_DispatchThreadID,
    uint3 GroupThreadId : SV_GroupThreadID)
{
    const uint3 TexelCoord = DispatchThreadId;
    RWRadianceProbeIndirectionTexture[TexelCoord] = INVALID_PROBE_INDEX;
}
C++(LumenScreenProbeGather.cpp)
// Excerpt of RenderLumenScreenProbeGather: registers the "mark used probes" callbacks and runs the
// radiance cache update. The lone ':' lines mark code that was elided from this excerpt.
FSSDSignalTextures FDeferredShadingSceneRenderer::RenderLumenScreenProbeGather(...)
{
:
// Mark radiance caches for screen probes
// Compute-side callback; SceneTextures and ScreenProbeParameters are captured by reference.
ComputeMarkUsedRadianceCacheProbesCallbacks.AddLambda([&SceneTextures, &ScreenProbeParameters, ComputePassFlags](
FRDGBuilder& GraphBuilder,
const FViewInfo& View,
const LumenRadianceCache::FRadianceCacheMarkParameters& RadianceCacheMarkParameters)
{
ScreenGatherMarkUsedProbes(
GraphBuilder,
View,
SceneTextures,
ScreenProbeParameters,
RadianceCacheMarkParameters,
ComputePassFlags);
});
:
// Graphics-side callback, only registered when translucency radiance cache reflections are in use.
if (Lumen::UseLumenTranslucencyRadianceCacheReflections(View))
{
const FSceneRenderer& SceneRenderer = *this;
// The mark pass takes a non-const view, so the lambda captures this reference instead of using
// its own const View parameter.
FViewInfo& ViewNonConst = View;
GraphicsMarkUsedRadianceCacheProbesCallbacks.AddLambda([&SceneTextures, &SceneRenderer, &ViewNonConst](
FRDGBuilder& GraphBuilder,
const FViewInfo& View,
const LumenRadianceCache::FRadianceCacheMarkParameters& RadianceCacheMarkParameters)
{
LumenTranslucencyReflectionsMarkUsedProbes(
GraphBuilder,
SceneRenderer,
ViewNonConst,
SceneTextures,
RadianceCacheMarkParameters);
});
}
// Inputs and outputs are kept in parallel arrays; entry 0 is the screen probe radiance cache.
LumenRadianceCache::TInlineArray<LumenRadianceCache::FUpdateInputs> InputArray;
LumenRadianceCache::TInlineArray<LumenRadianceCache::FUpdateOutputs> OutputArray;
InputArray.Add(LumenRadianceCache::FUpdateInputs(
TracingInputs,
RadianceCacheInputs,
FRadianceCacheConfiguration(),
View,
nullptr,
nullptr,
MoveTemp(GraphicsMarkUsedRadianceCacheProbesCallbacks),
MoveTemp(ComputeMarkUsedRadianceCacheProbesCallbacks)));
OutputArray.Add(LumenRadianceCache::FUpdateOutputs(
View.ViewState->Lumen.RadianceCacheState,
RadianceCacheParameters));
// Add the Translucency Volume radiance cache to the update so its dispatches can overlap
{
LumenRadianceCache::FUpdateInputs TranslucencyVolumeRadianceCacheUpdateInputs = GetLumenTranslucencyGIVolumeRadianceCacheInputs(
GraphBuilder,
View,
TracingInputs,
ComputePassFlags); // this returns MarkRadianceProbesUsedByTranslucencyVolume
// Only appended when it actually has a mark callback bound.
if (TranslucencyVolumeRadianceCacheUpdateInputs.IsAnyCallbackBound())
{
InputArray.Add(TranslucencyVolumeRadianceCacheUpdateInputs);
OutputArray.Add(LumenRadianceCache::FUpdateOutputs(
View.ViewState->Lumen.TranslucencyVolumeRadianceCacheState,
TranslucencyVolumeRadianceCacheParameters));
}
}
// Update all collected radiance caches in a single call.
LumenRadianceCache::UpdateRadianceCaches(
GraphBuilder,
InputArray,
OutputArray,
Scene,
ViewFamily.EngineShowFlags,
LumenCardRenderer.bPropagateGlobalLightingChange,
ComputePassFlags);
:
}
- C++(LumenTranslucentRadianceCache.cpp)
```C++
// Schedules the raster pass "TranslucentSurfacesMarkPass", which draws the
// EMeshPass::LumenTranslucencyRadianceCacheMark mesh pass into a downsampled viewport.
//
// GraphBuilder                - render graph that receives the pass.
// SceneRenderer               - provides the GPU scene and the stereo viewport setup.
// View                        - view to render; non-const because BeginRenderView() and the view's
//                               mesh pass are invoked on it.
// SceneTextures               - source of the scene texture uniform parameters and the buffer extent.
// RadianceCacheMarkParameters - copied into the pass uniform buffer; InvClipmapFadeSizeForMark is overridden.
void LumenTranslucencyReflectionsMarkUsedProbes(
FRDGBuilder& GraphBuilder,
const FSceneRenderer& SceneRenderer,
FViewInfo& View,
const FSceneTextures& SceneTextures,
const LumenRadianceCache::FRadianceCacheMarkParameters& RadianceCacheMarkParameters)
{
check(GLumenTranslucencyRadianceCacheReflections != 0);
const EMeshPass::Type MeshPass = EMeshPass::LumenTranslucencyRadianceCacheMark;
// The pass renders at 1 / GLumenTranslucencyReflectionsMarkDownsampleFactor of the view rect.
const float ViewportScale = 1.0f / GLumenTranslucencyReflectionsMarkDownsampleFactor;
FIntRect DownsampledViewRect = GetScaledRect(View.ViewRect, ViewportScale);
View.BeginRenderView();
FLumenTranslucencyRadianceCacheMarkParameters* PassParameters = GraphBuilder.AllocParameters<FLumenTranslucencyRadianceCacheMarkParameters>();
{
// Start from a copy of the cached view uniforms and patch the view-rect dependent members.
FViewUniformShaderParameters DownsampledTranslucencyViewParameters = *View.CachedViewUniformShaderParameters;
FViewMatrices ViewMatrices = View.ViewMatrices;
FViewMatrices PrevViewMatrices = View.PrevViewInfo.ViewMatrices;
// Update the parts of DownsampledTranslucencyParameters which are dependent on the buffer size and view rect
View.SetupViewRectUniformBufferParameters(
DownsampledTranslucencyViewParameters,
SceneTextures.Config.Extent,
DownsampledViewRect,
ViewMatrices,
PrevViewMatrices);
PassParameters->View.View = TUniformBufferRef<FViewUniformShaderParameters>::CreateUniformBufferImmediate(DownsampledTranslucencyViewParameters, UniformBuffer_SingleFrame);
// When an instanced view exists, the same parameter struct is re-patched for its rect and
// uploaded as the instanced view uniform buffer.
if (const FViewInfo* InstancedView = View.GetInstancedView())
{
InstancedView->SetupViewRectUniformBufferParameters(
DownsampledTranslucencyViewParameters,
SceneTextures.Config.Extent,
GetScaledRect(InstancedView->ViewRect, ViewportScale),
ViewMatrices,
PrevViewMatrices);
PassParameters->View.InstancedView = TUniformBufferRef<FInstancedViewUniformShaderParameters>::CreateUniformBufferImmediate(
reinterpret_cast<const FInstancedViewUniformShaderParameters&>(DownsampledTranslucencyViewParameters),
UniformBuffer_SingleFrame);
}
}
{
// Pass uniform buffer: scene textures, mark parameters and the HZB lookup settings.
FLumenTranslucencyRadianceCacheMarkPassUniformParameters& MarkPassParameters = *GraphBuilder.AllocParameters<FLumenTranslucencyRadianceCacheMarkPassUniformParameters>();
SetupSceneTextureUniformParameters(GraphBuilder, &SceneTextures, View.FeatureLevel, ESceneTextureSetupMode::All, MarkPassParameters.SceneTextures);
MarkPassParameters.RadianceCacheMarkParameters = RadianceCacheMarkParameters;
// The fade size is clamped to [.001, 16] before inverting.
MarkPassParameters.RadianceCacheMarkParameters.InvClipmapFadeSizeForMark = 1.0f / FMath::Clamp(GLumenTranslucencyVolumeRadianceCacheClipmapFadeSize, .001f, 16.0f);
MarkPassParameters.FurthestHZBTexture = View.HZB;
MarkPassParameters.ViewportUVToHZBBufferUV = FVector2f(
float(View.ViewRect.Width()) / float(2 * View.HZBMipmap0Size.X),
float(View.ViewRect.Height()) / float(2 * View.HZBMipmap0Size.Y));
// HZB mip = floor(log2(downsample factor)) - 1, never below 0.
MarkPassParameters.HZBMipLevel = FMath::Max<float>((int32)FMath::FloorLog2((float)GLumenTranslucencyReflectionsMarkDownsampleFactor) - 1, 0.0f);
PassParameters->MarkPass = GraphBuilder.CreateUniformBuffer(&MarkPassParameters);
}
View.ParallelMeshDrawCommandPasses[MeshPass].BuildRenderingCommands(GraphBuilder, SceneRenderer.Scene->GPUScene, PassParameters->InstanceCullingDrawParams);
// SkipRenderPass: the lambda begins and ends the render pass itself. View and SceneRenderer are
// captured by reference.
GraphBuilder.AddPass(
RDG_EVENT_NAME("TranslucentSurfacesMarkPass"),
PassParameters,
ERDGPassFlags::Raster | ERDGPassFlags::SkipRenderPass,
[&View, &SceneRenderer, MeshPass, PassParameters, ViewportScale, DownsampledViewRect](FRHICommandList& RHICmdList)
{
// No render targets are set on RPInfo; only the resolve rect is specified.
FRHIRenderPassInfo RPInfo;
RPInfo.ResolveRect = FResolveRect(DownsampledViewRect);
RHICmdList.BeginRenderPass(RPInfo, TEXT("LumenTranslucencyRadianceCacheMark"));
SceneRenderer.SetStereoViewport(RHICmdList, View, ViewportScale);
View.ParallelMeshDrawCommandPasses[MeshPass].DispatchDraw(nullptr, RHICmdList, &PassParameters->InstanceCullingDrawParams);
RHICmdList.EndRenderPass();
});
}
シェーダー(LumenTranslucencyRadianceCacheMarkShader.usf)
// Vertex shader of the translucency radiance cache mark pass.
// Applies the material's world position offset, projects the vertex with
// ResolvedView.TranslatedWorldToClip and forwards the vertex factory interpolants.
void MainVS(
FVertexFactoryInput Input,
out FLumenTranslucencyRadianceCacheMarkVSToPS Output
)
{
// Not referenced again within this function.
uint EyeIndex = 0;
ResolvedView = ResolveView();
FVertexFactoryIntermediates VFIntermediates = GetVertexFactoryIntermediates(Input);
float4 WorldPositionExcludingWPO = VertexFactoryGetWorldPosition(Input, VFIntermediates);
float4 WorldPosition = WorldPositionExcludingWPO;
float4 ClipSpacePosition;
float3x3 TangentToLocal = VertexFactoryGetTangentToLocal(Input, VFIntermediates);
// Material vertex parameters are built from the position before the offset is applied.
FMaterialVertexParameters VertexParameters = GetMaterialVertexParameters(Input, VFIntermediates, WorldPosition.xyz, TangentToLocal);
ISOLATE
{
WorldPosition.xyz += GetMaterialWorldPositionOffset(VertexParameters);
float4 RasterizedWorldPosition = VertexFactoryGetRasterizedWorldPosition(Input, VFIntermediates, WorldPosition);
ClipSpacePosition = mul(RasterizedWorldPosition, ResolvedView.TranslatedWorldToClip);
Output.Position = ClipSpacePosition;
}
Output.FactoryInterpolants = VertexFactoryGetInterpolantsVSToPS(Input, VFIntermediates, VertexParameters);
}
// Pixel shader of the translucency radiance cache mark pass.
// For every shaded translucent pixel it picks a radiance cache clipmap (dithered by interleaved
// gradient noise) and, when the pixel is nearer than the depth read from the furthest-HZB texture,
// marks the surrounding probes as used in the indirection texture. It has no output parameters.
EARLYDEPTHSTENCIL
void MainPS(
FVertexFactoryInterpolantsVSToPS Interpolants,
FLumenTranslucencyRadianceCacheMarkInterpolantsVSToPS PassInterpolants,
in INPUT_POSITION_QUALIFIERS float4 SvPosition : SV_Position
OPTIONAL_IsFrontFace)
{
ResolvedView = ResolveView();
FMaterialPixelParameters MaterialParameters = GetMaterialPixelParameters(Interpolants, SvPosition);
FPixelMaterialInputs PixelMaterialInputs;
{
float4 ScreenPosition = SvPositionToResolvedScreenPosition(SvPosition);
float3 TranslatedWorldPosition = SvPositionToResolvedTranslatedWorld(SvPosition);
CalcMaterialParametersEx(MaterialParameters, PixelMaterialInputs, SvPosition, ScreenPosition, bIsFrontFace, TranslatedWorldPosition, TranslatedWorldPosition);
}
GetMaterialCoverageAndClipping(MaterialParameters, PixelMaterialInputs);
//@todo - conservative coverage
uint ClipmapIndex = GetRadianceProbeClipmapForMark(LWCHackToFloat(MaterialParameters.AbsoluteWorldPosition), InterleavedGradientNoise(MaterialParameters.SvPosition.xy, View.StateFrameIndexMod8));
if (IsValidRadianceCacheClipmapForMark(ClipmapIndex))
{
// Initial value; always replaced by the HZB lookup below.
float MaxDepth = 1000000.0f;
// Viewport-relative UV of this pixel, rescaled into the HZB buffer's UV space.
float2 HZBScreenUV = (SvPosition.xy - View.ViewRectMin.xy) * View.ViewSizeAndInvSize.zw * LumenTranslucencyRadianceCacheMarkPass.ViewportUVToHZBBufferUV;
MaxDepth = ConvertFromDeviceZ(LumenTranslucencyRadianceCacheMarkPass.FurthestHZBTexture.SampleLevel(GlobalPointClampedSampler, HZBScreenUV, LumenTranslucencyRadianceCacheMarkPass.HZBMipLevel).x);
// Only mark when the pixel is nearer than the depth read from the HZB.
if (MaterialParameters.ScreenPosition.w < MaxDepth)
{
MarkPositionUsedInIndirectionTexture(LWCHackToFloat(MaterialParameters.AbsoluteWorldPosition), ClipmapIndex);
}
}
}
#### MarkRadianceProbes(ScreenProbes) 87x81
- C++(LumenScreenProbeGather.cpp)
```C++
/**
 * Schedules the compute pass that marks the radiance cache probes needed by the screen probes.
 * The pass is dispatched indirectly through the ThreadPerProbe entry of ProbeIndirectArgs.
 */
static void ScreenGatherMarkUsedProbes(
    FRDGBuilder& GraphBuilder,
    const FViewInfo& View,
    const FSceneTextures& SceneTextures,
    FScreenProbeParameters& ScreenProbeParameters,
    const LumenRadianceCache::FRadianceCacheMarkParameters& RadianceCacheMarkParameters,
    ERDGPassFlags ComputePassFlags)
{
    using FMarkShader = FMarkRadianceProbesUsedByScreenProbesCS;

    FMarkShader::FParameters* Parameters = GraphBuilder.AllocParameters<FMarkShader::FParameters>();
    Parameters->View = View.ViewUniformBuffer;
    Parameters->SceneTexturesStruct = SceneTextures.UniformBuffer;
    Parameters->ScreenProbeParameters = ScreenProbeParameters;
    Parameters->RadianceCacheMarkParameters = RadianceCacheMarkParameters;

    auto Shader = View.ShaderMap->GetShader<FMarkShader>(0);

    // Byte offset of the "one thread per probe" entry inside the indirect args buffer.
    const uint32 IndirectArgsOffset = (uint32)EScreenProbeIndirectArgs::ThreadPerProbe * sizeof(FRHIDispatchIndirectParameters);

    FComputeShaderUtils::AddPass(
        GraphBuilder,
        RDG_EVENT_NAME("MarkRadianceProbes(ScreenProbes) %ux%u", Parameters->ScreenProbeParameters.ScreenProbeAtlasViewSize.X, Parameters->ScreenProbeParameters.ScreenProbeAtlasViewSize.Y),
        ComputePassFlags,
        Shader,
        Parameters,
        Parameters->ScreenProbeParameters.ProbeIndirectArgs,
        IndirectArgsOffset);
}
シェーダー(LumenScreenProbeGather.usf)
/**
 * One thread per screen probe: finds the radiance cache clipmap covering the probe's world position
 * (dithered by interleaved gradient noise) and marks the surrounding probes as used.
 * Probes outside the atlas, beyond the probe count, or without positive depth are ignored.
 */
[numthreads(PROBE_THREADGROUP_SIZE_2D, PROBE_THREADGROUP_SIZE_2D, 1)]
void MarkRadianceProbesUsedByScreenProbesCS(
    uint3 GroupId : SV_GroupID,
    uint3 DispatchThreadId : SV_DispatchThreadID,
    uint3 GroupThreadId : SV_GroupThreadID)
{
    const uint2 AtlasCoord = DispatchThreadId.xy;
    const uint ProbeIndex = AtlasCoord.y * ScreenProbeAtlasViewSize.x + AtlasCoord.x;
    const uint2 ProbeScreenPosition = GetScreenProbeScreenPosition(ProbeIndex);

    // Threads past the last probe or past the atlas width have nothing to do.
    if (!(ProbeIndex < GetNumScreenProbes() && AtlasCoord.x < ScreenProbeAtlasViewSize.x))
    {
        return;
    }

    const float2 ScreenUV = GetScreenUVFromScreenProbePosition(ProbeScreenPosition);
    const float SceneDepth = GetScreenProbeDepth(AtlasCoord);
    const float3 WorldPosition = GetWorldPositionFromScreenUV(ScreenUV, SceneDepth);

    // Only probes with a positive depth are processed.
    if (!(SceneDepth > 0))
    {
        return;
    }

    const uint ClipmapIndex = GetRadianceProbeClipmapForMark(WorldPosition, InterleavedGradientNoise(AtlasCoord.xy, GENERAL_TEMPORAL_INDEX));

    if (IsValidRadianceCacheClipmapForMark(ClipmapIndex))
    {
        //@todo - cull by screen size
        //@todo - cull probes too small for voxel tracing and too large for max trace distance
        MarkPositionUsedInIndirectionTexture(WorldPosition, ClipmapIndex);
    }
}
RWTexture3D<uint> RWRadianceProbeIndirectionTexture;
/**
 * Writes USED_PROBE_INDEX for one probe of one clipmap.
 * Clipmaps are laid out side by side along X in the indirection texture. Coordinates outside the
 * clipmap resolution and clipmap indices outside the clipmap count are ignored.
 */
void MarkProbeIndirectionTextureCoord(int3 ProbeCoord, uint ClipmapIndex)
{
    const bool bCoordInsideClipmap = all(ProbeCoord >= 0) && all(ProbeCoord < (int3)RadianceProbeClipmapResolutionForMark);
    const bool bClipmapExists = ClipmapIndex < NumRadianceProbeClipmapsForMark;

    if (bCoordInsideClipmap && bClipmapExists)
    {
        const int3 ClipmapOrigin = int3(ClipmapIndex * RadianceProbeClipmapResolutionForMark, 0, 0);
        RWRadianceProbeIndirectionTexture[ProbeCoord + ClipmapOrigin] = USED_PROBE_INDEX;
    }
}
/**
 * Marks the 2x2x2 block of probes surrounding WorldPosition in the given clipmap as used.
 */
void MarkPositionUsedInIndirectionTexture(float3 WorldPosition, uint ClipmapIndex)
{
    const float3 ProbeCoordFloat = WorldPosition * WorldPositionToRadianceProbeCoordScaleForMark(ClipmapIndex) + WorldPositionToRadianceProbeCoordBiasForMark(ClipmapIndex);
    const int3 BottomCornerProbeCoord = floor(ProbeCoordFloat - 0.5f);

    // Visit the eight corners in the order (0,0,0), (0,0,1), (0,1,0), ... (1,1,1).
    for (int OffsetX = 0; OffsetX < 2; ++OffsetX)
    {
        for (int OffsetY = 0; OffsetY < 2; ++OffsetY)
        {
            for (int OffsetZ = 0; OffsetZ < 2; ++OffsetZ)
            {
                MarkProbeIndirectionTextureCoord(BottomCornerProbeCoord + int3(OffsetX, OffsetY, OffsetZ), ClipmapIndex);
            }
        }
    }
}
#### MarkRadianceProbesUsedByTranslucencyVolume
- C++(LumenTranslucencyVolumeLighting.cpp)
```C++
/**
 * Schedules the compute pass that marks the radiance cache probes needed by the translucency
 * lighting volume. One thread is dispatched per cell of VolumeParameters.TranslucencyGIGridSize.
 */
static void MarkRadianceProbesUsedByTranslucencyVolume(
    FRDGBuilder& GraphBuilder,
    const FViewInfo& View,
    FLumenTranslucencyLightingVolumeParameters VolumeParameters,
    const LumenRadianceCache::FRadianceCacheMarkParameters& RadianceCacheMarkParameters,
    ERDGPassFlags ComputePassFlags)
{
    using FMarkShader = FMarkRadianceProbesUsedByTranslucencyVolumeCS;

    FMarkShader::FParameters* Parameters = GraphBuilder.AllocParameters<FMarkShader::FParameters>();
    Parameters->View = View.ViewUniformBuffer;
    Parameters->RadianceCacheMarkParameters = RadianceCacheMarkParameters;
    Parameters->VolumeParameters = VolumeParameters;

    // NOTE(review): declared but not passed to GetShader in this excerpt - confirm against the engine source.
    FMarkShader::FPermutationDomain PermutationVector;
    auto Shader = View.ShaderMap->GetShader<FMarkShader>();

    const FIntVector GroupCount = FComputeShaderUtils::GetGroupCount(VolumeParameters.TranslucencyGIGridSize, FMarkShader::GetGroupSize());

    FComputeShaderUtils::AddPass(
        GraphBuilder,
        RDG_EVENT_NAME("MarkRadianceProbesUsedByTranslucencyVolume"),
        ComputePassFlags,
        Shader,
        Parameters,
        GroupCount);
}
シェーダー(LumenTranslucencyVolumeLighting.usf)
/**
 * One thread per translucency volume cell: marks the radiance cache probes around the cell's
 * jittered world position when the cell maps to a valid clipmap and the froxel is visible.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
void MarkRadianceProbesUsedByTranslucencyVolumeCS(
    uint3 GroupId : SV_GroupID,
    uint3 DispatchThreadId : SV_DispatchThreadID,
    uint3 GroupThreadId : SV_GroupThreadID)
{
    const uint3 GridCoordinate = DispatchThreadId;

    // Threads outside the grid have nothing to do.
    if (!all(GridCoordinate < TranslucencyGIGridSize))
    {
        return;
    }

    const float3 CellWorldPosition = ComputeCellWorldPosition(GridCoordinate, FrameJitterOffset.xyz);
    const uint ClipmapIndex = GetRadianceProbeClipmapForMark(CellWorldPosition);

    // Per the original annotation, the validity check is presumably ClipmapIndex < NumRadianceProbeClipmapsForMark.
    if (IsValidRadianceCacheClipmapForMark(ClipmapIndex) && IsFroxelVisible(GridCoordinate))
    {
        MarkPositionUsedInIndirectionTexture(CellWorldPosition, ClipmapIndex);
    }
}
シェーダー(LumenTranslucencyVolumeLightingShared.ush)
/**
 * Converts a volume grid cell (plus a sub-cell offset) into a translated world position.
 *
 * GridCoordinate - integer cell coordinate in the translucency GI grid.
 * CellOffset     - offset inside the cell, added before normalizing by the grid size.
 * SceneDepth     - [out] scene depth of the cell's Z slice.
 */
float3 ComputeCellTranslatedWorldPosition(uint3 GridCoordinate, float3 CellOffset, out float SceneDepth)
{
    // XY: grid cell -> [0,1] UV -> NDC with Y flipped.
    const float2 CellUV = (GridCoordinate.xy + CellOffset.xy) / TranslucencyGIGridSize.xy;
    const float2 CellNDC = (CellUV * 2 - 1) * float2(1, -1);

    // Z: slice index -> scene depth -> device Z.
    SceneDepth = ComputeDepthFromZSlice(GridCoordinate.z + CellOffset.z);
    const float CellDeviceZ = ConvertToDeviceZ(SceneDepth);

    const float4 HomogeneousPosition = mul(float4(CellNDC, CellDeviceZ, 1), UnjitteredClipToTranslatedWorld);
    return HomogeneousPosition.xyz / HomogeneousPosition.w;
}
/** Overload for callers that do not need the cell's scene depth. */
float3 ComputeCellTranslatedWorldPosition(uint3 GridCoordinate, float3 CellOffset)
{
    float IgnoredSceneDepth;
    return ComputeCellTranslatedWorldPosition(GridCoordinate, CellOffset, IgnoredSceneDepth);
}
/** Returns the cell's world position: its translated world position minus the primary view's pre-view translation. */
float3 ComputeCellWorldPosition(uint3 GridCoordinate, float3 CellOffset)
{
    const float3 TranslatedWorldPosition = ComputeCellTranslatedWorldPosition(GridCoordinate, CellOffset);
    return TranslatedWorldPosition - LWCHackToFloat(PrimaryView.PreViewTranslation);
}
Texture2D FurthestHZBTexture; float HZBMipLevel; float2 ViewportUVToHZBBufferUV;
/**
 * Returns the scene depth read from the furthest-HZB texture for a volume grid column.
 *
 * The cell center is scaled to pixels (cells are 1 << TranslucencyGIGridPixelSizeShift pixels wide),
 * normalized by the view size and rescaled into HZB buffer UV space. The sample is taken one mip
 * above HZBMipLevel (TrilinearFootprintMipBias).
 */
float GetMaxVisibleDepth(uint2 GridCoordinate)
{
    // The multiplication operators between these factors were missing in the original excerpt.
    float2 HZBScreenUV = (GridCoordinate.xy + .5f) * (1U << TranslucencyGIGridPixelSizeShift) * View.ViewSizeAndInvSize.zw * ViewportUVToHZBBufferUV;
    float TrilinearFootprintMipBias = 1.0f;
    return ConvertFromDeviceZ(FurthestHZBTexture.SampleLevel(GlobalPointClampedSampler, HZBScreenUV, HZBMipLevel + TrilinearFootprintMipBias).x);
}
// A froxel counts as visible when the depth of its biased Z slice is smaller than the
// depth returned by GetMaxVisibleDepth for its XY column.
bool IsFroxelVisible(uint3 GridCoordinate)
{
	// Step one slice towards the camera (clamped at slice 0) before converting to depth.
	const float SliceBias = -1.0f;
	const float BiasedSlice = max((float)GridCoordinate.z + SliceBias, 0.0f);
	const float FroxelNearDepth = ComputeDepthFromZSlice(BiasedSlice);

	const float FurthestVisibleDepth = GetMaxVisibleDepth(GridCoordinate.xy);
	return FroxelNearDepth < FurthestVisibleDepth;
}
- シェーダー(LumenRadianceCacheMarkCommon.ush)
```C++
// Finds the first clipmap (lowest index) whose edge fade at WorldSpacePosition exceeds
// ClipmapDitherRandom and returns that clipmap together with the min/max probe coordinates
// around the position. If no clipmap qualifies, ClipmapIndex is left at
// NumRadianceProbeClipmapsForMark and the coordinates stay zero.
FRadianceProbeCoord GetRadianceProbeCoord(float3 WorldSpacePosition, float ClipmapDitherRandom)
{
	FRadianceProbeCoord Result = (FRadianceProbeCoord)0;
	Result.ClipmapIndex = NumRadianceProbeClipmapsForMark;

	for (uint CandidateClipmap = 0; CandidateClipmap < NumRadianceProbeClipmapsForMark; ++CandidateClipmap)
	{
		const float3 CoordFloat = WorldSpacePosition * WorldPositionToRadianceProbeCoordScaleForMark(CandidateClipmap) + WorldPositionToRadianceProbeCoordBiasForMark(CandidateClipmap);

		// Fade factor per axis towards the lower and the upper clipmap border.
		const float3 FadeFromMin = saturate((CoordFloat - .5f) * InvClipmapFadeSizeForMark);
		const float3 FadeFromMax = saturate(((float3)RadianceProbeClipmapResolutionForMark - .5f - CoordFloat) * InvClipmapFadeSizeForMark);

		const float SmallestMinFade = min3(FadeFromMin.x, FadeFromMin.y, FadeFromMin.z);
		const float SmallestMaxFade = min3(FadeFromMax.x, FadeFromMax.y, FadeFromMax.z);
		const float EdgeFade = min(SmallestMinFade, SmallestMaxFade);

		if (EdgeFade > ClipmapDitherRandom)
		{
			const int3 MinCoord = floor(CoordFloat - 0.5f);
			Result.ProbeMinCoord = MinCoord;
			Result.ProbeMaxCoord = MinCoord + 1;
			Result.ClipmapIndex = CandidateClipmap;
			break;
		}
	}

	return Result;
}
// Returns only the clipmap index selected by GetRadianceProbeCoord for this position.
uint GetRadianceProbeClipmapForMark(float3 WorldSpacePosition, float ClipmapDitherRandom)
{
	return GetRadianceProbeCoord(WorldSpacePosition, ClipmapDitherRandom).ClipmapIndex;
}
// Overload that uses a fixed dither threshold of 0.01 instead of a caller-supplied random value.
uint GetRadianceProbeClipmapForMark(float3 WorldSpacePosition)
{
	const float FixedDitherThreshold = .01f;
	return GetRadianceProbeClipmapForMark(WorldSpacePosition, FixedDitherThreshold);
}
C++(LumenRadianceCache.cpp)
// Excerpt of UpdateRadianceCaches showing only the "UpdateCacheForUsedProbes" compute pass.
// NOTE(review): "(...)" and the lone ":" line look like elisions made when the snippet was pasted, and
// the function's own closing brace is not part of this excerpt — confirm against the engine source.
void UpdateRadianceCaches(...)
{
:
// One iteration per radiance cache; inputs, setup data and outputs are indexed by RadianceCacheIndex.
for (int32 RadianceCacheIndex = 0; RadianceCacheIndex < InputArray.Num(); RadianceCacheIndex++)
{
const FUpdateInputs& Inputs = InputArray[RadianceCacheIndex];
const FRadianceCacheInputs& RadianceCacheInputs = Inputs.RadianceCacheInputs;
const FViewInfo& View = Inputs.View;
const FRadianceCacheSetup& Setup = SetupOutputArray[RadianceCacheIndex];
FUpdateOutputs& Outputs = OutputArray[RadianceCacheIndex];
FRadianceCacheState& RadianceCacheState = Outputs.RadianceCacheState;
FRadianceCacheInterpolationParameters& RadianceCacheParameters = Outputs.RadianceCacheParameters;
// Propagate probes from last frame to the new frame's indirection
if (Setup.bPersistentCache)
{
// UAV on this frame's indirection texture; last frame's texture is registered from the cache state.
FRDGTextureUAVRef RadianceProbeIndirectionTextureUAV = GraphBuilder.CreateUAV(FRDGTextureUAVDesc(RadianceCacheParameters.RadianceProbeIndirectionTexture));
FRDGTextureRef LastFrameRadianceProbeIndirectionTexture = GraphBuilder.RegisterExternalTexture(RadianceCacheState.RadianceProbeIndirectionTexture);
{
FRDGBufferUAVRef ProbeFreeListAllocatorUAV = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(ProbeFreeListAllocator[RadianceCacheIndex], PF_R32_SINT));
FRDGBufferUAVRef ProbeFreeListUAV = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(ProbeFreeList[RadianceCacheIndex], PF_R32_UINT));
FRDGBufferUAVRef ProbeLastUsedFrameUAV = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(ProbeLastUsedFrame[RadianceCacheIndex], PF_R32_UINT));
FUpdateCacheForUsedProbesCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FUpdateCacheForUsedProbesCS::FParameters>();
PassParameters->View = View.ViewUniformBuffer;
PassParameters->RWRadianceProbeIndirectionTexture = RadianceProbeIndirectionTextureUAV;
PassParameters->RWProbeFreeListAllocator = ProbeFreeListAllocatorUAV;
PassParameters->RWProbeFreeList = ProbeFreeListUAV;
PassParameters->RWProbeLastUsedFrame = ProbeLastUsedFrameUAV;
PassParameters->LastFrameRadianceProbeIndirectionTexture = LastFrameRadianceProbeIndirectionTexture;
PassParameters->RadianceCacheParameters = RadianceCacheParameters;
// NOTE(review): View.ViewState is dereferenced without a null check here; presumably a persistent
// cache implies a valid view state — confirm.
PassParameters->FrameNumber = View.ViewState->GetFrameIndex();
PassParameters->NumFramesToKeepCachedProbes = GRadianceNumFramesToKeepCachedProbes;
// Pack each last-frame clipmap's probe-coord-to-world bias and scale into the pass parameters.
for (int32 ClipmapIndex = 0; ClipmapIndex < Setup.LastFrameClipmaps.Num(); ++ClipmapIndex)
{
const FRadianceCacheClipmap& Clipmap = Setup.LastFrameClipmaps[ClipmapIndex];
SetRadianceProbeCoordToWorldPosition(PassParameters->PackedLastFrameRadianceProbeCoordToWorldPosition[ClipmapIndex], (FVector3f)Clipmap.ProbeCoordToWorldCenterBias, Clipmap.ProbeCoordToWorldCenterScale);
}
auto ComputeShader = View.ShaderMap->GetShader<FUpdateCacheForUsedProbesCS>(0);
// Group count is derived from the indirection texture size and the shader's group size.
const FIntVector GroupSize = FComputeShaderUtils::GetGroupCount(RadianceCacheParameters.RadianceProbeIndirectionTexture->Desc.GetSize(), FUpdateCacheForUsedProbesCS::GetGroupSize());
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("UpdateCacheForUsedProbes"),
ComputePassFlags,
ComputeShader,
PassParameters,
GroupSize);
}
}
}
// Parameters of UpdateCacheForUsedProbesCS:
// - LastFrameRadianceProbeIndirectionTexture: indirection texture read with last-frame probe coordinates.
// - PackedLastFrameRadianceProbeCoordToWorldPosition: per clipmap, xyz = bias and w = scale used to turn a
//   last-frame probe coordinate into a world position.
// - NumFramesToKeepCachedProbes: a probe whose last-used frame is fewer than this many frames old is kept.
Texture3D<uint> LastFrameRadianceProbeIndirectionTexture;
float4 PackedLastFrameRadianceProbeCoordToWorldPosition[RADIANCE_PROBE_MAX_CLIPMAPS];
uint NumFramesToKeepCachedProbes;
// Carries last frame's probes over to this frame's indirection texture.
// One thread per indirection texel; the texel's X coordinate encodes both the clipmap index
// (X / RadianceProbeClipmapResolution) and the probe X coordinate inside that clipmap.
// A probe that is not reused has its index appended to RWProbeFreeList.
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)] void UpdateCacheForUsedProbesCS( uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_DispatchThreadID, uint3 GroupThreadId : SV_GroupThreadID) { uint ClipmapIndex = DispatchThreadId.x / RadianceProbeClipmapResolution; uint3 LastFrameProbeCoord = uint3(DispatchThreadId.x - ClipmapIndex * RadianceProbeClipmapResolution, DispatchThreadId.yz);
if (ClipmapIndex < NumRadianceProbeClipmaps && all(LastFrameProbeCoord < RadianceProbeClipmapResolution))
{
uint3 LastFrameProbeIndirectionTextureCoord = uint3(LastFrameProbeCoord.x + ClipmapIndex * RadianceProbeClipmapResolution, LastFrameProbeCoord.yz);
uint LastFrameProbeIndex = LastFrameRadianceProbeIndirectionTexture.Load(uint4(LastFrameProbeIndirectionTextureCoord, 0));
// Only cells that held a probe last frame need processing.
if (LastFrameProbeIndex != INVALID_PROBE_INDEX)
{
// Last-frame probe coordinate -> world position -> probe coordinate in this frame's clipmap.
float3 ProbeWorldPosition = LastFrameProbeCoord * PackedLastFrameRadianceProbeCoordToWorldPosition[ClipmapIndex].w + PackedLastFrameRadianceProbeCoordToWorldPosition[ClipmapIndex].xyz;
int3 ProbeCoord = GetRadianceProbeCoord(ProbeWorldPosition, ClipmapIndex)/*=floor(ProbeWorldPosition * GetWorldPositionToRadianceProbeCoordScale(ClipmapIndex) + GetWorldPositionToRadianceProbeCoordBias(ClipmapIndex))*/;
bool bReused = false;
// The probe can only be reused if it still falls inside this frame's clipmap.
if (all(ProbeCoord >= 0) && all(ProbeCoord < (int3)RadianceProbeClipmapResolution))
{
uint3 ProbeIndirectionTextureCoord = uint3(ProbeCoord.x + ClipmapIndex * RadianceProbeClipmapResolution, ProbeCoord.yz);
uint ProbeUsedMarker = RWRadianceProbeIndirectionTexture[ProbeIndirectionTextureCoord];
uint LastUsedFrameNumber = RWProbeLastUsedFrame[LastFrameProbeIndex];
// Keep the probe when its cell is marked used this frame, or when it was used recently enough.
if (ProbeUsedMarker == USED_PROBE_INDEX
|| FrameNumber - LastUsedFrameNumber < NumFramesToKeepCachedProbes)
{
bReused = true;
// Refresh the last-used frame only for cells marked used this frame.
if (ProbeUsedMarker == USED_PROBE_INDEX)
{
RWProbeLastUsedFrame[LastFrameProbeIndex] = FrameNumber;
}
// Replace the marker with the carried-over probe index.
RWRadianceProbeIndirectionTexture[ProbeIndirectionTextureCoord] = LastFrameProbeIndex;
}
}
if (!bReused)
{
// Append the probe index to the free list; the atomic add returns the slot to write.
int FreeIndex;
InterlockedAdd(RWProbeFreeListAllocator[0], 1, FreeIndex);
RWProbeFreeList[FreeIndex] = LastFrameProbeIndex;
}
}
}
}
<img width="783" alt="image" src="https://github.com/yasukichi/testcode/assets/14350715/c0c89321-0897-47bf-82ab-47e2b4a81fbc">
C++(LumenRadianceCache.cpp)
// Excerpt of UpdateRadianceCaches showing only the "AllocateUsedProbes" compute pass.
// NOTE(review): "(...)" and the lone ":" lines look like elisions made when the snippet was pasted — confirm.
void UpdateRadianceCaches(...)
{
:
// One iteration per radiance cache; inputs, setup data and outputs are indexed by RadianceCacheIndex.
for (int32 RadianceCacheIndex = 0; RadianceCacheIndex < InputArray.Num(); RadianceCacheIndex++)
{
const FUpdateInputs& Inputs = InputArray[RadianceCacheIndex];
const FRadianceCacheInputs& RadianceCacheInputs = Inputs.RadianceCacheInputs;
const FViewInfo& View = Inputs.View;
const FRadianceCacheSetup& Setup = SetupOutputArray[RadianceCacheIndex];
FUpdateOutputs& Outputs = OutputArray[RadianceCacheIndex];
FRadianceCacheState& RadianceCacheState = Outputs.RadianceCacheState;
FRadianceCacheInterpolationParameters& RadianceCacheParameters = Outputs.RadianceCacheParameters;
// Allocate probes for the cells of the indirection texture that were marked as used
{
FRDGBufferUAVRef ProbeFreeListAllocatorUAV = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(ProbeFreeListAllocator[RadianceCacheIndex], PF_R32_SINT));
FRDGBufferUAVRef ProbeLastUsedFrameUAV = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(ProbeLastUsedFrame[RadianceCacheIndex], PF_R32_UINT));
FRDGTextureUAVRef RadianceProbeIndirectionTextureUAV = GraphBuilder.CreateUAV(FRDGTextureUAVDesc(RadianceCacheParameters.RadianceProbeIndirectionTexture));
FRDGBufferUAVRef ProbeAllocatorUAV = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(ProbeAllocator[RadianceCacheIndex], PF_R32_UINT));
// Probe capacity is the number of probe slots in the atlas (X * Y).
const int32 MaxNumProbes = RadianceCacheInputs.ProbeAtlasResolutionInProbes.X * RadianceCacheInputs.ProbeAtlasResolutionInProbes.Y;
FAllocateUsedProbesCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FAllocateUsedProbesCS::FParameters>();
PassParameters->RWRadianceProbeIndirectionTexture = RadianceProbeIndirectionTextureUAV;
PassParameters->RWPriorityHistogram = GraphBuilder.CreateUAV(PriorityHistogram[RadianceCacheIndex]);
PassParameters->RWProbeLastTracedFrame = GraphBuilder.CreateUAV(ProbeLastTracedFrame[RadianceCacheIndex]);
PassParameters->RWProbeLastUsedFrame = ProbeLastUsedFrameUAV;
PassParameters->RWProbeAllocator = ProbeAllocatorUAV;
// The free-list resources are only bound for a persistent cache; otherwise they are left null.
PassParameters->RWProbeFreeListAllocator = Setup.bPersistentCache ? ProbeFreeListAllocatorUAV : nullptr;
PassParameters->View = View.ViewUniformBuffer;
PassParameters->ProbeFreeList = Setup.bPersistentCache ? GraphBuilder.CreateSRV(FRDGBufferSRVDesc(ProbeFreeList[RadianceCacheIndex], PF_R32_UINT)) : nullptr;
// Reciprocal of the clipmap world extent, with the extent clamped to at least 1 to avoid dividing by zero.
PassParameters->FirstClipmapWorldExtentRcp = 1.0f / FMath::Max(RadianceCacheInputs.ClipmapWorldExtent, 1.0f);
// Distance thresholds are passed squared.
PassParameters->SupersampleDistanceFromCameraSq = GLumenRadianceCacheSupersampleDistanceFromCamera * GLumenRadianceCacheSupersampleDistanceFromCamera;
PassParameters->DownsampleDistanceFromCameraSq = GLumenRadianceCacheDownsampleDistanceFromCamera * GLumenRadianceCacheDownsampleDistanceFromCamera;
// Falls back to the view family's frame number when the view has no view state.
PassParameters->FrameNumber = View.ViewState ? View.ViewState->GetFrameIndex() : View.Family->FrameNumber;
PassParameters->MaxNumProbes = MaxNumProbes;
PassParameters->RadianceCacheParameters = RadianceCacheParameters;
// Shader permutation is selected by whether the cache is persistent.
FAllocateUsedProbesCS::FPermutationDomain PermutationVector;
PermutationVector.Set<FAllocateUsedProbesCS::FPersistentCache>(Setup.bPersistentCache);
auto ComputeShader = View.ShaderMap->GetShader<FAllocateUsedProbesCS>(PermutationVector);
// Group count is derived from the indirection texture size and the shader's group size.
const FIntVector GroupSize = FComputeShaderUtils::GetGroupCount(RadianceCacheParameters.RadianceProbeIndirectionTexture->Desc.GetSize(), FAllocateUsedProbesCS::GetGroupSize());
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("AllocateUsedProbes"),
ComputePassFlags,
ComputeShader,
PassParameters,
GroupSize);
}
}
:
}
- シェーダー(LumenRadianceCacheMarkCommon.ush)
```C++
// Resources and constants declared for AllocateUsedProbesCS.
RWTexture3D<uint> RWRadianceProbeIndirectionTexture;
uint FrameNumber;
RWStructuredBuffer<uint> RWPriorityHistogram;
RWStructuredBuffer<uint> RWProbeLastTracedFrame;
RWBuffer<uint> RWProbeLastUsedFrame;
// NOTE(review): the pasted snippet was cut off after a bare "RWBuffer" token, which is not a valid
// declaration. The two declarations below are reconstructed from how AllocateUsedProbesCS uses these
// buffers (an atomic add returning a uint probe index, and an atomic add returning a signed free
// count) — confirm names and element types against the engine source. Other parameters the shader
// reads (e.g. ProbeFreeList, MaxNumProbes) are presumably declared elsewhere.
RWBuffer<uint> RWProbeAllocator;
RWBuffer<int> RWProbeFreeListAllocator;
// Resolves every non-empty cell of the indirection texture to a probe index and accumulates each
// probe's trace cost into the priority histogram.
// One thread per indirection texel; the texel's X coordinate encodes both the clipmap index
// (X / RadianceProbeClipmapResolution) and the probe X coordinate inside that clipmap.
// A cell holding USED_PROBE_INDEX gets a newly allocated probe; any other non-invalid value is
// treated as an existing probe index.
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)] void AllocateUsedProbesCS( uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_DispatchThreadID, uint3 GroupThreadId : SV_GroupThreadID) { uint ClipmapIndex = DispatchThreadId.x / RadianceProbeClipmapResolution; uint3 ProbeCoord = uint3(DispatchThreadId.x - ClipmapIndex * RadianceProbeClipmapResolution, DispatchThreadId.y, DispatchThreadId.z);
if (ClipmapIndex < NumRadianceProbeClipmaps && all(ProbeCoord < RadianceProbeClipmapResolution))
{
uint ProbeUsedMarker = RWRadianceProbeIndirectionTexture[DispatchThreadId];
float3 ProbeWorldPosition = ProbeCoord * GetRadianceProbeCoordToWorldPositionScale(ClipmapIndex) + GetRadianceProbeCoordToWorldPositionBias(ClipmapIndex);
if (ProbeUsedMarker != INVALID_PROBE_INDEX)
{
uint ProbeIndex = INVALID_PROBE_INDEX;
uint LastTracedFrameIndex = 0;
uint LastUsedFrameIndex = 0;
if (ProbeUsedMarker == USED_PROBE_INDEX)
{
// Allocate new probe
#if PERSISTENT_CACHE
// Try to pop from the free list first: the atomic decrement returns the count before the decrement.
int NumFree;
InterlockedAdd(RWProbeFreeListAllocator[0], -1, NumFree);
if (NumFree > 0)
{
ProbeIndex = ProbeFreeList[NumFree - 1];
}
else
{
// Free list exhausted: take the next never-used index instead.
InterlockedAdd(RWProbeAllocator[0], 1, ProbeIndex);
}
#else
InterlockedAdd(RWProbeAllocator[0], 1, ProbeIndex);
#endif
// NOTE(review): these two writes happen before the ProbeIndex < MaxNumProbes check below — confirm
// that an out-of-range index is harmless here.
RWProbeLastTracedFrame[ProbeIndex] = PROBE_FRAME_INDEX_NEVER_TRACED;
RWProbeLastUsedFrame[ProbeIndex] = FrameNumber;
LastTracedFrameIndex = PROBE_FRAME_INDEX_NEVER_TRACED;
LastUsedFrameIndex = FrameNumber;
}
else
{
// Re-trace existing probe
ProbeIndex = ProbeUsedMarker;
LastTracedFrameIndex = RWProbeLastTracedFrame[ProbeIndex];
LastUsedFrameIndex = RWProbeLastUsedFrame[ProbeIndex];
}
if (ProbeIndex < MaxNumProbes)
{
// Update histogram
const uint PriorityBucketIndex = GetPriorityBucketIndex(LastTracedFrameIndex, LastUsedFrameIndex, ProbeWorldPosition);
uint ProbeTraceCost = GetProbeTraceCost(ProbeWorldPosition);
InterlockedAdd(RWPriorityHistogram[PriorityBucketIndex], ProbeTraceCost);
// Store the resolved probe index in place of the marker.
RWRadianceProbeIndirectionTexture[DispatchThreadId] = ProbeIndex;
}
else
{
// Probe index is beyond capacity: clear the cell.
RWRadianceProbeIndirectionTexture[DispatchThreadId] = INVALID_PROBE_INDEX;
}
}
}
}
C++(LumenRadianceCache.cpp)
// Excerpt of UpdateRadianceCaches showing only the "AllocateProbeTraces" compute pass.
// NOTE(review): "(...)" and the lone ":" lines look like elisions made when the snippet was pasted — confirm.
void UpdateRadianceCaches(...)
{
:
// One iteration per radiance cache; inputs, setup data and outputs are indexed by RadianceCacheIndex.
for (int32 RadianceCacheIndex = 0; RadianceCacheIndex < InputArray.Num(); RadianceCacheIndex++)
{
const FUpdateInputs& Inputs = InputArray[RadianceCacheIndex];
const FRadianceCacheInputs& RadianceCacheInputs = Inputs.RadianceCacheInputs;
const FViewInfo& View = Inputs.View;
const FRadianceCacheSetup& Setup = SetupOutputArray[RadianceCacheIndex];
FUpdateOutputs& Outputs = OutputArray[RadianceCacheIndex];
FRadianceCacheState& RadianceCacheState = Outputs.RadianceCacheState;
FRadianceCacheInterpolationParameters& RadianceCacheParameters = Outputs.RadianceCacheParameters;
// Trace probes up to selected priority bucket
{
FRDGBufferUAVRef ProbeFreeListAllocatorUAV = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(ProbeFreeListAllocator[RadianceCacheIndex], PF_R32_SINT));
FRDGTextureUAVRef RadianceProbeIndirectionTextureUAV = GraphBuilder.CreateUAV(FRDGTextureUAVDesc(RadianceCacheParameters.RadianceProbeIndirectionTexture));
FRDGBufferUAVRef ProbeTraceAllocatorUAV = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(ProbeTraceAllocator[RadianceCacheIndex], PF_R32_UINT));
// Probe capacity is the number of probe slots in the atlas (X * Y).
const int32 MaxNumProbes = RadianceCacheInputs.ProbeAtlasResolutionInProbes.X * RadianceCacheInputs.ProbeAtlasResolutionInProbes.Y;
FAllocateProbeTracesCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FAllocateProbeTracesCS::FParameters>();
PassParameters->RWRadianceProbeIndirectionTexture = RadianceProbeIndirectionTextureUAV;
PassParameters->RWProbesToUpdateTraceCost = GraphBuilder.CreateUAV(ProbesToUpdateTraceCost[RadianceCacheIndex]);
PassParameters->RWProbeLastTracedFrame = GraphBuilder.CreateUAV(ProbeLastTracedFrame[RadianceCacheIndex]);
PassParameters->RWProbeTraceAllocator = ProbeTraceAllocatorUAV;
PassParameters->RWProbeTraceData = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(ProbeTraceData[RadianceCacheIndex], PF_A32B32G32R32F));
// The free-list resources are only bound for a persistent cache; otherwise they are left null.
PassParameters->RWProbeFreeListAllocator = Setup.bPersistentCache ? ProbeFreeListAllocatorUAV : nullptr;
// Read-only inputs: the selected max update bucket, its trace budget and the per-probe last-used frame.
PassParameters->MaxUpdateBucket = GraphBuilder.CreateSRV(MaxUpdateBucket[RadianceCacheIndex], PF_R32_UINT);
PassParameters->MaxTracesFromMaxUpdateBucket = GraphBuilder.CreateSRV(MaxTracesFromMaxUpdateBucket[RadianceCacheIndex]);
PassParameters->ProbeLastUsedFrame = GraphBuilder.CreateSRV(ProbeLastUsedFrame[RadianceCacheIndex], PF_R32_UINT);
PassParameters->View = View.ViewUniformBuffer;
PassParameters->ProbeFreeList = Setup.bPersistentCache ? GraphBuilder.CreateSRV(FRDGBufferSRVDesc(ProbeFreeList[RadianceCacheIndex], PF_R32_UINT)) : nullptr;
// Distance thresholds are passed squared.
PassParameters->SupersampleDistanceFromCameraSq = GLumenRadianceCacheSupersampleDistanceFromCamera * GLumenRadianceCacheSupersampleDistanceFromCamera;
PassParameters->DownsampleDistanceFromCameraSq = GLumenRadianceCacheDownsampleDistanceFromCamera * GLumenRadianceCacheDownsampleDistanceFromCamera;
// Reciprocal of the clipmap world extent, with the extent clamped to at least 1 to avoid dividing by zero.
PassParameters->FirstClipmapWorldExtentRcp = 1.0f / FMath::Max(RadianceCacheInputs.ClipmapWorldExtent, 1.0f);
// Falls back to the view family's frame number when the view has no view state.
PassParameters->FrameNumber = View.ViewState ? View.ViewState->GetFrameIndex() : View.Family->FrameNumber;
PassParameters->MaxNumProbes = MaxNumProbes;
PassParameters->RadianceCacheParameters = RadianceCacheParameters;
auto ComputeShader = View.ShaderMap->GetShader<FAllocateProbeTracesCS>();
// Group count is derived from the indirection texture size and the shader's group size.
const FIntVector GroupSize = FComputeShaderUtils::GetGroupCount(RadianceCacheParameters.RadianceProbeIndirectionTexture->Desc.GetSize(), FAllocateProbeTracesCS::GetGroupSize());
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("AllocateProbeTraces"),
ComputePassFlags,
ComputeShader,
PassParameters,
GroupSize);
}
}
:
}
- シェーダー(LumenRadianceCacheUpdate.usf)
```C++
/**
 * Iterate again over all probes and update them based on the histogram priority max update bucket
 *
 * One thread per indirection texel; the texel's X coordinate encodes both the clipmap index
 * (X / RadianceProbeClipmapResolution) and the probe X coordinate inside that clipmap.
 * Every probe selected for tracing gets an entry appended to RWProbeTraceData, its trace cost added
 * to RWProbesToUpdateTraceCost[0] and its last-traced frame set to FrameNumber.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
void AllocateProbeTracesCS(
uint3 GroupId : SV_GroupID,
uint3 DispatchThreadId : SV_DispatchThreadID,
uint3 GroupThreadId : SV_GroupThreadID)
{
uint ClipmapIndex = DispatchThreadId.x / RadianceProbeClipmapResolution;
uint3 ProbeCoord = uint3(DispatchThreadId.x - ClipmapIndex * RadianceProbeClipmapResolution, DispatchThreadId.y, DispatchThreadId.z);
if (ClipmapIndex < NumRadianceProbeClipmaps && all(ProbeCoord < RadianceProbeClipmapResolution))
{
const uint ProbeIndex = RadianceProbeIndirectionTexture[DispatchThreadId];
float3 ProbeWorldPosition = ProbeCoord * GetRadianceProbeCoordToWorldPositionScale(ClipmapIndex) + GetRadianceProbeCoordToWorldPositionBias(ClipmapIndex);
// Skips cells whose stored value is not a probe index below MaxNumProbes.
if (ProbeIndex < MaxNumProbes)
{
const uint MaxUpdateBucketIndex = MaxUpdateBucket[0];
const uint LocalMaxTracesFromMaxUpdateBucket = MaxTracesFromMaxUpdateBucket[0];
const uint ProbeTraceCost = GetProbeTraceCost(ProbeWorldPosition);
// The helper called above is quoted below for reference: cost 16, 4 or 1 depending on squared camera distance.
/*
uint GetProbeTraceCost(float3 ProbeWorldPosition)
{
float3 CameraVector = LWCHackToFloat(PrimaryView.WorldCameraOrigin) - ProbeWorldPosition;
float DistanceFromCameraSq = dot(CameraVector, CameraVector);
if (DistanceFromCameraSq < SupersampleDistanceFromCameraSq)
{
return 16;
}
if (DistanceFromCameraSq < DownsampleDistanceFromCameraSq)
{
return 4;
}
return 1;
}
*/
const uint LastTracedFrameIndex = RWProbeLastTracedFrame[ProbeIndex];
const uint LastUsedFrameIndex = ProbeLastUsedFrame[ProbeIndex];
// Update everything up to the max selected priority bucket
const uint PriorityBucketIndex = GetPriorityBucketIndex(LastTracedFrameIndex, LastUsedFrameIndex, ProbeWorldPosition);
bool bTraceProbe = PriorityBucketIndex <= MaxUpdateBucketIndex;
// Can't trace more than MaxTracesFromMaxUpdateBucket from the last bucket
// Make an exception for the first bucket, as it contains probes without valid data
if (bTraceProbe && PriorityBucketIndex == MaxUpdateBucketIndex && MaxUpdateBucketIndex > 0)
{
// Accumulate allocated probe trace cost for the last bucket
// The atomic add returns the cost accumulated before this probe; the probe is rejected when adding
// its own cost would exceed the budget. The rejected cost stays in the [1] counter.
uint ProbeTraceAllocatedFromMaxUpdateBucket = 0;
InterlockedAdd(RWProbesToUpdateTraceCost[1], ProbeTraceCost, ProbeTraceAllocatedFromMaxUpdateBucket);
if (ProbeTraceAllocatedFromMaxUpdateBucket + ProbeTraceCost > LocalMaxTracesFromMaxUpdateBucket)
{
bTraceProbe = false;
}
}
if (bTraceProbe)
{
// Element [0] accumulates the total cost of all probes selected for tracing.
InterlockedAdd(RWProbesToUpdateTraceCost[0], ProbeTraceCost);
uint TraceIndex;
InterlockedAdd(RWProbeTraceAllocator[0], 1, TraceIndex);
// Trace record: xyz = probe world position, w = bit pattern with the clipmap index in the top 8 bits
// and the probe index in the low 24 bits.
RWProbeTraceData[TraceIndex] = float4(ProbeWorldPosition, asfloat((ClipmapIndex << 24) | ProbeIndex));
RWProbeLastTracedFrame[ProbeIndex] = FrameNumber;
}
}
}
}
// Maps a probe to a priority histogram bucket.
// Probes that have never been traced always land in bucket 0. For all other probes the bucket index
// shrinks as the gap between the last-used and last-traced frame grows and as the probe gets
// closer to the camera.
uint GetPriorityBucketIndex(uint LastTracedFrameIndex, uint LastUsedFrameIndex, float3 ProbeWorldPosition)
{
	// Special case for probes which were created this frame: most important bucket.
	if (LastTracedFrameIndex == PROBE_FRAME_INDEX_NEVER_TRACED)
	{
		return 0;
	}

	// [1;N]
	const uint FramesSinceTrace = LastUsedFrameIndex - LastTracedFrameIndex;

	// [0;MAX_UPDATE_FREQUENCY], decreasing with distance from the camera
	const float3 ToCamera = LWCHackToFloat(PrimaryView.WorldCameraOrigin) - ProbeWorldPosition;
	const float CameraDistance = sqrt(dot(ToCamera, ToCamera));
	const float UpdateFrequency = MAX_UPDATE_FREQUENCY - clamp(CameraDistance * FirstClipmapWorldExtentRcp, 0.0f, MAX_UPDATE_FREQUENCY);

	// [1;N]
	const float Importance = FramesSinceTrace * (UpdateFrequency + 1.0f);

	// Normalize histogram based on the expected range
	const float ImportanceToBucketScale = (PRIORITY_HISTOGRAM_SIZE * NumProbesToTraceBudget) / (MaxNumProbes * (MAX_UPDATE_FREQUENCY + 1.0f));

	// Higher importance -> lower bucket index; bucket 0 stays reserved for never-traced probes.
	const uint BucketIndex = PRIORITY_HISTOGRAM_SIZE - 1 - clamp((Importance - 1.0f) * ImportanceToBucketScale, 0, PRIORITY_HISTOGRAM_SIZE - 2);
	return BucketIndex;
}
C++(LumenRadianceCache.cpp)
// Excerpt of UpdateRadianceCaches showing only the "ComputeProbeWorldOffsets" compute pass.
// NOTE(review): "(...)" and the lone ":" lines look like elisions made when the snippet was pasted — confirm.
void UpdateRadianceCaches(...)
{
:
// One iteration per radiance cache; inputs, setup data and outputs are indexed by RadianceCacheIndex.
for (int32 RadianceCacheIndex = 0; RadianceCacheIndex < InputArray.Num(); RadianceCacheIndex++)
{
const FUpdateInputs& Inputs = InputArray[RadianceCacheIndex];
const FRadianceCacheInputs& RadianceCacheInputs = Inputs.RadianceCacheInputs;
const FViewInfo& View = Inputs.View;
const FRadianceCacheSetup& Setup = SetupOutputArray[RadianceCacheIndex];
FUpdateOutputs& Outputs = OutputArray[RadianceCacheIndex];
FRadianceCacheState& RadianceCacheState = Outputs.RadianceCacheState;
FRadianceCacheInterpolationParameters& RadianceCacheParameters = Outputs.RadianceCacheParameters;
// The offset pass is only added when this cache calculates irradiance.
if (RadianceCacheInputs.CalculateIrradiance)
{
FRDGBufferUAVRef ProbeWorldOffsetUAV = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(ProbeWorldOffset[RadianceCacheIndex], PF_A32B32G32R32F));
FComputeProbeWorldOffsetsCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FComputeProbeWorldOffsetsCS::FParameters>();
PassParameters->RWProbeWorldOffset = ProbeWorldOffsetUAV;
PassParameters->View = View.ViewUniformBuffer;
PassParameters->ProbeTraceData = GraphBuilder.CreateSRV(FRDGBufferSRVDesc(ProbeTraceData[RadianceCacheIndex], PF_A32B32G32R32F));
PassParameters->RadianceCacheParameters = RadianceCacheParameters;
PassParameters->IndirectArgs = GenerateProbeTraceTilesIndirectArgs[RadianceCacheIndex];
auto ComputeShader = View.ShaderMap->GetShader<FComputeProbeWorldOffsetsCS>();
// Indirect dispatch: the group count is read from IndirectArgs at byte offset 0.
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("ComputeProbeWorldOffsets"),
ComputePassFlags,
ComputeShader,
PassParameters,
PassParameters->IndirectArgs,
0);
}
// The SRV is created outside the branch above, so it is set even when the offset pass is skipped.
RadianceCacheParameters.ProbeWorldOffset = GraphBuilder.CreateSRV(FRDGBufferSRVDesc(ProbeWorldOffset[RadianceCacheIndex], PF_A32B32G32R32F));
}
:
}
- シェーダー(LumenRadianceCache.usf)
```C++
// Scratch storage for the two-stage reduction in ComputeProbeWorldOffsetsCS:
// xyz = candidate offset, w = distance to the nearest surface at that offset.
groupshared float4 SharedBestOffset[THREADGROUP_SIZE];
groupshared float4 SharedBestOffset2[THREADGROUP_SIZE];
// One thread group per probe trace (indexed by GroupId.z). When the probe centre is closer to a
// surface than the threshold, each thread tests one candidate offset and the group keeps the one
// with the largest distance to the nearest surface. The result is written to
// RWProbeWorldOffset[ProbeIndex]; zero is written when no offset is needed or none is good enough.
// NOTE(review): the X/Y/Z decomposition and the 8x8 reduction assume THREADGROUP_SIZE is 64 — confirm.
[numthreads(THREADGROUP_SIZE, 1, 1)]
void ComputeProbeWorldOffsetsCS(
uint3 GroupId : SV_GroupID,
uint GroupThreadId : SV_GroupThreadID)
{
uint ProbeTraceIndex = GroupId.z;
float3 ProbeWorldCenter;
uint ClipmapIndex;
uint ProbeIndex;
GetProbeTraceDataNoOffset(ProbeTraceIndex, ProbeWorldCenter, ClipmapIndex, ProbeIndex);
// The helper called above is quoted below for reference.
/*
void GetProbeTraceDataNoOffset(uint ProbeTraceIndex, out float3 ProbeWorldCenter, out uint ClipmapIndex, out uint ProbeIndex)
{
ProbeWorldCenter = ProbeTraceData[ProbeTraceIndex].xyz;
uint PackedW = asuint(ProbeTraceData[ProbeTraceIndex].w);
ClipmapIndex = PackedW >> 24;
ProbeIndex = PackedW & 0xFFFFFF;
}
*/
float DistanceToSurface = GetDistanceToNearestSurfaceGlobal(ProbeWorldCenter);
// Threshold is 5% of the clipmap's probe-coord-to-world scale.
float TooCloseThreshold = .05f * GetRadianceProbeCoordToWorldPositionScale(ClipmapIndex);
// This condition depends only on per-group data (the probe selected by GroupId), so every thread of
// the group takes the same branch and reaches the barriers below together.
if (DistanceToSurface < TooCloseThreshold)
{
float MaxVoxelOffset = .25f;
// Decompose the thread index into a 3D candidate coordinate (4 steps along X and Y).
uint X = GroupThreadId % 4;
uint Y = (GroupThreadId % 16) / 4;
uint Z = GroupThreadId / 16;
{
// Coordinates 0..3 map to -1..1, scaled by MaxVoxelOffset and the clipmap's probe spacing scale.
float3 Offset = (float3(X, Y, Z) * 2.0f / 3.0f - 1.0f) * MaxVoxelOffset * GetRadianceProbeCoordToWorldPositionScale(ClipmapIndex);
float SampleDistanceToSurface = GetDistanceToNearestSurfaceGlobal(ProbeWorldCenter + Offset);
SharedBestOffset[GroupThreadId] = float4(Offset, SampleDistanceToSurface);
}
GroupMemoryBarrierWithGroupSync();
// Stage 1: threads 0..7 each pick the candidate with the largest distance from a run of 8 entries.
if (GroupThreadId < 8)
{
float4 BestOffset = SharedBestOffset[GroupThreadId * 8];
for (uint i = 1; i < 8; i++)
{
if (SharedBestOffset[GroupThreadId * 8 + i].w > BestOffset.w)
{
BestOffset = SharedBestOffset[GroupThreadId * 8 + i];
}
}
SharedBestOffset2[GroupThreadId] = BestOffset;
}
GroupMemoryBarrierWithGroupSync();
// Stage 2: thread 0 picks the best of the 8 partial results and writes the final value.
if (GroupThreadId == 0)
{
float4 BestOffset = SharedBestOffset2[0];
for (uint i = 1; i < 8; i++)
{
if (SharedBestOffset2[i].w > BestOffset.w)
{
BestOffset = SharedBestOffset2[i];
}
}
// Only accept the offset if it actually moves the probe far enough away from the surface.
if (BestOffset.w >= TooCloseThreshold)
{
RWProbeWorldOffset[ProbeIndex] = float4(BestOffset.xyz, 1);
}
else
{
RWProbeWorldOffset[ProbeIndex] = 0;
}
}
}
else
{
// Probe centre is already far enough from any surface: no offset. Every thread writes the same value.
RWProbeWorldOffset[ProbeIndex] = 0;
}
}
C++(LumenRadianceCache.cpp)
// Excerpt of UpdateRadianceCaches showing only the "ScatterScreenProbeBRDFToRadianceProbes" compute pass.
// NOTE(review): "(...)" and the lone ":" lines look like elisions made when the snippet was pasted — confirm.
void UpdateRadianceCaches(...)
{
:
// One iteration per radiance cache; inputs, setup data and outputs are indexed by RadianceCacheIndex.
for (int32 RadianceCacheIndex = 0; RadianceCacheIndex < InputArray.Num(); RadianceCacheIndex++)
{
const FUpdateInputs& Inputs = InputArray[RadianceCacheIndex];
const FRadianceCacheInputs& RadianceCacheInputs = Inputs.RadianceCacheInputs;
const FViewInfo& View = Inputs.View;
const FRadianceCacheSetup& Setup = SetupOutputArray[RadianceCacheIndex];
FUpdateOutputs& Outputs = OutputArray[RadianceCacheIndex];
FRadianceCacheState& RadianceCacheState = Outputs.RadianceCacheState;
FRadianceCacheInterpolationParameters& RadianceCacheParameters = Outputs.RadianceCacheParameters;
// The pass is only added when a PDF buffer exists for this cache.
if (RadianceProbeSH_PDF[RadianceCacheIndex])
{
FScatterScreenProbeBRDFToRadianceProbesCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FScatterScreenProbeBRDFToRadianceProbesCS::FParameters>();
// The PDF buffer is bound as a signed 32-bit integer UAV.
PassParameters->RWRadianceProbeSH_PDF = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(RadianceProbeSH_PDF[RadianceCacheIndex], PF_R32_SINT));
PassParameters->BRDFProbabilityDensityFunctionSH = Inputs.BRDFProbabilityDensityFunctionSH;
PassParameters->View = View.ViewUniformBuffer;
PassParameters->ScreenProbeParameters = *Inputs.ScreenProbeParameters;
PassParameters->RadianceCacheParameters = RadianceCacheParameters;
auto ComputeShader = View.ShaderMap->GetShader<FScatterScreenProbeBRDFToRadianceProbesCS>(0);
// Indirect dispatch: arguments come from the screen probe indirect-args buffer, at the
// GroupPerProbe entry (entry index times the size of one dispatch-arguments struct).
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("ScatterScreenProbeBRDFToRadianceProbes"),
ComputePassFlags,
ComputeShader,
PassParameters,
Inputs.ScreenProbeParameters->ProbeIndirectArgs,
(uint32)EScreenProbeIndirectArgs::GroupPerProbe * sizeof(FRHIDispatchIndirectParameters));
}
}
:
}
- シェーダー(LumenRadianceCache.usf)
```C++
// One thread group per screen probe (GroupId.xy is the screen probe atlas coordinate).
// GroupThreadId.x selects one of the 2x2x2 radiance probes around the screen probe's world position,
// and GroupThreadId.y strides over the PDF coefficients, which are added atomically (as integers)
// to that radiance probe's entries in RWRadianceProbeSH_PDF.
// NOTE(review): the 3-bit corner decode assumes THREADGROUP_SIZE is 8 — confirm.
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void ScatterScreenProbeBRDFToRadianceProbesCS(
uint3 GroupId : SV_GroupID,
uint3 GroupThreadId : SV_GroupThreadID)
{
// PDF pass: Accumulate BRDF SH from screen probes
// Scatter: Scatter BRDF SH into 8 radiance probes using atomics to buffer
// Threadgroup per probe
uint2 ScreenProbeAtlasCoord = GroupId.xy;
uint ScreenProbeIndex = ScreenProbeAtlasCoord.y * ScreenProbeAtlasViewSize.x + ScreenProbeAtlasCoord.x;
uint2 ScreenProbeScreenPosition = GetScreenProbeScreenPosition(ScreenProbeIndex);
// Reject groups beyond the number of screen probes or beyond the atlas row width.
if (ScreenProbeIndex < GetNumScreenProbes() && ScreenProbeAtlasCoord.x < ScreenProbeAtlasViewSize.x)
{
float2 ScreenUV = GetScreenUVFromScreenProbePosition(ScreenProbeScreenPosition);
float SceneDepth = GetScreenProbeDepth(ScreenProbeAtlasCoord);
// Only screen probes with a positive depth are processed.
if (SceneDepth > 0)
{
float3 WorldPosition = GetWorldPositionFromScreenUV(ScreenUV, SceneDepth);
uint2 ScreenTileCoord = GetScreenTileCoord(ScreenProbeScreenPosition);
// The clipmap is chosen with a per-tile, per-frame noise value.
uint ClipmapIndex = GetRadianceProbeClipmap(WorldPosition, InterleavedGradientNoise(ScreenTileCoord, View.StateFrameIndexMod8));
// The noise helper used above is quoted below for reference.
/*
float InterleavedGradientNoise( float2 uv, float FrameId )
{
// magic values are found by experimentation
uv += FrameId * (float2(47, 17) * 0.695f);
const float3 magic = float3( 0.06711056f, 0.00583715f, 52.9829189f );
return frac(magic.z * frac(dot(uv, magic.xy)));
}
*/
if (ClipmapIndex < NumRadianceProbeClipmaps)
{
int3 BottomCornerProbeCoord = GetRadianceProbeBottomCornerCoord(WorldPosition, ClipmapIndex);
// Clipmaps are laid out side by side along X in the indirection texture.
int3 IndirectionTextureCoord = BottomCornerProbeCoord + int3(ClipmapIndex * RadianceProbeClipmapResolution, 0, 0);
// Bits 0, 1 and 2 of GroupThreadId.x give the X, Y and Z offset (0 or 1) from the bottom corner.
uint ProbeIndex = RadianceProbeIndirectionTexture[IndirectionTextureCoord + int3(GroupThreadId.x & 0x1, (GroupThreadId.x & 0x2) >> 1, (GroupThreadId.x & 0x4) >> 2)];
bool bTwoSidedFoliage = GetScreenProbeIsTwoSidedFoliage(ScreenProbeAtlasCoord);
// The helper called above is quoted below for reference.
/*
bool GetScreenProbeIsTwoSidedFoliage(uint2 ScreenProbeAtlasCoord)
{
uint Encoded = ScreenProbeWorldSpeed.Load(int3(ScreenProbeAtlasCoord, 0));
return (Encoded & 0x8000) != 0;
}
*/
//@todo - skip probes which are cached this frame
if (ProbeIndex != INVALID_PROBE_INDEX)
{
// Source: this screen probe's coefficients. Destination: this radiance probe's coefficient slots.
uint SHBaseIndex = (ScreenProbeAtlasCoord.y * ScreenProbeAtlasViewSize.x + ScreenProbeAtlasCoord.x) * NUM_PDF_SH_COEFFICIENTS/*=9*/;
uint ProbeSHBaseCoord = ProbeIndex * NUM_RADIANCE_PROBE_PDF_COEFFICIENTS/*NUM_PDF_SH_COEFFICIENTS+1*/;
for (uint CoefficientIndex = GroupThreadId.y; CoefficientIndex < NUM_RADIANCE_PROBE_PDF_COEFFICIENTS; CoefficientIndex += THREADGROUP_SIZE)
{
// Defaults apply to the slot past the SH coefficients: it simply accumulates 1 per contribution.
float Coefficient = 1.0f;
float MaxValuePerThread = 1.0f;
if (CoefficientIndex < NUM_PDF_SH_COEFFICIENTS)
{
// The Radiance Cache over-samples with high depth complexity caused by foliage, attempt to offset that by keeping at the lowest trace resolution
Coefficient = bTwoSidedFoliage ? 0.0f : BRDFProbabilityDensityFunctionSH[SHBaseIndex + CoefficientIndex];
// Scale applied before converting the coefficient to an integer for the atomic add.
// NOTE(review): presumably sized so the sum of many contributions stays in integer range — confirm.
MaxValuePerThread = (float)0xFFFFFFFF / 100000.0f;
}
int QuantizedCoefficient = Coefficient * MaxValuePerThread;
InterlockedAdd(RWRadianceProbeSH_PDF[ProbeSHBaseCoord + CoefficientIndex], QuantizedCoefficient);
}
}
}
}
}
}
C++(LumenRadianceCache.cpp)
// Excerpt of UpdateRadianceCaches showing only the "GenerateProbeTraceTiles" compute pass.
// NOTE(review): "(...)" and the lone ":" lines look like elisions made when the snippet was pasted — confirm.
void UpdateRadianceCaches(...)
{
:
// One iteration per radiance cache; inputs, setup data and outputs are indexed by RadianceCacheIndex.
for (int32 RadianceCacheIndex = 0; RadianceCacheIndex < InputArray.Num(); RadianceCacheIndex++)
{
const FUpdateInputs& Inputs = InputArray[RadianceCacheIndex];
const FRadianceCacheInputs& RadianceCacheInputs = Inputs.RadianceCacheInputs;
const FViewInfo& View = Inputs.View;
const FRadianceCacheSetup& Setup = SetupOutputArray[RadianceCacheIndex];
FUpdateOutputs& Outputs = OutputArray[RadianceCacheIndex];
FRadianceCacheState& RadianceCacheState = Outputs.RadianceCacheState;
FRadianceCacheInterpolationParameters& RadianceCacheParameters = Outputs.RadianceCacheParameters;
// Probe capacity is the number of probe slots in the atlas (X * Y).
const int32 MaxNumProbes = RadianceCacheInputs.ProbeAtlasResolutionInProbes.X * RadianceCacheInputs.ProbeAtlasResolutionInProbes.Y;
// Upper bound of trace tiles per probe side: probe resolution / trace group size, doubled.
const int32 MaxProbeTraceTileResolution = RadianceCacheInputs.RadianceProbeResolution / FRadianceCacheTraceFromProbesCS::GetGroupSize() * 2;
// Tile buffer sized for the worst case: every probe emitting MaxProbeTraceTileResolution^2 tiles.
ProbeTraceTileData[RadianceCacheIndex] = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateBufferDesc(sizeof(FIntPoint), MaxNumProbes * MaxProbeTraceTileResolution * MaxProbeTraceTileResolution), TEXT("Lumen.RadianceCache.ProbeTraceTileData"));
// Debug output texture: an 8x8 block per probe slot of the atlas.
const int32 DebugProbeBRDFOctahedronResolution = 8;
FRDGTextureDesc DebugBRDFProbabilityDensityFunctionDesc = FRDGTextureDesc::Create2D(
FIntPoint(RadianceCacheInputs.ProbeAtlasResolutionInProbes * DebugProbeBRDFOctahedronResolution),
PF_R16F,
FClearValueBinding::None,
TexCreate_ShaderResource | TexCreate_UAV);
FRDGTextureRef DebugBRDFProbabilityDensityFunction = GraphBuilder.CreateTexture(DebugBRDFProbabilityDensityFunctionDesc, TEXT("Lumen.RadianceCache.DebugBRDFProbabilityDensityFunction"));
{
FGenerateProbeTraceTilesCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FGenerateProbeTraceTilesCS::FParameters>();
PassParameters->RWProbeTraceTileAllocator = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(ProbeTraceTileAllocator[RadianceCacheIndex], PF_R32_UINT));
PassParameters->RWProbeTraceTileData = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(ProbeTraceTileData[RadianceCacheIndex], PF_R32G32_UINT));
PassParameters->ProbeTraceData = GraphBuilder.CreateSRV(FRDGBufferSRVDesc(ProbeTraceData[RadianceCacheIndex], PF_A32B32G32R32F));
// The PDF buffer is optional; it is left null when no PDF buffer exists for this cache.
PassParameters->RadianceProbeSH_PDF = RadianceProbeSH_PDF[RadianceCacheIndex] ? GraphBuilder.CreateSRV(FRDGBufferSRVDesc(RadianceProbeSH_PDF[RadianceCacheIndex], PF_R32_SINT)) : nullptr;
PassParameters->ProbesToUpdateTraceCost = GraphBuilder.CreateSRV(ProbesToUpdateTraceCost[RadianceCacheIndex]);
PassParameters->SupersampleTileBRDFThreshold = GLumenRadianceCacheSupersampleTileBRDFThreshold;
// Distance thresholds are passed squared.
PassParameters->SupersampleDistanceFromCameraSq = GLumenRadianceCacheSupersampleDistanceFromCamera * GLumenRadianceCacheSupersampleDistanceFromCamera;
PassParameters->DownsampleDistanceFromCameraSq = GLumenRadianceCacheDownsampleDistanceFromCamera * GLumenRadianceCacheDownsampleDistanceFromCamera;
PassParameters->RWDebugBRDFProbabilityDensityFunction = GraphBuilder.CreateUAV(FRDGTextureUAVDesc(DebugBRDFProbabilityDensityFunction));
PassParameters->DebugProbeBRDFOctahedronResolution = DebugProbeBRDFOctahedronResolution;
PassParameters->View = View.ViewUniformBuffer;
PassParameters->RadianceCacheParameters = RadianceCacheParameters;
PassParameters->GenerateProbeTraceTilesIndirectArgs = GenerateProbeTraceTilesIndirectArgs[RadianceCacheIndex];
// The "uniform traces" permutation is selected when there is no PDF buffer.
FGenerateProbeTraceTilesCS::FPermutationDomain PermutationVector;
PermutationVector.Set<FGenerateProbeTraceTilesCS::FUniformTraces>(RadianceProbeSH_PDF[RadianceCacheIndex] == nullptr);
auto ComputeShader = View.ShaderMap->GetShader<FGenerateProbeTraceTilesCS>(PermutationVector);
// Indirect dispatch: the group count is read from the indirect-args buffer at byte offset 0.
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("GenerateProbeTraceTiles"),
ComputePassFlags,
ComputeShader,
PassParameters,
PassParameters->GenerateProbeTraceTilesIndirectArgs,
0);
}
}
:
}
- シェーダー(LumenRadianceCache.usf)
```C++
// Running count of entries appended to PendingTraceTileList.
groupshared uint SharedNumPendingTraceTiles;
// Packed trace tiles queued for a deeper subdivision level (consumed by SubdivideTraceTileTree).
groupshared uint2 PendingTraceTileList[THREADGROUP_SIZE * THREADGROUP_SIZE * 4];
// Running count of entries appended to CompletedTraceTileList.
groupshared uint SharedNumCompletedTraceTiles;
// Packed trace tiles that are final; copied to RWProbeTraceTileData at the end of GenerateProbeTraceTilesCS.
groupshared uint2 CompletedTraceTileList[THREADGROUP_SIZE * THREADGROUP_SIZE * 4];
// Element 0 is compared against twice the probe trace cost budget to force a single level.
StructuredBuffer<uint> ProbesToUpdateTraceCost;
/**
 * Builds the trace tile list of one probe and appends it to RWProbeTraceTileData.
 * The probe handled by a thread group is selected by GroupId.z. Tiles are either refined into
 * four children of the next level or emitted as completed, and the completed list is written
 * to the global buffer at an offset reserved through RWProbeTraceTileAllocator[0].
 * NOTE(review): GlobalTraceOffset is used below but its declaration is not part of this excerpt.
 */
[numthreads(THREADGROUP_SIZE/*=8*/, THREADGROUP_SIZE, 1)]
void GenerateProbeTraceTilesCS(
uint3 GroupId : SV_GroupID,
uint2 GroupThreadId : SV_GroupThreadID)
{
uint ProbeTraceIndex = GroupId.z;
float3 ProbeWorldCenter;
// Receives the clipmap index, which this shader does not need.
uint Unused;
uint ProbeIndex;
GetProbeTraceData(ProbeTraceIndex, ProbeWorldCenter, Unused, ProbeIndex);
float DistanceFromCameraSq = dot(LWCHackToFloat(PrimaryView.WorldCameraOrigin) - ProbeWorldCenter, LWCHackToFloat(PrimaryView.WorldCameraOrigin) - ProbeWorldCenter);
// Ray gen pass:
// Clear trace tile list
// For each level [0 - 2] test PDF at tile center, issue trace tile if below threshold, otherwise subdivide and queue for next level
// Write out all trace tiles for indirect dispatch
// A single thread resets both shared counters; the barrier below publishes the reset to the group.
if (all(GroupThreadId.xy == 0))
{
SharedNumCompletedTraceTiles = 0;
SharedNumPendingTraceTiles = 0;
}
GroupMemoryBarrierWithGroupSync();
uint NumLevels = 1;
// Calculate subdivision level for the probe
// Level 0 is half of RadianceProbeResolution
if (DistanceFromCameraSq < DownsampleDistanceFromCameraSq)
{
NumLevels = DistanceFromCameraSq < SupersampleDistanceFromCameraSq ? 3 : 2;
}
// Force downsampling if we have too many probes to update
if (ProbesToUpdateTraceCost[0] > 2 * GetProbeTraceCostBudget()/*=NumProbesToTraceBudget * 4*/)
{
NumLevels = 1;
}
FThreeBandSHVector BRDF = GetBRDF_PDF(ProbeIndex);
uint BaseTraceTileResolution = RadianceProbeResolution / THREADGROUP_SIZE / 2;
// Queue trace tiles for level 0
// Only threads whose 2D id lies inside the level 0 tile grid take part; each owns one tile.
if (all(GroupThreadId.xy < BaseTraceTileResolution))
{
uint2 TraceTileCoord = GroupThreadId.xy;
uint Level = 0;
// Level 0 refines with a PDF threshold of 0, and only when at least one deeper level is allowed.
if (Level < (NumLevels - 1) && ShouldRefineTraceTile(TraceTileCoord, BaseTraceTileResolution, /*pdf threshold=*/0.0f, BRDF))
/*
bool ShouldRefineTraceTile(uint2 TraceTileCoord, uint TraceTileResolution, float LevelPDFThreshold, FThreeBandSHVector BRDF)
{
float2 ProbeUV = (TraceTileCoord + float2(.5f, .5f)) / float(TraceTileResolution);
float3 WorldConeDirection = OctahedronToUnitVector(ProbeUV * 2.0 - 1.0);
FThreeBandSHVector DirectionSH = SHBasisFunction3(WorldConeDirection);
float PDF = max(DotSH3(BRDF, DirectionSH), 0);
bool bRefineTraceTile = PDF > LevelPDFThreshold;
return bRefineTraceTile;
}
*/
{
// Reserve four consecutive pending slots and fill them with the 2x2 children at level 1.
uint TileBaseIndex;
InterlockedAdd(SharedNumPendingTraceTiles, 4, TileBaseIndex);
PendingTraceTileList[TileBaseIndex + 0] = PackTraceTileInfo(TraceTileCoord * 2 + uint2(0, 0), /*Level=*/1, ProbeTraceIndex);
PendingTraceTileList[TileBaseIndex + 1] = PackTraceTileInfo(TraceTileCoord * 2 + uint2(1, 0), /*Level=*/1, ProbeTraceIndex);
PendingTraceTileList[TileBaseIndex + 2] = PackTraceTileInfo(TraceTileCoord * 2 + uint2(0, 1), /*Level=*/1, ProbeTraceIndex);
PendingTraceTileList[TileBaseIndex + 3] = PackTraceTileInfo(TraceTileCoord * 2 + uint2(1, 1), /*Level=*/1, ProbeTraceIndex);
/*
uint2 PackTraceTileInfo(uint2 TraceTileCoord, uint Level, uint ProbeTraceIndex)
{
return uint2((TraceTileCoord.x & 0xFF) | ((TraceTileCoord.y & 0xFF) << 8) | ((Level & 0xFF) << 16), ProbeTraceIndex);
}
*/
}
else
{
// Not refined: the level 0 tile itself is final.
uint TileIndex;
InterlockedAdd(SharedNumCompletedTraceTiles, 1, TileIndex);
CompletedTraceTileList[TileIndex] = PackTraceTileInfo(TraceTileCoord, 0, ProbeTraceIndex);
}
}
// Make the level 0 appends visible before the pending list is consumed.
GroupMemoryBarrierWithGroupSync();
uint ThreadIndex = GroupThreadId.y * THREADGROUP_SIZE + GroupThreadId.x;
// Queue trace tiles for remaining levels
// The level count is passed as a literal in each branch (see the unroll note in SubdivideTraceTileTree).
if (NumLevels == 3)
{
SubdivideTraceTileTree(ThreadIndex, BaseTraceTileResolution, 3, ProbeTraceIndex, BRDF);
}
else if (NumLevels == 2)
{
SubdivideTraceTileTree(ThreadIndex, BaseTraceTileResolution, 2, ProbeTraceIndex, BRDF);
}
// One thread reserves space for every completed tile of this group in the global allocator.
if (ThreadIndex == 0)
{
InterlockedAdd(RWProbeTraceTileAllocator[0], SharedNumCompletedTraceTiles, GlobalTraceOffset);
}
GroupMemoryBarrierWithGroupSync();
// All threads cooperatively copy the completed tiles to the reserved range.
for (uint TraceTileIndex = ThreadIndex; TraceTileIndex < SharedNumCompletedTraceTiles; TraceTileIndex += THREADGROUP_SIZE * THREADGROUP_SIZE)
{
RWProbeTraceTileData[GlobalTraceOffset + TraceTileIndex] = CompletedTraceTileList[TraceTileIndex];
}
}
/**
 * Processes the pending trace tiles level by level, starting at level 1.
 * At every level each pending tile is either split into four children queued for the next level
 * or appended to CompletedTraceTileList. At the last level (NumLevels - 1) every tile is completed.
 *
 * ThreadIndex             - linear index of the calling thread inside the group
 * BaseTraceTileResolution - tile grid resolution of level 0; level L uses BaseTraceTileResolution << L
 * NumLevels               - total number of levels, must be a literal at the call site (see note below)
 * ProbeTraceIndex         - stored in every packed tile written here
 * BRDF                    - SH vector passed to ShouldRefineTraceTile
 */
void SubdivideTraceTileTree(
uint ThreadIndex,
uint BaseTraceTileResolution,
uint NumLevels,
uint ProbeTraceIndex,
FThreeBandSHVector BRDF)
{
// First pending entry that belongs to the level currently being processed.
uint PendingTraceListStartIndex = 0;
// NumLevels must be a literal to allow the loop to unroll, otherwise we get this incorrect compile error from the DXC compiler:
// error X3663: thread sync operation found in varying flow control, consider reformulating your algorithm so all threads will hit the sync simultaneously
UNROLL
for (uint Level = 1; Level < NumLevels; Level++)
{
uint TraceTileResolution = BaseTraceTileResolution << Level;
// Snapshot of the pending count: entries appended during this level belong to the next one.
// NOTE(review): no barrier separates this read from the InterlockedAdd below executed by other
// threads of the group; confirm that every thread is guaranteed to observe the same count.
uint NumPendingTraceTiles = SharedNumPendingTraceTiles;
for (uint PendingTraceTileIndex = PendingTraceListStartIndex + ThreadIndex; PendingTraceTileIndex < NumPendingTraceTiles; PendingTraceTileIndex += THREADGROUP_SIZE * THREADGROUP_SIZE)
{
uint2 TraceTileCoord = UnpackTraceTileInfo(PendingTraceTileList[PendingTraceTileIndex]);
if (Level < (NumLevels - 1) && ShouldRefineTraceTile(TraceTileCoord, TraceTileResolution, SupersampleTileBRDFThreshold, BRDF))
{
// Refine: queue the 2x2 children for the next level.
uint TileBaseIndex;
InterlockedAdd(SharedNumPendingTraceTiles, 4, TileBaseIndex);
PendingTraceTileList[TileBaseIndex + 0] = PackTraceTileInfo(TraceTileCoord * 2 + uint2(0, 0), Level + 1, ProbeTraceIndex);
PendingTraceTileList[TileBaseIndex + 1] = PackTraceTileInfo(TraceTileCoord * 2 + uint2(1, 0), Level + 1, ProbeTraceIndex);
PendingTraceTileList[TileBaseIndex + 2] = PackTraceTileInfo(TraceTileCoord * 2 + uint2(0, 1), Level + 1, ProbeTraceIndex);
PendingTraceTileList[TileBaseIndex + 3] = PackTraceTileInfo(TraceTileCoord * 2 + uint2(1, 1), Level + 1, ProbeTraceIndex);
}
else
{
// Keep the tile at the current level.
uint TileIndex;
InterlockedAdd(SharedNumCompletedTraceTiles, 1, TileIndex);
CompletedTraceTileList[TileIndex] = PackTraceTileInfo(TraceTileCoord, Level, ProbeTraceIndex);
}
}
// All appends of this level must be finished before the next level reads the lists.
GroupMemoryBarrierWithGroupSync();
PendingTraceListStartIndex = NumPendingTraceTiles;
}
}
C++(LumenRadianceCache.cpp)
// Excerpt of UpdateRadianceCaches: for every radiance cache, creates the indirect-argument buffers and the
// hardware ray tracing ray allocator, then adds a single-group SetupTraceFromProbesCS pass that fills them
// from ProbeTraceTileAllocator. (Lines consisting of ':' appear to mark code omitted from this excerpt.)
void UpdateRadianceCaches(...)
{
:
// One buffer per radiance cache, indexed by RadianceCacheIndex.
TInlineArray<FRDGBufferRef> TraceProbesIndirectArgs(InputArray.Num());
TInlineArray<FRDGBufferRef> SortProbeTraceTilesIndirectArgs(InputArray.Num());
TInlineArray<FRDGBufferRef> RadianceCacheHardwareRayTracingIndirectArgs(InputArray.Num());
TInlineArray<FRDGBufferRef> HardwareRayTracingRayAllocatorBuffer(InputArray.Num());
for (int32 RadianceCacheIndex = 0; RadianceCacheIndex < InputArray.Num(); RadianceCacheIndex++)
{
const FUpdateInputs& Inputs = InputArray[RadianceCacheIndex];
const FRadianceCacheInputs& RadianceCacheInputs = Inputs.RadianceCacheInputs;
const FViewInfo& View = Inputs.View;
const FRadianceCacheSetup& Setup = SetupOutputArray[RadianceCacheIndex];
FUpdateOutputs& Outputs = OutputArray[RadianceCacheIndex];
FRadianceCacheState& RadianceCacheState = Outputs.RadianceCacheState;
FRadianceCacheInterpolationParameters& RadianceCacheParameters = Outputs.RadianceCacheParameters;
TraceProbesIndirectArgs[RadianceCacheIndex] = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateIndirectDesc<FRHIDispatchIndirectParameters>(4), TEXT("Lumen.RadianceCache.TraceProbesIndirectArgs"));
SortProbeTraceTilesIndirectArgs[RadianceCacheIndex] = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateIndirectDesc<FRHIDispatchIndirectParameters>(5), TEXT("Lumen.RadianceCache.SortProbeTraceTilesIndirectArgs"));
RadianceCacheHardwareRayTracingIndirectArgs[RadianceCacheIndex] = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateIndirectDesc<FRHIDispatchIndirectParameters>(6), TEXT("Lumen.RadianceCache.RadianceCacheHardwareRayTracingIndirectArgs"));
// Single uint32 element that the setup shader fills with the total ray count.
HardwareRayTracingRayAllocatorBuffer[RadianceCacheIndex] = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateBufferDesc(sizeof(uint32), 1), TEXT("Lumen.RadianceCache.HardwareRayTracing.RayAllocatorBuffer"));
{
FSetupTraceFromProbesCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FSetupTraceFromProbesCS::FParameters>();
PassParameters->RWTraceProbesIndirectArgs = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(TraceProbesIndirectArgs[RadianceCacheIndex], PF_R32_UINT));
PassParameters->RWSortProbeTraceTilesIndirectArgs = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(SortProbeTraceTilesIndirectArgs[RadianceCacheIndex], PF_R32_UINT));
PassParameters->RWRadianceCacheHardwareRayTracingIndirectArgs = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(RadianceCacheHardwareRayTracingIndirectArgs[RadianceCacheIndex], PF_R32_UINT));
PassParameters->RWHardwareRayTracingRayAllocatorBuffer = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(HardwareRayTracingRayAllocatorBuffer[RadianceCacheIndex], PF_R32_UINT));
// The tile count produced by the tile generation pass is the only input of the setup shader.
PassParameters->ProbeTraceTileAllocator = GraphBuilder.CreateSRV(FRDGBufferSRVDesc(ProbeTraceTileAllocator[RadianceCacheIndex], PF_R32_UINT));
PassParameters->SortTraceTilesGroupSize = FSortProbeTraceTilesCS::GetGroupSize();
auto ComputeShader = View.ShaderMap->GetShader<FSetupTraceFromProbesCS>(0);
// A single thread group is dispatched.
const FIntVector GroupSize = FIntVector(1);
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("SetupTraceFromProbesCS"),
ComputePassFlags,
ComputeShader,
PassParameters,
GroupSize);
}
}
:
}
- シェーダー(LumenRadianceCache.usf)
```C++
// Outputs of SetupTraceFromProbesCS: three dispatch dimensions per indirect-argument buffer,
// plus a single ray count.
RWBuffer<uint> RWTraceProbesIndirectArgs;
RWBuffer<uint> RWSortProbeTraceTilesIndirectArgs;
RWBuffer<uint> RWRadianceCacheHardwareRayTracingIndirectArgs;
RWBuffer<uint> RWHardwareRayTracingRayAllocatorBuffer;
// Thread group width of the sort pass, supplied from the C++ side.
uint SortTraceTilesGroupSize;

/**
 * Single-thread pass that converts the trace tile count stored in ProbeTraceTileAllocator[0]
 * into indirect dispatch arguments and a total ray count.
 */
[numthreads(1, 1, 1)]
void SetupTraceFromProbesCS()
{
	const uint TileCount = ProbeTraceTileAllocator[0];

	// Decompose the dispatch group layout into 2d to work around hitting D3D11_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION (65k) with a 1d layout,
	// which manifests as flickering during Force Full Update
	// X is fixed at TRACE_TILE_GROUP_STRIDE (=128); Y is the tile count divided by that stride, rounded up.
	const uint TraceGroupRows = (TileCount + TRACE_TILE_GROUP_STRIDE - 1) / TRACE_TILE_GROUP_STRIDE;
	RWTraceProbesIndirectArgs[0] = TRACE_TILE_GROUP_STRIDE;
	RWTraceProbesIndirectArgs[1] = TraceGroupRows;
	RWTraceProbesIndirectArgs[2] = 1;

	// Sort pass: enough groups of SortTraceTilesGroupSize threads to cover every tile.
	const uint SortGroupCount = (TileCount + SortTraceTilesGroupSize - 1) / SortTraceTilesGroupSize;
	RWSortProbeTraceTilesIndirectArgs[0] = SortGroupCount;
	RWSortProbeTraceTilesIndirectArgs[1] = 1;
	RWSortProbeTraceTilesIndirectArgs[2] = 1;

	// Hardware ray tracing: RADIANCE_CACHE_TRACE_TILE_SIZE_1D
	// (= RADIANCE_CACHE_TRACE_TILE_SIZE_2D (=8) * RADIANCE_CACHE_TRACE_TILE_SIZE_2D) in X, one row per tile in Y.
	RWRadianceCacheHardwareRayTracingIndirectArgs[0] = RADIANCE_CACHE_TRACE_TILE_SIZE_1D;
	RWRadianceCacheHardwareRayTracingIndirectArgs[1] = TileCount;
	RWRadianceCacheHardwareRayTracingIndirectArgs[2] = 1;

	// Total number of rays: one per texel of every trace tile.
	RWHardwareRayTracingRayAllocatorBuffer[0] = TileCount * RADIANCE_CACHE_TRACE_TILE_SIZE_1D;
}
C++(LumenRadianceCache.cpp)
// Excerpt of UpdateRadianceCaches: when GRadianceCacheSortTraceTiles is set, adds an indirect SortTraceTiles
// pass per radiance cache that writes a reordered copy of the trace tile list, and makes later passes use it.
// (Lines consisting of ':' appear to mark code omitted from this excerpt.)
void UpdateRadianceCaches(...)
{
:
for (int32 RadianceCacheIndex = 0; RadianceCacheIndex < InputArray.Num(); RadianceCacheIndex++)
{
const FUpdateInputs& Inputs = InputArray[RadianceCacheIndex];
const FRadianceCacheInputs& RadianceCacheInputs = Inputs.RadianceCacheInputs;
const FViewInfo& View = Inputs.View;
const FRadianceCacheSetup& Setup = SetupOutputArray[RadianceCacheIndex];
FUpdateOutputs& Outputs = OutputArray[RadianceCacheIndex];
FRadianceCacheState& RadianceCacheState = Outputs.RadianceCacheState;
FRadianceCacheInterpolationParameters& RadianceCacheParameters = Outputs.RadianceCacheParameters;
if (GRadianceCacheSortTraceTiles)
{
// The destination buffer reuses the description of the unsorted tile buffer.
FRDGBufferRef SortedProbeTraceTileData = GraphBuilder.CreateBuffer(ProbeTraceTileData[RadianceCacheIndex]->Desc, TEXT("Lumen.RadianceCache.SortedProbeTraceTileData"));
FSortProbeTraceTilesCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FSortProbeTraceTilesCS::FParameters>();
// Output: sorted list. Inputs: unsorted list, tile count and per-probe trace data.
PassParameters->RWProbeTraceTileData = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(SortedProbeTraceTileData, PF_R32G32_UINT));
PassParameters->ProbeTraceData = GraphBuilder.CreateSRV(FRDGBufferSRVDesc(ProbeTraceData[RadianceCacheIndex], PF_A32B32G32R32F));
PassParameters->ProbeTraceTileData = GraphBuilder.CreateSRV(FRDGBufferSRVDesc(ProbeTraceTileData[RadianceCacheIndex], PF_R32G32_UINT));
PassParameters->ProbeTraceTileAllocator = GraphBuilder.CreateSRV(FRDGBufferSRVDesc(ProbeTraceTileAllocator[RadianceCacheIndex], PF_R32_UINT));
PassParameters->SortProbeTraceTilesIndirectArgs = SortProbeTraceTilesIndirectArgs[RadianceCacheIndex];
PassParameters->RadianceCacheInputs = RadianceCacheInputs;
auto ComputeShader = View.ShaderMap->GetShader<FSortProbeTraceTilesCS>();
// Indirect dispatch driven by the arguments written by SetupTraceFromProbesCS.
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("SortTraceTiles"),
ComputePassFlags,
ComputeShader,
PassParameters,
PassParameters->SortProbeTraceTilesIndirectArgs,
0);
// From here on the sorted buffer replaces the unsorted one for this radiance cache.
ProbeTraceTileData[RadianceCacheIndex] = SortedProbeTraceTileData;
}
}
:
}
- シェーダー(LumenRadianceCache.usf)
```C++
// Fallback thread group width used when the compile environment does not provide one.
#ifndef SORT_TILES_THREADGROUP_SIZE
#define SORT_TILES_THREADGROUP_SIZE 1
#endif

// Trace tiles are bucketed on an 8x8 grid of directions.
#define NUM_DIRECTION_BINS_2D 8
#define NUM_DIRECTION_BINS_1D (NUM_DIRECTION_BINS_2D * NUM_DIRECTION_BINS_2D)

// Number of this group's trace tiles that fall into each direction bin.
groupshared uint SharedNumTraceTileBins[NUM_DIRECTION_BINS_1D];
// Number of tiles already placed into each bin during the scatter phase.
groupshared uint SharedTraceTileBinOffset[NUM_DIRECTION_BINS_1D];

/** Returns the linear direction bin index of a packed trace tile. */
uint GetTraceTileDirectionBin(uint2 PackedTraceTile)
{
	uint2 TileCoord;
	uint TileLevel;
	uint TileProbeTraceIndex;
	UnpackTraceTileInfo(PackedTraceTile, TileCoord, TileLevel, TileProbeTraceIndex);

	const uint TileTraceResolution = (RadianceProbeResolution / 2) << TileLevel;
	const uint2 TileTexelCoord = TileCoord * RADIANCE_CACHE_TRACE_TILE_SIZE_2D;
	const uint2 Bin2D = TileTexelCoord * NUM_DIRECTION_BINS_2D / TileTraceResolution;

	//@todo - also bin by Morton encoded position
	return Bin2D.y * NUM_DIRECTION_BINS_2D + Bin2D.x;
}

/**
 * Reorders the trace tiles covered by one thread group so that tiles of the same direction bin
 * are adjacent. Each thread owns at most one tile; the reordering stays inside the group's range.
 */
[numthreads(SORT_TILES_THREADGROUP_SIZE, 1, 1)]
void SortProbeTraceTilesCS(
	uint GroupId : SV_GroupID,
	uint GroupThreadId : SV_GroupThreadID)
{
	// Phase 0: zero both per-bin counters.
	for (uint ClearIndex = GroupThreadId; ClearIndex < NUM_DIRECTION_BINS_1D; ClearIndex += SORT_TILES_THREADGROUP_SIZE)
	{
		SharedNumTraceTileBins[ClearIndex] = 0;
		SharedTraceTileBinOffset[ClearIndex] = 0;
	}

	GroupMemoryBarrierWithGroupSync();

	const uint GroupFirstTileIndex = GroupId * SORT_TILES_THREADGROUP_SIZE;
	const uint TraceTileIndex = GroupFirstTileIndex + GroupThreadId;
	const bool bOwnsTile = TraceTileIndex < ProbeTraceTileAllocator[0];

	// Phase 1: histogram of tiles per direction bin.
	if (bOwnsTile)
	{
		const uint CountedBin = GetTraceTileDirectionBin(ProbeTraceTileData[TraceTileIndex]);
		InterlockedAdd(SharedNumTraceTileBins[CountedBin], 1);
	}

	GroupMemoryBarrierWithGroupSync();

	// Phase 2: scatter every tile to its sorted slot.
	if (bOwnsTile)
	{
		const uint2 PackedTile = ProbeTraceTileData[TraceTileIndex];
		const uint TargetBin = GetTraceTileDirectionBin(PackedTile);

		// Slot inside the bin, then shifted by the sizes of all bins that come before it.
		uint SortedSlot;
		InterlockedAdd(SharedTraceTileBinOffset[TargetBin], 1, SortedSlot);

		for (uint EarlierBin = 0; EarlierBin < TargetBin; EarlierBin++)
		{
			SortedSlot += SharedNumTraceTileBins[EarlierBin];
		}

		RWProbeTraceTileData[GroupFirstTileIndex + SortedSlot] = PackedTile;
	}
}
C++(LumenRadianceCache.cpp)
// Excerpt of UpdateRadianceCaches: per radiance cache, creates the atlas UAVs and, when hardware ray tracing
// is selected for the radiance cache, hands the trace tile buffers to RenderLumenHardwareRayTracingRadianceCache.
// (Lines consisting of ':' appear to mark code omitted from this excerpt.)
void UpdateRadianceCaches(...)
{
:
for (int32 RadianceCacheIndex = 0; RadianceCacheIndex < InputArray.Num(); RadianceCacheIndex++)
{
const FUpdateInputs& Inputs = InputArray[RadianceCacheIndex];
const FRadianceCacheInputs& RadianceCacheInputs = Inputs.RadianceCacheInputs;
const FViewInfo& View = Inputs.View;
const FRadianceCacheSetup& Setup = SetupOutputArray[RadianceCacheIndex];
FUpdateOutputs& Outputs = OutputArray[RadianceCacheIndex];
FRadianceCacheState& RadianceCacheState = Outputs.RadianceCacheState;
FRadianceCacheInterpolationParameters& RadianceCacheParameters = Outputs.RadianceCacheParameters;
FRDGTextureUAVRef RadianceProbeAtlasTextureUAV = GraphBuilder.CreateUAV(FRDGTextureUAVDesc(Setup.RadianceProbeAtlasTextureSource));
FRDGTextureUAVRef DepthProbeTextureUAV = GraphBuilder.CreateUAV(FRDGTextureUAVDesc(Setup.DepthProbeAtlasTexture));
// Number of probe slots in the atlas (X * Y).
const int32 MaxNumProbes = RadianceCacheInputs.ProbeAtlasResolutionInProbes.X * RadianceCacheInputs.ProbeAtlasResolutionInProbes.Y;
if (Lumen::UseHardwareRayTracedRadianceCache(*View.Family))
{
const int32 MaxProbeTraceTileResolution = RadianceCacheInputs.RadianceProbeResolution / FRadianceCacheTraceFromProbesCS::GetGroupSize() * 2;
float DiffuseConeHalfAngle = -1.0f;
RenderLumenHardwareRayTracingRadianceCache/*=RenderLumenHardwareRayTracingRadianceCacheTwoPass*/(
GraphBuilder,
Scene,
GetSceneTextureParameters(GraphBuilder, View),
View,
Inputs.TracingInputs,
RadianceCacheParameters,
Inputs.Configuration,
DiffuseConeHalfAngle,
MaxNumProbes,
MaxProbeTraceTileResolution,
ProbeTraceData[RadianceCacheIndex],
ProbeTraceTileData[RadianceCacheIndex],
ProbeTraceTileAllocator[RadianceCacheIndex],
TraceProbesIndirectArgs[RadianceCacheIndex],
HardwareRayTracingRayAllocatorBuffer[RadianceCacheIndex],
RadianceCacheHardwareRayTracingIndirectArgs[RadianceCacheIndex],
RadianceProbeAtlasTextureUAV,
DepthProbeTextureUAV
);
}
else
{
:
}
}
:
}
- C++(LumenRadianceCacheHardwareRayTracing.cpp)
```C++
// Excerpt: allocates the temporary per-ray result and retrace buffers, then dispatches the first
// (near-field, surface cache) tracing pass through DispatchRayGenOrComputeShader.
// NOTE(review): the '#if RHI_RAYTRACING' below has no matching '#endif' inside this excerpt, and
// WrappedTraceTileLayout / MaxRayCount are not used by the visible code; the ':' line appears to
// mark the omitted remainder of the function.
void RenderLumenHardwareRayTracingRadianceCacheTwoPass(
FRDGBuilder& GraphBuilder,
const FScene* Scene,
const FSceneTextureParameters& SceneTextures,
const FViewInfo& View,
const FLumenCardTracingInputs& TracingInputs,
const LumenRadianceCache::FRadianceCacheInterpolationParameters& RadianceCacheParameters,
FRadianceCacheConfiguration Configuration,
float DiffuseConeHalfAngle,
int32 MaxNumProbes,
int32 MaxProbeTraceTileResolution,
FRDGBufferRef ProbeTraceData,
FRDGBufferRef ProbeTraceTileData,
FRDGBufferRef ProbeTraceTileAllocator,
FRDGBufferRef TraceProbesIndirectArgs,
FRDGBufferRef HardwareRayTracingRayAllocatorBuffer,
FRDGBufferRef RadianceCacheHardwareRayTracingIndirectArgs,
FRDGTextureUAVRef RadianceProbeAtlasTextureUAV,
FRDGTextureUAVRef DepthProbeTextureUAV
)
{
#if RHI_RAYTRACING
// Must match usf
const int32 TempAtlasTraceTileStride = 1024;
extern int32 GRadianceCacheForceFullUpdate;
// Overflow is possible however unlikely - only nearby probes trace at max resolution
// A forced full update uses a fixed factor of 4 instead of the console variable.
const int32 TemporaryBufferAllocationDownsampleFactor = GRadianceCacheForceFullUpdate ? 4 : CVarLumenRadianceCacheTemporaryBufferAllocationDownsampleFactor.GetValueOnRenderThread();
// Trace tiles budgeted per probe: the maximum tile count divided by the downsample factor, rounded up.
const int32 TempAtlasNumTraceTiles = FMath::DivideAndRoundUp(MaxProbeTraceTileResolution * MaxProbeTraceTileResolution, TemporaryBufferAllocationDownsampleFactor);
const FIntPoint WrappedTraceTileLayout(
TempAtlasTraceTileStride,
FMath::DivideAndRoundUp(MaxNumProbes * TempAtlasNumTraceTiles, TempAtlasTraceTileStride));
// One element per ray: probes * budgeted tiles per probe * GetGroupSize() squared.
uint32 TraceTileResultPackedBufferElementCount = MaxNumProbes * TempAtlasNumTraceTiles * FLumenRadianceCacheHardwareRayTracingRGS::GetGroupSize() * FLumenRadianceCacheHardwareRayTracingRGS::GetGroupSize();
FRDGBufferRef TraceTileResultPackedBuffer = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(FTraceTileResultPacked), TraceTileResultPackedBufferElementCount), TEXT("Lumen.RadianceCache.HardwareRayTracing.TraceTileResultPackedBuffer"));
// The retrace buffer holds the same number of elements as the result buffer.
FRDGBufferRef RetraceDataPackedBuffer = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(LumenHWRTPipeline::FTraceDataPacked), TraceTileResultPackedBufferElementCount), TEXT("Lumen.RadianceCache.HardwareRayTracing.RetraceTilePackedBuffer"));
uint32 MaxRayCount = TraceTileResultPackedBufferElementCount;
const bool bInlineRayTracing = Lumen::UseHardwareInlineRayTracing(*View.Family);
// Far field is used only when both the view family and this cache's configuration enable it.
const bool bUseFarField = UseFarFieldForRadianceCache(*View.Family) && Configuration.bFarField;
// Default tracing of near-field, extract surface cache and material-id
{
// The sky light is applied in this pass only when far field is not used.
bool bApplySkyLight = !bUseFarField;
FLumenRadianceCacheHardwareRayTracing::FPermutationDomain PermutationVector;
PermutationVector.Set<FLumenRadianceCacheHardwareRayTracing::FLightingModeDim>(LumenHWRTPipeline::ELightingMode::SurfaceCache);
PermutationVector.Set<FLumenRadianceCacheHardwareRayTracing::FEnableNearFieldTracing>(true);
PermutationVector.Set<FLumenRadianceCacheHardwareRayTracing::FEnableFarFieldTracing>(false);
PermutationVector.Set<FLumenRadianceCacheHardwareRayTracing::FIndirectDispatchDim>(IsHardwareRayTracingRadianceCacheIndirectDispatch());
PermutationVector.Set<FLumenRadianceCacheHardwareRayTracing::FSpecularOcclusionDim>(false);
// Trace data is packed only when far field is used.
PermutationVector.Set<FLumenRadianceCacheHardwareRayTracing::FPackTraceDataDim>(bUseFarField);
PermutationVector.Set<FLumenRadianceCacheHardwareRayTracing::FClipRayDim>(GetRayTracingCulling() != 0);
DispatchRayGenOrComputeShader(GraphBuilder, Scene, View, SceneTextures, TracingInputs, RadianceCacheParameters, PermutationVector,
DiffuseConeHalfAngle, MaxNumProbes, MaxProbeTraceTileResolution, bApplySkyLight, bInlineRayTracing,
ProbeTraceTileAllocator, ProbeTraceTileData, ProbeTraceData,
HardwareRayTracingRayAllocatorBuffer, RetraceDataPackedBuffer, TraceTileResultPackedBuffer);
}
:
}
// Excerpt: builds the indirect dispatch arguments (when indirect dispatch is enabled), fills the shared
// tracing parameters and adds the tracing pass. Only the inline (compute shader) + indirect dispatch path
// is visible; the ':' lines appear to mark the omitted direct-dispatch and ray generation shader branches.
void DispatchRayGenOrComputeShader(
FRDGBuilder& GraphBuilder,
const FScene* Scene,
const FViewInfo& View,
const FSceneTextureParameters& SceneTextures,
const FLumenCardTracingInputs& TracingInputs,
const LumenRadianceCache::FRadianceCacheInterpolationParameters& RadianceCacheParameters,
const FLumenRadianceCacheHardwareRayTracing::FPermutationDomain& PermutationVector,
float DiffuseConeHalfAngle,
int32 MaxNumProbes,
int32 MaxProbeTraceTileResolution,
bool bApplySkyLight,
bool bInlineRayTracing,
FRDGBufferRef ProbeTraceTileAllocator,
FRDGBufferRef ProbeTraceTileData,
FRDGBufferRef ProbeTraceData,
FRDGBufferRef RayAllocatorBuffer,
FRDGBufferRef RetraceDataPackedBuffer,
FRDGBufferRef TraceTileResultPackedBuffer
)
{
// Created unconditionally; it is filled only when indirect dispatch is enabled.
FRDGBufferRef HardwareRayTracingIndirectArgsBuffer = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateIndirectDesc<FRHIDispatchIndirectParameters>(1), TEXT("Lumen.RadianceCache.HardwareRayTracing.IndirectArgsBuffer"));
if (IsHardwareRayTracingRadianceCacheIndirectDispatch())
{
// Single-group pass that converts the ray count in RayAllocatorBuffer into dispatch arguments.
FLumenRadianceCacheHardwareRayTracingIndirectArgsCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FLumenRadianceCacheHardwareRayTracingIndirectArgsCS::FParameters>();
{
PassParameters->RayAllocatorBuffer = GraphBuilder.CreateSRV(FRDGBufferSRVDesc(RayAllocatorBuffer, PF_R32_UINT));
PassParameters->RWHardwareRayTracingIndirectArgs = GraphBuilder.CreateUAV(HardwareRayTracingIndirectArgsBuffer, PF_R32_UINT);
// The group size depends on whether the inline compute shader or the ray generation shader will run.
PassParameters->OutputThreadGroupSize = bInlineRayTracing ? FLumenRadianceCacheHardwareRayTracingCS::GetThreadGroupSize() : FLumenRadianceCacheHardwareRayTracingRGS::GetThreadGroupSize();
}
TShaderRef<FLumenRadianceCacheHardwareRayTracingIndirectArgsCS> ComputeShader = View.ShaderMap->GetShader<FLumenRadianceCacheHardwareRayTracingIndirectArgsCS>();
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("HardwareRayTracingIndirectArgsCS"),
ComputeShader,
PassParameters,
FIntVector(1, 1, 1));
}
// Both flags are derived from the permutation chosen by the caller.
bool bEnableHitLighting = PermutationVector.Get<FLumenRadianceCacheHardwareRayTracingRGS::FLightingModeDim>() == LumenHWRTPipeline::ELightingMode::HitLighting;
bool bEnableFarFieldTracing = PermutationVector.Get<FLumenRadianceCacheHardwareRayTracingRGS::FEnableFarFieldTracing>();
FLumenRadianceCacheHardwareRayTracing::FParameters* PassParameters = GraphBuilder.AllocParameters<FLumenRadianceCacheHardwareRayTracing::FParameters>();
SetLumenHardwareRayTracingRadianceCacheParameters(
GraphBuilder,
View,
SceneTextures,
TracingInputs,
RadianceCacheParameters,
DiffuseConeHalfAngle,
bApplySkyLight,
bEnableHitLighting,
bEnableFarFieldTracing,
ProbeTraceTileAllocator,
ProbeTraceTileData,
ProbeTraceData,
RayAllocatorBuffer,
RetraceDataPackedBuffer,
TraceTileResultPackedBuffer,
HardwareRayTracingIndirectArgsBuffer,
PassParameters
);
uint32 PersistentTracingGroupCount = CVarLumenRadianceCacheHardwareRayTracingPersistentTracingGroupCount.GetValueOnRenderThread();
// In the visible indirect path this resolution is only used to build the pass event name.
FIntPoint DispatchResolution(FLumenRadianceCacheHardwareRayTracingRGS::GetGroupSize() * FLumenRadianceCacheHardwareRayTracingRGS::GetGroupSize(), PersistentTracingGroupCount);
if (bInlineRayTracing)
{
TShaderRef<FLumenRadianceCacheHardwareRayTracingCS> ComputeShader = View.ShaderMap->GetShader<FLumenRadianceCacheHardwareRayTracingCS>(PermutationVector);
if (IsHardwareRayTracingRadianceCacheIndirectDispatch())
{
// Indirect dispatch using the arguments written by HardwareRayTracingIndirectArgsCS above.
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("HardwareRayTracing (inline) %s %s", *LumenRadianceCache::GenerateModeString(bEnableHitLighting, bEnableFarFieldTracing), *LumenRadianceCache::GenerateResolutionString(DispatchResolution)),
ComputeShader,
PassParameters,
PassParameters->HardwareRayTracingIndirectArgs,
0);
}
else
{
:
}
}
else
{
:
}
}
#define MAX_DISPATCH_THREAD_PER_DIMENSION 4096

/**
 * Spreads a 1D count over up to three dimensions.
 * Whenever a dimension exceeds MAX_DISPATCH_THREAD_PER_DIMENSION it is replaced by ThreadGroupSize
 * and the next dimension receives the previous value divided by ThreadGroupSize, rounded up.
 */
int3 GetRayTracingThreadCountWrapped(uint TargetThreadCount, uint ThreadGroupSize)
{
	int3 Wrapped = int3(TargetThreadCount, 1, 1);

	// Overflow of X spills into Y.
	if (Wrapped.x > MAX_DISPATCH_THREAD_PER_DIMENSION)
	{
		Wrapped.y = (Wrapped.x + ThreadGroupSize - 1) / ThreadGroupSize;
		Wrapped.x = ThreadGroupSize;
	}

	// Overflow of Y spills into Z.
	if (Wrapped.y > MAX_DISPATCH_THREAD_PER_DIMENSION)
	{
		Wrapped.z = (Wrapped.y + ThreadGroupSize - 1) / ThreadGroupSize;
		Wrapped.y = ThreadGroupSize;
	}

	return Wrapped;
}
- シェーダー(LumenRadianceCacheHardwareRayTracing.usf)
```C++
// Element 0 holds the number of rays to trace.
Buffer<uint> RayAllocatorBuffer;
// Receives the three indirect dispatch dimensions.
RWBuffer<uint> RWHardwareRayTracingIndirectArgs;
// Thread group size of the tracing shader; only .x is read by this pass.
uint2 OutputThreadGroupSize;

/**
 * Single-thread pass that converts the ray count into a wrapped, three-dimensional
 * group count and stores it as indirect dispatch arguments.
 */
[numthreads(1, 1, 1)]
void LumenRadianceCacheHardwareRayTracingIndirectArgsCS()
{
	const uint RayCount = RayAllocatorBuffer[0];
	const uint GroupWidth = OutputThreadGroupSize.x;

	// Groups needed to cover every ray, rounded up.
	const uint GroupCount = (RayCount + GroupWidth - 1) / GroupWidth;

	// Wrap stride: RADIANCE_CACHE_TRACE_TILE_SIZE_1D = RADIANCE_CACHE_TRACE_TILE_SIZE_2D (=8) * RADIANCE_CACHE_TRACE_TILE_SIZE_2D.
	const int3 WrappedGroupCount = GetRayTracingThreadCountWrapped(GroupCount, RADIANCE_CACHE_TRACE_TILE_SIZE_1D);

	RWHardwareRayTracingIndirectArgs[0] = WrappedGroupCount.x;
	RWHardwareRayTracingIndirectArgs[1] = WrappedGroupCount.y;
	RWHardwareRayTracingIndirectArgs[2] = WrappedGroupCount.z;
}
/**
 * Inline ray tracing entry point: recovers the linear group index from the wrapped dispatch
 * and forwards the resulting linear thread index to the shared tracing routine.
 */
[numthreads(INLINE_RAY_TRACING_THREAD_GROUP_SIZE_X, INLINE_RAY_TRACING_THREAD_GROUP_SIZE_Y, 1)]
void LumenRadianceCacheHardwareRayTracingCS(
	uint3 GroupId : SV_GroupID,
	uint GroupIndex : SV_GroupIndex
)
{
	// Undo the wrapping applied when the indirect arguments were generated.
	const uint LinearGroupIndex = GetUnWrappedRayTracingDispatchThreadId(GroupId, RADIANCE_CACHE_TRACE_TILE_SIZE_1D);

	// Linear thread index: group index scaled by the group size in X, plus the index inside the group.
	const uint LinearThreadIndex = LinearGroupIndex * INLINE_RAY_TRACING_THREAD_GROUP_SIZE_X + GroupIndex;

	LumenRadianceCacheHardwareRayTracingCommon(LinearThreadIndex);
}
uint PersistentTracingGroupCount;
void LumenRadianceCacheHardwareRayTracingCommon(uint ThreadIndex)
{
uint TexelLinearCoord = ThreadIndex % RADIANCE_CACHE_TRACE_TILE_SIZE_1D/* = (RADIANCE_CACHE_TRACE_TILE_SIZE_2D * RADIANCE_CACHE_TRACE_TILE_SIZE_2D)=(8*8)*/;
uint GlobalGroupIndex = ThreadIndex / RADIANCE_CACHE_TRACE_TILE_SIZE_1D;
{
uint RayIndex = ThreadIndex;
#if ENABLE_FAR_FIELD_TRACING
FTraceData TraceData = UnpackTraceData(RWRetraceDataPackedBuffer[RayIndex]);
FRayIdPacked RayIdPacked;
RayIdPacked.PackedData = TraceData.RayId;
FRayId RayId = UnpackRayId(RayIdPacked);
uint TraceTileIndex = RayId.TraceTileIndex;
uint2 TexelCoord = RayId.TexelCoord;
#else
uint TraceTileIndex = GlobalGroupIndex;
uint2 TexelCoord = uint2(TexelLinearCoord % RADIANCE_CACHE_TRACE_TILE_SIZE_2D, TexelLinearCoord / RADIANCE_CACHE_TRACE_TILE_SIZE_2D);
#endif
if (RayIndex >= RayAllocatorBuffer[0])
{
return;
}
uint2 TraceTileCoord;
uint TraceTileLevel;
uint ProbeTraceIndex;
UnpackTraceTileInfo(ProbeTraceTileData[TraceTileIndex], TraceTileCoord, TraceTileLevel, ProbeTraceIndex);
/*
void UnpackTraceTileInfo(uint2 TraceTileInfo, out uint2 TraceTileCoord, out uint Level, out uint ProbeTraceIndex)
{
TraceTileCoord = UnpackTraceTileInfo(TraceTileInfo);
Level = (TraceTileInfo.x >> 16) & 0xFF;
ProbeTraceIndex = TraceTileInfo.y;
}
*/
float3 ProbeWorldCenter;
uint ClipmapIndex;
uint ProbeIndex;
GetProbeTraceData(ProbeTraceIndex, ProbeWorldCenter, ClipmapIndex, ProbeIndex);
/*
void GetProbeTraceData(uint ProbeTraceIndex, out float3 ProbeWorldCenter, out uint ClipmapIndex, out uint ProbeIndex)
{
float3 ProbeWorldCenterNoOffset;
GetProbeTraceDataNoOffset(ProbeTraceIndex, ProbeWorldCenterNoOffset, ClipmapIndex, ProbeIndex);
ProbeWorldCenter = ProbeWorldCenterNoOffset + ProbeWorldOffset[ProbeIndex].xyz;
}
void GetProbeTraceDataNoOffset(uint ProbeTraceIndex, out float3 ProbeWorldCenter, out uint ClipmapIndex, out uint ProbeIndex)
{
ProbeWorldCenter = ProbeTraceData[ProbeTraceIndex].xyz;
uint PackedW = asuint(ProbeTraceData[ProbeTraceIndex].w);
ClipmapIndex = PackedW >> 24;
ProbeIndex = PackedW & 0xFFFFFF;
}
*/
uint TraceResolution = (RadianceProbeResolution / 2) << TraceTileLevel;
uint2 ProbeTexelCoord = TraceTileCoord * RADIANCE_CACHE_TRACE_TILE_SIZE_2D + TexelCoord;
if (all(ProbeTexelCoord < TraceResolution))
{
float2 ProbeTexelCenter = float2(0.5, 0.5);
float2 ProbeUV = (ProbeTexelCoord + ProbeTexelCenter) / float(TraceResolution);
float3 WorldConeDirection = OctahedronToUnitVector(ProbeUV * 2.0 - 1.0);
/*
float3 OctahedronToUnitVector( float2 Oct )
{
float3 N = float3( Oct, 1 - dot( 1, abs(Oct) ) );
float t = max( -N.z, 0 );
N.xy += select(N.xy >= 0, float2(-t, -t), float2(t, t));
return normalize(N);
}
*/
float FinalMinTraceDistance = max(MinTraceDistance, GetRadianceProbeTMin(ClipmapIndex));
float FinalMaxTraceDistance = MaxTraceDistance;
// Evenly distributing the sphere solid angle among all cones instead of based on Octahedron distortion
float ConeHalfAngle = acosFast(1.0f - 1.0f / (float)(TraceResolution * TraceResolution));
FRayDesc Ray;
Ray.Origin = ProbeWorldCenter + LWCHackToFloat(PrimaryView.PreViewTranslation); // LUMEN_LWC_TODO
Ray.Direction = WorldConeDirection;
Ray.TMin = FinalMinTraceDistance;
Ray.TMax = FinalMaxTraceDistance;
#if ENABLE_FAR_FIELD_TRACING
Ray.TMin = max(Ray.TMin, FarFieldBias);
#endif
#if DIM_CLIP_RAY
// Cull TMax by the bounding sphere of the near-field
float2 RaySphereHit = RayIntersectSphere(Ray.Origin, Ray.Direction, float4(PrimaryView.TranslatedWorldCameraOrigin, RayTracingCullingRadius));
bool bInsideCullingSphere = RaySphereHit.x < 0 && RaySphereHit.y > 0;
Ray.TMax = bInsideCullingSphere ? RaySphereHit.y : Ray.TMin;
#endif // DIM_CULL_RAY
FRayCone RayCone = (FRayCone)0;
RayCone = PropagateRayCone(RayCone, ConeHalfAngle, 0.0);
/*
struct FRayCone
{
float Width;
float SpreadAngle;
};
FRayCone PropagateRayCone(in FRayCone Cone, in float SurfaceSpreadAngle, in float HitT)
{
FRayCone NewCone;
NewCone.Width = Cone.SpreadAngle * HitT + Cone.Width;
NewCone.SpreadAngle = Cone.SpreadAngle + SurfaceSpreadAngle;
return NewCone;
}
*/
const uint LinearCoord = ProbeTexelCoord.y * RADIANCE_CACHE_TRACE_TILE_SIZE_2D + ProbeTexelCoord.x;
const uint CullingMode = 0;
FRayTracedLightingContext Context = CreateRayTracedLightingContext(TLAS, RayCone, ProbeTexelCoord, LinearCoord, CullingMode, MaxTranslucentSkipCount, MaxTraversalIterations);
// Set far-field context specialization
Context.FarFieldMaxTraceDistance = FarFieldMaxTraceDistance;
Context.FarFieldReferencePos = FarFieldReferencePos;
#if DIM_SPECULAR_OCCLUSION
Context.bAcceptFirstHitAndEndSearch = true;
Context.bSkipClosestHitShader = true;
#endif // DIM_SPECULAR_OCCLUSION
Context.HitGroupData = HitGroupData;
Context.RayTracingSceneMetadata = RayTracingSceneMetadata;
FRayTracedLightingResult Result = TraceAndCalculateRayTracedLightingFromSurfaceCache(Ray, Context);
if ((ApplySkyLight != 0) && !Result.bIsHit)
{
FConeTraceResult TraceResult = (FConeTraceResult)0;
TraceResult.Transparency = 1;
ApplySkylightToTraceResult(Ray.Direction, TraceResult);
/*
void ApplySkylightToTraceResult(float3 ConeDirection, inout FConeTraceResult TraceResult)
{
if (ReflectionStruct.SkyLightParameters.y > 0)
{
float SkyAverageBrightness = 1.0f;
float TanConeAngle = 0.0f;
float Roughness = TanConeAngleToRoughness(TanConeAngle);
TraceResult.Lighting += GetSkyLightReflection(ConeDirection, Roughness, SkyAverageBrightness) * TraceResult.Transparency;
TraceResult.Transparency = 0;
}
}
ReflectionStruct.SkyLightParameters: X = max mip, Y = 1 if sky light should be rendered, 0 otherwise, Z = 1 if sky light is dynamic, 0 otherwise, W = blend fraction.
float3 GetSkyLightReflection(float3 ReflectionVector, float Roughness, out float OutSkyAverageBrightness)
{
float AbsoluteSpecularMip = ComputeReflectionCaptureMipFromRoughness(Roughness, ReflectionStruct.SkyLightParameters.x);
float3 Reflection = TextureCubeSampleLevel(ReflectionStruct.SkyLightCubemap, ReflectionStruct.SkyLightCubemapSampler, ReflectionVector, AbsoluteSpecularMip).rgb;
OutSkyAverageBrightness = GetSkyLightCubemapBrightness()(=SkyIrradianceEnvironmentMap[7].x Refer to FSceneRenderer::UpdateSkyIrradianceGpuBuffer for more details.) * Luminance(View.SkyLightColor.rgb);
return Reflection * View.SkyLightColor.rgb;
}
*/
Result.Radiance = TraceResult.Lighting;
Result.TraceHitDistance = MaxTraceDistance;
}
float SampleHitDistance = Result.TraceHitDistance;
float3 SampleRadiance = Result.Radiance;
// Write continuation data
#if DIM_PACK_TRACE_DATA
FRayIdPacked RayIdPacked = PackRayId(CreateRayId(TraceTileIndex, TexelCoord));
RWRetraceDataPackedBuffer[RayIndex] = PackTraceData(CreateTraceData(
RayIdPacked.PackedData,
Result.MaterialShaderIndex,
Result.Bookmark,
Result.TraceHitDistance,
Result.bIsHit,
Result.bIsRadianceCompleted,
Result.bIsFarField));
#endif // DIM_PACK_TRACE_DATA
uint OutputIndex = TraceTileIndex * RADIANCE_CACHE_TRACE_TILE_SIZE_1D + TexelCoord.y * RADIANCE_CACHE_TRACE_TILE_SIZE_2D + TexelCoord.x;
RWTraceTileResultPackedBuffer[OutputIndex] = PackTraceTileResult(CreateTraceTileResult(SampleRadiance, min(SampleHitDistance, MaxHalfFloat)));
/*
FTraceTileResult CreateTraceTileResult(float3 Radiance, float HitDistance)
{
FTraceTileResult TraceTileResult;
TraceTileResult.Radiance = Radiance;
TraceTileResult.HitDistance = HitDistance;
return TraceTileResult;
}
*/
}
}
}
C++(LumenRadianceCacheHardwareRayTracing.cpp)
// Excerpt of the two-pass radiance cache trace (':' marks elided code).
// This part sets up and dispatches the far-field continuation trace.
void RenderLumenHardwareRayTracingRadianceCacheTwoPass(...)
{
:
// Destination buffers for the compaction below: a single uint32 ray counter and a packed trace-data list.
FRDGBufferRef FarFieldRayAllocatorBuffer = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateBufferDesc(sizeof(uint32), 1), TEXT("Lumen.RadianceCache.HardwareRayTracing.FarFieldRayAllocatorBuffer"));
FRDGBufferRef FarFieldRetraceDataPackedBuffer = GraphBuilder.CreateBuffer(FRDGBufferDesc::CreateStructuredDesc(sizeof(LumenHWRTPipeline::FTraceDataPacked), TraceTileResultPackedBufferElementCount), TEXT("Lumen.RadianceCache.HardwareRayTracing.FarFieldRetraceDataPackedBuffer"));
// Reset the ray counter to 0; this happens whether or not far-field tracing is enabled.
AddClearUAVPass(GraphBuilder, GraphBuilder.CreateUAV(FarFieldRayAllocatorBuffer, PF_R32_UINT), 0);
if (bUseFarField)
{
// Compact the entries of RetraceDataPackedBuffer into the far-field buffers using the FarFieldRetrace compaction mode.
LumenHWRTCompactRays(GraphBuilder, Scene, View, MaxRayCount, LumenHWRTPipeline::ECompactMode::FarFieldRetrace,
HardwareRayTracingRayAllocatorBuffer, RetraceDataPackedBuffer,
FarFieldRayAllocatorBuffer, FarFieldRetraceDataPackedBuffer);
// Trace continuation rays
{
// Sky lighting is requested for this pass.
bool bApplySkyLight = true;
// Permutation: surface-cache lighting, near-field tracing off, far-field tracing on,
// no specular occlusion, no trace-data packing.
FLumenRadianceCacheHardwareRayTracing::FPermutationDomain PermutationVector;
PermutationVector.Set<FLumenRadianceCacheHardwareRayTracing::FLightingModeDim>(LumenHWRTPipeline::ELightingMode::SurfaceCache);
PermutationVector.Set<FLumenRadianceCacheHardwareRayTracing::FEnableNearFieldTracing>(false);
PermutationVector.Set<FLumenRadianceCacheHardwareRayTracing::FEnableFarFieldTracing>(true);
PermutationVector.Set<FLumenRadianceCacheHardwareRayTracing::FIndirectDispatchDim>(IsHardwareRayTracingRadianceCacheIndirectDispatch());
PermutationVector.Set<FLumenRadianceCacheHardwareRayTracing::FSpecularOcclusionDim>(false);
PermutationVector.Set<FLumenRadianceCacheHardwareRayTracing::FPackTraceDataDim>(false);
// Ray clipping is enabled whenever GetRayTracingCulling() returns a non-zero mode.
PermutationVector.Set<FLumenRadianceCacheHardwareRayTracing::FClipRayDim>(GetRayTracingCulling() != 0);
// The compacted far-field buffers and TraceTileResultPackedBuffer are handed to the dispatch.
DispatchRayGenOrComputeShader(GraphBuilder, Scene, View, SceneTextures, TracingInputs, RadianceCacheParameters, PermutationVector,
DiffuseConeHalfAngle, MaxNumProbes, MaxProbeTraceTileResolution, bApplySkyLight, bInlineRayTracing,
ProbeTraceTileAllocator, ProbeTraceTileData, ProbeTraceData,
FarFieldRayAllocatorBuffer, FarFieldRetraceDataPackedBuffer, TraceTileResultPackedBuffer);
}
}
:
}
- C++(LumenHardwareRayTracingCommon.cpp)
```C++
/**
 * Registers the two RDG compute passes that compact a packed ray list.
 *
 *  1. "CompactRaysIndirectArgs": a single-group dispatch that fills an indirect-args buffer
 *     from the counter stored in RayAllocatorBuffer.
 *  2. "CompactRays": an indirect dispatch (driven by the buffer from step 1) that reads
 *     RayAllocatorBuffer / TraceDataPackedBuffer and writes OutputRayAllocatorBuffer /
 *     OutputTraceDataPackedBuffer, using the shader permutation selected by CompactMode.
 *
 * Scene and RayCount are accepted but not referenced by this function.
 */
void LumenHWRTCompactRays(
	FRDGBuilder& GraphBuilder,
	const FScene* Scene,
	const FViewInfo& View,
	int32 RayCount,
	LumenHWRTPipeline::ECompactMode CompactMode,
	const FRDGBufferRef& RayAllocatorBuffer,
	const FRDGBufferRef& TraceDataPackedBuffer,
	FRDGBufferRef& OutputRayAllocatorBuffer,
	FRDGBufferRef& OutputTraceDataPackedBuffer
)
{
	// One FRHIDispatchIndirectParameters entry shared by both passes.
	FRDGBufferRef CompactRaysIndirectArgsBuffer = GraphBuilder.CreateBuffer(
		FRDGBufferDesc::CreateIndirectDesc<FRHIDispatchIndirectParameters>(1),
		TEXT("Lumen.HWRT.CompactTracingIndirectArgs"));

	// Pass 1: derive the indirect dispatch arguments from the input ray counter.
	{
		using FArgsShader = FLumenHWRTCompactRaysIndirectArgsCS;

		FArgsShader::FParameters* ArgsParameters = GraphBuilder.AllocParameters<FArgsShader::FParameters>();
		ArgsParameters->RayAllocator = GraphBuilder.CreateSRV(RayAllocatorBuffer, PF_R32_UINT);
		ArgsParameters->RWCompactRaysIndirectArgs = GraphBuilder.CreateUAV(CompactRaysIndirectArgsBuffer, PF_R32_UINT);

		TShaderRef<FArgsShader> ArgsShader = View.ShaderMap->GetShader<FArgsShader>();
		const FIntVector SingleGroup(1, 1, 1);

		FComputeShaderUtils::AddPass(
			GraphBuilder,
			RDG_EVENT_NAME("CompactRaysIndirectArgs"),
			ArgsShader,
			ArgsParameters,
			SingleGroup);
	}

	// Pass 2: run the compaction kernel with the indirect arguments produced above.
	{
		using FCompactShader = FLumenHWRTCompactRaysCS;

		FCompactShader::FParameters* CompactParameters = GraphBuilder.AllocParameters<FCompactShader::FParameters>();

		// Source ray list
		CompactParameters->RayAllocator = GraphBuilder.CreateSRV(RayAllocatorBuffer, PF_R32_UINT);
		CompactParameters->TraceDataPacked = GraphBuilder.CreateSRV(FRDGBufferSRVDesc(TraceDataPackedBuffer));

		// Compacted ray list
		CompactParameters->RWRayAllocator = GraphBuilder.CreateUAV(OutputRayAllocatorBuffer, PF_R32_UINT);
		CompactParameters->RWTraceDataPacked = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(OutputTraceDataPackedBuffer));

		// Dispatch arguments
		CompactParameters->CompactRaysIndirectArgs = CompactRaysIndirectArgsBuffer;

		FCompactShader::FPermutationDomain CompactPermutation;
		CompactPermutation.Set<FCompactShader::FCompactModeDim>(CompactMode);
		TShaderRef<FCompactShader> CompactShader = View.ShaderMap->GetShader<FCompactShader>(CompactPermutation);

		FComputeShaderUtils::AddPass(
			GraphBuilder,
			RDG_EVENT_NAME("CompactRays"),
			CompactShader,
			CompactParameters,
			CompactParameters->CompactRaysIndirectArgs,
			0);
	}
}
Buffer<uint> RayAllocator;
RWBuffer<uint> RWCompactRaysIndirectArgs;

/**
 * Single-thread kernel that writes the three indirect dispatch arguments for the compaction pass:
 * X = RayAllocator[0] divided by THREADGROUP_SIZE_1D, rounded up; Y = Z = 1.
 */
[numthreads(1, 1, 1)]
void FLumenHWRTCompactRaysIndirectArgsCS()
{
	const uint AllocatedRayCount = RayAllocator[0];
	const uint GroupCountX = (AllocatedRayCount + THREADGROUP_SIZE_1D - 1) / THREADGROUP_SIZE_1D;

	RWCompactRaysIndirectArgs[0] = GroupCountX;
	RWCompactRaysIndirectArgs[1] = 1;
	RWCompactRaysIndirectArgs[2] = 1;
}
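// NOTE(review): the template arguments and names of these declarations were lost in extraction.
// They are reconstructed from the C++ bindings in LumenHWRTCompactRays and the kernel's usage; confirm against the engine source.
RWBuffer<uint> RWRayAllocator;
StructuredBuffer<FTraceDataPacked> TraceDataPacked;
RWStructuredBuffer<FTraceDataPacked> RWTraceDataPacked;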
groupshared uint SharedRayAllocator;
groupshared uint SharedGroupOffset;
groupshared uint2 SharedTraceTexelDataPacked[THREADGROUP_SIZE_1D];
groupshared FTraceDataPacked SharedTraceDataPacked[THREADGROUP_SIZE_1D];

/**
 * Compacts the packed ray list: every ray below RayAllocator[0] that did not register a hit
 * is copied into a contiguous range of RWTraceDataPacked, and RWRayAllocator[0] is advanced
 * by the number of rays kept.
 *
 * Steps per thread group:
 *  1. Each kept ray grabs a slot in group-shared memory through a group-local counter.
 *  2. Thread 0 reserves a range in the output by adding the group's count to RWRayAllocator[0].
 *  3. The first SharedRayAllocator threads copy the staged entries to that range.
 */
[numthreads(THREADGROUP_SIZE_1D, 1, 1)]
void FLumenHWRTCompactRaysCS(
	uint GroupThreadId : SV_GroupThreadID,
	uint DispatchThreadId : SV_DispatchThreadID)
{
	// Step 0: clear the group-local counter before anyone allocates from it.
	SharedRayAllocator = 0;
	GroupMemoryBarrierWithGroupSync();

	// Step 1: stage surviving rays in group-shared memory.
	const uint RayIndex = DispatchThreadId;
	const FTraceDataPacked PackedEntry = TraceDataPacked[RayIndex];
	const FTraceData Entry = UnpackTraceData(PackedEntry);

	const bool bMissed = !Entry.bIsHit;
	const bool bInRange = RayIndex < RayAllocator[0];
	if (bMissed && bInRange)
	{
		uint LocalSlot;
		InterlockedAdd(SharedRayAllocator, 1, LocalSlot);
		SharedTraceDataPacked[LocalSlot] = PackedEntry;
	}
	GroupMemoryBarrierWithGroupSync();

	// Step 2: one thread reserves the group's output range in the global counter.
	if (GroupThreadId == 0)
	{
		InterlockedAdd(RWRayAllocator[0], SharedRayAllocator, SharedGroupOffset);
	}
	GroupMemoryBarrierWithGroupSync();

	// Step 3: write the staged entries out contiguously.
	if (GroupThreadId < SharedRayAllocator)
	{
		const uint OutputSlot = SharedGroupOffset + GroupThreadId;
		RWTraceDataPacked[OutputSlot] = SharedTraceDataPacked[GroupThreadId];
	}
}
<img width="627" alt="image" src="https://github.com/yasukichi/testcode/assets/14350715/9085751e-a348-43ce-9a26-e0fa3d456721">
C++(LumenRadianceCacheHardwareRayTracing.cpp)
// Excerpt of the two-pass radiance cache trace (':' marks elided code).
// This part adds the "CompositeTracesIntoAtlas" compute pass (FSplatRadianceCacheIntoAtlasCS).
void RenderLumenHardwareRayTracingRadianceCacheTwoPass(...)
{
:
// Reduce to Atlas
{
FSplatRadianceCacheIntoAtlasCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FSplatRadianceCacheIntoAtlasCS::FParameters>();
GetLumenCardTracingParameters(GraphBuilder, View, TracingInputs, PassParameters->TracingParameters);
SetupLumenDiffuseTracingParametersForProbe(View, PassParameters->IndirectTracingParameters, -1.0f);
// Outputs: radiance and depth probe atlases.
PassParameters->RWRadianceProbeAtlasTexture = RadianceProbeAtlasTextureUAV;
PassParameters->RWDepthProbeAtlasTexture = DepthProbeTextureUAV;
// Inputs: packed per-texel trace results plus the probe / trace-tile lookup buffers.
PassParameters->TraceTileResultPackedBuffer = GraphBuilder.CreateSRV(TraceTileResultPackedBuffer);
PassParameters->ProbeTraceData = GraphBuilder.CreateSRV(FRDGBufferSRVDesc(ProbeTraceData, PF_A32B32G32R32F));
PassParameters->ProbeTraceTileData = GraphBuilder.CreateSRV(FRDGBufferSRVDesc(ProbeTraceTileData, PF_R32G32_UINT));
PassParameters->ProbeTraceTileAllocator = GraphBuilder.CreateSRV(FRDGBufferSRVDesc(ProbeTraceTileAllocator, PF_R32_UINT));
PassParameters->RadianceCacheParameters = RadianceCacheParameters;
PassParameters->TraceProbesIndirectArgs = TraceProbesIndirectArgs;
// Element count of the result buffer, passed so the shader can compare result indices against it.
PassParameters->TraceTileResultPackedBufferElementCount = TraceTileResultPackedBufferElementCount;
// No permutation dimensions are set; the default permutation is used.
FSplatRadianceCacheIntoAtlasCS::FPermutationDomain PermutationVector;
auto ComputeShader = View.ShaderMap->GetShader<FSplatRadianceCacheIntoAtlasCS>(PermutationVector);
// Indirect dispatch driven by TraceProbesIndirectArgs (offset argument 0).
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("CompositeTracesIntoAtlas"),
ComputeShader,
PassParameters,
PassParameters->TraceProbesIndirectArgs,
0);
}
}
uint TraceTileResultPackedBufferElementCount;
StructuredBuffer<FTraceTileResultPacked> TraceTileResultPackedBuffer;

/**
 * Copies the packed per-texel trace results of one trace tile into the radiance and depth probe atlases.
 *
 * One thread group handles one trace tile; one thread handles one texel of that tile.
 *  - If the tile was traced at a lower resolution than the probe, each result is replicated
 *    over an UpsampleFactor x UpsampleFactor footprint.
 *  - Otherwise DownsampleFactor x DownsampleFactor results are averaged (radiance) and
 *    min-reduced (hit distance) into a single atlas texel.
 *
 * Result indices at or beyond TraceTileResultPackedBufferElementCount are flagged with a bright
 * green "visual assert" instead of being silently ignored.
 */
[numthreads(RADIANCE_CACHE_TRACE_TILE_SIZE_2D, RADIANCE_CACHE_TRACE_TILE_SIZE_2D, 1)]
void SplatRadianceCacheIntoAtlasCS(
	uint3 GroupId : SV_GroupID,
	uint3 GroupThreadId : SV_GroupThreadID
)
{
	// Groups are laid out in rows of TRACE_TILE_GROUP_STRIDE tiles; flatten to a linear tile index.
	uint TraceTileIndex = GroupId.y * TRACE_TILE_GROUP_STRIDE/*=128*/ + GroupId.x;
	if (TraceTileIndex >= ProbeTraceTileAllocator[0])
	{
		return;
	}

	uint2 TraceTileCoord;
	uint TraceTileLevel;
	uint ProbeTraceIndex;
	UnpackTraceTileInfo(ProbeTraceTileData[TraceTileIndex], TraceTileCoord, TraceTileLevel, ProbeTraceIndex);

	// Level 0 traces at half the probe resolution; each further level doubles it.
	uint TraceResolution = (RadianceProbeResolution / 2) << TraceTileLevel;
	// Not referenced by the code shown in this kernel.
	uint2 WrappedTileCoord = uint2(TraceTileIndex % TEMP_ATLAS_TRACE_TILE_STRIDE/*=1024*/, TraceTileIndex / TEMP_ATLAS_TRACE_TILE_STRIDE/*=1024*/);

	float3 ProbeWorldCenter;
	uint ClipmapIndex;
	uint ProbeIndex;
	GetProbeTraceData(ProbeTraceIndex, ProbeWorldCenter, ClipmapIndex, ProbeIndex);

	// Top-left atlas texel of this probe.
	uint2 ProbeAtlasBaseCoord = RadianceProbeResolution * uint2(ProbeIndex & ProbeAtlasResolutionModuloMask, ProbeIndex >> ProbeAtlasResolutionDivideShift);

	if (TraceResolution < RadianceProbeResolution)
	{
		// Upsample: one trace result covers UpsampleFactor^2 atlas texels.
		uint UpsampleFactor = RadianceProbeResolution / TraceResolution;
		ProbeAtlasBaseCoord += (RADIANCE_CACHE_TRACE_TILE_SIZE_2D * TraceTileCoord + GroupThreadId.xy) * UpsampleFactor;

		uint TraceTileResultIndex = TraceTileIndex * RADIANCE_CACHE_TRACE_TILE_SIZE_1D + GroupThreadId.y * RADIANCE_CACHE_TRACE_TILE_SIZE_2D + GroupThreadId.x;
		FTraceTileResult TraceTileResult = UnpackTraceTileResult(TraceTileResultPackedBuffer[TraceTileResultIndex]);

		float3 Lighting = TraceTileResult.Radiance;
		if (TraceTileResultIndex >= TraceTileResultPackedBufferElementCount)
		{
			// Visual assert on overflow due to r.Lumen.RadianceCache.HardwareRayTracing.TemporaryBufferAllocationDownsampleFactor
			Lighting = float3(0, 10000, 0) * View.PreExposure;
		}
		float HitDistance = TraceTileResult.HitDistance;

		for (uint Y = 0; Y < UpsampleFactor; Y++)
		{
			for (uint X = 0; X < UpsampleFactor; X++)
			{
				RWRadianceProbeAtlasTexture[ProbeAtlasBaseCoord + uint2(X, Y)] = Lighting;
				RWDepthProbeAtlasTexture[ProbeAtlasBaseCoord + uint2(X, Y)] = HitDistance;
			}
		}
	}
	else
	{
		// Downsample: DownsampleFactor^2 trace results collapse into one atlas texel,
		// so only the first WriteTileSize x WriteTileSize threads of the group write.
		uint DownsampleFactor = TraceResolution / RadianceProbeResolution;
		uint WriteTileSize = RADIANCE_CACHE_TRACE_TILE_SIZE_2D / DownsampleFactor;

		if (all(GroupThreadId.xy < WriteTileSize))
		{
			float3 Lighting = 0;
			float HitDistance = MaxHalfFloat;

			for (uint Y = 0; Y < DownsampleFactor; Y++)
			{
				for (uint X = 0; X < DownsampleFactor; X++)
				{
					uint TraceTileResultIndex = TraceTileIndex * RADIANCE_CACHE_TRACE_TILE_SIZE_1D + (GroupThreadId.y * DownsampleFactor + Y) * RADIANCE_CACHE_TRACE_TILE_SIZE_2D + (GroupThreadId.x * DownsampleFactor + X);
					FTraceTileResult TraceTileResult = UnpackTraceTileResult(TraceTileResultPackedBuffer[TraceTileResultIndex]);

					Lighting += TraceTileResult.Radiance;
					HitDistance = min(HitDistance, TraceTileResult.HitDistance);

					if (TraceTileResultIndex >= TraceTileResultPackedBufferElementCount)
					{
						// Visual assert on overflow due to r.Lumen.RadianceCache.HardwareRayTracing.TemporaryBufferAllocationDownsampleFactor
						Lighting += float3(0, 10000, 0) * View.PreExposure;
					}
				}
			}

			uint2 ProbeAtlasCoord = ProbeAtlasBaseCoord + WriteTileSize * TraceTileCoord + GroupThreadId.xy;
			// Radiance is the box-filtered average; depth keeps the closest hit.
			RWRadianceProbeAtlasTexture[ProbeAtlasCoord] = Lighting / (float)(DownsampleFactor * DownsampleFactor);
			RWDepthProbeAtlasTexture[ProbeAtlasCoord] = HitDistance;
		}
	}
}
<img width="853" alt="image" src="https://github.com/yasukichi/testcode/assets/14350715/7aa3b63a-1a7a-463a-ada6-dd5a67a87b1c">
C++(LumenRadianceCacheHardwareRayTracing.cpp)
// Excerpt of the radiance cache update (':' marks elided code).
// This part optionally runs a spatial filter over each radiance cache's probe atlas.
void UpdateRadianceCaches(...)
{
:
// Per-cache atlas texture that later stages should read; starts as the unfiltered source atlas.
TInlineArray<FRDGTextureRef> RadianceProbeAtlasTexture(InputArray.Num());
for (int32 RadianceCacheIndex = 0; RadianceCacheIndex < InputArray.Num(); RadianceCacheIndex++)
{
const FUpdateInputs& Inputs = InputArray[RadianceCacheIndex];
const FRadianceCacheInputs& RadianceCacheInputs = Inputs.RadianceCacheInputs;
const FViewInfo& View = Inputs.View;
const FRadianceCacheSetup& Setup = SetupOutputArray[RadianceCacheIndex];
FUpdateOutputs& Outputs = OutputArray[RadianceCacheIndex];
FRadianceCacheState& RadianceCacheState = Outputs.RadianceCacheState;
FRadianceCacheInterpolationParameters& RadianceCacheParameters = Outputs.RadianceCacheParameters;
RadianceProbeAtlasTexture[RadianceCacheIndex] = Setup.RadianceProbeAtlasTextureSource;
// Filtering is skipped entirely when GRadianceCacheFilterProbes is zero.
if (GRadianceCacheFilterProbes)
{
// The filter writes to a new texture with the same description as the source atlas.
FRDGTextureRef FilteredRadianceProbeAtlasTexture = GraphBuilder.CreateTexture(RadianceProbeAtlasTexture[RadianceCacheIndex]->Desc, TEXT("Lumen.RadianceCache.FilteredRadianceProbeAtlasTexture"));
{
FFilterProbeRadianceWithGatherCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FFilterProbeRadianceWithGatherCS::FParameters>();
// Output: filtered atlas. Inputs: unfiltered radiance atlas, depth atlas and per-probe trace data.
PassParameters->RWRadianceProbeAtlasTexture = GraphBuilder.CreateUAV(FRDGTextureUAVDesc(FilteredRadianceProbeAtlasTexture));
PassParameters->RadianceProbeAtlasTexture = RadianceProbeAtlasTexture[RadianceCacheIndex];
PassParameters->DepthProbeAtlasTexture = Setup.DepthProbeAtlasTexture;
PassParameters->ProbeTraceData = GraphBuilder.CreateSRV(FRDGBufferSRVDesc(ProbeTraceData[RadianceCacheIndex], PF_A32B32G32R32F));
PassParameters->View = View.ViewUniformBuffer;
PassParameters->RadianceCacheParameters = RadianceCacheParameters;
PassParameters->FilterProbesIndirectArgs = FilterProbesIndirectArgs[RadianceCacheIndex];
PassParameters->SpatialFilterMaxRadianceHitAngle = GLumenRadianceCacheFilterMaxRadianceHitAngle;
auto ComputeShader = View.ShaderMap->GetShader<FFilterProbeRadianceWithGatherCS>(0);
// Indirect dispatch driven by FilterProbesIndirectArgs (offset argument 0).
FComputeShaderUtils::AddPass(
GraphBuilder,
RDG_EVENT_NAME("FilterProbeRadiance Res=%ux%u", RadianceCacheInputs.RadianceProbeResolution, RadianceCacheInputs.RadianceProbeResolution),
ComputePassFlags,
ComputeShader,
PassParameters,
PassParameters->FilterProbesIndirectArgs,
0);
}
// From here on this cache's entry refers to the filtered atlas.
RadianceProbeAtlasTexture[RadianceCacheIndex] = FilteredRadianceProbeAtlasTexture;
}
}
:
- シェーダー(LumenRadianceCache.usf)
```C++
Texture2D<float3> RadianceProbeAtlasTexture;
Texture2D<float> DepthProbeAtlasTexture;
float SpatialFilterMaxRadianceHitAngle;

groupshared uint SharedRadiance[4][THREADGROUP_SIZE][THREADGROUP_SIZE];

/**
 * Spatially filters one probe's radiance by gathering the same texel from its six axis-aligned
 * neighbor probes and writing the weighted average to RWRadianceProbeAtlasTexture.
 *
 * GroupId.z selects the probe (trace index); DispatchThreadId.xy selects the probe texel.
 * Each neighbor's contribution is weighted by:
 *  - OcclusionWeight: 0 when a test point along the neighbor's ray is farther from this probe
 *    than this probe's stored depth in that direction, 1 otherwise.
 *  - AngleWeight: falls off linearly as the angle between this texel's direction and the
 *    direction to the neighbor's hit point approaches SpatialFilterMaxRadianceHitAngle.
 * The probe's own radiance always contributes with weight 1.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, 1)]
void FilterProbeRadianceWithGatherCS(
	uint3 GroupId : SV_GroupID,
	uint3 GroupThreadId : SV_GroupThreadID,
	uint3 DispatchThreadId : SV_DispatchThreadID)
{
	uint ProbeTraceIndex = GroupId.z;

	// Probe data is fetched once and shared by everything below.
	float3 ProbeWorldCenter;
	uint ClipmapIndex;
	uint ProbeIndex;
	GetProbeTraceData(ProbeTraceIndex, ProbeWorldCenter, ClipmapIndex, ProbeIndex);

	// Top-left atlas texel of this probe.
	uint2 ProbeAtlasBaseCoord = RadianceProbeResolution * uint2(ProbeIndex & ProbeAtlasResolutionModuloMask, ProbeIndex >> ProbeAtlasResolutionDivideShift);
	uint2 ProbeTexelCoord = DispatchThreadId.xy;

	if (all(ProbeTexelCoord < RadianceProbeResolution))
	{
		float3 Lighting = RadianceProbeAtlasTexture[ProbeTexelCoord + ProbeAtlasBaseCoord].xyz;
		float HitDistance = DepthProbeAtlasTexture[ProbeTexelCoord + ProbeAtlasBaseCoord];
		float TotalWeight = 1.0f;

		// World-space direction represented by this octahedral texel.
		float2 ProbeTexelCenter = float2(0.5, 0.5);
		float2 ProbeUV = (ProbeTexelCoord + ProbeTexelCenter) / (float)RadianceProbeResolution;
		float3 WorldConeDirection = OctahedronToUnitVector(ProbeUV * 2.0 - 1.0);

		int3 ProbeCoord = GetRadianceProbeCoord(ProbeWorldCenter, ClipmapIndex);

		// The six face-adjacent neighbors in the probe grid.
		int3 Offsets[6];
		Offsets[0] = int3(-1, 0, 0);
		Offsets[1] = int3(1, 0, 0);
		Offsets[2] = int3(0, -1, 0);
		Offsets[3] = int3(0, 1, 0);
		Offsets[4] = int3(0, 0, -1);
		Offsets[5] = int3(0, 0, 1);

		for (uint OffsetIndex = 0; OffsetIndex < 6; OffsetIndex++)
		{
			int3 NeighborCoord = ProbeCoord + Offsets[OffsetIndex];

			// Skip neighbors outside the clipmap grid.
			if (all(NeighborCoord >= 0) && all(NeighborCoord < (int3)RadianceProbeClipmapResolution))
			{
				uint NeighborProbeIndex = GetProbeIndexFromIndirectionTexture(NeighborCoord, ClipmapIndex);

				// Skip grid cells that have no probe allocated.
				if (NeighborProbeIndex != INVALID_PROBE_INDEX)
				{
					uint2 NeighborProbeAtlasBaseCoord = RadianceProbeResolution * uint2(NeighborProbeIndex & ProbeAtlasResolutionModuloMask, NeighborProbeIndex >> ProbeAtlasResolutionDivideShift);
					float NeighborRadianceDepth = DepthProbeAtlasTexture[ProbeTexelCoord + NeighborProbeAtlasBaseCoord];
					float3 NeighborWorldPosition = GetProbeWorldPosition(NeighborCoord, ClipmapIndex, NeighborProbeIndex);

					float OcclusionWeight = 1.0f;
					{
						// Test a nearby position along the neighbor ray's path, if occluded then discard the neighbor radiance to reduce leaking
						// We can't test occlusion of the neighbor probe, because it is closer than GetRadianceProbeTMin
						float VoxelRadius = sqrt(3.0f) / GetWorldPositionToRadianceProbeCoordScale(ClipmapIndex);
						float OcclusionDistance = max(GetRadianceProbeTMin(ClipmapIndex), VoxelRadius);
						float3 NeighborOcclusionTestPosition = NeighborWorldPosition + 2 * OcclusionDistance * WorldConeDirection;
						float3 ToNeighborOcclusionPosition = NeighborOcclusionTestPosition - ProbeWorldCenter;

						// Look up this probe's own depth in the direction of the test position.
						uint2 ProbeTexelCoordForNeighborOcclusionPosition = (UnitVectorToOctahedron(ToNeighborOcclusionPosition) * 0.5 + 0.5) * RadianceProbeResolution;
						float ProbeDepthForNeighborOcclusionPosition = DepthProbeAtlasTexture[ProbeTexelCoordForNeighborOcclusionPosition + ProbeAtlasBaseCoord];

						// Compare squared distances: the test position lies behind what this probe sees.
						if (ProbeDepthForNeighborOcclusionPosition * ProbeDepthForNeighborOcclusionPosition < dot(ToNeighborOcclusionPosition, ToNeighborOcclusionPosition))
						{
							OcclusionWeight = 0.0f;
						}
					}

					// Clamp neighbor's hit distance to our own. This helps preserve contact shadows, as a long neighbor hit distance will cause a small NeighborAngle and bias toward distant lighting.
					if (HitDistance >= 0)
					{
						NeighborRadianceDepth = min(NeighborRadianceDepth, HitDistance);
					}

					// Angle between this texel's direction and the direction from this probe to the neighbor's hit point.
					float3 NeighborHitPosition = NeighborWorldPosition + WorldConeDirection * NeighborRadianceDepth;
					float3 ToNeighborHit = NeighborHitPosition - ProbeWorldCenter;
					float NeighborAngle = acosFast(dot(ToNeighborHit, WorldConeDirection) / length(ToNeighborHit));
					float AngleWeight = 1.0f - saturate(NeighborAngle / SpatialFilterMaxRadianceHitAngle);

					float Weight = AngleWeight * OcclusionWeight;
					Lighting += RadianceProbeAtlasTexture[ProbeTexelCoord + NeighborProbeAtlasBaseCoord].xyz * Weight;
					TotalWeight += Weight;
				}
			}
		}

		// TotalWeight starts at 1 (self contribution), so the division is always well-defined.
		RWRadianceProbeAtlasTexture[ProbeTexelCoord + ProbeAtlasBaseCoord] = Lighting / TotalWeight;
	}
}
UE5 Lumenに関するまとめ Lumenの技術的詳細
Ray tracingはRenderDocと互換性がないため、Ray tracingを有効化するには、RenderDoc pluginを無効化する必要がある