yasukichi / testcode

okawa's personal testcode
0 stars 0 forks source link

UE5 RTXブランチ #25

Open yasukichi opened 8 months ago

yasukichi commented 8 months ago

https://developer.nvidia.com/game-engines/unreal-engine/rtx-branch

yasukichi commented 8 months ago

GRHISupportsShaderExecutionReordering

#if WITH_NVAPI

if (IsRHIDeviceNVIDIA() && bAllowVendorDevice)
{
    const NvAPI_Status NvStatus = NvAPI_Initialize();
    if (NvStatus == NVAPI_OK)
    {
        NvAPI_D3D12_IsNvShaderExtnOpCodeSupported(GetAdapter().GetD3DDevice(), NV_EXTN_OP_UINT64_ATOMIC, &bHasVendorSupportForAtomic64);

        NvAPI_Status NvStatusSER = NvAPI_D3D12_IsNvShaderExtnOpCodeSupported(GetAdapter().GetD3DDevice(), NV_EXTN_OP_HIT_OBJECT_REORDER_THREAD, &GRHISupportsShaderExecutionReordering);
        :
    }

    NvU32 DriverVersion = UINT32_MAX;

    NvAPI_ShortString BranchString("");
    NvAPI_SYS_GetDriverAndBranchVersion(&DriverVersion, BranchString);
    :

} // if NVIDIA

#endif // NVAPI

:

}


## PrepareRTXDILumenReflections, PrepareRTXDILumenSurfaceCache
- C++(DeferredShadingRenderer.cpp)
```C++
// Abridged excerpt of the deferred shading renderer's Render(); a line containing only ':' marks code that
// was omitted from this note. The excerpt has more '{' than '}', so at least one closing brace is among the
// omitted lines.
// The visible portion tracks bNeedToWaitForRayTracingScene (RHI_RAYTRACING builds only): the flag is returned
// by DispatchRayTracingWorldUpdates(), and WaitForRayTracingScene() is issued at the first of several candidate
// points whose condition holds -- three Lumen-dependent ones and a final unconditional fallback. The flag is
// cleared after each wait so the later points are skipped.
void FDeferredShadingSceneRenderer::Render(FRDGBuilder& GraphBuilder)
{
    :

    FRDGTextureRef ViewFamilyTexture = TryCreateViewFamilyTexture(GraphBuilder, ViewFamily);
    if (RendererOutput == ERendererOutput::DepthPrepassOnly)
    {
        :
    }
    else
    {
        GVRSImageManager.PrepareImageBasedVRS(GraphBuilder, ViewFamily, SceneTextures);
        :

        FRDGBufferRef DynamicGeometryScratchBuffer = nullptr;
#if RHI_RAYTRACING
        // Async AS builds can potentially overlap with BasePass.
        bool bNeedToWaitForRayTracingScene = DispatchRayTracingWorldUpdates(GraphBuilder, DynamicGeometryScratchBuffer);

        /** Should be called somewhere before "WaitForRayTracingScene" */
        SetupRayTracingLightDataForViews(GraphBuilder);
#endif

        // First candidate wait point: Lumen scene lighting, skipped when a ray traced overlay is active.
        if (!bHasRayTracedOverlay)
        {
#if RHI_RAYTRACING
            // Lumen scene lighting requires ray tracing scene to be ready if HWRT shadows are desired
            if (bNeedToWaitForRayTracingScene && Lumen::UseHardwareRayTracedSceneLighting(ViewFamily))
            {
                WaitForRayTracingScene(GraphBuilder, DynamicGeometryScratchBuffer);
                bNeedToWaitForRayTracingScene = false;
            }
#endif

            LLM_SCOPE_BYTAG(Lumen);
            BeginGatheringLumenSurfaceCacheFeedback(GraphBuilder, Views[0], LumenFrameTemporaries);
            RenderLumenSceneLighting(GraphBuilder, LumenFrameTemporaries, InitViewTaskDatas.LumenDirectLighting);
        }
        :

        // Shadows, lumen and fog after base pass
        if (!bHasRayTracedOverlay)
        {
#if RHI_RAYTRACING
            // When Lumen HWRT is running async we need to wait for ray tracing scene before dispatching the work
            if (bNeedToWaitForRayTracingScene && Lumen::UseAsyncCompute(ViewFamily) && Lumen::UseHardwareInlineRayTracing(ViewFamily))
            {
                WaitForRayTracingScene(GraphBuilder, DynamicGeometryScratchBuffer);
                bNeedToWaitForRayTracingScene = false;
            }
#endif // RHI_RAYTRACING

            DispatchAsyncLumenIndirectLightingWork(
                GraphBuilder,
                CompositionLighting,
                SceneTextures,
                LumenFrameTemporaries,
                LightingChannelsTexture,
                bHasLumenLights,
                AsyncLumenIndirectLightingOutputs);
            :

#if RHI_RAYTRACING
            // Lumen scene lighting requires ray tracing scene to be ready if HWRT shadows are desired
            if (bNeedToWaitForRayTracingScene && Lumen::UseHardwareRayTracedSceneLighting(ViewFamily))
            {
                WaitForRayTracingScene(GraphBuilder, DynamicGeometryScratchBuffer);
                bNeedToWaitForRayTracingScene = false;
            }
#endif // RHI_RAYTRACING
        }
        :

#if RHI_RAYTRACING
        // If Lumen did not force an earlier ray tracing scene sync, we must wait for it here.
        if (bNeedToWaitForRayTracingScene)
        {
            WaitForRayTracingScene(GraphBuilder, DynamicGeometryScratchBuffer);
            bNeedToWaitForRayTracingScene = false;
        }
#endif // RHI_RAYTRACING
        :

        if (bRenderDeferredLighting)
        {
            :

            // RenderLights() is bracketed by the STAT_CLM_Lighting / STAT_CLM_AfterLighting command list stats.
            GraphBuilder.SetCommandListStat(GET_STATID(STAT_CLM_Lighting));
            RenderLights(GraphBuilder, SceneTextures, TranslucencyLightingVolumeTextures, LightingChannelsTexture, SortedLightSet);
            GraphBuilder.SetCommandListStat(GET_STATID(STAT_CLM_AfterLighting));
            :

        }
        :

}

/**
 * Gathers the ray generation shaders required by the active ray tracing effects, creates the ray tracing
 * pipelines stored on ReferenceView, and then shares the material pipeline with the other ray traced views.
 * A line containing only ':' marks code that was omitted from this excerpt.
 */
bool FDeferredShadingSceneRenderer::SetupRayTracingPipelineStates(FRDGBuilder& GraphBuilder)
{
    :

    if (GRHISupportsRayTracingShaders)
    {
        // #dxr_todo: UE-72565: ray tracing effects should stop being member functions of DeferredShadingRenderer.
        // Each effect should register itself at startup so the required shaders can be gathered by a simple loop.

        TArray<FRHIRayTracingShader*> RayGenShaders;

        // Around 120 raygen shaders are typical; reserve extra room so a low estimate does not force a reallocation.
        RayGenShaders.Reserve(256);

        if (!bIsPathTracing)
        {
            // Path tracing is off: collect the raygen shaders of every other effect.
            PrepareRayTracingDebug(RayGenShaders);

            // The remaining effects are driven by FinalPostProcessSettings and may therefore depend on the
            // camera position, so their shaders are merged across all views.
            if (!IsForwardShadingEnabled(ShaderPlatform))
            {
                for (const FViewInfo& View : Views)
                {
                    PrepareRayTracingReflections(View, *Scene, RayGenShaders);
                    PrepareSingleLayerWaterRayTracingReflections(View, *Scene, RayGenShaders);
                    PrepareRayTracingShadows(View, *Scene, RayGenShaders);
                    PrepareRayTracingAmbientOcclusion(View, RayGenShaders);
                    PrepareRayTracingSkyLight(View, *Scene, RayGenShaders);
                    PrepareRayTracingGlobalIllumination(View, RayGenShaders);
                    PrepareRayTracingGlobalIlluminationPlugin(View, RayGenShaders);
                    PrepareRayTracingTranslucency(View, RayGenShaders);
                    PrepareRayTracingVolumetricFogShadows(View, *Scene, RayGenShaders);

                    if (DoesPlatformSupportLumenGI(ShaderPlatform) && Lumen::UseHardwareRayTracing(ViewFamily))
                    {
                        PrepareLumenHardwareRayTracingScreenProbeGather(View, RayGenShaders);
                        PrepareLumenHardwareRayTracingShortRangeAO(View, RayGenShaders);
                        PrepareLumenHardwareRayTracingRadianceCache(View, RayGenShaders);
                        PrepareLumenHardwareRayTracingReflections(View, RayGenShaders);
                        PrepareLumenHardwareRayTracingVisualize(View, RayGenShaders);

                        // RTXDI sampled lighting for Lumen reflections is registered with the Lumen HWRT passes.
                        PrepareRTXDILumenReflections(View, RayGenShaders);
                    }

                    PrepareRayTracingSampledDirectLighting(View, RayGenShaders);
                }
            }
            DeduplicateRayGenerationShaders(RayGenShaders);
        }
        else
        {
            // A path traced view only needs the path tracing raygen shaders; all other passes should be disabled.
            PreparePathTracing(ViewFamily, *Scene, RayGenShaders);
        }

        if (RayGenShaders.Num() > 0)
        {
            // Create the RTPSO and kick off the high-level material parameter binding tasks, which are
            // consumed during RDG execution in BindRayTracingMaterialPipeline().
            ReferenceView.RayTracingMaterialPipeline = CreateRayTracingMaterialPipeline(GraphBuilder, ReferenceView, RayGenShaders);
        }
    }

    // Deferred material gather shaders
    if (GRHISupportsRayTracingShaders)
    {
        TArray<FRHIRayTracingShader*> DeferredMaterialRayGenShaders;
        if (!IsForwardShadingEnabled(ShaderPlatform))
        {
            for (const FViewInfo& View : Views)
            {
                PrepareRayTracingReflectionsDeferredMaterial(View, *Scene, DeferredMaterialRayGenShaders);
                PrepareRayTracingDeferredReflectionsDeferredMaterial(View, *Scene, DeferredMaterialRayGenShaders);
                PrepareRayTracingGlobalIlluminationDeferredMaterial(View, DeferredMaterialRayGenShaders);
                if (DoesPlatformSupportLumenGI(ShaderPlatform))
                {
                    PrepareLumenHardwareRayTracingReflectionsDeferredMaterial(View, DeferredMaterialRayGenShaders);
                    PrepareLumenHardwareRayTracingRadianceCacheDeferredMaterial(View, DeferredMaterialRayGenShaders);
                    PrepareLumenHardwareRayTracingScreenProbeGatherDeferredMaterial(View, DeferredMaterialRayGenShaders);
                    PrepareLumenHardwareRayTracingVisualizeDeferredMaterial(View, DeferredMaterialRayGenShaders);
                }
            }
        }

        DeduplicateRayGenerationShaders(DeferredMaterialRayGenShaders);

        if (DeferredMaterialRayGenShaders.Num() > 0)
        {
            ReferenceView.RayTracingMaterialGatherPipeline = CreateRayTracingDeferredMaterialGatherPipeline(GraphBuilder.RHICmdList, ReferenceView, DeferredMaterialRayGenShaders);
        }
    }

    // Lumen hardware ray tracing materials
    if (GRHISupportsRayTracingShaders)
    {
        TArray<FRHIRayTracingShader*> LumenHardwareRayTracingRayGenShaders;
        if (DoesPlatformSupportLumenGI(ShaderPlatform))
        {
            for (const FViewInfo& View : Views)
            {
                PrepareLumenHardwareRayTracingVisualizeLumenMaterial(View, LumenHardwareRayTracingRayGenShaders);
                PrepareLumenHardwareRayTracingRadianceCacheLumenMaterial(View, LumenHardwareRayTracingRayGenShaders);
                PrepareLumenHardwareRayTracingTranslucencyVolumeLumenMaterial(View, LumenHardwareRayTracingRayGenShaders);
                PrepareLumenHardwareRayTracingRadiosityLumenMaterial(View, LumenHardwareRayTracingRayGenShaders);
                PrepareLumenHardwareRayTracingReflectionsLumenMaterial(View, LumenHardwareRayTracingRayGenShaders);
                PrepareLumenHardwareRayTracingScreenProbeGatherLumenMaterial(View, LumenHardwareRayTracingRayGenShaders);
                PrepareLumenHardwareRayTracingDirectLightingLumenMaterial(View, LumenHardwareRayTracingRayGenShaders);

                // RTXDI sampled lighting for the Lumen surface cache is registered with the Lumen materials.
                PrepareRTXDILumenSurfaceCache(View, LumenHardwareRayTracingRayGenShaders);
            }
        }
        DeduplicateRayGenerationShaders(LumenHardwareRayTracingRayGenShaders);

        if (LumenHardwareRayTracingRayGenShaders.Num() > 0)
        {
            ReferenceView.LumenHardwareRayTracingMaterialPipeline = CreateLumenHardwareRayTracingMaterialPipeline(GraphBuilder.RHICmdList, ReferenceView, LumenHardwareRayTracingRayGenShaders);
        }
    }

    // Hand the common ray tracing lighting resources to every other ray traced view of the family.
    for (const auto* FamilyView : AllFamilyViews)
    {
        // TODO: Common ray tracing resources would be better kept in a shared structure than copied into each
        //       FViewInfo. A goal is for FViewInfo to be visible only to the scene renderer that owns it, to
        //       avoid dependencies that could cause maintenance issues or interfere with parallelism goals.
        //       For now, this works though...
        FViewInfo* View = const_cast<FViewInfo*>(static_cast<const FViewInfo*>(FamilyView));

        // Views without a ray tracing pass, and the reference view itself, need no copy.
        if (!View->bHasAnyRayTracingPass || View == &ReferenceView)
        {
            continue;
        }

        View->RayTracingMaterialPipeline = ReferenceView.RayTracingMaterialPipeline;
    }

    return true;
}

- C++(SampledLightRendering.cpp)
```C++
void FDeferredShadingSceneRenderer::PrepareRayTracingSampledDirectLighting(const FViewInfo& View, TArray<FRHIRayTracingShader*>& OutRayGenShaders)
{
    // Declare all RayGen shaders that require material closest hit shaders to be bound

    if (!ShouldRenderRayTracingSampledLighting())
    {
        return;
    }

    if (ShouldRenderRayTracedSampledLightingForTranslucency())
    {
        PrepareTranslucentRayTracingSampledDirectLighting(View, OutRayGenShaders);
    }

    const int32 LightingMode = CVarSampledLightingMode.GetValueOnRenderThread();
    const int32 PresampleLightsMode = (int32)GetSampledLightPresamplingMode(View);
    const bool bAvoidSelfIntersection = CVarRayTracingSampledLightingAvoidSelfIntersectionTraceDistance.GetValueOnRenderThread() != 0.0f;
    const bool UsePairwiseMIS = CVarSampledLightingSpatialUsePairwiseMIS.GetValueOnRenderThread() != 0;
    const bool AlwaysSampleDirLight = CVarSampledLightingAlwaysSampleDirLight.GetValueOnRenderThread() != 0;
    const bool bEvaluateStrandBasedHair = CVarRayTracingSampledLightingHair.GetValueOnRenderThread() != 0;
    const bool bUseBrdfSamples = ShouldUseBrdfSamples();

    // strand based hair handles aniso independently and never can self intersect
    if (bEvaluateStrandBasedHair)
    {
        FDirectLightRGS::FPermutationDomain PermutationVector;
        PermutationVector.Set<FDirectLightRGS::FHairShadingDim>(true);
        PermutationVector.Set<FDirectLightRGS::FRisSampleLocalLightsDim>(PresampleLightsMode);
        PermutationVector.Set<FDirectLightRGS::FAvoidSelfIntersectionDim>(false);
        PermutationVector.Set<FDirectLightRGS::FSampleDirectionalLightOnly>(false);
        PermutationVector.Set<FDirectLightRGS::FAnisotropicMaterialsDim>(false);
        PermutationVector.Set<FDirectLightRGS::FSupportBrdfRayDim>(false);
        PermutationVector.Set<FDirectLightRGS::FHairLightingDim>(true);

        auto RayGenShader = View.ShaderMap->GetShader<FDirectLightRGS>(PermutationVector);
        OutRayGenShaders.Add(RayGenShader.GetRayTracingShader());
    }

    // aniso can change as a result of what is in the view, so always bind aniso and non-aniso versions if they can be present
    for (int32 AnisoPermutation = 0; AnisoPermutation < 2; AnisoPermutation++)
    {
        for (int32 Permutation = 0; Permutation < 2; Permutation++)
        {
            if (LightingMode == 0)
            {
                FDirectLightRGS::FPermutationDomain PermutationVector;
                PermutationVector.Set<FDirectLightRGS::FHairShadingDim>(false);
                PermutationVector.Set<FDirectLightRGS::FRisSampleLocalLightsDim>(PresampleLightsMode);
                PermutationVector.Set<FDirectLightRGS::FAvoidSelfIntersectionDim>(bAvoidSelfIntersection);
                PermutationVector.Set<FDirectLightRGS::FSampleDirectionalLightOnly>(false);
                PermutationVector.Set<FDirectLightRGS::FAnisotropicMaterialsDim>(AnisoPermutation != 0);
                PermutationVector.Set<FDirectLightRGS::FSupportBrdfRayDim>(bUseBrdfSamples);
                PermutationVector.Set<FDirectLightRGS::FHairLightingDim>(Permutation != 0);

                auto RayGenShader = View.ShaderMap->GetShader<FDirectLightRGS>(PermutationVector);
                OutRayGenShaders.Add(RayGenShader.GetRayTracingShader());
            }
            else if (AlwaysSampleDirLight)
            {
                FDirectLightRGS::FPermutationDomain PermutationVector;
                PermutationVector.Set<FDirectLightRGS::FHairShadingDim>(false);
                PermutationVector.Set<FDirectLightRGS::FRisSampleLocalLightsDim>(0);    // we won't use RIS in this permutation
                PermutationVector.Set<FDirectLightRGS::FAvoidSelfIntersectionDim>(bAvoidSelfIntersection);
                PermutationVector.Set<FDirectLightRGS::FSampleDirectionalLightOnly>(true);
                PermutationVector.Set<FDirectLightRGS::FAnisotropicMaterialsDim>(AnisoPermutation != 0);
                PermutationVector.Set<FDirectLightRGS::FSupportBrdfRayDim>(false);
                PermutationVector.Set<FDirectLightRGS::FHairLightingDim>(Permutation != 0);

                auto RayGenShader = View.ShaderMap->GetShader<FDirectLightRGS>(PermutationVector);
                OutRayGenShaders.Add(RayGenShader.GetRayTracingShader());
            }
        }

        {
            FGenerateInitialSamplesRGS::FPermutationDomain PermutationVector;
            PermutationVector.Set<FGenerateInitialSamplesRGS::FRisSampleLocalLightsDim>(PresampleLightsMode);
            PermutationVector.Set<FGenerateInitialSamplesRGS::FAnisotropicMaterialsDim>(AnisoPermutation != 0);
            PermutationVector.Set<FGenerateInitialSamplesRGS::FSupportBrdfRayDim>(bUseBrdfSamples);
            AddShaderPermutation<FGenerateInitialSamplesRGS>(PermutationVector, View, OutRayGenShaders);
        }

        {
            FApplySpatialResamplingRGS::FPermutationDomain PermutationVector;
            PermutationVector.Set<FApplySpatialResamplingRGS::FPairwiseMISDim>(UsePairwiseMIS);
            PermutationVector.Set<FApplySpatialResamplingRGS::FAnisotropicMaterialsDim>(AnisoPermutation != 0);
            AddShaderPermutation< FApplySpatialResamplingRGS>(PermutationVector, View, OutRayGenShaders);
        }

        AddShaderPermutation<FComputeTemporalGradientsRGS>(View, OutRayGenShaders);

        for (int32 Permutation = 0; Permutation < 2; Permutation++)
        {
            FApplyTemporalResamplingRGS::FPermutationDomain PermutationVector;

            PermutationVector.Set<FApplyTemporalResamplingRGS::FFuseInitialSamplingDim>(Permutation != 0);
            PermutationVector.Set<FApplyTemporalResamplingRGS::FRisSampleLocalLightsDim>(Permutation != 0 ? PresampleLightsMode : 0);
            PermutationVector.Set<FApplyTemporalResamplingRGS::FAnisotropicMaterialsDim>(AnisoPermutation != 0);
            PermutationVector.Set<FApplyTemporalResamplingRGS::FSupportBrdfRayDim>(bUseBrdfSamples && Permutation != 0);

            AddShaderPermutation< FApplyTemporalResamplingRGS>(PermutationVector, View, OutRayGenShaders);
        }

        for (int32 Permutation = 0; Permutation < 2; Permutation++)
        {
            FEvaluateSampledLightingRGS::FPermutationDomain PermutationVector;

            PermutationVector.Set<FEvaluateSampledLightingRGS::FHairLightingDim>(Permutation != 0);
            PermutationVector.Set<FEvaluateSampledLightingRGS::FAvoidSelfIntersectionDim>(bAvoidSelfIntersection);
            PermutationVector.Set<FEvaluateSampledLightingRGS::FAnisotropicMaterialsDim>(AnisoPermutation != 0);

            AddShaderPermutation< FEvaluateSampledLightingRGS>(PermutationVector, View, OutRayGenShaders);
        }
    }
}

void FDeferredShadingSceneRenderer::PrepareRTXDILumenReflections(const FViewInfo& View, TArray<FRHIRayTracingShader*>& OutRayGenShaders) { const bool bLumenGIEnabled = GetViewPipelineState(View).DiffuseIndirectMethod == EDiffuseIndirectMethod::Lumen; if (!ShouldRenderRTXDILumenReflections(View, bLumenGIEnabled)) return;

const int32 MaxBounces = LumenReflections::GetMaxReflectionBounces(View);
const bool bUseBrdfSampling = GetRTXDILumenReflectionsNumBrdfSamples() > 0;
const bool bUseReordering = LumenReflections::UseShaderExecutionReordering(*View.Family);

// 2 permutations bound:
//     2 hair occlusion modes
for (int32 HairOcclusion = 0; HairOcclusion < 2; HairOcclusion++)
{
    FRTXDILumenReflections::FPermutationDomain PermutationVector;
    PermutationVector.Set<FRTXDILumenReflections::FEnableFarFieldTracing>(IsRTXDILumenReflectionsFarFieldEnabled());
    PermutationVector.Set<FRTXDILumenReflections::FHairStrandsOcclusionDim>(HairOcclusion >= 1);
    PermutationVector.Set<FRTXDILumenReflections::FPresamplingMode>(GetRTXDILumenReflectionsPresamplingMode(View));
    PermutationVector.Set<FRTXDILumenReflections::FRecursiveReflectionTraces>(MaxBounces > 1);
    PermutationVector.Set<FRTXDILumenReflections::FSupportBrdfRayDim>(bUseBrdfSampling);
    PermutationVector.Set<FRTXDILumenReflections::FShaderExecutionReorderingDim>(bUseReordering);

    TShaderRef<FRTXDILumenReflectionsRGS> RayGenerationShader = View.ShaderMap->GetShader<FRTXDILumenReflectionsRGS>(PermutationVector);
    OutRayGenShaders.Add(RayGenerationShader.GetRayTracingShader());
}

}

- C++(RTXDLumenSurfaceCache.cpp)
```C++
// Excerpt of the Lumen hardware ray tracing shader class for sampled direct lighting of Lumen cards;
// a line containing only ':' marks code omitted from this note.
class FLumenCardSampledDirectLighting : public FLumenHardwareRayTracingShaderBase
{
    DECLARE_LUMEN_RAYTRACING_SHADER(FLumenCardSampledDirectLighting, Lumen::ERayTracingShaderDispatchSize::DispatchSize2D)

    // Permutations
    // Single dimension: the ERTXDIPresamplingMode value, registered under the name "RIS_SAMPLE_LOCAL_LIGHTS".
    class FPresamplingMode : SHADER_PERMUTATION_ENUM_CLASS("RIS_SAMPLE_LOCAL_LIGHTS", ERTXDIPresamplingMode);
    using FPermutationDomain = TShaderPermutationDomain<FPresamplingMode>;
    :
}

void FDeferredShadingSceneRenderer::PrepareRTXDILumenSurfaceCache(const FViewInfo& View, TArray<FRHIRayTracingShader*>& OutRayGenShaders)
{
    if (!ShouldRenderRTXDILumenSurfaceCache(View))
        return;

    for (int32 PresamplingModeIndex = 0; PresamplingModeIndex < (int32)ERTXDIPresamplingMode::MAX; PresamplingModeIndex++)
    {
        FLumenCardSampledDirectLightingRGS::FPermutationDomain PermutationVector;
        PermutationVector.Set<FLumenCardSampledDirectLightingRGS::FPresamplingMode>((ERTXDIPresamplingMode)PresamplingModeIndex);
        TShaderRef<FLumenCardSampledDirectLightingRGS> RayGenerationShader = View.ShaderMap->GetShader<FLumenCardSampledDirectLightingRGS>(PermutationVector);
        OutRayGenShaders.Add(RayGenerationShader.GetRayTracingShader());
    }
}
yasukichi commented 8 months ago

RenderSampledDirectLightingForLumenReflections

}

- C++(RTXDILumenReflections.cpp)
```C++
/**
 * Adds the indirect ray tracing dispatch that evaluates RTXDI sampled direct lighting for the
 * compacted Lumen reflection traces.
 *
 * @param GraphBuilder                 Render graph that receives the pass and the SRVs created for it.
 * @param View                         View being rendered; it must have a valid SampledLightingState and a ray tracing scene.
 * @param bLumenGIEnabled              Forwarded to ShouldBuildLightBvh() to decide whether the light TLAS is bound.
 * @param SceneTextureParameters       Forwarded to SetLumenHardwareRayTracingSharedParameters().
 * @param TracingParameters            Forwarded to SetLumenHardwareRayTracingSharedParameters().
 * @param ReflectionTracingParameters  Copied into the pass parameters.
 * @param CompactedTraceParameters     Supplies the compacted trace texel buffers and the indirect dispatch arguments.
 */
void RenderSampledDirectLightingForLumenReflections(
    FRDGBuilder& GraphBuilder,
    const FViewInfo& View,
    const bool bLumenGIEnabled,
    const FSceneTextureParameters& SceneTextureParameters,
    const FLumenCardTracingParameters& TracingParameters,
    const FLumenReflectionTracingParameters& ReflectionTracingParameters,
    const FCompactedReflectionTraceParameters& CompactedTraceParameters)
{
    check(View.SampledLightingState.IsValid());
    const FSampledLightingResources& Resources = *View.SampledLightingState.Get();

    checkf(View.HasRayTracingScene(), TEXT("TLAS does not exist. Verify that the current pass is represented in Lumen::AnyLumenHardwareRayTracingPassEnabled()."));

    // Underlying buffers of the compacted trace views; fresh R32_UINT SRVs are created over them below.
    FRDGBufferRef CompactedTraceTexelAllocator = CompactedTraceParameters.CompactedTraceTexelAllocator->Desc.Buffer;
    FRDGBufferRef CompactedTraceTexelData = CompactedTraceParameters.CompactedTraceTexelData->Desc.Buffer;

    const int32 MaxBounces = LumenReflections::GetMaxReflectionBounces(View);
    const int32 NumBrdfSamples = GetRTXDILumenReflectionsNumBrdfSamples();

    // Set the pass parameters
    FRTXDILumenReflections::FParameters* PassParameters = GraphBuilder.AllocParameters<FRTXDILumenReflections::FParameters>();
    PassParameters->RayTracingIndirectArgs = CompactedTraceParameters.RayTraceDispatchIndirectArgs;

    PassParameters->CompactedTraceTexelAllocator = GraphBuilder.CreateSRV(CompactedTraceTexelAllocator, PF_R32_UINT);
    PassParameters->CompactedTraceTexelData = GraphBuilder.CreateSRV(CompactedTraceTexelData, PF_R32_UINT);

    // Set shared parameters
    SetLumenHardwareRayTracingSharedParameters(GraphBuilder, SceneTextureParameters, View, TracingParameters, &PassParameters->SharedParameters);

    // Constants
    PassParameters->ApplySkyLight = true;
    PassParameters->FarFieldReferencePos = (FVector3f)Lumen::GetFarFieldReferencePos();
    PassParameters->PullbackBias = Lumen::GetHardwareRayTracingPullbackBias();

    PassParameters->ReflectionNormalBias = GetRTXDILumenReflectionsNormalBias();
    PassParameters->ReflectionViewBias = GetRTXDILumenReflectionsViewBias();

    // RTXDI
    {
        PassParameters->RTXDIParameters.InitialSampleParameters = GetSampledLightingInitialSampleParameters(GraphBuilder, Resources);
        PassParameters->RTXDIParameters.RayVisibilityParameters = GetSampledLightingRayVisibilityParameters();

        // Iteration and candidate counts are clamped so a negative setting becomes zero.
        PassParameters->RTXDIParameters.NumLocalIterations = FMath::Max(0, GetRTXDILumenReflectionsNumLocalIterations());
        PassParameters->RTXDIParameters.NumLocalCandidates = FMath::Max(0, GetRTXDILumenReflectionsNumLocalCandidates());
        PassParameters->RTXDIParameters.NumDirectionalIterations = FMath::Max(0, GetRTXDILumenReflectionsNumDirectionalIterations());
        PassParameters->RTXDIParameters.NumDirectionalCandidates = FMath::Max(0, GetRTXDILumenReflectionsNumDirectionalCandidates());
        PassParameters->RTXDIParameters.NumSkyIterations = FMath::Max(0, GetRTXDILumenReflectionsNumSkyIterations());
        PassParameters->RTXDIParameters.NumSkyCandidates = FMath::Max(0, GetRTXDILumenReflectionsNumSkyCandidates());
        PassParameters->RTXDIParameters.NumBrdfCandidates = FMath::Max(0, NumBrdfSamples);
        PassParameters->RTXDIParameters.bEvaluateAllLights = IsRTXDILumenReflectionsEvaluateAllLightsEnabled();
        PassParameters->RTXDIParameters.SkylightTexture = GraphBuilder.CreateSRV(FRDGTextureSRVDesc::Create(Resources.SkyLightRIS.EnvTexture));
        PassParameters->RTXDIParameters.SkylightTextureSampler = TStaticSamplerState<SF_Bilinear>::GetRHI();
        PassParameters->RTXDIParameters.SampledLightData = Resources.SampledLightDataUniformBuffer;
        PassParameters->RTXDIParameters.DebugScreenCoords = FUintVector2(GetRTXDILumenDebugScreenCoordX(), GetRTXDILumenDebugScreenCoordY());

        // The light TLAS is only bound when a light BVH is built for this view.
        PassParameters->LightTLAS = !ShouldBuildLightBvh(View, bLumenGIEnabled) ? nullptr : View.GetRayTracingSceneLayerViewChecked(ERayTracingSceneLayer::SampledLights);

#if SUPPORT_RTXDI_LUMENREFLECTIONS_DECALS
        PassParameters->DecalParameters = View.RayTracingDecalUniformBuffer;
#endif
    }

    // Resources
    PassParameters->ReflectionTracingParameters = ReflectionTracingParameters;

    extern int32 GLumenReflectionHairStrands_VoxelTrace;
    const bool bNeedTraceHairVoxel = HairStrands::HasViewHairStrandsVoxelData(View) && GLumenReflectionHairStrands_VoxelTrace > 0;

    if (bNeedTraceHairVoxel)
    {
        PassParameters->HairStrandsVoxel = HairStrands::BindHairStrandsVoxelUniformParameters(View);
    }

    const bool bUseReordering = LumenReflections::UseShaderExecutionReordering(*View.Family);

    // Get the ray generation shader permutation
    FRTXDILumenReflectionsRGS::FPermutationDomain PermutationVector;
    PermutationVector.Set<FRTXDILumenReflectionsRGS::FEnableFarFieldTracing>(IsRTXDILumenReflectionsFarFieldEnabled());
    PermutationVector.Set<FRTXDILumenReflectionsRGS::FHairStrandsOcclusionDim>(bNeedTraceHairVoxel);
    PermutationVector.Set<FRTXDILumenReflectionsRGS::FPresamplingMode>(GetRTXDILumenReflectionsPresamplingMode(View));
    PermutationVector.Set<FRTXDILumenReflectionsRGS::FRecursiveReflectionTraces>(MaxBounces > 1);
    PermutationVector.Set<FRTXDILumenReflectionsRGS::FSupportBrdfRayDim>(NumBrdfSamples > 0);
    PermutationVector.Set<FRTXDILumenReflectionsRGS::FShaderExecutionReorderingDim>(bUseReordering);

    TShaderRef<FRTXDILumenReflectionsRGS> RayGenerationShader = View.ShaderMap->GetShader<FRTXDILumenReflectionsRGS>(PermutationVector);

    AddLumenRayTraceDispatchIndirectPass(GraphBuilder, RDG_EVENT_NAME("RTXDILumenReflectionsRGS <indirect>"), RayGenerationShader, PassParameters, PassParameters->RayTracingIndirectArgs, 0, View, false);
}
yasukichi commented 8 months ago

RTXDILumenReflectionsCS

};

- シェーダー(RTXDILumenReflections.usf)
```C++
// Compute entry point that traces one reflection ray per compacted trace texel.
// Each in-range thread decodes its texel, rebuilds the surface position from the downsampled depth, builds a
// biased reflection ray and traces it with TraceReflectionRay() in EVALUATE_MATERIAL_AND_DIRECT_LIGHTING mode.
// It optionally attenuates the result by hair voxels (DIM_HAIRSTRANDS_VOXEL), applies the skylight on a miss,
// adds skylight leaking, applies pre-exposure, clamps the radiance to MaxRayIntensity, and writes the radiance
// and encoded hit distance to RWTraceRadiance / RWTraceHit.
[numthreads(INLINE_RAY_TRACING_THREAD_GROUP_SIZE_X, INLINE_RAY_TRACING_THREAD_GROUP_SIZE_Y, 1)]
void    RTXDILumenReflectionsCS(
            uint3   DispatchThreadIndex : SV_DispatchThreadID,
            uint3   DispatchGroupId     : SV_GroupID,
            uint    DispatchGroupIndex  : SV_GroupIndex)
{
    // Threads are indexed linearly by x; threads past the number of compacted traces do nothing.
    const uint DispatchThreadId = DispatchThreadIndex.x;
    const uint TotalTraceCount = CompactedTraceTexelAllocator[0];
    if (DispatchThreadId < TotalTraceCount)
    {
        const FReflectionTracingCoord ReflectionTracingCoord = DecodeTraceTexel(CompactedTraceTexelData[DispatchThreadId]);

        // Distance already stored in RWTraceHit for this texel (presumably written by an earlier trace pass);
        // the decoded hit flag is not used here.
        bool bUnused;
        float TraceHitDistance = DecodeRayDistance(RWTraceHit[ReflectionTracingCoord.CoordFlatten], bUnused);

        float2 ScreenUV = GetScreenUVFromReflectionTracingCoord(ReflectionTracingCoord.CoordFlatten);
        float2 ScreenCoord = ScreenUV * View.BufferSizeAndInvSize.xy;
        uint LinearCoord = ScreenCoord.y * View.BufferSizeAndInvSize.x + ScreenCoord.x;

        float SceneDepth = DownsampledDepth.Load(int3(ReflectionTracingCoord.CoordFlatten, 0)).x;
        float3 TranslatedWorldPosition = GetTranslatedWorldPositionFromScreenUV(ScreenUV, SceneDepth);
        const float3 WorldNormal = ReadMaterialData(uint2(ScreenCoord)).WorldNormal;
        FRayData RayData = GetRayData(ReflectionTracingCoord.CoordFlatten);

        float ClippedNearFieldMaxTraceDistance = ClipAndDitherNearFieldMaxTraceDistance(
            TranslatedWorldPosition,
            RayData.Direction,
            ReflectionTracingCoord.CoordFlatten,
            NearFieldSceneRadius,
            NearFieldMaxTraceDistance,
            NearFieldMaxTraceDistanceDitherScale);

        float RayBias = 0.05f;

        // Construct the reflection ray
        // It starts at the stored distance pulled back by PullbackBias; both ends are kept at or above RayBias.
        FRayDesc Ray;
        Ray.Origin = TranslatedWorldPosition;
        Ray.Direction = RayData.Direction;
        Ray.TMin = max(TraceHitDistance - PullbackBias, RayBias);
        Ray.TMax = max(ClippedNearFieldMaxTraceDistance - PullbackBias, RayBias);

        // Bias the reflection ray origin along the view (camera) ray and normal direction to avoid self-intersection
        float3 ViewDirection = (TranslatedWorldPosition - View.TranslatedWorldCameraOrigin);
        Ray.Origin -= normalize(ViewDirection) * ReflectionViewBias;
        Ray.Origin += WorldNormal * ReflectionNormalBias;

        FRayCone RayCone = (FRayCone)0;
        RayCone.SpreadAngle = View.EyeToPixelSpreadAngle;
        RayCone = PropagateRayCone(RayCone, RayData.ConeHalfAngle, SceneDepth);

        const bool bHiResSurface = true;
        const uint CullingMode = RAY_FLAG_CULL_BACK_FACING_TRIANGLES;

        FRayTracedLightingContext Context = CreateRayTracedLightingContext(RayCone, ReflectionTracingCoord.Coord, LinearCoord, CullingMode, MaxTraversalIterations, bHiResSurface);
        Context.MaxReflectionBounces = MaxReflectionBounces;
        Context.FarFieldMaxTraceDistance = FarFieldMaxTraceDistance;
        Context.FarFieldReferencePos = FarFieldReferencePos;
        Context.bUseBookmark = false;
        Context.bAlphaMasking = true;

        // Trace the reflection ray and evaluating lighting (near field)
        int LightingMode = EVALUATE_MATERIAL_AND_DIRECT_LIGHTING;
 /* HERE: the call this note is pointing at */
        FRayTracedLightingResult Result = TraceReflectionRay(ReflectionTracingCoord.Coord, TLAS, Ray, Context, LightingMode, RTXDI_OPTIONAL_PASS_DEBUG_PARAM(DebugContext));

#if DIM_HAIRSTRANDS_VOXEL
        // Hair is traced only up to the nearer of the ray end and the surface hit.
        float HairTraceDistance = min(Ray.TMax, Result.TraceHitDistance);

        bool bHairHit;
        float HairTransparency;
        float HairHitT;

        TraceHairVoxels(
            ReflectionTracingCoord.Coord,
            SceneDepth,
            // Use (Translated)WorldPosition instead of SamplePosition, as the bias is too strong otherwise. This is not an issue as
            // the voxel structure does not cause any self shadowing issue
            TranslatedWorldPosition,
            Ray.Direction,
            HairTraceDistance,
            true,
            bHairHit,
            HairTransparency,
            HairHitT);

        // A hair hit in front of the traced distance attenuates the radiance and counts as a hit.
        if (bHairHit && HairHitT < HairTraceDistance)
        {
            Result.Radiance *= HairTransparency;
            //Result.Transparency *= HairTransparency;
            Result.TraceHitDistance = min(HairHitT, Result.TraceHitDistance);
            Result.bIsHit = true;
        }
#endif // DIM_HAIRSTRANDS_VOXEL

        // Skylight
        // On a miss the skylight is applied and the texel is then recorded as a hit at the clipped near field distance.
        if ((ApplySkyLight != 0) && !Result.bIsHit)
        {
            FConeTraceResult TraceResult;
            TraceResult.Lighting = Result.Radiance;
            TraceResult.Transparency = 1;

            ApplySkylightToTraceResult(RayData.Direction, TraceResult);

            Result.TraceHitDistance = ClippedNearFieldMaxTraceDistance;
            Result.Radiance = TraceResult.Lighting;
            Result.bIsHit = true;
        }

        Result.Radiance += GetSkylightLeaking(Ray.Direction, Result.TraceHitDistance);
        Result.Radiance *= View.PreExposure;

        // Scale the radiance uniformly so that its largest channel does not exceed MaxRayIntensity.
        float MaxLighting = max3(Result.Radiance.x, Result.Radiance.y, Result.Radiance.z);
        if (MaxLighting > MaxRayIntensity)
        {
            Result.Radiance *= MaxRayIntensity / MaxLighting;
        }

        RWTraceRadiance[ReflectionTracingCoord.Coord] = Result.Radiance;
        RWTraceHit[ReflectionTracingCoord.Coord] = EncodeRayDistance(Result.TraceHitDistance, Result.bIsHit);
    }
}

Nvidia Shader Execution Reordering image

// Traces a reflection ray (optionally followed by further reflection bounces) using the
// NVIDIA Shader Execution Reordering (SER) hit-object API, and accumulates lighting at each hit:
// payload radiance, CalculateDirectLighting(), RTXDI sampled lighting (EvaluateHitLighting) and
// Lumen surface-cache lighting.
//
// Parameters:
//   TraceCoord    - reflection trace coordinate; used to seed the RTXDI random contexts.
//   TLAS          - acceleration structure to trace against.
//   Ray           - ray to trace; a local copy that is re-aimed after every bounce.
//   Context       - tracing context (culling mode, instance mask, far-field reference position,
//                   bounce limit, linear coordinate for the random sequence).
//   LightingMode  - only compared against EVALUATE_MATERIAL in this function, where it selects
//                   how the surface-cache lighting is combined.
//   DebugContext  - optional debug parameter, forwarded to EvaluateHitLighting.
//
// Returns a FRayTracedLightingResult in which bIsHit / TraceHitDistance / GeometryWorldNormal
// describe the FIRST hit only, and Radiance is the throughput-weighted sum over all bounces.
// If nothing is hit on the first bounce the result keeps bIsHit == false and
// TraceHitDistance == Ray.TMax, with no radiance added here.
FRayTracedLightingResult TraceReflectionRay(
    uint2 TraceCoord,
    in RaytracingAccelerationStructure TLAS,
    in FRayDesc Ray,
    in FRayTracedLightingContext Context,
    int LightingMode,
    RTXDI_OPTIONAL_DEBUG_PARAM(DebugContext)
)
{
    // NOTE(review): Bookmark is declared but never used in this function.
    FLumenRayHitBookmark Bookmark;

    RandomSequence RandSequence;
    RandomSequence_Initialize(RandSequence, Context.LinearCoord, View.StateFrameIndex);

    // Start from a "miss" result; the hit fields are filled in at the first hit below.
    FRayTracedLightingResult Result = CreateRayTracedLightingResult();
    Result.bIsHit = false;
    Result.TraceHitDistance = Ray.TMax;

    float PathRoughness = 0.0f;
    float3 PathThroughput = 1.0f;
    bool bReachedFarField = false;

#if !RECURSIVE_REFLECTION_TRACES
    {
        // Without recursive traces only a single bounce is evaluated.
        Context.MaxReflectionBounces = 1;
    }
#endif

    for (uint ReflectionBounceIndex = 0; ReflectionBounceIndex < Context.MaxReflectionBounces; ++ReflectionBounceIndex)
    {
        // Weight of the next reflection ray; it stays 0 (which ends the loop after this bounce)
        // unless the surface-cache branch below decides another bounce is worthwhile.
        float NextReflectionRayAlpha = 0.0f;

        FPackedMaterialClosestHitPayload Payload = (FPackedMaterialClosestHitPayload)0;
        Payload.SetLumenPayload();
        Payload.SetIgnoreTranslucentMaterials();

        // Offset ray origin if tracing against far-field
        // NOTE(review): RayOrigin is only referenced by the commented-out restore further down.
        float3 RayOrigin = Ray.Origin;

        // Use the set culling mode since near/far field surface cache traces don't run when SER is enabled
        NvHitObject HitObject;
        NvTraceRayHitObject(TLAS, Context.CullingMode, Context.InstanceMask, RAY_TRACING_SHADER_SLOT_MATERIAL, RAY_TRACING_NUM_SHADER_SLOTS, 0, Ray.GetNativeDesc(), Payload, HitObject);
        // Nvidia Shader Execution Reordering https://developer.nvidia.com/blog/improve-shader-performance-and-in-game-frame-rates-with-shader-execution-reordering/

        //Ray.Origin = RayOrigin;
        // Reorder threads by hit object before any hit shading is invoked.
        NvReorderThread(HitObject);

#if DIM_FAR_FIELD_TRACING
        // The far-field retry is attempted only for the primary reflection ray (bounce 0).
        if (!HitObject.IsHit() && ReflectionBounceIndex == 0)
        {
            // missed nearfield, try to hit far field geometry

            // Adjust the ray origin, TMin, and TMax for the far field
            // NOTE(review): PullbackBias and FarFieldMaxTraceDistance are read from outside this
            // function rather than from Context (Context.FarFieldMaxTraceDistance is not used
            // here) - confirm that is intended.
            Ray.Origin += Context.FarFieldReferencePos;
            Ray.TMin = max(Ray.TMax - PullbackBias, 0.f);
            Ray.TMax = FarFieldMaxTraceDistance;

            Context.InstanceMask = RAY_TRACING_MASK_FAR_FIELD;

            NvTraceRayHitObject(TLAS, Context.CullingMode, Context.InstanceMask, RAY_TRACING_SHADER_SLOT_MATERIAL, RAY_TRACING_NUM_SHADER_SLOTS, 0, Ray.GetNativeDesc(), Payload, HitObject);

            // Reset ray origin into the original frame
            Ray.Origin -= Context.FarFieldReferencePos;

            bReachedFarField = true;

            // is it worth a reorder here?
            NvReorderThread(HitObject);
        }
#endif // DIM_FAR_FIELD_TRACING

        if (HitObject.IsHit())
        {
            // Invoke the hit shader for the hit object (threads were already reordered above)
            NvInvokeHitObject(TLAS, HitObject, Payload);
            // NOTE(review): bTraceNextReflectionRay is never read.
            bool bTraceNextReflectionRay = false;

            // Apply emissive material
            float3 Radiance = Payload.GetRadiance() * Payload.GetOpacity();

            FLumenHitLightingMaterial LumenMaterial = GetLumenHitLightingMaterial(Payload, Ray);

            // Accumulate roughness in order to terminate recursive rays earlier on rough surfaces
            PathRoughness = 1.0f - (1.0f - PathRoughness) * (1.0f - LumenMaterial.TopLayerRoughness);

            float3 RayHitTranslatedWorldPosition = Ray.Origin + Ray.Direction * Payload.HitT;

            // Direct lighting evaluation
            {
                // Contribution from non-sampled light
                Radiance += CalculateDirectLighting(TLAS, Ray, Context, Payload, RandSequence, RayHitTranslatedWorldPosition, Payload.GetWorldNormal());

                // Create the surface
                // (Surface.TranslatedWorldPosition is the same expression as RayHitTranslatedWorldPosition.)
                FGBufferSurface Surface = (FGBufferSurface)0;
                Surface.GBuffer = GetGBufferDataFromPayload(Payload);
                Surface.ViewDirection = Ray.Direction;
                Surface.TranslatedWorldPosition = Ray.Origin + (Ray.Direction * Payload.HitT);
                Surface.LightingChannelMask = Payload.GetPrimitiveLightingChannelMask();
                Surface.IsSecondary = true;

#if SUPPORT_RTXDI_LUMENREFLECTIONS_DECALS
#if PLATFORM_SUPPORTS_CALLABLE_SHADERS
                {
                    // Gather every decal returned by the decal grid lookup at the hit position
                    // (one callable shader per decal) and apply the combined result to the GBuffer.
                    FDecalData DecalData;
                    DecalData.BaseColor = float4(0, 0, 0, 1);
                    DecalData.WorldNormal = float4(0, 0, 0, 1);
                    DecalData.MetallicSpecularRoughness = float4(0, 0, 0, 1);
                    DecalData.Emissive = 0;

                    FDecalLoopCount DecalLoopCount = DecalGridLookup(Surface.TranslatedWorldPosition);
                    for (uint Index = 0, Num = DecalLoopCount.NumDecals; Index < Num; ++Index)
                    {
                        uint DecalId = GetDecalId(Index, DecalLoopCount);

                        FDecalShaderPayload DecalPayload = (FDecalShaderPayload)0;
                        DecalPayload.SetInputTranslatedWorldPosition(Surface.TranslatedWorldPosition);

                        CallShader(DecalId, DecalPayload);

                        CombineDecal(DecalPayload, DecalData);
                    }

                    DecalData.ApplyDecal(Surface.GBuffer);
                }
#endif
#endif
                // Perform lighting of sampled lights
                {
                    // Setup the RNG contexts, using the reflection trace coordinates as the seed
                    uint LinearIndex = CalcLinearIndex(TraceCoord);
                    FRandomContext RandContext = FRandomContext::Create(LinearIndex, View.StateFrameIndex);

                    // Second context is seeded per 8x8 block of trace coordinates, so it is shared
                    // by all traces in the block.
                    uint CoherentLinearIndex = CalcLinearIndex(TraceCoord / 8);
                    FRandomContext CoherentRandContext = FRandomContext::Create(CoherentLinearIndex, View.StateFrameIndex + 63);

                    // Evaluate RTXDI hit lighting
                    /* (author's marker: point of interest here) */
                    const HitLightingSamplingResult HitLightingResult = EvaluateHitLighting(TraceCoord, Surface, RandContext, CoherentRandContext, RTXDI_OPTIONAL_PASS_DEBUG_PARAM(DebugContext));

                    Radiance += HitLightingResult.DiffuseExitantRadiance + HitLightingResult.SpecularExitantRadiance;
                }
            }

            {
                float3 RayHitGeometryWorldNormal = Payload.GetGeometryNormal();

                // The Surface Cache lighting was computed using the front face, don't leak to the backface
                if (dot(Ray.Direction, RayHitGeometryWorldNormal) <= 0 || Payload.IsTwoSided())
                {
                    // Reverse surface cache lookup normal to match non hit lighting path (usually surface cache is only valid on front faces)
                    if (Payload.IsTwoSided() && !Payload.IsFrontFace())
                    {
                        RayHitGeometryWorldNormal = -RayHitGeometryWorldNormal;
                    }

                    FSurfaceCacheSample SurfaceCacheSample = CalculateSurfaceCacheLighting(Ray, Context, RayHitTranslatedWorldPosition, RayHitGeometryWorldNormal, Payload.HitT, Payload.GetSceneInstanceIndex());

                    if (LightingMode == EVALUATE_MATERIAL)
                    {
                        // Material-only mode: take both direct and indirect lighting from the surface cache.
                        Radiance += Diffuse_Lambert(LumenMaterial.ApproxFullyRoughDiffuseColor) * (SurfaceCacheSample.DirectLighting + SurfaceCacheSample.IndirectLighting);
                    }
                    else
                    {
                        // Blend between rough reflections approximation and a single reflection ray
                        if (ReflectionBounceIndex + 1 < Context.MaxReflectionBounces)
                        {
                            NextReflectionRayAlpha = LumenCombineReflectionsAlpha(PathRoughness, /*bHasBackfaceDiffuse*/ false);
                        }

                        // Add diffuse part
                        Radiance += Diffuse_Lambert(LumenMaterial.DiffuseColor) * SurfaceCacheSample.IndirectLighting;

                        // Add rough specular part
                        Radiance += Diffuse_Lambert(LumenMaterial.ApproxFullyRoughSpecularColor) * (1.0f - NextReflectionRayAlpha) * SurfaceCacheSample.IndirectLighting;
                    }
                }
            }

            Result.Radiance += PathThroughput * Radiance;

            // Capture hit properties at first hit
            if (ReflectionBounceIndex == 0)
            {
                Result.bIsHit = true;
                Result.TraceHitDistance = Payload.HitT;
                Result.GeometryWorldNormal = Payload.GetGeometryNormal();
            }

            if (NextReflectionRayAlpha > 0.0f)
            {
                // Fresnel
                const float NoV = saturate(dot(-Ray.Direction, Payload.GetWorldNormal()));
                PathThroughput *= EnvBRDF(LumenMaterial.TopLayerSpecularColor, LumenMaterial.TopLayerRoughness, NoV);

                // Start the next ray at the hit point, pushed 0.05 units along the geometry normal.
                float3 ReflectedRayOrigin = Ray.Origin + Ray.Direction * Payload.HitT + 0.05f * Payload.GetGeometryNormal();

                // Scale the second random number so it never exceeds (1 - GGXSamplingBias).
                float GGXSamplingBias = 0.1f;
                float2 E = RandomSequence_GenerateSample2D(RandSequence);
                E.y *= 1.0f - GGXSamplingBias;

                float3x3 TangentBasis = GetTangentBasis(Payload.GetWorldNormal());
                float3 TangentV = mul(TangentBasis, -Ray.Direction);

                float4 GGXSample = ImportanceSampleVisibleGGX(E, Pow2(LumenMaterial.TopLayerRoughness), TangentV);
                float3 WorldH = mul(GGXSample.xyz, TangentBasis);
                float3 ReflectedRayDirection = reflect(Ray.Direction, WorldH);

                // Setup next ray
                Ray.Origin = ReflectedRayOrigin;
                Ray.Direction = ReflectedRayDirection;
                Ray.TMin = 0.0f;
                // NOTE(review): self-assignment, has no effect - the next bounce reuses the current TMax.
                Ray.TMax = Ray.TMax;
            }
            else
            {
                break;
            }
        }
        else
        {
            // Miss: secondary bounces pick up sky radiance here; a miss on the first bounce adds
            // nothing and leaves Result as a miss.
            if (ReflectionBounceIndex > 0)
            {
                Result.Radiance += PathThroughput * EvaluateSkyRadiance(Ray.Direction);
            }
            break;
        }
        if (bReachedFarField)
        {
            //always terminate at far field
            break;
        }
    }

    return Result;
}
yasukichi commented 8 months ago

RenderSampledDirectLightingIntoLumenCards

yasukichi commented 8 months ago

LumenCardsSampledDirectLightingCS

};

- シェーダー(RTXDLumenSurfaceCache.usf)
```C++
// Non-zero when RWDirectLightingAtlas already holds lighting for this pass; in that case
// LumenCardSampledDirectLightingCS adds its result to the existing texel instead of replacing it.
uint bAtlasContainsLight;

// Lighting Estimates Per Surface Card Tile Texel
//    (NumLocalIterations * NumLocalCandidates) + (NumDirectionalIterations * NumDirectionalCandidates) + (NumSkyIterations * NumSkyCandidates)
// Rays Traces Per Surface Card Tile Texel
//    (NumLocalIterations + NumDirectionalIterations + NumSkyIterations)

// Direct lighting atlas of the Lumen surface cache; read (optionally) and written by
// LumenCardSampledDirectLightingCS at Surface.AtlasCoords.
RWTexture2D<float3> RWDirectLightingAtlas;

// Shades one Lumen surface-cache texel per thread with RTXDI sampled direct lighting and
// stores the resulting irradiance in RWDirectLightingAtlas.
//
// DispatchThreadIndex.x encodes both the card tile index (x / CARD_TILE_SIZE) and the texel
// column inside that tile (x % CARD_TILE_SIZE); DispatchThreadIndex.y is the texel row.
// DispatchGroupId and DispatchGroupIndex are not used.
[numthreads(INLINE_RAY_TRACING_THREAD_GROUP_SIZE_X, INLINE_RAY_TRACING_THREAD_GROUP_SIZE_Y, 1)]
void LumenCardSampledDirectLightingCS(
            uint3   DispatchThreadIndex : SV_DispatchThreadID,
            uint3   DispatchGroupId : SV_GroupID,
            uint    DispatchGroupIndex : SV_GroupIndex)
{
    const uint TileIndex = DispatchThreadIndex.x / CARD_TILE_SIZE;
    const uint2 TexelInTile = uint2(DispatchThreadIndex.x % CARD_TILE_SIZE, DispatchThreadIndex.y);

    // Fetch the surface-cache texel this thread is responsible for
    FLumenSurface Surface = GetLumenSurface(TileIndex, TexelInTile);

    // Only valid, non-emissive texels are lit; everything else is cleared to black
    const bool bShadeTexel = Surface.CardData.bValid && !any(Surface.CardData.Emissive);
    if (!bShadeTexel)
    {
        RWDirectLightingAtlas[Surface.AtlasCoords] = 0;
        return;
    }

    // Per-texel RNG, seeded from the atlas coordinates
    uint TexelSeed = CalcLinearIndex(Surface.AtlasCoords);
    FRandomContext RandContext = FRandomContext::Create(TexelSeed, View.StateFrameIndex);

    // Second RNG seeded per 8x8 block of atlas texels, so it is shared across the block
    uint BlockSeed = CalcLinearIndex(Surface.AtlasCoords / 8);
    FRandomContext CoherentRandContext = FRandomContext::Create(BlockSeed, View.StateFrameIndex + 63);

    // The surface cache has no screen pixel, so a fixed (0, 0) coordinate is passed.
    const uint2 PixelCoord = uint2(0, 0);

    // Evaluate RTXDI hit lighting
    /* (author's marker: point of interest here) */
    HitLightingSamplingResult HitLightingResult = EvaluateHitLighting( PixelCoord, Surface, RandContext, CoherentRandContext );

    if (bAtlasContainsLight > 0)
    {
        // Accumulate on top of what the atlas already holds
        // (non-sampled lights or temporal feedback)
        RWDirectLightingAtlas[Surface.AtlasCoords] = HitLightingResult.Irradiance + RWDirectLightingAtlas[Surface.AtlasCoords].rgb;
    }
    else
    {
        // Nothing to preserve: overwrite the texel
        RWDirectLightingAtlas[Surface.AtlasCoords] = HitLightingResult.Irradiance;
    }
}

// Builds the FLumenSurface for one texel of a Lumen surface-cache card tile.
//   CardTileIndex          - index into CardTiles.
//   TexelCoordsInCardTile  - texel position inside that tile.
// Returns a zero-initialised surface with the tile texel, atlas coordinates, surface-cache
// data and lighting channel mask filled in. TranslatedWorldPosition is only written when the
// cache data is valid.
FLumenSurface GetLumenSurface(uint CardTileIndex, uint2 TexelCoordsInCardTile)
{
    // Resolve tile -> page -> card
    FCardTileData Tile = UnpackCardTileData(CardTiles[CardTileIndex]);
    FLumenCardPageData Page = GetLumenCardPageData(Tile.CardPageIndex);
    FLumenCardData Card = GetLumenCardData(Page.CardIndex);

    // Texel position within the card page, and its centre for UV computations
    uint2 TexelInPage = CARD_TILE_SIZE * Tile.TileCoord + TexelCoordsInCardTile;
    float2 AtlasUV = Page.PhysicalAtlasUVRect.xy + Page.PhysicalAtlasUVTexelScale * (TexelInPage + 0.5f);
    float2 CardUV = Page.CardUVRect.xy + Page.CardUVTexelScale * (TexelInPage + 0.5f);

    FLumenSurface Result = (FLumenSurface)0;
    Result.TexelCoordsInCardTile = TexelCoordsInCardTile;
    Result.AtlasCoords = Page.PhysicalAtlasCoord + TexelInPage;
    Result.CardData = GetSurfaceCacheData(Card, CardUV, AtlasUV);
    Result.LightingChannelMask = Card.LightingChannelMask;

    // The translated world position is only meaningful for valid cache data
    if (Result.CardData.bValid)
    {
        Result.TranslatedWorldPosition = Result.CardData.WorldPosition + LWCHackToFloat(PreViewTranslation[0]).xyz;
    }

    return Result;
}
yasukichi commented 8 months ago

EvaluateHitLighting

// Evaluates sampled (RTXDI) lighting for one surface by running three lighting passes -
// local lights, directional lights and the skylight - and accumulating them into one result.
//
//   TraceCoord           - coordinate forwarded to EvalLightingPass as its pixel coordinate.
//   Surface              - surface being lit (GBuffer surface or Lumen surface).
//   RandContext          - random context for this call.
//   CoherentRandContext  - second random context, forwarded alongside RandContext.
//   DebugContext         - optional debug parameter.
//
// Returns the accumulated lighting (zero-initialised AccumulationType before the passes run).
template<typename AccumulationType, typename SurfaceType>
AccumulationType EvaluateHitLightingTemplate(
    uint2 TraceCoord,
    SurfaceType Surface,
    FRandomContext RandContext,
    FRandomContext CoherentRandContext,
    RTXDI_OPTIONAL_DEBUG_PARAM(DebugContext)
)
{
    AccumulationType LightingResult = (AccumulationType)0;

    // This is the cursed control-flow structure of a for-case statement
    // It allows us to evaluate multiple passes with slightly different
    // parameters while ensuring that DXC doesn't make multiple copies of the
    // function due to inlining. It saves on code bloat and instruction cache
    // pressure.
    LOOP
    for (int LightReservoirType = 0; LightReservoirType < 3; LightReservoirType++)
    {
        InitialCandidateOptions Options = (InitialCandidateOptions)0;
        int NumIterations = 0;

        switch (LightReservoirType)
        {
        case 0:
            // local lights
            Options.NumLocalSamples = NumLocalCandidates;
            Options.NumDirectionalSamples = 0;
            Options.NumSkyLightSamples = 0;
            Options.NumBrdfSamples = GetBrdfCandidates();
            NumIterations = NumLocalIterations;

            break;

        case 1:
            // directional lights
            Options.NumLocalSamples = 0;
            Options.NumDirectionalSamples = NumDirectionalCandidates;
            Options.NumSkyLightSamples = 0;
            Options.NumBrdfSamples = 0;
            NumIterations = NumDirectionalIterations;

            break;

        case 2:
            // skylight
            Options.NumLocalSamples = 0;
            Options.NumDirectionalSamples = 0;
            Options.NumSkyLightSamples = NumSkyCandidates;
            Options.NumBrdfSamples = GetBrdfCandidates();
            NumIterations = NumSkyIterations;

            break;
        }

        Options.InitialSampleVisibility = 0;

        /* (author's marker: point of interest here) */
        EvalLightingPass<AccumulationType, SurfaceType>(
            TraceCoord,
            Surface,
            RandContext,
            CoherentRandContext,
            Options,
            NumIterations,
            LightingResult,
            RTXDI_OPTIONAL_PASS_DEBUG_PARAM(DebugContext)
        );
    }

    return LightingResult;
}

// Runs one lighting pass: for each of `Iterations` iterations, picks a light sample, shades it
// with EvalLightingInternal and accumulates the result into `Result` with weight
// 1 / max(1, Iterations).
//
//   PixelCoord           - coordinate forwarded to sample generation and shading.
//   Surface              - surface being lit.
//   RandContext          - random context; advanced by this function (inout).
//   CoherentRandContext  - second random context forwarded to ProduceInitialSample (inout).
//   Options              - candidate counts used by ProduceInitialSample.
//   Iterations           - number of samples to shade.
//   Result               - accumulator updated in place (inout).
//   DebugContext         - optional debug parameter.
//
// With SAMPLE_DIRECTIONALLIGHT_ONLY the reservoir sampling is bypassed and a directional light
// is chosen uniformly at random instead.
template<typename AccumulationType, typename SurfaceType>
void EvalLightingPass(
    uint2 PixelCoord,
    SurfaceType Surface,
    inout FRandomContext RandContext,
    inout FRandomContext CoherentRandContext,
    InitialCandidateOptions Options,
    uint Iterations,
    inout AccumulationType Result,
    RTXDI_OPTIONAL_DEBUG_PARAM(DebugContext)
)
{
    // Local light shading

    for (uint SampleIndex = 0; SampleIndex < Iterations; ++SampleIndex)
    {
#if !SAMPLE_DIRECTIONALLIGHT_ONLY
        /* (author's marker: point of interest here) */
        RTXDI_SDK_Reservoir Reservoir = ProduceInitialSample(
            PixelCoord,
            Surface,
            Options,
            RandContext,
            CoherentRandContext);

        // Skip this iteration, the reservoir doesn't contain a valid sample
        if (!Reservoir.sampleRef.IsValid())
        {
            continue;
        }

        const RTXDI_SDK_LightSampleRef SampleRef = Reservoir.sampleRef;
        const float UnbiasedContributionWeight = Reservoir.weightSum;
#else
        // Pick one directional light uniformly; the min() keeps the index inside the
        // directional range when the random value rounds up to the light count.
        uint DirectionalLightIndex = uint(RandContext.GenerateSample1D() * SampledLightData.DirectionalLightCount);
        DirectionalLightIndex = min(DirectionalLightIndex, SampledLightData.DirectionalLightCount - 1) + SampledLightData.DirectionalLightStart;

        // Use blue noise to sample directional light
        const uint SmallPrime = 17;
        const float2 SampleLocation = BlueNoiseVec2(PixelCoord, View.StateFrameIndex + SampleIndex * SmallPrime);

        const RTXDI_SDK_LightSampleRef SampleRef = RTXDI_SDK_LightSampleRef::Create(DirectionalLightIndex, SampleLocation);
        // Uniform selection among N lights is compensated by a weight of N.
        const float UnbiasedContributionWeight = (float)SampledLightData.DirectionalLightCount;
#endif

        AccumulationType LocalResult = EvalLightingInternal<AccumulationType,SurfaceType>(
            SampleRef,
            UnbiasedContributionWeight,
            PixelCoord,
            Surface,
            false,
            RandContext,
            RTXDI_OPTIONAL_PASS_DEBUG_PARAM(DebugContext)
        );
        Result.Accumulate(LocalResult, 1.f / (float)max(1, Iterations));
    }
}

// Convenience overload: resolves the light and the sample location referenced by SampleRef,
// then forwards to the EvalLightingInternal overload that takes them explicitly.
//
//   SampleRef                   - reference to the selected light sample.
//   UnbiasedContributionWeight  - weight applied to the sample's contribution.
//   PixelCoord                  - coordinate forwarded to the shading overload.
//   Surface                     - surface being lit.
//   bInexactShadowedGeometry    - forwarded unchanged to the shading overload.
//   RandContext                 - random context (inout).
//   DebugContext                - optional debug parameter.
//
// Returns whatever the explicit overload returns.
template<typename AccumulationType, typename SurfaceType>
AccumulationType EvalLightingInternal(
    in const RTXDI_SDK_LightSampleRef SampleRef,
    in const float UnbiasedContributionWeight,
    in const uint2 PixelCoord,
    in const SurfaceType Surface,
    in const bool bInexactShadowedGeometry,
    inout FRandomContext RandContext,
    RTXDI_OPTIONAL_DEBUG_PARAM(DebugContext)
)
{
    FSampledLightData Light;
    FLightSampleLocation LightSample;

    // Load the sampled light data and generate a sample location for the given
    // GBuffer/Lumen Surface and light combination
    GetLightSampleData(Surface, SampleRef, LightSample, Light);

    /* Reference only: body of GetLightSampleData as quoted in the original notes
       (template parameter list reconstructed; the closing brace was not quoted).

    template<typename SurfaceType>
    void GetLightSampleData(SurfaceType Surface, RTXDI_SDK_LightSampleRef SampleRef, out FLightSampleLocation LightSample, out FSampledLightData Light)
    {
        const int LightIndex = SampleRef.GetLightIndex();
        const float2 LightUV = SampleRef.GetUV();

        Light = GetSampledDeferredLightDataWithSkylight(LightIndex, LightUV, SkylightTexture, SkylightTextureSampler);

        LightSample = ComputeLightSampleLocation(Light, Surface.TranslatedWorldPosition, Surface.GetWorldNormal(), Light.LightType, LightUV);
    */

    return EvalLightingInternal<AccumulationType,SurfaceType>(
        Light, LightSample, SampleRef,
        UnbiasedContributionWeight, PixelCoord,
        Surface, bInexactShadowedGeometry, RandContext,
        RTXDI_OPTIONAL_PASS_DEBUG_PARAM(DebugContext)
    );
}

yasukichi commented 8 months ago

RenderSampledDirectLighting

}

yasukichi commented 8 months ago

DirectLightRGS

};

- シェーダー(SampledDirectLighting.usf)
```C++
/***************************************************************************************************
 *
 *  SampledDirectLighting
 *
 *  Single-pass RGS to sample and shade one pass. Uses no history and no spatial resampling.
 * Is a semi-brute-force implementation of sampled lighting.
 *
 ***************************************************************************************************/
// Ray generation shader: shades one pixel with sampled direct lighting in a single pass and
// writes diffuse / specular radiance (and, outside the hair path, ray distance) to the output UAVs.
// Pixels with DeviceZ <= 0 or an unlit shading model are not lit and output zero lighting.
[shader("raygeneration")]
void SampledDirectLightingRGS()
{
    uint2 PixelCoord = DispatchRaysIndex().xy + View.ViewRectMin.xy;

    // Per-pixel RNG
    uint PixelSeed = CalcLinearIndex(PixelCoord);
    FRandomContext RandContext = FRandomContext::Create(PixelSeed, View.StateFrameIndex);

    // RNG shared by every pixel of an 8x8 tile
    uint TileSeed = CalcLinearIndex(PixelCoord / 8);
    FRandomContext CoherentRandContext = FRandomContext::Create(TileSeed, View.StateFrameIndex + 63);

    // Surface description read from the scene textures
    FGBufferSurface Surface = GetGBufferSurfaceFromSceneTexturesLoad(PixelCoord);
    Surface.LightingChannelMask = GetSceneLightingChannel(PixelCoord);

    const float LocalCoverage = 1.0f;

    bool DebugEnabled = true;

    // Lighting outputs default to "no light"
    float RayDistance = 0.0;
    float HitCount = 0.0;
    float3 SpecularExitantRadiance = 0.0;
    float3 DiffuseExitantRadiance = 0.0;

    // Only pixels with finite depth and a lit shading model are shaded
    const bool bHasFiniteDepth = Surface.DeviceZ > 0.0;
    const bool bIsLitModel = Surface.GBuffer.ShadingModelID != SHADINGMODELID_UNLIT;
    if (bHasFiniteDepth && bIsLitModel)
    {
        // Evaluate RTXDI hit lighting
        const HitLightingSamplingResult Lighting = EvaluateHitLighting( PixelCoord, 
            Surface, RandContext, CoherentRandContext, RTXDI_OPTIONAL_PASS_DEBUG_PARAM(DebugContext) );

        RayDistance = Lighting.RayDistance;
        HitCount = Lighting.HitCount;
        SpecularExitantRadiance = Lighting.SpecularExitantRadiance;
        DiffuseExitantRadiance = Lighting.DiffuseExitantRadiance;
    }

    // Optionally divide out the material colours (never for hair); the 0.999/0.001 remap
    // keeps the divisor away from zero.
    if (DemodulateMaterials && Surface.GBuffer.ShadingModelID != SHADINGMODELID_HAIR)
    {
        DiffuseExitantRadiance /= Surface.GBuffer.DiffuseColor * 0.999 + 0.001;
        SpecularExitantRadiance /= Surface.GBuffer.SpecularColor * 0.999 + 0.001;
    }

    // Apply pre-exposure and coverage, then clamp to what a half float can store
    float3 OutputDiffuseRadiance = ClampToHalfFloatRange(DiffuseExitantRadiance * View.PreExposure * LocalCoverage);
    float3 OutputSpecularRadiance = ClampToHalfFloatRange(SpecularExitantRadiance * View.PreExposure * LocalCoverage);

#if SAMPLE_DIRECTIONALLIGHT_ONLY
    // Directional-only pass: add onto the lighting already in the UAVs and blend the ray
    // distance using the previous alpha as the existing ray count.
    const float RayCount = RWDiffuseUAV[PixelCoord].a;

    RWRayDistanceUAV[PixelCoord] = (RWRayDistanceUAV[PixelCoord] * RayCount + RayDistance * NumDirectionalCandidates) / (NumDirectionalCandidates + RayCount);

    RWDiffuseUAV[PixelCoord].rgb += OutputDiffuseRadiance;
    RWDiffuseUAV[PixelCoord].a = 1.0;
    RWSpecularUAV[PixelCoord].rgb += OutputSpecularRadiance;
    RWSpecularUAV[PixelCoord].a = 1.0;
#elif !HAIR_SHADING
    // Regular pass: overwrite the outputs
    RWDiffuseUAV[PixelCoord].rgb = OutputDiffuseRadiance;
    RWDiffuseUAV[PixelCoord].a = 1.0;
    RWSpecularUAV[PixelCoord].rgb = OutputSpecularRadiance;
    RWSpecularUAV[PixelCoord].a = 1.0;
    RWRayDistanceUAV[PixelCoord] = RayDistance / max(1.0, HitCount);
#else
    // performing the blend op for lighting here as we only touch each sample once
    // NOTE(review): ThreadCoord is not declared in the code visible here - presumably provided
    // by hair-specific setup that was omitted from this excerpt; confirm.
    float4 Previous = RWSpecularUAV[ThreadCoord];
    RWSpecularUAV[ThreadCoord].rgb = Previous.rgb + OutputSpecularRadiance + OutputDiffuseRadiance; // hair is technically only specular
    RWSpecularUAV[ThreadCoord].a = max(Previous.a, LocalCoverage);
#endif
}
yasukichi commented 8 months ago

GenerateInitialSamplesRGS

};

- シェーダー(GenerateInitialSamples.usf)
```C++
/***************************************************************************************************
 *
 *  GenerateInitialSamples
 *
 *  Draw random samples from the light list and evaluate an approximate luminance to select
 * a weighted random sample. After selection, test the sample for visibility, and reject if not
 * visible.
 *
 ***************************************************************************************************/
// Ray generation shader: produces the initial light reservoir for one pixel via
// ProduceInitialSample and stores it in slice OutputSlice of the reservoir buffer.
// Pixels with DeviceZ <= 0 or an unlit shading model store an empty reservoir.
[shader("raygeneration")]
void GenerateInitialSamplesRGS()
{
    const uint2 PixelCoord = DispatchRaysIndex().xy + View.ViewRectMin.xy;

    // Per-pixel RNG; the seed is offset per history reservoir and per pass so the passes
    // do not reuse the same random sequence.
    uint LinearIndex = CalcLinearIndex(PixelCoord);
    FRandomContext RandContext = FRandomContext::Create(LinearIndex, View.StateFrameIndex + HistoryReservoir * 32 + INITIAL_SAMPLE_PASS_NUM * 63);

    // seed an RNG to be coherent across a small tile
    // The seed offset is a SUM of the two terms, matching the per-pixel seed above and the
    // coherent seed in ApplyTemporalResamplingRGS. The previous product
    // (HistoryReservoir * 32 * INITIAL_SAMPLE_PASS_NUM * 63) collapsed to 0 whenever either
    // factor was 0.
    uint CoherentLinearIndex = CalcLinearIndex(DispatchRaysIndex().xy / 8);
    FRandomContext CoherentRandContext = FRandomContext::Create(CoherentLinearIndex, View.StateFrameIndex + HistoryReservoir * 32 + INITIAL_SAMPLE_PASS_NUM * 63);

    // Get Surface data
    FGBufferSurface Surface = GetGBufferSurfaceFromSceneTexturesLoad(PixelCoord);
    Surface.LightingChannelMask = GetSceneLightingChannel(PixelCoord);

    RTXDI_SDK_Reservoir state = RTXDI_SDK_Reservoir::Empty();

    const bool bIsDepthValid = Surface.DeviceZ > 0.0;
    const bool bIsValidPixel = Surface.GBuffer.ShadingModelID != SHADINGMODELID_UNLIT && bIsDepthValid;

    if (bIsValidPixel)
    {
        // Zero-initialise so no field is left undefined (same pattern as EvaluateHitLightingTemplate)
        InitialCandidateOptions Options = (InitialCandidateOptions)0;

        Options.NumLocalSamples = NumInitialLocalCandidates;
        Options.NumDirectionalSamples = NumInitialDirectionalCandidates;
        // Sky candidates are only drawn when RISSkylightBufferTiles is positive
        Options.NumSkyLightSamples = RISSkylightBufferTiles > 0 ? NumInitialSkyCandidates : 0;
        Options.NumBrdfSamples = NumInitialBrdfCandidates;
        Options.InitialSampleVisibility = InitialSampleVisibility;

        state = ProduceInitialSample(
            PixelCoord,
            Surface,
            Options,
            RandContext,
            CoherentRandContext);
    }

    WriteReservoirData(PixelCoord,OutputSlice, state.Store());
}
yasukichi commented 8 months ago

ApplyTemporalResamplingRGS

};

- シェーダー(ApplyTemporalResampling.usf)
```C++
// Shader parameters for the temporal resampling pass.

// NOTE(review): not referenced in the code visible in this excerpt.
RWTexture2D<uint>   RWPreviousSampleUAV;
// Depth (device Z) and encoded normal history; read by CreateRTXDIPrevSurface.
Texture2D<float>    DepthHistory;
Texture2D<float4>   NormalHistory;
// Per-pixel flag, read when SpatiallyHashTemporalReprojection != 0, that enables permutation sampling.
Texture2D<uint>     PermutationSamplingMask;

// Initial-candidate settings; only declared here when initial sampling is fused into the temporal pass.
#if FUSE_TEMPORAL_AND_INITIAL_SAMPLING
int     NumInitialLocalCandidates;
int     NumInitialDirectionalCandidates;
int     NumInitialBrdfCandidates;
int     NumInitialSkyCandidates;
int     InitialSampleVisibility;
#endif

// Reservoir slice read for the current sample when sampling is not fused.
int     InputSlice;
int     OutputSlice;
// Offsets the RNG seeds (multiplied by 32) in ApplyTemporalResamplingRGS.
int     HistoryReservoir;
// Temporal history limits; the SSS variant is used for subsurface-profile shading models.
int     MaxTemporalHistory;
int     MaxSSSTemporalHistory;
int     ApplyApproximateVisibilityTest;
// Non-zero enables the PermutationSamplingMask lookup.
int     SpatiallyHashTemporalReprojection;
float   TemporalDepthRejectionThreshold;
float   TemporalNormalRejectionThreshold;

// Reconstructs, for a pixel and a device-Z value, the translated world position in the
// previous frame and the normalised direction from the previous camera origin to it.
//   PixelCoord                  - pixel coordinate (view rect offset included).
//   DeviceZ                     - device depth to unproject.
//   OutTranslatedWorldPosition  - reconstructed previous-frame position.
//   OutCameraDirection          - normalised (position - View.PrevTranslatedWorldCameraOrigin).
void ReconstructPrevTranslatedWorldPositionAndCameraDirectionFromDeviceZ(uint2 PixelCoord, float DeviceZ, out float3 OutTranslatedWorldPosition, out float3 OutCameraDirection)
{
    // reprojection to prior frame based on TSRDilateVelocity

    // Pixel centre relative to the view rect -> viewport UV -> screen position
    float2 ViewRelativePixel = PixelCoord - View.ViewRectMin.xy;
    float2 UV = (ViewRelativePixel.xy + 0.5f) * View.ViewSizeAndInvSize.zw;
    float2 ScreenPos = ViewportUVToScreenPos(UV);

    float LinearDepth = ConvertFromDeviceZ(DeviceZ);

    // Screen position is scaled by depth only when ViewToClip[3][3] < 1
    // (presumably the perspective-projection case - confirm).
    float ScreenScale = 1.0f;
    if (View.ViewToClip[3][3] < 1.0f)
    {
        ScreenScale = LinearDepth;
    }
    float4 ClipPosition = float4(ScreenPos * ScreenScale, LinearDepth, 1);

    // Difference between the current and previous pre-view translations
    float3 TranslationDelta = LWCToFloat(LWCSubtract(PrimaryView.PreViewTranslation, PrimaryView.PrevPreViewTranslation));

    OutTranslatedWorldPosition = mul(ClipPosition, View.PrevScreenToTranslatedWorld).xyz + TranslationDelta;
    OutCameraDirection = normalize(OutTranslatedWorldPosition - View.PrevTranslatedWorldCameraOrigin);
}

// Builds a surface for the previous frame at `Coord`: starts from the current GBuffer and
// replaces normal, depth, position and view direction with values derived from the
// NormalHistory / DepthHistory textures.
FGBufferSurface CreateRTXDIPrevSurface(uint2 Coord, FGBufferData CurrentGBuffer)
{
    const int3 HistoryTexel = int3(Coord, 0);

    FGBufferSurface PrevSurface = (FGBufferSurface)0;

    // Material data comes from the current frame...
    PrevSurface.GBuffer = CurrentGBuffer;

    // ...geometry comes from the history buffers
    PrevSurface.DeviceZ = DepthHistory.Load(HistoryTexel).r;
    PrevSurface.GBuffer.Depth = ConvertFromDeviceZ(PrevSurface.DeviceZ);

    float3 HistoryNormal = normalize(DecodeNormal(NormalHistory.Load(HistoryTexel).xyz));
    PrevSurface.GBuffer.WorldNormal = HistoryNormal;

    ReconstructPrevTranslatedWorldPositionAndCameraDirectionFromDeviceZ(Coord, PrevSurface.DeviceZ, PrevSurface.TranslatedWorldPosition, PrevSurface.ViewDirection);

    return PrevSurface;
}

// Permutes a pixel position for temporal sampling.
// A frame-dependent offset in [0,3]x[0,3] is subtracted, the two low bits of each coordinate
// are flipped (XOR 3), and the offset is added back. The original notes say this adds
// high-frequency noise to the selected temporal sample in exchange for fewer larger-scale
// artifacts.
int2 ApplyPermutationSampling(int2 InPosition)
{
    // Weak hash is used on purpose: no quality difference was seen with StrongIntegerHash
    const uint FrameHash = WeakIntegerHash(View.StateFrameIndex);
    const int2 Shift = int2(FrameHash & 3, (FrameHash >> 2) & 3);

    return ((InPosition - Shift) ^ 3) + Shift;
}

/***************************************************************************************************
 *
 *  ApplyTemporalResampling
 *
 *  Shader to handle temporal resampling of light reservoirs.
 *
 ***************************************************************************************************/
[shader("raygeneration")]
void ApplyTemporalResamplingRGS()
{
    uint2 PixelCoord = DispatchRaysIndex().xy + View.ViewRectMin.xy;

    if (any(DispatchRaysIndex().xy > View.ViewSizeAndInvSize.xy))
    {
        return;
    }

    uint LinearIndex = CalcLinearIndex(PixelCoord);
    FRandomContext RandContext = FRandomContext::Create(LinearIndex, View.StateFrameIndex + HistoryReservoir * 32 + 63 * TEMPORAL_SAMPLE_PASS_NUM);

    // seed an RNG to be coherent across a small tile
    uint CoherentLinearIndex = CalcLinearIndex(DispatchRaysIndex().xy / 8);
    FRandomContext CoherentRandContext = FRandomContext::Create(CoherentLinearIndex, View.StateFrameIndex + HistoryReservoir * 32 + 63 * INITIAL_SAMPLE_PASS_NUM);

    // Get Surface Data
    FGBufferSurface Surface = GetGBufferSurfaceFromSceneTexturesLoad(PixelCoord);
    Surface.LightingChannelMask = GetSceneLightingChannel(PixelCoord);

    int LocalMaxTemporalHistory = UseSubsurfaceProfile(Surface.GBuffer.ShadingModelID) ? MaxSSSTemporalHistory : MaxTemporalHistory;

    RTXDI_SDK_Reservoir state = RTXDI_SDK_Reservoir::Empty();

    int2 SelectedPrevSample = -1;

    const bool bIsDepthValid = Surface.DeviceZ > 0.0;
    const bool bIsValidPixel = Surface.GBuffer.ShadingModelID != SHADINGMODELID_UNLIT && bIsDepthValid;

    bool UsePermutationSampling = false;
    if (SpatiallyHashTemporalReprojection != 0)
    {
        UsePermutationSampling = PermutationSamplingMask[PixelCoord].r;
    }

    if (bIsValidPixel)
    {
#if !FUSE_TEMPORAL_AND_INITIAL_SAMPLING
        RTXDI_SDK_Reservoir curSample = RTXDI_SDK_Reservoir::Load(ReadReservoirData(PixelCoord, InputSlice));
#else
        RTXDI_SDK_Reservoir curSample = RTXDI_SDK_Reservoir::Empty();
        InitialCandidateOptions Options;

        Options.NumLocalSamples = NumInitialLocalCandidates;
        Options.NumDirectionalSamples = NumInitialDirectionalCandidates;
        Options.NumSkyLightSamples = RISSkylightBufferTiles > 0 ? NumInitialSkyCandidates : 0;
        Options.NumBrdfSamples = NumInitialBrdfCandidates;
        Options.InitialSampleVisibility = InitialSampleVisibility;

        curSample = ProduceInitialSample(
            PixelCoord,
            Surface,
            Options,
            RandContext,
            CoherentRandContext);
#endif

        int historyLimit = min(RTXDI_SDK_Reservoir::MaxM,LocalMaxTemporalHistory * curSample.M);

        int selectedLightPrevID = -1;

        if (curSample.sampleRef.IsValid())
        {
            uint currentLightID = curSample.sampleRef.GetLightIndex();

            // map back to the previous frame's light list to enable better bias correction
            selectedLightPrevID = SampledLightData.LightIndexBackwardRemapTable[currentLightID];
        }

#if RESERVOIR_CORRECTION_IN_RESAMPLING
        ReverseCorrectReservoir(curSample, Surface);
#endif

        state.CombineReservoirs(curSample, /* random = */ 0.5, curSample.targetPdf);

        // Backproject this pixel to last frame

        // start by just using our sample position
        int2 prevPos = PixelCoord;
        float ExpectedPrevLinearDepth = Surface.GBuffer.Depth;

        float2 ViewUV = (DispatchRaysIndex().xy + 0.5) * View.ViewSizeAndInvSize.zw;
        float4 NDC = float4(ViewUV * float2(2, -2) + float2(-1, 1), Surface.DeviceZ, 1);

        if (Surface.GBuffer.Velocity.x > 0.0)
        {
            float2 Velocity = DecodeVelocityFromTexture(Surface.GBuffer.Velocity).xy;
            float2 PrevNDC = NDC.xy - Velocity;
            float2 PrevUV = (PrevNDC.xy * float2(1, -1) + 1) * 0.5;

            // randomize selection within the footprint of the back-projected sample and clamp to the viewport extents
            if(!UsePermutationSampling)
                PrevUV = PrevUV + (RandContext.GenerateSample2D() - 0.5) * View.ViewSizeAndInvSize.zw;

            prevPos = saturate(PrevUV) * View.ViewSizeAndInvSize.xy + View.ViewRectMin.xy;

            ExpectedPrevLinearDepth = ConvertFromDeviceZ(Surface.DeviceZ - DecodeVelocityFromTexture(Surface.GBuffer.Velocity).z);
        }
        else
        {
            float4 PrevNDC = mul(NDC, View.ClipToPrevClip);
            PrevNDC.xyz /= PrevNDC.w;
            float2 PrevUV = (PrevNDC.xy * float2(1, -1) + 1) * 0.5;

            // randomize selection within the footprint of the back-projected sample and clamp to the viewport extents
            PrevUV = saturate(PrevUV + (RandContext.GenerateSample2D() - 0.5) * View.ViewSizeAndInvSize.zw);

            prevPos = PrevUV * View.ViewSizeAndInvSize.xy + View.ViewRectMin.xy;

            ExpectedPrevLinearDepth = ConvertFromDeviceZ(PrevNDC.z);
        }

        //ToDo - full GBuffer not available for last frame, so we're going to need to approximate using current + channels we have
        // could make a better approximation than what is used here
        FGBufferSurface PrevSurface;
        bool foundNeighbor = false;
        const float radius = 4;

        // Try to find a matching surface in the neighborhood of the reprojected pixel
        for (int i = 0; i < 9; i++)
        {
            int2 offset = 0;
            int2 idx = prevPos;
            if (i > 0)
            {
                offset = int2((RandContext.GenerateSample2D() - 0.5f) * radius);
                idx = prevPos + offset;
            }
            else if(UsePermutationSampling)
            {
                idx = ApplyPermutationSampling(idx);
            }

            PrevSurface = CreateRTXDIPrevSurface(idx, Surface.GBuffer);

            // TODO: refine sample rejection tests
            if (dot(Surface.GBuffer.WorldNormal, PrevSurface.GBuffer.WorldNormal) < TemporalNormalRejectionThreshold)
            {
                continue;
            }

            if (abs(ExpectedPrevLinearDepth - PrevSurface.GBuffer.Depth) / ExpectedPrevLinearDepth > TemporalDepthRejectionThreshold)
            {
                continue;
            }

            prevPos = idx;
            foundNeighbor = true;
            break;
        }

        bool selectedPreviousSample = false;
        uint previousM = 0;
        float previousWeight = 0;

        if (foundNeighbor)
        {
            // Resample the previous frame sample into the current reservoir, but reduce the light's weight
            // according to the bilinear weight of the current pixel
            RTXDI_SDK_Reservoir prevSample = RTXDI_SDK_Reservoir::Load(ReadReservoirHistoryData(prevPos, HistoryReservoir));
            prevSample.M = min(prevSample.M, historyLimit);

            int originalPrevLightID = prevSample.sampleRef.GetLightIndex();

            // Map the light ID from the previous frame into the current frame, if it still exists
            if (prevSample.sampleRef.IsValid())
            {
                int mappedLightID = -1;

                SelectedPrevSample = prevPos;

                // remap light data
                mappedLightID = SampledLightData.LightIndexRemapTable[prevSample.sampleRef.GetLightIndex()];

                // invalid index
                if (mappedLightID == -1)
                {
                    // Kill the reservoir
                    prevSample.weightSum = 0;
                    prevSample.sampleRef = RTXDI_SDK_LightSampleRef::Invalid();
                }
                else
                {
                    // Sample is valid - modify the light ID stored
                    prevSample.sampleRef.SetLightIndex(mappedLightID);
                }
            }

            if (prevSample.sampleRef.IsValid())
            {
                bool Visible = true;
#if (VISIBILITY_BEFORE_COMBINE)
                if (ApplyApproximateVisibilityTest)
                {
                    Visible = LoadAndCheckApproximateVisibility(PixelCoord, Surface, prevSample.sampleRef);
                }
#endif
                // skip combining if we know that the light is not visible from this sample to avoid merging the history
                if (Visible)
                {
                    FSampledLightData Light;
                    FLightSampleLocation LightSample;
                    GetLightSampleData(Surface, prevSample.sampleRef, LightSample, Light);

                    const uint PrevLightIdx = prevSample.sampleRef.GetLightIndex();
                    const bool ValidPrevSample = (LightSample.Pdf > 0.f) && (Surface.LightingChannelMask & GetSampledLightChannelMask(PrevLightIdx));

                    previousM = prevSample.M;
                    previousWeight = ValidPrevSample ? GetApproximateLightSampleWeightWithLight(Surface, LightSample, Light) : 0.f;

#if RESERVOIR_CORRECTION_IN_RESAMPLING
                    if (LightSample.Pdf > 0.f)
                        previousWeight /= LightSample.Pdf;
#endif

                    if (state.CombineReservoirs(prevSample, RandContext.GenerateSample1D(), previousWeight))
                    {
                        selectedPreviousSample = true;
                        selectedLightPrevID = int(originalPrevLightID);
                    }
                }
            }
        }

        // Use prior light history to improve normalization

        // Compute the unbiased normalization term (instead of using 1/M)
        float pi = state.targetPdf;   // Since it was selected, this is known to be equiv to lightWeight(state.sampleRef, context)
        float piSum = state.targetPdf * curSample.M;

        if (state.sampleRef.IsValid() && selectedLightPrevID >= 0 && previousM > 0)
        {
            // remap into last frame's light list
            RTXDI_SDK_LightSampleRef SampleRef = state.sampleRef;
            SampleRef.SetLightIndex(selectedLightPrevID + SampledLightData.LightHistoryOffset);

#if RESERVOIR_CORRECTION_IN_RESAMPLING
            FSampledLightData Light;
            FLightSampleLocation LightSample;
            GetLightSampleData(PrevSurface, SampleRef, LightSample, Light);

            float pt = GetApproximateLightSampleWeightWithLight(PrevSurface, LightSample, Light);
            pt = (LightSample.Pdf > 0.f) ? (pt / LightSample.Pdf) : 0.f;
#else
            float pt = GetApproximateLightSampleWeight(PrevSurface, SampleRef);
#endif

#if (!VISIBILITY_BEFORE_COMBINE)
            //ToDo - does this need to be a permutation?
            if (ApplyApproximateVisibilityTest && pt > 0)
            {
                bool Visible = LoadAndCheckApproximateVisibility(PixelCoord, Surface, state.sampleRef);

                if (!Visible)
                {
                    pt = 0;
                }
            }
#endif

            pi = selectedPreviousSample ? pt : pi;
            piSum += pt * previousM;
        }
#if RESERVOIR_CORRECTION_IN_RESAMPLING
        CorrectReservoir(state, Surface);
#endif

        state.FinalizeResampling(pi, piSum);
    }

    WriteReservoirData(PixelCoord, OutputSlice, state.Store());

    uint EncodedSample = (SelectedPrevSample.x & 0xffff) | ((SelectedPrevSample.y & 0xffff) << 16);
    RWPreviousSampleUAV[PixelCoord] = EncodedSample;
}
yasukichi commented 8 months ago

ApplyBoilingFilterCS

};

- シェーダー(BoilingFilter.usf)
```C++
// Reservoir slice that BoilingFilterCS reads from (passed to ReadReservoirData).
int InputSlice;
// Reservoir slice that BoilingFilterCS writes to (passed to WriteReservoirData).
int OutputSlice;
// Forwarded to RTXDI_SDK_BoilingFilter as FilterStrength; the filter only runs when this is > 0.
float BoilingFilterStrength;

/***************************************************************************************************
 *
 *  Boiling Filter
 *
 *  Identify runaway samples and kill them to avoid them dominating the result.
 *
 ***************************************************************************************************/

// Divisor used to size the per-wave scratch arrays below: (threads per group) / (this value) slots.
// NOTE(review): this presumably assumes no wave has fewer than 32 lanes; a smaller wave size would
// produce more waves per group than there are slots - confirm for the target hardware.
#define RTXDI_BOILING_FILTER_MIN_LANE_COUNT 32

// Per-wave sum of reservoir weights. Slot 0 is later overwritten with the group-wide average
// of the nonzero weights, which every thread then reads back.
groupshared float s_weights[(RTXDI_BOILING_FILTER_GROUP_SIZE * RTXDI_BOILING_FILTER_GROUP_SIZE) / RTXDI_BOILING_FILTER_MIN_LANE_COUNT];
// Per-wave count of lanes whose weight is > 0.
groupshared uint s_count[(RTXDI_BOILING_FILTER_GROUP_SIZE * RTXDI_BOILING_FILTER_GROUP_SIZE) / RTXDI_BOILING_FILTER_MIN_LANE_COUNT];

// Boiling filter that is applied at the end of the temporal resampling pass.
// The filter analyzes the weights of all reservoirs in a thread group, and discards
// the reservoirs whose weights are very high, i.e. above a certain threshold.
// If the temporal resampling pass is implemented as a ray-gen shader, and therefore
// doesn't have access to thread groups or shared memory, the boiling filter can be applied
// as a separate compute pass.
// Returns true if the reservoir in this thread passes the filter and should be kept.
//
// Parameters:
//   LocalIndex     - 2D index of this thread inside its thread group; flattened below with
//                    RTXDI_BOILING_FILTER_GROUP_SIZE as the row pitch.
//   FilterStrength - values <= 0 skip the filter entirely (the function returns true).
//                    At 1.0 the rejection threshold equals the group average; smaller
//                    values raise the threshold (see the multiplier below).
//   weight         - this thread's reservoir weight; only weights > 0 are counted when
//                    the average is formed.
//
// The body contains GroupMemoryBarrierWithGroupSync() calls, so it is expected to be reached
// by every thread of the group (BoilingFilterCS calls it unconditionally).
bool RTXDI_SDK_BoilingFilter(uint2 LocalIndex, float FilterStrength, float weight)
{
    // Apply the boiling filter.
    // Boiling happens when some highly unlikely light is discovered and it is relevant
    // for a large surface area around the pixel that discovered it. Then this light sample
    // starts to propagate to the neighborhood through spatiotemporal reuse, which looks like
    // a flash. We can detect such lights because their weight is significantly higher than
    // the weight of their neighbors. So, compute the average group weight and apply a threshold.
    if (FilterStrength > 0.f)
    {
        // Threshold multiplier applied to the group average: strength 1.0 -> 1, 0.5 -> 11, 0.1 -> 91.
        // NOTE(review): a strength above 10/9 makes the multiplier negative, which rejects every
        // reservoir in a group whose average is positive - confirm the caller limits the range.
        float boilingFilterMultiplier = 10.f / FilterStrength - 9.f;

        // Start with average nonzero weight within the wavefront
        float waveWeight = WaveActiveSum(weight);
        uint waveCount = WaveActiveCountBits(weight > 0);

        // Store the results of each wavefront into shared memory
        // NOTE(review): waveIndex assumes each wave holds consecutive linearThreadIndex values - confirm.
        uint linearThreadIndex = LocalIndex.x + LocalIndex.y * RTXDI_BOILING_FILTER_GROUP_SIZE;
        uint waveIndex = linearThreadIndex / WaveGetLaneCount();

        // One lane per wave publishes that wave's totals.
        if (WaveIsFirstLane())
        {
            s_weights[waveIndex] = waveWeight;
            s_count[waveIndex] = waveCount;
        }

        // Wait until every wave has published its totals.
        GroupMemoryBarrierWithGroupSync();

        // Reduce the per-wavefront averages into a global average using one wavefront
        // (only the first "threads per group / lane count" threads take part, one per slot written above).
        if (linearThreadIndex < (RTXDI_BOILING_FILTER_GROUP_SIZE * RTXDI_BOILING_FILTER_GROUP_SIZE) / WaveGetLaneCount())
        {
            waveWeight = s_weights[linearThreadIndex];
            waveCount = s_count[linearThreadIndex];

            waveWeight = WaveActiveSum(waveWeight);
            waveCount = WaveActiveSum(waveCount);

            // Thread 0 stores the group average of nonzero weights (0 when no weight was nonzero).
            if (linearThreadIndex == 0)
            {
                s_weights[0] = (waveCount > 0) ? (waveWeight / float(waveCount)) : 0.0;
            }
        }

        // Wait until the group average is available to all threads.
        GroupMemoryBarrierWithGroupSync();

        // Read the per-group average and apply the threshold
        float averageNonzeroWeight = s_weights[0];
        if (weight > averageNonzeroWeight * boilingFilterMultiplier)
        {
            return false;
        }
    }

    return true;
}

// Compute entry point for the boiling filter. Each thread loads one reservoir from InputSlice,
// runs it through RTXDI_SDK_BoilingFilter together with the rest of its thread group, and writes
// the kept (or emptied) reservoir to OutputSlice. Threads that fall outside the view still take
// part in the filter using a reservoir read one group size up/left, but never write.
[numthreads(RTXDI_BOILING_FILTER_GROUP_SIZE, RTXDI_BOILING_FILTER_GROUP_SIZE, 1)]
void BoilingFilterCS(uint3 GlobalIndex : SV_DispatchThreadID, uint3 LocalIndex : SV_GroupThreadID)
{
    const bool bOutsideView = any(GlobalIndex.xy >= View.ViewSizeAndInvSize.xy);
    const bool bInsideView = all(GlobalIndex.xy < View.ViewSizeAndInvSize.xy);

    uint2 ReservoirCoord = GlobalIndex.xy + View.ViewRectMin.xy;
    if (bOutsideView)
    {
        // Step back by one group so the thread still has a reservoir to feed into the group average.
        ReservoirCoord -= int2(RTXDI_BOILING_FILTER_GROUP_SIZE, RTXDI_BOILING_FILTER_GROUP_SIZE);
    }

    RTXDI_SDK_Reservoir Reservoir = RTXDI_SDK_Reservoir::Load(ReadReservoirData(ReservoirCoord, InputSlice));

    // The filter is invoked by every thread, including those outside the view.
    const bool bKeepReservoir = RTXDI_SDK_BoilingFilter(LocalIndex.xy, BoilingFilterStrength, Reservoir.weightSum);
    if (!bKeepReservoir)
    {
        Reservoir = RTXDI_SDK_Reservoir::Empty();
    }

    if (bInsideView)
    {
        WriteReservoirData(ReservoirCoord, OutputSlice, Reservoir.Store());
    }
}
yasukichi commented 8 months ago

ApplySpatialResamplingRGS

};

- シェーダー(ApplySpatialResampling.usf)
```C++
// Shader parameters for ApplySpatialResamplingRGS.

// Reservoir slice read for the center pixel (ReadReservoirData); also forwarded in the sampling parameters.
int     InputSlice;
// Reservoir slice the resampled result is written to (WriteReservoirData).
int     OutputSlice;
// Mixed into the per-pixel random seed (HistoryReservoir * 32).
int     HistoryReservoir;
// The values below are forwarded to ApplySpatialResampling through SpatialSamplingParameters.
int     SpatialSamples;
int     SpatialSamplesBoost;
int     ApplyApproximateVisibilityTest;
int     DiscountNaiveSamples;
float   SpatialSamplingRadius;
float   SpatialDepthRejectionThreshold;
float   SpatialNormalRejectionThreshold;
// History limit forwarded for pixels whose shading model does not use a subsurface profile.
int     MaxTemporalHistory;
// History limit forwarded for pixels whose shading model uses a subsurface profile.
int     MaxSSSTemporalHistory;
// Forwarded as the last sampling parameter.
uint    NeighborOffsetMask;

/***************************************************************************************************
 *
 *  ApplySpatialResampling
 *
 *  Shader to handle spatial resampling of light reservoirs.
 *
 ***************************************************************************************************/
//
// Presently, using RGS as it allows occlusion rays. Simplifies code management to not have shader
// type not change due to runtime option
//
// Ray-generation entry point, one thread per view pixel. Loads the pixel's reservoir from
// InputSlice, runs ApplySpatialResampling on it and stores the result in OutputSlice.
// Pixels that are unlit or have no valid depth are written out as an empty reservoir.
[shader("raygeneration")]
void ApplySpatialResamplingRGS()
{
    uint2 PixelCoord = DispatchRaysIndex().xy + View.ViewRectMin.xy;

    // Valid view-relative indices are 0 .. size - 1, so an index equal to the view size is
    // already outside the view and has to be rejected too (same test as BoilingFilterCS).
    if (any(DispatchRaysIndex().xy >= View.ViewSizeAndInvSize.xy))
    {
        return;
    }

    // Per-pixel random sequence, decorrelated per frame, per history reservoir and per pass.
    uint LinearIndex = CalcLinearIndex(PixelCoord);
    FRandomContext RandContext = FRandomContext::Create(LinearIndex, View.StateFrameIndex + HistoryReservoir * 32 + SPATIAL_SAMPLE_PASS_NUM * 63);

    // Get Surface data
    FGBufferSurface Surface = GetGBufferSurfaceFromSceneTexturesLoad(PixelCoord);

    RTXDI_SDK_Reservoir state = RTXDI_SDK_Reservoir::Empty();

    const bool bIsDepthValid = Surface.DeviceZ > 0.0;
    const bool bIsValidPixel = Surface.GBuffer.ShadingModelID != SHADINGMODELID_UNLIT && bIsDepthValid;

    if (bIsValidPixel)
    {
        RTXDI_SDK_Reservoir CenterSample = RTXDI_SDK_Reservoir::Load(ReadReservoirData(PixelCoord, InputSlice));

        // Positional initializer: the order has to match the SpatialSamplingParameters declaration
        // (not visible in this file excerpt - verify when adding or reordering fields).
        SpatialSamplingParameters Params = 
        {
            SpatialSamples,
            SpatialSamplesBoost,
            SpatialSamplingRadius,
            SpatialNormalRejectionThreshold,
            SpatialDepthRejectionThreshold,
            InputSlice,
            DiscountNaiveSamples,
            ApplyApproximateVisibilityTest,
            true,
            // Subsurface-profile materials use their own history limit.
            UseSubsurfaceProfile(Surface.GBuffer.ShadingModelID) ? MaxSSSTemporalHistory : MaxTemporalHistory,
            NeighborOffsetMask
        };
/* here (marker left by the note's author on the call of interest) */
        ApplySpatialResampling(PixelCoord, Surface, CenterSample, Params, RandContext, state);
    }

    WriteReservoirData(PixelCoord, OutputSlice, state.Store());
}

if RESERVOIR_CORRECTION_IN_RESAMPLING

ReverseCorrectReservoir(centerSample, Surface);

endif

state.CombineReservoirs(centerSample, /* random = */ 0.5f, centerSample.targetPdf);

//
// Walk the specified number of neighbors, resampling using RIS
//

// Two sample modes for spatial resampling
//   predefined low-discrepancy sequence
//   random data stored in local array for use with second pass
int NumSamples = Param.SpatialSamples;
if (centerSample.M < Param.MaxTemporalHistory)
{
    NumSamples = max(NumSamples, Param.SpatialSamplesBoost);
}

if !USE_LDS_FOR_SPATIAL_RESAMPLE

NumSamples = min(NumSamples, MAX_SPATIAL_SAMPLES);

int2 SamplePoints[MAX_SPATIAL_SAMPLES] = (int2[MAX_SPATIAL_SAMPLES])0;

else

uint StartIdx = RandContext.GenerateSample1D() * Param.NeighborOffsetMask;

// using uint mask to track samples, so absolute limit is 32
NumSamples = min(NumSamples, 32);

endif

for (int i = 0; i < NumSamples; ++i)
{

if !USE_LDS_FOR_SPATIAL_RESAMPLE

    float2 Offset = RandContext.GenerateSample2D() * 2.0f - 1.0f;
    int2 SampleCoord = round(float2(PixelCoord) + (Offset * Param.SpatialSamplingRadius));

    SamplePoints[i] = SampleCoord;

else

    float2 Offset = NeighborOffsets[(StartIdx + i) & Param.NeighborOffsetMask] * 2.0f - 1.0f;
    int2 SampleCoord = round(float2(PixelCoord) + (Offset * Param.SpatialSamplingRadius));

endif

    if (any(SampleCoord < 0) || any(SampleCoord >= View.BufferSizeAndInvSize.xy) || all(SampleCoord == PixelCoord))
    {
        continue;
    }

    // Get adjacent Surface data
    FGBufferSurface AdjSurface = GetGBufferSurfaceFromSceneTexturesLoad(SampleCoord);

    // TODO: refine sample rejection tests
    if (dot(Surface.GBuffer.WorldNormal, AdjSurface.GBuffer.WorldNormal) < Param.SpatialNormalRejectionThreshold)
    {
        continue;
    }

    if (abs(Surface.GBuffer.Depth - AdjSurface.GBuffer.Depth) / Surface.GBuffer.Depth > Param.SpatialDepthRejectionThreshold)
    {
        continue;
    }

    if (Surface.GBuffer.ShadingModelID != AdjSurface.GBuffer.ShadingModelID)
    {
        continue;
    }

    RTXDI_SDK_Reservoir neighborSample = RTXDI_SDK_Reservoir::Load(ReadReservoirData(SampleCoord, Param.InputSlice));

if RESERVOIR_CORRECTION_IN_RESAMPLING

    ReverseCorrectReservoir(neighborSample, AdjSurface);

endif

    if (Param.DiscountNaiveSamples && neighborSample.M <= 2)
    {
        // Reject the sample to avoid merging data for which we have little confidence
        continue;
    }

    // Load that neighbor's RIS state, do resampling
    float neighborWeight = 0.0f;
    if (neighborSample.sampleRef.IsValid())
    {
        //ToDo - does this need to be a permutation?
        bool Visible = true;
        FSampledLightData Light;
        FLightSampleLocation LightSample;

        GetLightSampleData(Surface, neighborSample.sampleRef, LightSample, Light);

if (VISIBILITY_BEFORE_COMBINE)

        if (Param.ApplyApproximateVisibilityTest)
        {
            Visible = CheckApproximateVisibility(SampleCoord, Surface, LightSample);
        }

endif

        neighborWeight = Visible ? GetApproximateLightSampleWeightWithLight(Surface, LightSample, Light) : 0;

if RESERVOIR_CORRECTION_IN_RESAMPLING

        if (LightSample.Pdf > 0.f)
            neighborWeight /= LightSample.Pdf;

endif

        {
            cachedResult |= (1u << uint(i));
            if (state.CombineReservoirs(neighborSample, RandContext.GenerateSample1D(), neighborWeight))
            {
                selected = i;
                selectedLightIdx = neighborSample.sampleRef.GetLightIndex();
                SelectedUV = neighborSample.sampleRef.GetUV();
            }
        }
    }
}

if (state.sampleRef.IsValid())
{
    if (Param.UseMISRenormalization)
    {
        // Compute the unbiased normalization term (instead of using 1/M)
        float pi = state.targetPdf;
        float piSum = state.targetPdf * centerSample.M;

        // To do this, we need to walk our neighbors again
        for (int i = 0; i < NumSamples; ++i)
        {
            // If we skipped this neighbor above, do so again.
            if ((cachedResult & (1u << uint(i))) == 0) continue;

            // Handle the rare cases when there is no light
            if (selectedLightIdx == -1) continue;

if !USE_LDS_FOR_SPATIAL_RESAMPLE

            int2 SampleCoord = SamplePoints[i];

else

            float2 Offset = NeighborOffsets[(StartIdx + i) & Param.NeighborOffsetMask] * 2.0f - 1.0f;
            int2 SampleCoord = round(float2(PixelCoord) + (Offset * Param.SpatialSamplingRadius));

endif

            // Get adjacent Surface data
            FGBufferSurface AdjSurface = GetGBufferSurfaceFromSceneTexturesLoad(SampleCoord);

if RESERVOIR_CORRECTION_IN_RESAMPLING

            FSampledLightData Light;
            FLightSampleLocation LightSample;
            GetLightSampleData(AdjSurface, state.sampleRef, LightSample, Light);

            // Get the PDF of the sample RIS selected in the first loop, above, *at this neighbor* 
            float ps = GetApproximateLightSampleWeightWithLight(AdjSurface, LightSample, Light);

            ps = (LightSample.Pdf > 0.f) ? (ps / LightSample.Pdf) : 0.f;

else

            float ps = GetApproximateLightSampleWeight(AdjSurface, state.sampleRef);

endif

if 1

            //ToDo - does this need to be a permutation?
            if (Param.ApplyApproximateVisibilityTest && ps > 0)
            {
                bool Visible = LoadAndCheckApproximateVisibility(SampleCoord, AdjSurface, state.sampleRef);

                if (!Visible)
                {
                    ps = 0;
                }
            }

endif

            RTXDI_SDK_Reservoir neighborSample = RTXDI_SDK_Reservoir::Load(ReadReservoirData(SampleCoord, Param.InputSlice));

            // Select this sample for the (normalization) numerator if this particular neighbor pixel
            //     was the one we selected via RIS in the first loop, above.
            pi = selected == i ? ps : pi;

            // Add to the sums of weights for the (normalization) denominator
            piSum += ps * neighborSample.M;
        }

if RESERVOIR_CORRECTION_IN_RESAMPLING

        CorrectReservoir(state, Surface);

endif

        // Use "MIS-like" normalization
        state.FinalizeResampling(pi, piSum);
    }
    else
    {

if RESERVOIR_CORRECTION_IN_RESAMPLING

        CorrectReservoir(state, Surface);

endif

        state.FinalizeResampling(1.0, state.M);
    }
}

}

yasukichi commented 8 months ago

EvaluateSampledLightingRGS

int InputSlice; int NumReservoirs;

int MaxTemporalHistory; int MaxSSSTemporalHistory; int DemodulateMaterials; int SecondarySpatialPassSamples; int FeedbackVisibility; uint AlwaysSampleDirLight; uint NeighborOffsetMask;

/*** *

if RTXDI_IGNORE_BRDF_BASECOLOR

            // restore the colors
            Surface.GBuffer.DiffuseColor = DiffuseColor;
            Surface.GBuffer.SpecularColor = SpecularColor;

endif

        }

        if (risSample.sampleRef.IsValid())
        {
            HitLightingSamplingResult LocalResult = (HitLightingSamplingResult)0;
            LocalResult = EvalLightingInternal< HitLightingSamplingResult, FGBufferSurface>( risSample.sampleRef, risSample.weightSum,  /* UCW */
                                                                      PixelCoord, Surface, bInexactShadowedGeometry, 
                RandContext);

            if (LocalResult.bEvaluateContribution)
            {
                if (Reservoir == 0)
                {
                    LuminanceHistory.x = Luminance(LocalResult.DiffuseExitantRadiance.xyz);
                    LuminanceHistory.y = Luminance(LocalResult.SpecularExitantRadiance.xyz);
                }
            }
            else
            {
                if (FeedbackVisibility)
                {
                    risSample.weightSum = 0.0f;
                    risSample.targetPdf = 0.0f;
                }
            }

            DiffuseExitantRadiance += LocalResult.DiffuseExitantRadiance.xyz;
            SpecularExitantRadiance += LocalResult.SpecularExitantRadiance.xyz;
            HitCount += LocalResult.HitCount;
            RayDistance += LocalResult.RayDistance;

if RESERVOIR_CORRECTION_IN_RESAMPLING

            ReverseCorrectReservoir(risSample, Surface);

endif

        }
        else
        {
            // sample occluded, kill it for the history
            risSample = RTXDI_SDK_Reservoir::Empty();
        }
        WriteReservoirHistoryData(PixelCoord, Reservoir, risSample.Store());
    }

    DiffuseExitantRadiance /= float(NumReservoirs);
    SpecularExitantRadiance /= float(NumReservoirs);

    RayDistance /= max(HitCount, 1.0);

    // Apply the pre-exposure scale to the resulting shaded value
    DiffuseExitantRadiance *= View.PreExposure;
    SpecularExitantRadiance *= View.PreExposure;

    if (DemodulateMaterials  && Surface.GBuffer.ShadingModelID != SHADINGMODELID_HAIR)
    {
        DiffuseExitantRadiance /= Surface.GBuffer.DiffuseColor * 0.999 + 0.001;
        SpecularExitantRadiance /= Surface.GBuffer.SpecularColor * 0.999 + 0.001;
    }

    DiffuseExitantRadiance = ClampToHalfFloatRange(DiffuseExitantRadiance);
    SpecularExitantRadiance = ClampToHalfFloatRange(SpecularExitantRadiance);
}
else
{
    // Invalid pixel, write empty reservoir
    for (int Reservoir = 0; Reservoir < NumReservoirs; Reservoir++)
    {
        WriteReservoirHistoryData(PixelCoord, Reservoir, risSample.Store());
    }
}

RWLuminanceUAV[PixelCoord] = ClampToHalfFloatRange(LuminanceHistory* View.PreExposure);

RWDiffuseUAV[PixelCoord].rgb = DiffuseExitantRadiance;
RWDiffuseUAV[PixelCoord].a = AlwaysSampleDirLight ? HitCount : 1.0;

RWSpecularUAV[PixelCoord].rgb = SpecularExitantRadiance;
RWSpecularUAV[PixelCoord].a = 1.0;

RWRayDistanceUAV[PixelCoord] = RayDistance;

}

yasukichi commented 8 months ago

SampleDirectLighting

// Rescales a reservoir by the PDF of its selected light sample as evaluated on Surface:
// weightSum is multiplied by the PDF and targetPdf is divided by it. A reservoir without a
// valid sample is left untouched; a sample whose PDF is not positive has both values zeroed.
void ReverseCorrectReservoir(inout RTXDI_SDK_Reservoir Reservoir, in FGBufferSurface Surface)
{
    if (Reservoir.sampleRef.IsValid())
    {
        FSampledLightData SampledLight;
        FLightSampleLocation SampleLocation;
        GetLightSampleData(Surface, Reservoir.sampleRef, SampleLocation, SampledLight);

        if (!(SampleLocation.Pdf > 0.f))
        {
            // No usable PDF: the sample cannot contribute.
            Reservoir.weightSum = 0.f;
            Reservoir.targetPdf = 0.f;
        }
        else
        {
            Reservoir.weightSum *= SampleLocation.Pdf;
            Reservoir.targetPdf /= SampleLocation.Pdf;
        }
    }
}

// Traces one visibility ray from the surface towards a light sample and returns true when
// nothing is hit. The ray starts at the surface's translated world position, runs along
// LightSample.Direction and ends at LightSample.Distance.
template<>
bool CheckApproximateVisibility(uint2 PixelCoord, FGBufferSurface Surface, FLightSampleLocation LightSample)
{
    FRayDesc ShadowRay;
    ShadowRay.Origin = Surface.TranslatedWorldPosition;
    ShadowRay.Direction = LightSample.Direction;
    ShadowRay.TMin = 0.1f;
    ShadowRay.TMax = LightSample.Distance;

    if (InexactShadows != 0)
    {
        const uint StencilValue = SceneStencilTexture.Load(int3(PixelCoord, 0)) STENCIL_COMPONENT_SWIZZLE;
        if (StencilValue & 1)
        {
            // Always bias the max amount as this is the conservative test
            ShadowRay.TMin += MaxBiasForInexactGeometry;
        }
    }

    // Keep the start distance at or above the self-intersection guard, but never past the ray end.
    ShadowRay.TMin = min(max(ShadowRay.TMin, AvoidSelfIntersectionDistance), ShadowRay.TMax);

    // ToDo - respect the transmissive flag on lights
    const float NormalDotLight = dot(Surface.GetWorldNormal(), ShadowRay.Direction);
    if (NormalDotLight > 0.0f)
    {
        ApplyCameraRelativeDepthBias(ShadowRay, PixelCoord, Surface.DeviceZ, Surface.GetWorldNormal(), MaxNormalBias);
    }
    else
    {
        // Light is at or behind the horizon of the surface: bias along the flipped normal.
        ApplyPositionBias(ShadowRay, -Surface.GetWorldNormal(), MaxNormalBias);
    }

    // Any hit is enough for a visibility query, and no closest-hit shader is needed.
    uint TraceFlags = RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH | RAY_FLAG_SKIP_CLOSEST_HIT_SHADER;

    if (VisibilityFaceCull == 1)
    {
        TraceFlags |= RAY_FLAG_CULL_FRONT_FACING_TRIANGLES;
    }
    else if (VisibilityFaceCull == 2)
    {
        TraceFlags |= RAY_FLAG_CULL_BACK_FACING_TRIANGLES;
    }

    if (VisibilityApproximateTestMode == 1)
    {
        TraceFlags |= RAY_FLAG_FORCE_OPAQUE;
    }
    else if (VisibilityApproximateTestMode == 2)
    {
        TraceFlags |= RAY_FLAG_CULL_NON_OPAQUE;
    }

    const FMinimalPayload Payload = TraceVisibilityRay(
        TLAS,
        TraceFlags,
        RAY_TRACING_MASK_SHADOW,
        ShadowRay);

    return !Payload.IsHit();
}

// Resolves a light sample reference into light data and a sample location on the given surface,
// then runs CheckApproximateVisibility on it. Returns true when the sample is visible.
//   PixelCoord - pixel the surface belongs to (forwarded to the visibility test)
//   Surface    - surface the light sample is evaluated against
//   SampleRef  - light index and UV identifying the sample
template<typename SurfaceType>
bool LoadAndCheckApproximateVisibility(uint2 PixelCoord, SurfaceType Surface, RTXDI_SDK_LightSampleRef SampleRef)
{
    uint LightIndex = SampleRef.GetLightIndex();
    float2 LightUV = SampleRef.GetUV();

    FSampledLightData Light = GetSampledDeferredLightDataWithSkylight(LightIndex, LightUV, SkylightTexture, SkylightTextureSampler);
    FLightSampleLocation LightSample = ComputeLightSampleLocation(Light, Surface, LightUV);

    return CheckApproximateVisibility(PixelCoord, Surface, LightSample);
}

// Estimate the radiance emitted by a light that contributes to the given GBuffer Surface.
// Returns the unshadowed estimate (diffuse + specular + transmission, scaled by the light
// radiance and the sample attenuation), clamped to be non-negative. The estimate is zero
// when the light sample's PDF is not positive.
template<>
float3 GetDynamicLightingEstimate(FGBufferSurface Surface, FLightSampleLocation LightSample, FSampledLightData Light)
{
    const FDeferredLightData LightData = Light.LightData;

    const float3 L = LightSample.Direction;
    const float3 V = -Surface.ViewDirection;
    const float3 N = GetClearCoatBottomNormal(Surface.GBuffer, Surface.GetWorldNormal());

    const float NoL = saturate(dot(N, L));

    const FShadowTerms ShadowTerms = { 0.0, 1.0, 0.0, LightData.HairTransmittance };
    FDirectLighting Lighting = EvaluateBxDF(Surface.GBuffer, N, V, L, NoL, ShadowTerms);
    Lighting.Specular *= LightData.SpecularScale; //extra scale to reduce/enhance specular from lights

    // JCaoNV - Ideally, this doesn't need to be here. That way we can probably remove Pdf in the FLightSampleLocation structure.
    const float LightMask = (LightSample.Pdf > 0.0) ? 1.f : 0.f;

    // The target PDF we are currently using is the unshadowed light contribution from the rendering equation.
    // Note that the cosine term in the rendering equation is already encoded inside the `FDirectLighting` result.
    // The only thing that is missing is the visibility and that is done on purpose to keep performance in budget.
    const float3 LightRadiance = Light.GetLightRadiance();
    const float3 LightEstimate = LightSample.Attenuation * LightMask * (Lighting.Diffuse + Lighting.Specular + Lighting.Transmission) * LightRadiance;

    return max(LightEstimate, 0.0f);
}

//------------------------------------------------------------------------------------------------------------------------------------
// Light Sample Weight
//------------------------------------------------------------------------------------------------------------------------------------

// Scalar resampling weight of an already-resolved light sample on a surface:
// the luminance of GetDynamicLightingEstimate for that surface/sample/light.
template<typename SurfaceType>
float GetApproximateLightSampleWeightWithLight(SurfaceType Surface, FLightSampleLocation LightSample, FSampledLightData Light)
{
    return Luminance(GetDynamicLightingEstimate(Surface, LightSample, Light));
}

// Convenience overload: resolves SampleRef into light data and a sample location on Surface
// via GetLightSampleData, then returns GetApproximateLightSampleWeightWithLight for them.
template<typename SurfaceType>
float GetApproximateLightSampleWeight(SurfaceType Surface, RTXDI_SDK_LightSampleRef SampleRef)
{
    FSampledLightData Light;
    FLightSampleLocation LightSample;
    GetLightSampleData(Surface, SampleRef, LightSample, Light);

    return GetApproximateLightSampleWeightWithLight(Surface, LightSample, Light);
}

// Builds the initial reservoir for one pixel by drawing one candidate reservoir per light
// category (local, directional, skylight and, when SAMPLE_BRDF_RAYS is set, BRDF) and merging
// them into a single reservoir.
//
// Options.InitialSampleVisibility selects how visibility is applied:
//   2 - the local and directional candidates are visibility-tested before merging; an occluded
//       candidate is merged with weight 0
//   1 - only the finally selected sample is tested; if occluded the reservoir is invalidated
//   any other value - no visibility test
//
// The returned reservoir is finalized with (1, 1) and its M is forced to 1.
template<typename SurfaceType>
RTXDI_SDK_Reservoir ProduceInitialSample(
    uint2 PixelCoord,
    SurfaceType Surface,
    InitialCandidateOptions Options,
    inout FRandomContext RandContext,
    inout FRandomContext CoherentRandContext)
{
    /* here (marker left by the note's author) */
    RTXDI_SDK_Reservoir LocalReservoir = ProduceLocalSample(Surface, Options, RandContext, CoherentRandContext);
    /* here (marker left by the note's author) */
    RTXDI_SDK_Reservoir DirectionalReservoir = ProduceDirectionalSample(Surface, Options, RandContext, CoherentRandContext);
    /* here (marker left by the note's author) */
    RTXDI_SDK_Reservoir SkylightReservoir = ProduceSkylightSample(Surface, Options, RandContext, CoherentRandContext);

#if SAMPLE_BRDF_RAYS
    /* here (marker left by the note's author) */
    RTXDI_SDK_Reservoir BrdfReservoir = ProduceBrdfSample(Surface, Options, RandContext, CoherentRandContext);
#endif // SAMPLE_BRDF_RAYS

    RTXDI_SDK_Reservoir State = RTXDI_SDK_Reservoir::Empty();

    float LocalVisibilityFactor = 1.0f;
    float DirectionalVisibilityFactor = 1.0f;
    bool PostTestVisibility = Options.InitialSampleVisibility == 1;

    // Mode 2: test the local and directional candidates up front so occluded ones get zero weight.
    if (Options.InitialSampleVisibility == 2)
    {
        if (LocalReservoir.sampleRef.IsValid())
        {
            LocalVisibilityFactor = LoadAndCheckApproximateVisibility(PixelCoord, Surface, LocalReservoir.sampleRef) ? 1.0f : 0.0f;
        }
        if (DirectionalReservoir.sampleRef.IsValid())
        {
            DirectionalVisibilityFactor = LoadAndCheckApproximateVisibility(PixelCoord, Surface, DirectionalReservoir.sampleRef) ? 1.0f : 0.0f;
        }
    }

    // The first merge goes into an empty reservoir and uses a fixed 0.5 instead of a random draw.
    State.CombineReservoirs(LocalReservoir, 0.5f, LocalReservoir.targetPdf * LocalVisibilityFactor);
    State.CombineReservoirs(DirectionalReservoir, RandContext.GenerateSample1D(), DirectionalReservoir.targetPdf * DirectionalVisibilityFactor);
    State.CombineReservoirs(SkylightReservoir, RandContext.GenerateSample1D(), SkylightReservoir.targetPdf);

#if SAMPLE_BRDF_RAYS
    State.CombineReservoirs(BrdfReservoir, RandContext.GenerateSample1D(), BrdfReservoir.targetPdf);
#endif // SAMPLE_BRDF_RAYS

    // Mode 1: a single visibility test on whichever sample won the merge.
    if (PostTestVisibility && State.sampleRef.IsValid())
    {
        bool Visible = LoadAndCheckApproximateVisibility(PixelCoord, Surface, State.sampleRef);
        if (!Visible)
        {
            State.sampleRef = RTXDI_SDK_LightSampleRef::Invalid();
            State.weightSum = 0;
            State.targetPdf = 0;
        }
    }

    State.FinalizeResampling(1.0f, 1.0f);
    State.M = 1;

    return State;
}

// Produces a finalized reservoir (M forced to 1) holding one local-light
// sample. The sampling strategy is chosen at compile time through
// RIS_SAMPLE_LOCAL_LIGHTS:
//   RIS_SAMPLE_LOCAL_LIGHTS_DIRECT - ProduceLocalSampleRandom only
//   RIS_SAMPLE_LOCAL_LIGHTS_REGIR  - ProduceLocalSampleRandom + ProduceLocalSampleReGIR for a
//                                    valid grid cell, ProduceLocalSampleRandom only otherwise
//   RIS_SAMPLE_LOCAL_LIGHTS_RIS    - ProduceLocalSampleRIS
template<typename SurfaceType>
RTXDI_SDK_Reservoir ProduceLocalSample(
    SurfaceType Surface,
    InitialCandidateOptions Options,
    inout FRandomContext RandContext,
    inout FRandomContext CoherentRandContext)
{
    RTXDI_SDK_Reservoir LocalReservoir = RTXDI_SDK_Reservoir::Empty();

#if (RIS_SAMPLE_LOCAL_LIGHTS == RIS_SAMPLE_LOCAL_LIGHTS_DIRECT)

    // This is fine since this will not be read
    const int FakeReGIRCell = 0;
    const float FakeInvApproximatedNormalizationFactor = 1.f;

    ProduceLocalSampleRandom(
        Surface,
        Options,
        FakeReGIRCell,
        FakeInvApproximatedNormalizationFactor,
        RandContext,
        CoherentRandContext,
        LocalReservoir);

#elif (RIS_SAMPLE_LOCAL_LIGHTS == RIS_SAMPLE_LOCAL_LIGHTS_REGIR)

    // Jitter the lookup position by a random offset in [-0.5, 0.5)^3 scaled by
    // ReGIRGetJitterScale before resolving the grid cell.
    float3 CellJitter = CoherentRandContext.GenerateSample3D();
    CellJitter -= 0.5f;

    const float JitterScale = ReGIRGetJitterScale(Surface.TranslatedWorldPosition);
    const float3 SamplePosition = Surface.TranslatedWorldPosition + CellJitter * JitterScale;

    const int Cell = ReGIRGetCell(SamplePosition);

    if (Cell == -1)
    {
        // no ReGIR cell found, due to being outside the grid, fallback
        ProduceLocalSampleRandom(
            Surface,
            Options,
            Cell,
            1.f,    /* won't be used */
            RandContext,
            CoherentRandContext,
            LocalReservoir);
    }
    else
    {
        // Please refer to the [reference 2] for further implementation detail in this code branch
        const float ApproximatedNormalizationFactor = ReGIRNormalizationBuffer[Cell];
        const float InvApproximatedNormalizationFactor = ApproximatedNormalizationFactor > 0.f ? (1.f / ApproximatedNormalizationFactor) : 0.f;

        // Multiple importance sampling is needed here to counter the potential bad samples produced from ReGIR,
        // which may cause boiling issues.
        ProduceLocalSampleRandom(
            Surface,
            Options,
            Cell,
            InvApproximatedNormalizationFactor,
            RandContext,
            CoherentRandContext,
            LocalReservoir,
            Options.NumLocalSamples);

        ProduceLocalSampleReGIR(
            Surface,
            Options,
            Cell,
            InvApproximatedNormalizationFactor,
            RandContext,
            CoherentRandContext,
            LocalReservoir);
    }

#elif (RIS_SAMPLE_LOCAL_LIGHTS == RIS_SAMPLE_LOCAL_LIGHTS_RIS)

    ProduceLocalSampleRIS(
        Surface,
        Options,
        RandContext,
        CoherentRandContext,
        LocalReservoir);

#else

#error "Undefined local light sampling method"

#endif

    LocalReservoir.FinalizeResampling(1.0, 1.0);
    LocalReservoir.M = 1;

    return LocalReservoir;
}

// Produces a finalized reservoir (M forced to 1) from
// Options.NumDirectionalSamples candidates, each picking one directional
// light uniformly at random. CoherentRandContext is not used here.
template<typename SurfaceType>
RTXDI_SDK_Reservoir ProduceDirectionalSample(
    SurfaceType Surface,
    InitialCandidateOptions Options,
    inout FRandomContext RandContext,
    inout FRandomContext CoherentRandContext)
{
    RTXDI_SDK_Reservoir DirectionalReservoir = RTXDI_SDK_Reservoir::Empty();

    // Discrete PDF of picking the light source. Both values are loop
    // invariant; the PDF is 0 when there are no directional lights.
    const float InvLightPickingPdf = (float)SampledLightData.DirectionalLightCount;
    const float LightPickingPdf = (InvLightPickingPdf > 0.f) ? (1.f / InvLightPickingPdf) : 0.f;

    // No ReGIR cell is associated with directional lights.
    const int FakeCell = -1;
    const float FakeInvApproximatedNormalizationFactor = 1.f;

    for (int i = 0; i < Options.NumDirectionalSamples; i++)
    {
        float3 RandSample = RandContext.GenerateSample3D();

        // x picks the light (clamped to the last one), yz is the sample location on it.
        uint DirectionalLightIndex = uint(RandSample.x * SampledLightData.DirectionalLightCount);
        DirectionalLightIndex = min(DirectionalLightIndex, SampledLightData.DirectionalLightCount - 1) + SampledLightData.DirectionalLightStart;

        const float2 SampleLocation = RandSample.yz;
        const RTXDI_SDK_LightSampleRef SampleRef = RTXDI_SDK_LightSampleRef::Create(DirectionalLightIndex, SampleLocation);

        UpdateReservoirWithNewProposalSample(Surface, SampleRef,
            FakeCell, FakeInvApproximatedNormalizationFactor, InvLightPickingPdf,
            Options.NumDirectionalSamples, 0, 0,
            RTXDI_SAMPLING_LIGHT, LightPickingPdf,
            DirectionalReservoir, RandContext);
    }

    DirectionalReservoir.FinalizeResampling(1.0, 1.0);
    DirectionalReservoir.M = 1;

    return DirectionalReservoir;
}

// Produces a finalized reservoir (M forced to 1) from Options.NumSkyLightSamples
// candidates read from the pre-sampled RISSkylightBuffer. Each buffer entry packs
// the sample UV as two 16-bit values in .x and the inverse picking PDF as float bits in .y.
template<typename SurfaceType>
RTXDI_SDK_Reservoir ProduceSkylightSample(
    SurfaceType Surface,
    InitialCandidateOptions Options,
    inout FRandomContext RandContext,
    inout FRandomContext CoherentRandContext)
{
    RTXDI_SDK_Reservoir SkylightReservoir = RTXDI_SDK_Reservoir::Empty();

    // No ReGIR cell is associated with the skylight.
    const int FakeCell = -1;
    const float FakeInvApproximatedNormalizationFactor = 1.f;

    for (int i = 0; i < Options.NumSkyLightSamples; i++)
    {
        // Select a tile of random samples for the thread using the coherent sampler to improve coherence of local threads
        const uint Tile = uint(CoherentRandContext.GenerateSample1D() * RISSkylightBufferTiles) % RISSkylightBufferTiles;

        // This is essentially RIS, for further detail please check [reference 1] at the end of this file.
        const uint Sample = uint(RandContext.GenerateSample1D() * RISSkylightBufferTileSize) % RISSkylightBufferTileSize;
        const uint2 SampleData = RISSkylightBuffer[Tile * RISSkylightBufferTileSize + Sample];
        const uint SkyLightIdx = 0;

        const float InvLightTilePickingPdf = asfloat(SampleData.y);

        // Unpack the two 16-bit UV components into [0, 1].
        const float2 SampleLocation = float2(SampleData.x & 0xffff, SampleData.x >> 16) * 1.0 / float(0xffff);

        const RTXDI_SDK_LightSampleRef SampleRef = RTXDI_SDK_LightSampleRef::Create(SkyLightIdx, SampleLocation);

        const float LightPickingPdf = InvLightTilePickingPdf > 0.f ? (1.f / InvLightTilePickingPdf) : 0.f;

        UpdateReservoirWithNewProposalSample(Surface, SampleRef,
            FakeCell, FakeInvApproximatedNormalizationFactor, InvLightTilePickingPdf,
            Options.NumSkyLightSamples, Options.NumBrdfSamples, 0,
            RTXDI_SAMPLING_LIGHT, LightPickingPdf,
            SkylightReservoir, RandContext);
    }

    SkylightReservoir.FinalizeResampling(1.0, 1.0);
    SkylightReservoir.M = 1;

    return SkylightReservoir;
}

// Produces a finalized reservoir (M forced to 1) from Options.NumBrdfSamples
// BRDF-sampled rays. Each ray is traced against the light TLAS; a hit yields a
// sample on that light, a miss yields a skylight sample (light index 0) when
// EnableSkySampling is set and is skipped otherwise. CoherentRandContext is
// not used here.
RTXDI_SDK_Reservoir ProduceBrdfSample(
    FGBufferSurface Surface,
    InitialCandidateOptions Options,
    inout FRandomContext RandContext,
    inout FRandomContext CoherentRandContext)
{
    RTXDI_SDK_Reservoir BrdfReservoir = RTXDI_SDK_Reservoir::Empty();

#if RIS_SAMPLE_LOCAL_LIGHTS == RIS_SAMPLE_LOCAL_LIGHTS_REGIR
    const int   ReGIRCell = ReGIRGetCell(Surface.TranslatedWorldPosition);
    // ReGIRGetCell returns -1 outside the grid. Never index the buffer with it:
    // clamp the index and select 0, which gives a zero inverse factor below.
    const float ApproximatedNormalizationFactor = (ReGIRCell >= 0) ? ReGIRNormalizationBuffer[max(ReGIRCell, 0)] : 0.f;
#else
    const int       ReGIRCell = 0;
    const float     ApproximatedNormalizationFactor = 1.f;
#endif

    const float InvApproximatedNormalizationFactor = ApproximatedNormalizationFactor > 0.f ? (1.f / ApproximatedNormalizationFactor) : 0.f;

    for (int i = 0; i < Options.NumBrdfSamples; i++)
    {
        float2 RandSample = RandContext.GenerateSample2D();

        const BrdfRaySample BrdfSample = DefaultLit_SampleBrdf(Surface.GetRTXDISurface(), RandSample);

        // A non-positive PDF marks an invalid BRDF direction.
        if (BrdfSample.OutPdf <= 0.f)
            continue;

        FRayDesc Ray = (FRayDesc)0;
        Ray.Origin = Surface.TranslatedWorldPosition;
        Ray.Direction = BrdfSample.OutDirection;
        Ray.TMin = 0.1f;
        Ray.TMax = POSITIVE_INFINITY;

        float HitDistance = -1.f;
        int LightIdx = TraceRayAgainstLights(Ray, HitDistance);

        /* For reference, the helper called above:

        int TraceRayAgainstLights(FRayDesc Ray, out float Distance)
        {
            FTraceRayInlineContext TraceRayInlineContext = CreateTraceRayInlineContext();
            FTraceRayInlineResult TraceResult = TraceRayInline(LightTLAS, RAY_FLAG_FORCE_OPAQUE, 0xff, Ray.GetNativeDesc(), TraceRayInlineContext);
            if (TraceResult.IsHit())
            {
                Distance = TraceResult.HitT;
                return TraceResult.InstanceID;
            }

            return RTXDI_INVALID_LIGHT_ID;
        }
        */

        float2 SampleLocation = float2(0.f, 0.f);

        uint NumHitLightSamples = 0;
        uint NumReGIRSamples = 0;

        if (LightIdx == RTXDI_INVALID_LIGHT_ID)
        {
            // The ray missed every light: treat it as a skylight sample if allowed.
            if(!EnableSkySampling)
                continue;

            // Convert the 3D world space coordinate to 2D texture space coordinate
            SampleLocation = InverseEquiAreaSphericalMapping(BrdfSample.OutDirection.yzx);
            NumHitLightSamples = Options.NumSkyLightSamples;
            LightIdx = 0;
        }
        else
        {
            // Light sample position in world space.
            const float3 LightSamplePosition = Ray.Origin + Ray.Direction * HitDistance;

            FSampledLightData Light = GetSampledDeferredLightDataWithSkylight(LightIdx, float2(0.f, 0.f), SkylightTexture, SkylightTextureSampler);

            SampleLocation = ConstructUVFromWorldPosition(Light.LightData,
                                                          Surface.TranslatedWorldPosition,
                                                          Surface.GetWorldNormal(),
                                                          Light.LightType,
                                                          LightSamplePosition);

            NumHitLightSamples = Options.NumLocalSamples;

#if (RIS_SAMPLE_LOCAL_LIGHTS == RIS_SAMPLE_LOCAL_LIGHTS_REGIR)
            NumReGIRSamples = Options.NumLocalSamples;
#endif
        }

        const RTXDI_SDK_LightSampleRef SampleRef = RTXDI_SDK_LightSampleRef::Create(LightIdx, SampleLocation);

        UpdateReservoirWithNewProposalSample(Surface, SampleRef,
            ReGIRCell, InvApproximatedNormalizationFactor, 0.f, /* this is not read inside */
            NumHitLightSamples, Options.NumBrdfSamples, NumReGIRSamples,
            RTXDI_SAMPLING_BRDF, BrdfSample.OutPdf,
            BrdfReservoir, RandContext);
    }

    BrdfReservoir.FinalizeResampling(1.0, 1.f);
    BrdfReservoir.M = 1;

    return BrdfReservoir;
}

// Streams Options.NumLocalSamples uniformly picked local-light candidates into
// LocalReservoir. Cell, InvApproximatedNormalizationFactor and NumReGIRSample
// are only forwarded to UpdateReservoirWithNewProposalSample.
// CoherentRandContext is not used here.
template<typename SurfaceType>
void ProduceLocalSampleRandom(
    SurfaceType Surface,
    InitialCandidateOptions Options,
    in int Cell,
    in float InvApproximatedNormalizationFactor,
    inout FRandomContext RandContext,
    inout FRandomContext CoherentRandContext,
    inout RTXDI_SDK_Reservoir LocalReservoir,
    in uint NumReGIRSample = 0)
{
    // Discrete PDF of picking a light
    const float InvLightPickingPdf = float(SampledLightData.LocalLightCount);
    const float LightPickingPdf = (InvLightPickingPdf > 0.f) ? (1.f / InvLightPickingPdf) : 0.f;

    for (uint i = 0; i < Options.NumLocalSamples; i++)
    {
        float3 RandSample = RandContext.GenerateSample3D();

        // Uniformly pick a light source with each light having the same probability of being picked.
        // NOTE(review): the modulo divides by LocalLightCount; confirm callers never reach this with zero local lights.
        const uint LocalLightIdx = uint(RandSample.x * SampledLightData.LocalLightCount) % SampledLightData.LocalLightCount + SampledLightData.LocalLightStart;

        const float2 SampleLocation = RandSample.yz;
        const RTXDI_SDK_LightSampleRef SampleRef = RTXDI_SDK_LightSampleRef::Create(LocalLightIdx, SampleLocation);

        UpdateReservoirWithNewProposalSample(Surface, SampleRef,
            Cell, InvApproximatedNormalizationFactor, InvLightPickingPdf,
            Options.NumLocalSamples, Options.NumBrdfSamples, NumReGIRSample,
            RTXDI_SAMPLING_LIGHT, LightPickingPdf,
            LocalReservoir, RandContext);
    }
}

// Streams Options.NumLocalSamples candidates read from the ReGIR cell `Cell`
// into LocalReservoir. A ReGIRBuffer entry holds the local light index in .x
// (0xffffffff marks an empty slot), the inverse picking PDF as float bits in .y
// and an unnormalized picking PDF as float bits in .w.
// CoherentRandContext is not used here.
template<typename SurfaceType>
void ProduceLocalSampleReGIR(
    SurfaceType Surface,
    InitialCandidateOptions Options,
    in int Cell,
    in float InvApproximatedNormalizationFactor,
    inout FRandomContext RandContext,
    inout FRandomContext CoherentRandContext,
    inout RTXDI_SDK_Reservoir LocalReservoir)
{
    const int ReGIRTileSize = ReGIRGetTileSize();

    // valid cell, produce a sample
    for (uint i = 0; i < Options.NumLocalSamples; i++)
    {
        float3 RandSample = RandContext.GenerateSample3D();

        const uint Sample = uint(RandSample.x * ReGIRTileSize) % ReGIRTileSize;
        const uint4 SampleData = ReGIRBuffer[Cell * ReGIRTileSize + Sample];

        if (SampleData.x != 0xffffffff)
        {
            const uint      LocalLightIdx = SampleData.x + SampledLightData.LocalLightStart;

            const float     ApproximatedLightPickingPdf = asfloat(SampleData.w) * InvApproximatedNormalizationFactor;

            // This is essentially RIS, for further detail please check [reference 1] at the end of this file.
            const float InvLightPickingPdf = asfloat(SampleData.y);

            const float2 SampleLocation = RandSample.yz;
            const RTXDI_SDK_LightSampleRef SampleRef = RTXDI_SDK_LightSampleRef::Create(LocalLightIdx, SampleLocation);

            UpdateReservoirWithNewProposalSample(Surface, SampleRef,
                Cell, InvApproximatedNormalizationFactor, InvLightPickingPdf,
                Options.NumLocalSamples, Options.NumBrdfSamples, Options.NumLocalSamples,
                RTXDI_SAMPLING_REGIR, ApproximatedLightPickingPdf,
                LocalReservoir, RandContext);
        }
        else
        {
            // feed a null sample to avoid bias
            LocalReservoir.StreamSample(RTXDI_SDK_LightSampleRef::Invalid(), 1.f, 0.f, 0.f);
        }
    }
}

// Streams Options.NumLocalSamples candidates read from one tile of the
// pre-sampled RISBuffer into LocalReservoir. A buffer entry holds the local
// light index in .x and the inverse picking PDF as float bits in .y.
template<typename SurfaceType>
void ProduceLocalSampleRIS(
    SurfaceType Surface,
    InitialCandidateOptions Options,
    inout FRandomContext RandContext,
    inout FRandomContext CoherentRandContext,
    inout RTXDI_SDK_Reservoir LocalReservoir)
{
    // Select a tile of random samples for the thread using the coherent sampler to improve coherence of local threads
    uint Tile = uint(CoherentRandContext.GenerateSample1D() * RISBufferTiles) % RISBufferTiles;

    // This is fine since this will not be read
    const int FakeCell = -1;
    const float FakeInvApproximatedNormalizationFactor = 1.f;

    for (uint i = 0; i < Options.NumLocalSamples; i++)
    {
        float3 RandSample = RandContext.GenerateSample3D();

        // This is essentially RIS, for further detail please check [reference 1] at the end of this file.
        const uint Sample = uint(RandSample.x * RISBufferTileSize) % RISBufferTileSize;
        const uint2 SampleData = RISBuffer[Tile * RISBufferTileSize + Sample];
        const uint LocalLightIdx = SampleData.x + SampledLightData.LocalLightStart;

        const float InvLightPickingPdf = asfloat(SampleData.y);
        const float LightPickingPdf = InvLightPickingPdf > 0.f ? (1.f / InvLightPickingPdf) : 0.f;

        const float2 SampleLocation = RandSample.yz;
        const RTXDI_SDK_LightSampleRef SampleRef = RTXDI_SDK_LightSampleRef::Create(LocalLightIdx, SampleLocation);

        UpdateReservoirWithNewProposalSample(Surface, SampleRef,
            FakeCell, FakeInvApproximatedNormalizationFactor, InvLightPickingPdf,
            Options.NumLocalSamples, Options.NumBrdfSamples, 0,
            RTXDI_SAMPLING_LIGHT, LightPickingPdf,
            LocalReservoir, RandContext);
    }
}

// Evaluates one proposal sample and streams it into Reservoir.
//
// The sample is treated as invalid (target PDF and proposal weight of 0) when
// the surface and light share no lighting channel, when InSamplingMethodPdf is
// not positive, or - for non-BRDF sampling - when the light sample PDF is not
// positive. Invalid samples are still streamed, so they count towards M.
//
// RealInvLightPickingPdf is only read when SamplingMethod is not RTXDI_SAMPLING_BRDF.
template<typename SurfaceType>
void UpdateReservoirWithNewProposalSample(
    const SurfaceType Surface,
    const RTXDI_SDK_LightSampleRef SampleRef,
    const int ReGIRCell,
    const float InvApproximatedNormalizationFactor,
    const float RealInvLightPickingPdf,
    const uint NumLightSamples,
    const uint NumBrdfSamples,
    const uint NumReGIRSamples,
    const uint SamplingMethod,
    const float InSamplingMethodPdf,
    inout RTXDI_SDK_Reservoir Reservoir,
    inout FRandomContext RandContext)
{
    const bool SamplingBrdf = (SamplingMethod == RTXDI_SAMPLING_BRDF);

    // Random number used by the reservoir to decide whether this sample replaces the current one.
    const float RisRand = RandContext.GenerateSample1D();

    // Draw a light sample from given light sources
    FSampledLightData Light;
    FLightSampleLocation LightSample;
    GetLightSampleData(Surface, SampleRef, LightSample, Light);

    // Conditional PDF of picking a light sample when a specific light is picked.
    const float LightSamplePdf = LightSample.Pdf;

    // Whether the sample is valid.
    const uint LightIdx = SampleRef.GetLightIndex();
    bool ValidSample = Surface.GetLightingChannelMask() & GetSampledLightChannelMask(LightIdx);
    if (InSamplingMethodPdf <= 0.f)
        ValidSample = false;
    if (!SamplingBrdf && LightSamplePdf <= 0.f)
        ValidSample = false;

    // Target PDF of RIS, this is the unshadowed light contribution with cosine term included.
    const float TargetPdf = ValidSample ? GetApproximateLightSampleWeightWithLight(Surface, LightSample, Light) : 0.f;

    // Multiple importance sampling weight for initial samples
    const float MisWeight = GetLightMISWeight(Surface, Light, SampleRef, LightSample, LightIdx,
        ReGIRCell, InvApproximatedNormalizationFactor, NumLightSamples, NumBrdfSamples, NumReGIRSamples, LightSamplePdf,
        SamplingMethod, InSamplingMethodPdf, RandContext);

    // Proposal PDF has two parts
    //  - The discrete PDF of picking a light source
    //  - The conditional (continuous) PDF of picking a sample on the picked light source
    // For BRDF sampling the supplied method PDF is the whole proposal PDF.
    const float InvProposalPdf = ValidSample ? (SamplingBrdf ? (1.f / InSamplingMethodPdf) : (RealInvLightPickingPdf / LightSamplePdf)) : 0.f;

    // MIS weight is counted as part of the RIS weight here
    const float RisWeight = MisWeight * TargetPdf * InvProposalPdf;

    // WRS algorithm execution
    Reservoir.StreamSample(SampleRef, RisRand, TargetPdf, RisWeight);
}

#### DirectLightSamplingCommon
- シェーダー(DirectLightSamplingCommon.ush)
```C++
// ReGIR specific data
float   ReGIRCellSize;   // Edge length of a level-0 grid cell; ReGIRGetCell scales it by exp2(Level) for coarser levels.
int3    ReGIRCellCount;  // Number of cells per axis within one grid level.
int     ReGIRTileSize;   // NOTE(review): presumably the number of samples stored per cell - confirm against ReGIRGetTileSize().
float   ReGIRLevels;     // Number of grid levels; ReGIRComputeLevel clamps its result to ReGIRLevels - 1.

// Returns the grid level for a position as a float in [0, ReGIRLevels - 1].
// Per axis the level is ceil(log2(|offset in cells|) - log2(half the cell count)),
// with the offset measured from the camera origin and clamped to at least one
// cell; the largest axis value is used.
float ReGIRComputeLevel(float3 TranslatedWorldPosition)
{
    const float3 CameraRelative = (TranslatedWorldPosition - View.TranslatedWorldCameraOrigin);
    const float3 OffsetInCells = CameraRelative / ReGIRCellSize;

    const float3 AxisLevel = ceil(log2(max(1.0, abs(OffsetInCells))) - log2(ReGIRCellCount * 0.5));
    const float LargestAxisLevel = max(AxisLevel.x, max(AxisLevel.y, AxisLevel.z));

    const float NonNegativeLevel = max(0.0, LargestAxisLevel);
    return min(NonNegativeLevel, ReGIRLevels - 1.0);
}

// Returns the linear ReGIR cell index for a position, or -1 when the position
// lies outside the grid. Cells are laid out level by level, and x-fastest
// (then y, then z) within a level.
int ReGIRGetCell(float3 TranslatedWorldPosition)
{
    const float3 GridCoord = (TranslatedWorldPosition - View.TranslatedWorldCameraOrigin);
    const float Level = ReGIRComputeLevel(TranslatedWorldPosition);

    // Cell coordinate inside the selected level; the grid is centered on the camera.
    const float3 TileCoord = GridCoord / (ReGIRCellSize * exp2(Level)) + ReGIRCellCount * 0.5f;

    if (Level >= ReGIRLevels || any(TileCoord < 0.0) || any(TileCoord > ReGIRCellCount))
        return -1;

    // A coordinate exactly equal to ReGIRCellCount passes the range test above
    // but would truncate to an index one past the last cell of that axis and
    // alias another row, slice or level. Clamp it to the last valid cell.
    const int3 IntTileCoord = min(int3(TileCoord), ReGIRCellCount - 1);
    return Level * (ReGIRCellCount.x * ReGIRCellCount.y * ReGIRCellCount.z) + IntTileCoord.x + IntTileCoord.y * (ReGIRCellCount.x) + IntTileCoord.z * (ReGIRCellCount.x * ReGIRCellCount.y);
}

// Samples an outgoing direction from the surface BRDF.
//
// RandSample.x first selects the diffuse (cosine-weighted hemisphere) or the
// specular (visible-normal GGX) lobe and is then rescaled for reuse within the
// chosen lobe. Returns a zero-initialized sample (OutPdf == 0) when the
// specular lobe produces a direction below the surface.
BrdfRaySample DefaultLit_SampleBrdf(in const FRTXDISurface Surface, in float2 RandSample)
{
    const FGBufferData GBuffer = Surface.GBuffer;

    const float3 N = GBuffer.WorldNormal;
    const float3 V = -Surface.ViewDirection;

    BrdfRaySample Sample = (BrdfRaySample)0;

    float2 Alpha;
    float3x3 Basis = GetGGXBasis(GBuffer.Roughness, GBuffer.Anisotropy, GBuffer.WorldNormal, GBuffer.WorldTangent, Alpha);

    // View vector in the local shading frame.
    const float3 LV = mul(Basis, V);

    const float NoV = saturate(LV.z);
    const FBxDFEnergyTerms Spec = ComputeGGXSpecEnergyTerms(GBuffer.Roughness, NoV, GBuffer.SpecularColor);

    // Probability of picking diffuse lobe vs. specular lobe
    const float3 Diffuse = GBuffer.DiffuseColor * (1 - Spec.E);
    const float3 Specular = Spec.E;
    const float SumDiffuse = Diffuse.x + Diffuse.y + Diffuse.z;
    const float SumSpecular = Specular.x + Specular.y + Specular.z;
    const float SumLobes = SumDiffuse + SumSpecular;

    // Both lobe weights can be zero (fully black material). Dividing 0 by 0
    // would give a NaN ratio that poisons the direction and the PDF, so fall
    // back to sampling the diffuse lobe only.
    const float DiffuseRatio = (SumLobes > 0.0) ? (SumDiffuse / SumLobes) : 1.0;

#if SUPPORTS_ANISOTROPIC_MATERIALS
    bool bHasAnisotropy = HasAnisotropy(GBuffer.SelectiveOutputMask);
#else
    bool bHasAnisotropy = false;
#endif

    // Randomly choose to sample diffuse or specular
    float3 L = 0, H = 0;
    if (RandSample.x < DiffuseRatio)
    {
        // Rescale x back to [0, 1) so it can be reused for the lobe sample.
        RandSample.x /= DiffuseRatio;

        // Lambert
        // TODO: evaluate CosineSampleHemisphereConcentric
        float4 Result = CosineSampleHemisphere(RandSample.xy);

        L = Result.xyz;
        H = normalize(L + LV);
    }
    else
    {
        // Rescale x back to [0, 1) so it can be reused for the lobe sample.
        RandSample.x -= DiffuseRatio;
        RandSample.x /= (1.0 - DiffuseRatio);

        H = ImportanceSampleVisibleGGX(RandSample.xy, Alpha, LV).xyz;

        L = reflect(-LV, H);

        // invalid output direction, exit early
        if (L.z <= 0)
            return Sample;
    }

    // Back to world space.
    L = normalize(mul(L, Basis));

    Sample.OutDirection = L;
    Sample.OutPdf = EvaluateBrdfPdfInternal(N, L, V, GBuffer.WorldTangent, Alpha, bHasAnisotropy, DiffuseRatio);

    return Sample;
}

// Unpacks the light stored at LightIndex (via GetSampledLightData) into an
// FSampledLightData. Shadow-related fields are zeroed and IsSkyLight is false.
FSampledLightData GetSampledDeferredLightDataPrimitiveLight(int LightIndex)
{
    FPackedSampledLightingData Packed = GetSampledLightData(LightIndex);
    const uint LightType = Packed.Type;

    FSampledLightData Result = (FSampledLightData)0;

    // Fields copied straight from the packed record.
    Result.LightData.TranslatedWorldPosition = Packed.LightPosition; // LWC  ToDo
    Result.LightData.InvRadius = Packed.InvRadius;
    Result.LightData.Color = Packed.LightColor;
    Result.LightData.FalloffExponent = Packed.FalloffExponent;
    Result.LightData.Direction = Packed.Direction;
    Result.LightData.Tangent = Packed.Tangent;
    Result.LightData.SpotAngles = Packed.SpotAngles;
    Result.LightData.SourceRadius = Packed.SourceRadius;
    Result.LightData.SourceLength = Packed.SourceLength;
    Result.LightData.SoftSourceRadius = Packed.SoftSourceRadius;
    Result.LightData.SpecularScale = Packed.SpecularScale;
    Result.LightData.RectLightData.BarnCosAngle = Packed.RectLightBarnCosAngle;
    Result.LightData.RectLightData.BarnLength = Packed.RectLightBarnLength;
    Result.LightData.DistanceFadeMAD = Packed.DistanceFadeMAD;

    // No dynamic shadowing or contact shadows for sampled lights.
    Result.LightData.ShadowMapChannelMask = float4(0, 0, 0, 0);
    Result.LightData.ShadowedBits = 0;
    Result.LightData.ContactShadowLength = 0.0;
    Result.LightData.ContactShadowLengthInWS = false;
    Result.LightData.ContactShadowNonCastingIntensity = 0.0f;

    // Flags derived from the light type.
    Result.LightData.bRadialLight = (LightType != LIGHT_TYPE_DIRECTIONAL);
    Result.LightData.bSpotLight = (LightType == LIGHT_TYPE_SPOT);
    Result.LightData.bRectLight = (LightType == LIGHT_TYPE_RECT);

    Result.LightData.HairTransmittance = InitHairTransmittanceData();

    // Inverse-squared falloff applies to non-directional lights with a zero falloff exponent.
    Result.LightData.bInverseSquared = (LightType != LIGHT_TYPE_DIRECTIONAL) && (Result.LightData.FalloffExponent == 0);

    Result.IESAtlasIndex = Packed.LightProfileIndex;
    Result.LightType = LightType;

    Result.AtmosphereCloudShadowIndexAndFlags = Packed.AtmosphereCloudShadowIndexAndFlags;

    // Rect light atlas UV offset/scale are stored as pairs of half floats.
    Result.LightData.RectLightData.AtlasData.AtlasMaxLevel = Packed.RectLightAtlasMaxLevel;
    Result.LightData.RectLightData.AtlasData.AtlasUVOffset.x = f16tof32(Packed.RectLightAtlasUVOffset & 0xffff);
    Result.LightData.RectLightData.AtlasData.AtlasUVOffset.y = f16tof32((Packed.RectLightAtlasUVOffset >> 16) & 0xffff);
    Result.LightData.RectLightData.AtlasData.AtlasUVScale.x = f16tof32(Packed.RectLightAtlasUVScale & 0xffff);
    Result.LightData.RectLightData.AtlasData.AtlasUVScale.y = f16tof32((Packed.RectLightAtlasUVScale >> 16) & 0xffff);

    // The light function index lives in the low 8 bits.
    Result.LightFunctionIndex = Packed.FlagsLightFunctionAndMask & 0xff;

    Result.LightID = Packed.LightID;

    Result.ColorToRadianceScale = 1.0f;

    if (LightType == LIGHT_TYPE_POINT || LightType == LIGHT_TYPE_SPOT)
    {
        const float Radius = Result.LightData.SourceRadius;
        const float Length = Result.LightData.SourceLength;

        float Area = 1.f;
        if (Length > 0.f)
        {
            // Capsule: PI * (R^2 + 0.5 * R * Length), i.e. a quarter of the
            // full surface area 4 * PI * R^2 + 2 * PI * R * Length.
            const float CapsTerm = Radius * Radius;
            const float BodyTerm = 0.5 * Radius * Length;

            Area = PI * (CapsTerm + BodyTerm);
        }
        else
        {
            // Sphere: PI * R^2, or 1 for radii below POINT_LIGHT_RADIUS_LIMIT.
            Area = (Radius < POINT_LIGHT_RADIUS_LIMIT) ? 1.f : (PI * Pow2(Radius));
        }

        Result.ColorToRadianceScale = 1.f / Area;
    }

    Result.IsSkyLight = false;

    return Result;
}

// Returns the light data for LightIndex. Indices at or above
// SampledLightData.DirectionalLightStart are regular lights and are forwarded
// to GetSampledDeferredLightDataPrimitiveLight; lower indices are the skylight,
// which is returned as a directional light whose direction and color come
// from LightUV.
FSampledLightData GetSampledDeferredLightDataWithSkylight(int LightIndex, float2 LightUV, Texture2D<float4> SkylightTexture,SamplerState SkylightSampler)
{
    const bool bIsPrimitiveLight = (LightIndex >= SampledLightData.DirectionalLightStart);
    if (bIsPrimitiveLight)
    {
        return GetSampledDeferredLightDataPrimitiveLight(LightIndex);
    }

    FSampledLightData Sky = (FSampledLightData)0;

    // Color from the skylight texture, direction from the equal-area mapping of the UV.
    Sky.LightData.TranslatedWorldPosition = 0.0f;
    Sky.LightData.InvRadius = 0.0f;
    Sky.LightData.Color = Texture2DSample(SkylightTexture, SkylightSampler, LightUV).xyz * SampledLightData.SkylightColor;
    Sky.LightData.FalloffExponent = 0.0f;
    Sky.LightData.Direction = normalize(EquiAreaSphericalMapping(LightUV).zxy);

    // No shape, no spot cone, no barn doors, no distance fade.
    Sky.LightData.Tangent = 0;
    Sky.LightData.SpotAngles = 0;
    Sky.LightData.SourceRadius = 0;
    Sky.LightData.SourceLength = 0;
    Sky.LightData.SoftSourceRadius =0;
    Sky.LightData.SpecularScale = 0.0; // skylight contributions for specular are handled with reflections
    Sky.LightData.RectLightData.BarnCosAngle = 0;
    Sky.LightData.RectLightData.BarnLength = 0;
    Sky.LightData.DistanceFadeMAD = 0;

    // No dynamic shadowing or contact shadows.
    Sky.LightData.ShadowMapChannelMask = float4(0, 0, 0, 0);
    Sky.LightData.ShadowedBits = 0;
    Sky.LightData.ContactShadowLength = 0.0;
    Sky.LightData.ContactShadowLengthInWS = false;
    Sky.LightData.ContactShadowNonCastingIntensity = 0.0f;

    Sky.LightData.bRadialLight = false;
    Sky.LightData.bSpotLight = false;
    Sky.LightData.bRectLight = false;
    Sky.LightData.bInverseSquared = false;

    Sky.LightData.HairTransmittance = InitHairTransmittanceData();

    // No IES profile and no rect light atlas region.
    Sky.IESAtlasIndex = -1.0;
    Sky.LightData.RectLightData.AtlasData.AtlasMaxLevel = MAX_RECT_ATLAS_MIP;
    Sky.LightData.RectLightData.AtlasData.AtlasUVOffset = float2(0, 0);
    Sky.LightData.RectLightData.AtlasData.AtlasUVScale = float2(0, 0);
    Sky.LightType = LIGHT_TYPE_DIRECTIONAL;

    Sky.AtmosphereCloudShadowIndexAndFlags = 0;
    Sky.LightFunctionIndex = 0;
    Sky.LightID = 0;
    Sky.ColorToRadianceScale = 1.0f;
    Sky.IsSkyLight = true;

    return Sky;
}
yasukichi commented 8 months ago

RTXDI_SDK_Reservoir

// Code for managing reads/writes to reservoirs
// NOTE(review): the buffer element types were lost when this snippet was
// pasted; uint4 is inferred from the uint4 values that the read/write helpers
// return and store - confirm against the original shader.
RWStructuredBuffer<uint4> RWLightReservoirUAV;
uint3 ReservoirBufferDim;

RWStructuredBuffer<uint4> RWLightReservoirHistoryUAV;
StructuredBuffer<uint4> LightReservoirHistory;
uint3 ReservoirHistoryBufferDim;

// Maps a pixel coordinate and slice to a linear reservoir buffer address.
// Pixels are stored in 4x4 tiles: the 16 entries of a tile are contiguous,
// tiles are laid out row by row, and each slice spans
// BufferDim.x * BufferDim.y entries.
// NOTE(review): RowStride rounds BufferDim.x down to whole tiles, so this
// presumably expects BufferDim.x to be a multiple of the tile size - confirm.
uint ComputeReservoirAddress(int2 PixelCoord, int Slice, uint3 BufferDim)
{
    static const uint TileSize = 4;

    // Number of entries in one row of tiles.
    const uint RowStride = (BufferDim.x / TileSize) * TileSize * TileSize;

    int2 Tile = PixelCoord / TileSize;
    int2 TileCoord = PixelCoord % TileSize;

    uint Address = Slice * BufferDim.x * BufferDim.y;
    Address += Tile.y * RowStride;
    Address += Tile.x * TileSize * TileSize;
    Address += TileCoord.y * TileSize + TileCoord.x;

    return Address;
}

// Loads the packed reservoir for PixelCoord / Slice from RWLightReservoirUAV,
// addressed with ReservoirBufferDim.
uint4 ReadReservoirData(int2 PixelCoord, int Slice)
{
    return RWLightReservoirUAV[ComputeReservoirAddress(PixelCoord, Slice, ReservoirBufferDim)];
}

// Stores a packed reservoir for PixelCoord / Slice into RWLightReservoirUAV,
// addressed with ReservoirBufferDim.
void WriteReservoirData(int2 PixelCoord, int Slice, uint4 Value)
{
    const uint Slot = ComputeReservoirAddress(PixelCoord, Slice, ReservoirBufferDim);
    RWLightReservoirUAV[Slot] = Value;
}

// Loads the packed reservoir for PixelCoord / Slice from the read-only
// LightReservoirHistory buffer, addressed with ReservoirHistoryBufferDim.
uint4 ReadReservoirHistoryData(int2 PixelCoord, int Slice)
{
    return LightReservoirHistory[ComputeReservoirAddress(PixelCoord, Slice, ReservoirHistoryBufferDim)];
}

// Stores a packed reservoir for PixelCoord / Slice into
// RWLightReservoirHistoryUAV, addressed with ReservoirHistoryBufferDim.
void WriteReservoirHistoryData(int2 PixelCoord, int Slice, uint4 Value)
{
    const uint Slot = ComputeReservoirAddress(PixelCoord, Slice, ReservoirHistoryBufferDim);
    RWLightReservoirHistoryUAV[Slot] = Value;
}

// This structure represents a single light reservoir that stores the weights, the sample ref,
// and sample count (M). Store()/Load() pack and unpack every field except canonicalWeight,
// which Load() resets to 0.
struct RTXDI_SDK_Reservoir
{
    // Largest M that survives Store() when CUSTOM_FLOAT packing is used.
    static const uint MaxM = 0xffff;

    RTXDI_SDK_LightSampleRef sampleRef;
    float weightSum;
    float targetPdf;
    float M;

    // Canonical weight when using pairwise MIS (ignored except during pairwise MIS computations)
    float canonicalWeight;

    // Returns a reservoir with an invalid sample and all numeric fields zero.
    static RTXDI_SDK_Reservoir Empty()
    {
        RTXDI_SDK_Reservoir s;
        s.sampleRef = RTXDI_SDK_LightSampleRef::Invalid();
        s.targetPdf = 0;
        s.weightSum = 0;
        s.M = 0;
        s.canonicalWeight = 0;
        return s;
    }

    // Unpacks a reservoir written by Store(). Returns Empty() when the
    // unpacked weightSum is Inf or NaN.
    static RTXDI_SDK_Reservoir Load(RTXDI_SDK_PackedReservoir data)
    {
        RTXDI_SDK_Reservoir res;

        res.sampleRef.lightData = data.x;
        res.sampleRef.uvData = data.y;

#if !CUSTOM_FLOAT
        // z: weightSum (low half) and targetPdf (high half) as half floats, w: M
        res.weightSum = f16tof32(data.z);
        res.targetPdf = f16tof32(data.z >> 16);
        res.M = data.w;
#else
        // M is split across the low bytes of z (high byte) and w (low byte)
        const uint M = (data.w & 0xff) | ((data.z & 0xff) << 8);
        res.M = M;

        // unsigned 24 bit floats
        uint weightSum = (data.z & 0xffffff00);
        uint targetPdf = (data.w & 0xffffff00);
        res.weightSum = asfloat(weightSum >> 1);
        res.targetPdf = asfloat(targetPdf >> 1);
#endif

        res.canonicalWeight = 0.0f;

        // Discard reservoirs that have Inf/NaN
        if (isinf(res.weightSum) || isnan(res.weightSum)) {
            res = Empty();
        }

        return res;
    }

    // Packs this reservoir for storage. With CUSTOM_FLOAT packing, M is
    // clamped to MaxM in place as a side effect.
    RTXDI_SDK_PackedReservoir Store()
    {
        RTXDI_SDK_PackedReservoir data;
        data.x = sampleRef.lightData;
        data.y = sampleRef.uvData;

#if !CUSTOM_FLOAT
        data.w = M;
        data.z = f32tof16(weightSum) | (f32tof16(targetPdf) << 16);
#else
        const float RoundConst = 1.0f / float(1 << 18); // 1/(2^18) is the first bit unrepresented by the custom float
        float RoundedWeight = weightSum + weightSum * RoundConst;
        float RoundedPdf = targetPdf + targetPdf * RoundConst;

        // remove sign as values are guaranteed positive
        uint uWeightSum = asuint(max(0.0, RoundedWeight)) << 1;
        uint uTargetPdf = asuint(max(0.0, RoundedPdf)) << 1;

        // chop to 24 bits
        uWeightSum = uWeightSum & 0xffffff00;
        uTargetPdf = uTargetPdf & 0xffffff00;

        // clamp M
        M = (uint)min((uint)M, MaxM);

        // pack M to bottom bits
        data.z = (uWeightSum) | (((uint)M >> 8) & 0xff);
        data.w = (uTargetPdf) | ((uint)M & 0xff);
#endif

        return data;
    }

    // Turns the accumulated weightSum into the final reservoir weight:
    // weightSum * numerator / (targetPdf * denominator), or 0 when that
    // denominator is 0.
    void FinalizeResampling(float normalizationNumerator, float normalizationDenominator)
    {
        float denominator = targetPdf * normalizationDenominator;

        weightSum = (denominator == 0.0) ? 0.0 : (weightSum * normalizationNumerator) / denominator;
    }

    // Adds a new, non-reservoir light sample into this reservoir.
    // Algorithm (3) from the ReSTIR paper, Streaming RIS using weighted reservoir sampling.
    // Returns true when the new sample replaced the stored one.
    bool StreamSample(RTXDI_SDK_LightSampleRef NewSampleRef, float Random, float NewTargetPdf, float RisWeight)
    {
        // Add one sample to the counter
        M += 1;

        // Update the weight sum
        weightSum += RisWeight;

        // Decide if we will randomly pick this sample
        bool selectSample = (Random * weightSum < RisWeight);

        // If we did select this sample, update the relevant data.
        // New samples don't have visibility or age information, we can skip that.
        if (selectSample)
        {
            sampleRef = NewSampleRef;
            targetPdf = NewTargetPdf;
        }

        return selectSample;
    }

    // Adds `newReservoir` into `reservoir`, returns true if the new reservoir's sample was selected.
    // This is a very general form, allowing input parameters to specify normalization and targetPdf
    // rather than computing them from `newReservoir`.  Named "internal" since these parameters take
    // different meanings (e.g., in RTXDI_CombineReservoirs() or RTXDI_StreamNeighborWithPairwiseMIS())
    bool InternalSimpleResample(
        const RTXDI_SDK_Reservoir newReservoir,
        float random,
        float newTargetPdf,                             // Usually closely related to the sample normalization,
        float sampleNormalization,                      //     typically off by some multiplicative factor
        float sampleM                                   // In its most basic form, should be newReservoir.M
    )
    {
        // What's the current weight (times any prior-step RIS normalization factor)
        float risWeight = newTargetPdf * sampleNormalization;

        // Our *effective* candidate pool is the sum of our candidates plus those of our neighbors
        M += sampleM;

        // Update the weight sum
        weightSum += risWeight;

        // Decide if we will randomly pick this sample
        bool selectSample = (random * weightSum < risWeight);

        // If we did select this sample, update the relevant data
        if (selectSample)
        {
            sampleRef = newReservoir.sampleRef;
            targetPdf = newTargetPdf;
        }

        return selectSample;
    }

    // Adds a reservoir with one sample into this reservoir.
    // Algorithm (4) from the ReSTIR paper, Combining the streams of multiple reservoirs.
    // Normalization - Equation (6) - is postponed until all reservoirs are combined.
    bool CombineReservoirs(RTXDI_SDK_Reservoir newReservoir, float random, float newTargetPdf)
    {
        return InternalSimpleResample(
            newReservoir,
            random,
            newTargetPdf,
            newReservoir.weightSum * newReservoir.M,
            newReservoir.M
        );
    }
};