From https://github.com/CliMA/ClimaCore.jl/issues/1602, here is a reproducer where JET misses an inference failure. Profile does capture the failure, and the resulting slowdown is observed in a benchmark using BenchmarkTools.
Below is a reproducer that consists of:
- commands that clone and prep the environment
- a script that JET-tests, profiles using Profile, and benchmarks using BenchmarkTools.
prep environment:
# Clone ClimaAtmos and check out the commit used for this reproducer.
git clone https://github.com/CliMA/ClimaAtmos.jl
cd ClimaAtmos.jl/
git checkout 0eef0e3d8d44a7cfd553d56e55609c1ed0d77e0f
# Start Julia with `perf` as the active project.
julia --project=perf
using Pkg
# Pin Thermodynamics to the specific revision used for this reproducer.
Pkg.add(Pkg.PackageSpec(;name="Thermodynamics", rev="1701c9aa474f43814e5d16373606f5ffd0eb6763"))
script:
import ClimaCore;
import Thermodynamics as TD
import Thermodynamics.Parameters as TDP
import CloudMicrophysics as CM
import ClimaCore.Fields as Fields
import ClimaCore.Operators as Operators
import ClimaCore.Spaces as Spaces
import ClimaCore.Geometry as Geometry
import ClimaComms
# Load the TestUtilities module from ClimaCore's test directory; the include is
# skipped when `TU` is already defined.
@isdefined(TU) || include(joinpath(pkgdir(ClimaCore), "test", "TestUtilities", "TestUtilities.jl"));
import .TestUtilities as TU;
# Float type used for the spaces, fields and parameter sets below.
FT = Float64;
zelem=25
# Center space built by the TestUtilities helper; the face space is derived from
# it (`fspace` is not used elsewhere in the visible script).
cspace = TU.CenterExtrudedFiniteDifferenceSpace(FT;zelem, helem=10);
fspace = Spaces.FaceExtrudedFiniteDifferenceSpace(cspace);
# Every field in `x` below is allocated on the center space.
space = cspace;
@show ClimaComms.device(cspace)
# From ClimaAtmos
import CLIMAParameters as CP
# NOTE: `aliases` is only referenced by the commented-out construction below.
aliases = string.(fieldnames(TD.Parameters.ThermodynamicsParameters));
toml_dict = CP.create_toml_dict(FT);
# pairs = CP.get_parameter_values(toml_dict, aliases, "Thermodynamics");
# const thermo_params = TD.Parameters.ThermodynamicsParameters{FT}(; pairs...)
# Parameter sets are `const` globals; `compute_precipitation_cache!` reads them
# directly rather than taking them as arguments.
const thermo_params = TD.Parameters.ThermodynamicsParameters(toml_dict)
const cm_params = CM.Parameters.Parameters0M(FT, toml_dict)
# Named tuple holding the three fields that `compute_precipitation_cache!`
# destructures: a thermodynamic-state field and two `FT`-valued fields.
x = (;
ᶜts = Fields.Field(TD.PhaseEquil{FT}, space),
ᶜS_ρq_tot = Fields.Field(FT, space),
ρ = Fields.Field(FT, space),
)
# Return the `tot` component of the phase partition built from the
# thermodynamics parameters `tp` and the thermodynamic state `ts`.
function total_specific_humidity(tp, ts)
    partition = TD.PhasePartition(tp, ts)
    return partition.tot
end
# Update `x.ᶜS_ρq_tot` in place from `x.ρ` and `x.ᶜts`; returns `nothing`.
# Reads the global constants `cm_params` and `thermo_params`.
# This broadcast is the expression under investigation in this reproducer, so
# its exact form is deliberately left as written.
function compute_precipitation_cache!(x)
(; ᶜts, ρ, ᶜS_ρq_tot) = x
# One `@.` broadcast that assigns into the existing `ᶜS_ρq_tot` field.
@. ᶜS_ρq_tot =
ρ * CM.Microphysics0M.remove_precipitation(
cm_params,
TD.PhasePartition(thermo_params, ᶜts),
)
# Alternative right-hand sides kept for comparison, annotated with the
# allocation behavior the author observed for each:
# @. ᶜS_ρq_tot = ρ * TD.PhasePartition(thermo_params, ᶜts).tot # allocates!
# @. ᶜS_ρq_tot = ρ * total_specific_humidity(thermo_params, ᶜts) # allocation-free
return nothing
end
# Run the kernel once before the JET check.
compute_precipitation_cache!(x)
import JET
# JET's optimization-analysis test; according to the accompanying report it
# does not flag the inference failure in this call.
JET.@test_opt compute_precipitation_cache!(x)
# Call `compute_precipitation_cache!` on `x` 200 times in a row; this is the
# workload that the script profiles. Returns `nothing`.
function do_work!(x)
    n_calls = 200
    for _ in 1:n_calls
        compute_precipitation_cache!(x)
    end
    return nothing
end
import Profile, ProfileCanvas
@info "Compiling first..."
do_work!(x) # compile first
@info "Collecting profile..."
# Drop any previously collected samples so only the run below is recorded.
Profile.clear()
prof = Profile.@profile do_work!(x)
# Fetch the collected samples before clearing the profiler's buffer again.
results = Profile.fetch()
Profile.clear()
@info "Generating html..."
# Write the fetched profile data to flame.html.
ProfileCanvas.html_file("flame.html", results)
@info "Benchmarking..."
import BenchmarkTools
# `$x` interpolates the argument into the benchmark expression.
trial = BenchmarkTools.@benchmark compute_precipitation_cache!($x)
# Print the full multi-line benchmark report to stdout.
show(stdout, MIME("text/plain"), trial)
I'll note that the "fix" to this problem turned out to be a matter of not inlining the call to PhaseEquil in Thermodynamics.jl. Regardless, I was surprised to see JET miss this inference failure.
From https://github.com/CliMA/ClimaCore.jl/issues/1602, here is a reproducer where JET misses an inference failure. Profile does capture the failure, and the resulting slowdown is observed in a benchmark using BenchmarkTools.
Below is a reproducer that:
prep environment:
script:
I'll note that the "fix" to this problem turned out to be a matter of not inlining the call to `PhaseEquil` in Thermodynamics.jl. Regardless, I was surprised to see JET miss this inference failure.