CarloLucibello / GraphNeuralNetworks.jl

Graph Neural Networks in Julia
https://carlolucibello.github.io/GraphNeuralNetworks.jl/dev/
MIT License
220 stars 46 forks source link

find and fix invalidations #271

Open CarloLucibello opened 1 year ago

CarloLucibello commented 1 year ago

See https://timholy.github.io/SnoopCompile.jl/stable/snoopr/

CarloLucibello commented 1 year ago
SnoopCompile script

```julia
using SnoopCompileCore
invalidations = @snoopr begin
    using GraphNeuralNetworks
    using Flux
    # using CUDA
    # using Graphs
    # using Random, Statistics, LinearAlgebra
end

function workload()
    num_graphs = 3
    gs = [rand_graph(5, 10) for _ in 1:num_graphs]
    g = Flux.batch(gs)
    x = rand(Float32, 4, g.num_nodes)
    model = GNNChain(GCNConv(4 => 4, relu),
                     GCNConv(4 => 4),
                     GlobalPool(max),
                     Dense(4, 1))
    y = model(g, x)
    # @assert size(y) == (1, num_graphs)
end

tinf = @snoopi_deep begin
    workload()
end

using SnoopCompile
trees = invalidation_trees(invalidations)
staletrees = precompile_blockers(trees, tinf)

@show length(uinvalidated(invalidations)) # show total invalidations

show(trees[end]) # show the most invalidating method

# Count number of children (number of invalidations per invalidated method)
n_invalidations = map(SnoopCompile.countchildren, trees)

# (optional) plot the number of children per method invalidations
import Plots
Plots.plot(
    1:length(trees),
    n_invalidations;
    markershape=:circle,
    xlabel="i-th method invalidation",
    label="Number of children per method invalidations"
)

# (optional) report invalidations summary
using PrettyTables # needed for `report_invalidations` to be defined
SnoopCompile.report_invalidations(;
    invalidations,
    process_filename = x -> last(split(x, ".julia/packages/")),
    n_rows = 0, # no-limit (show all invalidations)
)
```
results table

| : | Function Name | Invalidations | Invalidations % (xᵢ/∑x) |
|---|---|---:|---:|
| CUDA/ZdCxS/lib/cusparse/types.jl:17 | convert | 703 | 23 |
| NNlibCUDA/C6t0p/src/batchedadjtrans.jl:15 | convert | 684 | 22 |
| ProgressLogging/6KXlp/src/ProgressLogging.jl:211 | convert | 300 | 10 |
| BangBang/90Cfb/src/macro.jl:61 | broadcasted | 160 | 5 |
| FLoops/6PVny/src/reduce.jl:349 | broadcasted | 160 | 5 |
| Transducers/iubDU/src/air.jl:16 | broadcasted | 160 | 5 |
| Optimisers/BT5bT/src/interface.jl:219 | broadcasted | 160 | 5 |
| GPUArrays/7TiO1/src/host/abstractarray.jl:220 | deepcopy | 67 | 2 |
| CUDA/ZdCxS/src/array.jl:225 | deepcopy_internal | 65 | 2 |
| NNlibCUDA/C6t0p/src/cudnn/activations.jl:28 | materialize | 57 | 2 |
| FillArrays/disQb/src/fillbroadcast.jl:78 | broadcasted | 52 | 2 |
| CUDA/ZdCxS/src/device/intrinsics/wmma.jl:140 | convert | 45 | 1 |
| BFloat16s/uUmkF/src/bfloat16.jl:61 | isnan | 45 | 1 |
| StaticArrays/4WE4t/src/indexing.jl:370 | unsafe_view | 36 | 1 |
| MicroCollections/jGxA3/src/arrays.jl:80 | isassigned | 26 | 1 |
| LLVM/s3bxG/src/execution.jl:45 | convert | 26 | 1 |
| /home/lucibello/.vscode-server/extensions/julialang.language-julia-1.38.2/scripts/packages/IJuliaCore/src/stdio.jl:24 | unwrapcontext | 25 | 1 |
| OrderedCollections/PRayh/src/ordered_dict.jl:95 | convert | 22 | 1 |
| InitialValues/OWP8V/src/InitialValues.jl:238 | promote_rule | 21 | 1 |
| ShowCases/m4GFK/src/list.jl:51 | IteratorEltype | 19 | 1 |
| Graphs/7SMZs/src/SimpleGraphs/simpleedge.jl:26 | Pair | 18 | 1 |
| Transducers/iubDU/src/basics.jl:104 | print | 18 | 1 |
| StaticArrays/4WE4t/src/SArray.jl:70 | cconvert | 15 | 0 |
| GPUArrays/7TiO1/src/host/mapreduce.jl:10 | mapreducedim! | 15 | 0 |
| LLVM/s3bxG/src/types.jl:12 | convert | 15 | 0 |
| CUDA/ZdCxS/src/compiler/gpucompiler.jl:73 | method_table | 14 | 0 |
| Transducers/iubDU/src/processes.jl:885 | Set | 11 | 0 |
| ProgressLogging/6KXlp/src/ProgressLogging.jl:216 | cmp | 11 | 0 |
| CUDA/ZdCxS/src/pointer.jl:56 | cconvert | 11 | 0 |
| ChainRulesCore/a4mIA/src/tangent_types/thunks.jl:56 | fill | 10 | 0 |
| GPUArrays/7TiO1/src/host/broadcast.jl:18 | BroadcastStyle | 9 | 0 |
| ForwardDiff/vXysl/src/dual.jl:427 | promote_rule | 7 | 0 |
| FillArrays/disQb/src/trues.jl:26 | to_indices | 5 | 0 |
| StatsBase/XgjIN/src/statmodels.jl:90 | isequal | 5 | 0 |
| InitialValues/OWP8V/src/InitialValues.jl:154 | min | 5 | 0 |
| GPUArrays/7TiO1/src/host/quirks.jl:25 | _axes | 5 | 0 |
| KrylovKit/diNbc/src/dense/linalg.jl:56 | IteratorEltype | 4 | 0 |
| DataStructures/59MD0/src/priorityqueue.jl:360 | merge! | 4 | 0 |
| AbstractFFTs/0uOAT/src/definitions.jl:11 | eltype | 4 | 0 |
| /home/lucibello/.vscode-server/extensions/julialang.language-julia-1.38.2/scripts/packages/VSCodeServer/src/display.jl:158 | display | 4 | 0 |
| /home/lucibello/.vscode-server/extensions/julialang.language-julia-1.38.2/scripts/packages/VSCodeServer/src/notebookdisplay.jl:31 | display | 4 | 0 |
| DataStructures/59MD0/src/sparse_int_set.jl:213 | zip | 3 | 0 |
| ShowCases/m4GFK/src/style.jl:39 | print | 3 | 0 |
| ShowCases/m4GFK/src/list.jl:50 | IteratorSize | 3 | 0 |
| SpecialFunctions/gXPNz/src/expint.jl:8 | \* | 3 | 0 |
| ForwardDiff/vXysl/src/dual.jl:436 | convert | 2 | 0 |
| StructArrays/dNQpc/src/staticarrays_support.jl:34 | instantiate | 2 | 0 |
| CUDA/ZdCxS/src/compiler/gpucompiler.jl:56 | runtime_module | 2 | 0 |
| CUDA/ZdCxS/src/compiler/gpucompiler.jl:65 | link_libraries! | 2 | 0 |
| /home/lucibello/.vscode-server/extensions/julialang.language-julia-1.38.2/scripts/packages/IJuliaCore/src/stdio.jl:18 | unlock | 2 | 0 |
| StaticArrays/4WE4t/src/SOneTo.jl:57 | getproperty | 1 | 0 |
| IRTools/LbzBn/src/ir/utils.jl:50 | walk | 1 | 0 |
| StatsBase/XgjIN/src/statmodels.jl:91 | isequal | 1 | 0 |
| DataStructures/59MD0/src/swiss_dict.jl:646 | iterate | 1 | 0 |
| InitialValues/OWP8V/src/InitialValues.jl:258 | convert | 1 | 0 |
| GPUArrays/7TiO1/src/host/quirks.jl:24 | _axes | 1 | 0 |
| IRTools/LbzBn/src/ir/utils.jl:53 | prewalk | 0 | 0 |
| DelimitedFiles/aGcsu/src/DelimitedFiles.jl:835 | show | 0 | 0 |
| GPUArrays/7TiO1/src/host/abstractarray.jl:10 | serialize | 0 | 0 |
| GPUCompiler/S3TWf/src/jlgen.jl:215 | code_cache | 0 | 0 |
| GPUCompiler/S3TWf/src/jlgen.jl:213 | get_world_counter | 0 | 0 |
| GPUCompiler/S3TWf/src/jlgen.jl:226 | may_compress | 0 | 0 |
| GPUCompiler/S3TWf/src/jlgen.jl:229 | verbose_stmt_info | 0 | 0 |
| GPUCompiler/S3TWf/src/jlgen.jl:219 | unlock_mi_inference | 0 | 0 |
| GPUCompiler/S3TWf/src/jlgen.jl:214 | get_inference_cache | 0 | 0 |
| GPUCompiler/S3TWf/src/jlgen.jl:225 | may_optimize | 0 | 0 |
| GPUCompiler/S3TWf/src/jlgen.jl:227 | may_discard_trees | 0 | 0 |
| GPUCompiler/S3TWf/src/jlgen.jl:218 | lock_mi_inference | 0 | 0 |
| GPUCompiler/S3TWf/src/jlgen.jl:221 | add_remark! | 0 | 0 |
| GPUCompiler/S3TWf/src/jlgen.jl:212 | OptimizationParams | 0 | 0 |
| GPUCompiler/S3TWf/src/jlgen.jl:211 | InferenceParams | 0 | 0 |