YingboMa / MaBLAS.jl

Other
25 stars 0 forks source link

The analytical model for blocking #10

Open YingboMa opened 4 years ago

YingboMa commented 4 years ago
julia> using Hwloc
julia> function params(::Type{T}) where T
           # Returns the k blocking size (called `cache_k` in the original post) derived from
           # the micro-kernel dimensions and the geometry of the L1 data cache.
           #
           # Micro-kernel dimensions come from LoopVectorization's mᵣ / nᵣ constants and the
           # vector width VectorizationBase picks for T.
           kernel_rows = LoopVectorization.mᵣ * VectorizationBase.pick_vector_width(T)
           kernel_cols = LoopVectorization.nᵣ
           # Locate the L1 data cache by descending through L3 and then L2.
           machine = Hwloc.topology_load()
           l3cache = getdatacache(machine, :L3Cache)
           l2cache = getdatacache(l3cache, :L2Cache)
           l1cache = getdatacache(l2cache, :L1Cache)
           # Normalise the reported associativity into a usable number of ways.
           reported = l1cache.attr.associativity
           if reported == 0
               ways = 8 # assume 8 if unknown
           elseif reported == -1
               ways = l1cache.attr.size ÷ l1cache.attr.linesize # fully associative
           else
               ways = reported
           end
           nsets = l1cache.attr.size ÷ (l1cache.attr.linesize * ways)
           # One way is set aside; the remaining ways are divided by (1 + nᵣ/mᵣ) to get the
           # number of cache lines per set given to the panel of A.
           lines_for_a = floor(Int, (ways - 1) / (1 + kernel_cols/kernel_rows))
           # `*` and `÷` share precedence and associate left, so the product is formed first
           # and then integer-divided by the byte size of one micro-panel column.
           return lines_for_a * l1cache.attr.linesize * nsets ÷ (kernel_rows * sizeof(T))
       end
params (generic function with 1 method)
julia> function getdatacache(topology, name)
           # Depth-first, pre-order search below `topology` for the first object whose
           # `type_` is `name` and whose cache kind (`attr.type_`) is :Unified or :Data.
           # Returns that object, or `nothing` when no descendant matches.
           #
           # Every child is examined, not just the first one, so a data cache that is
           # listed after a non-matching sibling (e.g. an instruction cache of the same
           # level) is still found.
           for t in topology.children
               # Objects without an assigned `type_` cannot be classified; skip them.
               isdefined(t, :type_) || continue
               if t.type_ === name && t.attr.type_ in (:Unified, :Data)
                   return t
               end
               found = getdatacache(t, name)
               found === nothing || return found
           end
           return nothing
       end
getdatacache (generic function with 1 method)
julia> params(Float64)
213
julia> params(Float32)
256

Someone may find this code useful.

chriselrod commented 4 years ago

This is an, uhh, interesting choice

julia> params(Float32)
76

julia> params(Float64)
76