smillerc / MPIHaloArrays.jl

An array type for MPI halo data exchange in Julia
MIT License

Why does the allocated memory in a for loop keep increasing? #9

Closed crazyfireji closed 1 year ago

crazyfireji commented 1 year ago

This is my code:

using MPI, MPIHaloArrays
using FortranFiles
using CircularArrays
using JLD2
using Statistics

MPI.Init()
const comm = MPI.COMM_WORLD
const rank = MPI.Comm_rank(comm)
const nprocs = MPI.Comm_size(comm)
const root = 0 # root rank

@assert nprocs == 40 "This example is designed for 40 processes, but was launched with $(nprocs)"

function gauss_filter_parallel_1d(data::AbstractArray{T, 3}, radius, nhalo::Int) where T<:Real

    nx, ny, nz = size(data)

    filtered_data = zeros(eltype(data),nx,ny,nz)

    data = CircularArray(data[:,:,:])

    dz = 14.0 / nz

    r_gridz = ceil(Int, radius / dz)  

    G = zeros(2*r_gridz+1)

    for k in 1:2*r_gridz+1
        dis_z = (k-r_gridz-1)*dz    
        G[k] = sqrt(6/(π*4*radius^2))*exp(-6*(dis_z)^2/(4*radius^2))    
    end

    G = G .* length(G) ./ sum(G) # normalize so the weights have mean 1

    for i in 1+nhalo:nx+nhalo
        for j in 1+nhalo:ny+nhalo
            for k in 1+nhalo:nz+nhalo
                kmin = k-r_gridz
                kmax = k+r_gridz
                filtered_data[i,j,k] = mean( data[i, j, kmin:kmax] .* G)
            end
        end
    end

    return filtered_data
end

function main(filepath, nx_g, ny_g, nz_g, nx, ny, nz, nhalo,
topology, steps, Δ, δᵥ, Ndims)

    for n in steps

        f = "$(filepath)OCFD$(string(n,pad=7)).dat"

        flow_data = rand(nx, ny, nz, 5) # initializer3D(f, nx_g, ny_g, nz_g, Ndims, root, nhalo, topology)

        ρ = MPIHaloArray(flow_data[:,:,:,1], topology, nhalo; do_corners = false)
        u = MPIHaloArray(flow_data[:,:,:,2], topology, nhalo; do_corners = false)
        v = MPIHaloArray(flow_data[:,:,:,3], topology, nhalo; do_corners = false)
        w = MPIHaloArray(flow_data[:,:,:,4], topology, nhalo; do_corners = false)
        T = MPIHaloArray(flow_data[:,:,:,5], topology, nhalo; do_corners = false)

        fillhalo!(ρ, 0)
        fillhalo!(u, 0)
        fillhalo!(v, 0)
        fillhalo!(w, 0)
        fillhalo!(T, 0)

        updatehalo!(ρ)
        updatehalo!(u)
        updatehalo!(v)
        updatehalo!(w)
        updatehalo!(T)

        ρ1 = ρ.data
        u1 = u.data .* ρ.data
        v1 = v.data .* ρ.data
        w1 = w.data .* ρ.data
        T1 = T.data .* ρ.data

        for m in Δ

            gl = δᵥ * m / 2

            ρ.data = gauss_filter_parallel_1d(ρ1, gl, nhalo)
            u.data = gauss_filter_parallel_1d(u1, gl, nhalo)
            v.data = gauss_filter_parallel_1d(v1, gl, nhalo)
            w.data = gauss_filter_parallel_1d(w1, gl, nhalo)
            T.data = gauss_filter_parallel_1d(T1, gl, nhalo)

            G1 = gatherglobal(ρ)
            G2 = gatherglobal(u)
            G3 = gatherglobal(v)
            G4 = gatherglobal(w)
            G5 = gatherglobal(T)

            if rank == root

                G2 ./= G1
                G3 ./= G1
                G4 ./= G1
                G5 ./= G1

                println("Test$(string(n,pad=7))_g_filt_$(m).jld2")

            end # end rank == 0
        end #end Δ loop

    end #end for n loop

end

Ndims = 5
δᵥ = 0.0355
npx0, npy0, npz0 = 1, 40, 1
nx_g, ny_g, nz_g = 1401, 160, 128
halo_dims = (1, 2, 3)

sizes = MPIHaloArrays.get_subdomain_sizes((nx_g,ny_g,nz_g),(npx0,npy0,npz0),halo_dims) 

nx, ny, nz = sizes[:,rank + 1] |> Tuple

nhalo = 0
topology = CartesianTopology(comm, [npx0,npy0,npz0], [false, false, true])
filepath = "/homeu_jyc/Data/M29/Flateplate/Tw284/70W-80W/"

steps = range(start = 798100, step = 100, stop = 798500)

Δ = [15, 60, 120, 240] # .* δᵥ ./ 2.0

main(filepath, nx_g, ny_g, nz_g, nx, ny, nz, nhalo, topology, steps, Δ, δᵥ, Ndims)

GC.gc()

MPI.Finalize()

This is the allocated memory over time; if it grows beyond my machine's memory, the job gets killed. [screenshot of memory usage]

smillerc commented 1 year ago

Look for type instabilities, e.g., @code_warntype gauss_filter_parallel_1d(ρ1, gl, nhalo). There are a few spots where you are using slices that will create copies. Use @views instead, since these don't allocate. Did you try running with the julia --track-allocation=user flag? It can give you hints as to where you're allocating.
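
To make that concrete: filtered_data[i,j,k] = mean(data[i, j, kmin:kmax] .* G) copies the slice and then allocates the broadcast product on every pass of the triple loop. Below is a minimal sketch of an allocation-free inner kernel; filter_column! is a hypothetical helper, it trades the temporary for a dot product via mean(x .* G) == sum(x .* G) / length(G), and it assumes views of the CircularArray keep the wrap-around indexing (CircularArrays handles the wrapping in getindex).

using LinearAlgebra: dot

# Filter one (i, j) column of `data` into `filtered_data` without
# allocating: @views turns the slice into a view instead of a copy,
# and dot(...) / length(G) replaces mean(slice .* G) with no temporary.
function filter_column!(filtered_data, data, G, i, j, nz, nhalo, r_gridz)
    for k in 1+nhalo:nz+nhalo
        kmin, kmax = k - r_gridz, k + r_gridz
        @views filtered_data[i, j, k] = dot(data[i, j, kmin:kmax], G) / length(G)
    end
    return filtered_data
end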

crazyfireji commented 1 year ago

Thank you so much. I've tried @views and setting G = nothing; data = nothing in gauss_filter_parallel_1d(ρ1, gl, nhalo), but it still allocates memory. I tried julia --track-allocation=user, but I got some errors; I'll try it again tomorrow.
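
For reference, the tracking workflow is: run the script once as julia --track-allocation=user script.jl, then look at the .mem files written next to each source file, which prefix every line with the bytes allocated there. A sketch of ranking the heaviest lines afterwards, assuming the Coverage.jl package is installed (its analyze_malloc function and MallocInfo fields are used below):

using Coverage

# Scan all *.mem files under the current directory and collect the
# per-line byte counts recorded by --track-allocation=user.
mallocs = analyze_malloc(".")

# Print the ten heaviest allocation sites, largest first.
for m in first(sort(mallocs; by = x -> x.bytes, rev = true), 10)
    println(m.bytes, " bytes at ", m.filename, ":", m.linenumber)
end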

crazyfireji commented 1 year ago

It was solved, but I have to call GC.gc() manually at the end of the for n in steps loop.
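
For anyone hitting the same problem: the workaround is a manual collection at the end of every outer iteration, not just a single GC.gc() after main returns. A minimal sketch, where process_step! is a hypothetical stand-in for the body of the for n in steps loop above:

for n in steps
    process_step!(n)  # read, filter, gather, and write results for step n
    GC.gc()           # force a full collection so this step's large
                      # temporaries are freed before the next iteration
end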