KirillZubov opened this issue 1 year ago
The first script trains a neural operator for a family of Burgers' equations. Training learns a mapping between the functional space of initial conditions {u(t0, x)} and the corresponding solutions u(t, x); here only data is used.
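In operator-learning terms the objective is, schematically (the symbols $G_\theta$ and $a_i$ are introduced here for exposition; $G_\theta$ corresponds to the FNO `m` in the script):

$$G_\theta : u(t_0, \cdot) \mapsto u(t, x), \qquad \min_\theta \sum_i \lVert G_\theta(a_i) - u_i \rVert_2$$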
using Plots
using DataDeps, MAT, MLUtils
using NeuralOperators, Flux
using CUDA, FluxTraining, BSON
import Flux: params
using BSON: @save, @load
# Burgers' equation dataset from
# [fourier_neural_operator](https://github.com/zongyi-li/fourier_neural_operator)
# """,
# "http://www.med.cgu.edu.tw/NeuralOperators/Burgers_R10.zip",
#### data
burgermat = matopen("path here"))
b = read(burgermat)
input = b["input"]
output = b["output"]
tspan = b["tspan"]
x = b["x"]
a = b["a"]
size(input)
size(output)
# reshape output from (sample, t, x) = (1200, 101, 128) to (channel, t, x, sample)
output_ = Array{Float32,4}(undef, 1, 101, 128, 1200)
for i in 1:1200
    output_[1, :, :, i] = output[i, :, :]
end
input_ = Array{Float32,4}(undef, 3, 101, 128, 1200)
size(input_[1, :, :, :])
# channel 1: the initial condition u(t0, x), repeated over all 101 time steps
for i in 1:101
    input_[1, i, :, :] = input[:, :]'
end
# channel 2: time grid t ∈ [0, 1] (101 points), identical for every sample
ts = reshape(repeat(LinRange(0, 1, 101), 128), 101, 128)
for i in 1:1200
    input_[2, :, :, i] = ts
end
# channel 3: spatial grid x ∈ [0, 1] (128 points), identical for every sample
xs = reshape(repeat(LinRange(0, 1, 128), 101), 128, 101)'
for i in 1:1200
    input_[3, :, :, i] = xs
end
###
size(input_)
size(output_)
input_data = input_[:, 1:100, :, 1:900]
output_data = output_[:, 1:100, :, 1:900]
size(input_data)
size(output_data)
# model = FourierNeuralOperator(ch=(3, 64, 64, 64, 64, 64, 128, 1), modes=(1, 128),
# σ=gelu)
m = FourierNeuralOperator(ch=(3, 64, 64, 64, 64, 64, 128, 1), modes=(10, 16),
σ=gelu)
res = m(input_data[:, 1:10, 1:128, 1:1]) # sanity check on a small slice
ratio = 0.9
batchsize = 50
data_train = (input_data, output_data)
data = DataLoader(data_train |> gpu , batchsize=batchsize, shuffle=true)
cuda = true
η₀ = 1.0f-3
λ = 1.0f-4
epochs = 100
if cuda && CUDA.has_cuda()
    device = gpu
    CUDA.allowscalar(false)
    @info "Training on GPU"
else
    device = cpu
    @info "Training on CPU"
end
optimiser = Flux.Optimiser(WeightDecay(λ), Flux.Adam(η₀))
lossfunc(x,y) = l₂loss(m(x),y)
m = m |> gpu
xtrain = input_data[:, :, :, 1:1] |> gpu
ytrain = output_data[:, :, :, 1:1] |> gpu
evalcb() = @show(lossfunc(xtrain, ytrain))
m(xtrain)
lossfunc(xtrain, ytrain)
evalcb()
CUDA.memory_status()
CUDA.reclaim()
epochs = 100
Flux.@epochs epochs Flux.train!(lossfunc, params(m), data, optimiser, cb=evalcb)
# for epoch in 1: epochs
# d =generate_data()
# time_evolution_loop()
# Flux.train!(lossfunc, params(m), d, optimiser, cb=evalcb)
# end
model = m |> cpu
j = 100
res = model(input_data[:, :, :, j:j])[1, :, :, 1]
anim = @animate for i in 1:100
    plot(output_data[1, i, :, j])
    plot!(res[i, :])
end
gif(anim, fps=15)
The second script adds a loss function for the differential operator (PINN-style): data + PDE.
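The combined objective is, schematically (both terms enter with unit weight, matching `lossfull` below; $m$ is the FNO, $x$ the input tensor, $y$ the reference solution):

$$\mathcal{L} = \underbrace{\lVert m(x) - y \rVert_2}_{\text{data}} \;+\; \underbrace{\lVert \partial_t u + u\,\partial_x u - \nu\,\partial_{xx} u \rVert_2}_{\text{pde}}, \qquad u = m(x)$$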
# ## pde + data
# input dimensions:
# 1) channel
# 2) time
# 3) space
# 4) sample (initial condition a)
# central finite differences with step h = cbrt(eps(Float32)):
# the factor for first derivatives is 1/(2h); for second derivatives, 1/h^2
_epsilon = one(Float32) / (2 * cbrt(eps(Float32)))
_epsilonpow = (one(Float32) / cbrt(eps(Float32)))^2
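# These constants implement standard central differences evaluated through the
# operator itself. Writing e_t / e_x for a unit shift of the time / space input
# channel (notation introduced here, not in the original), the derivative
# functions defined below compute:
#   ∂u/∂t   ≈ (m(x + h⋅e_t) - m(x - h⋅e_t)) / (2h)
#   ∂u/∂x   ≈ (m(x + h⋅e_x) - m(x - h⋅e_x)) / (2h)
#   ∂²u/∂x² ≈ (m(x + h⋅e_x) - 2⋅m(x) + m(x - h⋅e_x)) / h²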
#time
# input_data = input_data |> gpu
input_data = input_data |> cpu
input_dtl = copy(input_data)
input_dtr = copy(input_data)
dt = cbrt(eps(Float32))
input_dtr[2, :, :, :] .= input_data[2, :, :, :] .+ dt
input_dtl[2, :, :, :] .= input_data[2, :, :, :] .- dt
#cord
input_dxl = copy(input_data)
input_dxr = copy(input_data)
dx = cbrt(eps(Float32))
input_dxr[3, :, :, :] .= input_data[3, :, :, :] .+ dx
input_dxl[3, :, :, :] .= input_data[3, :, :, :] .- dx
###
m = FourierNeuralOperator(ch=(3, 64, 64, 64, 64, 64, 128, 1), modes=(10, 16),
σ=gelu)
del = (m(input_dtr[:, :, :, 1:10]) .- m(input_dtl[:, :, :, 1:10])) .* _epsilon
# first derivative in t: central difference of the operator output
function derivative_dt(input_dtr, input_dtl)
    (m(input_dtr) .- m(input_dtl)) .* _epsilon
end
# first derivative in x: central difference of the operator output
function derivative_dx(input_dxr, input_dxl)
    (m(input_dxr) .- m(input_dxl)) .* _epsilon
end
# second derivative in x: three-point central stencil
function derivative_dx2(input_data, input_dxr, input_dxl)
    (m(input_dxr) .+ m(input_dxl) .- 2 .* m(input_data)) .* _epsilonpow
end
@time derivative_dx2(input_data[:, :, :, 1:5],input_dxr[:, :, :, 1:5], input_dxl[:, :, :, 1:5]);
@time derivative_dx(input_dxr[:, :, :, 1:5], input_dxl[:, :, :, 1:5]);
@time derivative_dt(input_dtr[:, :, :, 1:5], input_dtl[:, :, :, 1:5]);
@time m(input_data[:, :, :, 1:5]) ;
size_ = 100
input_data_ = input_data[:, :, :, 1:size_];
output_data_ = output_data[:, :, :, 1:size_];
input_dxr_ = input_dxr[:, :, :, 1:size_];
input_dxl_ = input_dxl[:, :, :, 1:size_];
input_dtr_ = input_dtr[:, :, :, 1:size_];
input_dtl_ = input_dtl[:, :, :, 1:size_];
m = m |> gpu
CUDA.@time derivative_dx2(input_data[:,:,:,1:1] |> gpu, input_dxr[:,:,:,1:1] |> gpu, input_dxl[:,:,:,1:1] |> gpu);
CUDA.@time derivative_dx( input_dxr[:,:,:,1:1] |> gpu, input_dxl[:,:,:,1:1] |> gpu);
CUDA.@time derivative_dt( input_dtr[:,:,:,1:1] |> gpu, input_dtl[:,:,:,1:1] |> gpu);
CUDA.@time m(input_data[:,:,:,1:1] |> gpu) ;
batchsize = 10
data_train = (input_data_,input_dxr_,input_dxl_,input_dtr_,input_dtl_, output_data_)
data = DataLoader(data_train |> gpu , batchsize=batchsize, shuffle=true)
cuda = true
η₀ = 1.0f-3
λ = 1.0f-4
optimiser = Flux.Optimiser(WeightDecay(λ), Flux.Adam(η₀))
xtrain = input_data[:, :, :, 1:1] |> gpu
ytrain = output_data[:, :, :, 1:1] |> gpu
dxltrain = input_dxl[:, :, :, 1:1] |> gpu
dxrtrain = input_dxr[:, :, :, 1:1] |> gpu
dtrtrain = input_dtr[:, :, :, 1:1] |> gpu
dtltrain = input_dtl[:,:,:,1:1] |> gpu
# absolute (unnormalized) l₂ norm, used for the PDE residual
function l₂loss2(𝐲)
    feature_dims = 2:(ndims(𝐲) - 1)
    loss = sum(.√(sum(abs2, 𝐲, dims=feature_dims)))
    # y_norm = sum(.√(sum(abs2, 𝐲, dims=feature_dims)))
    return loss # / y_norm
end
l₂loss2(m(xtrain))
l₂loss(m(xtrain), ytrain)
#burger
# eq = Dt(u(t, x)) + u(t, x) * Dx(u(t, x)) - ν * Dxx(u(t, x)) ~ 0
ν = 0.01
derivative_operator(x, dxr, dxl, dtr, dtl) = derivative_dt(dtr, dtl) .+ m(x) .* derivative_dx(dxr, dxl) .- ν .* derivative_dx2(x, dxr, dxl)
pde_loss(x,dxr,dxl,dtr,dtl) = l₂loss2(derivative_operator(x, dxr, dxl, dtr, dtl))
data_loss(x, y) = l₂loss(m(x) , y)
lossfull(x, dxr, dxl, dtr, dtl, y) = data_loss(x, y) + pde_loss(x,dxr,dxl,dtr,dtl)
evalcb() = @show(lossfull(xtrain, dxrtrain, dxltrain, dtrtrain, dtltrain, ytrain))
l₂loss2(derivative_operator(xtrain, dxrtrain, dxltrain, dtrtrain, dtltrain))
pde_loss(xtrain, dxrtrain, dxltrain, dtrtrain, dtltrain)
data_loss(xtrain, ytrain)
lossfull(xtrain, dxrtrain, dxltrain, dtrtrain, dtltrain, ytrain)
evalcb()
# CUDA.devices()
CUDA.reclaim()
CUDA.memory_status()
epochs = 80
Flux.@epochs epochs Flux.train!(lossfull, params(m), data, optimiser, cb=evalcb)
model = m |> cpu
j = 1
res = model(input_data[:, :, :, j:j])[1, :, :, 1]
anim = @animate for i in 1:100
    plot(output_data[1, i, :, j])
    plot!(res[i, :])
end
gif(anim, fps=15)
fine-tuning optimization with PINNs
Here the neural operator, already trained over the whole given functional space of initial conditions, is used as a pre-trained state for a PINN solver in order to achieve higher accuracy on a single instance of the Burgers' equation.
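Schematically, fine-tuning replaces the data term with an operator term that anchors the fine-tuned model $m$ to the frozen pre-trained operator $m_{\mathrm{trained}}$ (this matches `lossfull` as redefined below, where `data_loss` is commented out):

$$\mathcal{L} = \lVert \partial_t u + u\,\partial_x u - \nu\,\partial_{xx} u \rVert_2 + \lVert m_{\mathrm{trained}}(x) - m(x) \rVert_2, \qquad u = m(x)$$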
# fine tuning with PINNS
m = FourierNeuralOperator(ch=(3, 64, 64, 64, 64, 64, 128, 1), modes=(10, 16),σ=gelu)
j = 1 # index of one of the initial conditions from the dataset
input_data_ = input_data[:, :, :, j:j] |> gpu
output_data_ = output_data[:, :, :, j:j] |> gpu
input_dxr_ = input_dxr[:, :, :, j:j] |> gpu
input_dxl_ = input_dxl[:, :, :, j:j] |> gpu
input_dtr_ = input_dtr[:, :, :, j:j] |> gpu
input_dtl_ = input_dtl[:, :, :, j:j] |> gpu
batchsize = 1
data_train = (input_data_,input_dxr_,input_dxl_,input_dtr_,input_dtl_, output_data_)
data = DataLoader(data_train |> gpu , batchsize=batchsize, shuffle=true)
ν = 0.01
derivative_operator(x, dxr, dxl, dtr, dtl) = derivative_dt(dtr, dtl) .+ m(x) .* derivative_dx(dxr, dxl) .- ν .* derivative_dx2(x, dxr, dxl)
pde_loss(x,dxr,dxl,dtr,dtl) = l₂loss2(derivative_operator(x, dxr, dxl, dtr, dtl))
data_loss(x, y) = l₂loss(m(x) , y)
lossfull(x, dxr, dxl, dtr, dtl, y) = data_loss(x, y) + pde_loss(x,dxr,dxl,dtr,dtl)
#model saved here https://github.com/KirillZubov/pino_model/tree/main/burger/model
mtrained = BSON.load("/home/kirillzubov/model_burger_pino_full.bson")[:m]
j = 100
res = mtrained(input_data[:, :, :, j:j])[1, :, :, 1]
anim = @animate for i in 1:100
    plot(output_data[1, i, :, j])
    plot!(res[i, :])
end
gif(anim, "burger_neural_operator.gif", fps=15)
mtrained = mtrained |> gpu
m = m |> gpu
op_loss(x) = l₂loss(mtrained(x) , m(x))
derivative_operator(x, dxr, dxl, dtr, dtl) = derivative_dt(dtr, dtl) .+ m(x) .* derivative_dx(dxr, dxl) .- ν .* derivative_dx2(x, dxr, dxl)
pde_loss(x,dxr,dxl,dtr,dtl) = l₂loss2(derivative_operator(x, dxr, dxl, dtr, dtl))
lossfull(x, dxr, dxl, dtr, dtl, y) = pde_loss(x,dxr,dxl,dtr,dtl) + op_loss(x) #data_loss(x, y)
evalcb() = @show(lossfull(input_data_, input_dxr_, input_dxl_, input_dtr_, input_dtl_, output_data_))
mtrained(xtrain)
m(xtrain)
op_loss(xtrain)
pde_loss(xtrain, dxrtrain, dxltrain, dtrtrain, dtltrain)
lossfull(xtrain, dxrtrain, dxltrain, dtrtrain, dtltrain, ytrain)
evalcb()
CUDA.reclaim()
CUDA.memory_status()
epochs = 1000
η₀ = 1.0f-4
λ = 1.0f-5
optimiser = Flux.Optimiser(WeightDecay(λ), Flux.Adam(η₀))
Flux.@epochs epochs Flux.train!(lossfull, params(m), data, optimiser, cb=evalcb)
model = m |> cpu
mtrainedc = mtrained |> cpu
j = 1
res = model(input_data[:, :, :, j:j])[1, :, :, 1]
res2 = mtrainedc(input_data[:, :, :, j:j])[1, :, :, 1]
anim = @animate for i in 1:100
    plot(output_data[1, i, :, j])
    plot!(res[i, :])
    plot!(res2[i, :])
end
gif(anim, fps=15)
A couple of strategies/scenarios:
1) train-to-train model;
2) physics-informed neural operator: integrate both losses into one training scheme;
3) use each individual prediction, solution, and data together to adaptively update the prediction of the core kernel / Green's function for a class of PDEs (the major equations of some problem) as a pre-trained state.
References:
https://zongyi-li.github.io/neural-operator/Neural-Operator-CS159-0503-2022.pdf
https://www2.compute.dtu.dk/~apek/SCIML2022/Surrogate_methods.pdf
Physics-Informed Neural Operators: https://arxiv.org/abs/2111.03794
Graph Kernel Network: https://arxiv.org/abs/2003.03485
Generative Adversarial Neural Operators: https://arxiv.org/abs/2205.03017
TODO
Provide support for the main NeuralOperators.jl package (https://github.com/SciML/NeuralOperators.jl/tree/main/src): symbolic part, inner types, strategies, training, transfer learning, GPU.
I would also like to experiment with this dataset and build an interesting demonstration example: https://developer.ibm.com/data/spot-challenge-wildfires/