KirillZubov opened this issue 1 year ago
The first script trains a neural operator for a family of Burgers' equations. Training learns a mapping between the functional space of initial conditions {u(t0, x)} and the corresponding solutions u(t, x); here only data is used.
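In operator-learning terms the objective is, schematically (the symbols $G_\theta$ and $a_i$ are introduced here for exposition; $G_\theta$ corresponds to the FNO `m` in the script):

$$G_\theta : u(t_0, \cdot) \mapsto u(t, x), \qquad \min_\theta \sum_i \lVert G_\theta(a_i) - u_i \rVert_2$$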
using Plots
using DataDeps, MAT, MLUtils
using NeuralOperators, Flux
using CUDA, FluxTraining, BSON
import Flux: params
using BSON: @save, @load
# Burgers' equation dataset from
# [fourier_neural_operator](https://github.com/zongyi-li/fourier_neural_operator)
# """,
# "http://www.med.cgu.edu.tw/NeuralOperators/Burgers_R10.zip",
#### data
burgermat = matopen("path here"))
b = read(burgermat)
input = b["input"]
output = b["output"]
tspan = b["tspan"]
x = b["x"]
a = b["a"]
size(input)
size(output)
# reshape output from (sample, t, x) = (1200, 101, 128) to (channel, t, x, sample)
output_ = Array{Float32,4}(undef, 1, 101, 128, 1200)
for i in 1:1200
    output_[1, :, :, i] = output[i, :, :]
end
input_ = Array{Float32,4}(undef, 3, 101, 128, 1200)
size(input_[1, :, :, :])
# channel 1: the initial condition u(t0, x), repeated over all 101 time steps
for i in 1:101
    input_[1, i, :, :] = input[:, :]'
end
# channel 2: time grid t ∈ [0, 1] (101 points), identical for every sample
ts = reshape(repeat(LinRange(0, 1, 101), 128), 101, 128)
for i in 1:1200
    input_[2, :, :, i] = ts
end
# channel 3: spatial grid x ∈ [0, 1] (128 points), identical for every sample
xs = reshape(repeat(LinRange(0, 1, 128), 101), 128, 101)'
for i in 1:1200
    input_[3, :, :, i] = xs
end
###
size(input_)
size(output_)
input_data = input_[:, 1:100, :, 1:900]
output_data = output_[:, 1:100, :, 1:900]
size(input_data)
size(output_data)
# model = FourierNeuralOperator(ch=(3, 64, 64, 64, 64, 64, 128, 1), modes=(1, 128),
# σ=gelu)
m = FourierNeuralOperator(ch=(3, 64, 64, 64, 64, 64, 128, 1), modes=(10, 16),
σ=gelu)
res = m(input_data[:, 1:10, 1:128, 1:1]) # sanity check on a small slice
ratio = 0.9
batchsize = 50
data_train = (input_data, output_data)
data = DataLoader(data_train |> gpu , batchsize=batchsize, shuffle=true)
cuda = true
η₀ = 1.0f-3
λ = 1.0f-4
epochs = 100
if cuda && CUDA.has_cuda()
    device = gpu
    CUDA.allowscalar(false)
    @info "Training on GPU"
else
    device = cpu
    @info "Training on CPU"
end
optimiser = Flux.Optimiser(WeightDecay(λ), Flux.Adam(η₀))
lossfunc(x,y) = l₂loss(m(x),y)
m = m |> gpu
xtrain = input_data[:, :, :, 1:1] |> gpu
ytrain = output_data[:, :, :, 1:1] |> gpu
evalcb() = @show(lossfunc(xtrain, ytrain))
m(xtrain)
lossfunc(xtrain, ytrain)
evalcb()
CUDA.memory_status()
CUDA.reclaim()
epochs = 100
Flux.@epochs epochs Flux.train!(lossfunc, params(m), data, optimiser, cb=evalcb)
# for epoch in 1: epochs
# d =generate_data()
# time_evolution_loop()
# Flux.train!(lossfunc, params(m), d, optimiser, cb=evalcb)
# end
model = m |> cpu
j = 100
res = model(input_data[:, :, :, j:j])[1, :, :, 1]
anim = @animate for i in 1:100
    plot(output_data[1, i, :, j])
    plot!(res[i, :])
end
gif(anim, fps=15)
The second script adds a loss function for the differential operator (PINN-style): data + PDE.
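The combined objective is, schematically (both terms enter with unit weight, matching `lossfull` below; $m$ is the FNO, $x$ the input tensor, $y$ the reference solution):

$$\mathcal{L} = \underbrace{\lVert m(x) - y \rVert_2}_{\text{data}} \;+\; \underbrace{\lVert \partial_t u + u\,\partial_x u - \nu\,\partial_{xx} u \rVert_2}_{\text{pde}}, \qquad u = m(x)$$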
# ## pde + data
# input dimensions:
# 1) channel
# 2) time
# 3) space
# 4) sample (initial condition a)
# central finite differences with step h = cbrt(eps(Float32)):
# the factor for first derivatives is 1/(2h); for second derivatives, 1/h^2
_epsilon = one(Float32) / (2 * cbrt(eps(Float32)))
_epsilonpow = (one(Float32) / cbrt(eps(Float32)))^2
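# These constants implement standard central differences evaluated through the
# operator itself. Writing e_t / e_x for a unit shift of the time / space input
# channel (notation introduced here, not in the original), the derivative
# functions defined below compute:
#   ∂u/∂t   ≈ (m(x + h⋅e_t) - m(x - h⋅e_t)) / (2h)
#   ∂u/∂x   ≈ (m(x + h⋅e_x) - m(x - h⋅e_x)) / (2h)
#   ∂²u/∂x² ≈ (m(x + h⋅e_x) - 2⋅m(x) + m(x - h⋅e_x)) / h²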
#time
# input_data = input_data |> gpu
input_data = input_data |> cpu
input_dtl = copy(input_data)
input_dtr = copy(input_data)
dt = cbrt(eps(Float32))
input_dtr[2, :, :, :] .= input_data[2, :, :, :] .+ dt
input_dtl[2, :, :, :] .= input_data[2, :, :, :] .- dt
#cord
input_dxl = copy(input_data)
input_dxr = copy(input_data)
dx = cbrt(eps(Float32))
input_dxr[3, :, :, :] .= input_data[3, :, :, :] .+ dx
input_dxl[3, :, :, :] .= input_data[3, :, :, :] .- dx
###
m = FourierNeuralOperator(ch=(3, 64, 64, 64, 64, 64, 128, 1), modes=(10, 16),
σ=gelu)
del = (m(input_dtr[:, :, :, 1:10]) .- m(input_dtl[:, :, :, 1:10])) .* _epsilon
# first derivative in t: central difference of the operator output
function derivative_dt(input_dtr, input_dtl)
    (m(input_dtr) .- m(input_dtl)) .* _epsilon
end
# first derivative in x: central difference of the operator output
function derivative_dx(input_dxr, input_dxl)
    (m(input_dxr) .- m(input_dxl)) .* _epsilon
end
# second derivative in x: three-point central stencil
function derivative_dx2(input_data, input_dxr, input_dxl)
    (m(input_dxr) .+ m(input_dxl) .- 2 .* m(input_data)) .* _epsilonpow
end
@time derivative_dx2(input_data[:, :, :, 1:5],input_dxr[:, :, :, 1:5], input_dxl[:, :, :, 1:5]);
@time derivative_dx(input_dxr[:, :, :, 1:5], input_dxl[:, :, :, 1:5]);
@time derivative_dt(input_dtr[:, :, :, 1:5], input_dtl[:, :, :, 1:5]);
@time m(input_data[:, :, :, 1:5]) ;
size_ = 100
input_data_ = input_data[:, :, :, 1:size_];
output_data_ = output_data[:, :, :, 1:size_];
input_dxr_ = input_dxr[:, :, :, 1:size_];
input_dxl_ = input_dxl[:, :, :, 1:size_];
input_dtr_ = input_dtr[:, :, :, 1:size_];
input_dtl_ = input_dtl[:, :, :, 1:size_];
m = m |> gpu
CUDA.@time derivative_dx2(input_data[:,:,:,1:1] |> gpu, input_dxr[:,:,:,1:1] |> gpu, input_dxl[:,:,:,1:1] |> gpu);
CUDA.@time derivative_dx( input_dxr[:,:,:,1:1] |> gpu, input_dxl[:,:,:,1:1] |> gpu);
CUDA.@time derivative_dt( input_dtr[:,:,:,1:1] |> gpu, input_dtl[:,:,:,1:1] |> gpu);
CUDA.@time m(input_data[:,:,:,1:1] |> gpu) ;
batchsize = 10
data_train = (input_data_,input_dxr_,input_dxl_,input_dtr_,input_dtl_, output_data_)
data = DataLoader(data_train |> gpu , batchsize=batchsize, shuffle=true)
cuda = true
η₀ = 1.0f-3
λ = 1.0f-4
optimiser = Flux.Optimiser(WeightDecay(λ), Flux.Adam(η₀))
xtrain = input_data[:, :, :, 1:1] |> gpu
ytrain = output_data[:, :, :, 1:1] |> gpu
dxltrain = input_dxl[:, :, :, 1:1] |> gpu
dxrtrain = input_dxr[:, :, :, 1:1] |> gpu
dtrtrain = input_dtr[:, :, :, 1:1] |> gpu
dtltrain = input_dtl[:,:,:,1:1] |> gpu
# absolute (unnormalized) l₂ norm, used for the PDE residual
function l₂loss2(𝐲)
    feature_dims = 2:(ndims(𝐲) - 1)
    loss = sum(.√(sum(abs2, 𝐲, dims=feature_dims)))
    # y_norm = sum(.√(sum(abs2, 𝐲, dims=feature_dims)))
    return loss # / y_norm
end
l₂loss2(m(xtrain))
l₂loss(m(xtrain), ytrain)
#burger
# eq = Dt(u(t, x)) + u(t, x) * Dx(u(t, x)) - ν * Dxx(u(t, x)) ~ 0
ν = 0.01
derivative_operator(x, dxr, dxl, dtr, dtl) = derivative_dt(dtr, dtl) .+ m(x) .* derivative_dx(dxr, dxl) .- ν .* derivative_dx2(x, dxr, dxl)
pde_loss(x,dxr,dxl,dtr,dtl) = l₂loss2(derivative_operator(x, dxr, dxl, dtr, dtl))
data_loss(x, y) = l₂loss(m(x) , y)
lossfull(x, dxr, dxl, dtr, dtl, y) = data_loss(x, y) + pde_loss(x,dxr,dxl,dtr,dtl)
evalcb() = @show(lossfull(xtrain, dxrtrain, dxltrain, dtrtrain, dtltrain, ytrain))
l₂loss2(derivative_operator(xtrain, dxrtrain, dxltrain, dtrtrain, dtltrain))
pde_loss(xtrain, dxrtrain, dxltrain, dtrtrain, dtltrain)
data_loss(xtrain, ytrain)
lossfull(xtrain, dxrtrain, dxltrain, dtrtrain, dtltrain, ytrain)
evalcb()
# CUDA.devices()
CUDA.reclaim()
CUDA.memory_status()
epochs = 80
Flux.@epochs epochs Flux.train!(lossfull, params(m), data, optimiser, cb=evalcb)
model = m |> cpu
j = 1
res = model(input_data[:, :, :, j:j])[1, :, :, 1]
anim = @animate for i in 1:100
    plot(output_data[1, i, :, j])
    plot!(res[i, :])
end
gif(anim, fps=15)
fine-tuning optimization with PINNs
Here the neural operator, already trained over the whole given functional space of initial conditions, is used as a pre-trained state for a PINN solver in order to achieve higher accuracy on a single instance of the Burgers' equation.
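Schematically, fine-tuning replaces the data term with an operator term that anchors the fine-tuned model $m$ to the frozen pre-trained operator $m_{\mathrm{trained}}$ (this matches `lossfull` as redefined below, where `data_loss` is commented out):

$$\mathcal{L} = \lVert \partial_t u + u\,\partial_x u - \nu\,\partial_{xx} u \rVert_2 + \lVert m_{\mathrm{trained}}(x) - m(x) \rVert_2, \qquad u = m(x)$$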
# fine tuning with PINNS
m = FourierNeuralOperator(ch=(3, 64, 64, 64, 64, 64, 128, 1), modes=(10, 16),σ=gelu)
j = 1 # index of one of the initial conditions from the dataset
input_data_ = input_data[:, :, :, j:j] |> gpu
output_data_ = output_data[:, :, :, j:j] |> gpu
input_dxr_ = input_dxr[:, :, :, j:j] |> gpu
input_dxl_ = input_dxl[:, :, :, j:j] |> gpu
input_dtr_ = input_dtr[:, :, :, j:j] |> gpu
input_dtl_ = input_dtl[:, :, :, j:j] |> gpu
batchsize = 1
data_train = (input_data_,input_dxr_,input_dxl_,input_dtr_,input_dtl_, output_data_)
data = DataLoader(data_train |> gpu , batchsize=batchsize, shuffle=true)
ν = 0.01
derivative_operator(x, dxr, dxl, dtr, dtl) = derivative_dt(dtr, dtl) .+ m(x) .* derivative_dx(dxr, dxl) .- ν .* derivative_dx2(x, dxr, dxl)
pde_loss(x,dxr,dxl,dtr,dtl) = l₂loss2(derivative_operator(x, dxr, dxl, dtr, dtl))
data_loss(x, y) = l₂loss(m(x) , y)
lossfull(x, dxr, dxl, dtr, dtl, y) = data_loss(x, y) + pde_loss(x,dxr,dxl,dtr,dtl)
#model saved here https://github.com/KirillZubov/pino_model/tree/main/burger/model
mtrained = BSON.load("/home/kirillzubov/model_burger_pino_full.bson")[:m]
j = 100
res = mtrained(input_data[:, :, :, j:j])[1, :, :, 1]
anim = @animate for i in 1:100
    plot(output_data[1, i, :, j])
    plot!(res[i, :])
end
gif(anim, "burger_neural_operator.gif", fps=15)
mtrained = mtrained |> gpu
m = m |> gpu
op_loss(x) = l₂loss(mtrained(x) , m(x))
derivative_operator(x, dxr, dxl, dtr, dtl) = derivative_dt(dtr, dtl) .+ m(x) .* derivative_dx(dxr, dxl) .- ν .* derivative_dx2(x, dxr, dxl)
pde_loss(x,dxr,dxl,dtr,dtl) = l₂loss2(derivative_operator(x, dxr, dxl, dtr, dtl))
lossfull(x, dxr, dxl, dtr, dtl, y) = pde_loss(x,dxr,dxl,dtr,dtl) + op_loss(x) #data_loss(x, y)
evalcb() = @show(lossfull(input_data_, input_dxr_, input_dxl_, input_dtr_, input_dtl_, output_data_))
mtrained(xtrain)
m(xtrain)
op_loss(xtrain)
pde_loss(xtrain, dxrtrain, dxltrain, dtrtrain, dtltrain)
lossfull(xtrain, dxrtrain, dxltrain, dtrtrain, dtltrain, ytrain)
evalcb()
CUDA.reclaim()
CUDA.memory_status()
epochs = 1000
η₀ = 1.0f-4
λ = 1.0f-5
optimiser = Flux.Optimiser(WeightDecay(λ), Flux.Adam(η₀))
Flux.@epochs epochs Flux.train!(lossfull, params(m), data, optimiser, cb=evalcb)
model = m |> cpu
mtrainedc = mtrained |> cpu
j = 1
res = model(input_data[:, :, :, j:j])[1, :, :, 1]
res2 = mtrainedc(input_data[:, :, :, j:j])[1, :, :, 1]
anim = @animate for i in 1:100
    plot(output_data[1, i, :, j])
    plot!(res[i, :])
    plot!(res2[i, :])
end
gif(anim, fps=15)
A couple of strategies/scenarios:
1) train-to-train model;
2) physics-informed neural operator: integrate both losses into one training scheme;
3) use each individual prediction, solution, and data together to adaptively update the prediction of the core kernel / Green's function for a class of PDEs (the major equations of some problem) as a pre-trained state.
References:
https://zongyi-li.github.io/neural-operator/Neural-Operator-CS159-0503-2022.pdf
https://www2.compute.dtu.dk/~apek/SCIML2022/Surrogate_methods.pdf
Physics-Informed Neural Operators: https://arxiv.org/abs/2111.03794
Graph Kernel Network: https://arxiv.org/abs/2003.03485
Generative Adversarial Neural Operators: https://arxiv.org/abs/2205.03017
TODO
Provide support for the main NeuralOperators.jl package (https://github.com/SciML/NeuralOperators.jl/tree/main/src): symbolic part, inner types, strategies, training, transfer learning, GPU.
I would also like to experiment with this dataset and build an interesting demonstration example: https://developer.ibm.com/data/spot-challenge-wildfires/