lululxvi / deepxde

A library for scientific machine learning and physics-informed learning
https://deepxde.readthedocs.io
GNU Lesser General Public License v2.1

How to use different input in the network for different loss term? #663

Closed. forxltk closed this issue 2 years ago.

forxltk commented 2 years ago

Hi, Dr. Lu. I'm trying to replicate the result of the following paper: https://arxiv.org/abs/2006.05311 using DeepXDE.

It is a Stefan problem with a free boundary. For the forward problem, the free boundary s(t) is modeled by a network net_s, and the temperature u(x, t) by a network net_u. The governing equation and boundary conditions are Eqs. 3.1–3.6 in the paper. [figure omitted]

By adding an additional equation for the free boundary to the PDE system, there are two possible solutions: (a) compute the free-boundary residual only when x is close to s(t); a sketch of that masking is shown below. I have not obtained a satisfactory result with it so far.
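
Roughly, the masking in (a) can be sketched as follows in a DeepXDE pde function, assuming two output components u(x, t) and s(t) and a hand-picked tolerance tol (a minimal sketch, not the exact code used here):

import deepxde as dde
import tensorflow as tf

tol = 1e-3  # assumed tolerance for "x close to s(t)"

def pde(x, y):
    # y[:, 0:1] = u(x, t), y[:, 1:2] = s(t)
    u_t = dde.grad.jacobian(y, x, i=0, j=1)
    u_xx = dde.grad.hessian(y, x, component=0, i=0, j=0)
    heat = u_t - u_xx
    s = y[:, 1:2]
    # keep the free-boundary residual u(s(t), t) = 0 only near the interface
    near = tf.cast(tf.abs(x[:, 0:1] - s) < tol, x.dtype)
    free_boundary = near * y[:, 0:1]
    return [heat, free_boundary]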

So I turned to solution (b), which is used in the paper: directly use s(t) as an input of net_u, so that all sample points can be used without checking whether x is close to s(t).

I used output_transform to implement s as a function of t only and to evaluate u at [s(t), t]. But then u is not related to [x, t] at all.

def modify_output(X, Y):
    x, t = X[:, 0:1], X[:, 1:2]
    x_t = tf.concat([x, t], axis=1)  # built but never used below

    s_layer_size = 40
    s = tf.layers.dense(t, s_layer_size, tf.nn.tanh)
    s = tf.layers.dense(s, s_layer_size, tf.nn.tanh)
    s = tf.layers.dense(s, s_layer_size, tf.nn.tanh)
    s = tf.layers.dense(s, s_layer_size, tf.nn.tanh)
    s = tf.layers.dense(s, s_layer_size, tf.nn.tanh)
    s_output = tf.layers.dense(s, 1, None)
    s_concat = tf.concat([s_output, t], axis=1)

    u_layer_size = 40
    u = tf.layers.dense(s_concat, u_layer_size, tf.nn.tanh)  # the u network only ever sees (s(t), t), never x
    u = tf.layers.dense(u, u_layer_size, tf.nn.tanh)
    u = tf.layers.dense(u, u_layer_size, tf.nn.tanh)
    u = tf.layers.dense(u, u_layer_size, tf.nn.tanh)
    u = tf.layers.dense(u, u_layer_size, tf.nn.tanh)
    u_output = tf.layers.dense(u, 1, None)
    final_output = tf.concat([s_output, u_output], axis=1)
    return final_output

The question is: how do I modify the network structure or the loss function so that the input of net_u is (x, t) for the PDE loss, but (s(t), t) for the free-boundary loss?

lululxvi commented 2 years ago

In your code, there are two outputs: s(t) and u(s(t), t). For the networks, I think you still need two outputs: u(x, t) and s(t). Eqs. 3.1 to 3.4 are easy to do, but Eqs. 3.5 and 3.6 are tricky. I think it should be doable, but I haven't had a good idea yet. Have you checked the code in that paper?

forxltk commented 2 years ago

Yes, here is the code for Eq. 3.5 and the networks:

# Evaluate predictions
self.s_pred = self.net_s(self.t_u_tf)  # self.t_u_tf is t
self.u_pred = self.net_u(self.x_u_tf, self.t_u_tf)

self.u_Sbc_pred = self.net_u((self.s_pred - self.mu_x) / self.sigma_x, self.t_u_tf)

# Boundary loss
self.loss_Sbc = tf.reduce_mean(tf.square(self.u_Sbc_pred))
# Evaluates the forward pass
def forward_pass_u(self, H):
    num_layers = len(self.layers_u)
    for l in range(0, num_layers - 2):
        W = self.weights_u[l]
        b = self.biases_u[l]
        H = tf.tanh(tf.add(tf.matmul(H, W), b))
    W = self.weights_u[-1]
    b = self.biases_u[-1]
    H = tf.add(tf.matmul(H, W), b)
    return H

def forward_pass_s(self, H):
    num_layers = len(self.layers_s)
    for l in range(0, num_layers - 2):
        W = self.weights_s[l]
        b = self.biases_s[l]
        H = tf.tanh(tf.add(tf.matmul(H, W), b))
    W = self.weights_s[-1]
    b = self.biases_s[-1]
    H = tf.add(tf.matmul(H, W), b)
    return H

def net_u(self, x, t):
    u = self.forward_pass_u(tf.concat([x, t], 1))
    return u

def net_s(self, t):
    s = self.forward_pass_s(t)
    return s

The implementation seems straightforward: for u, (x, t) is passed; for the free boundary, t is used to compute s(t), and then u(s(t), t) is evaluated.

Maybe I should have three outputs: s(t), u(x, t) and u(s(t), t) so that I can define the free boundary condition in the PDE system?

lululxvi commented 2 years ago

Yes, we can do it the same way they did: one network for u(x, t), one network for s(t), and then re-use the u network to compute u(s(t), t).

forxltk commented 2 years ago

Nice, I will try it out!

forxltk commented 2 years ago

I have another question: how do I reuse the network? I use the following code, but the result is not good.

def modify_output(X, Y):
    x, t = X[:, 0:1], X[:, 1:2]
    x_t = tf.concat([x, t], axis=1)

    s_layer_size = 40
    s = tf.layers.dense(t, s_layer_size, tf.nn.tanh)
    s = tf.layers.dense(s, s_layer_size, tf.nn.tanh)
    s = tf.layers.dense(s, s_layer_size, tf.nn.tanh)
    s = tf.layers.dense(s, s_layer_size, tf.nn.tanh)
    s = tf.layers.dense(s, s_layer_size, tf.nn.tanh)
    s_output = tf.layers.dense(s, 1, None)
    s_concat = tf.concat([t, s_output], axis=1)

    u_layer_size = 40
    u = tf.layers.dense(x_t, u_layer_size, tf.nn.tanh)
    u = tf.layers.dense(u, u_layer_size, tf.nn.tanh)
    u = tf.layers.dense(u, u_layer_size, tf.nn.tanh)
    u = tf.layers.dense(u, u_layer_size, tf.nn.tanh)
    u = tf.layers.dense(u, u_layer_size, tf.nn.tanh)
    u_output = tf.layers.dense(u, 1, None)

    ust = tf.layers.dense(s_concat, u_layer_size, tf.nn.tanh)  # these dense calls create new weights, separate from the u network above
    ust = tf.layers.dense(ust, u_layer_size, tf.nn.tanh)
    ust = tf.layers.dense(ust, u_layer_size, tf.nn.tanh)
    ust = tf.layers.dense(ust, u_layer_size, tf.nn.tanh)
    ust = tf.layers.dense(ust, u_layer_size, tf.nn.tanh)
    ust_output = tf.layers.dense(ust, 1, None)

    final_output = tf.concat([u_output, s_output, ust_output], axis=1)
    return final_output

I think the reason is that there are three separate networks: the networks that compute u(x, t) and u(s(t), t) are not the same one. So could you tell me how to reuse the network?

Best regards!

lululxvi commented 2 years ago

One way is to implement the weights and biases explicitly, as in the code of that paper. There should be easier ways, but I am not sure what the best way to do this is. You might check the Keras API.
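
For reference: each tf.layers.dense(...) call creates a fresh set of weights, whereas a Keras layer or model object owns its weights and reuses them every time it is called. A minimal standalone sketch of that behavior (assuming TF 2.x eager execution; the tensors a and b are stand-ins for the (x, t) and (s(t), t) batches):

import tensorflow as tf
from tensorflow.keras import layers

a = tf.ones((3, 2))   # stands in for the (x, t) batch
b = tf.zeros((3, 2))  # stands in for the (s(t), t) batch

shared = layers.Dense(40, activation="tanh")
ya = shared(a)  # first call builds the kernel and bias
yb = shared(b)  # second call reuses the same kernel and bias

print(len(shared.trainable_variables))  # 2: one kernel and one bias, shared by both calls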

forxltk commented 2 years ago

OK, I have modified the code.

def modify_output(X, Y):
    x, t = X[:, 0:1], X[:, 1:2]
    x_t = tf.concat([x, t], axis=1)

    model_s = models.Sequential()
    s_layer_size = 40

    model_s.add(layers.Dense(s_layer_size, activation='tanh'))
    model_s.add(layers.Dense(s_layer_size, activation='tanh'))
    model_s.add(layers.Dense(s_layer_size, activation='tanh'))
    model_s.add(layers.Dense(s_layer_size, activation='tanh'))
    model_s.add(layers.Dense(s_layer_size, activation='tanh'))
    model_s.add(layers.Dense(1))
    s_output = model_s(t)

    s_concat = tf.concat([t, s_output], axis=1)  # note: this order turns out to be wrong (see below); net_u expects (x, t)

    u_layer_size = 40
    model_u = models.Sequential()
    model_u.add(layers.Dense(u_layer_size, activation='tanh'))
    model_u.add(layers.Dense(u_layer_size, activation='tanh'))
    model_u.add(layers.Dense(u_layer_size, activation='tanh'))
    model_u.add(layers.Dense(u_layer_size, activation='tanh'))
    model_u.add(layers.Dense(u_layer_size, activation='tanh'))
    model_u.add(layers.Dense(1))
    u_output = model_u(x_t)

    ust_output = model_u(s_concat)

    final_output = tf.concat([u_output, s_output, ust_output], axis=1)
    return final_output

The result improved, but it is still incorrect, and the free-boundary losses do not decrease. [figures omitted]

I think I should use the explicit weights and biases, or the normalized s, as in the paper. I will give it a try.

I have no more questions. Many thanks!

forxltk commented 2 years ago

This problem has been solved. I found two errors in my code.

  1. The order of s(t) and t was wrong. It should be s_concat = tf.concat([s_output, t], axis=1), because net_u expects its input ordered as (x, t).
  2. The Neumann BC at the free boundary involves du(s, t)/ds. However, u(s, t) and s are both outputs of the NN. In my previous code I used dde.grad.jacobian(u, u, i=2, j=1) in the definition of pde, but there is no gradient between two output columns; that is why the free-boundary losses did not decrease (see the small illustration below). Now I compute this derivative inside the output transform with tf.gradients.
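
A tiny standalone illustration of that point, in TF1-style graph mode; tanh and sin are arbitrary stand-ins for net_s and net_u:

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

t = tf.placeholder(tf.float32, (None, 1))
s = tf.tanh(t)                  # stand-in for s(t)
u = tf.sin(s)                   # stand-in for u(s(t), t), built from s
out = tf.concat([u, s], axis=1)

print(tf.gradients(u, s))                      # [<tf.Tensor ...>]: u depends on s
print(tf.gradients(out[:, 0:1], out[:, 1:2]))  # [None]: the two output columns are siblings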

Here is the result: [figure omitted]

The result looks good, even though I did not use normalization as in the paper.

Here is the code in case someone is interested:

import matplotlib.pyplot as plt
import numpy as np
import deepxde as dde
import tensorflow as tf
from tensorflow.keras import layers, models

#dde.config.real.set_float64()

s0 = 2 - np.sqrt(3)  # initial free-boundary position s(0)
gt = 2  # Neumann data at x = 0: du/dx = gt
h1t = 0  # not used below

tol = 0.001  # not used below

def pde(x, u):
    # output columns of u: u(x, t), s(t), u(s(t), t), du(s, t)/ds
    main_pde = dde.grad.jacobian(u, x, i=0, j=1) - dde.grad.hessian(u, x, i=0, j=0, component=0)

    # free boundary: u(s(t), t) = 0
    fs1 = u[:, 2:3]

    # free boundary (Neumann): du(s, t)/ds = sqrt(3 - 2t)
    fs2 = u[:, 3:4] - tf.sqrt(3 - 2 * x[:, 1:2])
    return [main_pde, fs1, fs2]

def boundary_l(x, on_boundary):
    return on_boundary and np.isclose(x[0], 0.0)

def func_bc_dux(x, u, _):
    return dde.grad.jacobian(u, x, i=0, j=0)-gt

geom = dde.geometry.Interval(0.0, 1.0)
timedomain = dde.geometry.TimeDomain(0.0, 1.0)
geomtime = dde.geometry.GeometryXTime(geom, timedomain)

IC_u = dde.IC(geomtime, lambda x: -1/2*x[:, 0:1]**2+2*x[:, 0:1]-1/2, lambda _, on_initial: on_initial, component=0)
IC_s = dde.IC(geomtime, lambda x: s0, lambda _, on_initial: on_initial, component=1)
BC_u = dde.OperatorBC(geomtime, func_bc_dux, boundary_l)

BC = [IC_u, IC_s, BC_u]

data = dde.data.TimePDE(
    geomtime, pde, BC,
    num_domain=10000,
    num_boundary=20,
    num_initial=200,
    )

layer_size = [2, 2]  # the base FNN output is ignored; the actual networks are built in modify_output below
activation = "tanh"
initializer = "Glorot uniform"

net = dde.maps.FNN(layer_size, activation, initializer)

def modify_output(X, Y):
    x, t = X[:, 0:1], X[:, 1:2]
    x_t = tf.concat([x, t], axis=1)

    model_s = models.Sequential()
    s_layer_size = 100

    model_s.add(layers.Dense(s_layer_size, activation='tanh'))
    model_s.add(layers.Dense(s_layer_size, activation='tanh'))
    model_s.add(layers.Dense(s_layer_size, activation='tanh'))
    model_s.add(layers.Dense(1))
    s_output = model_s(t)

    s_concat = tf.concat([s_output, t], axis=1)

    u_layer_size = 100
    model_u = models.Sequential()
    model_u.add(layers.Dense(u_layer_size, activation='tanh'))
    model_u.add(layers.Dense(u_layer_size, activation='tanh'))
    model_u.add(layers.Dense(u_layer_size, activation='tanh'))
    model_u.add(layers.Dense(1))
    u_output = model_u(x_t)

    ust_output = model_u(s_concat)  # reuse model_u (shared weights) to get u(s(t), t)

    dust_ds = tf.gradients(ust_output, s_output)[0]  # du(s, t)/ds, taken w.r.t. the graph tensor s_output

    final_output = tf.concat([u_output, s_output, ust_output, dust_ds], axis=1)
    return final_output

net.apply_output_transform(modify_output)
model = dde.Model(data, net)

#loss_weights = [1, 1, 1, 1, 1, 1]
model.compile('adam', lr=0.001)#, loss_weights=loss_weights)
checkpointer = dde.callbacks.ModelCheckpoint(
    "model/model.ckpt", verbose=1, save_better_only=True, period=1000)
# Restore a previously trained model, or uncomment the train line below to train from scratch:
model.restore("model/model.ckpt-19000.ckpt", verbose=1)
#model.train(epochs=20000, display_every=1000, callbacks=[checkpointer])

X = geomtime.random_points(5000)
x = X[:, 0]
t = X[:, 1]
output = model.predict(X)

u = output[:, 0]
s = output[:, 1]

l2_difference_s_0 = dde.metrics.l2_relative_error(2-np.sqrt(3-2*t), s)
print("L2 relative error in s:", l2_difference_s_0)

sa = np.argsort(t)
s = s[sa]
t = t[sa]

plt.plot(s, t, label="Pred")
plt.plot(2-np.sqrt(3-2*t), t, label="True")
plt.legend()
plt.xlabel("s")
plt.ylabel("t")
plt.show()