lululxvi / deepxde

A library for scientific machine learning and physics-informed learning
https://deepxde.readthedocs.io
GNU Lesser General Public License v2.1
2.68k stars 749 forks source link

Test loss opposite from train loss #222

Closed Clement465 closed 3 years ago

Clement465 commented 3 years ago

Hi @lululxvi,

I have been getting a problem where my test loss is opposite from my train loss (as my training loss decreases, test loss increases proportionally)

Example of the loss plot: loss Example of points sampled: Boundary points: 400 Domain points: 1500 Test points: 1000 points

PDE used:

def pde(inputs, outputs, _):
    dphi_x=tf.gradients(outputs,inputs)[0][:,0:1]
    dphi_y=tf.gradients(outputs,inputs)[0][:,1:]
    dphi_xx=tf.gradients(dphi_x,inputs)[0][:,0:1]
    dphi_yy=tf.gradients(dphi_y,inputs)[0][:,1:]
    return dphi_xx+dphi_yy

Do you have any suggestions that I can try out?

Thank you very much.

Clement465 commented 3 years ago

Hi Lu Lu,

Just to elaborate, I have plotted the train loss that is contributed to by the pde with the test loss below. loss(only pde)

The weird thing is that although the same metric (the pde) is used to evaluate these two losses, and the training and test points are located in the same region, the test loss increases as the training loss decreases.

lululxvi commented 3 years ago

Cannot really tell what happened from the information you give.

Clement465 commented 3 years ago

Hi @lululxvi,

I have included my code below. I am trying to solve for potential flow over a NACA 4 digit airfoil.

The boundary conditions and pde are as follows: image

Main Code

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf
import math

import deepxde as dde
from deepxde.geometry.csg import CSGDifference
import matplotlib.pyplot as plt
import pandas as pd
import sympy as sp

from SALib.sample import sobol_sequence
from scipy import spatial

from geometry_airfoil import NACA4D

rect_points = 200
airfoil_points = 200
domain_points = 1500
test_points = 1000

num_epochs = 5000

U = 1
rho = 1.225

NACA_NUM = 2412
chord_len = 1.0
LE = [0,0]

load_model = False
restore_model = False
restore_path = "model/model.ckpt-10000"

class my_CSGDifference(CSGDifference):
    def __init__(self, geom1, geom2):
        super(my_CSGDifference, self).__init__(geom1, geom2)

    def random_boundary_points(self, n, random="pseudo"):
        n = airfoil_points + rect_points
        x = []
        temp = self.geom1.random_boundary_points(rect_points*3, random=random)

        for i in temp:
            if ((not self.geom2.inside(i)) and len(x) < rect_points):
                x+= [i]

        temp = self.geom2.random_boundary_points(airfoil_points, random=random)

        for i in temp:
            x+=[i]
        return np.random.permutation(x)[:n]

    def random_points(self, n, random="pseudo"):
        x = []

        temp = self.geom1.random_points(n*3, random=random)

        for i in temp:
            if (not self.geom2.inside(i)) and len(x) < n:
                x+=[i]
        return np.array(x[:n])

def main():
    dde.config.real.set_float64()
    # tf.compat.v1.enable_eager_execution()

    airfoil = NACA4D(LE, chord_len, NACA_NUM)
    rectangular = dde.geometry.Rectangle([-8, -8], [15, 8])
    geom = my_CSGDifference(rectangular, airfoil)

    def pde(inputs, outputs, _):
        dphi_x=tf.gradients(outputs,inputs)[0][:,0:1]
        dphi_y=tf.gradients(outputs,inputs)[0][:,1:]
        dphi_xx=tf.gradients(dphi_x,inputs)[0][:,0:1]
        dphi_yy=tf.gradients(dphi_y,inputs)[0][:,1:]
        #return the laplace equation of phi
        return dphi_xx+dphi_yy

    def bc_rectu(inputs, outputs, _):
        return tf.gradients(outputs,inputs)[0][:,0:1] - U

    def bc_rectv(inputs, outputs, _):
        return tf.gradients(outputs,inputs)[0][:,1:]

    def bc_airfoil(inputs, outputs, X):
        M = int(NACA_NUM/1000) / 100.0
        P = (int(NACA_NUM/100) % 10) / 10.0
        T = (NACA_NUM % 100) / 100.0

        thetas = np.zeros((X.shape[0],1))

        temp = (X - np.array(LE))/chord_len
        if (M == 0):
            const1 = 0
            const2 = 0
        else:
            const1 = M/(P**2)
            const2 = M/((1-P)**2)

        for i, point in enumerate(temp):
            x1 = point[0]
            y1 = point[1]

            # if not (np.all(point >= airfoil.xmin) and np.all(point <= airfoil.xmax)):
            #   continue

            if not (airfoil.on_boundary(point)):
                continue

            x = sp.symbols('x')
            y = sp.Function('y')(x)

            if (x1 <= P):
                y = const1*(2*P*x-x**2)
            else:
                y = const2*(1 - 2*P + 2*P*x - x**2)

            D2 = sp.Function('D2')(x)
            D2 = (y-y1)**2 + (x-x1)**2

            diffeq = sp.Eq(D2.diff(x),0)

            x_val = sp.nsolve(diffeq,x,0.5)
            y_val = y.subs(x, x_val)

            dist = (D2.subs(x, x_val))**0.5

            thickness = T/0.2*(0.2969 * x_val**0.5 - 0.126*x_val - 0.3516*x_val**2 + 0.2843*x_val**3 - 0.1036*x_val**4)

            t = sp.Function('t')(x)

            diffeq = y.diff(x)
            m = diffeq.subs(x, x_val)

            t = T/0.2*(0.2969 * x**0.5 - 0.126*x - 0.3516*x**2 + 0.2843*x**3 - 0.1036*x**4)
            diffeq2 = t.diff(x)
            K = diffeq2.subs(x, x_val)
            if (y1 >= y_val):
                tangent = (m*(m**2+1)**0.5 + K)  /  ((m**2+1)**0.5 - m*K)
            else:
                tangent = (m*(m**2+1)**0.5 - K)  /  ((m**2+1)**0.5 + m*K)
            angle = math.atan(tangent)

            thetas[i] = angle

        u=tf.gradients(outputs,inputs)[0][:,0:1]
        v=tf.gradients(outputs,inputs)[0][:,1:]

        return u*tf.math.sin(thetas) - v*tf.math.cos(thetas)

    def bc_teu(inputs, outputs, _):
        return tf.gradients(outputs,inputs)[0][:,0:1]

    def bc_tev(inputs, outputs, _):
        return tf.gradients(outputs,inputs)[0][:,1:]

    def boundary_rect(x, on_boundary):
        return on_boundary and rectangular.on_boundary(x)

    def boundary_airfoil(x, on_boundary):
        return on_boundary and airfoil.on_boundary(x)

    def boundary_te(x, on_boundary):
        return on_boundary and airfoil.on_trailing_edge(x)

    bc_rectu1 =  dde.OperatorBC(geom, bc_rectu, boundary_rect)
    bc_rectv1 =  dde.OperatorBC(geom, bc_rectv, boundary_rect)
    bc_airfoil1 = dde.OperatorBC(geom, bc_airfoil, boundary_airfoil)
    bc_teu1 = dde.OperatorBC(geom, bc_teu, boundary_te)
    bc_tev1 = dde.OperatorBC(geom, bc_tev, boundary_te)

    bcs = [bc_rectu1, bc_rectv1, bc_airfoil1, bc_teu1, bc_tev1]

    data = dde.data.PDE(geom, pde, bcs, num_domain=domain_points,num_boundary=1, num_test=test_points)

    net = dde.maps.FNN([2] + [50] * 10 + [1], "tanh", "Glorot uniform")
    model = dde.Model(data, net)
    checkpoint = dde.callbacks.ModelCheckpoint("./model/model.ckpt", verbose=1, save_better_only=True, period = 500)

    ## Train the model
    model.compile("adam", lr=0.001)
    if (load_model):
        model.restore(restore_path, verbose=1)
    else:
        if (restore_model):
            losshistory, train_state = model.train(epochs=num_epochs,display_every=1,callbacks = [checkpoint], model_restore_path = restore_path)
        else:
            losshistory, train_state = model.train(epochs=num_epochs,display_every=1,callbacks = [checkpoint])

        print(data.num_bcs)

        X_train, y_train, X_test, y_test, best_y, best_ystd = train_state.packed_data()

        plt.figure()
        ax = plt.subplot(111)
        ax.set_aspect(1.0)
        ax.plot(X_test[:,0], X_test[:, 1],'.r', label = 'Test points')
        ax.plot(X_train[:,0], X_train[:, 1],'.b', label = 'Training points')

        # Shrink current axis by 20%
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

        # Put a legend to the right of the current axis
        lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

        plt.xlabel("x")
        plt.ylabel("y")
        plt.tight_layout()
        plt.savefig('points.png', bbox_extra_artists=(lgd,), bbox_inches='tight', dpi = 200)

        dde.postprocessing.save_loss_history(losshistory, "loss.dat")
        dde.postprocessing.save_best_state(train_state, "train.dat", "test.dat")
        dde.postprocessing.plot_loss_history(losshistory)
        plt.savefig('loss.png')
        dde.postprocessing.plot_best_state(train_state)

        plt.show()

if __name__ == "__main__":
    main()
    # exec(open('Post_process.py').read())

Airfoil Class

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import itertools

import numpy as np
import math
from SALib.sample import sobol_sequence
from scipy import stats
from sklearn import preprocessing

from deepxde.geometry.geometry import Geometry
from deepxde.geometry.geometry_2d import Rectangle

import sympy as sp

class NACA4D(Geometry):
    def __init__(self, leading_edge, chord_len, naca_num):
        self.leading_edge = np.array(leading_edge)
        self.trailing_edge = self.leading_edge + [chord_len, 0]
        self.chord_len = chord_len
        self.num = naca_num
        self.M = int(naca_num/1000) / 100.0
        self.P = (int(naca_num/100) % 10)/10.0
        self.T = (naca_num % 100) / 100.0 
        max_thickness = self.T/0.2*0.100012*chord_len
        max_camber = self.M*chord_len

        self.xmin = np.array([leading_edge[0] - 0.001*chord_len, leading_edge[1] - max_thickness])
        self.xmax = np.array([leading_edge[0] + 1.001*chord_len, leading_edge[1] + max_thickness*1.001 + max_camber])

        super(NACA4D, self).__init__(
            2, (self.xmin, self.xmax), self.chord_len
        )

    def dist_to_camber(self, coord):
        point = (coord - self.leading_edge)/self.chord_len

        M = self.M
        P = self.P
        T = self.T

        if (M == 0):
            const1 = 0
            const2 = 0
        else:
            const1 = M/(P**2)
            const2 = M/((1-P)**2)

        x = sp.symbols('x')
        y = sp.Function('y')(x)

        y1 = point[1]
        x1 = point[0]

        if (x1 <= P):
            y = const1*(2*P*x-x**2)
        else:
            y = const2*(1 - 2*P + 2*P*x - x**2)

        D2 = sp.Function('D2')(x)
        D2 = (y-y1)**2 + (x-x1)**2

        diffeq = sp.Eq(D2.diff(x),0)

        x_val = sp.nsolve(diffeq,x,1.0)
        y_val = y.subs(x, x_val)

        D = (D2.subs(x, x_val))**0.5
        thickness = T/0.2*(0.2969 * x_val**0.5 - 0.126*x_val - 0.3516*x_val**2 + 0.2843*x_val**3 - 0.1036*x_val**4)

        return float(thickness), float(D), float(x_val), float(y_val)

    def inside(self, x):
        if not (np.all(x >= self.xmin) and np.all(x <= self.xmax)):
            return False
        else:
            thickness, dist, x_val, y_val = self.dist_to_camber(x)
            return (dist <= thickness)

    def on_boundary(self, x):
        if not (np.all(x >= self.xmin) and np.all(x <= self.xmax)):
            return False
        else:
            thickness, dist, x_val, y_val = self.dist_to_camber(x)
            return (np.isclose(dist, thickness))

    def on_trailing_edge(self, x):
        return np.all(np.isclose(x, self.trailing_edge))

    def random_boundary_points(self, n, random="pseudo"):
        return self.uniform_boundary_points(n)

    def uniform_boundary_points(self, n):
        M = self.M
        P = self.P
        T = self.T

        if (M == 0):
            const1 = 0
            const2 = 0
        else:
            const1 = M/(P**2)
            const2 = M/((1-P)**2)

        num = int(n/2)

        x_c = np.zeros(num)
        y_c = np.zeros(num)
        x_u = np.zeros(num)
        y_u = np.zeros(num)
        x_l = np.zeros(num)
        y_l = np.zeros(num)
        thetas = np.zeros((num*2,1))

        for i in range(num):
            beta = i / (num - 1) * math.pi
            x_c[i] = (1-math.cos(beta))/2

            if (x_c[i] <= P):
                y_c[i] = const1 * (2*P*x_c[i] - x_c[i]**2)
            else:
                y_c[i] = const2 * (1 - 2*P + 2*P*x_c[i] - x_c[i]**2)

        yt = T/0.2*(0.2969 * x_c**0.5 - 0.126 * x_c - 0.3516 * x_c**2 + 0.2843 * x_c**3 - 0.1036 * x_c**4)

        for i in range(num):
            if (x_c[i] <= P):
                const = const1 * 2*(P-x_c[i])
            else:
                const = const2 * 2*(P-x_c[i])
            x_u[i] = x_c[i] - yt[i] * math.sin(math.atan(const))
            y_u[i] = y_c[i] + yt[i] * math.cos(math.atan(const))
            x_l[i] = x_c[i] + yt[i] * math.sin(math.atan(const))
            y_l[i] = y_c[i] - yt[i] * math.cos(math.atan(const))

        x_out = np.concatenate((x_u,np.flip(x_l)))
        y_out = np.concatenate((y_u,np.flip(y_l)))

        for i in range(num):
            x_val = x_c[i]
            y_val = y_c[i]
            if(i == 0):
                x_val = self.chord_len*1e-12
            thickness = yt[i]
            if (M == 0):
                const1 = 0
                const2 = 0
            else:
                const1 = M/(P**2)
                const2 = M/((1-P)**2)

            x = sp.symbols('x')
            y = sp.Function('y')(x)
            t = sp.Function('t')(x)

            if (x_val <= P):
                y = const1*(2*P*x-x**2)
            else:
                y = const2*(1 - 2*P + 2*P*x - x**2)

            diffeq = y.diff(x)
            m = diffeq.subs(x, x_val)

            t = T/0.2*(0.2969 * x**0.5 - 0.126*x - 0.3516*x**2 + 0.2843*x**3 - 0.1036*x**4)
            diffeq2 = t.diff(x)
            K = diffeq2.subs(x, x_val)

            tangent1 = (m*(m**2+1)**0.5 + K)  /  ((m**2+1)**0.5 - m*K)
            angle1 = math.atan(tangent1)

            tangent2 = (m*(m**2+1)**0.5 - K)  /  ((m**2+1)**0.5 + m*K)
            angle2 = math.atan(tangent2)

            thetas[i] = angle1
            thetas[num*2-1-i] = angle2

        outpoints = np.vstack((x_out, y_out)).T
        outpoints = outpoints * self.chord_len
        outpoints = outpoints + self.leading_edge

        self.points = outpoints
        self.thetas = thetas

        return outpoints
Clement465 commented 3 years ago

The problem seems to stem from the boundary points giving a high loss for the pde metric. Is there a way to have the pde not apply to the boundary points?

When I reduced the points on the airfoil to 10 and the points on the rectangular boundary to 4, the loss becomes like this: loss

lululxvi commented 3 years ago

To remove the BC points in PDE loss, you have to modify the code. But this usually not an issue. I guess one reason could be that different losses have different scales. also check FAQ: "Q: I failed to train the network or get the right solution, e.g., the training loss is large."

engsbk commented 2 years ago

Hi @lululxvi,

I have been getting a problem where my test loss is opposite from my train loss (as my training loss decreases, test loss increases proportionally)

Example of the loss plot: loss Example of points sampled: Boundary points: 400 Domain points: 1500 Test points: 1000 points

PDE used:

def pde(inputs, outputs, _):
  dphi_x=tf.gradients(outputs,inputs)[0][:,0:1]
  dphi_y=tf.gradients(outputs,inputs)[0][:,1:]
  dphi_xx=tf.gradients(dphi_x,inputs)[0][:,0:1]
  dphi_yy=tf.gradients(dphi_y,inputs)[0][:,1:]
  return dphi_xx+dphi_yy

Do you have any suggestions that I can try out?

Thank you very much.

Thank you for your contribution! Can you share the code you used to produce the plot for the training and test points?

Clement465 commented 2 years ago

@engsbk the version below produces a slight different figure, but the concept used should be the same

        losshistory, train_state = model.train(epochs=num_epochs,display_every=1,callbacks = [checkpoint])

        X_train, y_train, X_test, y_test, best_y, best_ystd = train_state.packed_data()

        X_bound = np.empty((X_train.shape[0],2))
        X_bound[:] = np.NaN

        for i, point in enumerate(X_train):
            if (geom.on_boundary(point)):
                X_bound[i,:] = point
                X_train[i,:] = np.NaN

        plt.figure()
        ax = plt.subplot(111)
        ax.set_aspect(1.0)
        ax.plot(X_test[:,0], X_test[:, 1],'.r', label = 'Test points')
        ax.plot(X_train[:,0], X_train[:, 1],'.b', label = 'Collocation points')
        ax.plot(X_bound[:,0], X_bound[:, 1],'.k', label = 'Boundary points')

        # Shrink current axis by 20%
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

        # Put a legend to the right of the current axis
        lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

        plt.xlabel("x")
        plt.ylabel("y")
        plt.tight_layout()
        plt.savefig('points.png', bbox_extra_artists=(lgd,), bbox_inches='tight', dpi = 200)