lululxvi / deepxde

A library for scientific machine learning and physics-informed learning
https://deepxde.readthedocs.io
GNU Lesser General Public License v2.1
2.7k stars 752 forks source link

Run DeepXde on HPC, GPU #1372

Open kmache opened 1 year ago

kmache commented 1 year ago

Dear Lu Lu Thanks a lot for this excellent package it helps a lot. I run my code on HPC for shallow water equations but get the following error


Traceback (most recent call last):
  File "/p/project/deepacf/deeprain/mache1/ai4swes/pinns4swes1d/pinns4swe1D_deepxde.py", line 183, in <module>
    losshistory, train_state = model.train(iterations=20_000)
  File "/p/project/deepacf/deeprain/mache1/ai4swes/pinns4swes1d/swe_env/lib/python3.10/site-packages/deepxde/utils/internal.py", line 22, in wrapper
    result = f(*args, **kwargs)
  File "/p/project/deepacf/deeprain/mache1/ai4swes/pinns4swes1d/swe_env/lib/python3.10/site-packages/deepxde/model.py", line 619, in train
    self._test()
  File "/p/project/deepacf/deeprain/mache1/ai4swes/pinns4swes1d/swe_env/lib/python3.10/site-packages/deepxde/model.py", line 808, in _test
    ) = self._outputs_losses(
  File "/p/project/deepacf/deeprain/mache1/ai4swes/pinns4swes1d/swe_env/lib/python3.10/site-packages/deepxde/model.py", line 528, in _outputs_losses
    outs = outputs_losses(inputs, targets)
  File "/p/project/deepacf/deeprain/mache1/ai4swes/pinns4swes1d/swe_env/lib/python3.10/site-packages/deepxde/model.py", line 308, in outputs_losses_train
    return outputs_losses(True, inputs, targets, self.data.losses_train)
  File "/p/project/deepacf/deeprain/mache1/ai4swes/pinns4swes1d/swe_env/lib/python3.10/site-packages/deepxde/model.py", line 296, in outputs_losses
    losses = losses_fn(targets, outputs_, loss_fn, inputs, self)
  File "/p/project/deepacf/deeprain/mache1/ai4swes/pinns4swes1d/swe_env/lib/python3.10/site-packages/deepxde/data/data.py", line 13, in losses_train
    return self.losses(targets, outputs, loss_fn, inputs, model, aux=aux)
  File "/p/project/deepacf/deeprain/mache1/ai4swes/pinns4swes1d/swe_env/lib/python3.10/site-packages/deepxde/data/pde.py", line 170, in losses
    error = bc.error(self.train_x, inputs, outputs, beg, end)
  File "/p/project/deepacf/deeprain/mache1/ai4swes/pinns4swes1d/swe_env/lib/python3.10/site-packages/deepxde/icbc/initial_conditions.py", line 36, in error
    return outputs[beg:end, self.component : self.component + 1] - values
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and CPU!

here is my code

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import matplotlib.pyplot as plt
import numpy as np
import deepxde as dde
from deepxde.backend import torch
from deepxde.callbacks import EarlyStopping
from torch import pi
from pathlib import Path

print("Is PyTorch using GPU?", torch.cuda.is_available())
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# set data type to float32
Dtype = torch.float64

main_path = os.getcwd()
results_path = os.path.join(main_path, 'results/')
models_path = os.path.join(main_path, 'trained_models')

# Define bottom topography 
def get_bottom(bottom_name):
    assert(isinstance(bottom_name, str))
    bottom0 = lambda x: 0.
    bottom0_x = lambda x: 0.

    bottom1 = lambda x: torch.where(torch.abs(x-10) <= 2, torch.sin((pi/4)*x),  0.)
    bottom1_x = lambda x: torch.where(torch.abs(x-10) < 2, (pi/4)*torch.cos((pi/4)*x), 0.)

    bottom2 = lambda x: torch.where(torch.abs(x-10) <= 2, 0.2-0.05*(x - 10)**2,  0.)
    bottom2_x = lambda x: torch.where(torch.abs(x-10) < 2, -0.1*(x-10), 0.)

    bottom3 = lambda x: torch.where(torch.abs(x-10) <= 2, 1-0.25*(x - 10)**2,  0.)
    bottom3_x = lambda x: torch.where(torch.abs(x-10) < 2, -0.5*(x-10), 0.)

    bottom4 = lambda x: torch.where(torch.abs(x-1.5) <= 0.1, 0.25*(torch.cos(10*pi*(x-1.5)) + 1), 0.)
    bottom4_x = lambda x:  torch.where(torch.abs(x-1.5) < 0.1, 2.5*pi*torch.sin(10*pi*(x-1.5)), 0.)
    if bottom_name == 'b0':
        return {'b': bottom0, 'b_x': bottom0_x}
    elif bottom_name == 'b1':
        return {'b': bottom1, 'b_x': bottom1_x}
    elif bottom_name == 'b2':
        return {'b': bottom2, 'b_x': bottom2_x}
    elif bottom_name == 'b3':
        return {'b': bottom3, 'b_x': bottom3_x}
    elif bottom_name == 'b4':
        return {'b': bottom4, 'b_x': bottom4_x}
    else:
        raise ValueError('Invalid bottom name, {} bottom not implemented'.format(bottom_name))

def _swe(bottom_name, g, source_term=True):
    def swe(X, U):
        bottom = get_bottom(bottom_name)
        bx = bottom['b_x']
        x, t = X[:, 0:1], X[:, 1:2]
        h = U[:, 0:1]
        u = U[:, 1:2]

        U1 = h
        U2 = h*u

        F1 = h*u
        F2 = h*u*u + 0.5 * g*h*h

        F1_x = dde.grad.jacobian(F1, X, i=0, j=0)
        F2_x = dde.grad.jacobian(F2, X, i=0, j=0)

        U1_t = dde.grad.jacobian(U1, X, i=0, j=1)
        U2_t = dde.grad.jacobian(U2, X, i=0, j=1)

        if source_term:
            h = 2 + torch.cos(x)*torch.cos(t)
            v = torch.sin(x)*torch.sin(t)/h
            S = torch.sin(x)*torch.cos(t)*(1 + v**2 - g*h) + 2*v*torch.cos(x)*torch.sin(t) + g*h*bx(x)
        else:
            S = torch.zeros_like(x)
        S = S.to(device)

        b_x = bx(x).to(device)

        equaz_1 = U1_t + F1_x
        equaz_2 = U2_t + F2_x + g*h*b_x - S

        return [equaz_1, equaz_2]
    return swe

g = 1.
xmin, xmax, tmax = 0., 20., 100.
input_dim = 2
output_dim = 2

def on_initial(_, on_initial):
    return on_initial

def boundary(_, on_boundary):
    return on_boundary

def boundary_0 (x, on_boundary):
    return on_boundary and np.isclose(x[0], xmin)

def boundary_L (x, on_boundary):
    return on_boundary and np.isclose(x[0], xmax)

def _transform_output(U0):
    def transform_output(x, U):
        t = x[:, 1:2]
        h = U[:, 0:1]
        u = U[:, 1:2]
        u_new = u*t + U0(x)
        h_new = h
        return torch.concat([h_new, u_new], axis=1).to(device)
    return transform_output

swe = _swe('b1', g, source_term=True)

def func_ic_u(x):
    return 0.0

def func_ic_h(X):
    X = torch.from_numpy(X)
    x = X[:, 0:1]
    t0 = 0.*torch.ones_like(x)
    h_ic  = 2 + torch.cos(x)*torch.cos(t0)
    return h_ic

def func_bc_h1(X):
    X = torch.from_numpy(X)
    t = X[:, 1:2]
    x_l = torch.ones_like(t) * xmin
    h_lbc = 2 + torch.cos(x_l)*torch.cos(t)
    return h_lbc

def func_bc_h2(X):
    X = torch.from_numpy(X)
    t = X[:, 1:2]
    x_r = torch.ones_like(t) * xmax
    h_rbc = 2 + torch.cos(x_r)*torch.cos(t)
    return h_rbc

def func_bc_u1(x):
    return 0.0

def func_bc_u2(X):
    X = torch.from_numpy(X)
    t = X[:, 1:2]
    x_r = torch.ones_like(t) * xmax
    h_rbc = 2 + torch.cos(x_r)*torch.cos(t)
    v_rbc = torch.sin(x_r)*torch.sin(t)/h_rbc
    return v_rbc

geom = dde.geometry.Interval(xmin, xmax)
timedomain = dde.geometry.TimeDomain(0.0, tmax)
geomtime = dde.geometry.GeometryXTime(geom, timedomain)

IC_h = dde.IC(geomtime, func_ic_h, on_initial, component=0)
IC_u = dde.IC(geomtime, func_ic_u, on_initial, component=1)

BC_h1 = dde.DirichletBC(geomtime, func_bc_h1, boundary_0, component=0)
BC_h2 = dde.DirichletBC(geomtime, func_bc_h2, boundary_L, component=0)

BC_u1 = dde.DirichletBC(geomtime, func_bc_u1,  boundary_0, component=1)
BC_u2 = dde.DirichletBC(geomtime, func_bc_u2,  boundary_L, component=1)

BC = [IC_h, BC_h1, BC_h2, BC_u1, BC_u2]

data = dde.data.TimePDE(geomtime, swe, BC, num_domain=6000, num_boundary=400, num_initial=600)

net = dde.maps.FNN(layer_sizes = [input_dim] + [60]*5 + [output_dim], activation="tanh",
                   kernel_initializer="Glorot uniform")

transform_output = _transform_output(func_ic_u)
net.apply_output_transform(lambda x, U: transform_output(x, U))

model = dde.Model(data, net)

model.compile('adam', lr=0.0005, loss_weights = None)
losshistory, train_state = model.train(iterations=20_000)```

what could be the problem please?
praksharma commented 1 year ago

Can you please remove this .to(device) from all the tensors? DeepXDE automatically finds your GPU. As a side comment, do you really need these old libraries?

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
kmache commented 1 year ago

Thanks, @praksharma for replying, I have removed to(device) but still have the same error


import os
import matplotlib.pyplot as plt
import numpy as np
import deepxde as dde
from deepxde.backend import torch
from deepxde.callbacks import EarlyStopping
from torch import pi
from pathlib import Path

print("Is PyTorch using GPU?", torch.cuda.is_available())
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Set data type to float32
#Dtype = torch.float64

main_path = os.getcwd()
results_path = os.path.join(main_path, 'results/')
models_path = os.path.join(main_path, 'trained_models')

# Define bottom topography 
def get_bottom(bottom_name):
    assert(isinstance(bottom_name, str))
    bottom0 = lambda x: 0.
    bottom0_x = lambda x: 0.

    bottom1 = lambda x: torch.where(torch.abs(x-10) <= 2, torch.sin((pi/4)*x),  0.)
    bottom1_x = lambda x: torch.where(torch.abs(x-10) < 2, (pi/4)*torch.cos((pi/4)*x), 0.)

    bottom2 = lambda x: torch.where(torch.abs(x-10) <= 2, 0.2-0.05*(x - 10)**2,  0.)
    bottom2_x = lambda x: torch.where(torch.abs(x-10) < 2, -0.1*(x-10), 0.)

    bottom3 = lambda x: torch.where(torch.abs(x-10) <= 2, 1-0.25*(x - 10)**2,  0.)
    bottom3_x = lambda x: torch.where(torch.abs(x-10) < 2, -0.5*(x-10), 0.)

    bottom4 = lambda x: torch.where(torch.abs(x-1.5) <= 0.1, 0.25*(torch.cos(10*pi*(x-1.5)) + 1), 0.)
    bottom4_x = lambda x:  torch.where(torch.abs(x-1.5) < 0.1, 2.5*pi*torch.sin(10*pi*(x-1.5)), 0.)
    if bottom_name == 'b0':
        return {'b': bottom0, 'b_x': bottom0_x}
    elif bottom_name == 'b1':
        return {'b': bottom1, 'b_x': bottom1_x}
    elif bottom_name == 'b2':
        return {'b': bottom2, 'b_x': bottom2_x}
    elif bottom_name == 'b3':
        return {'b': bottom3, 'b_x': bottom3_x}
    elif bottom_name == 'b4':
        return {'b': bottom4, 'b_x': bottom4_x}
    else:
        raise ValueError('Invalid bottom name, {} bottom not implemented'.format(bottom_name))

def _swe(bottom_name, g, source_term=True):
    def swe(X, U):
        bottom = get_bottom(bottom_name)
        bx = bottom['b_x']
        x, t = X[:, 0:1], X[:, 1:2]
        h = U[:, 0:1]
        u = U[:, 1:2]

        U1 = h
        U2 = h*u

        F1 = h*u
        F2 = h*u*u + 0.5 * g*h*h

        F1_x = dde.grad.jacobian(F1, X, i=0, j=0)
        F2_x = dde.grad.jacobian(F2, X, i=0, j=0)

        U1_t = dde.grad.jacobian(U1, X, i=0, j=1)
        U2_t = dde.grad.jacobian(U2, X, i=0, j=1)

        if source_term:
            h = 2 + torch.cos(x)*torch.cos(t)
            v = torch.sin(x)*torch.sin(t)/h
            S = torch.sin(x)*torch.cos(t)*(1 + v**2 - g*h) + 2*v*torch.cos(x)*torch.sin(t) + g*h*bx(x)
        else:
            S = torch.zeros_like(x)
        b_x = bx(x) 
        equaz_1 = U1_t + F1_x
        equaz_2 = U2_t + F2_x + g*h*b_x - S

        return [equaz_1, equaz_2]
    return swe

g = 1.
xmin, xmax, tmax = 0., 20., 100.
input_dim = 2
output_dim = 2

def on_initial(_, on_initial):
    return on_initial

def boundary(_, on_boundary):
    return on_boundary

def boundary_0 (x, on_boundary):
    return on_boundary and np.isclose(x[0], xmin)

def boundary_L (x, on_boundary):
    return on_boundary and np.isclose(x[0], xmax)

def _transform_output(U0):
    def transform_output(x, U):
        t = x[:, 1:2]
        h = U[:, 0:1]
        u = U[:, 1:2]
        u_new = u*t + U0(x)
        h_new = h
        return torch.concat([h_new, u_new], axis=1)
    return transform_output

swe = _swe('b1', g, source_term=True)

def func_ic_u(x):
    return 0.0

def func_ic_h(X):
    X = torch.from_numpy(X)
    x = X[:, 0:1]
    t0 = 0.*torch.ones_like(x)
    h_ic  = 2 + torch.cos(x)*torch.cos(t0)
    return h_ic

def func_bc_h1(X):
    X = torch.from_numpy(X)
    t = X[:, 1:2]
    x_l = torch.ones_like(t) * xmin
    h_lbc = 2 + torch.cos(x_l)*torch.cos(t)
    return h_lbc

def func_bc_h2(X):
    X = torch.from_numpy(X)
    t = X[:, 1:2]
    x_r = torch.ones_like(t) * xmax
    h_rbc = 2 + torch.cos(x_r)*torch.cos(t)
    return h_rbc

def func_bc_u1(x):
    return 0.0

def func_bc_u2(X):
    X = torch.from_numpy(X)
    t = X[:, 1:2]
    x_r = torch.ones_like(t) * xmax
    h_rbc = 2 + torch.cos(x_r)*torch.cos(t)
    v_rbc = torch.sin(x_r)*torch.sin(t)/h_rbc
    return v_rbc

geom = dde.geometry.Interval(xmin, xmax)
timedomain = dde.geometry.TimeDomain(0.0, tmax)
geomtime = dde.geometry.GeometryXTime(geom, timedomain)

IC_h = dde.IC(geomtime, func_ic_h, on_initial, component=0)
IC_u = dde.IC(geomtime, func_ic_u, on_initial, component=1)

BC_h1 = dde.DirichletBC(geomtime, func_bc_h1, boundary_0, component=0)
BC_h2 = dde.DirichletBC(geomtime, func_bc_h2, boundary_L, component=0)

BC_u1 = dde.DirichletBC(geomtime, func_bc_u1,  boundary_0, component=1)
BC_u2 = dde.DirichletBC(geomtime, func_bc_u2,  boundary_L, component=1)

BC = [IC_h, BC_h1, BC_h2, BC_u1, BC_u2]

data = dde.data.TimePDE(geomtime, swe, BC, num_domain=10, num_boundary=10, num_initial=10)
print(type(data))
print(data)

net = dde.maps.FNN(layer_sizes = [input_dim] + [60]*5 + [output_dim], activation="tanh",
                   kernel_initializer="Glorot uniform")
print(type(net))
transform_output = _transform_output(func_ic_u)
net.apply_output_transform(lambda x, U: transform_output(x, U))

model = dde.Model(data, net)

model.compile('adam', lr=0.0005, loss_weights = None)
losshistory, train_state = model.train(iterations=20_000)

def plot_results(model, tmin, tmax, xmin, xmax, h_ref=None, u_ref=None, bottom_name='b1', save_result=False, 
                 path_result=None, path_model=None):
    step =  (tmax-tmin)/4
    plot_Time = np.arange(tmin, tmax+step, step)
    N = 150
    X = np.linspace(xmin, xmax, N)
    X = np.reshape(X, (len(X), 1))
    bottom = get_bottom(bottom_name)['b']
    x = torch.from_numpy(X)
    for i, _ in enumerate(plot_Time):
        T = np.ones((N, 1)) * plot_Time[i]
        t = torch.from_numpy(T)
        U_pinns  = model.predict(torch.cat((x, t), dim=1))
        h_pinns  = U_pinns[:,0].detach().numpy()
        u_pinns = U_pinns[:,1].detach().numpy()

        if h_ref is not None:
            h_exact = h_ref(x,t).numpy()
            h_exact = h_exact.reshape((N, ))
        if u_ref is not None:
            u_exact = u_ref(x,t).numpy()
            u_exact = u_exact.reshape((N, ))

        b = bottom(x).numpy()
        b = b.reshape((N, ))
        fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
        fig.tight_layout(pad=4.5)
        ax1.scatter(X, h_pinns+b, color='darkgreen', marker='o', facecolors="none", label='PINNs Solution')
        ax1.plot(x, h_exact+b, 'black', lw=2., label='exact solution')
        ax1.plot(x, b, label='bottom')
        ax2.scatter(X, u_pinns, color='darkgreen', marker='o', facecolors = "none",  label='PINNs Solution')
        ax2.plot(x, u_exact, 'black', lw=2., label='exact solution')
        x_vect = x.numpy()
        x_vect = x_vect.reshape((x.shape[0],))
        ax1.fill_between(x_vect, bottom(torch.from_numpy(x_vect)), color="gray")
        ax1.legend(loc='best')
        ax2.legend(loc='best')
        ax1.set_title(f'total water height h + b for t = {plot_Time[i]}')
        ax2.set_title(f'water velocity for t = {plot_Time[i]}')
        ax1.set_ylabel('$h + b$')
        ax2.set_ylabel('$u$')
        ax1.set_xlabel('$x$')
        ax2.set_xlabel('$x$')
        ax1.grid(True)
        ax2.grid(True)
        if save_result:
            plt.savefig(path_result + str(plot_Time[i])+'s.png', dpi=300)
        plt.show()

h_ref = lambda x, t: 2 + torch.cos(x)*torch.cos(t)
v_ref = lambda x, t: torch.sin(x)*torch.sin(t) / h_ref(x,t)

main_path = os.getcwd()
path_results = os.path.join(main_path, 'results/')
Path(path_results).mkdir(exist_ok=True)
plot_results(model, 0., tmax, xmin, xmax, h_ref, v_ref, bottom_name='b1', save_result=True, 
                 path_result=path_results)```
kmache commented 1 year ago

I even take the following code from the deepxde demo run it on HPC and get exactly the same error


import deepxde as dde
import numpy as np

def gen_testdata():
    data = np.load("../dataset/Burgers.npz")
    t, x, exact = data["t"], data["x"], data["usol"].T
    xx, tt = np.meshgrid(x, t)
    X = np.vstack((np.ravel(xx), np.ravel(tt))).T
    y = exact.flatten()[:, None]
    return X, y

def pde(x, y):
    dy_x = dde.grad.jacobian(y, x, i=0, j=0)
    dy_t = dde.grad.jacobian(y, x, i=0, j=1)
    dy_xx = dde.grad.hessian(y, x, i=0, j=0)
    return dy_t + y * dy_x - 0.01 / np.pi * dy_xx

geom = dde.geometry.Interval(-1, 1)
timedomain = dde.geometry.TimeDomain(0, 0.99)
geomtime = dde.geometry.GeometryXTime(geom, timedomain)

bc = dde.icbc.DirichletBC(geomtime, lambda x: 0, lambda _, on_boundary: on_boundary)
ic = dde.icbc.IC(
    geomtime, lambda x: -np.sin(np.pi * x[:, 0:1]), lambda _, on_initial: on_initial
)

data = dde.data.TimePDE(
    geomtime, pde, [bc, ic], num_domain=2540, num_boundary=80, num_initial=160
)
net = dde.nn.FNN([2] + [20] * 3 + [1], "tanh", "Glorot normal")
model = dde.Model(data, net)

model.compile("adam", lr=1e-3)
model.train(iterations=15000)
model.compile("L-BFGS")
losshistory, train_state = model.train()
dde.saveplot(losshistory, train_state, issave=True, isplot=True)

X, y_true = gen_testdata()
y_pred = model.predict(X)
f = model.predict(X, operator=pde)
print("Mean residual:", np.mean(np.absolute(f)))
print("L2 relative error:", dde.metrics.l2_relative_error(y_true, y_pred))
np.savetxt("test.dat", np.hstack((X, y_true, y_pred)))```
lululxvi commented 1 year ago

Is your tensorflow/pytorch installed correctly?

lxciwhr commented 1 year ago

Whether the problem has been solved