KindXiaoming / pykan

Kolmogorov Arnold Networks
MIT License
14.94k stars · 1.38k forks

Can someone help me figure out how to change the MLP structure here to a KAN structure? #93

Closed jackzhangqing closed 3 months ago

jackzhangqing commented 5 months ago

class PhysicsInformedNN:

# Initialize the class
def __init__(self, x, y, u0_real, u0_imag, u0_real_xx, u0_real_yy, u0_imag_xx,u0_imag_yy, m, m0, eta, delta, layers, omega):

    X = np.concatenate([x, y], 1)

    self.lb = X.min(0)
    self.ub = X.max(0)

    self.X = X

    self.x = X[:,0:1]
    self.y = X[:,1:2]
    self.u0_real = u0_real
    self.u0_imag = u0_imag

    self.u0_real_xx = u0_real_xx
    self.u0_imag_xx = u0_imag_xx

    self.u0_real_yy = u0_real_yy
    self.u0_imag_yy = u0_imag_yy

    self.m = m
    self.m0 = m0
    self.eta = eta
    self.delta = delta

    self.layers = layers

    self.omega = omega

    # Initialize NN
    self.weights, self.biases = self.initialize_NN(layers)  

    # tf placeholders 
    self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                                 log_device_placement=True))

    self.x_tf = tf.placeholder(tf.float32, shape=[None, self.x.shape[1]])
    self.y_tf = tf.placeholder(tf.float32, shape=[None, self.y.shape[1]])

    self.du_real_pred, self.du_imag_pred, self.f_real_pred, self.f_imag_pred, self.fq_real_pred, self.fq_imag_pred = self.net_NS(self.x_tf, self.y_tf)

    # loss function we define
    self.loss = tf.reduce_sum(tf.square(self.f_real_pred)) + tf.reduce_sum(tf.square(self.f_imag_pred)) + \
                tf.reduce_sum(tf.square(self.fq_real_pred)) + tf.reduce_sum(tf.square(self.fq_imag_pred)) 

    # optimizer used by default (in original paper)        

    self.optimizer = tf.contrib.opt.ScipyOptimizerInterface(self.loss, 
                                                            method = 'L-BFGS-B', 
                                                            options = {'maxiter': 50000,
                                                                       'maxfun': 50000,
                                                                       'maxcor': 50,
                                                                       'maxls': 50,
                                                                       'ftol' : 1.0 * np.finfo(float).eps})        

    self.optimizer_Adam = tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9, beta2=0.999,
                                                 epsilon=1e-08, use_locking=False, name='Adam')
    self.train_op_Adam = self.optimizer_Adam.minimize(self.loss)                    

    init = tf.global_variables_initializer()
    self.sess.run(init)

def initialize_NN(self, layers):        
    weights = []
    biases = []
    num_layers = len(layers) 
    for l in range(0,num_layers-1):
        W = self.xavier_init(size=[layers[l], layers[l+1]])
        b = tf.Variable(tf.zeros([1,layers[l+1]], dtype=tf.float32)+0.0, dtype=tf.float32)
        weights.append(W)
        biases.append(b)        
    return weights, biases 
def xavier_init(self, size):
    in_dim = size[0]
    out_dim = size[1]        
    xavier_stddev = np.sqrt(2/(in_dim + out_dim))
    return tf.Variable(tf.truncated_normal([in_dim, out_dim], stddev=xavier_stddev), dtype=tf.float32)

def neural_net(self, X, weights, biases):
    num_layers = len(weights) + 1
    H = 2.0*(X - self.lb)/(self.ub - self.lb) - 1.0
    for l in range(0,num_layers-2):
        W = weights[l]
        b = biases[l]
        #H = tf.tanh(tf.add(tf.matmul(H, W), b))
        H = tf.atan(tf.add(tf.matmul(H, W), b))
    W = weights[-1]
    b = biases[-1]
    Y = tf.add(tf.matmul(H, W), b)
    return Y
def net_NS(self, x, y):
    # output scattered wavefield: du_real du_imag, 
    # loss function: L du+omega^2 dm u0 
    omega = self.omega
    m = self.m
    m0 = self.m0
    eta = self.eta
    delta = self.delta
    u0_real = self.u0_real
    u0_imag = self.u0_imag

    u0_real_xx = self.u0_real_xx
    u0_imag_xx = self.u0_imag_xx

    u0_real_yy = self.u0_real_yy
    u0_imag_yy = self.u0_imag_yy

    p_and_q = self.neural_net(tf.concat([x,y], 1), self.weights, self.biases)

    du_real = p_and_q[:,0:1]
    du_imag = p_and_q[:,1:2]
    q_real = p_and_q[:,2:3]
    q_imag = p_and_q[:,3:4]

    du_real_x = tf.gradients(du_real, x)[0]
    du_real_y = tf.gradients(du_real, y)[0]
    du_real_xx = tf.gradients(du_real_x, x)[0]
    du_real_yy = tf.gradients(du_real_y, y)[0]

    du_imag_x = tf.gradients(du_imag, x)[0]
    du_imag_y = tf.gradients(du_imag, y)[0]
    du_imag_xx = tf.gradients(du_imag_x, x)[0]
    du_imag_yy = tf.gradients(du_imag_y, y)[0]

    q_real_x = tf.gradients(q_real, x)[0]
    q_real_y = tf.gradients(q_real, y)[0]
    q_real_xx = tf.gradients(q_real_x, x)[0]
    q_real_yy = tf.gradients(q_real_y, y)[0]

    q_imag_x = tf.gradients(q_imag, x)[0]
    q_imag_y = tf.gradients(q_imag, y)[0]
    q_imag_xx = tf.gradients(q_imag_x, x)[0]
    q_imag_yy = tf.gradients(q_imag_y, y)[0]

    f_real =  omega*omega*m*du_real + du_real_xx + q_real_xx + du_real_yy/(1+2*delta) + omega*omega*(m-m0)*u0_real + (1/(1+2*delta)-1)*u0_real_yy 
    f_imag =  omega*omega*m*du_imag + du_imag_xx + q_imag_xx + du_imag_yy/(1+2*delta) + omega*omega*(m-m0)*u0_imag + (1/(1+2*delta)-1)*u0_imag_yy

    fq_real = omega*omega*m*q_real + 2*eta*(du_real_xx + q_real_xx) + 2*eta*u0_real_xx
    fq_imag = omega*omega*m*q_imag + 2*eta*(du_imag_xx + q_imag_xx) + 2*eta*u0_imag_xx

    return du_real, du_imag, f_real, f_imag, fq_real, fq_imag        

def callback(self, loss):
    print('Loss: %.3e' % (loss))
    misfit1.append(loss) 

def train(self, nIter): 
    tf_dict = {self.x_tf: self.x, self.y_tf: self.y}

    start_time = time.time()
    for it in range(nIter):
        self.sess.run(self.train_op_Adam, tf_dict)
        loss_value = self.sess.run(self.loss, tf_dict)
        misfit.append(loss_value)         
        # Print
        if it % 10 == 0:
            elapsed = time.time() - start_time
            loss_value = self.sess.run(self.loss, tf_dict)
            #misfit.append(loss_value)
            print('It: %d, Loss: %.3e,Time: %.2f' % 
                  (it, loss_value, elapsed))
            start_time = time.time()

    self.optimizer.minimize(self.sess,
                            feed_dict = tf_dict,
                            fetches = [self.loss],
                            loss_callback = self.callback)

def predict(self, x_star, y_star):

    tf_dict = {self.x_tf: x_star, self.y_tf: y_star}

    du_real_star = self.sess.run(self.du_real_pred, tf_dict)
    du_imag_star = self.sess.run(self.du_imag_pred, tf_dict)

    return du_real_star, du_imag_star
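
For orientation, the MLP in this class is the initialize_NN / neural_net pair, consumed in net_NS through self.neural_net(tf.concat([x,y], 1), self.weights, self.biases); that call site (a 2-input, 4-output network producing du_real, du_imag, q_real, q_imag) is what a KAN forward pass would replace. pykan is PyTorch-based, so the tf placeholders, Session, and tf.gradients plumbing would need PyTorch equivalents as well. A hedged one-line sketch of the swapped call, where kan_model is an illustrative name:

p_and_q = kan_model(torch.cat([x, y], dim=1))   # e.g. kan_model = KAN(width=[2, ..., 4], grid=5, k=3)
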
KindXiaoming commented 5 months ago

Hi, here is a minimal example of how you can use KAN to solve a PDE, hope this helps!
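
A minimal sketch along those lines, assuming only pykan calls that also appear later in this thread (KAN(width=..., grid=..., k=...), model(x), model.update_grid_from_samples) plus torch.optim.LBFGS, with a toy residual u_xx + u_yy + u = 0 standing in for a real PDE:

import torch
from torch import autograd
from kan import KAN

# collocation points in [-1, 1]^2; requires_grad lets autograd differentiate
# the network output with respect to the inputs
x = torch.rand(1000, 2) * 2 - 1
x.requires_grad_(True)

model = KAN(width=[2, 5, 1], grid=5, k=3)

def pde_residual(x):
    u = model(x)                                               # u(x, y), shape (N, 1)
    grad_u = autograd.grad(u.sum(), x, create_graph=True)[0]   # (N, 2): [u_x, u_y]
    u_x, u_y = grad_u[:, 0:1], grad_u[:, 1:2]
    u_xx = autograd.grad(u_x.sum(), x, create_graph=True)[0][:, 0:1]
    u_yy = autograd.grad(u_y.sum(), x, create_graph=True)[0][:, 1:2]
    return u_xx + u_yy + u                                     # toy residual; replace with the actual operator

optimizer = torch.optim.LBFGS(model.parameters(), lr=1, history_size=10,
                              line_search_fn="strong_wolfe")

def closure():
    optimizer.zero_grad()
    loss = torch.mean(pde_residual(x) ** 2)
    loss.backward()
    return loss

for step in range(20):
    if step % 5 == 0:
        model.update_grid_from_samples(x)   # periodically adapt the spline grids to the inputs
    optimizer.step(closure)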

jackzhangqing commented 5 months ago

Hi, here is a minimal example of how you can use KAN to solve a PDE, hope this helps!

Thank you for your assistance. Following your example, I replaced the MLP in the PINN with a KAN for solving the PDE. Below are the modified code and the error message. I've been working on this for quite some time and am unsure how to resolve it.

# Load the data (Helm1)
data = scipy.io.loadmat('./data/Homo_4Hz_singlesource_ps.mat')
'''
A       40401x1   646416   double complex
B       40401x1   646416   double complex
C       40401x1   646416   double complex
Ps      40401x1   646416   double complex
U_imag  40401x1   323208   double
U_real  40401x1   323208   double
m       40401x1   323208   double
x_star  40401x1   323208   double
z_star  40401x1   323208   double
'''
x = data['x_star']
z = data['z_star']

ps = data['Ps']
m = data['m']
A = data['A']
B = data['B']
C = data['C']

N = x.shape[0]
N_train = N
idx = np.random.choice(N, N_train, replace=False)
x_train = x[idx, :]
z_train = z[idx, :]
x = torch.tensor(x_train, dtype=torch.float32)
z = torch.tensor(z_train, dtype=torch.float32)

# Concatenate x and z along the second dimension to form a 40401 x 2 tensor
x_i = torch.cat((x, z), dim=1)

ps_train = ps[idx, :]
m_train = m[idx, :]
A_train = A[idx, :]
B_train = B[idx, :]
C_train = C[idx, :]

# Define the KAN model
# Case 1: 2 inputs, 2 outputs
# MLP counterpart: [2, 40, 40, 40, 40, 40, 40, 40, 40, 2]
model = KAN(width=[2, 10, 10, 10, 10, 2], grid=5, k=3, grid_eps=1.0, noise_scale_base=0.25)

# Jacobian computation for a batch of samples.
def batch_jacobian(func, x, create_graph=False):
    # x in shape (Batch, Length)
    def _func_sum(x):
        return func(x).sum(dim=0)
    return autograd.functional.jacobian(_func_sum, x, create_graph=create_graph).permute(1, 0, 2)
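
For reference, an illustrative use of batch_jacobian (the variable names here are examples, not from the post): with x_i of shape (N, 2) and a model with two outputs, the result has shape (N, n_out, n_in), so per-sample first derivatives can be read off directly.

jac = batch_jacobian(model, x_i, create_graph=True)  # (N, n_out, n_in)
du_real_dx = jac[:, 0, 0:1]   # d u_real / d x for each sample
du_real_dz = jac[:, 0, 1:2]   # d u_real / d z for each sample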

# Compute gradients
def fwd_gradients(Y, x):
    dummy = torch.ones_like(Y)
    G = torch.autograd.grad(Y, x, grad_outputs=dummy, retain_graph=True)[0]
    Y_x = torch.autograd.grad(G, dummy, retain_graph=True)[0]
    return Y_x

steps = 25000

loss_values = []

def train():
    optimizer = LBFGS(model.parameters(), lr=1, history_size=10, line_search_fn="strong_wolfe",
                      tolerance_grad=1e-32, tolerance_change=1e-32, tolerance_ys=1e-32)

    pbar = tqdm(range(steps), desc='description')

    global loss

    for it in pbar:
        def closure():

            optimizer.zero_grad()

            # pde loss
            ureal_and_uimag = model(torch.cat((x, z), dim=1))
            u_real = ureal_and_uimag[:, 0:1]
            u_imag = ureal_and_uimag[:, 1:2]
            u = torch.complex(u_real, u_imag)

            dudx = fwd_gradients(u, x)
            dudz = fwd_gradients(u, z)
            dudxx = fwd_gradients((A_train * dudx), x)
            dudzz = fwd_gradients((B_train * dudz), z)

            f_loss = C_train*omega*omega*m_train*u + dudxx + dudzz - ps_train
            loss = torch.sum(torch.square(torch.abs(f_loss)))

            loss.backward()

            loss_values.append(loss.item())

            return loss

        if it % 10 == 0 and it < 30000:
            model.update_grid_from_samples(torch.cat((x, z), dim=1))

        optimizer.step(closure)

        if it % 10 == 0:
            pbar.set_description("loss: %.2e" % (loss.cpu().detach().numpy()))

    scipy.io.savemat('loss_KAN.mat', {'loss': loss_values})

    torch.save(model.state_dict(), 'trained_KAN_Helm1.pt')

train()


description:   0%|          | 0/25000 [00:12<?, ?it/s]
Traceback (most recent call last):
  File "D:/0_Suda_Py/pykan-master/test_KAN_Helm1.py", line 127, in <module>
    train()
  File "D:/0_Suda_Py/pykan-master/test_KAN_Helm1.py", line 117, in train
    optimizer.step(closure)
  File "C:\Users\Zhangqing\anaconda3\envs\pytorch\lib\site-packages\torch\optim\optimizer.py", line 140, in wrapper
    out = func(*args, **kwargs)
  File "C:\Users\Zhangqing\anaconda3\envs\pytorch\lib\site-packages\torch\autograd\grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "D:\0_Suda_Py\pykan-master\kan\LBFGS.py", line 319, in step
    orig_loss = closure()
  File "C:\Users\Zhangqing\anaconda3\envs\pytorch\lib\site-packages\torch\autograd\grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "D:/0_Suda_Py/pykan-master/test_KAN_Helm1.py", line 100, in closure
    dudx = fwd_gradients(u, x)
  File "D:/0_Suda_Py/pykan-master/test_KAN_Helm1.py", line 71, in fwd_gradients
    G = torch.autograd.grad(Y, x, grad_outputs=dummy, retain_graph=True)[0]
  File "C:\Users\Zhangqing\anaconda3\envs\pytorch\lib\site-packages\torch\autograd\__init__.py", line 302, in grad
    allow_unused, accumulate_grad=False)  # Calls into the C++ engine to run the backward pass
RuntimeError: One of the differentiated Tensors does not require grad
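
For context on the error itself: torch.autograd.grad can only differentiate with respect to tensors that carry requires_grad=True, and x and z above are built with plain torch.tensor(...) calls, so the first grad call inside fwd_gradients fails. A minimal sketch of the change this error points to (an illustration against the code above, not a tested fix for the whole script):

x = torch.tensor(x_train, dtype=torch.float32, requires_grad=True)  # allow d/dx of the network output
z = torch.tensor(z_train, dtype=torch.float32, requires_grad=True)  # allow d/dz of the network output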