Does this mean that training with the Adam optimizer fails to converge, or is there another issue with the code?
Below is the code and error:
loader2 = {'train_input': train_X1, 'train_label': train_Y1, 'test_input': test_X1, 'test_label': test_Y1}
KAN_model2 = KAN(width=[17,5,5,3,3,1], seed=0)
KAN_model2.train(loader2,opt="Adam")
I think the input dimensions are fine, since I use the same parameters and input with LBFGS.
File ~/Desktop/PINN4SOH/KANPINN-env/lib/python3.8/site-packages/kan/KAN.py:244, in KAN.update_grid_from_samples(self, x)
242 for l in range(self.depth):
243 self.forward(x)
--> 244 self.act_fun[l].update_grid_from_samples(self.acts[l])
File env/lib/python3.8/site-packages/kan/KANLayer.py:218, in KANLayer.update_grid_from_samples(self, x)
216 grid_uniform = torch.cat([grid_adaptive[:, [0]] - margin + (grid_adaptive[:, [-1]] - grid_adaptive[:, [0]] + 2 * margin) * a for a in np.linspace(0, 1, num=self.grid.shape[1])], dim=1)
217 self.grid.data = self.grid_eps * grid_uniform + (1 - self.grid_eps) * grid_adaptive
--> 218 self.coef.data = curve2coef(x_pos, y_eval, self.grid, self.k, device=self.device)
File env/lib/python3.8/site-packages/kan/spline.py:137, in curve2coef(x_eval, y_eval, grid, k, device)
...
136 mat = B_batch(x_eval, grid, k, device=device).permute(0, 2, 1)
--> 137 coef = torch.linalg.lstsq(mat.to('cpu'), y_eval.unsqueeze(dim=2).to('cpu')).solution[:, :, 0] # sometimes 'cuda' version may diverge
138 return coef.to(device)
torch.linalg.lstsq may diverge, regardless of whether you're using LBFGS or Adam. Please check if your data has a column which is the same (or nearly the same) for all samples.
Does this mean that training with the Adam optimizer fails to converge, or is there another issue with the code? Below is the code and error: loader2 = {'train_input': train_X1, 'train_label': train_Y1, 'test_input': test_X1, 'test_label': test_Y1} KAN_model2 = KAN(width=[17,5,5,3,3,1], seed=0) KAN_model2.train(loader2,opt="Adam") I think the input dimensions are fine, since I use the same parameters and input with LBFGS.
RuntimeError Traceback (most recent call last) Cell In[69], line 4 2 loader2 = {'train_input': train_X1, 'train_label': train_Y1, 'test_input': test_X1, 'test_label': test_Y1} 3 KAN_model2 = KAN(width=[17,5,5,3,3,1], seed=0) ----> 4 KAN_model2.train(loader2,opt="Adam")
File env/lib/python3.8/site-packages/kan/KAN.py:898, in KAN.train(self, dataset, opt, steps, log, lamb, lamb_l1, lamb_entropy, lamb_coef, lamb_coefdiff, update_grid, grid_update_num, loss_fn, lr, stop_grid_update_step, batch, small_mag_threshold, small_reg_factor, metrics, sglr_avoid, save_fig, in_vars, out_vars, beta, save_fig_freq, img_folder, device) 895 test_id = np.random.choice(dataset['test_input'].shape[0], batch_size_test, replace=False) 897 if _ % grid_update_freq == 0 and _ < stop_grid_update_step and update_grid: --> 898 self.update_grid_from_samples(dataset['train_input'][train_id].to(device)) 900 if opt == "LBFGS": 901 optimizer.step(closure)
File ~/Desktop/PINN4SOH/KANPINN-env/lib/python3.8/site-packages/kan/KAN.py:244, in KAN.update_grid_from_samples(self, x) 242 for l in range(self.depth): 243 self.forward(x) --> 244 self.act_fun[l].update_grid_from_samples(self.acts[l])
File env/lib/python3.8/site-packages/kan/KANLayer.py:218, in KANLayer.update_grid_from_samples(self, x) 216 grid_uniform = torch.cat([grid_adaptive[:, [0]] - margin + (grid_adaptive[:, [-1]] - grid_adaptive[:, [0]] + 2 * margin) * a for a in np.linspace(0, 1, num=self.grid.shape[1])], dim=1) 217 self.grid.data = self.grid_eps * grid_uniform + (1 - self.grid_eps) * grid_adaptive --> 218 self.coef.data = curve2coef(x_pos, y_eval, self.grid, self.k, device=self.device)
File env/lib/python3.8/site-packages/kan/spline.py:137, in curve2coef(x_eval, y_eval, grid, k, device) ... 136 mat = B_batch(x_eval, grid, k, device=device).permute(0, 2, 1) --> 137 coef = torch.linalg.lstsq(mat.to('cpu'), y_eval.unsqueeze(dim=2).to('cpu')).solution[:, :, 0] # sometimes 'cuda' version may diverge 138 return coef.to(device)