A model trained with the TF backend and then converted to a PT model fails when running `dp test`.
The reverse direction works: a model trained with the PT backend runs fine after being converted to a TF model.
DeePMD-kit Version
DeePMD-kit v1.3.3.dev1844+g08e18fe5
TensorFlow Version
2.14.1
How did you download the software?
Offline packages
Input Files, Running Commands, Error Log, etc.
Traceback (most recent call last):
File "/home/penganyang/.local/bin/dp", line 8, in <module>
sys.exit(main())
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/main.py", line 805, in main
deepmd_main(args)
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/entrypoints/main.py", line 64, in main
test(dict_args)
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/entrypoints/test.py", line 146, in test
err = test_ener(
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/entrypoints/test.py", line 329, in test_ener
ret = dp.eval(
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/infer/deep_pot.py", line 139, in eval
results = self.deep_eval.eval(
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/pt/infer/deep_eval.py", line 242, in eval
out = self._eval_func(self._eval_model, numb_test, natoms)(
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/pt/infer/deep_eval.py", line 303, in eval_func
return self.auto_batch_size.execute_all(
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/utils/batch_size.py", line 195, in execute_all
n_batch, result = self.execute(execute_with_batch_size, index, natoms)
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/utils/batch_size.py", line 112, in execute
raise e
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/utils/batch_size.py", line 109, in execute
n_batch, result = callable(max(batch_nframes, 1), start_index)
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/utils/batch_size.py", line 173, in execute_with_batch_size
return (end_index - start_index), callable(
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/pt/infer/deep_eval.py", line 362, in _eval_model
batch_output = model(
File "/opt/mamba/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/pt/train/wrapper.py", line 174, in forward
model_pred = self.model[task_key](
File "/opt/mamba/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
RuntimeError: The following operation failed in the TorchScript interpreter.
Traceback of TorchScript, serialized code (most recent call last):
File "code/__torch__/deepmd/pt/model/model/ener_model.py", line 20, in forward
aparam: Optional[Tensor]=None,
do_atomic_virial: bool=False) -> Dict[str, Tensor]:
model_ret = (self).forward_common(coord, atype, box, fparam, aparam, do_atomic_virial, )
model_predict = annotate(Dict[str, Tensor], {})
torch._set_item(model_predict, "atom_energy", model_ret["energy"])
File "code/__torch__/deepmd/pt/model/model/ener_model.py", line 136, in forward_common
_15 = _13(coord, atype, (self).get_rcut(), (self).get_sel(), (self).mixed_types(), box, )
extended_coord, extended_atype, mapping, nlist, = _15
model_predict_lower = (self).forward_common_lower(extended_coord, extended_atype, nlist, mapping, fparam, aparam, do_atomic_virial, )
~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
model_predict = _14(model_predict_lower, (self).model_output_def(), mapping, do_atomic_virial, )
return model_predict
File "code/__torch__/deepmd/pt/model/model/ener_model.py", line 157, in forward_common_lower
extended_coord0 = torch.view(extended_coord, [nframes, -1, 3])
nlist3 = (self).format_nlist(extended_coord0, extended_atype, nlist, )
atomic_ret = (self).forward_atomic(extended_coord0, extended_atype, nlist3, mapping, fparam, aparam, )
~~~~~~~~~~~~~~~~~~~~ <--- HERE
model_predict = _16(atomic_ret, (self).fitting_output_def(), extended_coord0, do_atomic_virial, )
return model_predict
File "code/__torch__/deepmd/pt/model/model/ener_model.py", line 218, in forward_atomic
pass
descriptor = self.descriptor
_27 = (descriptor).forward(extended_coord, extended_atype, nlist, mapping, )
~~~~~~~~~~~~~~~~~~~ <--- HERE
descriptor0, rot_mat, g2, h2, sw, = _27
fitting_net = self.fitting_net
File "code/__torch__/deepmd/pt/model/descriptor/se_r.py", line 99, in forward
_23 = torch.select(sec7, 0, 2)
sec8 = self.sec
_24 = torch.select(sec8, 0, 3)
~~~~~~~~~~~~ <--- HERE
mm1 = torch.slice(_22, 1, annotate(int, _23), annotate(int, _24))
_25 = torch.slice(dmatrix1)
Traceback of TorchScript, original code (most recent call last):
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/pt/model/model/ener_model.py", line 33, in forward
do_atomic_virial: bool = False,
) -> Dict[str, torch.Tensor]:
model_ret = self.forward_common(
~~~~~~~~~~~~~~~~~~~ <--- HERE
coord,
atype,
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/pt/model/model/make_model.py", line 131, in forward_common
box=box,
)
model_predict_lower = self.forward_common_lower(
~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
extended_coord,
extended_atype,
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/pt/model/model/make_model.py", line 189, in forward_common_lower
extended_coord = extended_coord.view(nframes, -1, 3)
nlist = self.format_nlist(extended_coord, extended_atype, nlist)
atomic_ret = self.forward_atomic(
~~~~~~~~~~~~~~~~~~~ <--- HERE
extended_coord,
extended_atype,
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/pt/model/atomic_model/dp_atomic_model.py", line 152, in forward_atomic
if self.do_grad_r() or self.do_grad_c():
extended_coord.requires_grad_(True)
descriptor, rot_mat, g2, h2, sw = self.descriptor(
~~~~~~~~~~~~~~~ <--- HERE
extended_coord,
extended_atype,
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/pt/model/descriptor/se_r.py", line 251, in forward
for ii, ll in enumerate(self.filter_layers.networks):
# nfnl x nt
mm = exclude_mask[:, self.sec[ii] : self.sec[ii + 1]]
~~~~~~~~~~~~~~~~ <--- HERE
# nfnl x nt x 1
ss = dmatrix[:, self.sec[ii] : self.sec[ii + 1], :]
RuntimeError: select(): index 3 out of range for tensor of size [3] at dimension 0
### Steps to Reproduce
```sh
cd examples/water/se_e2_r
dp train input.json
dp freeze -o model.pb
dp convert-backend model.pb model.pth
dp --pt test -m model.pth -s ../data/data_3 -n 30
```
### Further Information, Files, and Links
torch version 2.0.0+cu118
Bug summary
A model trained with the TF backend and then converted to a PT model fails when running `dp test`.
The reverse direction works: a model trained with the PT backend runs fine after being converted to a TF model.
DeePMD-kit Version
DeePMD-kit v1.3.3.dev1844+g08e18fe5
TensorFlow Version
2.14.1
How did you download the software?
Offline packages
Input Files, Running Commands, Error Log, etc.
Traceback (most recent call last):
File "/home/penganyang/.local/bin/dp", line 8, in <module>
sys.exit(main())
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/main.py", line 805, in main
deepmd_main(args)
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/entrypoints/main.py", line 64, in main
test(dict_args)
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/entrypoints/test.py", line 146, in test
err = test_ener(
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/entrypoints/test.py", line 329, in test_ener
ret = dp.eval(
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/infer/deep_pot.py", line 139, in eval
results = self.deep_eval.eval(
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/pt/infer/deep_eval.py", line 242, in eval
out = self._eval_func(self._eval_model, numb_test, natoms)(
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/pt/infer/deep_eval.py", line 303, in eval_func
return self.auto_batch_size.execute_all(
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/utils/batch_size.py", line 195, in execute_all
n_batch, result = self.execute(execute_with_batch_size, index, natoms)
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/utils/batch_size.py", line 112, in execute
raise e
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/utils/batch_size.py", line 109, in execute
n_batch, result = callable(max(batch_nframes, 1), start_index)
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/utils/batch_size.py", line 173, in execute_with_batch_size
return (end_index - start_index), callable(
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/pt/infer/deep_eval.py", line 362, in _eval_model
batch_output = model(
File "/opt/mamba/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/mnt/user/penganyang/test/deepmd-kit/deepmd/pt/train/wrapper.py", line 174, in forward
model_pred = self.model[task_key](
File "/opt/mamba/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
RuntimeError: The following operation failed in the TorchScript interpreter.
Traceback of TorchScript, serialized code (most recent call last):
File "code/__torch__/deepmd/pt/model/model/ener_model.py", line 20, in forward
aparam: Optional[Tensor]=None,
do_atomic_virial: bool=False) -> Dict[str, Tensor]:
model_ret = (self).forward_common(coord, atype, box, fparam, aparam, do_atomic_virial, )