pyscf / gpu4pyscf

A plugin for using NVIDIA GPUs in the PySCF package
GNU General Public License v3.0
119 stars 21 forks source link

`int2c2e` doesn't work #211

Open n-gao opened 1 week ago

n-gao commented 1 week ago

It appears that the `get_int2c2e` function is not working properly. The following code fails with a `ValueError`:

import pyscf
from pyscf.df.addons import make_auxmol
from gpu4pyscf.df.int3c2e import get_int2c2e
m = pyscf.gto.M(atom='''
        C   -1.1367    0.0103    0.0000
        C    0.1372   -0.0024    0.0000
        C    1.0258    1.2064    0.0000
        C    2.3997    1.1549    0.0000
        O    2.9930    0.0003    0.0000
        O    3.0209    2.3217    0.0000
        C    1.0136    2.4426    0.0000
        C   -0.3603    2.4961    0.0000
        H   -2.0364   -0.6442    0.0000
        H    1.7805   -0.5956    0.0000
        H   -2.0216    0.6637    0.0000
        H    0.0660   -1.0129    0.0000
        O   -0.5710    3.7755    0.0000
        C    3.9200    0.9435    0.0000
        H   -1.2845    3.9715    0.0000
        H    4.5367    1.2207    0.0000
    ''', basis='6-31G(d)')
aux_mol = make_auxmol(m, 'weigend')
int2c2e = get_int2c2e(m, aux_mol)

Error

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[19], [line 23](vscode-notebook-cell:?execution_count=19&line=23)
      [4](vscode-notebook-cell:?execution_count=19&line=4) m = pyscf.gto.M(atom='''
      [5](vscode-notebook-cell:?execution_count=19&line=5)         C   -1.1367    0.0103    0.0000
      [6](vscode-notebook-cell:?execution_count=19&line=6)         C    0.1372   -0.0024    0.0000
   (...)
     [20](vscode-notebook-cell:?execution_count=19&line=20)         H    4.5367    1.2207    0.0000
     [21](vscode-notebook-cell:?execution_count=19&line=21)     ''', basis='6-31G(d)')
     [22](vscode-notebook-cell:?execution_count=19&line=22) aux_mol = make_auxmol(m, 'weigend')
---> [23](vscode-notebook-cell:?execution_count=19&line=23) int2c2e = get_int2c2e(m, aux_mol)

File /ceph/ssd/staff/gaoni/repos/mldft/.venv/lib/python3.12/site-packages/gpu4pyscf/df/int3c2e.py:1563, in get_int2c2e(mol, auxmol, direct_scf_tol)
   [1561](https://vscode-remote+ssh-002dremote-002bgpu06.vscode-resource.vscode-cdn.net/ceph/ssd/staff/gaoni/repos/mldft/.venv/lib/python3.12/site-packages/gpu4pyscf/df/int3c2e.py:1561) intopt = VHFOpt(mol, auxmol, 'int2e')
   [1562](https://vscode-remote+ssh-002dremote-002bgpu06.vscode-resource.vscode-cdn.net/ceph/ssd/staff/gaoni/repos/mldft/.venv/lib/python3.12/site-packages/gpu4pyscf/df/int3c2e.py:1562) intopt.build(direct_scf_tol, diag_block_with_triu=True, aosym=True)
-> [1563](https://vscode-remote+ssh-002dremote-002bgpu06.vscode-resource.vscode-cdn.net/ceph/ssd/staff/gaoni/repos/mldft/.venv/lib/python3.12/site-packages/gpu4pyscf/df/int3c2e.py:1563) int2c = get_int2c2e_sorted(mol, auxmol, intopt=intopt)
   [1564](https://vscode-remote+ssh-002dremote-002bgpu06.vscode-resource.vscode-cdn.net/ceph/ssd/staff/gaoni/repos/mldft/.venv/lib/python3.12/site-packages/gpu4pyscf/df/int3c2e.py:1564) aux_idx = np.argsort(intopt.aux_ao_idx)
   [1565](https://vscode-remote+ssh-002dremote-002bgpu06.vscode-resource.vscode-cdn.net/ceph/ssd/staff/gaoni/repos/mldft/.venv/lib/python3.12/site-packages/gpu4pyscf/df/int3c2e.py:1565) int2c = int2c[np.ix_(aux_idx, aux_idx)]

File /ceph/ssd/staff/gaoni/repos/mldft/.venv/lib/python3.12/site-packages/gpu4pyscf/df/int3c2e.py:1478, in get_int2c2e_sorted(mol, auxmol, intopt, direct_scf_tol, aosym, omega, stream)
   [1476](https://vscode-remote+ssh-002dremote-002bgpu06.vscode-resource.vscode-cdn.net/ceph/ssd/staff/gaoni/repos/mldft/.venv/lib/python3.12/site-packages/gpu4pyscf/df/int3c2e.py:1476) strides = np.array([1, naux_cart, naux_cart, naux_cart*naux_cart], dtype=np.int32)
   [1477](https://vscode-remote+ssh-002dremote-002bgpu06.vscode-resource.vscode-cdn.net/ceph/ssd/staff/gaoni/repos/mldft/.venv/lib/python3.12/site-packages/gpu4pyscf/df/int3c2e.py:1477) for k_id, log_q_k in enumerate(intopt.aux_log_qs):
-> [1478](https://vscode-remote+ssh-002dremote-002bgpu06.vscode-resource.vscode-cdn.net/ceph/ssd/staff/gaoni/repos/mldft/.venv/lib/python3.12/site-packages/gpu4pyscf/df/int3c2e.py:1478)     bins_locs_k = _make_s_index_offsets(log_q_k, nbins)
   [1479](https://vscode-remote+ssh-002dremote-002bgpu06.vscode-resource.vscode-cdn.net/ceph/ssd/staff/gaoni/repos/mldft/.venv/lib/python3.12/site-packages/gpu4pyscf/df/int3c2e.py:1479)     cp_k_id = k_id + len(intopt.log_qs)
   [1480](https://vscode-remote+ssh-002dremote-002bgpu06.vscode-resource.vscode-cdn.net/ceph/ssd/staff/gaoni/repos/mldft/.venv/lib/python3.12/site-packages/gpu4pyscf/df/int3c2e.py:1480)     for l_id, log_q_l in enumerate(intopt.aux_log_qs):

File /ceph/ssd/staff/gaoni/repos/mldft/.venv/lib/python3.12/site-packages/gpu4pyscf/scf/hf.py:1014, in _make_s_index_offsets(log_q, nbins, cutoff)
   [1012](https://vscode-remote+ssh-002dremote-002bgpu06.vscode-resource.vscode-cdn.net/ceph/ssd/staff/gaoni/repos/mldft/.venv/lib/python3.12/site-packages/gpu4pyscf/scf/hf.py:1012) scale = nbins / np.log(min(cutoff, .1))
   [1013](https://vscode-remote+ssh-002dremote-002bgpu06.vscode-resource.vscode-cdn.net/ceph/ssd/staff/gaoni/repos/mldft/.venv/lib/python3.12/site-packages/gpu4pyscf/scf/hf.py:1013) s_index = np.floor(scale * log_q).astype(np.int32)
-> [1014](https://vscode-remote+ssh-002dremote-002bgpu06.vscode-resource.vscode-cdn.net/ceph/ssd/staff/gaoni/repos/mldft/.venv/lib/python3.12/site-packages/gpu4pyscf/scf/hf.py:1014) bins = np.bincount(s_index)
   [1015](https://vscode-remote+ssh-002dremote-002bgpu06.vscode-resource.vscode-cdn.net/ceph/ssd/staff/gaoni/repos/mldft/.venv/lib/python3.12/site-packages/gpu4pyscf/scf/hf.py:1015) if bins.size < nbins:
   [1016](https://vscode-remote+ssh-002dremote-002bgpu06.vscode-resource.vscode-cdn.net/ceph/ssd/staff/gaoni/repos/mldft/.venv/lib/python3.12/site-packages/gpu4pyscf/scf/hf.py:1016)     bins = np.append(bins, np.zeros(nbins-bins.size, dtype=np.int32))

ValueError: 'list' argument must have no negative elements

Environment: Python 3.12.5, gpu4pyscf-cuda12x 1.0.2, gpu4pyscf-libxc-cuda12x 0.5, pyscf 2.6.2, numpy 1.26.4, cupy-cuda12x 13.3.0, cudatoolkit 12.5.0

wxj6000 commented 1 week ago

@n-gao Thank you for your feedback. The bug is fixed in https://github.com/pyscf/gpu4pyscf/pull/212. However, the GPU implementation is not necessarily faster than the CPU implementation. For the most part, we still compute int2c2e on the CPU. With further optimization, the GPU version could probably be much faster than the CPU for large systems.

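I also moved int2c2e to avoid possible confusion; in the script below, `get_int2c2e` is imported from `gpu4pyscf.scf.int2c2e` and is called with only the auxiliary molecule. Here is a test script: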

import cupy
import pyscf
from pyscf.df.addons import make_auxmol
from gpu4pyscf.scf.int2c2e import get_int2c2e

m = pyscf.gto.M(atom='''
        C   -1.1367    0.0103    0.0000
        C    0.1372   -0.0024    0.0000
        C    1.0258    1.2064    0.0000
        C    2.3997    1.1549    0.0000
        O    2.9930    0.0003    0.0000
        O    3.0209    2.3217    0.0000
        C    1.0136    2.4426    0.0000
        C   -0.3603    2.4961    0.0000
        H   -2.0364   -0.6442    0.0000
        H    1.7805   -0.5956    0.0000
        H   -2.0216    0.6637    0.0000
        H    0.0660   -1.0129    0.0000
        O   -0.5710    3.7755    0.0000
        C    3.9200    0.9435    0.0000
        H   -1.2845    3.9715    0.0000
        H    4.5367    1.2207    0.0000
    ''', basis='6-31G(d)')
aux_mol = make_auxmol(m, 'weigend')

start_event = cupy.cuda.Event()
end_event = cupy.cuda.Event()
start_event.record()
for i in range(100):
    int2c2e = get_int2c2e(aux_mol)
end_event.record()
end_event.synchronize()
elapsed_time = cupy.cuda.get_elapsed_time(start_event, end_event) 
print(f'{elapsed_time/1000:.3f} s', 'with GPU')

start_event = cupy.cuda.Event()
end_event = cupy.cuda.Event()
start_event.record()
for i in range(100):
    int2c2e = aux_mol.intor('int2c2e_sph')
    int2c2e = cupy.asarray(int2c2e)
end_event.record()
end_event.synchronize()
elapsed_time = cupy.cuda.get_elapsed_time(start_event, end_event) 
print(f'{elapsed_time/1000:.3f} s', 'with CPU')