gnina / libmolgrid

Comprehensive library for fast, GPU accelerated molecular gridding for deep learning workflows
https://gnina.github.io/libmolgrid/
Apache License 2.0
144 stars 48 forks source link

Error in pytest #87

Open lh12565 opened 2 years ago

lh12565 commented 2 years ago

Hi, after installing molgrid I ran `pytest ..` as described in https://github.com/mattragoza/conda-molgrid, and got the following errors:

pytest ..

.....
==============================
*** Open Babel Warning  in ReadMolecule
  WARNING: Problem interpreting the valence field of an atom
The valence field specifies a valence 3 that is
less than the observed explicit valence 4.
WARNING: Problem interpreting the valence field of an atom
The valence field specifies a valence 1 that is
less than the observed explicit valence 2.
WARNING: Problem interpreting the valence field of an atom
The valence field specifies a valence 3 that is
less than the observed explicit valence 5.
WARNING: Problem interpreting the valence field of an atom
The valence field specifies a valence 1 that is
less than the observed explicit valence 2.
WARNING: Problem interpreting the valence field of an atom
The valence field specifies a valence 1 that is
less than the observed explicit valence 2.
WARNING: Problem interpreting the valence field of an atom
The valence field specifies a valence 3 that is
less than the observed explicit valence 4.

..............                             [ 32%]
../test/test_grid.py .                                                      [ 33%]
../test/test_gridinterp.py ......                                           [ 43%]
../test/test_gridio.py .                                                    [ 44%]
../test/test_gridmaker.py ..........                                        [ 60%]
../test/test_numpy.py ....                                                  [ 66%]
../test/test_torch.py ........                                              [ 78%]
../test/test_torch_cnn.py F                                                 [ 80%]
../test/test_transform.py ...                                               [ 84%]
../test/test_typing.py ..........                                           [100%]

==================================== FAILURES =====================================
_____________________________ test_coordset_from_mol ______________________________

    def test_coordset_from_mol():
        import psutil,os,gc
        import pytest
        import molgrid
        from openbabel import pybel  #3.0

        m = pybel.readstring('smi','c1ccccc1CO')
        m.addh()
        m.make3D()

        gc.collect()
        before = psutil.Process(os.getpid()).memory_info().rss / 1024

        for i in range(100):
            m = pybel.readstring('smi','c1ccccc1CO')
            m.addh()
            m.make3D()
            c = molgrid.CoordinateSet(m)

        gc.collect()
        after = psutil.Process(os.getpid()).memory_info().rss / 1024

>       assert before == after
E       assert 263424.0 == 263636.0

../test/test_coordinateset.py:49: AssertionError
______________________________ test_train_torch_cnn _______________________________

    @pytest.mark.slow()
    def test_train_torch_cnn():
        batch_size = 50
        datadir = os.path.dirname(__file__)+'/data'
        fname = datadir+"/small.types"

        molgrid.set_random_seed(0)
        torch.manual_seed(0)
        np.random.seed(0)

        class Net(nn.Module):
            def __init__(self, dims):
                super(Net, self).__init__()
                self.pool0 = nn.MaxPool3d(2)
                self.conv1 = nn.Conv3d(dims[0], 32, kernel_size=3, padding=1)
                self.pool1 = nn.MaxPool3d(2)
                self.conv2 = nn.Conv3d(32, 64, kernel_size=3, padding=1)
                self.pool2 = nn.MaxPool3d(2)
                self.conv3 = nn.Conv3d(64, 128, kernel_size=3, padding=1)

                self.last_layer_size = dims[1]//8 * dims[2]//8 * dims[3]//8 * 128
                self.fc1 = nn.Linear(self.last_layer_size, 2)

            def forward(self, x):
                x = self.pool0(x)
                x = F.relu(self.conv1(x))
                x = self.pool1(x)
                x = F.relu(self.conv2(x))
                x = self.pool2(x)
                x = F.relu(self.conv3(x))
                x = x.view(-1, self.last_layer_size)
                x = self.fc1(x)
                return x

        def weights_init(m):
            if isinstance(m, nn.Conv3d) or isinstance(m, nn.Linear):
                init.xavier_uniform_(m.weight.data)
                init.constant_(m.bias.data, 0)

        batch_size = 50
        e = molgrid.ExampleProvider(data_root=datadir+"/structs",balanced=True,shuffle=True)
        e.populate(fname)

        gmaker = molgrid.GridMaker()
        dims = gmaker.grid_dimensions(e.num_types())
        tensor_shape = (batch_size,)+dims

        model = Net(dims).to('cuda')
        model.apply(weights_init)

        optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

        input_tensor = torch.zeros(tensor_shape, dtype=torch.float32, device='cuda')
        float_labels = torch.zeros(batch_size, dtype=torch.float32)

        losses = []
        for iteration in range(100):
            #load data
            batch = e.next_batch(batch_size)
            gmaker.forward(batch, input_tensor, 0, random_rotation=False) #not rotating since convergence is faster this way
            batch.extract_label(0, float_labels)
            labels = float_labels.long().to('cuda')

            optimizer.zero_grad()
>           output = model(input_tensor)

../test/test_torch_cnn.py:75: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../../../miniconda3/envs/molgrid/lib/python3.9/site-packages/torch/nn/modules/module.py:1110: in _call_impl
    return forward_call(*input, **kwargs)
../test/test_torch_cnn.py:42: in forward
    x = self.fc1(x)
../../../miniconda3/envs/molgrid/lib/python3.9/site-packages/torch/nn/modules/module.py:1110: in _call_impl
    return forward_call(*input, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = Linear(in_features=27648, out_features=2, bias=True)
input = tensor([[0.0071, 0.0033, 0.0000,  ..., 0.0270, 0.0000, 0.0000],
        [0.0336, 0.0000, 0.0000,  ..., 0.0000, 0.0059,...393],
        [0.0537, 0.0000, 0.0070,  ..., 0.0000, 0.0000, 0.0000]],
       device='cuda:1', grad_fn=<ViewBackward0>)

    def forward(self, input: Tensor) -> Tensor:
>       return F.linear(input, self.weight, self.bias)
E       RuntimeError: CUDA error: CUBLAS_STATUS_NOT_SUPPORTED when calling `cublasSgemm( handle, opa, opb, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)`

../../../miniconda3/envs/molgrid/lib/python3.9/site-packages/torch/nn/modules/linear.py:103: RuntimeError
================================ warnings summary =================================
test/test_numpy.py::test_numpy
  /NAS/luohao/software/molgrid/libmolgrid/test/test_numpy.py:12: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.
  Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
    a3d = np.arange(27).astype(np.float).reshape(3,3,-1)

test/test_numpy.py::test_numpy_conv
  /NAS/luohao/software/molgrid/libmolgrid/test/test_numpy.py:40: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.
  Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
    a3d = np.arange(27).astype(np.float).reshape(3,3,-1)

-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
============================= short test summary info =============================
FAILED ../test/test_coordinateset.py::test_coordset_from_mol - assert 263424.0 =...
FAILED ../test/test_torch_cnn.py::test_train_torch_cnn - RuntimeError: CUDA erro...
============== 2 failed, 63 passed, 2 warnings in 220.38s (0:03:40) ===============

I don't know why `before != after` (os.getpid() returns the same value). There also seems to be a CUDA error. Thanks!

dkoes commented 2 years ago

The test_coordinateset.py error is identifying a memory leak. The amount of memory in use before and after the code should be the same. If profiling is turned on, you might see this error.

The other error appears to be coming from pytorch. What versions of operating system/pytorch/CUDA are you using?

lh12565 commented 2 years ago

Could this error be caused by another user or program using memory at the same time? Can I ignore this error? My system/pytorch/CUDA versions are as follows: CentOS Linux release 7.8.2003 (Core), pytorch 1.11.0

CUDA:
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2021 NVIDIA Corporation
Built on Mon_May__3_19:15:13_PDT_2021
Cuda compilation tools, release 11.3, V11.3.109
Build cuda_11.3.r11.3/compiler.29920130_0

Thanks!

dkoes commented 2 years ago

You can probably ignore the error, unless you are seeing memory leakage in real world practice.

lh12565 commented 2 years ago

Hi @dkoes, when I run `make install`, the following message appears at the end of the output:

[ 29%] Built target libmolgrid_shared
[ 59%] Built target libmolgrid_static
[ 65%] Built target molgrid
[ 68%] Built target test_coordinateset_cpp
[ 71%] Built target test_grid_cpp
[ 75%] Built target test_grid_cu
[ 78%] Built target test_gridmaker_cpp
[ 81%] Built target test_gridmaker_cu
[ 84%] Built target test_gridinterp_cpp
[ 87%] Built target test_mgrid_cpp
[ 90%] Built target test_mgrid_cu
[ 93%] Built target test_quaternion_cpp
[ 96%] Built target test_transform_cpp
[100%] Built target test_transform_cu
Install the project...
-- Install configuration: "Release"
-- Installing: /home/lh/.local/lib/libmolgrid.so
-- Up-to-date: /home/lh/.local/lib/libmolgrid.a
-- Up-to-date: /home/lh/.local/include/libmolgrid/libmolgrid.h
-- Up-to-date: /home/lh/.local/include/libmolgrid/grid.h
-- Up-to-date: /home/lh/.local/include/libmolgrid/managed_grid.h
-- Up-to-date: /home/lh/.local/include/libmolgrid/quaternion.h
-- Up-to-date: /home/lh/.local/include/libmolgrid/transform.h
-- Up-to-date: /home/lh/.local/include/libmolgrid/atom_typer.h
-- Up-to-date: /home/lh/.local/include/libmolgrid/example.h
-- Up-to-date: /home/lh/.local/include/libmolgrid/coordinateset.h
-- Up-to-date: /home/lh/.local/include/libmolgrid/exampleref_providers.h
-- Up-to-date: /home/lh/.local/include/libmolgrid/example_extractor.h
-- Up-to-date: /home/lh/.local/include/libmolgrid/example_provider.h
-- Up-to-date: /home/lh/.local/include/libmolgrid/example_dataset.h
-- Up-to-date: /home/lh/.local/include/libmolgrid/grid_maker.h
-- Up-to-date: /home/lh/.local/include/libmolgrid/grid_interpolater.h
-- Up-to-date: /home/lh/.local/include/libmolgrid/coord_cache.h
-- Up-to-date: /home/lh/.local/include/libmolgrid/common.h
-- Up-to-date: /home/lh/.local/include/libmolgrid/grid_io.h
-- Up-to-date: /home/lh/.local/include/libmolgrid/cartesian_grid.h
-- Up-to-date: /home/lh/.local/include/libmolgrid/config.h
Unknown option: -C
usage: git [--version] [--help] [-c name=value]
           [--exec-path[=<path>]] [--html-path] [--man-path] [--info-path]
           [-p|--paginate|--no-pager] [--no-replace-objects] [--bare]
           [--git-dir=<path>] [--work-tree=<path>] [--namespace=<name>]
           <command> [<args>]
Cannot obtain version number from git.

I don't know how to solve it. Thanks!

dkoes commented 2 years ago

I would guess you need to upgrade your version of git.