AIRI-Institute / nablaDFT

nablaDFT: Large-Scale Conformational Energy and Hamiltonian Prediction benchmark and dataset
https://doi.org/10.1039/D2CP03966D
MIT License
159 stars 16 forks source link

Error on import #11

Closed KonstantinUshenin closed 4 months ago

KonstantinUshenin commented 10 months ago

The import of NablaDFT:

from nablaDFT.dataset import NablaDFT

raises an error:

/home/kostanew/anaconda3/lib/python3.9/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: libtorch_cuda_cu.so: cannot open shared object file: No such file or directory warn(f"Failed to load image Python extension: {e}")

OSError                                   Traceback (most recent call last)
Input In [4], in <cell line: 1>()
----> 1 from nablaDFT.dataset import NablaDFT

File ~/anaconda3/lib/python3.9/site-packages/nablaDFT/__init__.py:1, in <module>
----> 1 from . import dataset
      2 from . import schnet
      3 from . import painn

File ~/anaconda3/lib/python3.9/site-packages/nablaDFT/dataset/__init__.py:1, in <module>
----> 1 from .nablaDFT_dataset import *
      2 from .hamiltonian_database import HamiltonianDatabase

File ~/anaconda3/lib/python3.9/site-packages/nablaDFT/dataset/nablaDFT_dataset.py:11, in <module>
      9 from ase.db import connect
     10 from torch.utils.data import Subset
---> 11 from torch_geometric.data.lightning import LightningDataset
     12 from torch_geometric.data import InMemoryDataset, Data
     13 from schnetpack.data import AtomsDataFormat, AtomsDataModule, load_dataset

File ~/anaconda3/lib/python3.9/site-packages/torch_geometric/data/lightning/__init__.py:1, in <module>
----> 1 from .datamodule import LightningDataset, LightningNodeData, LightningLinkData
      3 __all__ = classes = [
      4     'LightningDataset',
      5     'LightningNodeData',
      6     'LightningLinkData',
      7 ]

File ~/anaconda3/lib/python3.9/site-packages/torch_geometric/data/lightning/datamodule.py:20, in <module>
     17 from torch_geometric.typing import InputEdges, InputNodes, OptTensor
     19 try:
---> 20     from pytorch_lightning import LightningDataModule as PLLightningDataModule
     21     no_pytorch_lightning = False
     22 except (ImportError, ModuleNotFoundError):

File ~/anaconda3/lib/python3.9/site-packages/pytorch_lightning/__init__.py:27, in <module>
     25 from lightning_fabric.utilities.seed import seed_everything  # noqa: E402
     26 from lightning_fabric.utilities.warnings import disable_possible_user_warnings  # noqa: E402
---> 27 from pytorch_lightning.callbacks import Callback  # noqa: E402
     28 from pytorch_lightning.core import LightningDataModule, LightningModule  # noqa: E402
     29 from pytorch_lightning.trainer import Trainer  # noqa: E402

File ~/anaconda3/lib/python3.9/site-packages/pytorch_lightning/callbacks/__init__.py:14, in <module>
      1 # Copyright The Lightning AI team.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
   (...)
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
---> 14 from pytorch_lightning.callbacks.batch_size_finder import BatchSizeFinder
     15 from pytorch_lightning.callbacks.callback import Callback
     16 from pytorch_lightning.callbacks.checkpoint import Checkpoint

File ~/anaconda3/lib/python3.9/site-packages/pytorch_lightning/callbacks/batch_size_finder.py:24, in <module>
     21 from typing import Optional
     23 import pytorch_lightning as pl
---> 24 from pytorch_lightning.callbacks.callback import Callback
     25 from pytorch_lightning.tuner.batch_size_scaling import _scale_batch_size
     26 from pytorch_lightning.utilities.exceptions import MisconfigurationException, _TunerExitException

File ~/anaconda3/lib/python3.9/site-packages/pytorch_lightning/callbacks/callback.py:22, in <module>
     19 from torch.optim import Optimizer
     21 import pytorch_lightning as pl
---> 22 from pytorch_lightning.utilities.types import STEP_OUTPUT
     25 class Callback:
     26     r"""Abstract base class used to build new callbacks.
     27 
     28     Subclass this class and override any of the relevant hooks
     29 
     30     """

File ~/anaconda3/lib/python3.9/site-packages/pytorch_lightning/utilities/types.py:40, in <module>
     38 from torch import Tensor
     39 from torch.optim import Optimizer
---> 40 from torchmetrics import Metric
     41 from typing_extensions import NotRequired, Required
     43 from lightning_fabric.utilities.types import _TORCH_LRSCHEDULER, LRScheduler, ProcessGroup, ReduceLROnPlateau

File ~/anaconda3/lib/python3.9/site-packages/torchmetrics/__init__.py:14, in <module>
     11 _PACKAGE_ROOT = os.path.dirname(__file__)
     12 _PROJECT_ROOT = os.path.dirname(_PACKAGE_ROOT)
---> 14 from torchmetrics import functional  # noqa: E402
     15 from torchmetrics.aggregation import (  # noqa: E402
     16     CatMetric,
     17     MaxMetric,
   (...)
     22     SumMetric,
     23 )
     24 from torchmetrics.audio._deprecated import _PermutationInvariantTraining as PermutationInvariantTraining  # noqa: E402

File ~/anaconda3/lib/python3.9/site-packages/torchmetrics/functional/__init__.py:14, in <module>
      1 # Copyright The Lightning team.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
   (...)
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
---> 14 from torchmetrics.functional.audio._deprecated import _permutation_invariant_training as permutation_invariant_training
     15 from torchmetrics.functional.audio._deprecated import _pit_permutate as pit_permutate
     16 from torchmetrics.functional.audio._deprecated import (
     17     _scale_invariant_signal_distortion_ratio as scale_invariant_signal_distortion_ratio,
     18 )

File ~/anaconda3/lib/python3.9/site-packages/torchmetrics/functional/audio/__init__.py:14, in <module>
      1 # Copyright The Lightning team.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
   (...)
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
---> 14 from torchmetrics.functional.audio.pit import permutation_invariant_training, pit_permutate
     15 from torchmetrics.functional.audio.sdr import scale_invariant_signal_distortion_ratio, signal_distortion_ratio
     16 from torchmetrics.functional.audio.snr import (
     17     complex_scale_invariant_signal_noise_ratio,
     18     scale_invariant_signal_noise_ratio,
     19     signal_noise_ratio,
     20 )

File ~/anaconda3/lib/python3.9/site-packages/torchmetrics/functional/audio/pit.py:23, in <module>
     20 from torch import Tensor
     21 from typing_extensions import Literal
---> 23 from torchmetrics.utilities import rank_zero_warn
     24 from torchmetrics.utilities.imports import _SCIPY_AVAILABLE
     26 # _ps_dict: cache of permutations
     27 # it's necessary to cache it, otherwise it will consume a large amount of time

File ~/anaconda3/lib/python3.9/site-packages/torchmetrics/utilities/__init__.py:14, in <module>
      1 # Copyright The Lightning team.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
   (...)
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
---> 14 from torchmetrics.utilities.checks import check_forward_full_state_property
     15 from torchmetrics.utilities.data import apply_to_collection
     16 from torchmetrics.utilities.distributed import class_reduce, reduce

File ~/anaconda3/lib/python3.9/site-packages/torchmetrics/utilities/checks.py:25, in <module>
     22 import torch
     23 from torch import Tensor
---> 25 from torchmetrics.metric import Metric
     26 from torchmetrics.utilities.data import select_topk, to_onehot
     27 from torchmetrics.utilities.enums import DataType

File ~/anaconda3/lib/python3.9/site-packages/torchmetrics/metric.py:30, in <module>
     27 from torch import Tensor
     28 from torch.nn import Module
---> 30 from torchmetrics.utilities.data import (
     31     _flatten,
     32     _squeeze_if_scalar,
     33     apply_to_collection,
     34     dim_zero_cat,
     35     dim_zero_max,
     36     dim_zero_mean,
     37     dim_zero_min,
     38     dim_zero_sum,
     39 )
     40 from torchmetrics.utilities.distributed import gather_all_tensors
     41 from torchmetrics.utilities.exceptions import TorchMetricsUserError

File ~/anaconda3/lib/python3.9/site-packages/torchmetrics/utilities/data.py:22, in <module>
     19 from torch import Tensor
     21 from torchmetrics.utilities.exceptions import TorchMetricsUserWarning
---> 22 from torchmetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_12, _XLA_AVAILABLE
     23 from torchmetrics.utilities.prints import rank_zero_warn
     25 METRIC_EPS = 1e-6

File ~/anaconda3/lib/python3.9/site-packages/torchmetrics/utilities/imports.py:48, in <module>
     46 _GAMMATONE_AVAILABEL: bool = package_available("gammatone")
     47 _TORCHAUDIO_AVAILABEL: bool = package_available("torchaudio")
---> 48 _TORCHAUDIO_GREATER_EQUAL_0_10: Optional[bool] = compare_version("torchaudio", operator.ge, "0.10.0")
     49 _SACREBLEU_AVAILABLE: bool = package_available("sacrebleu")
     50 _REGEX_AVAILABLE: bool = package_available("regex")

File ~/anaconda3/lib/python3.9/site-packages/lightning_utilities/core/imports.py:77, in compare_version(package, op, version, use_base_version)
     68 """Compare package version with some requirements.
     69 
     70 >>> compare_version("torch", operator.ge, "0.1")
   (...)
     74 
     75 """
     76 try:
---> 77     pkg = importlib.import_module(package)
     78 except (ImportError, pkg_resources.DistributionNotFound):
     79     return False

File ~/anaconda3/lib/python3.9/importlib/__init__.py:127, in import_module(name, package)
    125             break
    126         level += 1
--> 127 return _bootstrap._gcd_import(name[level:], package, level)

File ~/anaconda3/lib/python3.9/site-packages/torchaudio/__init__.py:1, in <module>
----> 1 from torchaudio import (  # noqa: F401
      2     _extension,
      3     compliance,
      4     datasets,
      5     functional,
      6     io,
      7     kaldi_io,
      8     models,
      9     pipelines,
     10     sox_effects,
     11     transforms,
     12     utils,
     13 )
     14 from torchaudio.backend import get_audio_backend, list_audio_backends, set_audio_backend
     16 try:

File ~/anaconda3/lib/python3.9/site-packages/torchaudio/_extension.py:135, in <module>
    127         if ta_version != t_version:
    128             raise RuntimeError(
    129                 "Detected that PyTorch and TorchAudio were compiled with different CUDA versions. "
    130                 f"PyTorch has CUDA version {t_version} whereas TorchAudio has CUDA version {ta_version}. "
    131                 "Please install the TorchAudio version that matches your PyTorch version."
    132             )
--> 135 _init_extension()
    136 _check_cuda_version()

File ~/anaconda3/lib/python3.9/site-packages/torchaudio/_extension.py:105, in _init_extension()
    102             except Exception:
    103                 pass
--> 105 _load_lib("libtorchaudio")
    106 # This import is for initializing the methods registered via PyBind11
    107 # This has to happen after the base library is loaded
    108 from torchaudio import _torchaudio  # noqa

File ~/anaconda3/lib/python3.9/site-packages/torchaudio/_extension.py:52, in _load_lib(lib)
     50 if not path.exists():
     51     return False
---> 52 torch.ops.load_library(path)
     53 torch.classes.load_library(path)
     54 return True

File ~/anaconda3/lib/python3.9/site-packages/torch/_ops.py:643, in _Ops.load_library(self, path)
    638 path = _utils_internal.resolve_library_path(path)
    639 with dl_open_guard():
    640     # Import the shared library into the process, thus running its
    641     # static (global) initialization code in order to register custom
    642     # operators with the JIT.
--> 643     ctypes.CDLL(path)
    644 self.loaded_libraries.add(path)

File ~/anaconda3/lib/python3.9/ctypes/__init__.py:382, in CDLL.__init__(self, name, mode, handle, use_errno, use_last_error, winmode)
    379 self._FuncPtr = _FuncPtr
    381 if handle is None:
--> 382     self._handle = _dlopen(self._name, mode)
    383 else:
    384     self._handle = handle

OSError: libtorch_cuda_cpp.so: cannot open shared object file: No such file or directory
KonstantinUshenin commented 10 months ago

Solving this issue requires downgrading CUDA to a version below 12:

sudo apt-get install libcudnn8=8.8.1.3-1+cuda11.8
sudo apt-get install libcudnn8-dev=8.8.1.3-1+cuda11.8
KuzmaKhrabrov commented 10 months ago

This is a CUDA-installation-specific error. We will provide an up-to-date config with a major update, but unfortunately the PyTorch and PyTorch Geometric code is not guaranteed to work with all driver versions.

KuzmaKhrabrov commented 4 months ago

The errors are fixed in the recent release, so we now have a complete setup.py.