NVIDIA / apex

A PyTorch Extension: Tools for easy mixed precision and distributed training in Pytorch
BSD 3-Clause "New" or "Revised" License
8.17k stars 1.35k forks source link

ModuleNotFoundError: No module named 'fused_layer_norm_cuda', ubuntu 22.04, Successfully installed apex-0.1 #1755

Open dhamaraiselvi opened 7 months ago

dhamaraiselvi commented 7 months ago
    model = models.build_model(cfg, self, from_checkpoint)
  File "/home/dhamaraiselvi/.local/lib/python3.10/site-packages/fairseq/models/__init__.py", line 106, in build_model
    return model.build_model(cfg, task)
  File "/home/dhamaraiselvi/.local/lib/python3.10/site-packages/fairseq/models/transformer/transformer_legacy.py", line 133, in build_model
    return super().build_model(cfg, task)
  File "/home/dhamaraiselvi/.local/lib/python3.10/site-packages/fairseq/models/transformer/transformer_base.py", line 97, in build_model
    encoder = cls.build_encoder(cfg, src_dict, encoder_embed_tokens)
  File "/home/dhamaraiselvi/.local/lib/python3.10/site-packages/fairseq/models/transformer/transformer_legacy.py", line 143, in build_encoder
    return super().build_encoder(
  File "/home/dhamaraiselvi/.local/lib/python3.10/site-packages/fairseq/models/transformer/transformer_base.py", line 115, in build_encoder
    return TransformerEncoderBase(cfg, src_dict, embed_tokens)
  File "/home/dhamaraiselvi/.local/lib/python3.10/site-packages/fairseq/models/transformer/transformer_encoder.py", line 96, in __init__
    [self.build_encoder_layer(cfg) for i in range(cfg.encoder.layers)]
  File "/home/dhamaraiselvi/.local/lib/python3.10/site-packages/fairseq/models/transformer/transformer_encoder.py", line 96, in <listcomp>
    [self.build_encoder_layer(cfg) for i in range(cfg.encoder.layers)]
  File "/home/dhamaraiselvi/.local/lib/python3.10/site-packages/fairseq/models/transformer/transformer_encoder.py", line 106, in build_encoder_layer
    layer = transformer_layer.TransformerEncoderLayerBase(
  File "/home/dhamaraiselvi/.local/lib/python3.10/site-packages/fairseq/modules/transformer_layer.py", line 43, in __init__
    self.self_attn_layer_norm = LayerNorm(self.embed_dim, export=cfg.export)
  File "/home/dhamaraiselvi/.local/lib/python3.10/site-packages/fairseq/modules/layer_norm.py", line 32, in LayerNorm
    return FusedLayerNorm(normalized_shape, eps, elementwise_affine)
  File "/home/dhamaraiselvi/.local/lib/python3.10/site-packages/apex/normalization/fused_layer_norm.py", line 294, in __init__
    fused_layer_norm_cuda = importlib.import_module("fused_layer_norm_cuda")
  File "/usr/lib/python3.10/importlib/__init__.py", line 126, in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
  File "<frozen importlib._bootstrap>", line 1050, in _gcd_import
  File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
  File "<frozen importlib._bootstrap>", line 1004, in _find_and_load_unlocked
ModuleNotFoundError: No module named 'fused_layer_norm_cuda'
dhamaraiselvi@dhamaraiselvi-hp-zbook-power:~/Machine_Translation/Model_V1$ python3
Python 3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0] on linux
Type "help", "copyright", "credits" or "license" for more information.

>>> import apex
>>> from apex.normalization.fused_layer_norm import FusedLayerNorm
>>> print(dir(FusedLayerNorm))
['T_destination', '__annotations__', '__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_apply', '_call_impl', '_compiled_call_impl', '_get_backward_hooks', '_get_backward_pre_hooks', '_get_name', '_load_from_state_dict', '_maybe_warn_non_full_backward_hook', '_named_members', '_register_load_state_dict_pre_hook', '_register_state_dict_hook', '_replicate_for_data_parallel', '_save_to_state_dict', '_slow_forward', '_version', '_wrapped_call_impl', 'add_module', 'apply', 'bfloat16', 'buffers', 'call_super_init', 'children', 'compile', 'cpu', 'cuda', 'double', 'dump_patches', 'eval', 'extra_repr', 'float', 'forward', 'get_buffer', 'get_extra_state', 'get_parameter', 'get_submodule', 'half', 'ipu', 'load_state_dict', 'modules', 'named_buffers', 'named_children', 'named_modules', 'named_parameters', 'parameters', 'register_backward_hook', 'register_buffer', 'register_forward_hook', 'register_forward_pre_hook', 'register_full_backward_hook', 'register_full_backward_pre_hook', 'register_load_state_dict_post_hook', 'register_module', 'register_parameter', 'register_state_dict_pre_hook', 'requires_grad_', 'reset_parameters', 'set_extra_state', 'share_memory', 'state_dict', 'to', 'to_empty', 'train', 'type', 'xpu', 'zero_grad']

KeyboardInterrupt
>>> exit()
dhamaraiselvi@dhamaraiselvi-hp-zbook-power:~/Machine_Translation/Model_V1$ nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2021 NVIDIA Corporation
Built on Thu_Nov_18_09:45:30_PST_2021
Cuda compilation tools, release 11.5, V11.5.119
Build cuda_11.5.r11.5/compiler.30672275_0

dhamaraiselvi commented 7 months ago

I am unable to resolve it. Please suggest a solution.

xujin1184104394 commented 3 months ago

I encountered the same problem. Has it been solved? Have the maintainers not provided an official solution?

liuxi0099 commented 2 weeks ago

Same problem on branch 22.04-dev.

root@32972dbe780d:~/apex# pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./
Using pip 24.1 from /usr/local/lib/python3.10/dist-packages/pip (python 3.10)
...
  adding 'apex/mlp/__init__.py'
  adding 'apex/mlp/mlp.py'
  adding 'apex/multi_tensor_apply/__init__.py'
  adding 'apex/multi_tensor_apply/multi_tensor_apply.py'
  adding 'apex/normalization/__init__.py'
  adding 'apex/normalization/fused_layer_norm.py'
  **no fused_layer_norm_cuda here**
  adding 'apex/normalization/instance_norm.py'
  adding 'apex/optimizers/__init__.py'
  adding 'apex/optimizers/fused_adagrad.py'
  adding 'apex/optimizers/fused_adam.py'
  adding 'apex/optimizers/fused_lamb.py'
  adding 'apex/optimizers/fused_mixed_precision_lamb.py'
  adding 'apex/optimizers/fused_novograd.py'
  adding 'apex/optimizers/fused_sgd.py'
...