Closed amirrezasalimi closed 5 months ago
I can reproduce the same issue. Is there any manual step required to prepare the environment?
(image attachment failed to upload) Try the recommended versions of deepspeed and pytorch_lightning.
My environment is: torch 2.1.0+cu121, pytorch-lightning 1.9.4, deepspeed 0.12.3.
Hi JL-er, I encountered the same error: RuntimeError: Error building extension 'wkv6'
My environment is: torch 2.4.0+11.5, pytorch-lightning 1.9.5, deepspeed 0.14.4.
I tried your environment settings, but it still doesn't work.
(rwkv) gavin@gavin-System-Product-Name:~/workspace/RWKV-PEFT$ bash lora.sh INFO:pytorch_lightning.utilities.rank_zero:########## work in progress ########## [2024-08-12 17:45:25,066] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect) [WARNING] async_io requires the dev libaio .so object and headers but these were not found. [WARNING] async_io: please install the libaio-dev package with apt [WARNING] If libaio is already installed (perhaps from source), try setting the CFLAGS and LDFLAGS environment variables to where it can be found. [WARNING] Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH [WARNING] sparse_attn requires a torch version >= 1.5 and < 2.0 but detected 2.4 [WARNING] using untested triton version (3.0.0), only 1.0.0 is known to be compatible /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/deepspeed/runtime/zero/linear.py:49: FutureWarning: torch.cuda.amp.custom_fwd(args...)is deprecated. Please usetorch.amp.custom_fwd(args..., device_type='cuda')instead. def forward(ctx, input, weight, bias=None): /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/deepspeed/runtime/zero/linear.py:67: FutureWarning:torch.cuda.amp.custom_bwd(args...)is deprecated. Please usetorch.amp.custom_bwd(args..., device_type='cuda')` instead.
def backward(ctx, grad_output):
INFO:pytorch_lightning.utilities.rank_zero:
############################################################################
RWKV-5 BF16 on 1x1 GPU, bsz 1x1x2=2, deepspeed_stage_1 with grad_cp
Data = /home/gavin/workspace/self_recognition/data/20240710105508_200_self_cognition_data.json (binidx), ProjDir = /home/gavin/workspace/RWKV-PEFT/out_model/lora
Epoch = 0 to 3 (will continue afterwards), save every 1 epoch
Each "epoch" = 100 steps, 200 samples, 819200 tokens
Model = 32 n_layer, 4096 n_embd, 4096 ctx_len
Adam = lr 0.0005 to 5e-05, warmup 0 steps, beta (0.9, 0.99), eps 1e-08
Found torch 2.4.0+cu121, recommend 1.13.1+cu117 or newer
Found deepspeed 0.14.4, recommend 0.7.0 (faster than newer versions)
Found pytorch_lightning 1.9.5, recommend 1.9.5
############################################################################
INFO:pytorch_lightning.utilities.rank_zero:{'load_model': '/home/gavin/data/model/rwkv6/RWKV-x060-World-7B-v2.1-20240507-ctx4096.pth', 'wandb': '', 'proj_dir': '/home/gavin/workspace/RWKV-PEFT/out_model/lora', 'random_seed': -1, 'data_file': '/home/gavin/workspace/self_recognition/data/20240710105508_200_self_cognition_data.json', 'data_type': 'binidx', 'vocab_size': 65536, 'ctx_len': 4096, 'epoch_steps': 100, 'epoch_count': 4, 'epoch_begin': 0, 'epoch_save': 1, 'micro_bsz': 2, 'n_layer': 32, 'n_embd': 4096, 'dim_att': 4096, 'dim_ffn': 14336, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 0.0005, 'lr_final': 5e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.99, 'adam_eps': 1e-08, 'grad_cp': 1, 'dropout': 0, 'weight_decay': 0, 'weight_decay_final': -1, 'my_pile_version': 1, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'head_size_a': 64, 'head_size_divisor': 8, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_random_steps': 0, 'my_testing': 'x060', 'my_exit': 99999999, 'my_exit_tokens': 0, 'emb': False, 'lora': True, 'lora_load': '', 'lora_r': 128, 'lora_alpha': 256.0, 'lora_dropout': 0.01, 'lora_parts': 'att,ffn,time,ln', 'LISA': False, 'lisa_r': 2, 'lisa_k': 100, 'PISSA': False, 'svd_niter': 4, 'pissa_load': '', 'pissa_init': '', 'quant': 'none', 'dataload': 'get', 'state_tune': False, 'chunk_ctx': 512, 'fla': False, 'train_type': 'none', 'loss_mask': 'none', 'optim': 'none', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 
'accumulate_grad_batches': None, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_1', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2024-08-12-17-45-25', 'betas': (0.9, 0.99), 'real_bsz': 2, 'run_name': '65536 ctx4096 L32 D4096'}
/home/gavin/workspace/RWKV-PEFT/fla/ops/based/chunk_fuse.py:326: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead.
def forward(ctx, q, k, v, scale=1):
/home/gavin/workspace/RWKV-PEFT/fla/ops/based/chunk_fuse.py:362: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead.
def backward(ctx, do, dz):
/home/gavin/workspace/RWKV-PEFT/fla/ops/based/parallel.py:297: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead.
def forward(ctx, q, k, v, scale):
/home/gavin/workspace/RWKV-PEFT/fla/ops/based/parallel.py:334: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead.
def backward(ctx, do, dz):
/home/gavin/workspace/RWKV-PEFT/fla/ops/gla/chunk_fuse.py:315: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead.
def forward(ctx, q, k, v, g, scale, initial_state, output_final_state):
/home/gavin/workspace/RWKV-PEFT/fla/ops/gla/chunk_fuse.py:409: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead.
def backward(ctx, do, d_final_state=None):
/home/gavin/workspace/RWKV-PEFT/fla/ops/gla/recurrent_fuse.py:266: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead.
def forward(ctx, q, k, v, gk, gv, scale=None, initial_state=None, output_final_state=False, reverse=False):
/home/gavin/workspace/RWKV-PEFT/fla/ops/gla/recurrent_fuse.py:319: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead.
def backward(ctx, do, d_final_state=None):
/home/gavin/workspace/RWKV-PEFT/fla/ops/retention/chunk.py:265: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead.
def forward(ctx, q, k, v, initial_state, output_final_state):
/home/gavin/workspace/RWKV-PEFT/fla/ops/retention/chunk.py:310: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead.
def backward(ctx, do, d_ht=None):
/home/gavin/workspace/RWKV-PEFT/fla/ops/retention/chunk_fuse.py:237: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead.
def forward(ctx, q, k, v, initial_state, output_final_state):
/home/gavin/workspace/RWKV-PEFT/fla/ops/retention/chunk_fuse.py:290: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead.
def backward(ctx, do, d_final_state=None):
/home/gavin/workspace/RWKV-PEFT/fla/ops/retention/parallel.py:273: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead.
def forward(ctx, q, k, v):
/home/gavin/workspace/RWKV-PEFT/fla/ops/retention/parallel.py:304: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead.
def backward(ctx, do):
/home/gavin/workspace/RWKV-PEFT/fla/ops/abc/recurrent_fuse.py:212: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead.
def forward(ctx, q, k, v, s, g, scale=None, initial_state=None, output_final_state=False, reverse=False):
/home/gavin/workspace/RWKV-PEFT/fla/ops/abc/recurrent_fuse.py:280: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead.
def backward(ctx, do, dht=None):
/home/gavin/workspace/RWKV-PEFT/fla/ops/linear_attn/chunk.py:252: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead.
def forward(ctx, q, k, v, scale, initial_state, output_final_state):
/home/gavin/workspace/RWKV-PEFT/fla/ops/linear_attn/chunk.py:297: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead.
def backward(ctx, do, d_ht=None):
/home/gavin/workspace/RWKV-PEFT/fla/ops/linear_attn/chunk_fuse.py:226: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead.
def forward(ctx, q, k, v, scale, initial_state, output_final_state):
/home/gavin/workspace/RWKV-PEFT/fla/ops/linear_attn/chunk_fuse.py:276: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead.
def backward(ctx, do, d_final_state=None):
/home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/wy_fast.py:315: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead.
def forward(ctx, k, v, beta, chunk_size):
/home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/wy_fast.py:324: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead.
def backward(ctx, dw, du):
/home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/utils.py:196: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead.
def forward(ctx, k, v, beta, chunk_size):
/home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/utils.py:205: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead.
def backward(ctx, do, do2):
/home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/chunk_fuse.py:330: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead.
def forward(ctx, q, k, v, beta, BT, initial_state, output_final_state, checkpoint_level=0):
/home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/chunk_fuse.py:348: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead.
def backward(ctx, do, d_final_state=None):
/home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/chunk.py:496: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead.
def forward(ctx, q, k, v, beta, BT, initial_state, output_final_state, checkpoint_level=1):
/home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/chunk.py:516: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead.
def backward(ctx, do, d_ht=None):
/home/gavin/workspace/RWKV-PEFT/fla/ops/rebased/parallel.py:296: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead.
def forward(ctx, q, k, v, scale):
/home/gavin/workspace/RWKV-PEFT/fla/ops/rebased/parallel.py:333: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead.
def backward(ctx, do, dz):
/home/gavin/workspace/RWKV-PEFT/fla/ops/rwkv6/recurrent_fuse.py:243: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead.
def forward(ctx, r, k, v, w, u, scale=None, initial_state=None, output_final_state=False, reverse=False):
/home/gavin/workspace/RWKV-PEFT/fla/ops/rwkv6/recurrent_fuse.py:286: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead.
def backward(ctx, do, d_final_state=None):
RWKV_MY_TESTING x060
Using /home/gavin/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...
Detected CUDA files, patching ldflags
Emitting ninja build file /home/gavin/.cache/torch_extensions/py310_cu121/wkv6/build.ninja...
/home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/utils/cpp_extension.py:1965: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation.
If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].
warnings.warn(
Building extension module wkv6...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
[1/2] /usr/bin/nvcc --generate-dependencies-with-compile --dependency-output wkv6_cuda.cuda.o.d -DTORCH_EXTENSION_NAME=wkv6 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE="gcc" -DPYBIND11_STDLIB="libstdcpp" -DPYBIND11_BUILD_ABI="cxxabi1011" -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include/TH -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include/THC -isystem /home/gavin/miniconda3/envs/rwkv/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS -D__CUDA_NO_HALF_CONVERSIONS_ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -res-usage --use_fast_math -O3 -Xptxas -O3 --extra-device-vectorization -D_N_=64 -D_T_=4096 -std=c++17 -c /home/gavin/workspace/RWKV-PEFT/cuda/wkv6_cuda.cu -o wkv6_cuda.cuda.o
FAILED: wkv6_cuda.cuda.o
/usr/bin/nvcc --generate-dependencies-with-compile --dependency-output wkv6_cuda.cuda.o.d -DTORCH_EXTENSION_NAME=wkv6 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE="gcc" -DPYBIND11_STDLIB="libstdcpp" -DPYBIND11_BUILD_ABI="cxxabi1011" -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include/TH -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include/THC -isystem /home/gavin/miniconda3/envs/rwkv/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS -D__CUDA_NO_HALF_CONVERSIONS_ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -res-usage --use_fast_math -O3 -Xptxas -O3 --extra-device-vectorization -D_N_=64 -D_T_=4096 -std=c++17 -c /home/gavin/workspace/RWKV-PEFT/cuda/wkv6_cuda.cu -o wkv6_cuda.cuda.o
ptxas info : 1 bytes gmem
ptxas info : Compiling entry function 'Z19kernel_backward_222IN3c108BFloat16EEviiiiPKT_S4_S4_PKfS4_S4_PS2' for 'sm_86'
ptxas info : Function properties for Z19kernel_backward_222IN3c108BFloat16EEviiiiPKT_S4_S4_PKfS4_S4_PS2
16376 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
ptxas info : Used 96 registers, 512 bytes smem, 424 bytes cmem[0]
ptxas info : Compiling entry function 'Z19kernel_backward_111IN3c108BFloat16EEviiiiPKT_S4_S4_PKfS4_S4_PS2_S7_S7_S7' for 'sm_86'
ptxas info : Function properties for Z19kernel_backward_111IN3c108BFloat16EEviiiiPKT_S4_S4_PKfS4_S4_PS2_S7_S7_S7
0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
ptxas info : Used 128 registers, 1536 bytes smem, 448 bytes cmem[0]
ptxas info : Compiling entry function 'Z14kernel_forwardIN3c108BFloat16EEviiiiPKT_S4_S4_PKfS4_PS2' for 'sm_86'
ptxas info : Function properties for Z14kernel_forwardIN3c108BFloat16EEviiiiPKT_S4_S4_PKfS4_PS2
0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
ptxas info : Used 100 registers, 1024 bytes smem, 416 bytes cmem[0]
/usr/include/c++/11/bits/std_function.h:435:145: error: parameter packs not expanded with ‘...’:
435 | function(_Functor&& __f)
| ^
/usr/include/c++/11/bits/std_function.h:435:145: note: ‘_ArgTypes’
/usr/include/c++/11/bits/std_function.h:530:146: error: parameter packs not expanded with ‘...’:
530 | operator=(_Functor&& __f)
| ^
/usr/include/c++/11/bits/std_function.h:530:146: note: ‘_ArgTypes’
ninja: build stopped: subcommand failed.
Traceback (most recent call last):
File "/home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 2105, in _run_ninja_build
subprocess.run(
File "/home/gavin/miniconda3/envs/rwkv/lib/python3.10/subprocess.py", line 526, in run
raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/gavin/workspace/RWKV-PEFT/train.py", line 296, in
from src.trainer import train_callback, generate_init_weight
File "/home/gavin/workspace/RWKV-PEFT/src/trainer.py", line 6, in
from .model import LORA_CONFIG
File "/home/gavin/workspace/RWKV-PEFT/src/model.py", line 190, in
wkv6_cuda = load(name="wkv6", sources=["cuda/wkv6_op.cpp", f"cuda/wkv6_cuda.cu"],
File "/home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1312, in load
return _jit_compile(
File "/home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1722, in _jit_compile
_write_ninja_file_and_build_library(
File "/home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1834, in _write_ninja_file_and_build_library
_run_ninja_build(
File "/home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 2121, in _run_ninja_build
raise RuntimeError(message) from e
RuntimeError: Error building extension 'wkv6'
Hi JL-er, I encountered the same error: RuntimeError: Error building extension 'wkv6'
My environment is: torch 2.4.0+11.5, pytorch-lightning 1.9.5, deepspeed 0.14.4.
I tried your environment settings, but it still doesn't work.
(rwkv) gavin@gavin-System-Product-Name:~/workspace/RWKV-PEFT$ bash lora.sh INFO:pytorch_lightning.utilities.rank_zero:########## work in progress ########## [2024-08-12 17:45:25,066] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect) [WARNING] async_io requires the dev libaio .so object and headers but these were not found. [WARNING] async_io: please install the libaio-dev package with apt [WARNING] If libaio is already installed (perhaps from source), try setting the CFLAGS and LDFLAGS environment variables to where it can be found. [WARNING] Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH [WARNING] sparse_attn requires a torch version >= 1.5 and < 2.0 but detected 2.4 [WARNING] using untested triton version (3.0.0), only 1.0.0 is known to be compatible /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/deepspeed/runtime/zero/linear.py:49: FutureWarning: torch.cuda.amp.custom_fwd(args...)is deprecated. Please usetorch.amp.custom_fwd(args..., device_type='cuda')instead. def forward(ctx, input, weight, bias=None): /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/deepspeed/runtime/zero/linear.py:67: FutureWarning:torch.cuda.amp.custom_bwd(args...)is deprecated. Please usetorch.amp.custom_bwd(args..., device_type='cuda')` instead. 
def backward(ctx, grad_output): INFO:pytorch_lightning.utilities.rank_zero: ############################################################################ RWKV-5 BF16 on 1x1 GPU, bsz 1x1x2=2, deepspeed_stage_1 with grad_cp Data = /home/gavin/workspace/self_recognition/data/20240710105508_200_self_cognition_data.json (binidx), ProjDir = /home/gavin/workspace/RWKV-PEFT/out_model/lora Epoch = 0 to 3 (will continue afterwards), save every 1 epoch Each "epoch" = 100 steps, 200 samples, 819200 tokens Model = 32 n_layer, 4096 n_embd, 4096 ctx_len Adam = lr 0.0005 to 5e-05, warmup 0 steps, beta (0.9, 0.99), eps 1e-08 Found torch 2.4.0+cu121, recommend 1.13.1+cu117 or newer Found deepspeed 0.14.4, recommend 0.7.0 (faster than newer versions) Found pytorch_lightning 1.9.5, recommend 1.9.5 ############################################################################ INFO:pytorch_lightning.utilities.rank_zero:{'load_model': '/home/gavin/data/model/rwkv6/RWKV-x060-World-7B-v2.1-20240507-ctx4096.pth', 'wandb': '', 'proj_dir': '/home/gavin/workspace/RWKV-PEFT/out_model/lora', 'random_seed': -1, 'data_file': '/home/gavin/workspace/self_recognition/data/20240710105508_200_self_cognition_data.json', 'data_type': 'binidx', 'vocab_size': 65536, 'ctx_len': 4096, 'epoch_steps': 100, 'epoch_count': 4, 'epoch_begin': 0, 'epoch_save': 1, 'micro_bsz': 2, 'n_layer': 32, 'n_embd': 4096, 'dim_att': 4096, 'dim_ffn': 14336, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 0.0005, 'lr_final': 5e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.99, 'adam_eps': 1e-08, 'grad_cp': 1, 'dropout': 0, 'weight_decay': 0, 'weight_decay_final': -1, 'my_pile_version': 1, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'head_size_a': 64, 'head_size_divisor': 8, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_random_steps': 0, 'my_testing': 
'x060', 'my_exit': 99999999, 'my_exit_tokens': 0, 'emb': False, 'lora': True, 'lora_load': '', 'lora_r': 128, 'lora_alpha': 256.0, 'lora_dropout': 0.01, 'lora_parts': 'att,ffn,time,ln', 'LISA': False, 'lisa_r': 2, 'lisa_k': 100, 'PISSA': False, 'svd_niter': 4, 'pissa_load': '', 'pissa_init': '', 'quant': 'none', 'dataload': 'get', 'state_tune': False, 'chunk_ctx': 512, 'fla': False, 'train_type': 'none', 'loss_mask': 'none', 'optim': 'none', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': None, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_1', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2024-08-12-17-45-25', 'betas': (0.9, 0.99), 'real_bsz': 2, 'run_name': '65536 ctx4096 L32 D4096'} /home/gavin/workspace/RWKV-PEFT/fla/ops/based/chunk_fuse.py:326: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. 
Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, scale=1): /home/gavin/workspace/RWKV-PEFT/fla/ops/based/chunk_fuse.py:362: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, dz): /home/gavin/workspace/RWKV-PEFT/fla/ops/based/parallel.py:297: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, scale): /home/gavin/workspace/RWKV-PEFT/fla/ops/based/parallel.py:334: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, dz): /home/gavin/workspace/RWKV-PEFT/fla/ops/gla/chunk_fuse.py:315: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, g, scale, initial_state, output_final_state): /home/gavin/workspace/RWKV-PEFT/fla/ops/gla/chunk_fuse.py:409: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, d_final_state=None): /home/gavin/workspace/RWKV-PEFT/fla/ops/gla/recurrent_fuse.py:266: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, gk, gv, scale=None, initial_state=None, output_final_state=False, reverse=False): /home/gavin/workspace/RWKV-PEFT/fla/ops/gla/recurrent_fuse.py:319: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, d_final_state=None): /home/gavin/workspace/RWKV-PEFT/fla/ops/retention/chunk.py:265: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. 
Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, initial_state, output_final_state): /home/gavin/workspace/RWKV-PEFT/fla/ops/retention/chunk.py:310: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, d_ht=None): /home/gavin/workspace/RWKV-PEFT/fla/ops/retention/chunk_fuse.py:237: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, initial_state, output_final_state): /home/gavin/workspace/RWKV-PEFT/fla/ops/retention/chunk_fuse.py:290: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, d_final_state=None): /home/gavin/workspace/RWKV-PEFT/fla/ops/retention/parallel.py:273: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v): /home/gavin/workspace/RWKV-PEFT/fla/ops/retention/parallel.py:304: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do): /home/gavin/workspace/RWKV-PEFT/fla/ops/abc/recurrent_fuse.py:212: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, s, g, scale=None, initial_state=None, output_final_state=False, reverse=False): /home/gavin/workspace/RWKV-PEFT/fla/ops/abc/recurrent_fuse.py:280: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, dht=None): /home/gavin/workspace/RWKV-PEFT/fla/ops/linear_attn/chunk.py:252: FutureWarning: torch.cuda.amp.custom_fwd(args...) 
is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, scale, initial_state, output_final_state): /home/gavin/workspace/RWKV-PEFT/fla/ops/linear_attn/chunk.py:297: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, d_ht=None): /home/gavin/workspace/RWKV-PEFT/fla/ops/linear_attn/chunk_fuse.py:226: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, scale, initial_state, output_final_state): /home/gavin/workspace/RWKV-PEFT/fla/ops/linear_attn/chunk_fuse.py:276: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, d_final_state=None): /home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/wy_fast.py:315: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, k, v, beta, chunk_size): /home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/wy_fast.py:324: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, dw, du): /home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/utils.py:196: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, k, v, beta, chunk_size): /home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/utils.py:205: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, do2): /home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/chunk_fuse.py:330: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. 
Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, beta, BT, initial_state, output_final_state, checkpoint_level=0): /home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/chunk_fuse.py:348: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, d_final_state=None): /home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/chunk.py:496: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, beta, BT, initial_state, output_final_state, checkpoint_level=1): /home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/chunk.py:516: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, d_ht=None): /home/gavin/workspace/RWKV-PEFT/fla/ops/rebased/parallel.py:296: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, scale): /home/gavin/workspace/RWKV-PEFT/fla/ops/rebased/parallel.py:333: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, dz): /home/gavin/workspace/RWKV-PEFT/fla/ops/rwkv6/recurrent_fuse.py:243: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, r, k, v, w, u, scale=None, initial_state=None, output_final_state=False, reverse=False): /home/gavin/workspace/RWKV-PEFT/fla/ops/rwkv6/recurrent_fuse.py:286: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. 
def backward(ctx, do, d_final_state=None): RWKV_MY_TESTING x060 Using /home/gavin/.cache/torch_extensions/py310_cu121 as PyTorch extensions root... Detected CUDA files, patching ldflags Emitting ninja build file /home/gavin/.cache/torch_extensions/py310_cu121/wkv6/build.ninja... /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/utils/cpp_extension.py:1965: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation. If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST']. warnings.warn( Building extension module wkv6... Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) [1/2] /usr/bin/nvcc --generate-dependencies-with-compile --dependency-output wkv6_cuda.cuda.o.d -DTORCH_EXTENSION_NAME=wkv6 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE="gcc" -DPYBIND11_STDLIB="libstdcpp" -DPYBIND11_BUILD_ABI="cxxabi1011" -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include/TH -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include/THC -isystem /home/gavin/miniconda3/envs/rwkv/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS -D__CUDA_NO_HALF_CONVERSIONS_ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -res-usage --use_fast_math -O3 -Xptxas -O3 --extra-device-vectorization -D_N_=64 -D_T_=4096 -std=c++17 -c /home/gavin/workspace/RWKV-PEFT/cuda/wkv6_cuda.cu -o wkv6_cuda.cuda.o FAILED: wkv6_cuda.cuda.o /usr/bin/nvcc --generate-dependencies-with-compile --dependency-output wkv6_cuda.cuda.o.d -DTORCH_EXTENSION_NAME=wkv6 
-DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE="gcc" -DPYBIND11_STDLIB="libstdcpp" -DPYBIND11_BUILD_ABI="cxxabi1011" -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include/TH -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include/THC -isystem /home/gavin/miniconda3/envs/rwkv/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS -D__CUDA_NO_HALF_CONVERSIONS_ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -res-usage --use_fast_math -O3 -Xptxas -O3 --extra-device-vectorization -D_N_=64 -D_T_=4096 -std=c++17 -c /home/gavin/workspace/RWKV-PEFT/cuda/wkv6_cuda.cu -o wkv6_cuda.cuda.o ptxas info : 1 bytes gmem ptxas info : Compiling entry function 'Z19kernel_backward_222IN3c108BFloat16EEviiiiPKT_S4_S4_PKfS4_S4_PS2' for 'sm_86' ptxas info : Function properties for Z19kernel_backward_222IN3c108BFloat16EEviiiiPKT_S4_S4_PKfS4_S4_PS2 16376 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads ptxas info : Used 96 registers, 512 bytes smem, 424 bytes cmem[0] ptxas info : Compiling entry function 'Z19kernel_backward_111IN3c108BFloat16EEviiiiPKT_S4_S4_PKfS4_S4_PS2_S7_S7_S7' for 'sm_86' ptxas info : Function properties for Z19kernel_backward_111IN3c108BFloat16EEviiiiPKT_S4_S4_PKfS4_S4_PS2_S7_S7_S7 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads ptxas info : Used 128 registers, 1536 bytes smem, 448 bytes cmem[0] ptxas info : Compiling entry function 'Z14kernel_forwardIN3c108BFloat16EEviiiiPKT_S4_S4_PKfS4_PS2' for 'sm_86' ptxas info : Function properties for Z14kernel_forwardIN3c108BFloat16EEviiiiPKT_S4_S4_PKfS4_PS2 0 bytes stack frame, 0 
bytes spill stores, 0 bytes spill loads ptxas info : Used 100 registers, 1024 bytes smem, 416 bytes cmem[0] /usr/include/c++/11/bits/std_function.h:435:145: error: parameter packs not expanded with ‘...’: 435 | function(_Functor&& __f) | ^ /usr/include/c++/11/bits/std_function.h:435:145: note: ‘_ArgTypes’ /usr/include/c++/11/bits/std_function.h:530:146: error: parameter packs not expanded with ‘...’: 530 | operator=(_Functor&& __f) | ^ /usr/include/c++/11/bits/std_function.h:530:146: note: ‘_ArgTypes’ ninja: build stopped: subcommand failed. Traceback (most recent call last): File "/home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 2105, in _run_ninja_build subprocess.run( File "/home/gavin/miniconda3/envs/rwkv/lib/python3.10/subprocess.py", line 526, in run raise CalledProcessError(retcode, process.args, subprocess.CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1. The above exception was the direct cause of the following exception: Traceback (most recent call last): File "/home/gavin/workspace/RWKV-PEFT/train.py", line 296, in from src.trainer import train_callback, generate_init_weight File "/home/gavin/workspace/RWKV-PEFT/src/trainer.py", line 6, in from .model import LORA_CONFIG File "/home/gavin/workspace/RWKV-PEFT/src/model.py", line 190, in wkv6_cuda = load(name="wkv6", sources=["cuda/wkv6_op.cpp", f"cuda/wkv6_cuda.cu"], File "/home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1312, in load return _jit_compile( File "/home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1722, in _jit_compile _write_ninja_file_and_build_library( File "/home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1834, in _write_ninja_file_and_build_library _run_ninja_build( File "/home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 
2121, in _run_ninja_build raise RuntimeError(message) from e RuntimeError: Error building extension 'wkv6'
Try checking whether torch.cuda.is_available() works. Torch version: 2.2.0+cu121.
Hi JL-er, I encountered the same error, RuntimeError: Error building extension 'wkv6', with torch 2.4.0+11.5, pytorch-lightning 1.9.5, and deepspeed 0.14.4. I tried your environment settings, but it doesn't work.
(rwkv) gavin@gavin-System-Product-Name:~/workspace/RWKV-PEFT$ bash lora.sh INFO:pytorch_lightning.utilities.rank_zero:########## work in progress ########## [2024-08-12 17:45:25,066] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect) [WARNING] async_io requires the dev libaio .so object and headers but these were not found. [WARNING] async_io: please install the libaio-dev package with apt [WARNING] If libaio is already installed (perhaps from source), try setting the CFLAGS and LDFLAGS environment variables to where it can be found. [WARNING] Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH [WARNING] sparse_attn requires a torch version >= 1.5 and < 2.0 but detected 2.4 [WARNING] using untested triton version (3.0.0), only 1.0.0 is known to be compatible /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/deepspeed/runtime/zero/linear.py:49: FutureWarning: torch.cuda.amp.custom_fwd(args...)is deprecated. Please usetorch.amp.custom_fwd(args..., device_type='cuda')instead. def forward(ctx, input, weight, bias=None): /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/deepspeed/runtime/zero/linear.py:67: FutureWarning:torch.cuda.amp.custom_bwd(args...)is deprecated. Please usetorch.amp.custom_bwd(args..., device_type='cuda')` instead. 
def backward(ctx, grad_output): INFO:pytorch_lightning.utilities.rank_zero: ############################################################################ RWKV-5 BF16 on 1x1 GPU, bsz 1x1x2=2, deepspeed_stage_1 with grad_cp Data = /home/gavin/workspace/self_recognition/data/20240710105508_200_self_cognition_data.json (binidx), ProjDir = /home/gavin/workspace/RWKV-PEFT/out_model/lora Epoch = 0 to 3 (will continue afterwards), save every 1 epoch Each "epoch" = 100 steps, 200 samples, 819200 tokens Model = 32 n_layer, 4096 n_embd, 4096 ctx_len Adam = lr 0.0005 to 5e-05, warmup 0 steps, beta (0.9, 0.99), eps 1e-08 Found torch 2.4.0+cu121, recommend 1.13.1+cu117 or newer Found deepspeed 0.14.4, recommend 0.7.0 (faster than newer versions) Found pytorch_lightning 1.9.5, recommend 1.9.5 ############################################################################ INFO:pytorch_lightning.utilities.rank_zero:{'load_model': '/home/gavin/data/model/rwkv6/RWKV-x060-World-7B-v2.1-20240507-ctx4096.pth', 'wandb': '', 'proj_dir': '/home/gavin/workspace/RWKV-PEFT/out_model/lora', 'random_seed': -1, 'data_file': '/home/gavin/workspace/self_recognition/data/20240710105508_200_self_cognition_data.json', 'data_type': 'binidx', 'vocab_size': 65536, 'ctx_len': 4096, 'epoch_steps': 100, 'epoch_count': 4, 'epoch_begin': 0, 'epoch_save': 1, 'micro_bsz': 2, 'n_layer': 32, 'n_embd': 4096, 'dim_att': 4096, 'dim_ffn': 14336, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 0.0005, 'lr_final': 5e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.99, 'adam_eps': 1e-08, 'grad_cp': 1, 'dropout': 0, 'weight_decay': 0, 'weight_decay_final': -1, 'my_pile_version': 1, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'head_size_a': 64, 'head_size_divisor': 8, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_random_steps': 0, 'my_testing': 
'x060', 'my_exit': 99999999, 'my_exit_tokens': 0, 'emb': False, 'lora': True, 'lora_load': '', 'lora_r': 128, 'lora_alpha': 256.0, 'lora_dropout': 0.01, 'lora_parts': 'att,ffn,time,ln', 'LISA': False, 'lisa_r': 2, 'lisa_k': 100, 'PISSA': False, 'svd_niter': 4, 'pissa_load': '', 'pissa_init': '', 'quant': 'none', 'dataload': 'get', 'state_tune': False, 'chunk_ctx': 512, 'fla': False, 'train_type': 'none', 'loss_mask': 'none', 'optim': 'none', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': None, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_1', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2024-08-12-17-45-25', 'betas': (0.9, 0.99), 'real_bsz': 2, 'run_name': '65536 ctx4096 L32 D4096'} /home/gavin/workspace/RWKV-PEFT/fla/ops/based/chunk_fuse.py:326: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. 
Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, scale=1): /home/gavin/workspace/RWKV-PEFT/fla/ops/based/chunk_fuse.py:362: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, dz): /home/gavin/workspace/RWKV-PEFT/fla/ops/based/parallel.py:297: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, scale): /home/gavin/workspace/RWKV-PEFT/fla/ops/based/parallel.py:334: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, dz): /home/gavin/workspace/RWKV-PEFT/fla/ops/gla/chunk_fuse.py:315: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, g, scale, initial_state, output_final_state): /home/gavin/workspace/RWKV-PEFT/fla/ops/gla/chunk_fuse.py:409: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, d_final_state=None): /home/gavin/workspace/RWKV-PEFT/fla/ops/gla/recurrent_fuse.py:266: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, gk, gv, scale=None, initial_state=None, output_final_state=False, reverse=False): /home/gavin/workspace/RWKV-PEFT/fla/ops/gla/recurrent_fuse.py:319: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, d_final_state=None): /home/gavin/workspace/RWKV-PEFT/fla/ops/retention/chunk.py:265: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. 
Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, initial_state, output_final_state): /home/gavin/workspace/RWKV-PEFT/fla/ops/retention/chunk.py:310: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, d_ht=None): /home/gavin/workspace/RWKV-PEFT/fla/ops/retention/chunk_fuse.py:237: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, initial_state, output_final_state): /home/gavin/workspace/RWKV-PEFT/fla/ops/retention/chunk_fuse.py:290: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, d_final_state=None): /home/gavin/workspace/RWKV-PEFT/fla/ops/retention/parallel.py:273: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v): /home/gavin/workspace/RWKV-PEFT/fla/ops/retention/parallel.py:304: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do): /home/gavin/workspace/RWKV-PEFT/fla/ops/abc/recurrent_fuse.py:212: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, s, g, scale=None, initial_state=None, output_final_state=False, reverse=False): /home/gavin/workspace/RWKV-PEFT/fla/ops/abc/recurrent_fuse.py:280: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, dht=None): /home/gavin/workspace/RWKV-PEFT/fla/ops/linear_attn/chunk.py:252: FutureWarning: torch.cuda.amp.custom_fwd(args...) 
is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, scale, initial_state, output_final_state): /home/gavin/workspace/RWKV-PEFT/fla/ops/linear_attn/chunk.py:297: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, d_ht=None): /home/gavin/workspace/RWKV-PEFT/fla/ops/linear_attn/chunk_fuse.py:226: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, scale, initial_state, output_final_state): /home/gavin/workspace/RWKV-PEFT/fla/ops/linear_attn/chunk_fuse.py:276: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, d_final_state=None): /home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/wy_fast.py:315: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, k, v, beta, chunk_size): /home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/wy_fast.py:324: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, dw, du): /home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/utils.py:196: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, k, v, beta, chunk_size): /home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/utils.py:205: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, do2): /home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/chunk_fuse.py:330: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. 
Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, beta, BT, initial_state, output_final_state, checkpoint_level=0): /home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/chunk_fuse.py:348: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, d_final_state=None): /home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/chunk.py:496: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, beta, BT, initial_state, output_final_state, checkpoint_level=1): /home/gavin/workspace/RWKV-PEFT/fla/ops/delta_rule/chunk.py:516: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, d_ht=None): /home/gavin/workspace/RWKV-PEFT/fla/ops/rebased/parallel.py:296: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, q, k, v, scale): /home/gavin/workspace/RWKV-PEFT/fla/ops/rebased/parallel.py:333: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. def backward(ctx, do, dz): /home/gavin/workspace/RWKV-PEFT/fla/ops/rwkv6/recurrent_fuse.py:243: FutureWarning: torch.cuda.amp.custom_fwd(args...) is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda') instead. def forward(ctx, r, k, v, w, u, scale=None, initial_state=None, output_final_state=False, reverse=False): /home/gavin/workspace/RWKV-PEFT/fla/ops/rwkv6/recurrent_fuse.py:286: FutureWarning: torch.cuda.amp.custom_bwd(args...) is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda') instead. 
def backward(ctx, do, d_final_state=None): RWKV_MY_TESTING x060 Using /home/gavin/.cache/torch_extensions/py310_cu121 as PyTorch extensions root... Detected CUDA files, patching ldflags Emitting ninja build file /home/gavin/.cache/torch_extensions/py310_cu121/wkv6/build.ninja... /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/utils/cpp_extension.py:1965: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation. If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST']. warnings.warn( Building extension module wkv6... Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) [1/2] /usr/bin/nvcc --generate-dependencies-with-compile --dependency-output wkv6_cuda.cuda.o.d -DTORCH_EXTENSION_NAME=wkv6 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE="gcc" -DPYBIND11_STDLIB="libstdcpp" -DPYBIND11_BUILD_ABI="cxxabi1011" -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include/TH -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include/THC -isystem /home/gavin/miniconda3/envs/rwkv/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS -D__CUDA_NO_HALF_CONVERSIONS_ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -res-usage --use_fast_math -O3 -Xptxas -O3 --extra-device-vectorization -D_N_=64 -D_T_=4096 -std=c++17 -c /home/gavin/workspace/RWKV-PEFT/cuda/wkv6_cuda.cu -o wkv6_cuda.cuda.o FAILED: wkv6_cuda.cuda.o /usr/bin/nvcc --generate-dependencies-with-compile --dependency-output wkv6_cuda.cuda.o.d -DTORCH_EXTENSION_NAME=wkv6 
-DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE="gcc" -DPYBIND11_STDLIB="libstdcpp" -DPYBIND11_BUILD_ABI="cxxabi1011" -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include/TH -isystem /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/include/THC -isystem /home/gavin/miniconda3/envs/rwkv/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS -D__CUDA_NO_HALF_CONVERSIONS_ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -res-usage --use_fast_math -O3 -Xptxas -O3 --extra-device-vectorization -D_N_=64 -D_T_=4096 -std=c++17 -c /home/gavin/workspace/RWKV-PEFT/cuda/wkv6_cuda.cu -o wkv6_cuda.cuda.o ptxas info : 1 bytes gmem ptxas info : Compiling entry function 'Z19kernel_backward_222IN3c108BFloat16EEviiiiPKT_S4_S4_PKfS4_S4_PS2' for 'sm_86' ptxas info : Function properties for Z19kernel_backward_222IN3c108BFloat16EEviiiiPKT_S4_S4_PKfS4_S4_PS2 16376 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads ptxas info : Used 96 registers, 512 bytes smem, 424 bytes cmem[0] ptxas info : Compiling entry function 'Z19kernel_backward_111IN3c108BFloat16EEviiiiPKT_S4_S4_PKfS4_S4_PS2_S7_S7_S7' for 'sm_86' ptxas info : Function properties for Z19kernel_backward_111IN3c108BFloat16EEviiiiPKT_S4_S4_PKfS4_S4_PS2_S7_S7_S7 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads ptxas info : Used 128 registers, 1536 bytes smem, 448 bytes cmem[0] ptxas info : Compiling entry function 'Z14kernel_forwardIN3c108BFloat16EEviiiiPKT_S4_S4_PKfS4_PS2' for 'sm_86' ptxas info : Function properties for Z14kernel_forwardIN3c108BFloat16EEviiiiPKT_S4_S4_PKfS4_PS2 0 bytes stack frame, 0 
bytes spill stores, 0 bytes spill loads ptxas info : Used 100 registers, 1024 bytes smem, 416 bytes cmem[0] /usr/include/c++/11/bits/std_function.h:435:145: error: parameter packs not expanded with ‘...’: 435 | function(_Functor&& __f) | ^ /usr/include/c++/11/bits/std_function.h:435:145: note: ‘_ArgTypes’ /usr/include/c++/11/bits/std_function.h:530:146: error: parameter packs not expanded with ‘...’: 530 | operator=(_Functor&& __f) | ^ /usr/include/c++/11/bits/std_function.h:530:146: note: ‘_ArgTypes’ ninja: build stopped: subcommand failed. Traceback (most recent call last): File "/home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 2105, in _run_ninja_build subprocess.run( File "/home/gavin/miniconda3/envs/rwkv/lib/python3.10/subprocess.py", line 526, in run raise CalledProcessError(retcode, process.args, subprocess.CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1. The above exception was the direct cause of the following exception: Traceback (most recent call last): File "/home/gavin/workspace/RWKV-PEFT/train.py", line 296, in from src.trainer import train_callback, generate_init_weight File "/home/gavin/workspace/RWKV-PEFT/src/trainer.py", line 6, in from .model import LORA_CONFIG File "/home/gavin/workspace/RWKV-PEFT/src/model.py", line 190, in wkv6_cuda = load(name="wkv6", sources=["cuda/wkv6_op.cpp", f"cuda/wkv6_cuda.cu"], File "/home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1312, in load return _jit_compile( File "/home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1722, in _jit_compile _write_ninja_file_and_build_library( File "/home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1834, in _write_ninja_file_and_build_library _run_ninja_build( File "/home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 
2121, in _run_ninja_build raise RuntimeError(message) from e RuntimeError: Error building extension 'wkv6'
Try checking whether torch.cuda.is_available() works. Torch version: 2.2.0+cu121.
My CUDA is available. I even removed this environment and reinstalled all the packages, including torch 2.2.0+cu121, but this error still occurs: RuntimeError: Error building extension 'wkv6'
py310_cu121
Try deleting the cached extension build directory under /home/gavin/.cache/torch_extensions
py310_cu121
Try deleting the cached extension build directory under /home/gavin/.cache/torch_extensions
Sorry... it doesn't seem to work. I believe some hidden conflicts still exist. I have already set it to python310_cu121, torch version 2.2.0+cu121, pytorch-lightning 1.9.5.
(rwkv) gavin@gavin-System-Product-Name:~/workspace/RWKV-PEFT$ bash lora.sh
INFO:pytorch_lightning.utilities.rank_zero:########## work in progress ##########
[2024-08-15 14:50:01,159] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[WARNING] Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH
[WARNING] sparse_attn requires a torch version >= 1.5 and < 2.0 but detected 2.3
[WARNING] using untested triton version (2.3.1), only 1.0.0 is known to be compatible
INFO:pytorch_lightning.utilities.rank_zero:
############################################################################
#
# RWKV-5 BF16 on 1x1 GPU, bsz 1x1x2=2, deepspeed_stage_1 with grad_cp
#
# Data = /home/gavin/workspace/self_recognition/data/20240710105508_200_self_cognition_data.json (binidx), ProjDir = /home/gavin/workspace/RWKV-PEFT/out_model/lora
#
# Epoch = 0 to 3 (will continue afterwards), save every 1 epoch
#
# Each "epoch" = 100 steps, 200 samples, 819200 tokens
#
# Model = 32 n_layer, 4096 n_embd, 4096 ctx_len
#
# Adam = lr 0.0005 to 5e-05, warmup 0 steps, beta (0.9, 0.99), eps 1e-08
#
# Found torch 2.3.1, recommend 1.13.1+cu117 or newer
# Found deepspeed 0.14.4, recommend 0.7.0 (faster than newer versions)
# Found pytorch_lightning 1.9.5, recommend 1.9.5
#
############################################################################
INFO:pytorch_lightning.utilities.rank_zero:{'load_model': '/home/gavin/data/model/rwkv6/RWKV-x060-World-7B-v2.1-20240507-ctx4096.pth', 'wandb': '', 'proj_dir': '/home/gavin/workspace/RWKV-PEFT/out_model/lora', 'random_seed': -1, 'data_file': '/home/gavin/workspace/self_recognition/data/20240710105508_200_self_cognition_data.json', 'data_type': 'binidx', 'vocab_size': 65536, 'ctx_len': 4096, 'epoch_steps': 100, 'epoch_count': 4, 'epoch_begin': 0, 'epoch_save': 1, 'micro_bsz': 2, 'n_layer': 32, 'n_embd': 4096, 'dim_att': 4096, 'dim_ffn': 14336, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 0.0005, 'lr_final': 5e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.99, 'adam_eps': 1e-08, 'grad_cp': 1, 'dropout': 0, 'weight_decay': 0, 'weight_decay_final': -1, 'my_pile_version': 1, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'head_size_a': 64, 'head_size_divisor': 8, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_random_steps': 0, 'my_testing': 'x060', 'my_exit': 99999999, 'my_exit_tokens': 0, 'emb': False, 'lora': True, 'lora_load': '', 'lora_r': 128, 'lora_alpha': 256.0, 'lora_dropout': 0.01, 'lora_parts': 'att,ffn,time,ln', 'LISA': False, 'lisa_r': 2, 'lisa_k': 100, 'PISSA': False, 'svd_niter': 4, 'pissa_load': '', 'pissa_init': '', 'quant': 'none', 'dataload': 'get', 'state_tune': False, 'chunk_ctx': 512, 'fla': False, 'train_type': 'none', 'loss_mask': 'none', 'optim': 'none', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 
'accumulate_grad_batches': None, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_1', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2024-08-15-14-50-01', 'betas': (0.9, 0.99), 'real_bsz': 2, 'run_name': '65536 ctx4096 L32 D4096'}
RWKV_MY_TESTING x060
Using /home/gavin/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...
Detected CUDA files, patching ldflags
Emitting ninja build file /home/gavin/.cache/torch_extensions/py310_cu121/wkv6/build.ninja...
/home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/utils/cpp_extension.py:1967: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation.
If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].
warnings.warn(
Building extension module wkv6...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
ninja: no work to do.
Loading extension module wkv6...
INFO:pytorch_lightning.utilities.rank_zero:Current vocab size = 65536 (make sure it's correct)
Traceback (most recent call last):
File "/home/gavin/workspace/RWKV-PEFT/train.py", line 299, in <module>
train_data = MyDataset(args)
File "/home/gavin/workspace/RWKV-PEFT/src/dataset.py", line 24, in __init__
self.data = MMapIndexedDataset(args.data_file)
File "/home/gavin/workspace/RWKV-PEFT/src/binidx.py", line 179, in __init__
self._do_init(path, skip_warmup)
File "/home/gavin/workspace/RWKV-PEFT/src/binidx.py", line 189, in _do_init
self._index = self.Index(index_file_path(self._path), skip_warmup)
File "/home/gavin/workspace/RWKV-PEFT/src/binidx.py", line 105, in __init__
with open(path, "rb") as stream:
FileNotFoundError: [Errno 2] No such file or directory: '/home/gavin/workspace/self_recognition/data/20240710105508_200_self_cognition_data.json.idx'
Exception ignored in: <function MMapIndexedDataset.Index.__del__ at 0x70efe8351a20>
Traceback (most recent call last):
File "/home/gavin/workspace/RWKV-PEFT/src/binidx.py", line 150, in __del__
self._bin_buffer_mmap._mmap.close()
AttributeError: 'Index' object has no attribute '_bin_buffer_mmap'
Exception ignored in: <function MMapIndexedDataset.__del__ at 0x70efe8351fc0>
Traceback (most recent call last):
File "/home/gavin/workspace/RWKV-PEFT/src/binidx.py", line 202, in __del__
self._bin_buffer_mmap._mmap.close()
AttributeError: 'MMapIndexedDataset' object has no attribute '_bin_buffer_mmap'
FileNotFoundError: [Errno 2] No such file or directory: '/home/gavin/workspace/self_recognition/data/20240710105508_200_self_cognition_data.json.idx'
FileNotFoundError: [Errno 2] No such file or directory: '/home/gavin/workspace/self_recognition/data/20240710105508_200_self_cognition_data.json.idx' Check that your file path is correct. I also suggest using the binidx data format; see RWKV-LM for details. Your previous error (Loading extension module wkv6...) has been resolved.
py310_cu121
deleting the /home/gavin/.cache/torch_extensions directory
Sorry... it doesn't seem to work. I believe there are still some hidden conflicts. I have already set it to python310_cu121, torch version 2.2.0+cu121, pytorch-lightning 1.9.5.
(rwkv) gavin@gavin-System-Product-Name:~/workspace/RWKV-PEFT$ bash lora.sh INFO:pytorch_lightning.utilities.rank_zero:########## work in progress ########## [2024-08-15 14:50:01,159] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect) [WARNING] Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH [WARNING] sparse_attn requires a torch version >= 1.5 and < 2.0 but detected 2.3 [WARNING] using untested triton version (2.3.1), only 1.0.0 is known to be compatible INFO:pytorch_lightning.utilities.rank_zero: ############################################################################ # # RWKV-5 BF16 on 1x1 GPU, bsz 1x1x2=2, deepspeed_stage_1 with grad_cp # # Data = /home/gavin/workspace/self_recognition/data/20240710105508_200_self_cognition_data.json (binidx), ProjDir = /home/gavin/workspace/RWKV-PEFT/out_model/lora # # Epoch = 0 to 3 (will continue afterwards), save every 1 epoch # # Each "epoch" = 100 steps, 200 samples, 819200 tokens # # Model = 32 n_layer, 4096 n_embd, 4096 ctx_len # # Adam = lr 0.0005 to 5e-05, warmup 0 steps, beta (0.9, 0.99), eps 1e-08 # # Found torch 2.3.1, recommend 1.13.1+cu117 or newer # Found deepspeed 0.14.4, recommend 0.7.0 (faster than newer versions) # Found pytorch_lightning 1.9.5, recommend 1.9.5 # ############################################################################ INFO:pytorch_lightning.utilities.rank_zero:{'load_model': '/home/gavin/data/model/rwkv6/RWKV-x060-World-7B-v2.1-20240507-ctx4096.pth', 'wandb': '', 'proj_dir': '/home/gavin/workspace/RWKV-PEFT/out_model/lora', 'random_seed': -1, 'data_file': '/home/gavin/workspace/self_recognition/data/20240710105508_200_self_cognition_data.json', 'data_type': 'binidx', 'vocab_size': 65536, 'ctx_len': 4096, 'epoch_steps': 100, 'epoch_count': 4, 'epoch_begin': 0, 'epoch_save': 1, 'micro_bsz': 2, 'n_layer': 32, 'n_embd': 4096, 'dim_att': 4096, 'dim_ffn': 14336, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 
'tiny_att_layer': -999, 'lr_init': 0.0005, 'lr_final': 5e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.99, 'adam_eps': 1e-08, 'grad_cp': 1, 'dropout': 0, 'weight_decay': 0, 'weight_decay_final': -1, 'my_pile_version': 1, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'head_size_a': 64, 'head_size_divisor': 8, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_random_steps': 0, 'my_testing': 'x060', 'my_exit': 99999999, 'my_exit_tokens': 0, 'emb': False, 'lora': True, 'lora_load': '', 'lora_r': 128, 'lora_alpha': 256.0, 'lora_dropout': 0.01, 'lora_parts': 'att,ffn,time,ln', 'LISA': False, 'lisa_r': 2, 'lisa_k': 100, 'PISSA': False, 'svd_niter': 4, 'pissa_load': '', 'pissa_init': '', 'quant': 'none', 'dataload': 'get', 'state_tune': False, 'chunk_ctx': 512, 'fla': False, 'train_type': 'none', 'loss_mask': 'none', 'optim': 'none', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': None, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_1', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 
'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2024-08-15-14-50-01', 'betas': (0.9, 0.99), 'real_bsz': 2, 'run_name': '65536 ctx4096 L32 D4096'} RWKV_MY_TESTING x060 Using /home/gavin/.cache/torch_extensions/py310_cu121 as PyTorch extensions root... Detected CUDA files, patching ldflags Emitting ninja build file /home/gavin/.cache/torch_extensions/py310_cu121/wkv6/build.ninja... /home/gavin/miniconda3/envs/rwkv/lib/python3.10/site-packages/torch/utils/cpp_extension.py:1967: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation. If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST']. warnings.warn( Building extension module wkv6... Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) ninja: no work to do. Loading extension module wkv6... 
INFO:pytorch_lightning.utilities.rank_zero:Current vocab size = 65536 (make sure it's correct) Traceback (most recent call last): File "/home/gavin/workspace/RWKV-PEFT/train.py", line 299, in <module> train_data = MyDataset(args) File "/home/gavin/workspace/RWKV-PEFT/src/dataset.py", line 24, in __init__ self.data = MMapIndexedDataset(args.data_file) File "/home/gavin/workspace/RWKV-PEFT/src/binidx.py", line 179, in __init__ self._do_init(path, skip_warmup) File "/home/gavin/workspace/RWKV-PEFT/src/binidx.py", line 189, in _do_init self._index = self.Index(index_file_path(self._path), skip_warmup) File "/home/gavin/workspace/RWKV-PEFT/src/binidx.py", line 105, in __init__ with open(path, "rb") as stream: FileNotFoundError: [Errno 2] No such file or directory: '/home/gavin/workspace/self_recognition/data/20240710105508_200_self_cognition_data.json.idx' Exception ignored in: <function MMapIndexedDataset.Index.__del__ at 0x70efe8351a20> Traceback (most recent call last): File "/home/gavin/workspace/RWKV-PEFT/src/binidx.py", line 150, in __del__ self._bin_buffer_mmap._mmap.close() AttributeError: 'Index' object has no attribute '_bin_buffer_mmap' Exception ignored in: <function MMapIndexedDataset.__del__ at 0x70efe8351fc0> Traceback (most recent call last): File "/home/gavin/workspace/RWKV-PEFT/src/binidx.py", line 202, in __del__ self._bin_buffer_mmap._mmap.close() AttributeError: 'MMapIndexedDataset' object has no attribute '_bin_buffer_mmap'
May I ask if you have joined the official RWKV QQ group? You can ask questions there and check the official RWKV tutorial.
Hey, I am getting this error: