hustvl / Vim

[ICML 2024] Vision Mamba: Efficient Visual Representation Learning with Bidirectional State Space Model
Apache License 2.0
2.55k stars 160 forks source link

subprocess.CalledProcessError #48

Open hhhyyyqqq opened 3 months ago

hhhyyyqqq commented 3 months ago

```
/home/ai1015/anaconda3/envs/vim/bin/python /mnt/data/ai1015/Vim/vim/main.py --model vim_tiny_patch16_224_bimambav2_final_pool_mean_abs_pos_embed_with_midclstok_div2
Not using distributed mode
Namespace(gpu=0, batch_size=2, epochs=300, bce_loss=False, unscale_lr=False, model='vim_tiny_patch16_224_bimambav2_final_pool_mean_abs_pos_embed_with_midclstok_div2', input_size=224, drop=0.0, drop_path=0.1, model_ema=True, model_ema_decay=0.99996, model_ema_force_cpu=False, opt='adamw', opt_eps=1e-08, opt_betas=None, clip_grad=None, momentum=0.9, weight_decay=0.05, sched='cosine', lr=0.0005, lr_noise=None, lr_noise_pct=0.67, lr_noise_std=1.0, warmup_lr=1e-06, min_lr=1e-05, decay_epochs=30, warmup_epochs=5, cooldown_epochs=10, patience_epochs=10, decay_rate=0.1, color_jitter=0.3, aa='rand-m9-mstd0.5-inc1', smoothing=0.1, train_interpolation='bicubic', repeated_aug=True, train_mode=True, ThreeAugment=False, src=False, reprob=0.25, remode='pixel', recount=1, resplit=False, mixup=0.8, cutmix=1.0, cutmix_minmax=None, mixup_prob=1.0, mixup_switch_prob=0.5, mixup_mode='batch', teacher_model='regnety_160', teacher_path='', distillation_type='none', distillation_alpha=0.5, distillation_tau=1.0, cosub=False, finetune='', attn_only=False, data_path='/mnt/data/ai1015/Vim/data/imagenet/', data_set='IMNET', inat_category='name', output_dir='', device='cuda', seed=0, resume='', start_epoch=0, eval=False, eval_crop_ratio=0.875, dist_eval=False, num_workers=10, pin_mem=True, distributed=False, world_size=1, dist_url='env://', if_amp=True, if_continue_inf=False, if_nan2num=False, if_random_cls_token_position=False, if_random_token_rank=False, local_rank=0)
Creating model: vim_tiny_patch16_224_bimambav2_final_pool_mean_abs_pos_embed_with_midclstok_div2
number of params: 6228874
Start training for 300 epochs
/usr/bin/ld: skipping incompatible /lib/i386-linux-gnu/libcuda.so when searching for -lcuda
/usr/bin/ld: skipping incompatible /lib/i386-linux-gnu/libcuda.so when searching for -lcuda
/usr/bin/ld: cannot find -lcuda
collect2: error: ld returned 1 exit status
Traceback (most recent call last):
  File "/mnt/data/ai1015/Vim/vim/main.py", line 550, in <module>
    main(args)
  File "/mnt/data/ai1015/Vim/vim/main.py", line 482, in main
    train_stats = train_one_epoch(
  File "/mnt/data/ai1015/Vim/vim/engine.py", line 54, in train_one_epoch
    outputs = model(samples, if_random_cls_token_position=args.if_random_cls_token_position, if_random_token_rank=args.if_random_token_rank)
  File "/home/ai1015/anaconda3/envs/vim/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/ai1015/anaconda3/envs/vim/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/mnt/data/ai1015/Vim/vim/models_mamba.py", line 543, in forward
    x = self.forward_features(x, inference_params, if_random_cls_token_position=if_random_cls_token_position, if_random_token_rank=if_random_token_rank)
  File "/mnt/data/ai1015/Vim/vim/models_mamba.py", line 480, in forward_features
    hidden_states, residual = layer(
  File "/home/ai1015/anaconda3/envs/vim/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/ai1015/anaconda3/envs/vim/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/mnt/data/ai1015/Vim/vim/models_mamba.py", line 115, in forward
    hidden_states, residual = fused_add_norm_fn(
  File "/home/ai1015/anaconda3/envs/vim/lib/python3.10/site-packages/mamba_ssm/ops/triton/layernorm.py", line 478, in rms_norm_fn
    return LayerNormFn.apply(x, weight, bias, residual, eps, prenorm, residual_in_fp32, True)
  File "/home/ai1015/anaconda3/envs/vim/lib/python3.10/site-packages/torch/autograd/function.py", line 539, in apply
    return super().apply(*args, **kwargs)  # type: ignore[misc]
  File "/home/ai1015/anaconda3/envs/vim/lib/python3.10/site-packages/mamba_ssm/ops/triton/layernorm.py", line 411, in forward
    y, mean, rstd, residual_out = _layer_norm_fwd(
  File "/home/ai1015/anaconda3/envs/vim/lib/python3.10/site-packages/mamba_ssm/ops/triton/layernorm.py", line 155, in _layer_norm_fwd
    _layer_norm_fwd_1pass_kernel[(M,)](
  File "/home/ai1015/anaconda3/envs/vim/lib/python3.10/site-packages/triton/runtime/autotuner.py", line 100, in run
    timings = {config: self._bench(*args, config=config, **kwargs)
  File "/home/ai1015/anaconda3/envs/vim/lib/python3.10/site-packages/triton/runtime/autotuner.py", line 100, in <dictcomp>
    timings = {config: self._bench(*args, config=config, **kwargs)
  File "/home/ai1015/anaconda3/envs/vim/lib/python3.10/site-packages/triton/runtime/autotuner.py", line 83, in _bench
    return do_bench(kernel_call, warmup=self.warmup, rep=self.rep, quantiles=(0.5, 0.2, 0.8))
  File "/home/ai1015/anaconda3/envs/vim/lib/python3.10/site-packages/triton/testing.py", line 104, in do_bench
    fn()
  File "/home/ai1015/anaconda3/envs/vim/lib/python3.10/site-packages/triton/runtime/autotuner.py", line 81, in kernel_call
    self.fn.run(*args, num_warps=config.num_warps, num_stages=config.num_stages, **current)
  File "<string>", line 63, in _layer_norm_fwd_1pass_kernel
  File "/home/ai1015/anaconda3/envs/vim/lib/python3.10/site-packages/triton/compiler/compiler.py", line 425, in compile
    so_path = make_stub(name, signature, constants)
  File "/home/ai1015/anaconda3/envs/vim/lib/python3.10/site-packages/triton/compiler/make_launcher.py", line 39, in make_stub
    so = _build(name, src_path, tmpdir)
  File "/home/ai1015/anaconda3/envs/vim/lib/python3.10/site-packages/triton/common/build.py", line 90, in _build
    ret = subprocess.check_call(cc_cmd)
  File "/home/ai1015/anaconda3/envs/vim/lib/python3.10/subprocess.py", line 369, in check_call
    raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['/usr/bin/gcc', '/tmp/tmpci9keay7/main.c', '-O3', '-I/home/ai1015/anaconda3/envs/vim/lib/python3.10/site-packages/triton/common/../third_party/cuda/include', '-I/home/ai1015/anaconda3/envs/vim/include/python3.10', '-I/tmp/tmpci9keay7', '-shared', '-fPIC', '-lcuda', '-o', '/tmp/tmpci9keay7/_layer_norm_fwd_1pass_kernel.cpython-310-x86_64-linux-gnu.so', '-L/lib/x86_64-linux-gnu', '-L/lib/i386-linux-gnu', '-L/lib/i386-linux-gnu']' returned non-zero exit status 1.
```

What is causing this error, and how can I fix it?