Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Convert to turbomind format: 0%| | 0/64 [00:00<?, ?it/s]Traceback (most recent call last):
File "/home/nlp/miniconda3/envs/baoy/bin/lmdeploy", line 8, in
sys.exit(run())
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/cli/entrypoint.py", line 37, in run
args.run(args)
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/cli/serve.py", line 283, in api_server
run_api_server(args.model_path,
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/serve/openai/api_server.py", line 1191, in serve
VariableInterface.async_engine = pipeline_class(
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/serve/async_engine.py", line 206, in init
self._build_turbomind(model_path=model_path,
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/serve/async_engine.py", line 254, in _build_turbomind
self.engine = tm.TurboMind.from_pretrained(
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/turbomind/turbomind.py", line 396, in from_pretrained
return cls(model_path=pretrained_model_name_or_path,
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/turbomind/turbomind.py", line 170, in init
self.model_comm = self._from_hf(model_source=model_source,
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/turbomind/turbomind.py", line 305, in _from_hf
output_model.export()
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/turbomind/deploy/target_model/base.py", line 273, in export
self.export_transformer_block(bin, i)
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/turbomind/deploy/target_model/w4.py", line 156, in export_transformer_block
self.save_split(w2_sz, f'layers.{i}.feed_forward.w2.scales_zeros', 0)
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/turbomind/deploy/target_model/base.py", line 246, in save_split
assert tensor.shape[split_dim] % tp == 0
AssertionError
Checklist
Describe the bug
qwen1.5-32b-chat-awq-4bit 设置 --tp 2 不会报错, 设置 --tp 4 就会报错。
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. Convert to turbomind format: 0%| | 0/64 [00:00<?, ?it/s]Traceback (most recent call last): File "/home/nlp/miniconda3/envs/baoy/bin/lmdeploy", line 8, in
sys.exit(run())
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/cli/entrypoint.py", line 37, in run
args.run(args)
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/cli/serve.py", line 283, in api_server
run_api_server(args.model_path,
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/serve/openai/api_server.py", line 1191, in serve
VariableInterface.async_engine = pipeline_class(
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/serve/async_engine.py", line 206, in init
self._build_turbomind(model_path=model_path,
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/serve/async_engine.py", line 254, in _build_turbomind
self.engine = tm.TurboMind.from_pretrained(
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/turbomind/turbomind.py", line 396, in from_pretrained
return cls(model_path=pretrained_model_name_or_path,
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/turbomind/turbomind.py", line 170, in init
self.model_comm = self._from_hf(model_source=model_source,
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/turbomind/turbomind.py", line 305, in _from_hf
output_model.export()
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/turbomind/deploy/target_model/base.py", line 273, in export
self.export_transformer_block(bin, i)
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/turbomind/deploy/target_model/w4.py", line 156, in export_transformer_block
self.save_split(w2_sz, f'layers.{i}.feed_forward.w2.scales_zeros', 0)
File "/home/nlp/miniconda3/envs/baoy/lib/python3.10/site-packages/lmdeploy/turbomind/deploy/target_model/base.py", line 246, in save_split
assert tensor.shape[split_dim] % tp == 0
AssertionError
Reproduction
CUDA_VISIBLE_DEVICES=0,1,2,3 nohup lmdeploy serve api_server /opt/nlp/pretrain_models/Qwen1.5-32B-Chat-awq4 \
    --model-name qwen \
    --server-name 0.0.0.0 \
    --server-port 23333 \
    --tp 4 \
    --rope-scaling-factor 2.0 \
    --session-len 32000 \
    --quant-policy 8 \
    --model-format awq > 32.log 2>&1 &
Environment
Error traceback
No response