liulhdarks opened this issue 1 year ago
Also very interested; please share if anyone has updates on this.
Yes, please, add this functionality, thank you.
Interested too, thx a lot
Minimal script illustrating the issue:
```python
# launched with
# deepspeed --num_gpus 4 deepspeed_starcoder.py
import os
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
import deepspeed

local_rank = int(os.getenv('LOCAL_RANK', '0'))
world_size = int(os.getenv('WORLD_SIZE', '1'))

checkpoint = "bigcode/starcoder"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

ds_config = {
    "fp16": {"enabled": False},
    "bf16": {"enabled": True},
    "zero_optimization": {
        "stage": 3,
        "overlap_comm": True,
        "allgather_partitions": True,
        "allgather_bucket_size": 2e8,
        "reduce_scatter": True,
        "contiguous_gradients": True,
        "sub_group_size": 1e9,
        "reduce_bucket_size": "auto",
        "stage3_prefetch_bucket_size": "auto",
        "stage3_param_persistence_threshold": "auto",
        "stage3_max_live_parameters": 1e9,
        "stage3_max_reuse_distance": 1e9,
        "stage3_gather_16bit_weights_on_model_save": True,
    },
    "train_batch_size": 1,
}

# HfDeepSpeedConfig must exist before from_pretrained so the model is loaded
# directly into ZeRO-3 partitioned parameters
from transformers.deepspeed import HfDeepSpeedConfig
dschf = HfDeepSpeedConfig(ds_config)  # keep this object alive

model = AutoModelForCausalLM.from_pretrained(checkpoint)
print("Loaded model")

ds_engine = deepspeed.init_inference(
    model=model,
    mp_size=world_size,
    dtype=torch.bfloat16,
    replace_with_kernel_inject=True,
)
print("Initialized DeepSpeed inference engine")

generator = pipeline(
    'text-generation',
    model=ds_engine.module,
    tokenizer=tokenizer,
    device=local_rank,
)
ret = generator("DeepSpeed is", do_sample=True, max_new_tokens=200)

if not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0:
    print(ret)
```
Running it gives the error `'weight' must be 2-D`, triggered by the embedding lookup `self.wte(input_ids)` in `modeling_gpt_bigcode.py` from the Hugging Face Transformers library.
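The 2-D error is presumably a symptom of ZeRO-3 partitioning: with `HfDeepSpeedConfig` active, `from_pretrained` loads the weights already sharded, and since kernel injection has no policy for the GPT BigCode architecture yet, the embedding matrix is never gathered back into a full 2-D tensor before `self.wte(input_ids)` runs. Until `init_inference` supports this model, one possible workaround is plain ZeRO-3 inference via `deepspeed.initialize`, following the ZeRO-3 inference recipe from the Transformers DeepSpeed integration docs. A minimal sketch, untested on StarCoder (the config values, script name, and `synced_gpus` usage are illustrative assumptions):

```python
# zero3_inference_starcoder.py -- hypothetical workaround, not verified on StarCoder
# launched with: deepspeed --num_gpus 4 zero3_inference_starcoder.py
import os

import torch
import deepspeed
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.deepspeed import HfDeepSpeedConfig

local_rank = int(os.getenv('LOCAL_RANK', '0'))
checkpoint = "bigcode/starcoder"

# minimal ZeRO stage-3 config; the fuller config from the script above should also work
ds_config = {
    "bf16": {"enabled": True},
    "zero_optimization": {"stage": 3},
    "train_micro_batch_size_per_gpu": 1,
}
dschf = HfDeepSpeedConfig(ds_config)  # must be created before from_pretrained, keep alive

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.bfloat16)

# deepspeed.initialize (not init_inference) keeps the ZeRO-3 sharding and gathers
# each layer's weights on the fly during forward(), so no kernel injection policy
# for GPT BigCode is needed
ds_engine = deepspeed.initialize(model=model, config_params=ds_config)[0]
ds_engine.module.eval()

inputs = tokenizer("DeepSpeed is", return_tensors="pt").to(f"cuda:{local_rank}")
with torch.no_grad():
    # synced_gpus=True keeps all ranks stepping together, which ZeRO-3 requires
    outputs = ds_engine.module.generate(
        **inputs, do_sample=True, max_new_tokens=200, synced_gpus=True
    )

if not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0:
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

This is likely much slower than fused inference kernels, but it should at least avoid the partitioned-weight error; proper GPT BigCode support in `init_inference` would still be very welcome.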
Can anyone please say how long it might take to add support for this model?
When will the Hugging Face GPT BigCode model be supported?