unslothai / unsloth

Finetune Llama 3.1, Mistral, Phi & Gemma LLMs 2-5x faster with 80% less memory
https://unsloth.ai
Apache License 2.0

ValueError: Unknown RoPE scaling type longrope #1042

Open XCYXHL opened 1 week ago

XCYXHL commented 1 week ago

OS: Windows. I think my environment is set up correctly; I am running a Jupyter notebook locally.

When I run this code:

"from unsloth import FastLanguageModel import torch max_seq_length = 8192 # Choose any! We auto support RoPE Scaling internally! dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+ load_in_4bit = False # Use 4bit quantization to reduce memory usage. Can be False.

4bit pre quantized models we support for 4x faster downloading + no OOMs.

fourbit_models = [ "unsloth/Meta-Llama-3.1-8B-bnb-4bit", # Llama-3.1 15 trillion tokens model 2x faster! "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", "unsloth/Meta-Llama-3.1-70B-bnb-4bit", "unsloth/Meta-Llama-3.1-405B-bnb-4bit", # We also uploaded 4bit for 405b! "unsloth/Mistral-Nemo-Base-2407-bnb-4bit", # New Mistral 12b 2x faster! "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit", "unsloth/mistral-7b-v0.3-bnb-4bit", # Mistral v3 2x faster! "unsloth/mistral-7b-instruct-v0.3-bnb-4bit", "unsloth/Phi-3.5-mini-instruct", # Phi-3.5 2x faster! "unsloth/Phi-3-medium-4k-instruct", "unsloth/gemma-2-9b-bnb-4bit", "unsloth/gemma-2-27b-bnb-4bit", # Gemma 2x faster! ] # More models at https://huggingface.co/unsloth

model, tokenizer = FastLanguageModel.from_pretrained( model_name = "unsloth/Phi-3.5-mini-instruct", max_seq_length = max_seq_length, dtype = dtype, load_in_4bit = load_in_4bit, trust_remote_code=True,

token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf

)"

And this is the result:

```
Unsloth: WARNING trust_remote_code is True. Are you certain you want to do remote code execution?
==((====))==  Unsloth 2024.8: Fast Llama patching. Transformers = 4.44.2.
   \\   /|    GPU: NVIDIA GeForce RTX 4060. Max memory: 7.996 GB. Platform = Windows.
O^O/ \_/ \    Pytorch: 2.3.0+cu121. CUDA = 8.9. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.26.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
```

ValueError: Unknown RoPE scaling type longrope

Here is the full traceback:


```
ValueError                                Traceback (most recent call last)
Cell In[12], line 27
      8 fourbit_models = [
      9     "unsloth/Meta-Llama-3.1-8B-bnb-4bit",      # Llama-3.1 15 trillion tokens model 2x faster!
     10     "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
   (...)
     20     "unsloth/gemma-2-27b-bnb-4bit",            # Gemma 2x faster!
     21 ] # More models at https://huggingface.co/unsloth
     24 # model_path = os.path.join('E:', 'Graduate', 'unsloth', 'Phi-3.5-mini-instruct')
     25
     26 # This part is the original code
---> 27 model, tokenizer = FastLanguageModel.from_pretrained(
     28     model_name = "unsloth/Phi-3.5-mini-instruct",
     29     # model_name = "./Phi-3.5-mini-instruct",
     30     # model_name = "E:/Graduate/unsloth/Phi-3.5-mini-instruct",
     31     # local_files_only = True,
     32     max_seq_length = max_seq_length,
     33     dtype = dtype,
     34     load_in_4bit = load_in_4bit,
     35     trust_remote_code = True,
     36     # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
     37 )
     39 # You can use a locally downloaded model instead:
     40 # model_path = './Phi-3.5-mini-instruct'
     41 # tokenizer_path = './Phi-3.5-mini-instruct'
     42 #
     43 # model = FastLanguageModel.from_pretrained()
     44 # tokenizer = FastLanguageModel.from_pretrained(tokenizer_path)

File D:\anacond3\envs\unsloth\lib\site-packages\unsloth\models\loader.py:272, in FastLanguageModel.from_pretrained(model_name, max_seq_length, dtype, load_in_4bit, token, device_map, rope_scaling, fix_tokenizer, trust_remote_code, use_gradient_checkpointing, resize_model_vocab, revision, *args, **kwargs)
    269     tokenizer_name = None
    270     pass
--> 272 model, tokenizer = dispatch_model.from_pretrained(
    273     model_name        = model_name,
    274     max_seq_length    = max_seq_length,
    275     dtype             = dtype,
    276     load_in_4bit      = load_in_4bit,
    277     token             = token,
    278     device_map        = device_map,
    279     rope_scaling      = rope_scaling,
    280     fix_tokenizer     = fix_tokenizer,
    281     model_patcher     = dispatch_model,
    282     tokenizer_name    = tokenizer_name,
    283     trust_remote_code = trust_remote_code,
    284     revision          = revision if not is_peft else None,
    285     *args, **kwargs,
    286 )
    288 if resize_model_vocab is not None:
    289     model.resize_token_embeddings(resize_model_vocab)

File D:\anacond3\envs\unsloth\lib\site-packages\unsloth\models\llama.py:1376, in FastLlamaModel.from_pretrained(model_name, max_seq_length, dtype, load_in_4bit, token, device_map, rope_scaling, fix_tokenizer, model_patcher, tokenizer_name, trust_remote_code, **kwargs)
   1374 max_position_embeddings = max(max_seq_length, model_max_seq_length)
   1375 kwargs.pop("attn_implementation", None); # No need since we auto call it
--> 1376 model = AutoModelForCausalLM.from_pretrained(
   1377     model_name,
   1378     device_map              = device_map,
   1379     torch_dtype             = dtype,
   1380     quantization_config     = bnb_config,
   1381     token                   = token,
   1382     max_position_embeddings = max_position_embeddings,
   1383     trust_remote_code       = trust_remote_code,
   1384     attn_implementation     = "eager",
   1385     **kwargs,
   1386 )
   1387 # Return old flag
   1388 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = old_hf_transfer

File D:\anacond3\envs\unsloth\lib\site-packages\transformers\models\auto\auto_factory.py:564, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
    562 elif type(config) in cls._model_mapping.keys():
    563     model_class = _get_model_class(config, cls._model_mapping)
--> 564     return model_class.from_pretrained(
    565         pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
    566     )
    567 raise ValueError(
    568     f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
    569     f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
    570 )

File D:\anacond3\envs\unsloth\lib\site-packages\transformers\modeling_utils.py:3832, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)
   3826     config = cls._autoset_attn_implementation(
   3827         config, use_flash_attention_2=use_flash_attention_2, torch_dtype=torch_dtype, device_map=device_map
   3828     )
   3830 with ContextManagers(init_contexts):
   3831     # Let's make sure we don't run the init function of buffer modules
--> 3832     model = cls(config, *model_args, **model_kwargs)
   3834 # make sure we use the model's config since the init call might have copied it
   3835 config = model.config

File D:\anacond3\envs\unsloth\lib\site-packages\transformers\models\llama\modeling_llama.py:1116, in LlamaForCausalLM.__init__(self, config)
   1114 def __init__(self, config):
   1115     super().__init__(config)
--> 1116     self.model = LlamaModel(config)
   1117     self.vocab_size = config.vocab_size
   1118     self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)

File D:\anacond3\envs\unsloth\lib\site-packages\transformers\models\llama\modeling_llama.py:902, in LlamaModel.__init__(self, config)
    898 self.vocab_size = config.vocab_size
    900 self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
    901 self.layers = nn.ModuleList(
--> 902     [LlamaDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
    903 )
    904 self.norm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
    905 self.rotary_emb = LlamaRotaryEmbedding(config=config)

File D:\anacond3\envs\unsloth\lib\site-packages\transformers\models\llama\modeling_llama.py:902, in <listcomp>(.0)
    898 self.vocab_size = config.vocab_size
    900 self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
    901 self.layers = nn.ModuleList(
--> 902     [LlamaDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
    903 )
    904 self.norm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
    905 self.rotary_emb = LlamaRotaryEmbedding(config=config)

File D:\anacond3\envs\unsloth\lib\site-packages\transformers\models\llama\modeling_llama.py:689, in LlamaDecoderLayer.__init__(self, config, layer_idx)
    686 super().__init__()
    687 self.hidden_size = config.hidden_size
--> 689 self.self_attn = LLAMA_ATTENTION_CLASSES[config._attn_implementation](config=config, layer_idx=layer_idx)
    691 self.mlp = LlamaMLP(config)
    692 self.input_layernorm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)

File <string>:65, in LlamaAttention__init__(self, config, layer_idx)

ValueError: Unknown RoPE scaling type longrope
```
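For context, the longrope value presumably comes from the rope_scaling entry in the checkpoint's config.json. A quick way to confirm what the model actually declares (a minimal sketch, assuming access to the Hugging Face Hub) is:

```python
from transformers import AutoConfig

# Inspect the RoPE scaling settings declared by the checkpoint's config.json.
cfg = AutoConfig.from_pretrained("unsloth/Phi-3.5-mini-instruct")
print(cfg.rope_scaling)  # expected to contain a scaling type such as "longrope"
```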

Please, how can I fix this?

danielhanchen commented 6 days ago

Do you know what Unsloth version you are using? Have you tried upgrading Unsloth?
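A quick way to check the installed versions is sketched below (assuming a standard pip-managed environment; the exact upgrade command depends on how Unsloth was installed, so treat the pip line as an assumption rather than the official instruction):

```python
from importlib.metadata import version

# Print the package versions most relevant to this error.
for pkg in ("unsloth", "transformers", "torch"):
    print(pkg, version(pkg))

# If unsloth is behind, upgrading via pip usually looks like
# (assumption -- adjust to your own install method / CUDA setup):
#   pip install --upgrade --no-cache-dir unsloth
```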