texttron / tevatron

Tevatron - A flexible toolkit for neural retrieval research and development.
http://tevatron.ai
Apache License 2.0
435 stars 87 forks source link

training error #132

Open riyajatar37003 opened 2 weeks ago

riyajatar37003 commented 2 weeks ago

06/18/2024 16:57:30 - INFO - tevatron.retriever.trainer - Saving model checkpoint to retriever-mistral/checkpoint-50
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/opt/conda/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "tevatron/src/tevatron/retriever/driver/train.py", line 96, in <module>
    main()
  File "tevatron/src/tevatron/retriever/driver/train.py", line 89, in main
    trainer.train() # TODO: resume training
  File "/tmp/.local/lib/python3.10/site-packages/transformers/trainer.py", line 1885, in train
    return inner_training_loop(
  File "/tmp/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2291, in _inner_training_loop
    self._maybe_log_save_evaluate(tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
  File "/tmp/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2732, in _maybe_log_save_evaluate
    self._save_checkpoint(model, trial, metrics=metrics)
  File "/tmp/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2811, in _save_checkpoint
    self.save_model(output_dir, _internal_call=True)
  File "/tmp/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3355, in save_model
    self._save(output_dir)
  File "/tmp/.local/lib/python3.10/site-packages/tevatron/retriever/trainer.py", line 31, in _save
    raise ValueError(f"Unsupported model class {self.model}")
ValueError: Unsupported model class DenseModel(
  (encoder): PeftModelForFeatureExtraction(
    (base_model): LoraModel(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralSdpaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): lora.Linear(
                (base_layer): Linear(in_features=4096, out_features=1024, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=1024, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (v_proj): lora.Linear(
                (base_layer): Linear(in_features=4096, out_features=1024, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=1024, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (o_proj): lora.Linear(
                (base_layer): Linear(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (rotary_emb): MistralRotaryEmbedding()
            )
            (mlp): MistralMLP(
              (gate_proj): lora.Linear(
                (base_layer): Linear(in_features=4096, out_features=14336, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=14336, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (up_proj): lora.Linear(
                (base_layer): Linear(in_features=4096, out_features=14336, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=14336, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (down_proj): lora.Linear(
                (base_layer): Linear(in_features=14336, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=14336, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (act_fn): SiLU()
            )
            (input_layernorm): MistralRMSNorm()
            (post_attention_layernorm): MistralRMSNorm()
          )
        )
        (norm): MistralRMSNorm()
      )
    )
  )
  (cross_entropy): CrossEntropyLoss()
)