Hlufies opened this issue 11 hours ago
output_dir='lora/OneKE'
mkdir -p ${output_dir}
CUDA_VISIBLE_DEVICES="3" python3 src/finetune.py \
    --do_train --do_eval \
    --overwrite_output_dir \
    --model_name_or_path 'ZJUNLP/OneKE' \
    --stage 'sft' \
    --model_name 'llama' \
    --template 'llama2_zh' \
    --train_file 'data/NER/train.json' \
    --valid_file '/data/NER/dev.json' \
    --output_dir=${output_dir} \
    --per_device_train_batch_size 2 \
    --per_device_eval_batch_size 2 \
    --gradient_accumulation_steps 4 \
    --preprocessing_num_workers 16 \
    --num_train_epochs 10 \
    --learning_rate 5e-5 \
    --max_grad_norm 0.5 \
    --optim "adamw_torch" \
    --max_source_length 400 \
    --cutoff_len 700 \
    --max_target_length 300 \
    --evaluation_strategy "epoch" \
    --save_strategy "epoch" \
    --save_total_limit 10 \
    --lora_r 16 \
    --lora_alpha 32 \
    --lora_dropout 0.05 \
    --bf16 \
    --bits 4
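For readers less familiar with these flags, here is a minimal, illustrative sketch (not DeepKE's actual loader code) of roughly how they translate into a 4-bit QLoRA setup with `bitsandbytes` and `peft`. The concrete values (nf4 quantization, double quantization, bf16 compute, LoRA on all linear projections) are the ones echoed in the training log that follows.

```python
# Illustrative only: an approximate peft/bitsandbytes equivalent of the flags above.
# The actual OneKE/DeepKE fine-tuning code may construct these objects differently.
import torch
from transformers import BitsAndBytesConfig
from peft import LoraConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # --bits 4
    bnb_4bit_quant_type="nf4",              # quant_type='nf4' in the log
    bnb_4bit_use_double_quant=True,         # double_quant=True in the log
    bnb_4bit_compute_dtype=torch.bfloat16,  # --bf16
)

lora_config = LoraConfig(
    r=16,                                   # --lora_r 16
    lora_alpha=32,                          # --lora_alpha 32
    lora_dropout=0.05,                      # --lora_dropout 0.05
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)
```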
===================================BUG REPORT===================================
Welcome to bitsandbytes. For bug reports, please run
python -m bitsandbytes
and submit this information together with your error trace to:
https://github.com/TimDettmers/bitsandbytes/issues
================================================================================
bin /newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/venv_DeepKE_3_9/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda116.so
/newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/venv_DeepKE_3_9/lib/python3.9/site-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: /root/anaconda3/envs/deepke-llm did not contain ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] as expected! Searching further paths...
  warn(msg)
/newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/venv_DeepKE_3_9/lib/python3.9/site-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('/tmp/vscode-ipc-966375d3-03a6-46b1-860b-cfbaaee3f2b9.sock')}
  warn(msg)
/newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/venv_DeepKE_3_9/lib/python3.9/site-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('/tmp/vscode-git-4b63ac264a.sock')}
  warn(msg)
CUDA_SETUP: WARNING! libcudart.so not found in any environmental path. Searching in backup paths...
/newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/venv_DeepKE_3_9/lib/python3.9/site-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: Found duplicate ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] files: {PosixPath('/usr/local/cuda/lib64/libcudart.so'), PosixPath('/usr/local/cuda/lib64/libcudart.so.11.0')}.. We'll flip a coin and try one of these, in order to fail forward. Either way, this might cause trouble in the future: If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 116
CUDA SETUP: Loading binary /newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/venv_DeepKE_3_9/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda116.so...
10/30/2024 15:28:40 - WARNING - args.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
[INFO|training_args.py:1267] 2024-10-30 15:28:40,321 >> Found safetensors installation, but --save_safetensors=False. Safetensors should be a preferred weights saving format due to security and performance reasons. If your model cannot be saved by safetensors please feel free to open an issue at https://github.com/huggingface/safetensors!
[INFO|training_args.py:1669] 2024-10-30 15:28:40,321 >> PyTorch: setting up devices
/newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/venv_DeepKE_3_9/lib/python3.9/site-packages/transformers/training_args.py:1573: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead.
warnings.warn( 10/30/2024 15:28:40 - INFO - args.parser - Process rank: 0, device: cuda:0, n_gpu: 1 distributed training: True, compute dtype: torch.bfloat16 10/30/2024 15:28:40 - INFO - args.parser - Training/evaluation parameters TrainingArguments( _n_gpu=1, adafactor=False, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, auto_find_batch_size=False, bf16=True, bf16_full_eval=False, data_seed=None, dataloader_drop_last=False, dataloader_num_workers=0, dataloader_pin_memory=True, ddp_backend=None, ddp_bucket_cap_mb=None, ddp_find_unused_parameters=False, ddp_timeout=1800, debug=[], deepspeed=None, disable_tqdm=False, do_eval=True, do_predict=False, do_train=True, eval_accumulation_steps=None, eval_delay=0, eval_steps=None, evaluation_strategy=epoch, fp16=False, fp16_backend=auto, fp16_full_eval=False, fp16_opt_level=O1, fsdp=[], fsdp_config={'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, fsdp_min_num_params=0, fsdp_transformer_layer_cls_to_wrap=None, full_determinism=False, generation_config=None, generation_max_length=None, generation_num_beams=None, gradient_accumulation_steps=4, gradient_checkpointing=False, greater_is_better=None, group_by_length=False, half_precision_backend=auto, hub_model_id=None, hub_private_repo=False, hub_strategy=every_save, hub_token=<HUB_TOKEN>, ignore_data_skip=False, include_inputs_for_metrics=False, jit_mode_eval=False, label_names=None, label_smoothing_factor=0.0, learning_rate=5e-05, length_column_name=length, load_best_model_at_end=False, local_rank=0, log_level=passive, log_level_replica=warning, log_on_each_node=True, logging_dir=lora/OneKE/runs/Oct30_15-28-40_7af9b2a1f2a9, logging_first_step=False, logging_nan_inf_filter=True, logging_steps=2, logging_strategy=steps, loss_scale=1.0, lr_scheduler_type=linear, max_grad_norm=0.5, max_steps=-1, metric_for_best_model=None, mp_parameters=, no_cuda=False, num_train_epochs=10.0, optim=adamw_torch, optim_args=None, output_dir=lora/OneKE, overwrite_output_dir=True, past_index=-1, per_device_eval_batch_size=2, per_device_train_batch_size=2, predict_with_generate=False, prediction_loss_only=False, push_to_hub=False, push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=<PUSH_TO_HUB_TOKEN>, ray_scope=last, remove_unused_columns=False, report_to=[], resume_from_checkpoint=None, run_name=lora/OneKE, save_on_each_node=False, save_safetensors=False, save_steps=500, save_strategy=epoch, save_total_limit=10, seed=42, sharded_ddp=[], skip_memory_metrics=True, sortish_sampler=False, tf32=None, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, torchdynamo=None, tpu_metrics_debug=False, tpu_num_cores=None, use_ipex=False, use_legacy_prediction_loop=False, use_mps_device=False, warmup_ratio=0.0, warmup_steps=0, weight_decay=0.0, xpu_backend=None, ) 10/30/2024 15:28:40 - INFO - __main__ - Start Time: 2024:10:30 15:28:40 10/30/2024 15:28:40 - INFO - __main__ - model_args:ModelArguments(model_name_or_path='/newdata/HJQ/Resouces/hub/ZJUNLP/OneKE', model_name='llama', cache_dir=None, use_fast_tokenizer=True, trust_remote_code=True, use_auth_token=False, model_revision='main', split_special_tokens=False, bits=4, adam8bit=False, double_quant=True, quant_type='nf4', checkpoint_dir=None) data_args:DataArguments(train_file='/newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/DeepKE/example/llm/InstructKGC/data/NER/train.json', 
valid_file='/newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/DeepKE/example/llm/InstructKGC/data/NER/train.json', predict_file=None, preprocessing_num_workers=16, overwrite_cache=False, cache_path=None, template='llama2_zh', system_prompt=None, max_source_length=400, max_target_length=300, cutoff_len=700, val_set_size=1000, pad_to_max_length=False, ignore_pad_token_for_loss=True, train_on_prompt=False, language='zh', id_text='input') training_args:TrainingArguments( _n_gpu=1, adafactor=False, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, auto_find_batch_size=False, bf16=True, bf16_full_eval=False, data_seed=None, dataloader_drop_last=False, dataloader_num_workers=0, dataloader_pin_memory=True, ddp_backend=None, ddp_bucket_cap_mb=None, ddp_find_unused_parameters=False, ddp_timeout=1800, debug=[], deepspeed=None, disable_tqdm=False, do_eval=True, do_predict=False, do_train=True, eval_accumulation_steps=None, eval_delay=0, eval_steps=None, evaluation_strategy=epoch, fp16=False, fp16_backend=auto, fp16_full_eval=False, fp16_opt_level=O1, fsdp=[], fsdp_config={'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, fsdp_min_num_params=0, fsdp_transformer_layer_cls_to_wrap=None, full_determinism=False, generation_config=None, generation_max_length=None, generation_num_beams=None, gradient_accumulation_steps=4, gradient_checkpointing=False, greater_is_better=None, group_by_length=False, half_precision_backend=auto, hub_model_id=None, hub_private_repo=False, hub_strategy=every_save, hub_token=<HUB_TOKEN>, ignore_data_skip=False, include_inputs_for_metrics=False, jit_mode_eval=False, label_names=None, label_smoothing_factor=0.0, learning_rate=5e-05, length_column_name=length, load_best_model_at_end=False, local_rank=0, log_level=passive, log_level_replica=warning, log_on_each_node=True, logging_dir=lora/OneKE/runs/Oct30_15-28-40_7af9b2a1f2a9, logging_first_step=False, logging_nan_inf_filter=True, logging_steps=2, logging_strategy=steps, loss_scale=1.0, lr_scheduler_type=linear, max_grad_norm=0.5, max_steps=-1, metric_for_best_model=None, mp_parameters=, no_cuda=False, num_train_epochs=10.0, optim=adamw_torch, optim_args=None, output_dir=lora/OneKE, overwrite_output_dir=True, past_index=-1, per_device_eval_batch_size=2, per_device_train_batch_size=2, predict_with_generate=False, prediction_loss_only=False, push_to_hub=False, push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=<PUSH_TO_HUB_TOKEN>, ray_scope=last, remove_unused_columns=False, report_to=[], resume_from_checkpoint=None, run_name=lora/OneKE, save_on_each_node=False, save_safetensors=False, save_steps=500, save_strategy=epoch, save_total_limit=10, seed=42, sharded_ddp=[], skip_memory_metrics=True, sortish_sampler=False, tf32=None, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, torchdynamo=None, tpu_metrics_debug=False, tpu_num_cores=None, use_ipex=False, use_legacy_prediction_loop=False, use_mps_device=False, warmup_ratio=0.0, warmup_steps=0, weight_decay=0.0, xpu_backend=None, ) finetuning_args:FinetuningArguments(dpo_beta=0.1, ppo_logger=None, ppo_score_norm=False, ppo_target=6.0, ppo_whiten_rewards=False, ref_model=None, ref_model_checkpoint=None, ref_model_quantization_bit=None, reward_model=None, reward_model_checkpoint=None, reward_model_quantization_bit=None, reward_model_type='lora', lora_r=16, lora_alpha=32.0, lora_dropout=0.05, lora_target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj'], 
additional_target=None, resume_lora_training=True, num_layer_trainable=3, name_module_trainable=['mlp'], stage='sft', finetuning_type='lora', upcast_layernorm=False, neft_alpha=0, export_dir=None, plot_loss=False) generating_args:GenerationArguments(max_length=512, max_new_tokens=256, min_new_tokens=None, do_sample=False, num_beams=1, num_beam_groups=1, penalty_alpha=None, use_cache=True, temperature=1.0, top_k=50, top_p=1.0, typical_p=1.0, diversity_penalty=0.0, repetition_penalty=1.0, length_penalty=1.0, no_repeat_ngram_size=0) 10/30/2024 15:28:40 - INFO - __main__ - model_class:<class 'transformers.models.auto.modeling_auto.AutoModelForCausalLM'> tokenizer_class:<class 'transformers.models.auto.tokenization_auto.AutoTokenizer'> trainer_class:<class 'transformers.trainer.Trainer'> [INFO|tokenization_utils_base.py:1821] 2024-10-30 15:28:40,359 >> loading file tokenizer.model [INFO|tokenization_utils_base.py:1821] 2024-10-30 15:28:40,359 >> loading file tokenizer.json [INFO|tokenization_utils_base.py:1821] 2024-10-30 15:28:40,359 >> loading file added_tokens.json [INFO|tokenization_utils_base.py:1821] 2024-10-30 15:28:40,359 >> loading file special_tokens_map.json [INFO|tokenization_utils_base.py:1821] 2024-10-30 15:28:40,359 >> loading file tokenizer_config.json [INFO|configuration_utils.py:667] 2024-10-30 15:40:21,454 >> loading configuration file /newdata/HJQ/Resouces/hub/ZJUNLP/OneKE/config.json [INFO|configuration_utils.py:725] 2024-10-30 15:40:21,455 >> Model config LlamaConfig { "_name_or_path": "/newdata/HJQ/Resouces/hub/ZJUNLP/OneKE", "architectures": [ "LlamaForCausalLM" ], "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 5120, "initializer_range": 0.02, "intermediate_size": 13824, "max_position_embeddings": 4096, "model_type": "llama", "num_attention_heads": 40, "num_hidden_layers": 40, "num_key_value_heads": 40, "pad_token_id": 0, "pretraining_tp": 1, "rms_norm_eps": 1e-05, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "transformers_version": "4.30.2", "use_cache": false, "vocab_size": 55296 } 10/30/2024 15:40:21 - INFO - model.loader - Quantizing model to 4 bit. [INFO|modeling_utils.py:2575] 2024-10-30 15:40:21,955 >> loading weights file /newdata/HJQ/Resouces/hub/ZJUNLP/OneKE/pytorch_model.bin.index.json [INFO|modeling_utils.py:1173] 2024-10-30 15:40:21,958 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16. [INFO|configuration_utils.py:577] 2024-10-30 15:40:21,959 >> Generate config GenerationConfig { "_from_model_config": true, "bos_token_id": 1, "eos_token_id": 2, "pad_token_id": 0, "transformers_version": "4.30.2", "use_cache": false } [INFO|modeling_utils.py:2690] 2024-10-30 15:40:22,147 >> Detected 8-bit loading: activating 8-bit loading for this model Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████| 3/3 [00:19<00:00, 6.55s/it] [INFO|modeling_utils.py:3295] 2024-10-30 15:40:42,108 >> All model checkpoint weights were used when initializing LlamaForCausalLM. 
[WARNING|modeling_utils.py:3297] 2024-10-30 15:40:42,108 >> Some weights of LlamaForCausalLM were not initialized from the model checkpoint at /newdata/HJQ/Resouces/hub/ZJUNLP/OneKE and are newly initialized: ['model.layers.37.self_attn.rotary_emb.inv_freq', 'model.layers.4.self_attn.rotary_emb.inv_freq', 'model.layers.14.self_attn.rotary_emb.inv_freq', 'model.layers.35.self_attn.rotary_emb.inv_freq', 'model.layers.21.self_attn.rotary_emb.inv_freq', 'model.layers.39.self_attn.rotary_emb.inv_freq', 'model.layers.33.self_attn.rotary_emb.inv_freq', 'model.layers.36.self_attn.rotary_emb.inv_freq', 'model.layers.11.self_attn.rotary_emb.inv_freq', 'model.layers.24.self_attn.rotary_emb.inv_freq', 'model.layers.22.self_attn.rotary_emb.inv_freq', 'model.layers.19.self_attn.rotary_emb.inv_freq', 'model.layers.7.self_attn.rotary_emb.inv_freq', 'model.layers.3.self_attn.rotary_emb.inv_freq', 'model.layers.20.self_attn.rotary_emb.inv_freq', 'model.layers.28.self_attn.rotary_emb.inv_freq', 'model.layers.0.self_attn.rotary_emb.inv_freq', 'model.layers.10.self_attn.rotary_emb.inv_freq', 'model.layers.15.self_attn.rotary_emb.inv_freq', 'model.layers.32.self_attn.rotary_emb.inv_freq', 'model.layers.25.self_attn.rotary_emb.inv_freq', 'model.layers.26.self_attn.rotary_emb.inv_freq', 'model.layers.17.self_attn.rotary_emb.inv_freq', 'model.layers.1.self_attn.rotary_emb.inv_freq', 'model.layers.8.self_attn.rotary_emb.inv_freq', 'model.layers.18.self_attn.rotary_emb.inv_freq', 'model.layers.23.self_attn.rotary_emb.inv_freq', 'model.layers.31.self_attn.rotary_emb.inv_freq', 'model.layers.5.self_attn.rotary_emb.inv_freq', 'model.layers.12.self_attn.rotary_emb.inv_freq', 'model.layers.6.self_attn.rotary_emb.inv_freq', 'model.layers.27.self_attn.rotary_emb.inv_freq', 'model.layers.13.self_attn.rotary_emb.inv_freq', 'model.layers.16.self_attn.rotary_emb.inv_freq', 'model.layers.2.self_attn.rotary_emb.inv_freq', 'model.layers.30.self_attn.rotary_emb.inv_freq', 'model.layers.29.self_attn.rotary_emb.inv_freq', 'model.layers.38.self_attn.rotary_emb.inv_freq', 'model.layers.34.self_attn.rotary_emb.inv_freq', 'model.layers.9.self_attn.rotary_emb.inv_freq'] You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. [INFO|modeling_utils.py:2927] 2024-10-30 15:40:42,114 >> Generation config file not found, using a generation config created from the model config. 10/30/2024 15:40:42 - INFO - model.adapter - Gradient checkpointing enabled. 10/30/2024 15:40:42 - INFO - model.adapter - Fine-tuning method: LoRA 10/30/2024 15:42:33 - INFO - model.loader - trainable params: 62586880 || all params: 13317002240 || trainable%: 0.4700 10/30/2024 15:42:33 - INFO - __main__ - BOS:1,<s> EOS:2,</s> PAD:32000,<pad> Using custom data configuration default-2ebd0fbf98721aa3 Loading Dataset Infos from /newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/venv_DeepKE_3_9/lib/python3.9/site-packages/datasets/packaged_modules/json Overwrite dataset info from restored data version if exists. 
Loading Dataset info from /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96 Found cached dataset json (/root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96) Loading Dataset info from /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96 Using custom data configuration default-2ebd0fbf98721aa3 Loading Dataset Infos from /newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/venv_DeepKE_3_9/lib/python3.9/site-packages/datasets/packaged_modules/json Overwrite dataset info from restored data version if exists. Loading Dataset info from /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96 Found cached dataset json (/root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96) Loading Dataset info from /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96 process train dataset [INFO|tokenization_utils_base.py:921] 2024-10-30 15:42:35,243 >> Assigning [] to the additional_special_tokens key of the tokenizer num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. Process #0 will write at /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-0b09544ede012cec_00000_of_00006.arrow Process #1 will write at /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-0b09544ede012cec_00001_of_00006.arrow Process #2 will write at /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-0b09544ede012cec_00002_of_00006.arrow Process #3 will write at /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-0b09544ede012cec_00003_of_00006.arrow Process #4 will write at /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-0b09544ede012cec_00004_of_00006.arrow Process #5 will write at /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-0b09544ede012cec_00005_of_00006.arrow Spawning 6 processes Map (num_proc=6): 0%| | 0/6 [00:00<?, ? 
examples/s]Caching processed dataset at /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-0b09544ede012cec_00000_of_00006.arrow Map (num_proc=6): 17%|██████████ | 1/6 [00:00<00:00, 5.39 examples/s]Caching processed dataset at /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-0b09544ede012cec_00001_of_00006.arrow Caching processed dataset at /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-0b09544ede012cec_00002_of_00006.arrow Caching processed dataset at /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-0b09544ede012cec_00003_of_00006.arrow Caching processed dataset at /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-0b09544ede012cec_00004_of_00006.arrow Map (num_proc=6): 83%|██████████████████████████████████████████████████ | 5/6 [00:00<00:00, 17.96 examples/s]Caching processed dataset at /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-0b09544ede012cec_00005_of_00006.arrow Map (num_proc=6): 100%|████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 13.95 examples/s] Concatenating 6 shards input_ids: [1, 518, 25580, 29962, 3532, 14816, 29903, 6778, 13, 3492, 526, 263, 8444, 20255, 29889, 29871, 30919, 30392, 30287, 30502, 31616, 30909, 31931, 30313, 30210, 31931, 30880, 30267, 13, 29966, 829, 14816, 29903, 6778, 13, 13, 6377, 2611, 4080, 1115, 376, 30919, 30392, 31756, 31649, 31174, 30448, 31195, 30988, 33488, 30683, 30210, 31756, 30613, 30267, 31088, 31594, 2080, 30275, 33488, 30683, 30544, 31277, 30733, 11010, 30495, 31349, 30210, 31195, 30988, 30214, 30413, 30946, 30505, 30210, 31195, 30988, 30832, 30883, 31086, 30742, 30816, 31025, 30746, 30267, 31088, 31590, 32484, 7249, 30578, 31277, 31767, 30210, 31168, 30607, 30742, 33069, 30267, 613, 376, 11010, 1115, 6796, 30313, 30834, 613, 376, 30533, 30687, 30956, 30669, 613, 376, 31263, 38034, 31429, 31901, 12436, 376, 2080, 1115, 376, 30505, 30810, 30755, 45899, 35381, 30413, 39865, 30577, 33228, 30214, 33963, 30505, 35289, 30658, 30287, 53432, 30383, 30658, 30313, 32758, 31900, 30214, 31951, 35344, 30015, 30578, 30578, 30417, 30805, 33581, 30214, 32760, 32760, 30417, 30544, 31548, 30024, 30214, 33126, 32296, 30539, 30578, 30214, 31502, 36939, 32791, 30544, 30658, 30313, 41804, 50817, 30214, 30688, 47053, 31462, 31325, 30822, 30214, 31157, 32044, 30592, 32748, 30214, 38006, 30417, 31502, 30374, 30544, 30882, 9092, 518, 29914, 25580, 29962, 8853, 30313, 30834, 1115, 19997, 376, 30533, 30687, 30956, 30669, 1115, 19997, 376, 31263, 38034, 31429, 31901, 1115, 5159, 29913, 2] inputs: <s> [INST] <<SYS>> You are a helpful assistant. 
你是一个乐于助人的助手。 <</SYS>> {"instruction": "你是专门进行实体抽取的专家。请从input中抽取出符合schema定义的实体,不存在的实体类型返回空列表。请按照JSON字符串的格式回答。", "schema": ["人物", "地理位置", "组织机构"], "input": "在这里恕弟不恭之罪,敢在尊前一诤:前人论书,每曰“字字有来历,笔笔有出处”,细读公字,何尝跳出前人藩篱,自隶变而后,直至明季,兄有何新出?"} [/INST] {"人物": [], "地理位置": [], "组织机构": []}</s> label_ids: [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 8853, 30313, 30834, 1115, 19997, 376, 30533, 30687, 30956, 30669, 1115, 19997, 376, 31263, 38034, 31429, 31901, 1115, 5159, 29913, 2] labels: {"人物": [], "地理位置": [], "组织机构": []}</s> process valid dataset [INFO|tokenization_utils_base.py:921] 2024-10-30 15:42:35,962 >> Assigning [] to the additional_special_tokens key of the tokenizer num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. 
Process #0 will write at /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-0b09544ede012cec_00000_of_00006.arrow Process #1 will write at /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-0b09544ede012cec_00001_of_00006.arrow Process #2 will write at /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-0b09544ede012cec_00002_of_00006.arrow Process #3 will write at /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-0b09544ede012cec_00003_of_00006.arrow Process #4 will write at /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-0b09544ede012cec_00004_of_00006.arrow Process #5 will write at /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-0b09544ede012cec_00005_of_00006.arrow Loading cached processed dataset at /root/.cache/huggingface/datasets/json/default-2ebd0fbf98721aa3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-0b09544ede012cec_*_of_00006.arrow Concatenating 6 shards input_ids: [1, 518, 25580, 29962, 3532, 14816, 29903, 6778, 13, 3492, 526, 263, 8444, 20255, 29889, 29871, 30919, 30392, 30287, 30502, 31616, 30909, 31931, 30313, 30210, 31931, 30880, 30267, 13, 29966, 829, 14816, 29903, 6778, 13, 13, 6377, 2611, 4080, 1115, 376, 30919, 30392, 31756, 31649, 31174, 30448, 31195, 30988, 33488, 30683, 30210, 31756, 30613, 30267, 31088, 31594, 2080, 30275, 33488, 30683, 30544, 31277, 30733, 11010, 30495, 31349, 30210, 31195, 30988, 30214, 30413, 30946, 30505, 30210, 31195, 30988, 30832, 30883, 31086, 30742, 30816, 31025, 30746, 30267, 31088, 31590, 32484, 7249, 30578, 31277, 31767, 30210, 31168, 30607, 30742, 33069, 30267, 613, 376, 11010, 1115, 6796, 30313, 30834, 613, 376, 30533, 30687, 30956, 30669, 613, 376, 31263, 38034, 31429, 31901, 12436, 376, 2080, 1115, 376, 30505, 30810, 30755, 45899, 35381, 30413, 39865, 30577, 33228, 30214, 33963, 30505, 35289, 30658, 30287, 53432, 30383, 30658, 30313, 32758, 31900, 30214, 31951, 35344, 30015, 30578, 30578, 30417, 30805, 33581, 30214, 32760, 32760, 30417, 30544, 31548, 30024, 30214, 33126, 32296, 30539, 30578, 30214, 31502, 36939, 32791, 30544, 30658, 30313, 41804, 50817, 30214, 30688, 47053, 31462, 31325, 30822, 30214, 31157, 32044, 30592, 32748, 30214, 38006, 30417, 31502, 30374, 30544, 30882, 9092, 518, 29914, 25580, 29962, 8853, 30313, 30834, 1115, 19997, 376, 30533, 30687, 30956, 30669, 1115, 19997, 376, 31263, 38034, 31429, 31901, 1115, 5159, 29913, 2] inputs: <s> [INST] <<SYS>> You are a helpful assistant. 
你是一个乐于助人的助手。 <</SYS>> {"instruction": "你是专门进行实体抽取的专家。请从input中抽取出符合schema定义的实体,不存在的实体类型返回空列表。请按照JSON字符串的格式回答。", "schema": ["人物", "地理位置", "组织机构"], "input": "在这里恕弟不恭之罪,敢在尊前一诤:前人论书,每曰“字字有来历,笔笔有出处”,细读公字,何尝跳出前人藩篱,自隶变而后,直至明季,兄有何新出?"} [/INST] {"人物": [], "地理位置": [], "组织机构": []}</s> label_ids: [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 8853, 30313, 30834, 1115, 19997, 376, 30533, 30687, 30956, 30669, 1115, 19997, 376, 31263, 38034, 31429, 31901, 1115, 5159, 29913, 2] labels: {"人物": [], "地理位置": [], "组织机构": []}</s> [INFO|trainer.py:399] 2024-10-30 15:42:36,018 >> You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching. [INFO|trainer.py:407] 2024-10-30 15:42:36,018 >> The model is loaded in 8-bit precision. To train this model you need to add additional modules inside the model such as adapters using `peft` library and freeze the model weights. Please check the examples in https://github.com/huggingface/peft for more details. 10/30/2024 15:42:36 - INFO - __main__ - *** Train *** 10/30/2024 15:42:36 - INFO - __main__ - resume_from_checkpoint: None [INFO|trainer.py:1786] 2024-10-30 15:42:36,175 >> ***** Running training ***** [INFO|trainer.py:1787] 2024-10-30 15:42:36,175 >> Num examples = 6 [INFO|trainer.py:1788] 2024-10-30 15:42:36,175 >> Num Epochs = 10 [INFO|trainer.py:1789] 2024-10-30 15:42:36,175 >> Instantaneous batch size per device = 2 [INFO|trainer.py:1790] 2024-10-30 15:42:36,175 >> Total train batch size (w. parallel, distributed & accumulation) = 8 [INFO|trainer.py:1791] 2024-10-30 15:42:36,175 >> Gradient Accumulation steps = 4 [INFO|trainer.py:1792] 2024-10-30 15:42:36,175 >> Total optimization steps = 10 [INFO|trainer.py:1793] 2024-10-30 15:42:36,182 >> Number of trainable parameters = 62,586,880 0%| | 0/10 [00:00<?, ?it/s][WARNING|logging.py:280] 2024-10-30 15:42:36,190 >> You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding. 
Traceback (most recent call last):
  File "/newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/DeepKE/example/llm/InstructKGC/src/finetune.py", line 119, in <module>
    main()
  File "/newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/DeepKE/example/llm/InstructKGC/src/finetune.py", line 114, in main
    train(model_args, data_args, training_args, finetuning_args, generating_args)
  File "/newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/DeepKE/example/llm/InstructKGC/src/finetune.py", line 81, in train
    train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
  File "/newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/venv_DeepKE_3_9/lib/python3.9/site-packages/transformers/trainer.py", line 1645, in train
    return inner_training_loop(
  File "/newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/venv_DeepKE_3_9/lib/python3.9/site-packages/transformers/trainer.py", line 1938, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
  File "/newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/venv_DeepKE_3_9/lib/python3.9/site-packages/transformers/trainer.py", line 2759, in training_step
    loss = self.compute_loss(model, inputs)
  File "/newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/venv_DeepKE_3_9/lib/python3.9/site-packages/transformers/trainer.py", line 2784, in compute_loss
    outputs = model(**inputs)
  File "/newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/venv_DeepKE_3_9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/venv_DeepKE_3_9/lib/python3.9/site-packages/accelerate/utils/operations.py", line 581, in forward
    return model_forward(*args, **kwargs)
  File "/newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/venv_DeepKE_3_9/lib/python3.9/site-packages/accelerate/utils/operations.py", line 569, in __call__
    return convert_to_fp32(self.model_forward(*args, **kwargs))
  File "/newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/venv_DeepKE_3_9/lib/python3.9/site-packages/torch/amp/autocast_mode.py", line 14, in decorate_autocast
    return func(*args, **kwargs)
  File "/newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/venv_DeepKE_3_9/lib/python3.9/site-packages/peft/peft_model.py", line 922, in forward
    return self.base_model(
  File "/newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/venv_DeepKE_3_9/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/newdata/HJQ/Cultural_Tourism_Program/Information_Extraction/OneKE/venv_DeepKE_3_9/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
TypeError: forward() got an unexpected keyword argument 'length'
  0%|          | 0/10 [00:00<?, ?it/s]
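For context, the final `TypeError` means an auxiliary batch key named `length` was passed straight into the model's `forward()`, which `LlamaForCausalLM` does not accept; with `remove_unused_columns=False` (as in the arguments above), extra dataset columns like this can survive all the way to the forward call. The sketch below is only a hedged illustration of a generic guard against that failure mode; the maintainers' actual fix (see the reply below) may well differ.

```python
# Hypothetical sketch, not the upstream fix: wrap the existing data collator so that
# only keys the model's forward() accepts ever reach model(**inputs).
MODEL_INPUT_KEYS = {"input_ids", "attention_mask", "labels"}  # assumed key set

class FilteredCollator:
    """Delegates to a base collator, then drops extra keys such as 'length'."""

    def __init__(self, base_collator):
        self.base_collator = base_collator

    def __call__(self, features):
        batch = self.base_collator(features)
        return {k: v for k, v in batch.items() if k in MODEL_INPUT_KEYS}

# Hypothetical usage: Trainer(..., data_collator=FilteredCollator(existing_collator))
```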
Hello, we have fixed this bug. Please pull the latest code.