Traceback (most recent call last):
File "/home/services/anaconda3/envs/text-webui-311/lib/python3.11/site-packages/multiprocess/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
^^^^^^^^^^^^^^^^^^^
File "/home/services/anaconda3/envs/text-webui-311/lib/python3.11/site-packages/datasets/utils/py_utils.py", line 1377, in _write_generator_to_queue
for i, result in enumerate(func(**kwargs)):
File "/home/services/anaconda3/envs/text-webui-311/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3466, in _map_single
batch = apply_function_on_filtered_inputs(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/services/anaconda3/envs/text-webui-311/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3345, in apply_function_on_filtered_inputs
processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/services/lmt/repos/llms/zero_nlp/internlm-sft/train_sft.py", line 174, in generate_sources_targets
input_output = preprocess(
^^^^^^^^^^^
File "/home/services/lmt/repos/llms/zero_nlp/internlm-sft/train_sft.py", line 125, in preprocess
examples_tokenized, sources_tokenized = [_tokenize_fn(
^^^^^^^^^^^^^^
File "/home/services/lmt/repos/llms/zero_nlp/internlm-sft/train_sft.py", line 125, in <listcomp>
examples_tokenized, sources_tokenized = [_tokenize_fn(
^^^^^^^^^^^^^
File "/home/services/lmt/repos/llms/zero_nlp/internlm-sft/train_sft.py", line 94, in _tokenize_fn
tokenized_list = [
^
File "/home/services/lmt/repos/llms/zero_nlp/internlm-sft/train_sft.py", line 95, in <listcomp>
tokenizer(
File "/home/services/anaconda3/envs/text-webui-311/lib/python3.11/site-packages/transformers/tokenization_utils_base.py", line 2829, in __call__
encodings = self._call_one(text=text, text_pair=text_pair, **all_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/services/anaconda3/envs/text-webui-311/lib/python3.11/site-packages/transformers/tokenization_utils_base.py", line 2935, in _call_one
return self.encode_plus(
^^^^^^^^^^^^^^^^^
File "/home/services/anaconda3/envs/text-webui-311/lib/python3.11/site-packages/transformers/tokenization_utils_base.py", line 3008, in encode_plus
return self._encode_plus(
^^^^^^^^^^^^^^^^^^
File "/home/services/anaconda3/envs/text-webui-311/lib/python3.11/site-packages/transformers/tokenization_utils_fast.py", line 576, in _encode_plus
batched_output = self._batch_encode_plus(
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/services/anaconda3/envs/text-webui-311/lib/python3.11/site-packages/transformers/tokenization_utils_fast.py", line 496, in _batch_encode_plus
self.set_truncation_and_padding(
File "/home/services/anaconda3/envs/text-webui-311/lib/python3.11/site-packages/transformers/tokenization_utils_fast.py", line 451, in set_truncation_and_padding
self._tokenizer.enable_truncation(**target)
OverflowError: int too big to convert
GPU 数量越多,GPU 利用率越低;多卡训练的总体速度与单卡基本持平。
修改 train_sft.py,添加 model_max_length 参数:
如果不做此修改,tokenizer 启用截断时会报错(应为上文的 OverflowError: int too big to convert)。
train_zero2.sh
训练报告: https://api.wandb.ai/links/a86056549/zwfn6e72
依赖版本: transformers==4.38.2 peft==0.9.0 deepspeed==0.14.0