Closed: djstrong closed this issue 4 months ago.
glm-4-9b-chat-1m:
```
  File "EQ-Bench/lib/run_bench.py", line 368, in run_generic_benchmark
    process_questions(benchmark_type, model, ooba_instance, inference_engine, results, model_path, prompt_type, tokenizer, launch_ooba, ooba_request_timeout, run_index, run_iter, verbose, n_attempts, openai_client, questions, eqbench_version, language, REVISE, judge_params, test_model_outputs, process_fn)
  File "EQ-Bench/lib/run_bench.py", line 187, in process_questions
    process_fn(question_id, q, model_path, prompt_type, model, tokenizer, results, run_index, run_iter, verbose,
  File "EQ-Bench/lib/eq_bench_utils.py", line 54, in process_question
    inference = run_query(model_path, prompt_type, prompt, [], COMPLETION_TOKENS, model, tokenizer, temp, inference_engine, ooba_instance, launch_ooba, ooba_request_timeout, openai_client)
  File "EQ-Bench/lib/run_query.py", line 425, in run_query
    return inference_fn(formatted_prompt, completion_tokens, model, tokenizer, temp)
  File "EQ-Bench/lib/run_query.py", line 29, in run_pipeline_query
    output = text_gen(prompt)
  File "EQ-Bench/venv/lib/python3.10/site-packages/transformers/pipelines/text_generation.py", line 258, in __call__
    return super().__call__(Chat(text_inputs), **kwargs)
  File "EQ-Bench/venv/lib/python3.10/site-packages/transformers/pipelines/base.py", line 1243, in __call__
    return self.run_single(inputs, preprocess_params, forward_params, postprocess_params)
  File "EQ-Bench/venv/lib/python3.10/site-packages/transformers/pipelines/base.py", line 1249, in run_single
    model_inputs = self.preprocess(inputs, **preprocess_params)
  File "EQ-Bench/venv/lib/python3.10/site-packages/transformers/pipelines/text_generation.py", line 277, in preprocess
    inputs = self.tokenizer.apply_chat_template(
  File ".cache/modules/transformers_modules/THUDM/glm-4-9b-chat-1m/dad3715719ac6ba89fc7c643099a223a1b0fe869/tokenization_chatglm.py", line 222, in apply_chat_template
    output = self.batch_encode_plus(
  File "EQ-Bench/venv/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 3160, in batch_encode_plus
    return self._batch_encode_plus(
  File "EQ-Bench/venv/lib/python3.10/site-packages/transformers/tokenization_utils.py", line 807, in _batch_encode_plus
    batch_outputs = self._batch_prepare_for_model(
  File "EQ-Bench/venv/lib/python3.10/site-packages/transformers/tokenization_utils.py", line 879, in _batch_prepare_for_model
    batch_outputs = self.pad(
  File "EQ-Bench/venv/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 3367, in pad
    outputs = self._pad(
  File ".cache/modules/transformers_modules/THUDM/glm-4-9b-chat-1m/dad3715719ac6ba89fc7c643099a223a1b0fe869/tokenization_chatglm.py", line 296, in _pad
    assert self.padding_side == "left"
AssertionError
```
gemma:
```
  File "EQ-Bench/lib/run_bench.py", line 368, in run_generic_benchmark
    process_questions(benchmark_type, model, ooba_instance, inference_engine, results, model_path, prompt_type, tokenizer, launch_ooba, ooba_request_timeout, run_index, run_iter, verbose, n_attempts, openai_client, questions, eqbench_version, language, REVISE, judge_params, test_model_outputs, process_fn)
  File "EQ-Bench/lib/run_bench.py", line 187, in process_questions
    process_fn(question_id, q, model_path, prompt_type, model, tokenizer, results, run_index, run_iter, verbose,
  File "EQ-Bench/lib/eq_bench_utils.py", line 54, in process_question
    inference = run_query(model_path, prompt_type, prompt, [], COMPLETION_TOKENS, model, tokenizer, temp, inference_engine, ooba_instance, launch_ooba, ooba_request_timeout, openai_client)
  File "EQ-Bench/lib/run_query.py", line 425, in run_query
    return inference_fn(formatted_prompt, completion_tokens, model, tokenizer, temp)
  File "EQ-Bench/lib/run_query.py", line 29, in run_pipeline_query
    output = text_gen(prompt)
  File "EQ-Bench/venv/lib/python3.10/site-packages/transformers/pipelines/text_generation.py", line 258, in __call__
    return super().__call__(Chat(text_inputs), **kwargs)
  File "EQ-Bench/venv/lib/python3.10/site-packages/transformers/pipelines/base.py", line 1243, in __call__
    return self.run_single(inputs, preprocess_params, forward_params, postprocess_params)
  File "EQ-Bench/venv/lib/python3.10/site-packages/transformers/pipelines/base.py", line 1249, in run_single
    model_inputs = self.preprocess(inputs, **preprocess_params)
  File "EQ-Bench/venv/lib/python3.10/site-packages/transformers/pipelines/text_generation.py", line 277, in preprocess
    inputs = self.tokenizer.apply_chat_template(
  File "EQ-Bench/venv/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1812, in apply_chat_template
    rendered_chat = compiled_template.render(
  File "EQ-Bench/venv/lib/python3.10/site-packages/jinja2/environment.py", line 1304, in render
    self.environment.handle_exception()
  File "EQ-Bench/venv/lib/python3.10/site-packages/jinja2/environment.py", line 939, in handle_exception
    raise rewrite_traceback_stack(source=source)
  File "<template>", line 1, in top-level template code
  File "EQ-Bench/venv/lib/python3.10/site-packages/jinja2/sandbox.py", line 394, in call
    return __context.call(__obj, *args, **kwargs)
  File "EQ-Bench/venv/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1852, in raise_exception
    raise TemplateError(message)
jinja2.exceptions.TemplateError: System role not supported
```
mistral 0.2:
```
  File "EQ-Bench/lib/run_bench.py", line 368, in run_generic_benchmark
    process_questions(benchmark_type, model, ooba_instance, inference_engine, results, model_path, prompt_type, tokenizer, launch_ooba, ooba_request_timeout, run_index, run_iter, verbose, n_attempts, openai_client, questions, eqbench_version, language, REVISE, judge_params, test_model_outputs, process_fn)
  File "EQ-Bench/lib/run_bench.py", line 187, in process_questions
    process_fn(question_id, q, model_path, prompt_type, model, tokenizer, results, run_index, run_iter, verbose,
  File "EQ-Bench/lib/eq_bench_utils.py", line 54, in process_question
    inference = run_query(model_path, prompt_type, prompt, [], COMPLETION_TOKENS, model, tokenizer, temp, inference_engine, ooba_instance, launch_ooba, ooba_request_timeout, openai_client)
  File "EQ-Bench/lib/run_query.py", line 425, in run_query
    return inference_fn(formatted_prompt, completion_tokens, model, tokenizer, temp)
  File "EQ-Bench/lib/run_query.py", line 29, in run_pipeline_query
    output = text_gen(prompt)
  File "EQ-Bench/venv/lib/python3.10/site-packages/transformers/pipelines/text_generation.py", line 258, in __call__
    return super().__call__(Chat(text_inputs), **kwargs)
  File "EQ-Bench/venv/lib/python3.10/site-packages/transformers/pipelines/base.py", line 1243, in __call__
    return self.run_single(inputs, preprocess_params, forward_params, postprocess_params)
  File "EQ-Bench/venv/lib/python3.10/site-packages/transformers/pipelines/base.py", line 1249, in run_single
    model_inputs = self.preprocess(inputs, **preprocess_params)
  File "EQ-Bench/venv/lib/python3.10/site-packages/transformers/pipelines/text_generation.py", line 277, in preprocess
    inputs = self.tokenizer.apply_chat_template(
  File "EQ-Bench/venv/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1812, in apply_chat_template
    rendered_chat = compiled_template.render(
  File "EQ-Bench/venv/lib/python3.10/site-packages/jinja2/environment.py", line 1304, in render
    self.environment.handle_exception()
  File "EQ-Bench/venv/lib/python3.10/site-packages/jinja2/environment.py", line 939, in handle_exception
    raise rewrite_traceback_stack(source=source)
  File "<template>", line 1, in top-level template code
  File "EQ-Bench/venv/lib/python3.10/site-packages/jinja2/sandbox.py", line 394, in call
    return __context.call(__obj, *args, **kwargs)
  File "EQ-Bench/venv/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 1852, in raise_exception
    raise TemplateError(message)
jinja2.exceptions.TemplateError: Conversation roles must alternate user/assistant/user/assistant/...
```
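Both Jinja errors above come from the model's chat template rejecting the message layout, not from EQ-Bench itself. A minimal, stand-alone reproduction (this is illustrative code, not EQ-Bench's; the model id and system prompt are placeholders) looks roughly like this:

```python
from transformers import AutoTokenizer

# A system-first message list, similar to what the text-generation pipeline
# builds before calling apply_chat_template.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello"},
]

# Mistral-7B-Instruct-v0.2's template raises "Conversation roles must alternate
# user/assistant/..." when the list starts with a system turn; Gemma's template
# raises "System role not supported" for any system message.
tok = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
tok.apply_chat_template(messages, tokenize=False)  # -> jinja2 TemplateError
```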
Thanks for reporting this. I've tried out each of these:
- THUDM/glm-4-9b-chat
- Gemma
- Mistral-7B-Instruct-v0.2

Let me know if you are still having issues.
Thank you!
For glm-4-9b-chat I pasted the stack trace above: this model needs the padding side to be left, but you are setting it to right in the `load_model` function.
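For reference, a minimal sketch of the left-padding workaround (an assumption about how the tokenizer could be set up, not the actual `load_model` code):

```python
from transformers import AutoTokenizer

# GLM-4's custom tokenizer asserts padding_side == "left" in _pad(), so a
# right-padding setting trips the AssertionError above. Either leave the
# tokenizer's own default in place or set left padding explicitly.
tokenizer = AutoTokenizer.from_pretrained(
    "THUDM/glm-4-9b-chat-1m",
    trust_remote_code=True,
)
tokenizer.padding_side = "left"
```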
Just realised I hadn't actually pushed the changes removing the system prompt. I've just pushed them now.
I will try removing the left padding designation; it may be unnecessary now anyway.
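In case it's useful, a rough sketch of one way to drop or fold the system prompt for templates that reject the system role (not necessarily how it's implemented here):

```python
def merge_system_into_user(messages):
    """Fold a leading system message into the first user turn so templates
    like Gemma's or Mistral-v0.2's accept the conversation."""
    if messages and messages[0]["role"] == "system":
        system_text = messages.pop(0)["content"]
        if messages and messages[0]["role"] == "user":
            messages[0]["content"] = system_text + "\n\n" + messages[0]["content"]
        else:
            messages.insert(0, {"role": "user", "content": system_text})
    return messages
```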
Removing those lines seems to have fixed the issue with glm-4-9b-chat. Thanks for your help figuring it out!
I am trying to benchmark new models, e.g.:
glm-4-9b-chat, , THUDM/glm-4-9b-chat, , , 1, transformers, , ,