Closed rkinas closed 2 months ago
Hi, could you provide your full logs?
I can also reproduce the error with commit bbe3b5f8eff55c63dee8a28ebedc7a3a46556598
Edit: same issue on main
with commit 6a48e4e7723611edda9f51d0fcb9c4d20b79b19d
Command to reproduce:
accelerate launch --multi_gpu --num_processes=8 run_evals_accelerate.py \
--model_args "pretrained=Qwen/Qwen1.5-0.5B-Chat" \
--use_chat_template \
--tasks "lighteval|math:algebra|0|0" \
--override_batch_size 1 \
--output_dir "./scratch/evals"
Curiously, limiting the number of samples works:
accelerate launch --multi_gpu --num_processes=8 run_evals_accelerate.py \
--model_args "pretrained=Qwen/Qwen1.5-0.5B-Chat" \
--use_chat_template \
--tasks "lighteval|math:algebra|0|0" \
--override_batch_size 1 \
--output_dir "./scratch/evals" \
--max_samples 10
Logs attached.
cc @clefourrier @NathanHB for viz
Some notes on this.
It looks like the math_normalizer
tries to parse a gold answer text:
The fraction $\\frac{1}{2y+1}$ fails to be defined only if the denominator is zero. This occurs when $y$ is the solution of the equation $$2y+1=0,$$ which is $y=-\\frac 12$. Thus the domain of $k(y)$ is $$\\boxed{\\left(-\\infty,-\\frac 12\\right)\\cup \\left(-\\frac 12,\\infty\\right)}.$$
This then fails in fix_fracs, which tries to parse the following:
(-\\infty,-\\frac12)\\cup(-\\frac12,\\infty)
and ends up failing when it tries to unpack:
a, b = substr # substr=12)\\cup(-
The parsing logic may need to be made more robust. Here is some example code from a private repo that may help with this:
import re
def last_boxed_only_string(string):
    """
    Return the last ``\\boxed...`` (or, failing that, ``\\fbox...``)
    expression found in a string, including the command itself.

    Args:
        string (str): Text that may contain LaTeX boxed/framed expressions.

    Returns:
        str or None: The substring running from the last ``\\boxed`` (or
            ``\\fbox``) up to and including the brace that balances its
            first opening brace; None when no such command exists or its
            braces never balance.
    """
    start = string.rfind("\\boxed")
    if start < 0:
        # No \boxed anywhere: fall back to the last \fbox.
        start = string.rfind("\\fbox")
    if start < 0:
        return None

    depth = 0
    for pos in range(start, len(string)):
        char = string[pos]
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            # Only a closing brace can complete the expression.
            if depth == 0:
                return string[start : pos + 1]
    return None
def remove_boxed(s):
    """
    Strip the surrounding LaTeX box command and return the content inside
    its braces.

    Both ``\\boxed{...}`` and ``\\fbox{...}`` wrappers are accepted.
    Validation uses explicit checks rather than ``assert`` so that the
    result does not change when Python runs with assertions disabled.

    Args:
        s (str): A string of the form ``\\boxed{...}`` or ``\\fbox{...}``.

    Returns:
        str or None: The text between the wrapper's opening brace and the
            final closing brace; None when ``s`` is not a string, does not
            start with one of the accepted prefixes, or does not end
            with ``}``.
    """
    if not isinstance(s, str) or not s.endswith("}"):
        return None
    for prefix in ("\\boxed{", "\\fbox{"):
        if s.startswith(prefix):
            return s[len(prefix) : -1]
    return None
def extract_boxed_answer(pred_str, strip_double_curly_brace=False):
    """
    Extract the answer held in the last LaTeX boxed expression of a
    prediction string.

    Args:
        pred_str (str): The string containing one or more LaTeX boxed
            expressions.
        strip_double_curly_brace (bool): If True and the extracted answer
            is itself wrapped in a single ``{...}`` pair, remove that
            additional layer of braces.

    Returns:
        str or None: The extracted answer; None when no boxed expression
            is found or its wrapper cannot be removed.
    """
    boxed_str = last_boxed_only_string(pred_str)
    if boxed_str is None:
        return None
    answer = remove_boxed(boxed_str)
    if answer is None:
        return None
    if strip_double_curly_brace:
        # Raw string: "\{" and "\}" are regex escapes, not valid Python
        # string escapes, so a plain literal triggers an invalid-escape
        # warning.
        match = re.match(r"^\{(.*)\}$", answer)
        if match:
            answer = match.group(1)
    return answer
# Ordered (before, after) literal replacements. Order matters: each entry
# operates on the output of the previous ones (spaces are already gone by
# the time the "\\text{and}" rules run).
_SUBSTITUTIONS = (
    ("an ", ""),
    ("a ", ""),
    (".$", "$"),
    ("\\$", ""),
    (r"\ ", ""),
    (" ", ""),
    ("mbox", "text"),
    (",\\text{and}", ","),
    ("\\text{and}", ","),
    ("\\text{m}", "\\text{}"),
    ("\\le", "<"),
)

# Literal fragments deleted outright, in this order, after the
# substitutions above have been applied.
_REMOVED_EXPRESSIONS = (
    "square",
    "ways",
    "integers",
    "dollars",
    "mph",
    "inches",
    "ft",
    "hours",
    "km",
    "units",
    "\\ldots",
    "sue",
    "points",
    "feet",
    "minutes",
    "digits",
    "cents",
    "degrees",
    "cm",
    "gm",
    "pounds",
    "meters",
    "meals",
    "edges",
    "students",
    "childrentickets",
    "multiples",
    "\\text{s}",
    "\\text{.}",
    "\\text{\ns}",
    "\\text{}^2",
    "\\text{}^3",
    "\\text{\n}",
    "\\text{}",
    r"\mathrm{th}",
    r"^\circ",
    r"^{\circ}",
    r"\;",
    r",\!",
    "{,}",
    '"',
    "\\dots",
    "\n",
    "\r",
    "\f",
)

# Patterns tried in sequence; whenever one matches, the answer is narrowed
# to the capture of its LAST match before the next pattern is tried.
_ANSWER_PATTERNS = (
    r"finalansweris(.*)",
    r"answer?is:?(.*)",
    r"oxed\{(.*?)\}",
    r"\$(.*?)\$",
)


def normalize_final_answer(final_answer: str) -> str:
    """
    Normalize a final answer to a quantitative reasoning question by
    removing or replacing various LaTeX and text elements.

    Steps, in order: drop everything from the first "Problem:" marker on;
    apply the literal substitutions and removals (which strip all spaces,
    newlines, carriage returns and form feeds); unwrap ``\\text``,
    ``\\textbf``, ``\\overline`` and ``\\boxed``; narrow to the text after
    "finalansweris"/"answeris", inside a leftover ``oxed{...}`` or between
    ``$...$``; repair shorthand ``\\frac``/``sqrt`` arguments; drop ``$``;
    and remove thousands separators from plain integers.

    Args:
        final_answer (str): The answer string to normalize.

    Returns:
        str: The normalized answer string.
    """
    # Keep only the text that precedes the first "Problem:" marker.
    match = re.search(r"(.*?)Problem:", final_answer, flags=re.S)
    if match:
        final_answer = match.group(1)

    for before, after in _SUBSTITUTIONS:
        final_answer = final_answer.replace(before, after)
    for expr in _REMOVED_EXPRESSIONS:
        final_answer = final_answer.replace(expr, "")

    # Extract answer that is in LaTeX math, is bold,
    # is surrounded by a box, etc.
    final_answer = re.sub(r"(\\text\{)(.*?)(\})", "\\2", final_answer)
    final_answer = re.sub(r"(\\textbf\{)(.*?)(\})", "\\2", final_answer)
    final_answer = re.sub(r"(\\overline\{)(.*?)(\})", "\\2", final_answer)
    final_answer = re.sub(r"(\\boxed\{)(.*)(\})", "\\2", final_answer)

    # "\n", "\r" and "\f" were all deleted by the removal pass above and
    # nothing since can reintroduce them, so no further check is needed.
    for pattern in _ANSWER_PATTERNS:
        found = re.findall(pattern, final_answer)
        if found:
            final_answer = found[-1]
    final_answer = final_answer.strip()

    # Restore a "\frac" whose leading "\f" was lost (the form feed is
    # deleted above). Only a "rac" that is not already part of "frac" is
    # repaired, so "\dfrac", "\tfrac" and a bare "frac" are left intact
    # instead of being turned into e.g. "\df\frac".
    if "rac" in final_answer and "\\frac" not in final_answer:
        final_answer = re.sub(r"(?<!f)rac", lambda _m: "\\frac", final_answer)

    # Brace shorthand arguments: "frac12" -> "frac{1}{2}", "sqrt2" -> "sqrt{2}".
    final_answer = re.sub(r"(frac)([^{])(.)", "frac{\\2}{\\3}", final_answer)
    final_answer = re.sub(r"(sqrt)([^{])", "sqrt{\\2}", final_answer)
    final_answer = final_answer.replace("$", "")

    # Drop thousands separators when the answer is otherwise all digits.
    without_commas = final_answer.replace(",", "")
    if without_commas.isdigit():
        final_answer = without_commas
    return final_answer
I am not able to run a simple test from the lighteval math tasks - I am getting "Too many values to unpack (expected 2)":
accelerate launch --multi_gpu --num_processes=4 run_evals_accelerate.py --model_args "pretrained=openchat/openchat-3.5-0106" --tasks "lighteval|math:algebra|0|0" --override_batch_size 1 --output_dir="./evals/"