stanfordnlp / dspy

DSPy: The framework for programming—not prompting—language models
https://dspy.ai
MIT License

ZeroDivisionError: division by zero when using MIPROv2 #1190

Closed: leegang closed this issue 5 months ago

leegang commented 5 months ago
--------------------------------------------------------------------------
ZeroDivisionError                         Traceback (most recent call last)
File <timed exec>:3

File /opt/conda/lib/python3.10/site-packages/dspy/teleprompt/mipro_optimizer_v2.py:489, in MIPROv2.compile(self, student, trainset, valset, num_batches, max_bootstrapped_demos, max_labeled_demos, eval_kwargs, seed, minibatch, program_aware_proposer, requires_permission_to_run)
    487 sampler = optuna.samplers.TPESampler(seed=seed, multivariate=True)
    488 study = optuna.create_study(direction="maximize", sampler=sampler)
--> 489 score = study.optimize(objective_function, n_trials=num_batches)
    491 if best_program is not None and self.track_stats:
    492     best_program.trial_logs = trial_logs

File /opt/conda/lib/python3.10/site-packages/optuna/study/study.py:451, in Study.optimize(self, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
    348 def optimize(
    349     self,
    350     func: ObjectiveFuncType,
   (...)
    357     show_progress_bar: bool = False,
    358 ) -> None:
    359     """Optimize an objective function.
    360 
    361     Optimization is done by choosing a suitable set of hyperparameter values from a given
   (...)
    449             If nested invocation of this method occurs.
    450     """
--> 451     _optimize(
    452         study=self,
    453         func=func,
    454         n_trials=n_trials,
    455         timeout=timeout,
    456         n_jobs=n_jobs,
    457         catch=tuple(catch) if isinstance(catch, Iterable) else (catch,),
    458         callbacks=callbacks,
    459         gc_after_trial=gc_after_trial,
    460         show_progress_bar=show_progress_bar,
    461     )

File /opt/conda/lib/python3.10/site-packages/optuna/study/_optimize.py:62, in _optimize(study, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
     60 try:
     61     if n_jobs == 1:
---> 62         _optimize_sequential(
     63             study,
     64             func,
     65             n_trials,
     66             timeout,
     67             catch,
     68             callbacks,
     69             gc_after_trial,
     70             reseed_sampler_rng=False,
     71             time_start=None,
     72             progress_bar=progress_bar,
     73         )
     74     else:
     75         if n_jobs == -1:

File /opt/conda/lib/python3.10/site-packages/optuna/study/_optimize.py:159, in _optimize_sequential(study, func, n_trials, timeout, catch, callbacks, gc_after_trial, reseed_sampler_rng, time_start, progress_bar)
    156         break
    158 try:
--> 159     frozen_trial = _run_trial(study, func, catch)
    160 finally:
    161     # The following line mitigates memory problems that can be occurred in some
    162     # environments (e.g., services that use computing containers such as GitHub Actions).
    163     # Please refer to the following PR for further details:
    164     # https://github.com/optuna/optuna/pull/325.
    165     if gc_after_trial:

File /opt/conda/lib/python3.10/site-packages/optuna/study/_optimize.py:247, in _run_trial(study, func, catch)
    240         assert False, "Should not reach."
    242 if (
    243     frozen_trial.state == TrialState.FAIL
    244     and func_err is not None
    245     and not isinstance(func_err, catch)
    246 ):
--> 247     raise func_err
    248 return frozen_trial

File /opt/conda/lib/python3.10/site-packages/optuna/study/_optimize.py:196, in _run_trial(study, func, catch)
    194 with get_heartbeat_thread(trial._trial_id, study._storage):
    195     try:
--> 196         value_or_values = func(trial)
    197     except exceptions.TrialPruned as e:
    198         # TODO(mamu): Handle multi-objective cases.
    199         state = TrialState.PRUNED

File /opt/conda/lib/python3.10/site-packages/dspy/teleprompt/mipro_optimizer_v2.py:465, in MIPROv2.compile.<locals>.create_objective.<locals>.objective(trial)
    463 # If the best score was updated, do a full eval on the dev set
    464 if best_score_updated:
--> 465     full_dev_score = evaluate(
    466         best_program,
    467         devset=valset,
    468         display_table=0,
    469     )
    470     if self.wandb_run_id:
    471         wandb.log(
    472             {
    473                 "best_prog_so_far_train_score": best_score,
    474                 "best_prog_so_far_dev_score": full_dev_score,
    475             },
    476         )

File /opt/conda/lib/python3.10/site-packages/dspy/evaluate/evaluate.py:200, in Evaluate.__call__(self, program, metric, devset, num_threads, display_progress, display_table, return_all_scores, return_outputs)
    192 else:
    193     reordered_devset, ncorrect, ntotal = self._execute_multi_thread(
    194         wrapped_program,
    195         devset,
    196         num_threads,
    197         display_progress,
    198     )
--> 200 dspy.logger.info(f"Average Metric: {ncorrect} / {ntotal} ({round(100 * ncorrect / ntotal, 1)}%)")
    202 predicted_devset = sorted(reordered_devset)
    204 if return_outputs:  # Handle the return_outputs logic

ZeroDivisionError: division by zero
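
For context on the failure: the logging line divides by ntotal, the number of dev-set examples that were evaluated, so the error is raised whenever the evaluation runs over an empty valset. A minimal standalone reproduction of just that expression (plain Python, not DSPy code):

ncorrect, ntotal = 0, 0  # nothing was evaluated, e.g. an empty valset
print(f"Average Metric: {ncorrect} / {ntotal} ({round(100 * ncorrect / ntotal, 1)}%)")
# -> ZeroDivisionError: division by zero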

The metric function:

def metric(gold, pred, trace=None):
    try:
        question, answer = gold.question, gold.answer
        original = pred.get('answer')

        # Check that the "answer" key exists
        assert original is not None, "The prediction does not contain an 'answer' key."

        # Extract numbers from the prediction text
        original_number = extract_number(original)

        # Check that at least one number was extracted
        assert original_number, "No numbers extracted from the prediction."

        # Take the last extracted number
        original_number = original_number[-1]

        logging.info(f"Answer: {answer}")
        logging.info(f"Predicted Answer: {original_number}")

        # Compute the score
        score = 1 if answer == original_number else 0
        return score

    except ValueError as e:
        logging.error(f"ValueError: {e}")
        return 0
    except TypeError as e:
        logging.error(f"TypeError: {e}")
        return 0
    except AttributeError as e:
        logging.error(f"AttributeError: {e}")
        return 0
    except Exception as e:
        logging.error(f"Error in metric calculation: {e}")
        return 0

The program code:

class COT(dspy.Module):
    def __init__(self):
        super().__init__()
        self.generate = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        try:
            pred = self.generate(question=question)
            print(pred.answer)
        except Exception as e:
            print(f"Error in generate: {e}")
            pred = None

        if pred is None:
            pred = {
                "question": question,
                "answer": [1,2]
            }

        pred['answer'] = pred.get('answer', [2,1])
        return pred

The signature code:

class GenerateAnswer(dspy.Signature):
    """Solve math questions."""

    question = dspy.InputField()
    answer = dspy.OutputField(desc='An integer from 0 to 999', format=str)

leegang commented 5 months ago

A temporary fix is proposed in https://github.com/stanfordnlp/dspy/pull/1193

arnavsinghvi11 commented 5 months ago

Hi @leegang, thanks for raising this error. This results from not specifying a valset when compiling with the optimizer.

We may want to update this to match the existing behavior of defaulting to the trainset when no valset is specified (as BootstrapFewShotRandomSearch does), but we will confirm shortly.
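
Until that change lands, one way to avoid the error is to pass an explicit, non-empty valset at compile time. A minimal sketch, assuming trainset is a list of dspy.Example objects; the 80/20 split, the trial count, and the MIPROv2 constructor call are illustrative (extra constructor arguments may be required depending on the dspy version), while the compile() keyword names come from the traceback above:

from dspy.teleprompt import MIPROv2

# Split off an explicit dev set so the internal Evaluate call never sees an empty valset.
split = int(0.8 * len(trainset))
optimizer = MIPROv2(metric=metric)  # illustrative; other constructor args may be needed
compiled_program = optimizer.compile(
    COT(),
    trainset=trainset[:split],
    valset=trainset[split:],  # explicitly provided, non-empty
    num_batches=30,  # illustrative trial count
    max_bootstrapped_demos=4,
    max_labeled_demos=4,
    eval_kwargs=dict(num_threads=8, display_progress=True),
    requires_permission_to_run=False,
)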

For more context on these data splits, please refer to #1181