Another error after setting up .env
albertlieyingadrian opened 2 months ago

Not sure if I set it up incorrectly, but I keep getting an "error reformating prompt" error when trying to run mipro.optimize.

Complete error logs:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[6], line 4
1 from ape.types import ResponseFormat
2 from ape.types.response_format import JsonSchema
----> 4 optimized_prompt = await mipro.optimize(
5 student=gsm8k_base_prompt,
6 task_description="Solve math problems, come up with short factoid answers.",
7 trainset=trainset,
8 testset=testset,
9 log_dir="./.gsm8k_logs", # all logs will be saved here
10 eval_kwargs={
11 "max_errors": 3,
12 },
13 max_bootstrapped_demos=5, # maximum number of fewshot examples to use
14 max_labeled_demos=5, # maximum number of labeled examples to use
15 max_steps=20, # maximum number of optimization steps
16 goal_score=1.0, # goal score to achieve, stop optimization if achieved
17 response_format=ResponseFormat(type="json_object"),
18 requires_permission_to_run=False,
19 )
File ~/Documents/Random/Ape-Starter-Template/venv/lib/python3.12/site-packages/ape/optimizer/mipro/mipro.py:331, in MIPRO.optimize(self, student, task_description, trainset, testset, max_steps, max_bootstrapped_demos, max_labeled_demos, goal_score, eval_kwargs, seed, minibatch, requires_permission_to_run, response_format, log_dir)
325 logger.info("starting optuna study")
327 study: optuna.Study = optuna.create_study(
328 direction="maximize",
329 sampler=optuna.samplers.TPESampler(seed=seed, multivariate=True),
330 )
--> 331 study.optimize(objective, n_trials=max_steps)
332 logger.info("finished optuna study")
333 if best_prompt is not None and self.track_stats:
File ~/Documents/Random/Ape-Starter-Template/venv/lib/python3.12/site-packages/optuna/study/study.py:475, in Study.optimize(self, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
373 def optimize(
374 self,
375 func: ObjectiveFuncType,
(...)
382 show_progress_bar: bool = False,
383 ) -> None:
384 """Optimize an objective function.
385
386 Optimization is done by choosing a suitable set of hyperparameter values from a given
(...)
473 If nested invocation of this method occurs.
474 """
--> 475 _optimize(
476 study=self,
477 func=func,
478 n_trials=n_trials,
479 timeout=timeout,
480 n_jobs=n_jobs,
481 catch=tuple(catch) if isinstance(catch, Iterable) else (catch,),
482 callbacks=callbacks,
483 gc_after_trial=gc_after_trial,
484 show_progress_bar=show_progress_bar,
485 )
File ~/Documents/Random/Ape-Starter-Template/venv/lib/python3.12/site-packages/optuna/study/_optimize.py:63, in _optimize(study, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
61 try:
62 if n_jobs == 1:
---> 63 _optimize_sequential(
64 study,
65 func,
66 n_trials,
67 timeout,
68 catch,
69 callbacks,
70 gc_after_trial,
71 reseed_sampler_rng=False,
72 time_start=None,
73 progress_bar=progress_bar,
74 )
75 else:
76 if n_jobs == -1:
File ~/Documents/Random/Ape-Starter-Template/venv/lib/python3.12/site-packages/optuna/study/_optimize.py:160, in _optimize_sequential(study, func, n_trials, timeout, catch, callbacks, gc_after_trial, reseed_sampler_rng, time_start, progress_bar)
157 break
159 try:
--> 160 frozen_trial = _run_trial(study, func, catch)
161 finally:
162 # The following line mitigates memory problems that can be occurred in some
163 # environments (e.g., services that use computing containers such as GitHub Actions).
164 # Please refer to the following PR for further details:
165 # https://github.com/optuna/optuna/pull/325.
166 if gc_after_trial:
File ~/Documents/Random/Ape-Starter-Template/venv/lib/python3.12/site-packages/optuna/study/_optimize.py:248, in _run_trial(study, func, catch)
241 assert False, "Should not reach."
243 if (
244 frozen_trial.state == TrialState.FAIL
245 and func_err is not None
246 and not isinstance(func_err, catch)
247 ):
--> 248 raise func_err
249 return frozen_trial
File ~/Documents/Random/Ape-Starter-Template/venv/lib/python3.12/site-packages/optuna/study/_optimize.py:197, in _run_trial(study, func, catch)
195 with get_heartbeat_thread(trial._trial_id, study._storage):
196 try:
--> 197 value_or_values = func(trial)
198 except exceptions.TrialPruned as e:
199 # TODO(mamu): Handle multi-objective cases.
200 state = TrialState.PRUNED
File ~/Documents/Random/Ape-Starter-Template/venv/lib/python3.12/site-packages/ape/optimizer/mipro/mipro.py:248, in MIPRO.optimize.<locals>.objective(trial)
243 trial_logs[trial.number]["prompt_path"] = save_candidate_prompt(
244 candidate_prompt, log_dir, trial.number
245 )
247 batch_size: int = self._get_batch_size(minibatch, trainset)
--> 248 score: float = run_async(
249 eval_candidate_prompt(batch_size, trainset, candidate_prompt, evaluate)
250 )
252 categorical_key: str = ",".join(map(str, chosen_params))
253 param_score_dict[categorical_key].append((score, candidate_prompt))
File ~/Documents/Random/Ape-Starter-Template/venv/lib/python3.12/site-packages/ape/utils/__init__.py:111, in run_async(coroutine)
108 raise
110 loop = asyncio.get_event_loop()
--> 111 return loop.run_until_complete(coroutine)
112 else:
113 return asyncio.run(coroutine)
File ~/Documents/Random/Ape-Starter-Template/venv/lib/python3.12/site-packages/nest_asyncio.py:98, in _patch_loop.<locals>.run_until_complete(self, future)
95 if not f.done():
96 raise RuntimeError(
97 'Event loop stopped before Future completed.')
---> 98 return f.result()
File /opt/homebrew/Cellar/python@3.12/3.12.6/Frameworks/Python.framework/Versions/3.12/lib/python3.12/asyncio/futures.py:203, in Future.result(self)
201 self.__log_traceback = False
202 if self._exception is not None:
--> 203 raise self._exception.with_traceback(self._exception_tb)
204 return self._result
File /opt/homebrew/Cellar/python@3.12/3.12.6/Frameworks/Python.framework/Versions/3.12/lib/python3.12/asyncio/tasks.py:316, in Task.__step_run_and_handle_result(***failed resolving arguments***)
314 result = coro.send(None)
315 else:
--> 316 result = coro.throw(exc)
317 except StopIteration as exc:
318 if self._must_cancel:
319 # Task is cancelled right before coro stops.
File ~/Documents/Random/Ape-Starter-Template/venv/lib/python3.12/site-packages/ape/optimizer/utils.py:188, in eval_candidate_prompt(batch_size, trainset, candidate_prompt, evaluate)
185 score = await evaluate(candidate_prompt, testset=trainset, display_table=0)
186 # Or evaluate on a minibatch
187 else:
--> 188 score = await evaluate(
189 candidate_prompt,
190 testset=create_minibatch(trainset, batch_size),
191 display_table=0,
192 )
193 if isinstance(score, tuple):
194 score = score[0]
File ~/Documents/Random/Ape-Starter-Template/venv/lib/python3.12/site-packages/ape/evaluate/evaluate.py:78, in Evaluate.__call__(self, prompt, metric, global_metric, testset, **kwargs)
76 config = self._update_config(metric, global_metric, testset, **kwargs)
77 self.total_score = 0
---> 78 results: List[EvaluationResult] = await self._process_testset(prompt, config)
79 global_result = await self._compute_global_metric(results, config)
80 return self._prepare_output(results, global_result, config)
File ~/Documents/Random/Ape-Starter-Template/venv/lib/python3.12/site-packages/ape/evaluate/evaluate.py:139, in Evaluate._process_testset(self, prompt, config)
128 with tqdm.tqdm(
129 total=len(config.testset),
130 disable=not config.display_progress,
(...)
133 leave=True,
134 ) as pbar:
135 tasks = [
136 self._bounded_process_item(process_item, item, pbar, config)
137 for item in config.testset
138 ]
--> 139 results: List[EvaluationResult] = await asyncio.gather(*tasks)
140 return results
File /opt/homebrew/Cellar/python@3.12/3.12.6/Frameworks/Python.framework/Versions/3.12/lib/python3.12/asyncio/tasks.py:385, in Task.__wakeup(self, future)
383 def __wakeup(self, future):
384 try:
--> 385 future.result()
386 except BaseException as exc:
387 # This may also be a cancellation.
388 self.__step(exc)
File /opt/homebrew/Cellar/python@3.12/3.12.6/Frameworks/Python.framework/Versions/3.12/lib/python3.12/asyncio/tasks.py:314, in Task.__step_run_and_handle_result(***failed resolving arguments***)
310 try:
311 if exc is None:
312 # We use the `send` method directly, because coroutines
313 # don't have `__iter__` and `__next__` methods.
--> 314 result = coro.send(None)
315 else:
316 result = coro.throw(exc)
File ~/Documents/Random/Ape-Starter-Template/venv/lib/python3.12/site-packages/ape/evaluate/evaluate.py:144, in Evaluate._bounded_process_item(self, process_func, item, pbar, config)
142 async def _bounded_process_item(self, process_func, item, pbar, config):
143 async with asyncio.Semaphore(config.batch_size):
--> 144 result = await process_func(item)
145 self._update_progress(pbar, result.score)
146 return result
File ~/Documents/Random/Ape-Starter-Template/venv/lib/python3.12/site-packages/ape/evaluate/evaluate.py:125, in Evaluate._process_testset.<locals>.process_item(example)
123 return result
124 except Exception as e:
--> 125 self._handle_error(e, config)
126 return EvaluationResult(example=outputs, prediction={}, score=0.0)
File ~/Documents/Random/Ape-Starter-Template/venv/lib/python3.12/site-packages/ape/evaluate/evaluate.py:160, in Evaluate._handle_error(self, error, config)
158 self.error_count += 1
159 if self.error_count >= config.max_errors:
--> 160 raise error
161 logger.error(f"Error processing example: {error}")
File ~/Documents/Random/Ape-Starter-Template/venv/lib/python3.12/site-packages/ape/evaluate/evaluate.py:121, in Evaluate._process_testset.<locals>.process_item(example)
119 if not prediction:
120 raise ValueError("Prediction is None")
--> 121 result = await config.metric(inputs=inputs, gold=outputs, pred=prediction, trace=None, metadata=metadata)
122 result = EvaluationResult(example=example, prediction=prediction, score=result.score, intermediate_values=result.intermediate_values)
123 return result
File ~/Documents/Random/Ape-Starter-Template/venv/lib/python3.12/site-packages/ape/metric/metric_base.py:53, in BaseMetric.__call__(self, inputs, gold, pred, trace, metadata)
32 async def __call__(
33 self,
34 inputs: Dict[str, Any],
(...)
38 metadata: Optional[Dict] = None,
39 ) -> MetricResult:
40 """
41 Unified method to compute the metric, handling both sync and async implementations.
42
(...)
51 MetricResult: An object containing the score and intermediate values.
52 """
---> 53 result = self.compute(inputs, gold, pred, trace, metadata)
54 if asyncio.iscoroutine(result):
55 return await result
TypeError: GSM8KMetric.compute() takes from 4 to 5 positional arguments but 6 were given
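From the last frame, BaseMetric.__call__ passes five positional arguments to compute() (inputs, gold, pred, trace, metadata), so a compute() defined without the metadata parameter raises exactly this arity error. Below is a minimal sketch of a compatible signature; the MetricResult import path, its constructor keywords, and the exact-match scoring are assumptions for illustration, not the starter template's actual code.

from typing import Any, Dict, Optional

from ape.metric.metric_base import BaseMetric
from ape.types import MetricResult  # import path is a guess; adjust to wherever Ape exports MetricResult


class GSM8KMetric(BaseMetric):
    # compute() has to accept every argument that BaseMetric.__call__
    # passes positionally: inputs, gold, pred, trace, metadata.
    def compute(
        self,
        inputs: Dict[str, Any],
        gold: Any,
        pred: Any,
        trace: Optional[Dict] = None,
        metadata: Optional[Dict] = None,
    ) -> MetricResult:
        # Hypothetical scoring, assuming gold and pred are dicts with an
        # "answer" field; the real template metric may parse differently.
        score = 1.0 if str(pred.get("answer")) == str(gold.get("answer")) else 0.0
        return MetricResult(score=score, intermediate_values={})

Whatever the final scoring logic looks like, the key point is the parameter list: once compute() accepts metadata (even if unused), the call at metric_base.py:53 no longer fails.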
Created a PR to fix this issue: https://github.com/weavel-ai/Ape-Starter-Template/pull/3
I also needed to comment out these lines.