Closed colorwlof closed 2 weeks ago
To address the issues in the `output_type_template.tmpl` file in version 2.0.4:
Incorrect type field: Ensure that the `type` field is set to "answer" instead of "value".
Prevent Reuse of Last Result: Add logic to handle unwanted results and prevent the system from reusing the last result indefinitely.
Here is a possible solution:
class CodeExecution(BaseLogicUnit):
    # ... other methods ...
    def execute(self, input: Any, **kwargs) -> Any:
        """Run the given code, validate its result, and retry on failure.

        Args:
            input: The code to run on the first attempt.
            **kwargs: Must provide ``context`` (the pipeline context) and
                ``logger``.

        Returns:
            A ``LogicUnitOutput`` wrapping the validated result and its
            serialized form.

        Raises:
            Exception: Whatever the last attempt raised (including
                ``InvalidLLMOutputType`` / ``InvalidOutputValueMismatch``
                from validation) once the error correction framework is
                disabled or ``max_retries`` has been reached.
        """
        self.context: PipelineContext = kwargs.get("context")
        self._dfs = self.context.dfs
        self._config = self.context.config
        self._additional_dependencies = self.context.get("additional_dependencies", [])
        self._current_code_executed = self.context.get("current_code_executed")
        self.logger: Logger = kwargs.get("logger")

        retry_count = 0
        code_to_run = input
        result = None

        while retry_count <= self.context.config.max_retries:
            try:
                # NOTE(review): `code_context` is not defined anywhere in this
                # snippet. As written this raises NameError, which the
                # `except Exception` below catches and routes into the retry
                # path. It must be created before the loop - TODO confirm
                # against the full implementation.
                result = self.execute_code(code_to_run, code_context)

                # The walrus test alone is enough: an empty or missing
                # output type is falsy, so no separate `!= ""` lookup is
                # needed.
                if output_helper := self.context.get("output_type"):
                    validation_ok, validation_errors = OutputValidator.validate(
                        output_helper, result
                    )
                    if not validation_ok:
                        raise InvalidLLMOutputType(validation_errors)

                if not OutputValidator.validate_result(result):
                    raise InvalidOutputValueMismatch(
                        f'Value type {type(result["value"])} must match with type {result["type"]}'
                    )

                # Ensure the type field is set to "answer"
                # NOTE(review): this overwrites the type that was validated
                # just above. "answer" is not one of the types that
                # OutputValidator.validate_result recognises ("number",
                # "string", "dataframe", "plot") - confirm that consumers of
                # the result do not rely on the original type.
                result["type"] = "answer"
                break

            except Exception as e:
                traceback_errors = traceback.format_exc()
                self.logger.log(f"Failed with error: {traceback_errors}", logging.ERROR)
                if self.on_failure:
                    self.on_failure(code_to_run, traceback_errors)

                if (
                    not self.context.config.use_error_correction_framework
                    or retry_count >= self.context.config.max_retries
                ):
                    # Bare raise re-raises the active exception with its
                    # original traceback.
                    raise

                retry_count += 1
                self.logger.log(
                    f"Failed to execute code retrying with a correction framework "
                    f"[retry number: {retry_count}]",
                    level=logging.WARNING,
                )
                # Ask the correction step for a new version of the code,
                # passing it the error that was just raised.
                code_to_run = self._retry_run_code(
                    code_to_run, self.context, self.logger, e
                )

        return LogicUnitOutput(
            result,
            True,
            "Code Executed Successfully",
            {"content_type": "response", "value": ResponseSerializer.serialize(result)},
            final_track_output=True,
        )
Additionally, you can use the `OutputValidator` class to ensure the validation of the `type` and `value` fields:
class OutputValidator:
    """Validate the ``{"type": ..., "value": ...}`` result dict of executed code.

    All checks are static methods; the class holds no instance state.
    """

    # Types accepted as a "number". Shared by validate_value and
    # validate_result so that both checks agree (np.int64 is not a
    # subclass of int and has to be listed explicitly).
    _NUMBER_TYPES = (int, float, np.int64)

    # Pattern a plot value must match when it is given as a path string.
    _PATH_TO_PLOT_PATTERN = re.compile(
        r"^(\/[\w.-]+)+(/[\w.-]+)*$|^[^\s/]+(/[\w.-]+)*$"
    )

    @staticmethod
    def validate(expected_type, result: dict[str, Any]) -> tuple[bool, Iterable[str]]:
        """Check ``result`` against the type the caller asked for.

        Args:
            expected_type: The requested output type; a falsy value disables
                both checks.
            result: The result dict; its "type" and "value" entries are read
                with ``dict.get``, so missing keys are treated as ``None``.

        Returns:
            A pair ``(ok, logs)``: ``ok`` is True only if both the type and
            the value check pass, ``logs`` holds one message per failed check.
        """
        validation_logs = []
        result_type, result_value = result.get("type"), result.get("value")

        type_ok = OutputValidator.validate_type(result_type, expected_type)
        if not type_ok:
            validation_logs.append(
                f"The result dict contains inappropriate 'type'. "
                f"Expected '{expected_type}', actual '{result_type}'."
            )

        value_ok = OutputValidator.validate_value(result_value, expected_type)
        if not value_ok:
            validation_logs.append(
                f"result value {repr(result_value)} seems to be inappropriate "
                f"for the type '{expected_type}'."
            )

        return all((type_ok, value_ok)), validation_logs

    @staticmethod
    def validate_type(actual_type: Any, expected_type: str) -> bool:
        """Return True if ``actual_type`` equals ``expected_type``.

        A falsy ``expected_type`` means "no constraint" and always passes.
        """
        return actual_type == expected_type if expected_type else True

    @staticmethod
    def validate_value(value: Any, expected_type: str) -> bool:
        """Return True if ``value`` is acceptable for ``expected_type``.

        A falsy ``expected_type`` always passes; an unrecognised one fails.
        """
        if not expected_type:
            return True
        if expected_type == "number":
            return isinstance(value, OutputValidator._NUMBER_TYPES)
        if expected_type == "string":
            return isinstance(value, str)
        if expected_type == "dataframe":
            return isinstance(value, (pd.DataFrame, pd.Series))
        if expected_type == "plot":
            if isinstance(value, dict):
                return True
            if not isinstance(value, str):
                return False
            return bool(OutputValidator._PATH_TO_PLOT_PATTERN.match(value))
        # Unrecognised expected type: fail explicitly instead of returning None.
        return False

    @staticmethod
    def validate_result(result: dict) -> bool:
        """Check that ``result["value"]`` matches the type in ``result["type"]``.

        Args:
            result: The result dict; must contain "type" and "value".

        Returns:
            True if the value fits the declared type, False if the type is
            empty or unrecognised or the value does not fit.

        Raises:
            InvalidOutputValueMismatch: If ``result`` is not a dict or lacks
                the "type" or "value" key.
        """
        if (
            not isinstance(result, dict)
            or "type" not in result
            or "value" not in result
        ):
            # A missing "value" is reported here rather than surfacing later
            # as a bare KeyError.
            raise InvalidOutputValueMismatch(
                "Result must be in the format of dictionary of type and value"
            )

        result_type, value = result["type"], result["value"]
        if not result_type:
            return False
        if result_type == "number":
            return isinstance(value, OutputValidator._NUMBER_TYPES)
        if result_type == "string":
            return isinstance(value, str)
        if result_type == "dataframe":
            return isinstance(value, (pd.DataFrame, pd.Series))
        if result_type == "plot":
            # Any object whose type repr mentions plotly is accepted as a plot.
            if "plotly" in repr(type(value)):
                return True
            if isinstance(value, dict):
                return True
            if not isinstance(value, str):
                return False
            # Inline base64 PNG data is accepted without the path check.
            if "data:image/png;base64" in value:
                return True
            return bool(OutputValidator._PATH_TO_PLOT_PATTERN.match(value))
        # Unrecognised type: fail explicitly instead of returning None.
        return False
This code ensures that the `type` field is always set to "answer" and includes logic to handle retries and prevent the system from reusing the last result indefinitely [1].
To continue talking to Dosu, mention @dosu.
@colorwlof I'm sorry, I don't understand the first point. You can disable the cache for the second point.
System Info
2.0.4
🐛 Describe the bug
type (possible values "string", "number", "dataframe", "plot"). Example: { "type": "string",.........."value": f"The highest salary is {highest_salary}." }
Because the prompt only gives this as an example, the generated result sometimes uses "value" instead of "answer". I think specifying an explicit format would work better.
The other bug: if the AI creates a result that is not what I want, it keeps returning that last result forever, until I delete the cache or disable the cache.