ShishirPatil / gorilla

Gorilla: Training and Evaluating LLMs for Function Calls (Tool Calls)
https://gorilla.cs.berkeley.edu/
Apache License 2.0
11.52k stars 1.01k forks source link

[BFCL] Chore: Improve Inference Log Readability #746

Closed HuanzhiMao closed 2 weeks ago

HuanzhiMao commented 3 weeks ago

This PR improves the readability of the inference log; makes it read more like a conversation-style. The instruction on how to interpret/understand the log is also included. The inference log is included as part of the result file.

The score file is also improved in structure for better readability.

This will not change the leaderboard score.

Here is an example inference log.

"inference_log": [
    {
        "begin_of_turn_query": [
            {
                "role": "user",
                "content": "First cd within the 'ResearchDocs' directory and then locate any files titled 'report.csv'."
            }
        ],
        "step_0": [
            {
                "role": "assistant",
                "content": [
                    {
                        "cd": "{\"folder\":\"ResearchDocs\"}"
                    }
                ]
            },
            {
                "role": "handler_log",
                "content": "Successfully decoded model response.",
                "model_response_decoded": [
                    "cd(folder='ResearchDocs')"
                ]
            },
            {
                "role": "tool",
                "content": "{\"current_working_directory\": \"ResearchDocs\"}"
            }
        ],
        "step_1": [
            {
                "role": "assistant",
                "content": [
                    {
                        "find": "{\"path\":\"ResearchDocs\",\"name\":\"report.csv\"}"
                    }
                ]
            },
            {
                "role": "handler_log",
                "content": "Successfully decoded model response.",
                "model_response_decoded": [
                    "find(path='ResearchDocs',name='report.csv')"
                ]
            },
            {
                "role": "tool",
                "content": "{\"matches\": [\"ResearchDocs/report.csv\"]}"
            }
        ],
        "step_2": [
            {
                "role": "assistant",
                "content": "The file titled `report.csv` has been located in the `ResearchDocs` directory."
            },
            {
                "role": "handler_log",
                "content": "Error decoding the model response. Proceed to next turn.",
                "error": "'str' object has no attribute 'items'"
            }
        ]
    },
    {
        "begin_of_turn_query": [
            {
                "role": "user",
                "content": "Once you locate 'report.csv' in the 'ResearchDocs' directory, delve into it to find lines that reference the 'Quarterly Financial Overview'."
            }
        ],
        "step_0": [
            {
                "role": "assistant",
                "content": [
                    {
                        "grep": "{\"file_name\":\"report.csv\",\"pattern\":\"Quarterly Financial Overview\"}"
                    }
                ]
            },
            {
                "role": "handler_log",
                "content": "Successfully decoded model response.",
                "model_response_decoded": [
                    "grep(file_name='report.csv',pattern='Quarterly Financial Overview')"
                ]
            },
            {
                "role": "tool",
                "content": "{\"matching_lines\": [\"Line 1: Introduction Line 2: Quarterly Financial Overview Line 3: Details Line 4: More Details Line 5: Quarterly Financial Overview Line 6: Conclusion Line 7: Quarterly Financial Overview Line 8: Quarter has been successful. Line 9: Quarterly Financial Overview Line 10: Final Thoughts\"]}"
            }
        ],
        "step_1": [
            {
                "role": "assistant",
                "content": "The following lines in `report.csv` reference the \"Quarterly Financial Overview\":\n\n- Line 2: Quarterly Financial Overview\n- Line 5: Quarterly Financial Overview\n- Line 7: Quarterly Financial Overview\n- Line 9: Quarterly Financial Overview"
            },
            {
                "role": "handler_log",
                "content": "Error decoding the model response. Proceed to next turn.",
                "error": "'str' object has no attribute 'items'"
            }
        ]
    },
    {
        "begin_of_turn_query": [
            {
                "role": "user",
                "content": "Extract and display the last five lines from the 'report.csv' file in the 'ResearchDocs' directory, as these need to be emphasized for recent updates."
            }
        ],
        "step_0": [
            {
                "role": "assistant",
                "content": [
                    {
                        "tail": "{\"file_name\":\"report.csv\",\"lines\":5}"
                    }
                ]
            },
            {
                "role": "handler_log",
                "content": "Successfully decoded model response.",
                "model_response_decoded": [
                    "tail(file_name='report.csv',lines=5)"
                ]
            },
            {
                "role": "tool",
                "content": "{\"last_lines\": \"Line 1: Introduction Line 2: Quarterly Financial Overview Line 3: Details Line 4: More Details Line 5: Quarterly Financial Overview Line 6: Conclusion Line 7: Quarterly Financial Overview Line 8: Quarter has been successful. Line 9: Quarterly Financial Overview Line 10: Final Thoughts\"}"
            }
        ],
        "step_1": [
            {
                "role": "assistant",
                "content": "The last five lines from the `report.csv` file are as follows:\n\n- Line 6: Conclusion\n- Line 7: Quarterly Financial Overview\n- Line 8: Quarter has been successful.\n- Line 9: Quarterly Financial Overview\n- Line 10: Final Thoughts"
            },
            {
                "role": "handler_log",
                "content": "Error decoding the model response. Proceed to next turn.",
                "error": "'str' object has no attribute 'items'"
            }
        ]
    },
    {
        "begin_of_turn_query": [
            {
                "role": "user",
                "content": "Logging in as USR001. Lastly, upon completion of our file review, kindly message my colleague, John Levy, add him as new contact, that 'Latest Quarter Performance has been well.'"
            }
        ],
        "step_0": [
            {
                "role": "assistant",
                "content": [
                    {
                        "message_login": "{\"user_id\": \"USR001\"}"
                    },
                    {
                        "add_contact": "{\"user_name\": \"John Levy\"}"
                    }
                ]
            },
            {
                "role": "handler_log",
                "content": "Successfully decoded model response.",
                "model_response_decoded": [
                    "message_login(user_id='USR001')",
                    "add_contact(user_name='John Levy')"
                ]
            },
            {
                "role": "tool",
                "content": "{\"login_status\": true, \"message\": \"User 'USR001' logged in successfully.\"}"
            },
            {
                "role": "tool",
                "content": "{\"added_status\": true, \"user_id\": \"USR005\", \"message\": \"Contact 'John Levy' added successfully.\"}"
            }
        ],
        "step_1": [
            {
                "role": "assistant",
                "content": [
                    {
                        "get_user_id": "{\"user\":\"John Levy\"}"
                    }
                ]
            },
            {
                "role": "handler_log",
                "content": "Successfully decoded model response.",
                "model_response_decoded": [
                    "get_user_id(user='John Levy')"
                ]
            },
            {
                "role": "tool",
                "content": "{\"user_id\": \"USR005\"}"
            }
        ],
        "step_2": [
            {
                "role": "assistant",
                "content": [
                    {
                        "send_message": "{\"receiver_id\":\"USR005\",\"message\":\"Latest Quarter Performance has been well.\"}"
                    }
                ]
            },
            {
                "role": "handler_log",
                "content": "Successfully decoded model response.",
                "model_response_decoded": [
                    "send_message(receiver_id='USR005',message='Latest Quarter Performance has been well.')"
                ]
            },
            {
                "role": "tool",
                "content": "{\"sent_status\": true, \"message_id\": {\"new_id\": 67410}, \"message\": \"Message sent to 'USR005' successfully.\"}"
            }
        ],
        "step_3": [
            {
                "role": "assistant",
                "content": "You have been successfully logged in as **USR001**, and your colleague **John Levy** has been added as a new contact. The message \"Latest Quarter Performance has been well.\" has been sent to him successfully."
            },
            {
                "role": "handler_log",
                "content": "Error decoding the model response. Proceed to next turn.",
                "error": "'str' object has no attribute 'items'"
            }
        ]
    }
]