microsoft / autogen

A programming framework for agentic AI 🤖
https://microsoft.github.io/autogen/

[Bug]: Async a_initiate_chat autogen workflow interrupts for human input feedback more often than the sync initiate_chat agent conversation flow #3575

Open b4codify opened 2 months ago

b4codify commented 2 months ago

Describe the bug

The async a_initiate_chat agent conversation interrupts for human feedback more often than the same codebase using the sync initiate_chat workflow, which asks for human feedback only once. Moreover, in the async flow the generated code is not executed after the user skips the first console prompt (provides no feedback); instead it asks for feedback again, and only when the user skips a second time is the generated code executed. Refer to the attached console log and video recording.

This behavior of asking for human feedback multiple times becomes very chaotic when more agents are involved and completely breaks the application logic, especially because the async a_initiate_chat() agent workflow keeps requesting the same feedback repeatedly.

Here is the output of both the Sync and Async programs for reference.

SYNC output:

--------------------------------------------------------------------------------
coder (to user_proxy):

To get the date today, we can use the datetime module in Python. Here's a simple script:

'''python
# date_script.py
import datetime

def get_date():
    today = datetime.date.today()
    return today

print(get_date())
'''

Please execute this script to get the date today.

--------------------------------------------------------------------------------
Replying as user_proxy. Provide feedback to coder. Press enter to skip and use auto-reply, or type 'exit' to end the conversation: 

>>>>>>>> NO HUMAN INPUT RECEIVED.

>>>>>>>> USING AUTO REPLY...

>>>>>>>> EXECUTING CODE BLOCK (inferred language is python)...
user_proxy (to coder):

exitcode: 0 (execution succeeded)
Code output: 2024-09-26

--------------------------------------------------------------------------------
coder (to user_proxy):

TERMINATE

ASYNC output

--------------------------------------------------------------------------------
coder (to user_proxy):

To get the date today, we can use the datetime module in Python. Here's a simple script:

'''python
# date_script.py
import datetime

def get_date():
    today = datetime.date.today()
    return today

print(get_date())
'''

Please execute this script to get the date today.

--------------------------------------------------------------------------------
Replying as user_proxy. Provide feedback to coder. Press enter to skip and use auto-reply, or type 'exit' to end the conversation: 

>>>>>>>> NO HUMAN INPUT RECEIVED.

>>>>>>>> USING AUTO REPLY...
Replying as user_proxy. Provide feedback to coder. Press enter to skip and use auto-reply, or type 'exit' to end the conversation: 

>>>>>>>> NO HUMAN INPUT RECEIVED.

>>>>>>>> USING AUTO REPLY...

>>>>>>>> EXECUTING CODE BLOCK (inferred language is python)...
user_proxy (to coder):

exitcode: 0 (execution succeeded)
Code output: 2024-09-26

--------------------------------------------------------------------------------

Steps to reproduce

Refer to the Sync and Async code below to reproduce.

SYNC Code

import os
from autogen import (
    ConversableAgent,
    GroupChat,
    GroupChatManager,
)
from autogen.cache import Cache
from autogen.coding import DockerCommandLineCodeExecutor

from dotenv import load_dotenv
from model_config import ModelConfig

load_dotenv()
llm_env_config = ModelConfig.from_env()
os.makedirs("agent_code", exist_ok=True)

# Litellm config
litellm_config_list = [
    {
        "model": llm_env_config.model_name,
        "api_key": llm_env_config.api_key,
        "base_url": llm_env_config.api_url,
        "temperature": llm_env_config.temperature,
        "cache_seed": None,
        "price": [0, 0],
    },
]

config_list = {
    "config_list": litellm_config_list,
    "temperature": llm_env_config.temperature,
    "cache_seed": None,
}

def initialize_agents():

    code_cmd_executor = DockerCommandLineCodeExecutor(
        work_dir="agent_code",
        timeout=60,
    )

    coder = ConversableAgent(
        name="coder",
        llm_config=config_list,
        max_consecutive_auto_reply=3,
        system_message="You are a helpful coding assistant  and expert in python eco system."
        "Return 'TERMINATE' when all the tasks are completed or if nothing else to be done.",
        description="Write the python code or bash commands to be executed by the user_proxy to complete a given task.",
        code_execution_config=False,
        human_input_mode="NEVER",
    )

    user_proxy = ConversableAgent(
        name="user_proxy",
        description="Execute the code or bash commands provided by the coder and reports the results back to coder.",
        human_input_mode="ALWAYS",
        max_consecutive_auto_reply=3,
        is_termination_msg=lambda msg: msg.get("content") is not None
        and "TERMINATE" in msg["content"],
        code_execution_config={
            "last_n_messages": "auto",
            "executor": code_cmd_executor,
        },
    )

    task = "What's the date today?"

    # Use Cache.disk to cache LLM responses. Change cache_seed for different responses.
    with Cache.disk(cache_seed=40) as cache:
        chat_results = user_proxy.initiate_chat(
            recipient=coder,
            message=task,
            cache=cache,
            summary_method="last_msg",
        )
        # return the chat summary
        return chat_results.summary

def main():
    res = initialize_agents()
    if res:
        print(f"\n Agents response: {res}")
        print("\n\n####### Agents workflow completed ########\n\n")

if __name__ == "__main__":
    main()

ASYNC Code

import asyncio
import os
from autogen import (
    ConversableAgent,
    GroupChat,
    GroupChatManager,
)
from autogen.cache import Cache
from autogen.coding import DockerCommandLineCodeExecutor

from dotenv import load_dotenv
from model_config import ModelConfig

load_dotenv()
llm_env_config = ModelConfig.from_env()
os.makedirs("agent_code", exist_ok=True)

# Litellm config
litellm_config_list = [
    {
        "model": llm_env_config.model_name,
        "api_key": llm_env_config.api_key,
        "base_url": llm_env_config.api_url,
        "temperature": llm_env_config.temperature,
        "cache_seed": None,
        "price": [0, 0],
    },
]

config_list = {
    "config_list": litellm_config_list,
    "temperature": llm_env_config.temperature,
    "cache_seed": None,
}

async def initialize_agents():

    code_cmd_executor = DockerCommandLineCodeExecutor(
        work_dir="agent_code",
        timeout=60,
    )

    coder = ConversableAgent(
        name="coder",
        llm_config=config_list,
        max_consecutive_auto_reply=3,
        system_message="You are a helpful coding assistant  and expert in python eco system."
        "Return 'TERMINATE' when all the tasks are completed or if nothing else to be done.",
        description="Write the python code or bash commands to be executed by the user_proxy to complete a given task.",
        code_execution_config=False,
        human_input_mode="NEVER",
    )

    user_proxy = ConversableAgent(
        name="user_proxy",
        description="Execute the code or bash commands provided by the coder and reports the results back to coder.",
        human_input_mode="ALWAYS",
        max_consecutive_auto_reply=3,
        is_termination_msg=lambda msg: msg.get("content") is not None
        and "TERMINATE" in msg["content"],
        code_execution_config={
            "last_n_messages": "auto",
            "executor": code_cmd_executor,
        },
    )

    task = "What's the date today?"

    # Use Cache.disk to cache LLM responses. Change cache_seed for different responses.
    with Cache.disk(cache_seed=40) as cache:
        chat_results = await user_proxy.a_initiate_chat(
            recipient=coder,
            message=task,
            cache=cache,
            summary_method="last_msg",
        )
        # return the chat summary
        return chat_results.summary

async def main():
    res = await initialize_agents()
    if res:
        print(f"\n Agents response: {res}")
        print("\n\n####### Agents workflow completed ########\n\n")

if __name__ == "__main__":
    asyncio.run(main())

Model Used

No response

Expected Behavior

Screenshots and logs

Autogen_Sync_Async_behavior

Additional Information

No response

ekzhu commented 1 month ago

@b4codify Thanks for the issue. This is indeed a bug in the a_generate_reply method, which runs all registered reply functions whether they are async or sync:

https://github.com/microsoft/autogen/blob/c179efbb13b1040df92f9b4a8c997963dc1596df/autogen/agentchat/conversable_agent.py#L2131-L2144
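
For context, the loop there looks roughly like this (a paraphrased sketch; see the permalink above for the actual source):

# Paraphrased sketch of the a_generate_reply loop: every registered reply
# function whose trigger matches the sender is invoked, async and sync alike,
# so the sync human-input/termination check runs in addition to the async one.
for reply_func_tuple in self._reply_func_list:
    reply_func = reply_func_tuple["reply_func"]
    if self._match_trigger(reply_func_tuple["trigger"], sender):
        if inspect.iscoroutinefunction(reply_func):
            final, reply = await reply_func(
                self, messages=messages, sender=sender, config=reply_func_tuple["config"]
            )
        else:
            final, reply = reply_func(
                self, messages=messages, sender=sender, config=reply_func_tuple["config"]
            )
        if final:
            return reply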

To fix this bug, we need to add a new flag to the register_reply method that allows sync reply functions to be ignored in async chat, much like the existing ignore_async_in_sync_chat flag:

https://github.com/microsoft/autogen/blob/c179efbb13b1040df92f9b4a8c997963dc1596df/autogen/agentchat/conversable_agent.py#L307

And then in the constructor, set this flag to true when registering the a_check_termination_and_human_reply function.

https://github.com/microsoft/autogen/blob/c179efbb13b1040df92f9b4a8c997963dc1596df/autogen/agentchat/conversable_agent.py#L244-L246
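
A rough sketch of what this could look like (the flag name ignore_sync_in_async_chat and the exact wiring are assumptions, not a final design):

import inspect

# Hypothetical sketch: register_reply gains a counterpart to the existing
# ignore_async_in_sync_chat flag.
def register_reply(self, trigger, reply_func, position=0, config=None,
                   reset_config=None, *, ignore_async_in_sync_chat=False,
                   ignore_sync_in_async_chat=False):
    self._reply_func_list.insert(position, {
        "trigger": trigger,
        "reply_func": reply_func,
        "config": config,
        "reset_config": reset_config,
        # Only a sync reply function can be ignored in async chat.
        "ignore_sync_in_async_chat": ignore_sync_in_async_chat
        and not inspect.iscoroutinefunction(reply_func),
    })

# a_generate_reply would then skip any entry carrying that marker, e.g.:
#     if reply_func_tuple.get("ignore_sync_in_async_chat"):
#         continue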

The team is currently focusing on the release of v0.4 preview. Would you like to submit a PR for this fix?

Thank you!