Open qingyun-wu opened 1 year ago
This proposal introduces a `LearnableAssistantAgent` class (inheriting from `AssistantAgent`) to serve as an assistant agent that can exhibit learning capabilities.
class LearnableAssistantAgent(AssistantAgent):
    """Assistant agent that carries learning-related settings (design sketch).

    The learning settings (constraints, objectives, results, data) are stored
    on the instance by ``setup_learning`` and may be refreshed by every message
    passed to ``receive``. The learning step itself (``_learn``) is still a
    placeholder.
    """

    def __init__(
        self,
        name,
        system_message=None,
        learning_constraints=None,
        learning_objectives=None,
        learning_results=None,
        learning_data=None,
    ):
        """
        Args:
            name: name of the agent, forwarded to ``AssistantAgent``.
            system_message: system message, forwarded to ``AssistantAgent``.
            learning_constraints (dict): a dict of learning constraints
                learning_trigger:
                (other resource):
            learning_objectives (Callable/str): objectives for learning
            learning_results (dict): learning results
            learning_data (list/DataFrame/Numpy): data to learn from
        """
        super().__init__(name=name, system_message=system_message)
        # Give every learning attribute a defined value up front:
        # setup_learning() only assigns the arguments that are not None, so
        # without these defaults a later read would raise AttributeError.
        self._learning_constraints = None
        self._learning_objectives = None
        self._learning_results = None
        self._learning_data = None
        self.setup_learning(
            learning_constraints=learning_constraints,
            learning_objectives=learning_objectives,
            learning_results=learning_results,
            learning_data=learning_data,
        )

    # def _setup_learning_from_nlp(self, content):
    #     LLM(f"Extract xxx from {content}")

    def setup_learning(
        self,
        learning_constraints=None,
        learning_objectives=None,
        learning_results=None,
        learning_data=None,
    ):
        """
        This function is used to set up learning-related parameters.

        Only arguments that are not None overwrite the stored value, so a
        call can update a subset of the settings and leave the rest untouched.
        """
        if learning_constraints is not None:
            self._learning_constraints = learning_constraints
        if learning_objectives is not None:
            self._learning_objectives = learning_objectives
        if learning_results is not None:
            self._learning_results = learning_results
        if learning_data is not None:
            self._learning_data = learning_data

    def _learn(self):
        """
        performs learning
        """
        # do something
        pass

    def receive(self, message, sender):
        """Update the learning settings from ``message`` and learn if triggered.

        Args:
            message: mapping read with ``.get``; the keys consulted are
                "learning_constraints" (or the older "constraints"),
                "learning_objectives", "learning_results" and
                "learning_data" (or the older "data4learning").
            sender: the agent that sent the message (not used in this sketch).
        """
        # handle learning related set up
        # Only the keywords that setup_learning() actually accepts are
        # forwarded. Both key spellings used in the proposal are honoured so
        # either style of message works.
        # NOTE: the draft also mentioned the message keys "enable_learning",
        # "conditions_to_enable_learn", "conversation_log_as_data4learning"
        # and "clear_old_data"; setup_learning() has no matching parameters,
        # so they are not forwarded yet.
        self.setup_learning(
            learning_constraints=message.get(
                "learning_constraints", message.get("constraints", None)
            ),
            learning_objectives=message.get("learning_objectives", None),
            learning_results=message.get("learning_results", None),
            learning_data=message.get(
                "learning_data", message.get("data4learning", None)
            ),
        )
        # check if learning is enabled
        # The constraints live in the private ``_learning_constraints``
        # attribute and may be unset/None, hence the getattr + ``or {}``.
        constraints = getattr(self, "_learning_constraints", None) or {}
        if constraints.get("learning_trigger", None):
            self._learn()
        # NOTE: but what if we want to learn async. Need to use multi-process/multi-worker
        # do something with the message
        # ...
import pickle

from flaml.autogen.agent import LearnableAssistantAgent, UserProxyAgent

# Scenario 1: an assistant that learns from its own conversation log and whose
# experience is later re-used by a freshly created assistant.
# NOTE: learning_constraints, learning_objectives and learning_results are
# placeholders in this example; define them before running it.
learning_setting = {
    "learning_constraints": learning_constraints,
    "learning_objectives": learning_objectives,
    "learning_results": learning_results,
    "learning_data": "log",  # this means the user want to learn from the conversation log
}
assistant = LearnableAssistantAgent(name="assistant", **learning_setting)
user = UserProxyAgent(name="user")
assistant.receive(user.generate_init_prompt({"role": "user", "content": "Plot a rocket."}), user)
# we expect the performance of the assistant to improve with the accumulation of experiences
assistant.receive(user.generate_init_prompt({"role": "user", "content": "Plot a helicopter."}), user)

# serialize the agents (context manager so the file handle is always closed)
with open("assistant.pkl", "wb") as f:
    pickle.dump(assistant, f)

# load the agents
with open("assistant.pkl", "rb") as f:
    old_assistant = pickle.load(f)

# here we would like to create a new assistant to conduct task.
# Motivations for creating a new AssistantAgent instead of just using
# the old agent: We would like to re-use certain part of the conversations from the old
# assistant (with certain filter)
learning_setting.update({
    "learning_data": old_assistant.get_conversations(),
})
assistant = LearnableAssistantAgent(name="assistant", **learning_setting)
user = UserProxyAgent(name="user")
# we expect this assistant could learn from the provided conversation
assistant.receive(user.generate_init_prompt({"role": "user", "content": "Plot a boat."}), user)
from flaml.autogen.agent import LearningAgent, TeachingAgent
import feedparser
import pickle
# Scenario 2, part 1: a learner that continuously summarizes the arXiv CS feed.
cs_learner = LearningAgent(name="cs_learner")
teacher = TeachingAgent(name="teacher")
learning_setting = {
    "learning_constraints": {"learning_trigger": True, "cpu": 1},
    "learning_objectives": "Continuously summarize the latest research trends based on all the learning data given.",
}
cs_feed = feedparser.parse("http://export.arxiv.org/rss/cs")
# Loop through each entry (article)
for entry in cs_feed.entries:
    cs_data_today = [entry.summary]
    learning_setting.update({"learning_data": cs_data_today})
    # the learner will try its best allowed capacity (under the learning
    # constraints) toward the given objective.
    cs_learner.receive(learning_setting, teacher)
    learning_results = cs_learner.get_learning_results()
    print(f"This is the learning results so far: {learning_results}")

# serialize the agents
# The object written to cs_learner.pkl must be cs_learner itself; the file is
# read back into cs_learner right below.
with open("cs_learner.pkl", "wb") as f:
    pickle.dump(cs_learner, f)

# load the agents
with open("cs_learner.pkl", "rb") as f:
    cs_learner = pickle.load(f)
# Scenario 2, part 2: a second learner that starts from the latest learning
# results of cs_learner and continues on the arXiv statistics feed.
stats_learner = LearningAgent(name="stats_learner")
learning_setting = {
    "learning_constraints": {"learning_trigger": True, "cpu": 1},
    "learning_objectives": "Continuously summarize the latest research trends based on all the learning data given.",
    "learning_results": cs_learner.get_learning_results()[-1],
    # NOTE: my_LLM_func is a user-supplied learning function; it is not
    # defined in this example and must be provided before running it.
    "learning_func": my_LLM_func,
}
stats_feed = feedparser.parse("http://export.arxiv.org/rss/stat")
for entry in stats_feed.entries:
    stats_data_today = [entry.summary]
    # dict.update is a method call and needs parentheses around its argument
    learning_setting.update({"learning_data": stats_data_today})
    stats_learner.receive(learning_setting, teacher)
Quick question regarding the Scenario 2.
Thanks! (Context, I was discussing with @sonichi on https://github.com/microsoft/FLAML/issues/1063 for our use case)
@weilinear, thank you for your interest and questions. Please find my responses to your questions below:
`learning_results` is likely to be dependent on the specific learning algorithm employed. It could potentially be a hyper-parameter or a learnable parameter. In `learning_constraints`, we could consider including a `learning_rate` parameter.

Thanks for the explanation. A few follow-up questions:
- What is considered a `learning algorithm` in this context?
- How will `learning_rate` work here, and what assumptions could we make about a higher/lower learning rate? Will it be some measurement of how fast `learning_results` changes?

> Thanks for the explanation. A few follow-up questions:
> - What is considered a `learning algorithm` in this context?
> - How will `learning_rate` work here, and what assumptions could we make about a higher/lower learning rate? Will it be some measurement of how fast `learning_results` changes?
Hi @weilinear, the `learning algorithm` is a function that takes the learning data and the learning objective, and optionally the previous learning results, as inputs and outputs new learning results. In the context of a summarization task, a large language model could be used as the learning function.
The learning rate mentioned in my previous response is just one example of how the user could balance old `learning_results` against learning from new data. How it works depends on the specific learning function. To avoid confusion, I have removed `learning_rate` from the code example and kept only a field named `learning_func`.
Thank you!
Hi @weilinear,
Thank you for your interest. Could you please take a look at this PR: https://github.com/microsoft/FLAML/pull/1098 , especially the continual summarization example code in this file: https://github.com/microsoft/FLAML/blob/6891db656de7b8de4ecbaad892abafee8411d3a8/test/autogen/test_continual_summarization.py
Thank you!
Hi @weilinear, An example use case of continual summarization is demonstrated briefly in this notebook: https://github.com/microsoft/FLAML/blob/67a23b167ecad631882fc9c781a1178dc4b5cf50/notebook/autogen_agent_continual_summarization.ipynb
Could you take a look at this demo and share your comments/suggestions? I'd like to have a chat with you if you have time.
Thanks @qingyun-wu. Let me take a look at the demo and will leave some comments. I'm pretty much booked this week. Let's target to have a chat sometime late next week.