代理 1 -> 代理 2 -> 代理 3 ...
# You can use other orchestration libraries, but I found DSPy
# good for building quickly, keeping things simple, and evaluating (which makes the application more reliable)
import dspy
# This object inherits from the dspy.Signature class
# The text inside """ is the prompt
class analytical_planner(dspy.Signature):
    """ You are data analytics planner agent. You have access to three inputs
    1. Datasets
    2. Data Agent descriptions
    3. User-defined Goal
    You take these three inputs to develop a comprehensive plan to achieve the user-defined goal from the data & Agents available.
    In case you think the user-defined goal is infeasible you can ask the user to redefine or add more description to the goal.
    Give your output in this format:
    plan: Agent1->Agent2->Agent3
    plan_desc = Use Agent 1 for this reason, then agent2 for this reason and lastly agent3 for this reason.
    You don't have to use all the agents in response of the query"""
    # NOTE: the docstring above is the prompt, so its wording is runtime
    # text and is intentionally left as written. It must be closed before
    # the field definitions, otherwise they become part of the prompt string.

    # Input fields and their descriptions
    dataset = dspy.InputField(desc="Available datasets loaded in the system, use this df_name,columns set df as copy of df_name")
    Agent_desc = dspy.InputField(desc="The agents available in the system")
    goal = dspy.InputField(desc="The user defined goal ")
    # Output fields and their description
    # `plan` is expected to be agent names separated by "->" (see the prompt).
    plan = dspy.OutputField(desc="The plan that would achieve the user defined goal")
    plan_desc = dspy.OutputField(desc="The reasoning behind the chosen plan")
# I define analysis agents as those agents that are in the middle-layer
# they produce code for a specialised data analysis task
class preprocessing_agent(dspy.Signature):
    """ You are a data pre-processing agent, your job is to take a user-defined goal and available dataset,
    to build an exploratory analytics pipeline. You do this by outputing the required Python code.
    You will only use numpy and pandas, to perform pre-processing and introductory analysis"""
    # NOTE: the docstring above is the prompt; it has to be terminated here
    # so that the fields below are real class attributes, not prompt text.

    # Input fields
    dataset = dspy.InputField(desc="Available datasets loaded in the system, use this df_name,columns set df as copy of df_name")
    goal = dspy.InputField(desc="The user defined goal ")
    # Output fields: a natural-language commentary plus the generated code
    commentary = dspy.OutputField(desc="The comments about what analysis is being performed")
    code = dspy.OutputField(desc="The code that does the data preprocessing and introductory analysis")
class statistical_analytics_agent(dspy.Signature):
    """ You are a statistical analytics agent.
    Your task is to take a dataset and a user-defined goal, and output
    Python code that performs the appropriate statistical analysis to achieve that goal.
    You should use the Python statsmodels library"""
    # NOTE: the docstring above is the prompt. The library name was corrected
    # from "statsmodel" to "statsmodels" (the actual package name) so the
    # generated code imports the right module.

    # Input fields
    dataset = dspy.InputField(desc="Available datasets loaded in the system, use this df_name,columns set df as copy of df_name")
    goal = dspy.InputField(desc="The user defined goal for the analysis to be performed")
    # Output fields: a natural-language commentary plus the generated code
    commentary = dspy.OutputField(desc="The comments about what analysis is being performed")
    code = dspy.OutputField(desc="The code that does the statistical analysis using statsmodels")
class sk_learn_agent(dspy.Signature):
    # Prompt
    """You are a machine learning agent.
    Your task is to take a dataset and a user-defined goal, and output Python code that performs the appropriate machine learning analysis to achieve that goal.
    You should use the scikit-learn library."""
    # Input Fields
    dataset = dspy.InputField(desc="Available datasets loaded in the system, use this df_name,columns. set df as copy of df_name")
    goal = dspy.InputField(desc="The user defined goal ")
    # Output Fields
    commentary = dspy.OutputField(desc="The comments about what analysis is being performed")
    # The description previously said "Exploratory data analysis" (copied from
    # another agent), which contradicted this agent's machine-learning role.
    code = dspy.OutputField(desc="The code that does the machine learning analysis using scikit-learn")
## I worked on the data-viz agent and already optimized it using DSPy.
## The only big difference is that this agent takes another input: a styling index
class code_combiner_agent(dspy.Signature):
    """ You are a code combine agent, taking Python code output from many agents and combining the operations into 1 output
    You also fix any errors in the code"""
    # NOTE: the docstring above is the prompt and is kept verbatim; only the
    # class-body indentation was restored so the definition parses.

    # Input: the code produced by the individual agents
    agent_code_list = dspy.InputField(desc="A list of code given by each agent")
    # Output: one combined script
    refined_complete_code = dspy.OutputField(desc="Refined complete code base")
# The same signature used in Data Viz agent post
class Data_Viz(dspy.Signature):
    """
    You are AI agent who uses the goal to generate data visualizations in Plotly.
    You have to use the tools available to your disposal
    You must give an output as code, in case there is no relevant columns, just state that you don't have the relevant information
    """
    # NOTE: the prompt text above previously had no string delimiters, which
    # made the class a syntax error. Its wording is left as written.

    # Input fields
    goal = dspy.InputField(desc="user defined goal which includes information about data and chart they want to plot")
    dataframe_context = dspy.InputField(desc=" Provides information about the data in the data frame. Only use column names and dataframe_name as in this context")
    styling_context = dspy.InputField(desc='Provides instructions on how to style your Plotly plots')
    # Output field. The description now names `dataframe_context`, matching
    # the actual input field (it previously said `dataframe_index`).
    code = dspy.OutputField(desc="Plotly code that visualizes what the user needs according to the query & dataframe_context & styling_context")
# An optional agent that checks if the user-defined goal works well
class goal_refiner_agent(dspy.Signature):
    """You take a user-defined goal given to a AI data analyst planner agent,
    you make the goal more elaborate using the datasets available and agent_desc"""
    # NOTE: the docstring above is the prompt and is kept verbatim; only the
    # class-body indentation was restored so the definition parses.

    # Input fields (same three inputs as the planner signature)
    dataset = dspy.InputField(desc="Available datasets loaded in the system, use this df_name,columns set df as copy of df_name")
    Agent_desc = dspy.InputField(desc="The agents available in the system")
    goal = dspy.InputField(desc="The user defined goal ")
    # Output field
    refined_goal = dspy.OutputField(desc='Refined goal that helps the planner agent plan better')
# I chose a LlamaIndex-based retriever as it was more convenient.
# You can feed in your data in multiple ways, for example by
# providing descriptions of the column names and the dataframe reference,
# and also the purpose for which the data was collected, etc.
# NOTE(review): `VectorStoreIndex` and `docs` are not defined or imported in
# the visible part of this file -- presumably VectorStoreIndex comes from
# llama_index and `docs` is a list of documents; confirm before running.
dataframe_index = VectorStoreIndex.from_documents(docs)
# I also defined a styling index for the data visualization agent,
# which holds natural language instructions on how to style different visualizations.
# NOTE(review): `styling_instructions` is not defined in the visible part of this file.
style_index = VectorStoreIndex.from_documents(styling_instructions)
在 DSPy 中,要编译一个复杂的 LLM 应用程序,需要定义一个包含两个基本方法的模块: __init__ 和 forward。
__init__ 方法通过定义所有将在整个过程中使用的变量来初始化模块。而 forward 方法则是实现核心功能的地方。该方法概述了一个组件的输出如何与其他组件交互,从而有效地驱动应用程序的逻辑。
# This module takes only one input on initiation
class auto_analyst(dspy.Module):
    """Agentic system that plans an analysis, runs the planned agents and
    combines the code they generate.

    The module reads the module-level `dataframe_index` and `style_index`
    objects to build its retrievers, and the module-level
    `analytical_planner`, `goal_refiner_agent` and `code_combiner_agent`
    signatures for its helper predictors.
    """

    def __init__(self, agents):
        """Register the analysis agents.

        Args:
            agents: iterable of dspy.Signature classes. Each one is wrapped
                in a ChainOfThought predictor and stored under its class
                name, which is the name the planner must use in its plan.
        """
        super().__init__()
        # Defines the available agents, their inputs, and description
        self.agents = {}
        self.agent_inputs = {}
        self.agent_desc = []
        for a in agents:
            name = a.__name__
            # Using CoT prompting as from experience it helps generate better responses
            self.agents[name] = dspy.ChainOfThought(a)
            # Names of the signature's input fields; used in forward() to
            # pick the matching entries out of the shared argument dict.
            self.agent_inputs[name] = set(a.input_fields)
            # Textual description of the signature, shown to the planner.
            self.agent_desc.append(str(a))
        # Defining the planner, refine_goal & code combiner agents separately
        # as they don't generate the code & analysis; they help in planning,
        # getting better goals & combining the code
        self.planner = dspy.ChainOfThought(analytical_planner)
        self.refine_goal = dspy.ChainOfThought(goal_refiner_agent)
        self.code_combiner_agent = dspy.ChainOfThought(code_combiner_agent)
        # these two retrievers are defined using llama-index retrievers
        # you can customize this depending on how you want your agents
        self.dataset = dataframe_index.as_retriever(similarity_top_k=1)
        self.styling_index = style_index.as_retriever(similarity_top_k=1)

    def _planned_agents(self, plan_text):
        """Return the registered agent names found in a "A->B->C" plan string."""
        steps = (step.strip() for step in str(plan_text or '').split('->'))
        return [step for step in steps if step in self.agents]

    def forward(self, query):
        """Plan and run the analysis for `query`.

        Args:
            query: the user-defined goal as a string.

        Returns:
            dict mapping 'analytical_planner', each executed agent's name,
            'code_combiner_agent' (and 'goal_refiner_agent' when the goal had
            to be refined) to the corresponding predictor output.
        """
        # retrieves the relevant context to the query
        dataset_context = self.dataset.retrieve(query)[0].text
        styling_context = self.styling_index.retrieve(query)[0].text
        # This dict is used to quickly pass arguments for agent inputs.
        # The retrieved contexts are stored under every field name the
        # signatures in this file use for them.
        dict_ = {
            'dataset': dataset_context,
            'dataframe_context': dataset_context,
            'styling_index': styling_context,
            'styling_context': styling_context,
            'goal': query,
            'Agent_desc': str(self.agent_desc),
        }
        # output_dictionary that stores all agent outputs
        output_dict = {}
        # this comes up with the plan
        plan = self.planner(goal=dict_['goal'], dataset=dict_['dataset'], Agent_desc=dict_['Agent_desc'])
        output_dict['analytical_planner'] = plan
        # if the planner worked as intended it should give agents separated by ->
        plan_list = self._planned_agents(plan.plan)
        if not plan_list:
            # in case the goal is unclear (no known agent in the plan), it is
            # sent to the goal refiner and the planner is asked once more
            refined = self.refine_goal(dataset=dict_['dataset'], goal=dict_['goal'], Agent_desc=dict_['Agent_desc'])
            output_dict['goal_refiner_agent'] = refined
            dict_['goal'] = refined.refined_goal
            plan = self.planner(goal=dict_['goal'], dataset=dict_['dataset'], Agent_desc=dict_['Agent_desc'])
            output_dict['analytical_planner'] = plan
            plan_list = self._planned_agents(plan.plan)
        code_list = []
        # passes the goal and other inputs to all respective agents in the plan
        for name in plan_list:
            inputs = {x: dict_[x] for x in self.agent_inputs[name]}
            output_dict[name] = self.agents[name](**inputs)
            # creates a list of all the generated code, to be combined as 1 script
            code_list.append(output_dict[name].code)
        # Stores the last output
        output_dict['code_combiner_agent'] = self.code_combiner_agent(agent_code_list=str(code_list))
        return output_dict
# you can store all available agent signatures as a list
# (the data visualization signature in this file is named `Data_Viz`)
agents = [preprocessing_agent, statistical_analytics_agent, sk_learn_agent, Data_Viz]
# Define the agentic system
auto_analyst_system = auto_analyst(agents)
# the system is preloaded with Chicago crime data
goal = "What is the cause of crime in Chicago?"
# Asking the agentic system to perform analysis for this query
output = auto_analyst_system(query=goal)
对于这个查询 = "芝加哥的犯罪原因是什么?"
下一个 Plotly 数据可视化代理