如果说大语言模型是“大脑”,那么 Agent(智能体)就是拥有“手脚”和“工具”的完整个体。
Agent 不仅能思考(LLM 推理),还能行动(调用工具)、记忆(上下文管理)、规划(任务分解)。从自动化客服到科研助手,从代码生成到数据分析,Agent 正在重新定义 AI 的应用边界。
本文将系统介绍 LLM Agent 的核心架构、关键技术,以及使用 LangChain/AutoGPT 等框架的实战开发。
一、Agent 核心架构
1.1 什么是 Agent?
定义:Agent = LLM(推理) + Tools(工具) + Planning(规划) + Memory(记忆)
用户输入
↓
[感知 Perception] ← 记忆
↓
[规划 Planning] → 分解任务
↓
[决策 Decision] → 选择行动
↓
[执行 Action] → 调用工具
↓
[观察 Observation] → 获取结果
↓
(循环直到完成)
↓
返回最终答案
1.2 Agent 的分类
| 类型 | 特点 | 适用场景 | 代表框架 |
|---|---|---|---|
| ReAct | 推理+行动交替 | 单步任务、工具调用 | LangChain |
| Plan-and-Execute | 先规划再执行 | 复杂多步任务 | BabyAGI |
| Autonomous | 自主循环运行 | 长期目标任务 | AutoGPT |
| Multi-Agent | 多智能体协作 | 复杂系统、角色分工 | MetaGPT |
二、ReAct Agent:推理与行动交替
2.1 ReAct 原理
核心思想:Thought(思考)→ Action(行动)→ Observation(观察)循环。
示例对话:
User: 埃隆·马斯克的年龄的平方是多少?
Thought 1: 我需要先找到埃隆·马斯克的出生日期
Action 1: Search[埃隆·马斯克 出生日期]
Observation 1: 埃隆·马斯克出生于 1971 年 6 月 28 日
Thought 2: 现在我需要计算他的年龄
Action 2: Calculate[2024 - 1971]
Observation 2: 53
Thought 3: 现在计算 53 的平方
Action 3: Calculate[53 ** 2]
Observation 3: 2809
Thought 4: 我现在知道答案了
Final Answer: 埃隆·马斯克 53 岁,年龄的平方是 2809。
2.2 从零实现 ReAct Agent
import re
from typing import Dict, Callable, List, Tuple
import openai
class ReActAgent:
    """Minimal ReAct agent: alternate Thought -> Action -> Observation until done.

    The agent keeps a single growing text prompt. Each step asks the LLM for
    the next thought and action, runs the requested tool, and appends the
    thought, the action and the tool's observation to the prompt so the model
    can see its own history on the next step.
    """

    # Matches "Action: Tool[input]" as well as numbered "Action 2: Tool[input]".
    # The tool name may contain spaces ("Final Answer"). The input is matched
    # greedily so nested brackets such as "Calculate[sum([1, 2])]" stay intact.
    _ACTION_RE = re.compile(
        r"\bAction(?:\s+\d+)?\s*:\s*([^\[\n]+?)\s*\[(.*)\]", re.DOTALL
    )
    # Matches the "Final Answer: ..." style some models produce instead of
    # "Action: Final Answer[...]".
    _FINAL_RE = re.compile(r"\bFinal Answer\s*:\s*(.*)", re.DOTALL)
    # Optional "Thought:" / "Thought 3:" label at the start of a response.
    _THOUGHT_PREFIX_RE = re.compile(r"^\s*Thought(?:\s+\d+)?\s*:\s*")
    # An "Observation" line written by the model itself (i.e. hallucinated).
    _OBSERVATION_RE = re.compile(r"^\s*Observation(?:\s+\d+)?\s*:", re.MULTILINE)

    def __init__(self, tools: Dict[str, Callable], model="gpt-4", max_steps=10):
        """
        Args:
            tools: Mapping of tool name to a callable taking one string input.
            model: Name of the chat model to call.
            max_steps: Maximum number of Thought/Action rounds before giving up.
        """
        self.tools = tools
        self.model = model
        self.max_steps = max_steps

    def run(self, question: str) -> str:
        """Run the ReAct loop for *question* and return the final answer text.

        Returns the fixed "step limit reached" message when no final answer
        is produced within ``max_steps`` rounds.
        """
        prompt = self._build_initial_prompt(question)
        for step in range(self.max_steps):
            response = self._call_llm(prompt)
            thought, action, action_input = self._parse_response(response)
            print(f"\n--- Step {step + 1} ---")
            print(f"Thought: {thought}")
            print(f"Action: {action}[{action_input}]")

            if action.lower() == "final answer":
                return action_input

            observation = self._execute_action(action, action_input)
            print(f"Observation: {observation}")

            # Record the model's own thought and action together with the
            # real observation; without them the next call would see only
            # observations and lose track of what it already tried.
            prompt += (
                f" {thought}\nAction {step + 1}: {action}[{action_input}]"
                f"\nObservation {step + 1}: {observation}\nThought {step + 2}:"
            )
        return "达到最大步数限制"

    def _build_initial_prompt(self, question: str) -> str:
        """Build the first prompt: tool list, output format and the question."""
        tools_desc = "\n".join(
            f"- {name}: {(func.__doc__ or 'No description').strip()}"
            for name, func in self.tools.items()
        )
        return (
            "Answer the following question by reasoning step by step.\n"
            "Available tools:\n"
            f"{tools_desc}\n"
            "Use this format:\n"
            "Thought: [your reasoning]\n"
            # No outer brackets here: "[tool_name[input]]" invited the model
            # to emit a leading "[" that the action parser cannot read.
            "Action: tool_name[input]\n"
            "Observation: [tool result will be provided]\n"
            "... (repeat Thought/Action/Observation)\n"
            "Thought: I now know the final answer\n"
            "Action: Final Answer[your answer]\n"
            f"Question: {question}\n"
            "Thought 1:"
        )

    def _call_llm(self, prompt: str) -> str:
        """Send *prompt* to the chat model and return the reply text."""
        response = openai.ChatCompletion.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
            max_tokens=200,
            # Stop before the model invents its own observation; the real
            # one comes from the tool call.
            stop=["\nObservation"],
        )
        return response.choices[0].message.content

    def _clean_thought(self, text: str) -> str:
        """Strip whitespace and an optional leading "Thought N:" label."""
        return self._THOUGHT_PREFIX_RE.sub("", text).strip()

    def _parse_response(self, response: str) -> Tuple[str, str, str]:
        """Split an LLM reply into ``(thought, action, action_input)``.

        A reply without a recognisable action is treated as a final answer,
        using the text after "Final Answer:" if present, otherwise the whole
        reply.
        """
        # Ignore anything after an observation the model wrote itself.
        hallucinated = self._OBSERVATION_RE.search(response)
        if hallucinated:
            response = response[:hallucinated.start()]

        action_match = self._ACTION_RE.search(response)
        if action_match:
            thought = self._clean_thought(response[:action_match.start()])
            return (
                thought,
                action_match.group(1).strip(),
                action_match.group(2).strip(),
            )

        final_match = self._FINAL_RE.search(response)
        if final_match:
            thought = self._clean_thought(response[:final_match.start()])
            return thought, "Final Answer", final_match.group(1).strip()

        thought = self._clean_thought(response)
        return thought, "Final Answer", thought

    def _execute_action(self, action: str, action_input: str) -> str:
        """Call the named tool and return its result (or an error) as text.

        Errors are returned as strings rather than raised so that the model
        sees them as an observation and can try something else.
        """
        if action not in self.tools:
            return f"Error: Tool '{action}' not found"
        try:
            result = self.tools[action](action_input)
            return str(result)
        except Exception as e:
            return f"Error executing {action}: {str(e)}"
# === Tool definitions ===
def search(query: str) -> str:
    """Search the web for information"""
    # Stand-in for a real search API call: answers come from a fixed table
    # keyed by the exact query string.
    canned_answers = {
        "埃隆·马斯克 出生日期": "埃隆·马斯克出生于 1971 年 6 月 28 日",
        "OpenAI CEO": "OpenAI 的 CEO 是 Sam Altman",
    }
    if query in canned_answers:
        return canned_answers[query]
    return "No results found"
def calculate(expression: str) -> str:
    """Evaluate a mathematical expression"""
    # SECURITY: this used to call eval() with an empty __builtins__. That is
    # not a sandbox: attribute access such as ().__class__.__base__ still
    # reaches arbitrary objects. The expression comes from model output, so
    # it is parsed and only a whitelist of arithmetic nodes is evaluated.
    # Resource limits (e.g. huge exponents) are NOT enforced here.
    import ast
    import operator

    allowed_names = {
        'abs': abs, 'round': round, 'min': min, 'max': max,
        'pow': pow, 'sum': sum
    }
    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def evaluate(node):
        """Recursively evaluate one whitelisted AST node."""
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        if isinstance(node, ast.List):
            return [evaluate(item) for item in node.elts]
        if isinstance(node, ast.Tuple):
            return tuple(evaluate(item) for item in node.elts)
        if (
            isinstance(node, ast.Call)
            and isinstance(node.func, ast.Name)
            and node.func.id in allowed_names
            and not node.keywords
        ):
            return allowed_names[node.func.id](*[evaluate(arg) for arg in node.args])
        raise ValueError(f"unsupported expression element: {type(node).__name__}")

    try:
        # strip(): eval() tolerated leading whitespace, ast.parse() does not.
        tree = ast.parse(expression.strip(), mode="eval")
        return str(evaluate(tree.body))
    except Exception as e:
        # Errors go back to the model as text so it can correct the input.
        return f"Calculation error: {e}"
def lookup(keyword: str) -> str:
    """Look up information in a knowledge base"""
    # Stand-in for a vector database or knowledge graph query: entries are
    # matched on the exact keyword.
    entries = {
        "Python": "Python 是一种高级编程语言,由 Guido van Rossum 创建于 1991 年",
        "AI": "人工智能(AI)是计算机科学的一个分支,致力于创建智能机器"
    }
    try:
        return entries[keyword]
    except KeyError:
        return "Not found in knowledge base"
# === Using the agent ===
# The dictionary keys are the tool names the model has to write in its
# "Action: Name[input]" lines; each value is the function that gets called.
agent = ReActAgent(
tools={
"Search": search,
"Calculate": calculate,
"Lookup": lookup
}
)
# Runs the Thought/Action/Observation loop for this question. Each step goes
# through ReActAgent._call_llm, so this line needs OpenAI API access.
answer = agent.run("埃隆·马斯克的年龄的平方是多少?")
print(f"\n=== Final Answer ===\n{answer}")
三、工具调用(Function Calling)
3.1 OpenAI Function Calling
原理:让 LLM 输出结构化的函数调用请求。
import openai
import json
# Local implementation of the tool the model is allowed to request.
# NOTE: placeholder data only; replace the body with a real weather lookup.
def get_current_weather(location, unit="celsius"):
    """Return a canned weather report for *location* (demo stub)."""
    return {"location": location, "temperature": "22", "unit": unit}


# Tool schema sent to the model: name, description and JSON-Schema parameters.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA"
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"]
                    }
                },
                "required": ["location"]
            }
        }
    }
]

# Conversation so far; tool results are appended to this same list.
messages = [{"role": "user", "content": "What's the weather in Boston?"}]

# First call: the model either answers directly or asks for tool calls.
response = openai.ChatCompletion.create(
    model="gpt-4",
    messages=messages,
    tools=tools,
    tool_choice="auto"  # let the model decide whether to call a tool
)

message = response.choices[0].message
# getattr: the field can be absent when the model answers without tools.
tool_calls = getattr(message, "tool_calls", None)
if tool_calls:
    # The assistant message carrying the tool calls must precede the results.
    messages.append(message)
    available_functions = {"get_current_weather": get_current_weather}

    # Answer every tool call, not just the first: the follow-up request
    # needs one "tool" message per tool_call_id.
    for tool_call in tool_calls:
        function_name = tool_call.function.name
        function_args = json.loads(tool_call.function.arguments)
        print(f"调用工具: {function_name}")
        print(f"参数: {function_args}")

        handler = available_functions.get(function_name)
        if handler is None:
            # Report unknown tools to the model instead of leaving the
            # result variable unbound.
            weather = {"error": f"Unknown tool: {function_name}"}
        else:
            weather = handler(**function_args)

        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "name": function_name,
            "content": json.dumps(weather)
        })

    # Second call: the model turns the tool results into a final answer.
    second_response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages
    )
    final_answer = second_response.choices[0].message.content
    print(f"最终回答: {final_answer}")
3.2 工具库设计
from typing import List, Dict, Any, Callable
from pydantic import BaseModel, Field
class Tool(BaseModel):
    """A callable tool plus the metadata needed to advertise it to an LLM."""

    # Identifier used in the function-calling schema.
    name: str
    # Natural-language description placed in the schema.
    description: str
    # JSON-Schema style description of the keyword arguments.
    parameters: Dict[str, Any]
    # The Python callable that does the work.
    function: Callable

    def run(self, **kwargs) -> str:
        """Call the wrapped function with *kwargs* and return its result."""
        handler = self.function
        return handler(**kwargs)

    def to_openai_format(self) -> Dict:
        """Return this tool as an OpenAI function-calling schema entry."""
        spec = {
            "name": self.name,
            "description": self.description,
            "parameters": self.parameters
        }
        return {"type": "function", "function": spec}
class ToolRegistry:
    """Name-indexed collection of Tool objects."""

    def __init__(self):
        self.tools: Dict[str, Tool] = {}

    def register(self, tool: Tool):
        """Store *tool* under its name, replacing any tool with that name."""
        self.tools.update({tool.name: tool})

    def get(self, name: str) -> Tool:
        """Return the tool registered as *name*, or None if there is none."""
        if name in self.tools:
            return self.tools[name]
        return None

    def list_tools(self) -> List[Dict]:
        """Return every registered tool in OpenAI function-calling format."""
        schemas = []
        for registered in self.tools.values():
            schemas.append(registered.to_openai_format())
        return schemas
# Create a tool
def calculator(expression: str) -> str:
    """Evaluate a math expression"""
    # SECURITY: the expression is produced by a language model, so it must
    # not be passed to eval(). It is parsed instead, and only numeric
    # literals and arithmetic operators are evaluated; anything else raises
    # ValueError. Malformed input still raises SyntaxError, as eval() did.
    import ast
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def evaluate(node):
        """Recursively evaluate one whitelisted AST node."""
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        raise ValueError(f"Unsupported expression: {expression!r}")

    # strip(): eval() tolerated leading whitespace, ast.parse() does not.
    return str(evaluate(ast.parse(expression.strip(), mode="eval").body))
# Wrap the calculator function in a Tool. "parameters" is the JSON-Schema
# description of its single required string argument, "expression".
calculator_tool = Tool(
name="calculator",
description="Evaluate mathematical expressions",
parameters={
"type": "object",
"properties": {
"expression": {
"type": "string",
"description": "Math expression to evaluate, e.g. '2 + 2'"
}
},
"required": ["expression"]
},
function=calculator
)
# Register the tool so it can be looked up by name and listed in OpenAI format
registry = ToolRegistry()
registry.register(calculator_tool)
四、记忆机制
4.1 短期记忆(对话历史)
from collections import deque
class ConversationMemory:
    """Sliding-window chat history holding the most recent messages."""

    def __init__(self, max_turns=10):
        # One turn is a user message plus an assistant message, so the
        # window holds twice as many entries as turns.
        capacity = max_turns * 2
        self.messages = deque(maxlen=capacity)

    def _record(self, role, content):
        """Append one message dict; the deque drops the oldest when full."""
        self.messages.append({"role": role, "content": content})

    def add_user_message(self, content: str):
        """Append a message with role "user"."""
        self._record("user", content)

    def add_assistant_message(self, content: str):
        """Append a message with role "assistant"."""
        self._record("assistant", content)

    def get_messages(self) -> List[Dict]:
        """Return the stored messages, oldest first, as a new list."""
        return [*self.messages]

    def clear(self):
        """Remove every stored message."""
        self.messages.clear()
# Usage: max_turns=5 keeps at most 10 messages (5 user + 5 assistant)
memory = ConversationMemory(max_turns=5)
memory.add_user_message("你好,我叫张三")
memory.add_assistant_message("你好张三,很高兴认识你!")
memory.add_user_message("我的名字是什么?")
# The earlier message containing the name is still inside the window, so an
# agent that is given memory.get_messages() can answer the last question.
4.2 长期记忆(向量数据库)
from langchain.memory import VectorStoreRetrieverMemory
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
# Create the vector store (embeddings come from OpenAI, storage is Chroma)
embeddings = OpenAIEmbeddings()
vectorstore = Chroma(embedding_function=embeddings)
# Create the memory; the retriever is configured with k=3
memory = VectorStoreRetrieverMemory(
retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
memory_key="history",
input_key="input"
)
# Store one exchange in the memory
memory.save_context(
{"input": "我最喜欢的颜色是蓝色"},
{"output": "好的,我记住了你喜欢蓝色"}
)
# Retrieve memories for a new input (relevance-based lookup)
relevant_memories = memory.load_memory_variables(
{"input": "我喜欢什么颜色?"}
)
print(relevant_memories["history"])
# NOTE(review): this presumably prints the stored exchange text (the saved
# input/output pair about the colour blue), not a rephrased answer - confirm
# against the installed LangChain version.
4.3 结构化记忆(实体记忆)
from langchain.memory import ConversationEntityMemory
class EntityMemory:
    """Structured memory mapping an entity name to a dict of its properties."""

    def __init__(self):
        # Layout: {entity_name: {property_name: value}}
        self.entities = {}

    def update_entity(self, name: str, properties: Dict):
        """Merge *properties* into the record for *name*, creating it if new."""
        record = self.entities.setdefault(name, {})
        record.update(properties)

    def get_entity(self, name: str) -> Dict:
        """Return the stored properties for *name*, or an empty dict."""
        if name in self.entities:
            return self.entities[name]
        return {}

    def extract_and_update(self, text: str):
        """Extract entities from *text* and merge each one into the memory."""
        # extract_entities_from_text is not defined in this snippet and has
        # to be supplied (e.g. NER or an LLM call). Each item it yields is
        # read through its "name" and "properties" keys.
        for found in extract_entities_from_text(text):
            self.update_entity(found["name"], found["properties"])
# Usage: store properties for two entities, then read one back
memory = EntityMemory()
memory.update_entity("张三", {"职业": "工程师", "年龄": 30})
memory.update_entity("北京", {"类型": "城市", "国家": "中国"})
print(memory.get_entity("张三"))
# Prints: {'职业': '工程师', '年龄': 30}
五、任务规划
5.1 Plan-and-Execute 架构
from typing import List
class PlanExecuteAgent:
    """Plan-and-execute agent: plan with an LLM, run each step, then summarize.

    Args:
        planner_llm: Callable that takes a prompt string and returns text.
        executor_agent: Object whose ``run(step)`` method executes one step.
    """

    def __init__(self, planner_llm, executor_agent):
        self.planner = planner_llm
        self.executor = executor_agent

    def run(self, objective: str) -> str:
        """Plan *objective*, execute every step in order and return the summary."""
        # Step 1: create the plan
        plan = self.create_plan(objective)
        print(f"=== Plan ===\n{plan}\n")

        # Step 2: execute the plan step by step
        results = []
        for step in plan:
            print(f"执行: {step}")
            result = self.executor.run(step)
            results.append(result)
            print(f"结果: {result}\n")

        # Step 3: summarize the results
        return self.summarize_results(objective, results)

    def create_plan(self, objective: str) -> List[str]:
        """Ask the planner for numbered steps and return their texts in order.

        Only lines that start with a number are kept. Leading whitespace and
        the separators ``.``, ``)``, ``:`` and ``、`` after the number are
        accepted, so "  1. foo" and "2) bar" both count as steps.
        """
        import re

        prompt = (
            "You are a planning expert. Break down the following objective "
            "into concrete, actionable steps.\n"
            f"Objective: {objective}\n"
            "Steps (number each step):"
        )
        response = self.planner(prompt)

        # The previous parser tested line[0] (which is a space on indented
        # lines) and indexed split('.')[1] (IndexError for "1) step").
        step_pattern = re.compile(r"^\s*\d+\s*[.)、::]\s*(.+?)\s*$")
        steps = []
        for line in response.splitlines():
            match = step_pattern.match(line)
            if match:
                steps.append(match.group(1))
        return steps

    def summarize_results(self, objective: str, results: List[str]) -> str:
        """Ask the planner to turn the per-step results into a final answer."""
        results_text = "\n".join(f"{i + 1}. {r}" for i, r in enumerate(results))
        prompt = (
            "Based on the following results, provide a final answer to the "
            "objective.\n"
            f"Objective: {objective}\n"
            "Results:\n"
            f"{results_text}\n"
            "Final Answer:"
        )
        return self.planner(prompt)
# Usage
# planner_llm wraps one chat-completion request and returns the reply text.
planner_llm = lambda prompt: openai.ChatCompletion.create(
model="gpt-4",
messages=[{"role": "user", "content": prompt}],
temperature=0
).choices[0].message.content
# NOTE: {...} is a placeholder (in Python it is a set holding Ellipsis);
# pass a real {tool_name: function} dict before running this.
executor = ReActAgent(tools={...})
agent = PlanExecuteAgent(planner_llm, executor)
answer = agent.run("研究 GPT-4 的技术细节并写一份总结报告")
5.2 分层任务分解(HierarchicalAgent)
class HierarchicalAgent:
    """Hierarchical agent: high-level planning plus low-level execution.

    Args:
        high_level_llm: Callable that takes a prompt string and returns text.
        low_level_agents: Agents exposing ``can_handle(subtask)`` and
            ``run(subtask)``. The first one is the default executor.
    """

    def __init__(self, high_level_llm, low_level_agents: List):
        self.high_level = high_level_llm
        self.low_level = low_level_agents

    def run(self, goal: str) -> str:
        """Decompose *goal*, run every subtask and return the combined result."""
        # High level: split the goal into subtasks
        subtasks = self.decompose_task(goal)

        # Low level: hand each subtask to a suitable agent
        results = []
        for subtask in subtasks:
            agent = self.select_agent(subtask)
            results.append(agent.run(subtask))

        # Combine the partial results
        return self.integrate_results(results)

    def decompose_task(self, goal: str) -> List[str]:
        """Ask the high-level LLM for subtasks and parse them into a list."""
        prompt = (
            "Decompose the following goal into 3-5 subtasks:\n"
            f"Goal: {goal}\n"
            "Subtasks:"
        )
        response = self.high_level(prompt)
        # parse_subtasks is not defined in this snippet; it must be provided
        # by the surrounding module and turn the LLM text into a list.
        return parse_subtasks(response)

    def select_agent(self, subtask: str):
        """Return the first agent that can handle *subtask*, else the default."""
        # Selection could also use keywords, similarity scores, etc.
        for agent in self.low_level:
            if agent.can_handle(subtask):
                return agent
        return self.low_level[0]  # default

    def integrate_results(self, results: List) -> str:
        """Combine the subtask results into one string, one result per line.

        ``run`` called this method but it was never defined. This default
        simply joins the results; override it to synthesize an answer with
        the high-level LLM instead.
        """
        return "\n".join(str(result) for result in results)
六、使用 LangChain 构建 Agent
6.1 基础 Agent
from langchain.agents import create_react_agent, AgentExecutor
from langchain.tools import Tool
from langchain_openai import ChatOpenAI
from langchain import hub
# 1. Define the tools
def search_web(query: str) -> str:
    """Search the web"""
    # Demo stub: echoes the query instead of calling a search service.
    prefix = "Search results for: "
    return prefix + f"{query}"
def calculator(expression: str) -> str:
    """Calculate math expression"""
    # SECURITY: the agent passes model-generated text to this tool, so it
    # must not reach eval(). The text is parsed instead, and only numeric
    # literals and arithmetic operators are evaluated; anything else raises
    # ValueError. Malformed input still raises SyntaxError, as eval() did.
    import ast
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def evaluate(node):
        """Recursively evaluate one whitelisted AST node."""
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        raise ValueError(f"Unsupported expression: {expression!r}")

    # strip(): eval() tolerated leading whitespace, ast.parse() does not.
    return str(evaluate(ast.parse(expression.strip(), mode="eval").body))
# Wrap each function as a LangChain Tool; the name and description are the
# text passed to the agent alongside the function.
tools = [
Tool(
name="Search",
func=search_web,
description="Useful for searching information"
),
Tool(
name="Calculator",
func=calculator,
description="Useful for math calculations"
)
]
# 2. Create the LLM
llm = ChatOpenAI(model="gpt-4", temperature=0)
# 3. Fetch the prompt template from the LangChain hub (needs network access)
prompt = hub.pull("hwchase17/react")
# 4. Create the agent
agent = create_react_agent(llm, tools, prompt)
# 5. Create the agent executor; max_iterations=10 is passed as the loop limit
agent_executor = AgentExecutor(
agent=agent,
tools=tools,
verbose=True,
max_iterations=10
)
# 6. Run it; the final answer is read from the "output" key of the result
result = agent_executor.invoke({
"input": "What is 25 * 4? Then search for information about that number."
})
print(result["output"])
6.2 带记忆的 Agent
from langchain.memory import ConversationBufferMemory
# Create the memory; the history is stored under the "chat_history" key
memory = ConversationBufferMemory(
memory_key="chat_history",
return_messages=True
)
# Create an executor that uses the memory. `agent` and `tools` are the
# objects built in the basic-agent example.
# NOTE(review): that agent was built from the "hwchase17/react" prompt. The
# stored history presumably reaches the model only if the prompt contains a
# {chat_history} placeholder ("hwchase17/react-chat" is the conversational
# variant) - confirm before relying on recall across turns.
agent_executor = AgentExecutor(
agent=agent,
tools=tools,
memory=memory,
verbose=True
)
# Multi-turn conversation
agent_executor.invoke({"input": "我的名字是张三"})
agent_executor.invoke({"input": "我的名字是什么?"}) # intended to recall "张三"
6.3 自定义 Agent
from langchain.agents import BaseSingleActionAgent
from langchain.schema import AgentAction, AgentFinish
class CustomAgent(BaseSingleActionAgent):
    """Rule-based agent that picks a tool from keywords in the user input."""

    @property
    def input_keys(self):
        """Names of the inputs this agent expects.

        BaseSingleActionAgent declares this property as abstract, so the
        class cannot be instantiated without it. ``plan`` reads only
        ``kwargs["input"]``.
        """
        return ["input"]

    def plan(self, intermediate_steps, **kwargs):
        """Decide the next action from the raw user input.

        Returns an AgentAction for the Calculator or Search tool when the
        input contains the matching keyword, otherwise an AgentFinish with a
        fixed "cannot handle" message.
        """
        # extract_expression / extract_query are not defined in this
        # snippet; they must be supplied by the surrounding module.
        # NOTE(review): intermediate_steps is not consulted, so after a tool
        # runs the same action is presumably chosen again - confirm the
        # executor's iteration limit is acceptable or finish once
        # intermediate_steps is non-empty.
        user_input = kwargs["input"]
        if "计算" in user_input:
            return AgentAction(
                tool="Calculator",
                tool_input=extract_expression(user_input),
                log="使用计算器"
            )
        elif "搜索" in user_input:
            return AgentAction(
                tool="Search",
                tool_input=extract_query(user_input),
                log="使用搜索"
            )
        else:
            return AgentFinish(
                return_values={"output": "我不确定如何处理这个请求"},
                log="无法处理"
            )

    async def aplan(self, intermediate_steps, **kwargs):
        """Async variant; delegates to the synchronous plan()."""
        return self.plan(intermediate_steps, **kwargs)
七、AutoGPT:自主循环 Agent
7.1 AutoGPT 架构
class AutoGPT:
    """Autonomous loop: decide, act, remember, repeat until finished.

    NOTE(review): ``execute_action`` and ``should_stop`` are called but not
    defined in this snippet, and the collaborators are used through
    ``llm.decide_next_action``, ``tools.describe``, ``memory.add``,
    ``memory.get`` and ``memory.summarize`` - confirm they are provided.
    """

    def __init__(self, llm, tools, memory, max_iterations=25):
        self.llm = llm
        self.tools = tools
        self.memory = memory
        self.max_iterations = max_iterations

    def run(self, goal: str):
        """Work towards *goal* for at most ``max_iterations`` rounds.

        Returns the ``"result"`` of the first decision whose ``"type"`` is
        ``"FINISH"``; otherwise the fixed iteration-limit message, including
        when ``should_stop`` ends the loop early.
        """
        self.memory.add("Goal", goal)
        for iteration in range(self.max_iterations):
            # 1-2. Build the context and let the LLM choose the next action
            decision = self.llm.decide_next_action(self.build_context())

            # 3. Finish or execute
            # NOTE(review): the prompt asks for thought/action/action_input
            # keys while this reads "type"/"result"; presumably
            # decide_next_action translates between them - confirm.
            if decision["type"] == "FINISH":
                return decision["result"]
            outcome = self.execute_action(decision)

            # 4. Remember what was done and what came back
            self.memory.add(f"Action {iteration}", decision)
            self.memory.add(f"Result {iteration}", outcome)

            # 5. Self-evaluation
            if self.should_stop(outcome):
                break
        return "达到最大迭代次数"

    def build_context(self) -> str:
        """Assemble the decision prompt: goal, memory summary and tool list."""
        goal = self.memory.get('Goal')
        memory_summary = self.memory.summarize()
        tool_listing = self.tools.describe()
        sections = [
            f"You are an autonomous agent working towards: {goal}",
            "Memory:",
            f"{memory_summary}",
            "Available tools:",
            f"{tool_listing}",
            "What should you do next? Respond in JSON format:",
            "{",
            '"thought": "...",',
            '"action": "...",',
            '"action_input": "..."',
            "}",
        ]
        return "\n".join(sections)
7.2 实现关键组件
class AgentMemory:
    """Agent memory: a bounded short-term log plus a long-term store.

    Short-term memory is a deque of ``(key, value)`` pairs that drops the
    oldest entry once ``max_items`` is reached. ``long_term`` is an empty
    dict reserved for a vector-database backend.
    """

    def __init__(self, max_items=100):
        self.short_term = deque(maxlen=max_items)
        self.long_term = {}  # placeholder for a vector database

    def add(self, key: str, value: str):
        """Append one ``(key, value)`` entry to short-term memory."""
        self.short_term.append((key, value))

    def get(self, key: str, default=None):
        """Return the most recent value stored under *key*, or *default*.

        AutoGPT.build_context reads the goal through ``memory.get('Goal')``,
        which this class did not provide. An entry can no longer be found
        once it has been evicted from the bounded short-term log.
        """
        for stored_key, value in reversed(self.short_term):
            if stored_key == key:
                return value
        return default

    def summarize(self, last_n=10) -> str:
        """Return the last *last_n* entries as ``"key: value"`` lines."""
        if last_n <= 0:
            # list[-0:] would return everything rather than nothing.
            return ""
        recent = list(self.short_term)[-last_n:]
        return "\n".join(f"{k}: {v}" for k, v in recent)

    def search(self, query: str, k=5) -> List[str]:
        """Return up to *k* entries that mention *query*, newest first.

        This is a case-insensitive substring match over short-term memory,
        standing in for the vector-similarity search that ``long_term`` is
        reserved for. It returns an empty list when nothing matches.
        """
        if k <= 0:
            return []
        needle = query.casefold()
        hits = []
        for key, value in reversed(self.short_term):
            entry = f"{key}: {value}"
            if needle in entry.casefold():
                hits.append(entry)
                if len(hits) >= k:
                    break
        return hits
class SelfCriticAgent:
    """Agent that critiques its own result and retries with an improved task.

    NOTE(review): ``self.llm``, ``self.execute`` and
    ``self.improve_based_on_critique`` are used but not defined in this
    snippet; they must be supplied by a subclass or set on the instance.
    """

    def run_with_criticism(self, task: str, max_attempts: int = 3,
                           quality_threshold: float = 0.8) -> str:
        """Execute *task*, retrying until the critique is good enough.

        Args:
            task: Task description handed to ``self.execute``.
            max_attempts: Maximum number of executions (at least 1).
            quality_threshold: A result is accepted once its critique
                "quality" is strictly greater than this value.

        Returns:
            The first accepted result, or the last result if none passed.

        Raises:
            ValueError: If *max_attempts* is smaller than 1.
        """
        if max_attempts < 1:
            raise ValueError("max_attempts must be at least 1")

        result = None
        for attempt in range(max_attempts):
            result = self.execute(task)
            critique = self.critique(task, result)
            if critique["quality"] > quality_threshold:
                return result
            # Nothing runs after the final attempt, so skip the last rewrite.
            if attempt < max_attempts - 1:
                task = self.improve_based_on_critique(task, critique)
        return result

    def critique(self, task: str, result: str) -> Dict:
        """Ask the LLM to evaluate *result* and return the parsed critique.

        The returned dict always has a float "quality" key. If the reply
        contains no parsable JSON object, a fallback critique with quality
        0.0 is returned so the caller retries instead of crashing.
        """
        prompt = (
            "Evaluate the following result:\n"
            f"Task: {task}\n"
            f"Result: {result}\n"
            "Provide:\n"
            "1. Quality score (0-1)\n"
            "2. Issues found\n"
            "3. Suggestions for improvement\n"
            # Name the keys explicitly: the caller reads critique["quality"],
            # which the model could not know from the list above.
            'Respond with a JSON object using exactly these keys: "quality" '
            '(number between 0 and 1), "issues" (list of strings), '
            '"suggestions" (list of strings).\n'
            "JSON format:"
        )
        response = self.llm(prompt)

        fallback = {
            "quality": 0.0,
            "issues": ["Critique response was not valid JSON"],
            "suggestions": [],
        }
        # Models often wrap JSON in prose or code fences; keep only the
        # outermost {...} span before parsing.
        start, end = response.find("{"), response.rfind("}")
        if start == -1 or end < start:
            return fallback
        try:
            parsed = json.loads(response[start:end + 1])
        except ValueError:
            return fallback
        if not isinstance(parsed, dict):
            return fallback
        try:
            parsed["quality"] = float(parsed.get("quality", 0.0))
        except (TypeError, ValueError):
            parsed["quality"] = 0.0
        return parsed
八、多智能体协作
8.1 MetaGPT 风格的角色分工
class MultiAgentSystem:
    """Role-based pipeline: product manager -> architect -> engineer -> tester."""

    def __init__(self):
        # One agent per role, keyed by the role name used in run().
        # ArchitectAgent, EngineerAgent and TesterAgent are not defined in
        # this snippet and must exist in the surrounding module.
        roles = {}
        roles["产品经理"] = ProductManagerAgent()
        roles["架构师"] = ArchitectAgent()
        roles["工程师"] = EngineerAgent()
        roles["测试"] = TesterAgent()
        self.agents = roles

    def run(self, project_description: str):
        """Run the software-development workflow and return the code.

        If the tests fail, the engineer gets one chance to fix the reported
        issues; the fixed code is returned without being tested again.
        """
        engineer = self.agents["工程师"]

        # 1. Product manager: requirements analysis
        spec = self.agents["产品经理"].analyze(project_description)
        # 2. Architect: architecture design
        design = self.agents["架构师"].design(spec)
        # 3. Engineer: implementation
        code = engineer.implement(design)
        # 4. Tester: run the tests
        report = self.agents["测试"].test(code)

        # 5. Return passing code as is, otherwise send it back for a fix
        if report["passed"]:
            return code
        return engineer.fix(code, report["issues"])
class ProductManagerAgent:
    """Product-manager role: turns a project description into requirements."""

    def __init__(self, llm=None):
        """
        Args:
            llm: Callable that takes a prompt string and returns the model's
                text. It was previously never set, so ``analyze`` failed with
                AttributeError. It stays optional because MultiAgentSystem
                builds this class without arguments; assign ``agent.llm``
                before calling ``analyze`` in that case.
        """
        self.llm = llm

    def analyze(self, description: str) -> Dict:
        """Ask the LLM for a requirements analysis and return the parsed JSON.

        Raises:
            RuntimeError: If no LLM callable has been configured.
            ValueError: If the reply is not valid JSON (from ``json.loads``).
        """
        llm = getattr(self, "llm", None)
        if llm is None:
            raise RuntimeError("ProductManagerAgent.llm is not configured")
        prompt = (
            "As a product manager, analyze this project:\n"
            f"{description}\n"
            "Provide:\n"
            "1. Core features\n"
            "2. User stories\n"
            "3. Acceptance criteria\n"
            "Format as JSON."
        )
        return json.loads(llm(prompt))
8.2 辩论式多智能体
class DebateSystem:
    """Several agents propose over multiple rounds; a judge picks the best."""

    def __init__(self, agents: List[Agent], judge: Agent):
        # `Agent` is not defined in this snippet; it must exist in the
        # surrounding module for these annotations to resolve.
        self.agents = agents
        self.judge = judge

    def debate(self, topic: str, rounds=3) -> str:
        """Run *rounds* of proposals on *topic* and return the judge's choice.

        Within a round every agent sees only proposals from earlier rounds;
        the current round's proposals are added once the round is over.
        """
        proposals = []
        for index in range(rounds):
            print(f"\n=== Round {index + 1} ===")
            this_round = []
            for debater in self.agents:
                # Each agent submits one proposal per round
                idea = debater.propose(topic, previous_proposals=proposals)
                this_round.append(idea)
                print(f"{debater.name}: {idea}")
            proposals += this_round

        # The judge selects the best proposal from all rounds
        return self.judge.select_best(topic, proposals)
九、Agent 评估与优化
9.1 性能指标
class AgentEvaluator:
    """Runs an agent over test cases and reports averaged metrics."""

    def evaluate(self, agent, test_cases):
        """Return success rate and average steps, cost and time for *agent*.

        Args:
            agent: Object with ``run(input)`` that exposes ``step_count``,
                ``total_cost`` and ``execution_time`` after each run.
            test_cases: Iterable of dicts with "input" and "expected" keys.

        Returns:
            Dict with "success_rate", "avg_steps", "avg_cost" and "avg_time".
            All values are 0 when there are no test cases.
        """
        cases = list(test_cases)  # also accepts generators
        metrics = {
            "success_rate": 0,
            "avg_steps": 0,
            "avg_cost": 0,
            "avg_time": 0
        }
        if not cases:
            # Avoid dividing by zero on an empty suite.
            return metrics

        for case in cases:
            result = agent.run(case["input"])
            if self.check_correctness(result, case["expected"]):
                metrics["success_rate"] += 1
            metrics["avg_steps"] += agent.step_count
            metrics["avg_cost"] += agent.total_cost
            metrics["avg_time"] += agent.execution_time

        # Turn the accumulated totals into averages
        total = len(cases)
        return {name: value / total for name, value in metrics.items()}

    def check_correctness(self, result, expected) -> bool:
        """Return True when *result* counts as correct for *expected*.

        ``evaluate`` called this method but it was never defined. The default
        is plain equality; override it for fuzzy or LLM-based grading.
        """
        return result == expected
9.2 提示工程优化
# Pick the best system prompt for the agent by measured success rate.
# Agent, evaluator and test_cases are not defined in this snippet and must
# be provided by the surrounding code.
system_prompts = [
    "You are a helpful assistant.",
    "You are an expert problem solver. Think step by step.",
    "You are a precise AI agent. Always verify your actions before executing."
]
best_prompt = None
# Start below any possible score: with 0 and a strict ">" no prompt was
# selected when every candidate scored 0.
best_score = float("-inf")
for prompt in system_prompts:
    agent = Agent(system_prompt=prompt)
    score = evaluator.evaluate(agent, test_cases)["success_rate"]
    if score > best_score:  # strict ">" keeps the earliest prompt on ties
        best_score = score
        best_prompt = prompt
print(f"Best prompt: {best_prompt} (score: {best_score})")
十、总结
Agent 开发核心要点:
- 架构选择:
- 简单任务:ReAct
- 复杂任务:Plan-and-Execute
- 自主任务:AutoGPT
- 工具设计:
- 明确描述工具功能
- 参数类型清晰
- 错误处理完善
- 记忆机制:
- 短期:对话历史(滑动窗口)
- 长期:向量数据库(检索相关)
- 结构化:实体记忆(知识图谱)
- 提示工程:
- 清晰的角色定位
- 详细的指令格式
- Few-shot 示例
- 评估优化:
- 成功率、效率、成本
- A/B 测试不同提示
- 持续迭代改进
推荐框架:
- LangChain:最成熟,生态丰富
- LlamaIndex:专注于数据索引和查询
- AutoGPT:自主性强,适合探索性任务
- MetaGPT:多智能体协作,适合复杂项目
参考资源
论文:
- ReAct: Synergizing Reasoning and Acting in Language Models
- Generative Agents: Interactive Simulacra of Human Behavior
- AutoGPT: An Experimental OpenSource Attempt
框架:
教程:
💬 交流与讨论
⚠️ 尚未完成 Giscus 配置。请在
_config.yml中设置repo_id与category_id后重新部署,即可启用升级后的评论系统。配置完成后,评论区将自动支持 Markdown 代码高亮与 LaTeX 数学公式渲染,访客回复会同步到 GitHub Discussions,并具备通知功能。