规划能力:AI Agent的核心瓶颈
2026年,AI Agent的工具调用能力已经相当成熟,但规划能力仍然是最大的短板。许多Agent在面对复杂任务时的失败,不是因为"不知道怎么做",而是因为无法制定合理的执行计划,在错误的路径上越走越远。
本文将系统介绍三种主流的AI规划技术——Chain-of-Thought、Tree-of-Thought、ReAct——以及如何将它们工程化,构建出具有真正规划能力的AI Agent。
规划技术演进路线
简单问答 → Chain-of-Thought (线性推理) → ReAct (行动+推理) → Tree-of-Thought (多路径探索) → AlphaCode-style Search (蒙特卡洛搜索)
技术1:Chain-of-Thought(思维链)工程化
标准CoT的问题:思维链是线性的,一旦在某一步出错,后续都会偏离正轨。
工程化改进:带验证的CoT
from anthropic import Anthropic
from dataclasses import dataclass
@dataclass
class ThoughtStep:
step_number: int
thought: str
action: str | None
result: str | None
is_verified: bool
class VerifiedCoT:
"""带自我验证的思维链"""
def __init__(self, model: str = "claude-3-5-sonnet-20241022"):
self.client = Anthropic()
self.model = model
def solve(self, problem: str, max_steps: int = 10) -> dict:
"""使用带验证的CoT求解问题"""
steps = []
messages = []
# 初始分解
initial_plan = self._create_initial_plan(problem)
messages.append({"role": "user", "content": problem})
messages.append({"role": "assistant", "content": initial_plan})
# 逐步执行并验证
for step_num in range(1, max_steps + 1):
# 执行下一步推理
thought, action, result = self._execute_step(messages, step_num)
# 自我验证:这一步是否正确?
is_verified = self._verify_step(problem, steps, thought, action, result)
step = ThoughtStep(step_num, thought, action, result, is_verified)
steps.append(step)
if not is_verified:
# 验证失败,触发回溯
corrected = self._backtrack_and_correct(problem, steps[:-1], step)
steps[-1] = corrected
messages.append({"role": "user", "content": f"步骤{step_num}已执行,继续。"})
messages.append({"role": "assistant", "content": f"思考:{thought}\n行动:{action}\n结果:{result}"})
# 检查是否完成
if self._is_complete(problem, steps):
break
final_answer = self._synthesize_answer(problem, steps)
return {
"problem": problem,
"steps": steps,
"final_answer": final_answer,
"total_steps": len(steps),
"verification_failures": sum(1 for s in steps if not s.is_verified),
}
def _create_initial_plan(self, problem: str) -> str:
"""创建初始解题计划"""
response = self.client.messages.create(
model=self.model,
max_tokens=500,
messages=[{
"role": "user",
"content": f"""请将以下问题分解为清晰的解题步骤(不要立即解答,只列出步骤):
{problem}
以编号列表形式输出步骤,每步说明需要做什么。"""
}]
)
return response.content[0].text
def _verify_step(self, problem, previous_steps, thought, action, result) -> bool:
"""验证当前步骤是否正确"""
context = "\n".join([f"步骤{s.step_number}: {s.thought}" for s in previous_steps])
response = self.client.messages.create(
model="claude-3-haiku-20240307", # 用快速模型做验证
max_tokens=200,
messages=[{
"role": "user",
"content": f"""验证以下推理步骤是否合理正确。
只输出JSON:{{"is_valid": true/false, "issue": "如果有问题,简要说明"}}
问题:{problem}
之前的步骤:{context}
当前步骤推理:{thought}
行动:{action}
结果:{result}"""
}]
)
import json, re
raw = response.content[0].text
match = re.search(r'\{.*\}', raw, re.DOTALL)
data = json.loads(match.group()) if match else {"is_valid": True}
return data.get("is_valid", True)
def _execute_step(self, messages, step_num):
"""执行推理步骤"""
response = self.client.messages.create(
model=self.model,
max_tokens=400,
messages=messages + [{
"role": "user",
"content": f"请执行第{step_num}步推理。格式:\n思考:[你的推理]\n行动:[需要执行的操作,如果不需要则说'思考']\n结果:[推理或操作的结果]"
}]
)
text = response.content[0].text
# 简单解析
thought = self._extract_section(text, "思考")
action = self._extract_section(text, "行动")
result = self._extract_section(text, "结果")
return thought, action, result
def _extract_section(self, text, section):
import re
match = re.search(f"{section}:(.*?)(?=思考:|行动:|结果:|$)", text, re.DOTALL)
return match.group(1).strip() if match else ""
def _is_complete(self, problem, steps) -> bool:
if not steps:
return False
last = steps[-1]
return "完成" in last.result or "答案" in last.result or "解决" in last.result
def _backtrack_and_correct(self, problem, previous_steps, failed_step):
response = self.client.messages.create(
model=self.model,
max_tokens=400,
messages=[{
"role": "user",
"content": f"""以下推理步骤有误,请纠正:
问题:{problem}
错误步骤:{failed_step.thought}
错误原因:验证未通过
请提供正确的推理步骤:"""
}]
)
corrected_text = response.content[0].text
return ThoughtStep(
failed_step.step_number,
corrected_text,
"纠正",
"已回溯并纠正",
True,
)
def _synthesize_answer(self, problem, steps) -> str:
steps_text = "\n".join([f"步骤{s.step_number}: {s.thought}\n结果: {s.result}" for s in steps])
response = self.client.messages.create(
model=self.model,
max_tokens=500,
messages=[{
"role": "user",
"content": f"基于以下推理步骤,给出最终答案:\n问题:{problem}\n\n推理过程:\n{steps_text}\n\n最终答案:"
}]
)
return response.content[0].text
技术2:Tree-of-Thought(思维树)
ToT的核心:在每个决策点生成多个候选思路,用评估器筛选最优路径,支持回溯。
import heapq
from dataclasses import dataclass, field
@dataclass
class ThoughtNode:
    """A node of the thought tree: one candidate thought plus its tree links."""

    thought: str  # text of the candidate thought
    score: float = 0.0  # evaluation score assigned to the thought
    depth: int = 0  # distance from the root node
    parent: 'ThoughtNode | None' = None  # node this thought was expanded from
    children: list = field(default_factory=list)  # nodes expanded from this one

    def __lt__(self, other):
        # Ordering is intentionally reversed: the node with the higher score
        # compares as "smaller", so a heapq min-heap behaves as a max-heap.
        return other.score < self.score
class TreeOfThought:
"""思维树:多路径探索与最优路径选择"""
def __init__(
self,
model: str = "claude-3-5-sonnet-20241022",
branch_factor: int = 3, # 每个节点展开的分支数
max_depth: int = 4, # 最大搜索深度
beam_width: int = 2, # Beam Search宽度
):
self.client = Anthropic()
self.model = model
self.branch_factor = branch_factor
self.max_depth = max_depth
self.beam_width = beam_width
def solve(self, problem: str) -> dict:
"""使用ToT求解问题"""
# 根节点
root = ThoughtNode(thought=f"开始解决:{problem}", depth=0)
# Beam Search
beam = [root]
for depth in range(1, self.max_depth + 1):
new_beam = []
for node in beam:
# 为每个节点展开子节点
children = self._expand_node(problem, node, self.branch_factor)
# 评估每个子节点
for child in children:
child.depth = depth
child.parent = node
child.score = self._evaluate_thought(problem, child)
node.children.append(child)
new_beam.append(child)
if not new_beam:
break
# 取评分最高的beam_width个节点继续搜索
beam = heapq.nlargest(self.beam_width, new_beam)
# 检查是否有节点已经到达答案
for node in beam:
if self._is_solution(problem, node):
path = self._extract_path(node)
return {
"solution": node.thought,
"path": path,
"depth": depth,
"strategy": "early_termination",
}
# 返回评分最高的最终节点
best_node = max(beam, key=lambda n: n.score)
path = self._extract_path(best_node)
# 基于最优路径生成最终答案
final_answer = self._generate_final_answer(problem, path)
return {
"solution": final_answer,
"path": path,
"best_score": best_node.score,
"strategy": "beam_search",
}
def _expand_node(self, problem: str, node: ThoughtNode, n: int) -> list[ThoughtNode]:
"""展开节点,生成n个候选子思路"""
path_context = self._get_path_context(node)
response = self.client.messages.create(
model=self.model,
max_tokens=600,
messages=[{
"role": "user",
"content": f"""问题:{problem}
当前推理路径:
{path_context}
请提供{n}种不同的、合理的下一步思路。每种思路应该从不同角度或方法出发。
格式:
思路1: [...]
思路2: [...]
思路3: [...]"""
}]
)
thoughts = self._parse_thoughts(response.content[0].text, n)
return [ThoughtNode(thought=t) for t in thoughts]
def _evaluate_thought(self, problem: str, node: ThoughtNode) -> float:
"""评估思路的质量(0-10分)"""
path_context = self._get_path_context(node)
response = self.client.messages.create(
model="claude-3-haiku-20240307",
max_tokens=100,
messages=[{
"role": "user",
"content": f"""评估以下推理路径对于解决问题的有效性,给出0-10的评分。
只输出一个数字。
问题:{problem}
推理路径:{path_context}"""
}]
)
try:
return float(response.content[0].text.strip()) / 10.0
except:
return 0.5
def _is_solution(self, problem: str, node: ThoughtNode) -> bool:
"""判断当前思路是否已经达到解决方案"""
keywords = ["答案是", "解决方案是", "最终结论", "因此得出", "可以得出结论"]
return any(kw in node.thought for kw in keywords)
def _extract_path(self, node: ThoughtNode) -> list[str]:
"""提取从根到当前节点的路径"""
path = []
current = node
while current:
path.append(current.thought)
current = current.parent
return list(reversed(path))
def _get_path_context(self, node: ThoughtNode) -> str:
path = self._extract_path(node)
return "\n→ ".join(path)
def _parse_thoughts(self, text: str, n: int) -> list[str]:
import re
thoughts = []
for i in range(1, n + 1):
match = re.search(f"思路{i}:\\s*(.*?)(?=思路{i+1}:|$)", text, re.DOTALL)
if match:
thoughts.append(match.group(1).strip())
return thoughts if thoughts else [text.strip()]
def _generate_final_answer(self, problem: str, path: list[str]) -> str:
response = self.client.messages.create(
model=self.model,
max_tokens=600,
messages=[{
"role": "user",
"content": f"基于以下最优推理路径,给出最终答案:\n\n问题:{problem}\n\n推理路径:\n" + "\n→ ".join(path)
}]
)
return response.content[0].text
技术3:ReAct增强版(带反思的行动循环)
class ReflectiveReAct:
    """ReAct agent whose prompt asks the model to reflect after every action."""

    SYSTEM_PROMPT = (
        "你是一个能够自我反思的AI Agent。在每次行动后,你需要评估结果是否符合预期,\n"
        "如果发现偏差,主动调整策略。\n"
        "可用工具:search(搜索)、calculate(计算)、code(执行代码)、done(完成任务)\n"
        "每轮格式:\n"
        "Thought: [当前思考,包括对上一步的反思]\n"
        "Action: [工具名]\n"
        "Input: [工具输入]\n"
        "---\n"
        "Observation: [工具输出]\n"
        "Reflection: [反思:结果是否符合预期?下一步策略是否需要调整?]\n"
    )

    def __init__(self, tools: dict, model: str = "claude-3-5-sonnet-20241022"):
        """Create the API client and register the tools.

        Args:
            tools: Mapping of tool name to a callable taking the tool input
                string.
            model: Model used for every turn of the loop.
        """
        self.client = Anthropic()
        self.tools = tools
        self.model = model

    def run(self, task: str, max_iterations: int = 10) -> dict:
        """Run the think/act/observe loop until ``done`` or the iteration cap.

        Returns:
            A dict with the keys ``answer``, ``iterations`` and
            ``trajectory`` (one entry per executed tool call).
        """
        trajectory = []
        messages = [{"role": "user", "content": f"任务:{task}"}]

        for iteration in range(1, max_iterations + 1):
            response = self.client.messages.create(
                model=self.model,
                max_tokens=800,
                system=self.SYSTEM_PROMPT,
                messages=messages,
            )
            response_text = response.content[0].text
            action, action_input = self._parse_action(response_text)

            # Compare case-insensitively: "Done" would otherwise be treated
            # as an unknown tool and loop until the iteration cap.
            if action.lower() == "done":
                return {
                    "answer": self._extract_final_answer(response_text),
                    "iterations": iteration,
                    "trajectory": trajectory,
                }

            observation = self._execute_tool(action, action_input)
            # NOTE(review): the prompt format lets the model write its own
            # Observation/Reflection before the tool has run, so the
            # reflection recorded here may refer to text the model produced
            # rather than to the real tool output - confirm this is intended.
            trajectory.append({
                "iteration": iteration,
                "thought": self._extract_section_react(response_text, "Thought"),
                "action": action,
                "input": action_input,
                "observation": observation,
                "reflection": self._extract_section_react(response_text, "Reflection"),
            })

            # Extend the conversation with the reply and the real tool output.
            messages.append({"role": "assistant", "content": response_text})
            messages.append({
                "role": "user",
                "content": f"Observation: {observation}\n请继续执行。",
            })

        return {
            "answer": "达到最大迭代次数",
            "iterations": max_iterations,
            "trajectory": trajectory,
        }

    def _parse_action(self, text: str) -> tuple[str, str]:
        """Extract the tool name and tool input from a model reply.

        A reply without an ``Action:`` line is treated as ``done``; a reply
        without an ``Input:`` line yields an empty input string.
        """
        import re

        action_match = re.search(r"Action:\s*(\w+)", text)
        input_match = re.search(
            r"Input:\s*(.*?)(?=---|\n\n|Observation:|$)", text, re.DOTALL
        )
        action = action_match.group(1) if action_match else "done"
        action_input = input_match.group(1).strip() if input_match else ""
        return action, action_input

    def _execute_tool(self, tool_name: str, tool_input: str) -> str:
        """Run the named tool and return its output as a string.

        The exact name is tried first, then its lower-cased form. Tool errors
        and unknown names are reported as text, so the loop can pass them
        back to the model instead of crashing.
        """
        tool = self.tools.get(tool_name)
        if tool is None:
            tool = self.tools.get(tool_name.lower())
        if tool is None:
            return f"未知工具: {tool_name}"
        try:
            return str(tool(tool_input))
        except Exception as e:  # deliberate: any tool error becomes an observation
            return f"工具执行失败: {e}"

    def _extract_section_react(self, text: str, section: str) -> str:
        """Return the text after ``<section>:`` up to the next label, rule or blank line."""
        import re

        pattern = (
            rf"{re.escape(section)}:\s*(.*?)"
            r"(?=Thought:|Action:|Reflection:|---|\n\n|$)"
        )
        match = re.search(pattern, text, re.DOTALL)
        return match.group(1).strip() if match else ""

    def _extract_final_answer(self, text: str) -> str:
        """Return the ``Input:`` payload of a ``done`` reply.

        Falls back to the whole reply when there is no ``Input:`` line or
        when its payload is empty.
        """
        import re

        match = re.search(r"Input:\s*(.*?)(?=---|$)", text, re.DOTALL)
        answer = match.group(1).strip() if match else ""
        return answer or text
规划技术选型建议
| 场景 | 推荐技术 | 原因 |
|---|---|---|
| 数学/逻辑推理 | ToT + Verified CoT | 需要多路径探索和精确验证 |
| 信息检索与整合 | ReAct + 反思 | 需要工具调用和迭代修正 |
| 创意写作/头脑风暴 | ToT(宽beam) | 多样性比精确性更重要 |
| 日常任务规划 | Verified CoT | 线性推理+验证已足够 |
| 复杂软件工程 | ReAct + ToT混合 | 需要工具执行+分支决策 |
真正强大的Agent往往是这些技术的组合:用ToT探索高层规划,用ReAct执行具体步骤,用CoT验证每个中间结果。规划能力的提升,是AI Agent从"能用"到"好用"的关键跃迁。