AI Agent的规划能力升级:从Chain-of-Thought到Tree-of-Thought工程实践

4 阅读1分钟

规划能力:AI Agent的核心瓶颈

2026年,AI Agent的工具调用能力已经相当成熟,但规划能力仍然是最大的短板。许多Agent在面对复杂任务时的失败,不是因为"不知道怎么做",而是因为无法制定合理的执行计划,在错误的路径上越走越远。

本文将系统介绍三种主流的AI规划技术——Chain-of-Thought、Tree-of-Thought、ReAct——以及如何将它们工程化,构建出具有真正规划能力的AI Agent。


规划技术演进路线

简单问答 → Chain-of-Thought → ReAct → Tree-of-Thought → AlphaCode-style Search
              (线性推理)      (行动+推理)  (多路径探索)      (蒙特卡洛搜索)

技术1:Chain-of-Thought(思维链)工程化

标准CoT的问题:思维链是线性的,一旦在某一步出错,后续都会偏离正轨。

工程化改进:带验证的CoT

from anthropic import Anthropic
from dataclasses import dataclass

@dataclass
class ThoughtStep:
    """Record of one reasoning step produced during a chain-of-thought run."""
    # 1-based position of the step within the run.
    step_number: int
    # Text of the reasoning for this step.
    thought: str
    # Operation the step asked for, if any.
    action: str | None
    # Outcome reported for the step, if any.
    result: str | None
    # Whether the step is considered to have passed verification.
    is_verified: bool

class VerifiedCoT:
    """带自我验证的思维链"""
    
    def __init__(self, model: str = "claude-3-5-sonnet-20241022"):
        self.client = Anthropic()
        self.model = model
    
    def solve(self, problem: str, max_steps: int = 10) -> dict:
        """使用带验证的CoT求解问题"""
        
        steps = []
        messages = []
        
        # 初始分解
        initial_plan = self._create_initial_plan(problem)
        messages.append({"role": "user", "content": problem})
        messages.append({"role": "assistant", "content": initial_plan})
        
        # 逐步执行并验证
        for step_num in range(1, max_steps + 1):
            # 执行下一步推理
            thought, action, result = self._execute_step(messages, step_num)
            
            # 自我验证:这一步是否正确?
            is_verified = self._verify_step(problem, steps, thought, action, result)
            
            step = ThoughtStep(step_num, thought, action, result, is_verified)
            steps.append(step)
            
            if not is_verified:
                # 验证失败,触发回溯
                corrected = self._backtrack_and_correct(problem, steps[:-1], step)
                steps[-1] = corrected
            
            messages.append({"role": "user", "content": f"步骤{step_num}已执行,继续。"})
            messages.append({"role": "assistant", "content": f"思考:{thought}\n行动:{action}\n结果:{result}"})
            
            # 检查是否完成
            if self._is_complete(problem, steps):
                break
        
        final_answer = self._synthesize_answer(problem, steps)
        
        return {
            "problem": problem,
            "steps": steps,
            "final_answer": final_answer,
            "total_steps": len(steps),
            "verification_failures": sum(1 for s in steps if not s.is_verified),
        }
    
    def _create_initial_plan(self, problem: str) -> str:
        """创建初始解题计划"""
        response = self.client.messages.create(
            model=self.model,
            max_tokens=500,
            messages=[{
                "role": "user",
                "content": f"""请将以下问题分解为清晰的解题步骤(不要立即解答,只列出步骤):

{problem}

以编号列表形式输出步骤,每步说明需要做什么。"""
            }]
        )
        return response.content[0].text
    
    def _verify_step(self, problem, previous_steps, thought, action, result) -> bool:
        """验证当前步骤是否正确"""
        context = "\n".join([f"步骤{s.step_number}: {s.thought}" for s in previous_steps])
        
        response = self.client.messages.create(
            model="claude-3-haiku-20240307",  # 用快速模型做验证
            max_tokens=200,
            messages=[{
                "role": "user",
                "content": f"""验证以下推理步骤是否合理正确。
只输出JSON:{{"is_valid": true/false, "issue": "如果有问题,简要说明"}}

问题:{problem}
之前的步骤:{context}
当前步骤推理:{thought}
行动:{action}
结果:{result}"""
            }]
        )
        import json, re
        raw = response.content[0].text
        match = re.search(r'\{.*\}', raw, re.DOTALL)
        data = json.loads(match.group()) if match else {"is_valid": True}
        return data.get("is_valid", True)
    
    def _execute_step(self, messages, step_num):
        """执行推理步骤"""
        response = self.client.messages.create(
            model=self.model,
            max_tokens=400,
            messages=messages + [{
                "role": "user",
                "content": f"请执行第{step_num}步推理。格式:\n思考:[你的推理]\n行动:[需要执行的操作,如果不需要则说'思考']\n结果:[推理或操作的结果]"
            }]
        )
        text = response.content[0].text
        # 简单解析
        thought = self._extract_section(text, "思考")
        action = self._extract_section(text, "行动")
        result = self._extract_section(text, "结果")
        return thought, action, result
    
    def _extract_section(self, text, section):
        import re
        match = re.search(f"{section}:(.*?)(?=思考:|行动:|结果:|$)", text, re.DOTALL)
        return match.group(1).strip() if match else ""
    
    def _is_complete(self, problem, steps) -> bool:
        if not steps:
            return False
        last = steps[-1]
        return "完成" in last.result or "答案" in last.result or "解决" in last.result
    
    def _backtrack_and_correct(self, problem, previous_steps, failed_step):
        response = self.client.messages.create(
            model=self.model,
            max_tokens=400,
            messages=[{
                "role": "user",
                "content": f"""以下推理步骤有误,请纠正:

问题:{problem}
错误步骤:{failed_step.thought}
错误原因:验证未通过

请提供正确的推理步骤:"""
            }]
        )
        corrected_text = response.content[0].text
        return ThoughtStep(
            failed_step.step_number,
            corrected_text,
            "纠正",
            "已回溯并纠正",
            True,
        )
    
    def _synthesize_answer(self, problem, steps) -> str:
        steps_text = "\n".join([f"步骤{s.step_number}: {s.thought}\n结果: {s.result}" for s in steps])
        response = self.client.messages.create(
            model=self.model,
            max_tokens=500,
            messages=[{
                "role": "user",
                "content": f"基于以下推理步骤,给出最终答案:\n问题:{problem}\n\n推理过程:\n{steps_text}\n\n最终答案:"
            }]
        )
        return response.content[0].text

技术2:Tree-of-Thought(思维树)

ToT的核心:在每个决策点生成多个候选思路,用评估器筛选最优路径,支持回溯。

import heapq
from dataclasses import dataclass, field

@dataclass
class ThoughtNode:
    """A node in the thought tree: one candidate thought plus its tree links."""

    thought: str
    score: float = 0.0
    depth: int = 0
    parent: 'ThoughtNode | None' = None
    children: list = field(default_factory=list)

    def __lt__(self, other):
        # Inverted on purpose: a higher score compares as "smaller", so a
        # min-heap yields the best node first.  Anything that orders nodes
        # with the plain "<" operator (sorted, max, heapq.nlargest without a
        # key) therefore sees the ranking reversed.
        return other.score < self.score

class TreeOfThought:
    """Tree-of-Thought solver: expand several candidate thoughts per node and
    keep only the best-scoring ones (beam search)."""

    def __init__(
        self,
        model: str = "claude-3-5-sonnet-20241022",
        branch_factor: int = 3,    # candidate thoughts generated per node
        max_depth: int = 4,        # maximum search depth
        beam_width: int = 2,       # nodes kept per level
        evaluator_model: str = "claude-3-haiku-20240307",
    ):
        """Create the API client and store the search parameters.

        Args:
            model: Model used to generate thoughts and the final answer.
            branch_factor: Number of candidate thoughts requested per node.
            max_depth: Maximum number of levels to search.
            beam_width: Number of nodes kept after each level.
            evaluator_model: Model used to score candidate thoughts.

        Raises:
            ValueError: If ``beam_width`` is smaller than 1.
        """
        if beam_width < 1:
            raise ValueError("beam_width must be at least 1")
        self.client = Anthropic()
        self.model = model
        self.branch_factor = branch_factor
        self.max_depth = max_depth
        self.beam_width = beam_width
        self.evaluator_model = evaluator_model

    def solve(self, problem: str) -> dict:
        """Search for a solution to ``problem`` with beam search.

        Returns:
            A dict with the keys ``solution``, ``path``, ``depth``,
            ``best_score`` and ``strategy``.  ``strategy`` is
            ``"early_termination"`` when a kept node already reads like a
            final answer, otherwise ``"beam_search"``.
        """
        root = ThoughtNode(thought=f"开始解决:{problem}", depth=0)
        beam = [root]

        for depth in range(1, self.max_depth + 1):
            candidates = []
            for node in beam:
                for child in self._expand_node(problem, node, self.branch_factor):
                    child.depth = depth
                    # The parent link must be set before scoring, because
                    # the evaluator is shown the whole path from the root.
                    child.parent = node
                    child.score = self._evaluate_thought(problem, child)
                    node.children.append(child)
                    candidates.append(child)

            if not candidates:
                break

            beam = self._select_beam(candidates)

            # The beam is ordered best-first, so the highest-scoring
            # solution wins when several nodes qualify.
            for node in beam:
                if self._is_solution(problem, node):
                    return {
                        "solution": node.thought,
                        "path": self._extract_path(node),
                        "depth": depth,
                        "best_score": node.score,
                        "strategy": "early_termination",
                    }

        best_node = max(beam, key=lambda n: n.score)
        path = self._extract_path(best_node)
        final_answer = self._generate_final_answer(problem, path)

        return {
            "solution": final_answer,
            "path": path,
            "depth": best_node.depth,
            "best_score": best_node.score,
            "strategy": "beam_search",
        }

    def _select_beam(self, candidates: list) -> list:
        """Return the ``beam_width`` highest-scoring candidates, best first.

        Ranking uses an explicit ``score`` key rather than the nodes' own
        ``<`` operator, so the result does not depend on how the node class
        defines ordering.
        """
        return heapq.nlargest(self.beam_width, candidates, key=lambda node: node.score)

    def _expand_node(self, problem: str, node: "ThoughtNode", n: int) -> "list[ThoughtNode]":
        """Ask the model for ``n`` alternative next thoughts below ``node``."""
        path_context = self._get_path_context(node)
        # One format line per requested thought, so the example in the
        # prompt always agrees with ``n``.
        format_lines = "\n".join(f"思路{i}: [...]" for i in range(1, n + 1))

        response = self.client.messages.create(
            model=self.model,
            max_tokens=600,
            messages=[{
                "role": "user",
                "content": f"""问题:{problem}

当前推理路径:
{path_context}

请提供{n}种不同的、合理的下一步思路。每种思路应该从不同角度或方法出发。

格式:
{format_lines}"""
            }]
        )

        thoughts = self._parse_thoughts(response.content[0].text, n)
        return [ThoughtNode(thought=t) for t in thoughts]

    def _evaluate_thought(self, problem: str, node: "ThoughtNode") -> float:
        """Score the path ending at ``node``.

        The evaluator is asked for a 0-10 rating; the first number in its
        reply is divided by 10 and clamped.

        Returns:
            A value in ``[0.0, 1.0]``, or ``0.5`` when the reply contains no
            number.
        """
        import re

        path_context = self._get_path_context(node)

        response = self.client.messages.create(
            model=self.evaluator_model,
            max_tokens=100,
            messages=[{
                "role": "user",
                "content": f"""评估以下推理路径对于解决问题的有效性,给出0-10的评分。
只输出一个数字。

问题:{problem}
推理路径:{path_context}"""
            }]
        )

        # Tolerate replies such as "8/10" that wrap the number in extra text.
        match = re.search(r"-?\d+(?:\.\d+)?", response.content[0].text)
        if match is None:
            return 0.5
        return min(max(float(match.group()) / 10.0, 0.0), 1.0)

    def _is_solution(self, problem: str, node: "ThoughtNode") -> bool:
        """Return True when the thought contains a phrase that signals a final answer."""
        keywords = ["答案是", "解决方案是", "最终结论", "因此得出", "可以得出结论"]
        return any(kw in node.thought for kw in keywords)

    def _extract_path(self, node: "ThoughtNode") -> list[str]:
        """Return the thoughts from the root down to ``node``, root first."""
        path = []
        current = node
        while current is not None:
            path.append(current.thought)
            current = current.parent
        path.reverse()
        return path

    def _get_path_context(self, node: "ThoughtNode") -> str:
        """Render the path to ``node`` as arrow-separated lines for a prompt."""
        return "\n→ ".join(self._extract_path(node))

    def _parse_thoughts(self, text: str, n: int) -> list[str]:
        """Split a model reply into up to ``n`` numbered thoughts.

        Both the ASCII colon and the full-width colon (U+FF1A) are accepted
        after each label, and empty entries are skipped.  When no labelled
        thought is found, the whole reply is returned as a single thought.
        """
        import re

        thoughts = []
        for i in range(1, n + 1):
            match = re.search(
                rf"思路{i}[:\uff1a]\s*(.*?)(?=思路{i + 1}[:\uff1a]|$)",
                text,
                re.DOTALL,
            )
            if match:
                thought = match.group(1).strip()
                if thought:
                    thoughts.append(thought)
        return thoughts if thoughts else [text.strip()]

    def _generate_final_answer(self, problem: str, path: list[str]) -> str:
        """Ask the model for a final answer based on the chosen path."""
        response = self.client.messages.create(
            model=self.model,
            max_tokens=600,
            messages=[{
                "role": "user",
                "content": f"基于以下最优推理路径,给出最终答案:\n\n问题:{problem}\n\n推理路径:\n" + "\n→ ".join(path)
            }]
        )
        return response.content[0].text

技术3:ReAct增强版(带反思的行动循环)

class ReflectiveReAct:
    """ReAct agent loop in which the model reflects on each tool result."""

    SYSTEM_PROMPT = """你是一个能够自我反思的AI Agent。在每次行动后,你需要评估结果是否符合预期,
如果发现偏差,主动调整策略。

可用工具:search(搜索)、calculate(计算)、code(执行代码)、done(完成任务)

每轮格式:
Thought: [当前思考,包括对上一步的反思]
Action: [工具名]
Input: [工具输入]
---
Observation: [工具输出]
Reflection: [反思:结果是否符合预期?下一步策略是否需要调整?]
"""

    def __init__(self, tools: dict, model: str = "claude-3-5-sonnet-20241022"):
        """Create the API client.

        Args:
            tools: Mapping from tool name to a callable that takes the tool
                input string.
            model: Model used for every turn of the loop.
        """
        self.client = Anthropic()
        self.tools = tools
        self.model = model

    def run(self, task: str, max_iterations: int = 10) -> dict:
        """Run the think / act / observe loop for ``task``.

        Returns:
            A dict with the keys ``answer``, ``iterations`` and
            ``trajectory``.  ``trajectory`` holds one entry per executed
            tool call; the final "done" turn is not recorded in it.
        """
        trajectory = []
        messages = [{"role": "user", "content": f"任务:{task}"}]

        for iteration in range(1, max_iterations + 1):
            response = self.client.messages.create(
                model=self.model,
                max_tokens=800,
                system=self.SYSTEM_PROMPT,
                messages=messages,
            )
            response_text = response.content[0].text

            action, action_input = self._parse_action(response_text)

            # Compare case-insensitively so "Done" also ends the run.
            if action.lower() == "done":
                return {
                    "answer": self._extract_final_answer(response_text),
                    "iterations": iteration,
                    "trajectory": trajectory,
                }

            observation = self._execute_tool(action, action_input)

            trajectory.append({
                "iteration": iteration,
                "thought": self._extract_section_react(response_text, "Thought"),
                "action": action,
                "input": action_input,
                "observation": observation,
                "reflection": self._extract_section_react(response_text, "Reflection"),
            })

            # Feed the observation back so the next turn can reflect on it.
            messages.append({"role": "assistant", "content": response_text})
            messages.append({
                "role": "user",
                "content": f"Observation: {observation}\n请继续执行。"
            })

        return {
            "answer": "达到最大迭代次数",
            "iterations": max_iterations,
            "trajectory": trajectory,
        }

    def _parse_action(self, text: str) -> tuple[str, str]:
        """Extract ``(tool_name, tool_input)`` from a model reply.

        The tool name may be wrapped in square brackets, because the system
        prompt shows the format as ``Action: [...]``.  When no ``Action:``
        line is found the name defaults to ``"done"``; a missing ``Input:``
        yields an empty string.
        """
        import re

        action_match = re.search(r"Action:\s*\[?\s*(\w+)", text)
        input_match = re.search(r"Input:\s*(.*?)(?=---|\n\n|Observation:|$)", text, re.DOTALL)
        action = action_match.group(1) if action_match else "done"
        action_input = input_match.group(1).strip() if input_match else ""
        return action, action_input

    def _execute_tool(self, tool_name: str, tool_input: str) -> str:
        """Call the named tool and return its output as a string.

        Unknown tools and tool exceptions are reported as text instead of
        being raised, so the model can react to them in the next turn.
        """
        tool = self.tools.get(tool_name)
        if tool is None:
            return f"未知工具: {tool_name}"
        try:
            return str(tool(tool_input))
        except Exception as e:
            return f"工具执行失败: {e}"

    def _extract_section_react(self, text: str, section: str) -> str:
        """Return the text after ``section:`` up to the next label, ``---`` or blank line."""
        import re

        match = re.search(f"{section}:\\s*(.*?)(?=Thought:|Action:|Reflection:|---|\n\n|$)", text, re.DOTALL)
        return match.group(1).strip() if match else ""

    def _extract_final_answer(self, text: str) -> str:
        """Return the ``Input:`` payload of a "done" turn, or the whole reply if absent."""
        import re

        match = re.search(r"Input:\s*(.*?)(?=---|$)", text, re.DOTALL)
        return match.group(1).strip() if match else text

规划技术选型建议

| 场景 | 推荐技术 | 原因 |
| --- | --- | --- |
| 数学/逻辑推理 | ToT + Verified CoT | 需要多路径探索和精确验证 |
| 信息检索与整合 | ReAct + 反思 | 需要工具调用和迭代修正 |
| 创意写作/头脑风暴 | ToT(宽beam) | 多样性比精确性更重要 |
| 日常任务规划 | Verified CoT | 线性推理+验证已足够 |
| 复杂软件工程 | ReAct + ToT混合 | 需要工具执行+分支决策 |

真正强大的Agent往往是这些技术的组合:用ToT探索高层规划,用ReAct执行具体步骤,用CoT验证每个中间结果。规划能力的提升,是AI Agent从"能用"到"好用"的关键跃迁。