为什么你的Agent需要LangGraph?
在Agent开发领域,有一道绕不过去的门槛:当你的AI应用需要记住状态、做条件分支、处理循环逻辑的时候,简单的链式调用就不够用了。
LangGraph是LangChain团队在2024年推出、2026年已成为行业标配的有状态Agent框架。它的核心思想很简单:把Agent的执行流程建模成一个有向图(Graph),每个节点是一个处理步骤,边定义了流转逻辑,状态在整个图中持久化流动。
这篇文章是2026年最新版LangGraph的实战指南,会带你从最基础的概念一路走到生产级多Agent系统的搭建。
LangGraph核心概念:图、节点、边、状态
状态(State):Agent的记忆
LangGraph中一切的基础是State。State是一个Python字典(或Pydantic模型),贯穿整个Agent的执行周期。每个节点处理State、修改State,下游节点读取更新后的State。
from typing import TypedDict, Annotated, Sequence
from langchain_core.messages import BaseMessage
import operator
class AgentState(TypedDict):
    """Shared state threaded through every node of the agent graph."""
    # Message history. operator.add is the reducer, so node return values
    # are appended to this list rather than overwriting it.
    messages: Annotated[Sequence[BaseMessage], operator.add]
    # The task the agent is currently working on.
    current_task: str
    # Iteration counter used as a guard against infinite agent/tool loops.
    iteration_count: int
    # Final answer once the agent terminates; None while still running.
    final_answer: str | None
Annotated[..., operator.add] 是LangGraph的一个关键设计:通过reducer函数定义State字段如何合并。operator.add 意味着消息会追加到历史,而不是覆盖。
节点(Node):处理单元
节点是普通的Python函数,接收State,返回State的更新:
from langchain_openai import ChatOpenAI
from langchain_core.messages import SystemMessage, HumanMessage
# Deterministic (temperature=0) chat model shared by the example nodes below.
llm = ChatOpenAI(model="gpt-4o", temperature=0)
def agent_node(state: AgentState) -> dict:
    """Run the LLM on the accumulated message history.

    Returns a partial state update: the model's reply (appended to
    ``messages`` by the reducer) and a bumped iteration counter.
    """
    reply = llm.invoke(state["messages"])
    return {
        "messages": [reply],  # reducer appends; history is never overwritten
        "iteration_count": state["iteration_count"] + 1,
    }
def tool_executor(state: AgentState) -> dict:
    """Execute the tool calls requested by the last assistant message."""
    last_message = state["messages"][-1]
    # NOTE(review): execute_tools is not defined anywhere in this article —
    # presumably it runs each requested call and returns ToolMessage objects;
    # confirm (or use langgraph.prebuilt.ToolNode as in the later example).
    results = execute_tools(last_message.tool_calls)
    return {"messages": results}
边(Edge):流转逻辑
边分为两类:
固定边:无条件跳转
# Unconditional hop: after "agent" finishes, always run "tools".
graph.add_edge("agent", "tools")
条件边:根据State动态决定下一步
def should_continue(state: AgentState) -> str:
    """Route after the agent node: run tools next, or finish.

    Returns "tools" when the latest message carries tool calls and the
    iteration budget is not exhausted; otherwise returns "end".
    """
    latest = state["messages"][-1]
    # Hard cap on iterations so a misbehaving loop always terminates.
    if state["iteration_count"] >= 10:
        return "end"
    # A message without (truthy) tool_calls means the agent gave its final reply.
    return "tools" if getattr(latest, "tool_calls", None) else "end"
# Route the "agent" node's outcome through should_continue: tool calls go to
# the executor node, anything else terminates the graph.
graph.add_conditional_edges(
    "agent",
    should_continue,
    {
        "tools": "tool_executor",
        "end": END
    }
)
从零构建:一个完整的研究Agent
让我们构建一个能够搜索网络、阅读页面并综合信息的研究Agent:
import operator
from typing import Annotated, Sequence, TypedDict

from langchain_community.tools import DuckDuckGoSearchRun, WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI
from langgraph.graph import StateGraph, END
from langgraph.prebuilt import ToolNode
# ── 1. Define tools ──
search_tool = DuckDuckGoSearchRun()
# Fix: WikipediaQueryRun has a required api_wrapper field; calling it with no
# arguments raises a pydantic validation error at construction time.
wiki_tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
tools = [search_tool, wiki_tool]
# ── 2. Define the state ──
class ResearchState(TypedDict):
    """State threaded through the research graph."""
    # Message history; the operator.add reducer appends node returns.
    messages: Annotated[Sequence[BaseMessage], operator.add]
    # The question under investigation (set once at invocation).
    research_question: str
    # Sources collected so far.
    sources: list[str]
    # Researcher turn counter, used to cap the tool loop.
    iteration: int
# ── 3. Create the tool-bound LLM ──
# bind_tools advertises the tool schemas to the model so its responses can
# carry structured tool_calls.
llm_with_tools = ChatOpenAI(
    model="gpt-4o",
    temperature=0
).bind_tools(tools)
# ── 4. Define nodes ──
# System prompt (runtime model input — deliberately left in Chinese).
SYSTEM_PROMPT = """你是一位专业的AI研究助手。
当需要查找信息时,使用搜索工具或维基百科。
综合多个来源,给出准确、全面的研究报告。"""
def researcher(state: ResearchState) -> dict:
    """Invoke the tool-bound LLM with the system prompt prepended."""
    prompt_messages = [SystemMessage(content=SYSTEM_PROMPT), *state["messages"]]
    reply = llm_with_tools.invoke(prompt_messages)
    return {"messages": [reply], "iteration": state["iteration"] + 1}
def route_after_agent(state: ResearchState) -> str:
    """Decide whether to run tools next or stop the research loop."""
    newest = state["messages"][-1]
    # Budget guard: never exceed 8 researcher turns.
    if state["iteration"] >= 8:
        return "end"
    return "tools" if getattr(newest, "tool_calls", None) else "end"
# ── 5. Build the graph ──
tool_node = ToolNode(tools)
workflow = StateGraph(ResearchState)
workflow.add_node("researcher", researcher)
workflow.add_node("tools", tool_node)
workflow.set_entry_point("researcher")
workflow.add_conditional_edges("researcher", route_after_agent, {
    "tools": "tools",
    "end": END
})
workflow.add_edge("tools", "researcher")  # after tool execution, return to the researcher
# ── 6. Compile and run ──
app = workflow.compile()
# NOTE(review): HumanMessage is used below, but this example's import block
# only pulls in SystemMessage/BaseMessage — add HumanMessage to the
# langchain_core.messages import.
result = app.invoke({
    "messages": [HumanMessage(content="分析2026年AI Agent的主流技术架构")],
    "research_question": "2026年AI Agent技术架构",
    "sources": [],
    "iteration": 0
})
print(result["messages"][-1].content)
进阶:持久化状态与人工介入
生产环境中,Agent经常需要:
- 跨会话记忆:用户下次打开还记得上次的对话
- 暂停等待人工:关键决策需要人确认后再继续
LangGraph通过Checkpointer实现这两个需求:
from langgraph.checkpoint.sqlite import SqliteSaver
from langgraph.graph import StateGraph, END
# Fix: interrupt is exported from langgraph.types, not langgraph.graph.
from langgraph.types import interrupt

# Persist graph state in SQLite so conversations survive process restarts.
with SqliteSaver.from_conn_string("agent_memory.db") as memory:
    app = workflow.compile(checkpointer=memory)
    # First turn: creates a checkpoint under this thread_id.
    config = {"configurable": {"thread_id": "user_123"}}
    result1 = app.invoke(
        {"messages": [HumanMessage(content="我想研究量子计算")]},
        config=config
    )
    # Second turn: the same thread_id automatically restores prior state.
    result2 = app.invoke(
        {"messages": [HumanMessage(content="重点说说量子纠错")]},
        config=config  # same thread_id → previous context is resumed
    )
人工介入(Human-in-the-loop):
def critical_decision_node(state: AgentState) -> dict:
    """Pause the graph and wait for a human verdict on the AI's proposal."""
    proposal = state["messages"][-1].content
    # interrupt() suspends execution; the payload is surfaced to the human
    # operator, and their response comes back when the graph is resumed.
    human_input = interrupt({
        "question": "AI提议以下操作,是否批准?",
        "proposal": proposal,
        "action": "approve_or_reject"
    })
    if human_input["decision"] != "approve":
        return {"approved": False, "messages": [AIMessage(content=f"操作已取消: {human_input.get('reason', '')}")]}
    return {"approved": True, "messages": [AIMessage(content="操作已批准,继续执行...")]}
多Agent协作:Supervisor模式
当单个Agent能力不够时,需要多个专业Agent协作。LangGraph的Supervisor模式是当前最成熟的实现方式:
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel
from typing import Literal
# Specialised agent definitions.
# NOTE(review): create_specialized_agent (and the *_tools lists) are not
# defined anywhere in this article — presumably a factory around a prebuilt
# ReAct agent; confirm before copying this snippet.
code_agent = create_specialized_agent("代码专家", code_tools)
research_agent = create_specialized_agent("研究专家", research_tools)
writing_agent = create_specialized_agent("写作专家", writing_tools)
members = ["code_expert", "researcher", "writer"]
# Supervisor: decides which agent to dispatch next.
class RouteDecision(BaseModel):
    """Structured routing decision emitted by the supervisor LLM."""
    # Next node to run, or FINISH to end the graph.
    next: Literal["code_expert", "researcher", "writer", "FINISH"]
    reason: str
# Prompt asking the supervisor model to pick the next expert (or FINISH).
# The f-string interpolates the member list at definition time; {state} is a
# template variable filled in per call.
supervisor_prompt = ChatPromptTemplate.from_messages([
    ("system", f"""你是一个任务协调器,管理以下专家:{members}
根据任务需求和当前进展,决定下一步派遣哪位专家。
当任务完成时,回复 FINISH。"""),
    ("human", "当前状态:\n{state}\n\n请决定下一步行动。")
])
def supervisor_node(state: MultiAgentState) -> dict:
    """Ask the supervisor LLM for a structured routing decision."""
    router = supervisor_llm.with_structured_output(RouteDecision)
    decision = router.invoke(supervisor_prompt.format(state=str(state)))
    return {"next_agent": decision.next, "supervisor_reason": decision.reason}
def route_by_supervisor(state: MultiAgentState) -> str:
    """Conditional-edge router: forward the supervisor's chosen target node."""
    return state["next_agent"]
# Build the multi-agent graph: supervisor at the hub, experts as spokes.
multi_agent_graph = StateGraph(MultiAgentState)
multi_agent_graph.add_node("supervisor", supervisor_node)
multi_agent_graph.add_node("code_expert", code_agent)
multi_agent_graph.add_node("researcher", research_agent)
multi_agent_graph.add_node("writer", writing_agent)
multi_agent_graph.set_entry_point("supervisor")
multi_agent_graph.add_conditional_edges("supervisor", route_by_supervisor, {
    "code_expert": "code_expert",
    "researcher": "researcher",
    "writer": "writer",
    "FINISH": END
})
# Each expert reports back to the supervisor after finishing its turn.
for member in ["code_expert", "researcher", "writer"]:
    multi_agent_graph.add_edge(member, "supervisor")
生产部署关键要点
1. 错误处理与重试
from tenacity import retry, stop_after_attempt, wait_exponential

def resilient_node(state: AgentState) -> dict:
    """LLM node with exponential-backoff retries and graceful degradation."""
    # Up to 3 attempts, waiting 1s..10s (exponential) between them.
    @retry(stop=stop_after_attempt(3), wait=wait_exponential(min=1, max=10))
    def invoke_model():
        return llm.invoke(state["messages"])

    try:
        return {"messages": [invoke_model()]}
    except Exception as e:
        # All retries exhausted: degrade gracefully instead of crashing the graph.
        return {"messages": [AIMessage(content=f"处理失败,请重试: {str(e)}")]}
2. 流式输出
# Stream token-level events from the graph.
# NOTE(review): `async for` must live inside an async function (or a REPL with
# top-level await); wrap this in `async def main()` in real code. initial_state
# is assumed to be the graph's input dict — not defined in this snippet.
async for event in app.astream_events(initial_state, version="v2"):
    if event["event"] == "on_chat_model_stream":
        chunk = event["data"]["chunk"]
        print(chunk.content, end="", flush=True)
3. 可观测性
from langsmith import traceable

# @traceable records the full run (inputs, outputs, timing) in LangSmith.
@traceable(name="research_agent_run")
def run_agent(query: str) -> str:
    """Run the compiled agent on *query* and return its final answer text."""
    result = app.invoke({"messages": [HumanMessage(content=query)]})
    return result["messages"][-1].content
总结
LangGraph在2026年已经成为构建生产级AI Agent的首选框架,核心优势是:
- 有状态:State机制让复杂业务逻辑变得可管理
- 可控:条件边、人工介入让Agent行为可预期
- 可扩展:从单Agent到多Agent Supervisor,架构平滑升级
- 持久化:Checkpointer让跨会话记忆不再困难
掌握LangGraph,是2026年AI工程师的核心竞争力之一。不要等到项目复杂了才学——现在就开始用图的思维建模你的Agent流程。