LangGraph是2026年构建生产级AI Agent的首选框架——它将Agent的执行过程建模为状态图(StateGraph):每个节点是一个处理步骤,边是条件跳转逻辑。与严格的DAG不同,LangGraph的图允许环路,因此可以自然表达重试与迭代(本文后面的质量检查重试、人工审批返工都依赖这一点)。这种设计让复杂的多步骤AI工作流变得可视化、可调试、可扩展。本文从工程实践角度,深入解析LangGraph的核心概念与生产部署技巧。
一、为什么选择LangGraph而非简单的Agent循环
很多团队初期用"while循环 + 工具调用"实现Agent,够用但难以维护。LangGraph的优势在于:
状态机语义:工作流的每个状态都是显式定义的,便于调试和测试
条件分支:可以根据LLM输出或外部条件动态决定下一步走哪条路
并行执行:支持多个节点同时执行,然后聚合结果
持久化:内置checkpointing,工作流可以暂停、恢复,支持Human-in-the-Loop
可视化:图结构可以直接渲染为流程图,方便团队协作理解
二、LangGraph核心概念
2.1 State:工作流的共享状态
from typing import TypedDict, Annotated
from operator import add
from langgraph.graph import StateGraph, END
class WorkflowState(TypedDict):
    """Shared state passed between all workflow nodes.

    Fields annotated with ``Annotated[..., add]`` use the ``add`` reducer:
    values returned by a node are appended to the existing list instead of
    replacing it. Plain fields are overwritten by node return values.
    """
    # Raw user input
    user_query: str
    # Keywords derived by the analysis node (read by web_search_node)
    search_keywords: list[str]
    # Structured output of the query-analysis step (written by analyze_query_node)
    query_analysis: dict
    # Intermediate search results (add reducer: appended, not overwritten)
    search_results: Annotated[list[str], add]
    # Final synthesized answer
    final_answer: str
    # Verdict of the quality-check node (read by the retry router)
    quality_passed: bool
    # Control flow
    iteration_count: int
    should_continue: bool
    # Tool-call history (appended)
    tool_calls: Annotated[list[dict], add]
    # Accumulated error messages (appended)
    errors: Annotated[list[str], add]
2.2 Node:处理节点
每个节点是一个接受State、返回State更新的函数:
import anthropic
from langgraph.graph import StateGraph
# Module-level Anthropic client shared by all LLM-calling nodes.
# NOTE(review): presumably picks up the API key from the environment
# (ANTHROPIC_API_KEY) — confirm in deployment config.
client = anthropic.Anthropic()
def analyze_query_node(state: WorkflowState) -> dict:
    """Analyze the user query and derive a search strategy.

    Asks the LLM for a structured JSON analysis of the query; if the
    response is not valid JSON (or has an unexpected shape), falls back
    to a minimal default analysis instead of crashing the workflow.

    Returns a partial state update with ``search_keywords`` and
    ``query_analysis``.
    """
    import json  # local import keeps this snippet self-contained

    response = client.messages.create(
        model="claude-opus-4-7",
        max_tokens=500,
        messages=[{
            "role": "user",
            "content": f"""分析这个查询,输出JSON:
查询:{state['user_query']}
输出格式:
{{
"query_type": "factual|analytical|creative",
"search_keywords": ["关键词1", "关键词2"],
"complexity": "simple|medium|complex",
"requires_calculation": true|false
}}"""
        }]
    )
    try:
        analysis = json.loads(response.content[0].text)
    except (json.JSONDecodeError, IndexError, AttributeError, TypeError):
        # Narrowed from a bare `except:` — only degrade gracefully on
        # malformed model output, never swallow KeyboardInterrupt/SystemExit.
        analysis = {"query_type": "factual", "search_keywords": [state['user_query']], "complexity": "simple"}
    return {
        "search_keywords": analysis.get("search_keywords", []),
        "query_analysis": analysis
    }
def web_search_node(state: WorkflowState) -> dict:
    """Run a web search per derived keyword and collect all hits."""
    keywords = state.get("search_keywords", [state["user_query"]])
    hits = []
    # Cap at three keywords to bound latency and API cost.
    for kw in keywords[:3]:
        hits.extend(perform_web_search(kw))
    return {
        "search_results": hits,
        "tool_calls": [{"tool": "web_search", "keywords": state.get("search_keywords")}],
    }
def synthesis_node(state: WorkflowState) -> dict:
    """Synthesize the final answer from the top search results via the LLM."""
    # Ground the answer in at most five results.
    top_results = state.get("search_results", [])[:5]
    evidence = "\n\n".join(top_results)
    prompt = f"""基于以下搜索结果,回答用户问题。
问题:{state['user_query']}
搜索结果:
{evidence}
请给出准确、全面的回答。"""
    reply = client.messages.create(
        model="claude-opus-4-7",
        max_tokens=1500,
        messages=[{"role": "user", "content": prompt}],
    )
    return {"final_answer": reply.content[0].text, "should_continue": False}
def quality_check_node(state: WorkflowState) -> dict:
    """Ask the LLM whether the draft answer is complete and accurate."""
    verdict = client.messages.create(
        model="claude-opus-4-7",
        max_tokens=200,
        messages=[{
            "role": "user",
            "content": f"""评估回答质量:
问题:{state['user_query']}
回答:{state.get('final_answer', '')}
回答是否完整准确?(yes/no)
如果no,给出改进方向(一句话):""",
        }],
    )
    # Substring check on the lowercased reply: "yes" anywhere counts as a pass.
    passed = "yes" in verdict.content[0].text.lower()
    return {
        "quality_passed": passed,
        "iteration_count": state.get("iteration_count", 0) + 1,
    }
2.3 Graph:编排工作流
from langgraph.graph import StateGraph, END
def build_research_workflow():
    """Assemble the analyze → search → synthesize → quality-check graph."""
    graph = StateGraph(WorkflowState)

    # Register processing nodes.
    graph.add_node("analyze", analyze_query_node)
    graph.add_node("search", web_search_node)
    graph.add_node("synthesize", synthesis_node)
    graph.add_node("quality_check", quality_check_node)

    graph.set_entry_point("analyze")

    # Linear backbone of the pipeline.
    for src, dst in (
        ("analyze", "search"),
        ("search", "synthesize"),
        ("synthesize", "quality_check"),
    ):
        graph.add_edge(src, dst)

    def should_retry(state: WorkflowState) -> str:
        """Route to 'retry' only when quality failed and budget remains (max 2 retries)."""
        failed = not state.get("quality_passed", True)
        if failed and state.get("iteration_count", 0) < 2:
            return "retry"
        return "done"

    graph.add_conditional_edges(
        "quality_check",
        should_retry,
        {"done": END, "retry": "search"},  # retry re-runs the search step
    )
    return graph.compile()
# Run the workflow end-to-end with a fully initialized state.
app = build_research_workflow()
result = app.invoke({
"user_query": "2026年AI Agent的最新技术进展",
"search_results": [],
"tool_calls": [],
"errors": [],
"iteration_count": 0,
"should_continue": True
})
print(result["final_answer"])
三、Human-in-the-Loop:工作流暂停与恢复
LangGraph内置了检查点机制,支持在关键步骤暂停等待人工确认:
from langgraph.checkpoint.sqlite import SqliteSaver
from langgraph.graph import StateGraph, END
# SQLite-backed checkpoint store so runs can pause and resume.
# NOTE(review): in recent langgraph versions `from_conn_string` returns a
# context manager rather than a saver — confirm against the pinned version.
memory = SqliteSaver.from_conn_string("checkpoints.db")
def build_approval_workflow():
    """Build a draft → human review → finalize graph that pauses for approval."""
    graph = StateGraph(WorkflowState)

    graph.add_node("draft_response", draft_response_node)
    graph.add_node("human_review", human_review_node)  # execution pauses here
    graph.add_node("finalize", finalize_node)

    graph.set_entry_point("draft_response")
    graph.add_edge("draft_response", "human_review")

    def route_after_review(state: WorkflowState) -> str:
        # Approved drafts go straight to finalization; otherwise redraft.
        return "approve" if state.get("approved") else "revise"

    graph.add_conditional_edges(
        "human_review",
        route_after_review,
        {"approve": "finalize", "revise": "draft_response"},
    )
    graph.add_edge("finalize", END)

    # Attach the checkpointer and stop before the review node so a human
    # can inspect the draft and resume the run later.
    return graph.compile(
        checkpointer=memory,
        interrupt_before=["human_review"],
    )
app = build_approval_workflow()
# First run: pauses before the human_review node.
thread_config = {"configurable": {"thread_id": "task_001"}}
result = app.invoke(
{"user_query": "起草给客户的季度报告"},
config=thread_config
)
print("草稿已生成,等待审批:", result.get("draft"))
# After manual review, resume the run (inject the approval into state).
app.update_state(
thread_config,
{"approved": True, "human_feedback": "很好,可以发送"}
)
final_result = app.invoke(None, config=thread_config)
print("最终结果:", final_result.get("final_answer"))
四、并行节点:提升多任务效率
def build_parallel_research_workflow():
    """Fan out to three search sources in parallel, then merge and synthesize."""
    graph = StateGraph(WorkflowState)

    graph.add_node("decompose", decompose_query_node)
    # Independent search back-ends — eligible for concurrent execution.
    graph.add_node("web_search", web_search_node)
    graph.add_node("db_search", database_search_node)
    graph.add_node("docs_search", docs_search_node)
    graph.add_node("merge_results", merge_results_node)
    graph.add_node("synthesize", synthesis_node)

    graph.set_entry_point("decompose")

    search_nodes = ("web_search", "db_search", "docs_search")
    # Fan-out: multiple outgoing edges from one node run in parallel.
    for node in search_nodes:
        graph.add_edge("decompose", node)
    # Fan-in: merge_results waits until every search branch has finished.
    for node in search_nodes:
        graph.add_edge(node, "merge_results")

    graph.add_edge("merge_results", "synthesize")
    graph.add_edge("synthesize", END)
    return graph.compile()
五、流式输出与实时进度
async def run_with_streaming(user_query: str):
    """Stream workflow execution, printing node progress and LLM tokens live."""
    app = build_research_workflow()
    initial_state = {
        "user_query": user_query,
        "search_results": [],
        "tool_calls": [],
        "errors": [],
        "iteration_count": 0,
    }
    tracked_nodes = ("analyze", "search", "synthesize", "quality_check")
    async for event in app.astream_events(initial_state, version="v1"):
        kind = event["event"]
        if kind == "on_chain_start" and event["name"] in tracked_nodes:
            print(f"🔄 执行节点: {event['name']}")
        elif kind == "on_chain_end" and event["name"] == "synthesize":
            if "final_answer" in event["data"].get("output", {}):
                print(f"✅ 生成回答完成")
        elif kind == "on_llm_stream":
            # Print streamed LLM tokens as they arrive.
            token = event["data"].get("chunk", "")
            if hasattr(token, "content") and token.content:
                print(token.content, end="", flush=True)
import asyncio
# Entry point: drive the async streaming demo to completion.
asyncio.run(run_with_streaming("2026年最值得关注的AI技术方向"))
六、生产部署:LangGraph Platform
LangGraph 0.2+提供了Platform功能,简化生产部署:
# langgraph.json - 部署配置
{
"dependencies": ["./my_agent"],
"graphs": {
"research_agent": "./my_agent/workflow.py:app",
"code_agent": "./my_agent/code_workflow.py:app"
},
"env": {
"ANTHROPIC_API_KEY": "env:ANTHROPIC_API_KEY"
}
}
# Start the local development server
langgraph dev
# Build a Docker image
langgraph build -t my-agent:latest
# Deploy to the cloud
langgraph up # managed hosting via LangSmith
关键生产配置:
from langgraph.checkpoint.postgres import PostgresSaver
import os  # fixes NameError: os.environ is used below but was never imported in this snippet
import psycopg2

# Production: persist workflow checkpoints in PostgreSQL.
conn = psycopg2.connect(os.environ["DATABASE_URL"])
checkpointer = PostgresSaver(conn)
checkpointer.setup()  # create the checkpoint tables if they don't exist yet

# Compile the production-grade workflow with durable checkpoints.
production_app = workflow.compile(
    checkpointer=checkpointer,
    interrupt_before=["human_approval"],  # steps that require human approval
)
七、监控与调试
LangGraph与LangSmith深度集成,自动追踪每次执行:
import os
# Enable LangSmith tracing for every subsequent workflow run.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = "your_langsmith_key"
os.environ["LANGCHAIN_PROJECT"] = "production-agent"
# All workflow executions are now reported to LangSmith automatically.
# The LangSmith UI then shows:
# - the full execution path (which nodes ran)
# - each node's inputs and outputs
# - token usage and latency
# - failed nodes and error details
八、总结
LangGraph是构建生产级AI工作流的工程利器:
- StateGraph:显式的状态管理,避免隐式状态传递的混乱
- 条件边:基于LLM输出或外部条件的动态路由
- 并行节点:自动并行化无依赖的处理步骤
- Checkpointing:工作流暂停/恢复,支持Human-in-the-Loop
- 流式执行:实时输出中间结果,提升用户体验
- Platform部署:从本地开发到生产的一站式支持
相比简单的Agent循环,LangGraph提供了工业级的可靠性、可调试性和可扩展性——这正是从原型迈向生产的关键差距。