LangGraph生产实战2026:构建有状态多步骤AI工作流的完整指南

阅读时长:约 1 分钟

LangGraph是2026年构建生产级AI Agent的首选框架——它将Agent的执行过程建模为有向图,每个节点是一个处理步骤,边是条件跳转逻辑(图中允许环路,例如本文质量检查失败后回到搜索节点的重试循环,因此并非严格意义上的DAG)。这种设计让复杂的多步骤AI工作流变得可视化、可调试、可扩展。本文从工程实践角度,深入解析LangGraph的核心概念与生产部署技巧。

一、为什么选择LangGraph而非简单的Agent循环

很多团队初期用"while循环 + 工具调用"实现Agent,够用但难以维护。LangGraph的优势在于:

状态机语义:工作流的每个状态都是显式定义的,便于调试和测试

条件分支:可以根据LLM输出或外部条件动态决定下一步走哪条路

并行执行:支持多个节点同时执行,然后聚合结果

持久化:内置checkpointing,工作流可以暂停、恢复,支持Human-in-the-Loop

可视化:图结构可以直接渲染为流程图,方便团队协作理解

二、LangGraph核心概念

2.1 State:工作流的共享状态

from typing import TypedDict, Annotated
from operator import add
from langgraph.graph import StateGraph, END

class WorkflowState(TypedDict):
    """Shared state passed between workflow nodes.

    Fields annotated with ``operator.add`` act as reducers: values a node
    returns for them are appended to the existing list instead of
    replacing it.
    """

    # Raw user input
    user_query: str

    # Intermediate results (add reducer: new values append, not overwrite)
    search_results: Annotated[list[str], add]

    # Keywords chosen by analyze_query_node for the search step.
    # (Was missing from the original schema even though nodes write it;
    # LangGraph rejects state updates for undeclared keys.)
    search_keywords: list[str]

    # Full analysis dict produced by analyze_query_node (was also missing).
    query_analysis: dict

    # Final output
    final_answer: str

    # Control flow
    iteration_count: int
    should_continue: bool

    # Verdict written by quality_check_node; drives the retry edge
    # (was also missing from the original schema).
    quality_passed: bool

    # Tool-call history (append-only)
    tool_calls: Annotated[list[dict], add]

    # Accumulated error messages (append-only)
    errors: Annotated[list[str], add]

2.2 Node:处理节点

每个节点是一个接受State、返回State更新的函数:

import anthropic
from langgraph.graph import StateGraph

# Module-level Anthropic client shared by every node below.
# Presumably reads ANTHROPIC_API_KEY from the environment — confirm
# against the deployment configuration.
client = anthropic.Anthropic()

def analyze_query_node(state: WorkflowState) -> dict:
    """Analyze the user query to decide the search strategy.

    Asks the model for a JSON description of the query (type, keywords,
    complexity, calculation flag) and falls back to a simple default when
    the reply cannot be parsed.

    Returns a partial state update with ``search_keywords`` and
    ``query_analysis``.
    """
    import json  # local import kept to match the original snippet's style

    response = client.messages.create(
        model="claude-opus-4-7",
        max_tokens=500,
        messages=[{
            "role": "user",
            "content": f"""分析这个查询,输出JSON:
查询:{state['user_query']}

输出格式:
{{
  "query_type": "factual|analytical|creative",
  "search_keywords": ["关键词1", "关键词2"],
  "complexity": "simple|medium|complex",
  "requires_calculation": true|false
}}"""
        }]
    )

    try:
        analysis = json.loads(response.content[0].text)
    except (json.JSONDecodeError, IndexError):
        # Narrowed from a bare `except:`: only malformed JSON or an empty
        # content list trigger the fallback; genuine bugs still surface.
        analysis = {"query_type": "factual", "search_keywords": [state['user_query']], "complexity": "simple"}

    return {
        "search_keywords": analysis.get("search_keywords", []),
        "query_analysis": analysis
    }

def web_search_node(state: WorkflowState) -> dict:
    """Run a web search for up to three keywords and collect the hits."""
    # Fall back to the raw query when the analyze step produced no keywords.
    keywords = state.get("search_keywords", [state["user_query"]])[:3]

    # Flatten the per-keyword result lists into one list of hits.
    gathered = [hit for kw in keywords for hit in perform_web_search(kw)]

    return {
        "search_results": gathered,
        "tool_calls": [{"tool": "web_search", "keywords": state.get("search_keywords")}]
    }

def synthesis_node(state: WorkflowState) -> dict:
    """Synthesize the collected search results into a final answer."""
    # Cap the context at the top five results to bound prompt size.
    top_hits = state.get("search_results", [])[:5]
    context = "\n\n".join(top_hits)

    prompt = f"""基于以下搜索结果,回答用户问题。

问题:{state['user_query']}

搜索结果:
{context}

请给出准确、全面的回答。"""

    reply = client.messages.create(
        model="claude-opus-4-7",
        max_tokens=1500,
        messages=[{"role": "user", "content": prompt}],
    )

    return {"final_answer": reply.content[0].text, "should_continue": False}

def quality_check_node(state: WorkflowState) -> dict:
    """Judge whether the drafted answer is good enough to ship."""
    prompt = f"""评估回答质量:
问题:{state['user_query']}
回答:{state.get('final_answer', '')}

回答是否完整准确?(yes/no)
如果no,给出改进方向(一句话):"""

    verdict = client.messages.create(
        model="claude-opus-4-7",
        max_tokens=200,
        messages=[{"role": "user", "content": prompt}],
    )

    # Crude heuristic: any "yes" in the (lower-cased) reply counts as a pass.
    passed = "yes" in verdict.content[0].text.lower()

    return {
        "quality_passed": passed,
        # Count attempts so the conditional edge can cap the retry loop.
        "iteration_count": state.get("iteration_count", 0) + 1,
    }

2.3 Graph:编排工作流

from langgraph.graph import StateGraph, END

def build_research_workflow():
    """Assemble and compile the linear research workflow with a retry loop."""
    graph = StateGraph(WorkflowState)

    # Register all processing nodes.
    for node_name, node_fn in [
        ("analyze", analyze_query_node),
        ("search", web_search_node),
        ("synthesize", synthesis_node),
        ("quality_check", quality_check_node),
    ]:
        graph.add_node(node_name, node_fn)

    graph.set_entry_point("analyze")

    # Straight-line path: analyze -> search -> synthesize -> quality_check.
    graph.add_edge("analyze", "search")
    graph.add_edge("search", "synthesize")
    graph.add_edge("synthesize", "quality_check")

    def should_retry(state: WorkflowState) -> str:
        # Stop on a passing answer, or once two retries have been used.
        if state.get("quality_passed", True):
            return "done"
        if state.get("iteration_count", 0) >= 2:
            return "done"
        return "retry"

    # Conditional edge: either finish or loop back to the search step.
    graph.add_conditional_edges(
        "quality_check",
        should_retry,
        {"done": END, "retry": "search"},
    )

    return graph.compile()

# Use the workflow
app = build_research_workflow()

# invoke() runs the graph to completion and returns the final state dict.
result = app.invoke({
    "user_query": "2026年AI Agent的最新技术进展",
    "search_results": [],
    "tool_calls": [],
    "errors": [],
    "iteration_count": 0,
    "should_continue": True
})

print(result["final_answer"])

三、Human-in-the-Loop:工作流暂停与恢复

LangGraph内置了检查点机制,支持在关键步骤暂停等待人工确认:

from langgraph.checkpoint.sqlite import SqliteSaver
from langgraph.graph import StateGraph, END

# Persist checkpoints to SQLite so runs can pause and resume.
# NOTE(review): in recent langgraph-checkpoint-sqlite releases,
# from_conn_string() is a context manager rather than returning a saver
# directly — confirm against the pinned version.
memory = SqliteSaver.from_conn_string("checkpoints.db")

def build_approval_workflow():
    """Workflow that pauses for human approval before finalizing."""
    graph = StateGraph(WorkflowState)

    graph.add_node("draft_response", draft_response_node)
    graph.add_node("human_review", human_review_node)  # waits for a human
    graph.add_node("finalize", finalize_node)

    graph.set_entry_point("draft_response")
    graph.add_edge("draft_response", "human_review")

    def route_review(state):
        # Approved drafts move on; anything else loops back for a redraft.
        return "approve" if state.get("approved") else "revise"

    # Execution pauses at human_review, then routes on the approval flag.
    graph.add_conditional_edges(
        "human_review",
        route_review,
        {"approve": "finalize", "revise": "draft_response"},
    )

    graph.add_edge("finalize", END)

    # The checkpointer plus interrupt_before makes the run stop and persist
    # just before the human_review node.
    return graph.compile(
        checkpointer=memory,
        interrupt_before=["human_review"],
    )

app = build_approval_workflow()

# First run: execution stops just before the human_review node.
thread_config = {"configurable": {"thread_id": "task_001"}}

result = app.invoke(
    {"user_query": "起草给客户的季度报告"},
    config=thread_config
)
print("草稿已生成,等待审批:", result.get("draft"))

# After human review, inject the approval decision into the saved state...
app.update_state(
    thread_config,
    {"approved": True, "human_feedback": "很好,可以发送"}
)

# ...then resume from the checkpoint by invoking with None as the input.
final_result = app.invoke(None, config=thread_config)
print("最终结果:", final_result.get("final_answer"))

四、并行节点:提升多任务效率

def build_parallel_research_workflow():
    """Fan out to three search sources in parallel, then merge and synthesize."""
    graph = StateGraph(WorkflowState)

    # The three search back-ends that run concurrently.
    searchers = {
        "web_search": web_search_node,
        "db_search": database_search_node,
        "docs_search": docs_search_node,
    }

    graph.add_node("decompose", decompose_query_node)
    for node_name, node_fn in searchers.items():
        graph.add_node(node_name, node_fn)
    graph.add_node("merge_results", merge_results_node)
    graph.add_node("synthesize", synthesis_node)

    graph.set_entry_point("decompose")

    # LangGraph runs multiple edges from the same source node in parallel;
    # merge_results only fires after all three searches have finished.
    for node_name in searchers:
        graph.add_edge("decompose", node_name)
        graph.add_edge(node_name, "merge_results")

    graph.add_edge("merge_results", "synthesize")
    graph.add_edge("synthesize", END)

    return graph.compile()

五、流式输出与实时进度

async def run_with_streaming(user_query: str):
    """Stream workflow events, printing node progress and LLM tokens live."""
    app = build_research_workflow()

    initial_state = {
        "user_query": user_query,
        "search_results": [],
        "tool_calls": [],
        "errors": [],
        "iteration_count": 0,
    }
    tracked_nodes = {"analyze", "search", "synthesize", "quality_check"}

    async for event in app.astream_events(initial_state, version="v1"):
        kind = event["event"]

        if kind == "on_chain_start" and event["name"] in tracked_nodes:
            # Announce each workflow node as it begins.
            print(f"🔄 执行节点: {event['name']}")

        elif kind == "on_chain_end" and event["name"] == "synthesize":
            # Report completion once the synthesis node has produced an answer.
            if "final_answer" in event["data"].get("output", {}):
                print(f"✅ 生成回答完成")

        elif kind == "on_llm_stream":
            # Echo LLM tokens to stdout as they arrive.
            chunk = event["data"].get("chunk", "")
            if hasattr(chunk, "content") and chunk.content:
                print(chunk.content, end="", flush=True)

import asyncio
# Entry point: drive the async streaming demo to completion.
asyncio.run(run_with_streaming("2026年最值得关注的AI技术方向"))

六、生产部署:LangGraph Platform

LangGraph 0.2+提供了Platform功能,简化生产部署:

# langgraph.json - 部署配置
{
  "dependencies": ["./my_agent"],
  "graphs": {
    "research_agent": "./my_agent/workflow.py:app",
    "code_agent": "./my_agent/code_workflow.py:app"
  },
  "env": {
    "ANTHROPIC_API_KEY": "env:ANTHROPIC_API_KEY"
  }
}
# Local development server
langgraph dev

# Build a Docker image
langgraph build -t my-agent:latest

# Deploy to the cloud
langgraph up  # hosted via LangSmith

关键生产配置

import os

from langgraph.checkpoint.postgres import PostgresSaver
import psycopg2

# Production: persist checkpoints in PostgreSQL instead of SQLite.
# `import os` was missing from the original snippet — os.environ below
# would raise NameError as shown.
conn = psycopg2.connect(os.environ["DATABASE_URL"])
checkpointer = PostgresSaver(conn)
checkpointer.setup()  # one-time schema setup for the checkpoint tables

# Compile the production-grade workflow with the durable checkpointer.
production_app = workflow.compile(
    checkpointer=checkpointer,
    interrupt_before=["human_approval"],  # steps that require approval
)

七、监控与调试

LangGraph与LangSmith深度集成,自动追踪每次执行:

import os
# Enable LangSmith tracing so every workflow run is recorded automatically.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = "your_langsmith_key"
os.environ["LANGCHAIN_PROJECT"] = "production-agent"

# All subsequent workflow executions are sent to LangSmith, where you can see:
# - the full execution path (which nodes ran)
# - each node's inputs/outputs
# - token usage and latency
# - failed nodes and error details

八、总结

LangGraph是构建生产级AI工作流的工程利器:

  1. StateGraph:显式的状态管理,避免隐式状态传递的混乱
  2. 条件边:基于LLM输出或外部条件的动态路由
  3. 并行节点:自动并行化无依赖的处理步骤
  4. Checkpointing:工作流暂停/恢复,支持Human-in-the-Loop
  5. 流式执行:实时输出中间结果,提升用户体验
  6. Platform部署:从本地开发到生产的一站式支持

相比简单的Agent循环,LangGraph提供了工业级的可靠性、可调试性和可扩展性——这正是从原型迈向生产的关键差距。