Tool splitting
Chat and tool execution should not be mixed in one node; use a dedicated tool node.
- LangGraph ships a dedicated ToolNode class.
- One extra edge is needed to route from the tool node back to the agent.
When two nodes run at the same time, they may both write to (and overwrite) the same key in the state.
Use state reducers to merge concurrent updates.
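A minimal sketch of the chat/tool split, assuming an illustrative multiply tool and an OpenAI chat model (neither is from these notes). tools_condition routes to the "tools" node when the last AI message contains tool calls, and the extra "tools" -> "agent" edge closes the loop.
from langchain_openai import ChatOpenAI
from langgraph.graph import StateGraph, MessagesState, START
from langgraph.prebuilt import ToolNode, tools_condition

def multiply(a: int, b: int) -> int:
    """Multiply a and b."""
    return a * b

llm_with_tools = ChatOpenAI(model="gpt-4o").bind_tools([multiply])

def agent(state: MessagesState):
    # The chat node only decides whether a tool call is needed
    return {"messages": [llm_with_tools.invoke(state["messages"])]}

builder = StateGraph(MessagesState)
builder.add_node("agent", agent)
builder.add_node("tools", ToolNode([multiply]))          # dedicated tool node
builder.add_edge(START, "agent")
builder.add_conditional_edges("agent", tools_condition)  # routes to "tools" or END
builder.add_edge("tools", "agent")                       # extra edge back to the agent
graph = builder.compile()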
Handling context
Option 1: LangGraph can handle it by itself (decided against)
- "messages": [RemoveMessage(id=m.id) for m in state["messages"][:-3]]
- Deletes every message except the last three
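A minimal sketch of option 1 wrapped in its own node, assuming the built-in MessagesState (its add_messages reducer treats RemoveMessage as a delete instruction):
from langchain_core.messages import RemoveMessage
from langgraph.graph import MessagesState

def trim_messages_node(state: MessagesState):
    # Keep only the last three messages; everything earlier is removed from the state
    return {"messages": [RemoveMessage(id=m.id) for m in state["messages"][:-3]]}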
Option 2: manage the session directly, which makes unified management easier
- How should the session backend be designed? Which format should it follow?
- LangGraph's memory is effectively a thread that keeps the most recent sessions in memory and releases them once the task is finished.
- Memory can be used in some scenarios, e.g. multi-round parameter tuning, but a few problems come up:
  - The thread (in Python, actually a process) stays resident in the background.
  - How do we guarantee that the next conversation lands on the same thread (is the thread id enough)?
- A summary node can be added (see Memory below).
Memory
A summary node can be added, as sketched below.
- Strongly tied to the business logic.
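A hedged sketch of a summary node, assuming a state that extends MessagesState with a summary field and an illustrative OpenAI model; older turns are folded into the summary and then deleted:
from langchain_core.messages import HumanMessage, RemoveMessage
from langchain_openai import ChatOpenAI
from langgraph.graph import MessagesState

class SummaryState(MessagesState):
    summary: str

llm = ChatOpenAI(model="gpt-4o-mini")

def summarize_conversation(state: SummaryState):
    summary = state.get("summary", "")
    prompt = (
        f"This is the summary so far: {summary}\n\nExtend it with the new messages above:"
        if summary
        else "Summarize the conversation above:"
    )
    response = llm.invoke(state["messages"] + [HumanMessage(content=prompt)])
    # Drop everything except the last two messages once they are captured in the summary
    deletions = [RemoveMessage(id=m.id) for m in state["messages"][:-2]]
    return {"summary": response.content, "messages": deletions}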
Some special LangGraph nodes
Stop nodes (breakpoints)
When a node finishes its work and has to wait for other conditions or for another node's input, execution can be paused at a breakpoint and resumed later (see the sketch after this list).
- Suited to human-in-the-loop scenarios.
- The state can be modified while the graph is paused at the breakpoint.
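A minimal breakpoint sketch, reusing the agent/tools builder from the tool-splitting sketch above; interrupt_before pauses before the named node, and update_state lets a human edit the state before resuming:
from langchain_core.messages import HumanMessage
from langgraph.checkpoint.memory import MemorySaver

# Compile with a checkpointer and pause before the "tools" node runs
graph = builder.compile(checkpointer=MemorySaver(), interrupt_before=["tools"])
config = {"configurable": {"thread_id": "1"}}

graph.invoke({"messages": [HumanMessage(content="Multiply 2 and 3")]}, config)
print(graph.get_state(config).next)  # ('tools',): execution is paused here

# Optionally edit the state while paused (human in the loop)...
graph.update_state(config, {"messages": [HumanMessage(content="Please show the working too.")]})
# ...then resume from the breakpoint
graph.invoke(None, config)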
Virtual nodes
No obvious use for these yet.
Time travel
Good for debugging: re-run the graph from a chosen node. Could this replace a retry mechanism?
Prints the state at every node the graph has passed through.
- Requires creating a thread and checkpoints.
- graph.get_state(): returns the most recent state on the thread.
- graph.get_state_history(): returns the state at every checkpoint.
  - Requires the thread id and the checkpoint id.
- This is a replay, not a re-execution.
- My understanding: it is roughly like re-running the graph once on a new thread (see the sketch below).
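A minimal replay sketch, again reusing the agent/tools builder compiled with a MemorySaver checkpointer; every snapshot's config carries the thread id and checkpoint id:
from langchain_core.messages import HumanMessage
from langgraph.checkpoint.memory import MemorySaver

graph = builder.compile(checkpointer=MemorySaver())
config = {"configurable": {"thread_id": "2"}}
graph.invoke({"messages": [HumanMessage(content="Multiply 2 and 3")]}, config)

print(graph.get_state(config))                   # most recent checkpoint on this thread
history = list(graph.get_state_history(config))  # every checkpoint, newest first
to_replay = history[-2]                          # an early checkpoint (just after the input)
# Invoking with that checkpoint's config replays the steps before it from the saved
# state and only re-executes the steps after it
graph.invoke(None, to_replay.config)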
The case shown in the figure below (editing a message with update_state):
- If the new message does not carry an id, a new human_message is appended, so there is one extra message.
- If it reuses an existing message id, the latest human_message is overwritten instead.
- update_state actually adds a new forked checkpoint to the thread.
- The number of states returned by graph.get_state_history(thread) grows accordingly.
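A minimal forking sketch that continues the replay example above (the message content is illustrative):
from langchain_core.messages import HumanMessage

to_fork = history[-2]
old_msg = to_fork.values["messages"][0]
# Reusing the existing message id overwrites that message; omitting the id would append a new one
fork_config = graph.update_state(
    to_fork.config,
    {"messages": [HumanMessage(content="Multiply 5 and 3", id=old_msg.id)]},
)
# update_state created a new forked checkpoint, so get_state_history now returns one more state
graph.invoke(None, fork_config)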
Running two nodes in parallel
Use a reducer (the operator argument in the code below); LangGraph calls the reducer automatically when parallel nodes write to the same state key.
The reducer controls how the outputs of parallel nodes are combined (here they are sorted); it does not control the order in which the nodes run.
from typing import Any, Annotated
from typing_extensions import TypedDict
from IPython.display import Image, display
from langgraph.graph import StateGraph, START, END


def sorting_reducer(left, right):
    """Combines and sorts the values in a list."""
    if not isinstance(left, list):
        left = [left]
    if not isinstance(right, list):
        right = [right]
    return sorted(left + right, reverse=False)


class State(TypedDict):
    # sorting_reducer will sort the values in state
    state: Annotated[list, sorting_reducer]


class ReturnNodeValue:
    """Simple node that appends its fixed value to state["state"]."""

    def __init__(self, node_secret: str):
        self._value = node_secret

    def __call__(self, state: State) -> Any:
        return {"state": [self._value]}


# Add nodes
builder = StateGraph(State)

# Initialize each node with node_secret
builder.add_node("a", ReturnNodeValue("I'm A"))
builder.add_node("b", ReturnNodeValue("I'm B"))
builder.add_node("b2", ReturnNodeValue("I'm B2"))
builder.add_node("c", ReturnNodeValue("I'm C"))
builder.add_node("d", ReturnNodeValue("I'm D"))

# Flow: a fans out to b and c; d waits for both b2 and c
builder.add_edge(START, "a")
builder.add_edge("a", "b")
builder.add_edge("a", "c")
builder.add_edge("b", "b2")
builder.add_edge(["b2", "c"], "d")
builder.add_edge("d", END)
graph = builder.compile()
display(Image(graph.get_graph().draw_mermaid_png()))
Subgraphs
In short, each compiled subgraph is treated as an ordinary node and wired into the main graph (here two subgraphs become two nodes).
# run.py
from operator import add
from typing import List, Annotated, TypedDict

from langgraph.graph import StateGraph, START, END


# ---------- 1. Subgraph 1: FailureAnalysis ----------
class FState(TypedDict):
    logs: List[dict]      # input
    fa_result: str        # output
    processed: List[str]  # output


def fa_node(state: FState):
    # Minimal logic: join the ids into a string and return them
    ids = [log["id"] for log in state["logs"]]
    return {
        "fa_result": f"FA counted {len(ids)} failures",
        "processed": [f"fa-{i}" for i in ids],
    }


fa_graph = (
    StateGraph(FState)
    .add_node("fa", fa_node)
    .add_edge(START, "fa")
    .add_edge("fa", END)
    .compile()
)


# ---------- 2. Subgraph 2: QuestionSummarization ----------
class QState(TypedDict):
    logs: List[dict]
    qs_result: str
    processed: List[str]


def qs_node(state: QState):
    ids = [log["id"] for log in state["logs"]]
    return {
        "qs_result": f"QS counted {len(ids)} questions",
        "processed": [f"qs-{i}" for i in ids],
    }


qs_graph = (
    StateGraph(QState)
    .add_node("qs", qs_node)
    .add_edge(START, "qs")
    .add_edge("qs", END)
    .compile()
)


# ---------- 3. Main graph: Entry ----------
class MainState(TypedDict):
    raw_logs: List[dict]
    logs: List[dict]
    fa_result: str
    qs_result: str
    processed: Annotated[List[str], add]  # key point: accumulated automatically


def clean_node(state: MainState):
    # Does nothing here; just passes raw_logs downstream unchanged
    return {"logs": state["raw_logs"]}


main_graph = (
    StateGraph(MainState)
    .add_node("clean", clean_node)
    # Mount each compiled subgraph as an ordinary node
    .add_node("fa_sub", fa_graph)
    .add_node("qs_sub", qs_graph)
    .add_edge(START, "clean")
    # Send the same logs to both subgraphs in parallel
    .add_edge("clean", "fa_sub")
    .add_edge("clean", "qs_sub")
    .add_edge("fa_sub", END)
    .add_edge("qs_sub", END)
    .compile()
)

# ---------- 4. Run it ----------
if __name__ == "__main__":
    initial = {
        "raw_logs": [{"id": "A"}, {"id": "B"}],
        "processed": [],  # accumulator starts empty
    }
    result = main_graph.invoke(initial)
    print("fa_result :", result["fa_result"])
    print("qs_result :", result["qs_result"])
    print("processed :", result["processed"])
Map_reduce
An efficient way to decompose a task and run the pieces in parallel.
Map
- Split the task so that several nodes execute in parallel.
- Use the Send API to dispatch to a specific node without a real edge (think of it as a virtual edge).
- In practice all the parallel results are written into the same list.
Reduce
- Collect the outputs of all the mapped nodes and pick the best one as appropriate.
- In practice: read the list and let the LLM summarize or select.
import operator
from typing import Annotated
from typing_extensions import TypedDict
from pydantic import BaseModel

from langchain_openai import ChatOpenAI
from langgraph.constants import Send
from langgraph.graph import END, StateGraph, START

# Prompts we will use
subjects_prompt = """Generate a list of 3 sub-topics that are all related to this overall topic: {topic}."""
joke_prompt = """Generate a joke about {subject}"""
best_joke_prompt = """Below are a bunch of jokes about {topic}. Select the best one! Return the ID of the best one, starting 0 as the ID for the first joke. Jokes: \n\n {jokes}"""

# LLM
model = ChatOpenAI(model="gpt-4o", temperature=0)


# Define the state
class Subjects(BaseModel):
    subjects: list[str]


class BestJoke(BaseModel):
    id: int


class OverallState(TypedDict):
    topic: str
    subjects: list
    jokes: Annotated[list, operator.add]
    best_selected_joke: str


def generate_topics(state: OverallState):
    prompt = subjects_prompt.format(topic=state["topic"])
    response = model.with_structured_output(Subjects).invoke(prompt)
    return {"subjects": response.subjects}


class JokeState(TypedDict):
    subject: str


class Joke(BaseModel):
    joke: str


def generate_joke(state: JokeState):
    prompt = joke_prompt.format(subject=state["subject"])
    response = model.with_structured_output(Joke).invoke(prompt)
    return {"jokes": [response.joke]}


def best_joke(state: OverallState):
    jokes = "\n\n".join(state["jokes"])
    prompt = best_joke_prompt.format(topic=state["topic"], jokes=jokes)
    response = model.with_structured_output(BestJoke).invoke(prompt)
    return {"best_selected_joke": state["jokes"][response.id]}


def continue_to_jokes(state: OverallState):
    # Map step: Send creates one "generate_joke" invocation per subject, with no real edges
    return [Send("generate_joke", {"subject": s}) for s in state["subjects"]]


# Construct the graph: here we put everything together to construct our graph
graph_builder = StateGraph(OverallState)
graph_builder.add_node("generate_topics", generate_topics)
graph_builder.add_node("generate_joke", generate_joke)
graph_builder.add_node("best_joke", best_joke)
graph_builder.add_edge(START, "generate_topics")
graph_builder.add_conditional_edges("generate_topics", continue_to_jokes, ["generate_joke"])
graph_builder.add_edge("generate_joke", "best_joke")
graph_builder.add_edge("best_joke", END)

# Compile the graph
graph = graph_builder.compile()
Subgraphs + map-reduce + parallelism
from typing import TypedDict, List

from langchain_openai import ChatOpenAI
from langchain_core.messages import SystemMessage, HumanMessage
from langgraph.graph import StateGraph, START, END

# Initialize the LLM
llm = ChatOpenAI(model="gpt-4o", temperature=0)


# ---- State ----
class ResearchState(TypedDict):
    topic: str
    analysts: List[str]
    report: str


# ---- Node 1: create the analysts ----
def create_analysts(state: ResearchState):
    topic = state["topic"]
    prompt = f"Generate 3 fictional analysts with unique roles to study: {topic}"
    result = llm.invoke([HumanMessage(content=prompt)])
    return {"analysts": result.content.split("\n")}


# ---- Node 2: simulate the interviews ----
def conduct_interview(state: ResearchState):
    topic = state["topic"]
    analysts = state["analysts"]
    interviews = []
    for a in analysts:
        q = f"{a.strip()} asks an expert: What are the most interesting facts about {topic}?"
        answer = llm.invoke([SystemMessage(content=q)])
        interviews.append(f"### {a}\n{answer.content}")
    return {"report": "\n\n".join(interviews)}


# ---- Node 3: write the report ----
def write_report(state: ResearchState):
    topic = state["topic"]
    report = state["report"]
    prompt = f"Write a summary report on {topic} based on these interviews:\n\n{report}"
    result = llm.invoke([HumanMessage(content=prompt)])
    return {"report": result.content}


# ---- Build the workflow ----
builder = StateGraph(ResearchState)
builder.add_node("create_analysts", create_analysts)
builder.add_node("conduct_interview", conduct_interview)
builder.add_node("write_report", write_report)

builder.add_edge(START, "create_analysts")
builder.add_edge("create_analysts", "conduct_interview")
builder.add_edge("conduct_interview", "write_report")
builder.add_edge("write_report", END)

graph = builder.compile()

# ---- Run ----
state = {"topic": "Artificial Intelligence Safety"}
result = graph.invoke(state)
print(result["report"])
Deep agent
Note: deep_agent_state is custom-defined, not built into LangGraph.
Planning
File system (context offloading)
The write and read methods are exposed as tools via the @tool decorator, as in the sketch below.
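A hedged sketch of that pattern, assuming the agent's state schema carries a files dict (e.g. the tutorial's DeepAgentState); the tutorial's own ls / read_file / write_file tools are richer than this:
from typing import Annotated
from langchain_core.messages import ToolMessage
from langchain_core.tools import InjectedToolCallId, tool
from langgraph.prebuilt import InjectedState
from langgraph.types import Command

@tool
def write_file(
    file_path: str,
    content: str,
    state: Annotated[dict, InjectedState],
    tool_call_id: Annotated[str, InjectedToolCallId],
) -> Command:
    """Write content to a file in the agent's virtual file system (the state's files dict)."""
    files = dict(state.get("files", {}))
    files[file_path] = content
    return Command(update={
        "files": files,
        "messages": [ToolMessage(f"Wrote {file_path}", tool_call_id=tool_call_id)],
    })

@tool
def read_file(file_path: str, state: Annotated[dict, InjectedState]) -> str:
    """Read a file back from the agent's virtual file system."""
    return state.get("files", {}).get(file_path, f"{file_path} not found")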
Sub_agent
Core ideas: context isolation and message handling.
- Treat the sub-agent as a tool that the main agent can call.
  - Use the file system to store the tool-call trace (Manus mentions this too) so the original source text can always be found.
- Decide which information to hand to the sub-agent, and append the sub-agent's returned information to the main agent's session.
  - The main agent only sees the sub-agent's last message.
- Each sub-agent needs a description and its own set of tools.
- Agents and sub-agents can also communicate through files:
  - Callback functions.
  - Modifying files.
  - Return the sub-agent's last message, store it in a list, then read it back.
Note the difference between DeepAgentState and InjectedState:
| Aspect | DeepAgentState | InjectedState |
|---|---|---|
| Type | A concrete state class (TypedDict or similar) | A type annotation (an Annotated marker) |
| Purpose | Defines the structure of the agent state (messages, files, ...) | Tells LangGraph to inject the current state automatically |
| Defined by | The developer, in their own code | Built-in type provided by LangGraph |
| Contents | Concrete fields such as messages, files | No contents of its own; only marks the injection behavior |
| Used for | Defining and manipulating the agent's state | Function parameters that should receive the current state |
| Example | class DeepAgentState(TypedDict): ... | state: Annotated[DeepAgentState, InjectedState] |
- DeepAgentState defines the structure of the state (it may contain messages and files).
- InjectedState ensures the state parameter is filled in automatically by LangGraph at runtime.
Simplest version, without the file system, for ease of understanding:
from typing import TypedDict

from langchain_core.tools import tool
from langgraph.prebuilt import create_react_agent


# Sub-agent configuration
class SubAgent(TypedDict):
    name: str
    prompt: str


# Mock language model (simply echoes the input).
# NOTE: a plain function is only a placeholder here; for a runnable version, pass a real
# chat model (e.g. ChatOpenAI) to create_react_agent instead.
def simple_model(state):
    return {"messages": [{"role": "assistant", "content": f"Processed: {state['messages'][0]['content']}"}]}


# Create the task (delegation) tool
def create_task_tool(subagents, state_schema):
    # Sub-agent registry
    agents = {
        agent["name"]: create_react_agent(
            simple_model, tools=[], prompt=agent["prompt"], state_schema=state_schema
        )
        for agent in subagents
    }

    @tool
    def task(description: str, subagent_type: str) -> str:
        """Delegate a task to a sub-agent while keeping contexts isolated."""
        if subagent_type not in agents:
            return f"Error: sub-agent {subagent_type} does not exist"
        # Isolated context: the sub-agent only sees the task description
        state = {"messages": [{"role": "user", "content": description}]}
        # Invoke the sub-agent
        sub_agent = agents[subagent_type]
        result = sub_agent.invoke(state)
        # Only the sub-agent's last message is handed back to the main agent
        return result["messages"][-1]["content"]

    return task


# State definition
class SimpleState(TypedDict):
    messages: list


# Research sub-agent definition
research_sub_agent = {
    "name": "research-agent",
    "prompt": "You are a researcher; return the processed result of the task description directly."
}

# Create the task tool
task_tool = create_task_tool([research_sub_agent], SimpleState)

# Create the main agent
main_agent = create_react_agent(simple_model, tools=[task_tool], state_schema=SimpleState)

# Test
state = {"messages": [{"role": "user", "content": "Research AI"}]}
result = main_agent.invoke(state)
print(result["messages"][-1]["content"])
Using the file system + sub-agents, the original version from the LangGraph tutorial:
import os
from datetime import datetime
import uuid, base64
from dotenv import load_dotenv
import httpx
from langchain.chat_models import init_chat_model
from langchain_core.messages import HumanMessage, ToolMessage
from langchain_core.tools import InjectedToolArg, InjectedToolCallId, tool
from langgraph.prebuilt import create_react_agent, InjectedState
from langgraph.types import Command
from markdownify import markdownify
from pydantic import BaseModel, Field
from tavily import TavilyClient
from typing_extensions import Annotated, Literal
load_dotenv(os.path.join("..", ".env"), override=True)
summarization_model = init_chat_model(model="openai:gpt-4o-mini")
tavily_client = TavilyClient()
class Summary(BaseModel):
filename: str = Field(description="Name of the file to store.")
summary: str = Field(description="Key learnings from the webpage.")
def get_today_str() -> str:
return datetime.now().strftime("%a %b %-d, %Y")
SUMMARIZE_WEB_SEARCH = """Analyze the following webpage content and provide both:
1. A descriptive filename (max 50 chars, no spaces, .md extension)
2. A concise summary of the key learnings (2-3 sentences max)
Webpage content (date: {date}):
{webpage_content}
Provide only the most important insights suitable for research. Prioritize unique or surprising information over generic content."""
def run_tavily_search(
search_query: str,
max_results: int = 1,
topic: Literal["general", "news", "finance"] = "general",
include_raw_content: bool = True,
) -> dict:
result = tavily_client.search(
search_query,
max_results=max_results,
include_raw_content=include_raw_content,
topic=topic
)
return result
def summarize_webpage_content(webpage_content: str) -> Summary:
try:
structured_model = summarization_model.with_structured_output(Summary)
summary_and_filename = structured_model.invoke([
HumanMessage(content=SUMMARIZE_WEB_SEARCH.format(
webpage_content=webpage_content,
date=get_today_str()
))
])
return summary_and_filename
except Exception:
return Summary(
filename="search_result.md",
summary=webpage_content[:1000] + "..." if len(webpage_content) > 1000 else webpage_content
)
def process_search_results(results: dict) -> list[dict]:
processed_results = []
HTTPX_CLIENT = httpx.Client()
for result in results.get('results', []):
url = result['url']
response = HTTPX_CLIENT.get(url)
if response.status_code == 200:
raw_content = markdownify(response.text)
summary_obj = summarize_webpage_content(raw_content)
else:
raw_content = result.get('raw_content', '')
summary_obj = Summary(
filename="URL_error.md",
summary=result.get('content', 'Error reading URL; try another search.')
)
uid = base64.urlsafe_b64encode(uuid.uuid4().bytes).rstrip(b"=").decode("ascii")[:8]
name, ext = os.path.splitext(summary_obj.filename)
summary_obj.filename = f"{name}_{uid}{ext}"
processed_results.append({
'url': result['url'],
'title': result['title'],
'summary': summary_obj.summary,
'filename': summary_obj.filename,
'raw_content': raw_content,
})
return processed_results
@tool(parse_docstring=True)
def tavily_search(
    query: str,
    state: Annotated[dict, InjectedState],
    tool_call_id: Annotated[str, InjectedToolCallId],
    max_results: Annotated[int, InjectedToolArg] = 1,
    topic: Annotated[Literal["general", "news", "finance"], InjectedToolArg] = "general",
) -> Command:
    """Search the web, summarize each result, and save the full content to files.

    Args:
        query: The search query to execute.
        max_results: Maximum number of search results to return.
        topic: Topic filter for the search ("general", "news", or "finance").
    """
search_results = run_tavily_search(
query,
max_results=max_results,
topic=topic,
include_raw_content=True,
)
processed_results = process_search_results(search_results)
files = state.get("files", {})
saved_files = []
summaries = []
for i, result in enumerate(processed_results):
filename = result['filename']
file_content = f"""# Search Result: {result['title']}
**URL:** {result['url']}
**Query:** {query}
**Date:** {get_today_str()}
## Summary
{result['summary']}
## Raw Content
{result['raw_content'] if result['raw_content'] else 'No raw content available'}
"""
files[filename] = file_content
saved_files.append(filename)
summaries.append(f"- {filename}: {result['summary']}...")
summary_text = f"""🔍 Found {len(processed_results)} result(s) for '{query}':
{chr(10).join(summaries)}
Files: {', '.join(saved_files)}
💡 Use read_file() to access full details when needed."""
return Command(
update={
"files": files,
"messages": [
ToolMessage(summary_text, tool_call_id=tool_call_id)
],
}
)
@tool(parse_docstring=True)
def think_tool(reflection: str) -> str:
    """Record a reflection on research progress and what to do next.

    Args:
        reflection: The reflection or reasoning to record.
    """
    return f"Reflection recorded: {reflection}"
from deep_agents_from_scratch.file_tools import ls, read_file, write_file
from deep_agents_from_scratch.prompts import (
FILE_USAGE_INSTRUCTIONS,
RESEARCHER_INSTRUCTIONS,
SUBAGENT_USAGE_INSTRUCTIONS,
TODO_USAGE_INSTRUCTIONS,
)
from deep_agents_from_scratch.state import DeepAgentState
from deep_agents_from_scratch.task_tool import _create_task_tool
from deep_agents_from_scratch.todo_tools import write_todos, read_todos
model = init_chat_model(model="anthropic:claude-sonnet-4-20250514", temperature=0.0)
max_concurrent_research_units = 3
max_researcher_iterations = 3
sub_agent_tools = [tavily_search, think_tool]
built_in_tools = [ls, read_file, write_file, write_todos, read_todos, think_tool]
research_sub_agent = {
"name": "research-agent",
"description": "Delegate research to the sub-agent researcher. Only give this researcher one topic at a time.",
"prompt": RESEARCHER_INSTRUCTIONS.format(date=get_today_str()),
"tools": ["tavily_search", "think_tool"],
}
task_tool = _create_task_tool(
sub_agent_tools, [research_sub_agent], model, DeepAgentState
)
delegation_tools = [task_tool]
all_tools = sub_agent_tools + built_in_tools + delegation_tools
SUBAGENT_INSTRUCTIONS = SUBAGENT_USAGE_INSTRUCTIONS.format(
max_concurrent_research_units=max_concurrent_research_units,
max_researcher_iterations=max_researcher_iterations,
date=datetime.now().strftime("%a %b %-d, %Y"),
)
INSTRUCTIONS = (
"# TODO MANAGEMENT\n"
+ TODO_USAGE_INSTRUCTIONS
+ "\n\n"
+ "=" * 80
+ "\n\n"
+ "# FILE SYSTEM USAGE\n"
+ FILE_USAGE_INSTRUCTIONS
+ "\n\n"
+ "=" * 80
+ "\n\n"
+ "# SUB-AGENT DELEGATION\n"
+ SUBAGENT_INSTRUCTIONS
)
agent = create_react_agent(
model, all_tools, prompt=INSTRUCTIONS, state_schema=DeepAgentState
)
result = agent.invoke(
{
"messages": [
{
"role": "user",
"content": "Give me an overview of Model Context Protocol (MCP).",
}
],
}
)