什么是Context Engineering?
2026年,"Context Engineering"(上下文工程)已经成为AI工程师的核心技能之一,与Prompt Engineering并列。如果说Prompt Engineering是"如何向AI提问",那么Context Engineering是"如何管理AI知道什么"。
一个AI Agent在执行任务时,它的行为完全由上下文窗口的内容决定。上下文就是AI的工作记忆,它决定了AI能做什么、知道什么、记得什么。管理不好上下文,AI Agent就会:
- 忘记之前说过的话(失忆)
- 在不应该的地方使用某些信息(越界)
- 随着对话深入逐渐偏离原始目标(走偏)
本文将从工程角度系统介绍Context Engineering的核心技术,以及如何在生产Agent中实践。
上下文的五种类型
在设计AI Agent时,上下文可以分为五类,每类都需要不同的管理策略:
1. 系统上下文(System Context)
- 角色定义、行为规则、能力边界
- 永久存在于每次调用中
2. 任务上下文(Task Context)
- 当前任务的目标、约束、背景信息
- 任务开始时注入,任务结束时清除
3. 对话上下文(Conversation Context)
- 历史对话记录
- 需要主动管理:压缩、摘要、选择性遗忘
4. 知识上下文(Knowledge Context)
- 从知识库检索的相关信息(RAG)
- 动态注入,只在需要时检索
5. 状态上下文(State Context)
- Agent的工作状态:已完成的步骤、中间结果
- 需要持久化,支持断点续传
核心技术1:上下文压缩与滚动窗口
from anthropic import Anthropic
from dataclasses import dataclass, field
from typing import Optional
import json
@dataclass
class Message:
    """A single conversation message with a rough token-count estimate."""

    role: str                 # "user", "assistant", or "system" (internal summaries)
    content: str
    token_estimate: int = 0   # filled in automatically when left at 0
    importance: float = 1.0   # 0-1; higher values are preferentially kept during compression

    def __post_init__(self):
        # Heuristic: ~1.3 tokens per whitespace-separated word.  Coerce to int so
        # the value honors the field's annotation (the bare product is a float).
        # NOTE(review): whitespace splitting badly underestimates CJK text, which
        # has no spaces — confirm whether a character-based estimate is needed.
        if self.token_estimate == 0:
            self.token_estimate = int(len(self.content.split()) * 1.3)
@dataclass
class ContextWindow:
    """Smart context-window manager.

    Tracks an estimated token budget and, once usage crosses
    ``compression_threshold``, replaces older history with an LLM-generated
    summary while always keeping the most recent ``min_messages_to_keep``
    messages verbatim.
    """

    max_tokens: int = 100000            # context-window size (tokens)
    compression_threshold: float = 0.8  # compress once usage exceeds 80%
    min_messages_to_keep: int = 4       # always keep at least the last N messages
    system_prompt: str = ""
    messages: list[Message] = field(default_factory=list)
    _client: Anthropic = field(default_factory=Anthropic)

    @property
    def current_tokens(self) -> int:
        """Estimate tokens currently in use (system prompt + all messages)."""
        system_tokens = len(self.system_prompt.split()) * 1.3
        message_tokens = sum(m.token_estimate for m in self.messages)
        return int(system_tokens + message_tokens)

    @property
    def is_near_limit(self) -> bool:
        """True once estimated usage passes the compression threshold."""
        return self.current_tokens > self.max_tokens * self.compression_threshold

    def add_message(self, role: str, content: str, importance: float = 1.0):
        """Append a message; automatically compress when the window is nearly full."""
        msg = Message(role=role, content=content, importance=importance)
        self.messages.append(msg)
        if self.is_near_limit:
            self._compress()

    def _compress(self):
        """
        Compression strategy:
        1. fold older history into an LLM-generated summary
        2. keep the summary as a high-importance internal message
        3. always keep the most recent N messages verbatim
        """
        if len(self.messages) <= self.min_messages_to_keep:
            return
        # Partition: history to compress + recent messages to keep.
        recent_count = self.min_messages_to_keep
        to_compress = self.messages[:-recent_count]
        recent = self.messages[-recent_count:]
        if not to_compress:
            return
        # Summarize the old history with a cheap, fast model.
        conversation_text = "\n".join([
            f"{m.role}: {m.content}" for m in to_compress
        ])
        response = self._client.messages.create(
            model="claude-3-haiku-20240307",
            max_tokens=500,
            messages=[{
                "role": "user",
                "content": f"""请用简洁的摘要概括以下对话中的关键信息、决策和结论:
{conversation_text}
要求:
1. 保留所有重要的事实、决策和承诺
2. 忽略礼貌用语和重复内容
3. 摘要长度不超过原文的30%
4. 以客观叙述方式写作"""
            }]
        )
        summary = response.content[0].text
        summary_msg = Message(
            role="system",
            content=f"[早期对话摘要]\n{summary}",
            importance=1.0,  # the summary itself is high-importance
        )
        # Replace: summary + recent messages.
        self.messages = [summary_msg] + recent
        print(f" [Context] 压缩完成:{len(to_compress)+recent_count}条 → {len(self.messages)}条")

    def get_messages_for_api(self) -> list[dict]:
        """Convert internal messages to the Messages-API format.

        BUG FIX: internal ``system``-role summaries used to be filtered out
        entirely, so compressed history never reached the model.  Summaries are
        now sent as ``user`` turns, and consecutive same-role turns are merged
        so the list keeps strict user/assistant alternation.
        """
        api_messages: list[dict] = []
        for m in self.messages:
            # Summaries ride along as user content; any other role is dropped.
            role = "user" if m.role == "system" else m.role
            if role not in ("user", "assistant"):
                continue
            if api_messages and api_messages[-1]["role"] == role:
                api_messages[-1]["content"] += "\n\n" + m.content
            else:
                api_messages.append({"role": role, "content": m.content})
        return api_messages
核心技术2:分层记忆架构
import time
from pathlib import Path
@dataclass
class Memory:
    """One remembered item, plus the metadata used for ranking and eviction."""

    content: str
    created_at: float = field(default_factory=time.time)     # creation time, epoch seconds
    last_accessed: float = field(default_factory=time.time)  # touched on every recall hit
    access_count: int = 0                                    # how often this memory was recalled
    importance: float = 0.5                                  # 0-1; drives promotion and eviction
    tags: list[str] = field(default_factory=list)            # free-form labels
    memory_type: str = "episodic"                            # one of: episodic, semantic, procedural
class HierarchicalMemory:
    """
    Three-tier memory architecture:
    L1 - working memory: current conversational context (fastest, smallest)
    L2 - episodic memory: recent interaction history (medium speed, medium size)
    L3 - semantic memory: long-term knowledge and facts (slowest, largest)
    """

    def __init__(self, user_id: str, storage_path: str = "./memory_store"):
        self.user_id = user_id
        self.storage_path = Path(storage_path) / user_id
        self.storage_path.mkdir(parents=True, exist_ok=True)
        # L1: working memory (in-process only)
        self.working_memory: list[Memory] = []
        self.max_working_memories = 10
        # L2: episodic memory (in memory, persisted to disk)
        self.episodic_memories: list[Memory] = []
        self.max_episodic_memories = 100
        # L3: semantic memory (vector store, lazily initialized)
        self._semantic_store = None
        # Restore previously persisted episodic memories.
        self._load_episodic()

    def remember(self, content: str, importance: float = 0.5, tags: list[str] | None = None):
        """Record a new memory; importance decides which tier it lands in."""
        memory = Memory(
            content=content,
            importance=importance,
            tags=tags or [],
        )
        if importance >= 0.8:
            # High importance -> straight into episodic memory, persisted.
            self.episodic_memories.append(memory)
            # BUG FIX: this path used to bypass the episodic cap entirely,
            # letting the store grow without bound.
            if len(self.episodic_memories) > self.max_episodic_memories:
                self._evict_episodic()
            self._save_episodic()
        else:
            # Ordinary importance -> working memory first.
            self.working_memory.append(memory)
            # When working memory overflows, promote the important entries.
            if len(self.working_memory) > self.max_working_memories:
                self._consolidate_working_memory()

    def recall(self, query: str, k: int = 5, include_working: bool = True) -> list[Memory]:
        """
        Retrieve relevant memories across tiers.

        Strategy: include the most recent working memories, keyword-match the
        episodic memories, then rank everything by relevance and importance.
        NOTE(review): whitespace tokenization matches poorly on CJK text, which
        has no spaces — confirm whether a real tokenizer is needed here.
        """
        query_words = set(query.lower().split())

        def relevance(m: Memory) -> int:
            # Crude lexical overlap between the query and the memory content.
            return len(query_words & set(m.content.lower().split()))

        scored: list[tuple[int, Memory]] = []
        # Working memory: always include the last few, newest first.
        if include_working:
            for memory in reversed(self.working_memory[-5:]):
                scored.append((relevance(memory), memory))
        # Episodic memory: keyword match, updating access statistics on hits.
        for memory in self.episodic_memories:
            overlap = relevance(memory)
            if overlap > 0:
                memory.last_accessed = time.time()
                memory.access_count += 1
                scored.append((overlap, memory))
        # BUG FIX: the old sort ignored the keyword overlap it had just
        # computed; rank by relevance first, then importance/usage score.
        scored.sort(
            key=lambda pair: (pair[0], pair[1].importance * (1 + pair[1].access_count * 0.1)),
            reverse=True,
        )
        return [memory for _, memory in scored[:k]]

    def build_memory_context(self, current_task: str) -> str:
        """
        Build a memory-context string for the current task, dynamically
        selecting which memories to inject based on task relevance.
        """
        relevant_memories = self.recall(current_task, k=5)
        if not relevant_memories:
            return ""
        memory_texts = []
        for m in relevant_memories:
            age_hours = (time.time() - m.created_at) / 3600
            age_str = f"{age_hours:.0f}小时前" if age_hours < 24 else f"{age_hours/24:.0f}天前"
            memory_texts.append(f"[{age_str}] {m.content}")
        return "【相关历史记忆】\n" + "\n".join(memory_texts)

    def _consolidate_working_memory(self):
        """Promote important working memories into episodic memory."""
        important = [m for m in self.working_memory if m.importance >= 0.6]
        self.episodic_memories.extend(important)
        # Keep only the most recent half of working memory.
        self.working_memory = self.working_memory[-self.max_working_memories // 2:]
        # Evict low-value episodic memories once over the cap, then persist.
        if len(self.episodic_memories) > self.max_episodic_memories:
            self._evict_episodic()
        self._save_episodic()

    def _evict_episodic(self):
        """Drop the 20 lowest-scoring episodic memories (importance x usage)."""
        self.episodic_memories.sort(key=lambda m: m.importance * (1 + m.access_count * 0.1))
        self.episodic_memories = self.episodic_memories[20:]

    def _save_episodic(self):
        """Persist episodic memories as JSON under the per-user storage dir."""
        data = [
            {
                "content": m.content,
                "created_at": m.created_at,
                "last_accessed": m.last_accessed,  # now persisted (was silently dropped)
                "importance": m.importance,
                "tags": m.tags,
                "access_count": m.access_count,
            }
            for m in self.episodic_memories
        ]
        with open(self.storage_path / "episodic.json", 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)

    def _load_episodic(self):
        """Load persisted episodic memories, if any (missing keys use Memory defaults)."""
        path = self.storage_path / "episodic.json"
        if path.exists():
            with open(path, encoding='utf-8') as f:
                data = json.load(f)
            self.episodic_memories = [
                Memory(**item) for item in data
            ]
核心技术3:防止上下文污染
class ContextIsolator:
    """
    Prevent context bleed: keep the contexts of different tasks from
    contaminating each other.  Intended for agents running multiple
    tasks concurrently.
    """

    def __init__(self):
        # Maps context id -> {"system_prompt", "messages", "created_at"}.
        # (Annotation fix: values are dicts, not lists.)
        self._contexts: dict[str, dict] = {}
        self._active_context_id: str | None = None

    def create_context(self, context_id: str, system_prompt: str) -> None:
        """Create an isolated context space."""
        self._contexts[context_id] = {
            "system_prompt": system_prompt,
            "messages": [],
            "created_at": time.time(),
        }

    def switch_context(self, context_id: str) -> None:
        """Switch to the given context; raises ValueError if it does not exist."""
        if context_id not in self._contexts:
            raise ValueError(f"上下文不存在: {context_id}")
        self._active_context_id = context_id

    def add_to_active_context(self, role: str, content: str) -> None:
        """Append a message to the currently active context."""
        if not self._active_context_id:
            raise RuntimeError("没有活跃的上下文")
        self._contexts[self._active_context_id]["messages"].append(
            {"role": role, "content": content}
        )

    def get_active_context_messages(self) -> tuple[str, list]:
        """Return (system_prompt, messages) of the currently active context."""
        if not self._active_context_id:
            raise RuntimeError("没有活跃的上下文")
        ctx = self._contexts[self._active_context_id]
        return ctx["system_prompt"], ctx["messages"]

    def merge_contexts(self, target_id: str, source_id: str, summary_only: bool = True):
        """
        Merge one context into another (e.g. inject a finished subtask's
        result into the main task).

        summary_only=True: inject only a truncated summary, never the full
        history, to keep information from leaking across task boundaries.
        """
        source = self._contexts.get(source_id)
        target = self._contexts.get(target_id)
        if not source or not target:
            raise ValueError("上下文不存在")
        if summary_only:
            messages_text = "\n".join([
                f"{m['role']}: {m['content']}"
                for m in source["messages"]
            ])
            # BUG FIX: the ellipsis used to be appended unconditionally, even
            # when nothing was truncated; add it only when text was cut.
            snippet = messages_text[:300]
            suffix = "..." if len(messages_text) > 300 else ""
            summary = f"[子任务 {source_id} 完成,摘要:{snippet}{suffix}]"
            target["messages"].append({"role": "system", "content": summary})
        else:
            # Full injection: copy the source history verbatim.
            target["messages"].extend(source["messages"])
综合实践:Context-Aware Agent
class ContextAwareAgent:
    """Complete agent wiring together all the context-management techniques."""

    def __init__(self, user_id: str, system_prompt: str):
        self.client = Anthropic()
        self.user_id = user_id
        # Sliding, self-compressing context window.
        self.context_window = ContextWindow(
            system_prompt=system_prompt,
            max_tokens=120000,
        )
        # Tiered long-term memory for this user.
        self.memory = HierarchicalMemory(user_id)

    def chat(self, user_message: str) -> str:
        """Handle one user turn, with memory retrieval woven in."""
        # Retrieve memories relevant to this message; when any exist, prepend
        # them to the outgoing user turn so the model sees the history.
        memory_context = self.memory.build_memory_context(user_message)
        outgoing = (
            f"{memory_context}\n\n用户消息:{user_message}"
            if memory_context
            else user_message
        )
        # Hand the turn to the window (compression is handled automatically),
        # then query the model with the managed message history.
        self.context_window.add_message("user", outgoing)
        reply = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=2048,
            system=self.context_window.system_prompt,
            messages=self.context_window.get_messages_for_api(),
        ).content[0].text
        # Record the assistant reply, then harvest anything worth remembering.
        self.context_window.add_message("assistant", reply)
        self._extract_and_remember(user_message, reply)
        return reply

    def _extract_and_remember(self, user_msg: str, assistant_msg: str):
        """Store noteworthy information from the exchange into long-term memory."""
        # Keyword triggers hinting at preferences, decisions, facts, etc.
        key_indicators = ["我喜欢", "我不想", "请记住", "我的", "决定了", "选择"]
        if any(kw in user_msg for kw in key_indicators):
            self.memory.remember(
                content=f"用户表达:{user_msg[:200]}",
                importance=0.8,
                tags=["用户偏好"],
            )
总结:Context Engineering的黄金法则
- 最小化原则:注入的上下文越少越好,只放AI真正需要的信息
- 层次化存储:工作记忆 → 情节记忆 → 语义记忆,不同时效的信息放不同层
- 主动压缩:不要让上下文被动地满溢,要主动管理和压缩
- 隔离原则:不同任务的上下文要隔离,防止信息越界和污染
- 可溯源性:任何注入的信息都要能追溯来源,方便调试
Context Engineering是让AI Agent在长期运行中保持稳定、可预期的核心基础设施。投资在上下文管理上的工程努力,会在Agent的可靠性和用户体验上得到数倍的回报。