前端监控平台/监控SDK的架构设计和难点亮点---youkeit.xyz/14907/
前端监控SDK:从指标采集到智能决策的全链路实践
现代前端监控体系全景图
现代前端监控已从简单的错误收集发展为覆盖"采集-传输-存储-分析-决策"全链路的智能体系。一个完整的监控SDK需要解决以下核心问题:
- 全维度指标采集:用户体验指标、性能指标、业务指标、错误指标
- 高效可靠传输:应对网络不稳定、数据量大等场景
- 智能数据分析:异常检测、根因分析、趋势预测
- 决策支撑:告警与熔断、自动回滚/修复、体验优化建议
核心代码实现
1. 多维度指标采集器
// metrics-collector.ts
// One metric sample as buffered by MetricsCollector and serialised into the upload payload.
interface Metric {
// Metric identifier, e.g. 'long_task', 'js_error' or a caller-chosen business name.
name: string;
// Numeric reading for the sample.
value: number;
// String labels attached to the sample.
tags: Record<string, string>;
// Capture time in epoch milliseconds (assigned from Date.now()).
timestamp: number;
}
/**
 * Buffers metric samples in memory and uploads them in batches to `/api/metrics`.
 *
 * On construction it installs:
 *  - a periodic flush plus flushes when the page is hidden or unloaded,
 *  - PerformanceObserver subscriptions for long tasks and resource loads,
 *  - web-vitals subscriptions (LCP / FID / CLS) once the library has loaded,
 *  - listeners for JS errors, unhandled promise rejections and failed resource loads.
 *
 * Tag values are always sent as strings; `null`/`undefined` values are left out.
 */
class MetricsCollector {
  private metrics: Metric[] = [];
  private readonly MAX_METRICS = 100;
  private readonly FLUSH_INTERVAL = 10000; // 10 seconds
  /** Hard cap on buffered samples so a dead endpoint cannot grow memory without bound. */
  private readonly MAX_BUFFERED = 1000;
  private readonly ENDPOINT = '/api/metrics';
  /** True after a failed upload; size-triggered flushes are paused until a flush succeeds. */
  private flushFailing = false;

  constructor() {
    this.setupAutoFlush();
    this.setupPerformanceObserver();
    this.setupErrorTracking();
  }

  /** Schedules the periodic upload and flushes when the page is being hidden or unloaded. */
  private setupAutoFlush() {
    setInterval(() => void this.flush(), this.FLUSH_INTERVAL);
    window.addEventListener('beforeunload', () => void this.flush());
    // `beforeunload` is not reliably fired (notably on mobile); the transition to
    // `hidden` is the last event a page can count on, so flush there as well.
    if (typeof document !== 'undefined') {
      document.addEventListener('visibilitychange', () => {
        if (document.visibilityState === 'hidden') void this.flush();
      });
    }
  }

  /** Subscribes to long-task and resource timing entries and loads web-vitals lazily. */
  private setupPerformanceObserver() {
    if (!window.PerformanceObserver) return;

    this.observeEntries('longtask', entry => {
      const attributed = entry as PerformanceEntry & { containerId?: string };
      this.push('long_task', entry.duration, {
        containerId: attributed.containerId || ''
      });
    });

    this.observeEntries('resource', entry => {
      // Skip the collector's own uploads: recording them would make every flush
      // produce a sample that in turn forces the next flush.
      if (entry.name.includes(this.ENDPOINT)) return;
      const resource = entry as PerformanceResourceTiming;
      this.push(
        'resource_load',
        resource.duration,
        MetricsCollector.toTags({
          name: resource.name,
          type: resource.initiatorType,
          size: resource.transferSize
        })
      );
    });

    import('web-vitals')
      .then(webVitals => {
        (window as any).webVitals = webVitals;
        this.subscribeWebVitals(webVitals as unknown as Record<string, unknown>);
      })
      .catch(err => {
        // A missing or failed chunk must not surface as an unhandled rejection.
        console.error('加载 web-vitals 失败:', err);
      });
  }

  /** Observes one entry type; environments that reject the type are skipped. */
  private observeEntries(type: string, handle: (entry: PerformanceEntry) => void) {
    try {
      new PerformanceObserver(list => {
        list.getEntries().forEach(entry => handle(entry));
      }).observe({ type, buffered: true });
    } catch {
      // Deliberate best effort: an unsupported entry type must not abort construction.
    }
  }

  /**
   * Registers LCP / FID / CLS callbacks on the loaded web-vitals module.
   * Resolves `onX` first, then `getX`, then the bare name the original code used;
   * metrics the installed version does not export are skipped.
   */
  private subscribeWebVitals(lib: Record<string, unknown>) {
    for (const name of ['LCP', 'FID', 'CLS']) {
      const subscribe = lib[`on${name}`] ?? lib[`get${name}`] ?? lib[name];
      if (typeof subscribe !== 'function') continue;
      subscribe((data: { value: number; id?: string; rating?: string }) => {
        this.push(name, data.value, MetricsCollector.toTags({ id: data.id, rating: data.rating }));
      });
    }
  }

  /** Installs listeners for JS errors, unhandled rejections and failed resource loads. */
  private setupErrorTracking() {
    // Runtime JS errors (bubble phase on window).
    window.addEventListener('error', event => {
      this.push(
        'js_error',
        1,
        MetricsCollector.toTags({
          message: event.message,
          filename: event.filename,
          lineno: event.lineno,
          colno: event.colno,
          stack: event.error?.stack
        })
      );
    });

    // Unhandled promise rejections.
    window.addEventListener('unhandledrejection', event => {
      this.push('promise_error', 1, MetricsCollector.toTags({ reason: event.reason?.toString() }));
    });

    // Failed resource loads: these do not bubble, so listen in the capture phase and
    // keep only events whose target is an element (has a tagName).
    window.addEventListener(
      'error',
      event => {
        const target = event.target as HTMLElement;
        if (target && target.tagName) {
          const source = target as HTMLElement & { src?: string; href?: string };
          this.push(
            'resource_error',
            1,
            MetricsCollector.toTags({
              tag: target.tagName,
              src: source.src || source.href,
              type: target.getAttribute('type')
            })
          );
        }
      },
      true
    );
  }

  /** Converts arbitrary tag values to strings, dropping `null` and `undefined`. */
  private static toTags(raw: Record<string, unknown>): Record<string, string> {
    const tags: Record<string, string> = {};
    for (const [key, value] of Object.entries(raw)) {
      if (value !== undefined && value !== null) tags[key] = String(value);
    }
    return tags;
  }

  /** Reads the build environment; yields undefined when `process` does not exist at runtime. */
  private static readEnv(): string | undefined {
    try {
      return process.env.NODE_ENV;
    } catch {
      return undefined;
    }
  }

  /**
   * Records one sample. `page` (current pathname) and, when known, `env` are added to the tags.
   * Triggers an upload once MAX_METRICS samples are buffered, unless the previous
   * upload failed (the periodic flush then acts as the retry).
   *
   * @param name  metric name
   * @param value numeric reading
   * @param tags  extra string labels
   */
  push(name: string, value: number, tags: Record<string, string> = {}) {
    const merged: Record<string, string> = { ...tags, page: window.location.pathname };
    const env = MetricsCollector.readEnv();
    if (env !== undefined) merged.env = env;

    this.metrics.push({ name, value, tags: merged, timestamp: Date.now() });

    // Drop the oldest samples rather than grow without bound while uploads fail.
    if (this.metrics.length > this.MAX_BUFFERED) {
      this.metrics.splice(0, this.metrics.length - this.MAX_BUFFERED);
    }
    if (this.metrics.length >= this.MAX_METRICS && !this.flushFailing) {
      void this.flush();
    }
  }

  /**
   * Uploads everything currently buffered. Prefers `navigator.sendBeacon` and falls
   * back to a keepalive `fetch`. On failure (including a non-2xx response) the batch
   * is put back in front of any newer samples, trimmed to MAX_BUFFERED.
   */
  async flush() {
    if (this.metrics.length === 0) return;
    const metricsToSend = this.metrics;
    this.metrics = [];
    try {
      const payload = JSON.stringify(metricsToSend);
      const queued =
        typeof navigator.sendBeacon === 'function' &&
        navigator.sendBeacon(this.ENDPOINT, new Blob([payload], { type: 'application/json' }));
      if (!queued) {
        const response = await fetch(this.ENDPOINT, {
          method: 'POST',
          body: payload,
          headers: { 'Content-Type': 'application/json' },
          keepalive: true
        });
        if (!response.ok) throw new Error(`HTTP ${response.status}`);
      }
      this.flushFailing = false;
    } catch (err) {
      console.error('上报指标失败:', err);
      this.flushFailing = true;
      // Failed samples are older than anything pushed meanwhile, so they go first;
      // slice(-N) keeps the newest N if the combined buffer exceeds the cap.
      this.metrics = [...metricsToSend, ...this.metrics].slice(-this.MAX_BUFFERED);
    }
  }
}
// Usage example: constructing the collector installs its timers, observers and error listeners.
const collector = new MetricsCollector();
// Custom business metrics: name, numeric value, optional string tags.
collector.push('checkout_step', 1, { step: 'cart_view' });
collector.push('api_response_time', 235, { endpoint: '/user/info' });
2. 智能数据传输控制器
// data-transmitter.ts
/** One payload waiting in the transmitter queue. */
interface QueuedItem {
  /** Random identifier used to remove exactly the items that were sent. */
  id: string;
  data: any;
  /** Number of failed send attempts so far. */
  retries: number;
  /** Enqueue time in epoch milliseconds. */
  timestamp: number;
  /** Set when enqueued via `send(data, true)`; urgent items are sent first and evicted last. */
  urgent?: boolean;
}

/**
 * Queues payloads and POSTs them to `/api/batch` in batches.
 *
 * - Sending pauses while the browser reports offline and resumes on `online`
 *   or when the page becomes visible again.
 * - The queue is mirrored to `localStorage` so a reload does not lose it.
 * - A failed batch is retried with the RETRY_DELAY back-off; items that have
 *   failed more than MAX_RETRIES times are dropped.
 * - The queue never exceeds MAX_QUEUE_SIZE; the oldest non-urgent item is evicted first.
 */
class DataTransmitter {
  private queue: QueuedItem[] = [];
  private readonly MAX_RETRIES = 3;
  private readonly MAX_QUEUE_SIZE = 500;
  private readonly BATCH_SIZE = 20;
  private readonly RETRY_DELAY = [1000, 5000, 10000]; // back-off per attempt, in ms
  private readonly STORAGE_KEY = 'monitoring_queue';
  private isOnline = navigator.onLine;
  private isSending = false;
  /** Handle of the single pending retry, so a failed batch schedules one timer, not one per item. */
  private retryTimer: ReturnType<typeof setTimeout> | undefined;

  constructor() {
    this.setupConnectivityListener();
    this.setupVisibilityListener();
    this.setupStorage();
  }

  /** Tracks connectivity and resumes sending when the browser comes back online. */
  private setupConnectivityListener() {
    window.addEventListener('online', () => {
      this.isOnline = true;
      void this.processQueue();
    });
    window.addEventListener('offline', () => {
      this.isOnline = false;
    });
  }

  /** Resumes sending when the page becomes visible. */
  private setupVisibilityListener() {
    document.addEventListener('visibilitychange', () => {
      if (document.visibilityState === 'visible') {
        void this.processQueue();
      }
    });
  }

  /** Restores a previously persisted queue and starts the periodic snapshot. */
  private setupStorage() {
    try {
      const savedQueue = localStorage.getItem(this.STORAGE_KEY);
      if (savedQueue) {
        const parsed: unknown = JSON.parse(savedQueue);
        // Stored data is untrusted: keep only well-formed items and respect the cap.
        if (Array.isArray(parsed)) {
          this.queue = parsed.filter(DataTransmitter.isQueuedItem).slice(-this.MAX_QUEUE_SIZE);
        }
      }
    } catch (err) {
      console.error('恢复队列失败:', err);
    }
    setInterval(() => this.persistQueue(), 5000);
  }

  /** Structural check for items read back from storage. */
  private static isQueuedItem(value: unknown): value is QueuedItem {
    if (typeof value !== 'object' || value === null) return false;
    const item = value as Record<string, unknown>;
    return (
      typeof item.id === 'string' &&
      typeof item.retries === 'number' &&
      typeof item.timestamp === 'number'
    );
  }

  /** Writes the current queue to storage, or clears the entry once the queue is empty. */
  private persistQueue() {
    try {
      if (this.queue.length > 0) {
        localStorage.setItem(this.STORAGE_KEY, JSON.stringify(this.queue));
      } else {
        localStorage.removeItem(this.STORAGE_KEY);
      }
    } catch (err) {
      // Storage may be unavailable or over quota; the in-memory queue still works.
      console.error('持久化队列失败:', err);
    }
  }

  /** Send order: urgent first, then most-retried, then oldest. */
  private static byPriority(a: QueuedItem, b: QueuedItem): number {
    return (
      Number(Boolean(b.urgent)) - Number(Boolean(a.urgent)) ||
      b.retries - a.retries ||
      a.timestamp - b.timestamp
    );
  }

  /** Evicts the oldest non-urgent item (or the queue head if all are urgent) until within the cap. */
  private enforceCapacity() {
    while (this.queue.length > this.MAX_QUEUE_SIZE) {
      let victim = -1;
      let oldest = Infinity;
      this.queue.forEach((item, index) => {
        if (!item.urgent && item.timestamp < oldest) {
          oldest = item.timestamp;
          victim = index;
        }
      });
      this.queue.splice(victim === -1 ? 0 : victim, 1);
    }
  }

  /**
   * Enqueues a payload and attempts to send.
   *
   * @param data   JSON-serialisable payload
   * @param urgent when true the item is sent ahead of non-urgent items
   */
  async send(data: any, urgent = false) {
    const item: QueuedItem = {
      id: Math.random().toString(36).slice(2),
      data,
      retries: 0,
      timestamp: Date.now()
    };
    if (urgent) item.urgent = true;
    this.queue.push(item);
    this.enforceCapacity();
    await this.processQueue();
  }

  /**
   * Drains the queue batch by batch while online. Stops at the first failed batch;
   * sendBatch has then scheduled the retry.
   */
  private async processQueue() {
    if (!this.isOnline || this.isSending || this.queue.length === 0) {
      return;
    }
    this.isSending = true;
    try {
      while (this.isOnline && this.queue.length > 0) {
        this.queue.sort(DataTransmitter.byPriority);
        const batch = this.queue.slice(0, this.BATCH_SIZE);
        const success = await this.sendBatch(batch);
        if (!success) break;
        // Remove by id: send() may have enqueued items while the request was in flight,
        // so the batch is not necessarily the first N entries any more.
        const sentIds = new Set(batch.map(item => item.id));
        this.queue = this.queue.filter(item => !sentIds.has(item.id));
        this.persistQueue();
      }
    } catch (err) {
      console.error('处理队列失败:', err);
    } finally {
      this.isSending = false;
    }
  }

  /**
   * POSTs one batch.
   *
   * @returns true on a 2xx response; false otherwise, after bumping retry counts,
   *          dropping exhausted items and scheduling a retry if anything is left.
   */
  private async sendBatch(batch: QueuedItem[]): Promise<boolean> {
    try {
      const response = await fetch('/api/batch', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(batch.map(item => item.data)),
        keepalive: true
      });
      if (!response.ok) throw new Error(`HTTP ${response.status}`);
      return true;
    } catch (err) {
      console.error('批量发送失败:', err);
      batch.forEach(item => {
        item.retries++;
      });
      // Items that exceeded the retry budget are discarded.
      this.queue = this.queue.filter(item => item.retries <= this.MAX_RETRIES);
      if (this.queue.length > 0) {
        const attempts = batch
          .filter(item => item.retries <= this.MAX_RETRIES)
          .map(item => item.retries);
        this.scheduleRetry(attempts.length > 0 ? Math.max(...attempts) : 1);
      }
      return false;
    }
  }

  /** Schedules a single retry using the back-off step for the given attempt number. */
  private scheduleRetry(attempt: number) {
    if (this.retryTimer !== undefined) return;
    const step = Math.min(Math.max(attempt, 1), this.RETRY_DELAY.length) - 1;
    const delay = this.RETRY_DELAY[step] ?? 1000;
    this.retryTimer = setTimeout(() => {
      this.retryTimer = undefined;
      void this.processQueue();
    }, delay);
  }

  /** Placeholder for payload compression; currently plain JSON (a library such as lz-string could be used). */
  private compress(data: unknown): string {
    return JSON.stringify(data);
  }
}
3. 智能决策引擎
// decision-engine.ts
// A rule evaluated by DecisionEngine each time a sample of `metric` is processed.
type AlertRule = {
// Category label; the engine only echoes it in the alert message and payload.
type: 'threshold' | 'trend' | 'anomaly';
// Name of the metric this rule applies to (matched by strict equality).
metric: string;
// Predicate deciding whether the rule fires. Receives the new sample and the
// recent values the engine has recorded for the metric.
condition: (value: number, history: number[]) => boolean;
// Severity label; upper-cased in the log line and the alert title.
severity: 'warning' | 'error' | 'critical';
// What the engine does when the rule fires.
action: 'alert' | 'rollback' | 'throttle';
};
/**
 * Keeps a bounded history per metric, evaluates alert rules against each new
 * sample and dispatches the rule's action.
 *
 * Rules are evaluated against the samples recorded BEFORE the one being
 * processed, so the value under test never dilutes its own baseline
 * (mean, standard deviation, ...). Rules receive a copy of the history.
 */
class DecisionEngine {
  private rules: AlertRule[] = [];
  // A Map rather than a plain object so metric names such as 'constructor' are safe keys.
  private metricHistory = new Map<string, number[]>();
  private readonly HISTORY_SIZE = 100;

  constructor() {
    this.setupDefaultRules();
  }

  /** Registers the built-in threshold, trend and anomaly rules. */
  private setupDefaultRules() {
    // Threshold rule.
    this.addRule({
      type: 'threshold',
      metric: 'js_error',
      condition: value => value > 10,
      severity: 'error',
      action: 'alert'
    });
    // Trend rule: the sample is more than twice the mean of the recorded history.
    this.addRule({
      type: 'trend',
      metric: 'api_response_time',
      condition: (value, history) => {
        if (history.length < 5) return false;
        const avg = history.reduce((sum, v) => sum + v, 0) / history.length;
        return value > avg * 2;
      },
      severity: 'warning',
      action: 'alert'
    });
    // Anomaly rule (three-sigma): the sample is more than 3 standard deviations above the mean.
    this.addRule({
      type: 'anomaly',
      metric: 'LCP',
      condition: (value, history) => {
        if (history.length < 30) return false;
        const mean = history.reduce((sum, v) => sum + v, 0) / history.length;
        const variance =
          history.reduce((sum, v) => sum + Math.pow(v - mean, 2), 0) / history.length;
        const stdDev = Math.sqrt(variance);
        return value > mean + 3 * stdDev;
      },
      severity: 'critical',
      action: 'throttle'
    });
  }

  /** Adds a rule; rules are evaluated in insertion order. */
  addRule(rule: AlertRule) {
    this.rules.push(rule);
  }

  /**
   * Records a sample and runs every rule registered for `metric`.
   *
   * @param metric metric name
   * @param value  new sample
   */
  processMetric(metric: string, value: number) {
    let history = this.metricHistory.get(metric);
    if (!history) {
      history = [];
      this.metricHistory.set(metric, history);
    }
    // Snapshot the baseline before appending the new sample.
    const baseline = [...history];

    history.push(value);
    if (history.length > this.HISTORY_SIZE) {
      history.shift();
    }

    for (const rule of this.rules) {
      if (rule.metric === metric && rule.condition(value, baseline)) {
        this.triggerAction(rule, value);
      }
    }
  }

  /** Logs the hit and dispatches on the rule's action. */
  private triggerAction(rule: AlertRule, value: number) {
    console.log(`[${rule.severity.toUpperCase()}] ${rule.metric}触发规则:`, value);
    switch (rule.action) {
      case 'alert':
        this.sendAlert(rule, value);
        break;
      case 'rollback':
        this.triggerRollback(rule);
        break;
      case 'throttle':
        this.throttleSystem(rule);
        break;
      default: {
        // Compile-time exhaustiveness check; reached at runtime only for an untyped rule.
        const unknownAction: never = rule.action;
        console.error('未知的规则动作:', unknownAction);
      }
    }
  }

  /** POSTs the alert to `/api/alert`; failures are logged, never thrown. */
  private sendAlert(rule: AlertRule, value: number) {
    const message = `[前端监控] ${rule.metric}异常: ${value} (${rule.type})`;
    // A real deployment would forward this to a chat or paging integration.
    fetch('/api/alert', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        title: `${rule.severity.toUpperCase()}告警`,
        message,
        metric: rule.metric,
        value,
        type: rule.type
      })
    })
      .then(response => {
        if (!response.ok) throw new Error(`HTTP ${response.status}`);
      })
      .catch(err => {
        // Without this handler a network failure becomes an unhandled rejection.
        console.error('发送告警失败:', err);
      });
  }

  /** Placeholder: a real deployment would trigger the CI/CD rollback here. */
  private triggerRollback(rule: AlertRule) {
    console.log('执行回滚操作...');
  }

  /** Placeholder: e.g. disable non-core features or lower the collection rate. */
  private throttleSystem(rule: AlertRule) {
    console.log('执行降级操作...');
  }

  /**
   * Fits a least-squares line through the recorded history (x = sample index)
   * and extrapolates one step ahead.
   *
   * @param metric metric name
   * @returns the latest value, the predicted next value and the trend direction
   *          (slope above 0.1 is 'up', below -0.1 is 'down'). With fewer than
   *          10 samples it returns zeros and 'stable'.
   */
  predictTrend(metric: string): { current: number; predicted: number; trend: 'up' | 'down' | 'stable' } {
    const history = this.metricHistory.get(metric) ?? [];
    if (history.length < 10) {
      return { current: 0, predicted: 0, trend: 'stable' };
    }

    const n = history.length;
    let xSum = 0;
    let ySum = 0;
    let xySum = 0;
    let xxSum = 0;
    history.forEach((y, i) => {
      xSum += i;
      ySum += y;
      xySum += i * y;
      xxSum += i * i;
    });
    // n >= 10 distinct x values, so the denominator is never zero.
    const slope = (n * xySum - xSum * ySum) / (n * xxSum - xSum * xSum);
    const intercept = (ySum - slope * xSum) / n;
    const current = history[n - 1] as number;
    const predicted = slope * n + intercept;

    return {
      current,
      predicted,
      trend: slope > 0.1 ? 'up' : slope < -0.1 ? 'down' : 'stable'
    };
  }
}
// Usage example: the constructor registers the default rules.
const engine = new DecisionEngine();
// Custom business rule
engine.addRule({
type: 'threshold',
metric: 'checkout_abandon_rate',
condition: (value) => value > 0.7, // checkout abandonment rate above 70%
severity: 'error',
action: 'alert'
});
// Feed a sample: recorded by the collector for upload and evaluated by the engine.
collector.push('checkout_abandon_rate', 0.75);
engine.processMetric('checkout_abandon_rate', 0.75);
// Query the trend prediction for a metric.
const lcpTrend = engine.predictTrend('LCP');
console.log('LCP趋势预测:', lcpTrend);
全链路亮点设计
1. 智能采样与降级策略
// adaptive-sampler.js
class AdaptiveSampler {
// Sets the initial per-category sampling rates, resets the load estimate to 0
// and starts the periodic load monitor.
constructor() {
this.sampleRates = {
error: 1.0, // errors are always collected
performance: 0.2,
business: 0.1,
log: 0.01
};
this.systemLoad = 0;
this.monitorSystemLoad();
}
// Every 5 seconds, estimates system load and lets adjustSampleRates() react to it.
// The whole estimate is skipped when performance.memory is unavailable, in which
// case systemLoad keeps its previous value and the rates are never adjusted.
monitorSystemLoad() {
// Load is derived from heap usage and a rough CPU probe.
setInterval(() => {
if (window.performance && performance.memory) {
// Fraction of the JS heap limit currently in use.
const memoryUsage = performance.memory.usedJSHeapSize /
performance.memory.jsHeapSizeLimit;
// Simplified CPU probe: time a fixed busy loop (a real project could use a Web Worker).
// NOTE(review): the loop runs synchronously inside the timer callback.
const now = performance.now();
let sum = 0;
for (let i = 0; i < 1000000; i++) {
sum += Math.random();
}
// Elapsed milliseconds divided by 10; the result is not clamped, so it can exceed 1.
const cpuUsage = (performance.now() - now) / 10;
// The larger of the two estimates is taken as the load.
this.systemLoad = Math.max(memoryUsage, cpuUsage);
// Adapt the sampling rates to the new load.
this.adjustSampleRates();
}
}, 5000);
}
adjustSampleRates() {
if (this.systemLoad > 0.8) {
// 高负载时降级采样
this.sampleRates.performance = Math.max(0.05, this.sampleRates.performance * 0.5);
this.sampleRates.business = Math.max(0.02, this.sampleRates.business * 0.5);
this.sampleRates.log = 0;
} else if (this.systemLoad < 0.3) {
// 低负载时恢复采样
this.sampleRates.performance = Math.min(0.3, this.sampleRates.performance * 1.5);
this.sampleRates.business = Math.min(0.2, this.sampleRates.business * 1.5);
this.sampleRates.log = 0.01;
}
}
shouldSample(type) {
if (!this.sampleRates[type]) return true;
// 重要用户全采样(可根据userId哈希决定)
if (this.isImportantUser()) return true;
return Math.random() < this.sampleRates[type];