前端监控系统指南~必看篇

72 阅读4分钟

前端监控是一个系统工程,主要涵盖性能监控、错误监控、用户行为监控和业务监控。

前端监控主要关注哪些指标?

性能指标:LCP、FID、CLS、FCP、TTFB
错误指标:JS错误、资源加载错误、Promise拒绝、HTTP请求错误
用户行为:PV/UV、点击热力图、停留时长、转化率
业务指标:关键操作成功率、接口响应时间、白屏率

如何设计一个完整的前端监控系统?

数据采集层:Performance API、Error 监听、用户行为追踪
数据处理层:数据清洗、格式化、采样去重
数据传输层:批量上报、失败重试、离线缓存
数据存储层:时序数据库、错误日志存储
数据展示层:Dashboard、告警、统计分析

针对个人而言,要搭建前端监控系统需要具备哪些要素呢?

  1. 系统性思维:从采集、处理、上报到展示的全链路思考
  2. 性能意识:监控系统本身不能影响业务性能
  3. 工程化能力:错误降级、采样去重、失败重试等机制
  4. 业务结合:监控要为业务服务,关注关键业务指标
  5. 现代技术:熟悉 Web Vitals、Performance API 等现代浏览器能力

以Vue项目为例,实现一个日志上报性能优化实践案例

1. 核心设计原则

监控系统自身必须是高性能的,不能成为性能瓶颈。以下是关键原则:

  • 异步化:所有上报操作必须异步执行
  • 非阻塞:不能阻塞主线程和关键渲染路径
  • 懒加载:监控代码按需加载
  • 批量处理:合并上报请求,减少网络开销
  • 失败降级:上报失败不能影响业务功能

2. Vue 项目代码封装

2.1 基础日志上报类

/**
 * Client-side log collector for errors, Web Vitals and custom events.
 *
 * Entries are buffered in an in-memory queue and uploaded to `/api/logs` in
 * batches. Entries recorded through `captureException` (and entries whose
 * `type` is exactly `'ERROR'`) are uploaded right away; everything else waits
 * for the flush timer.
 */
class PerformanceLogger {
  constructor() {
    this.queue = []
    this.isInitialized = false
    this.maxQueueSize = 50
    this.flushInterval = 10000 // 10 seconds
    this.retryAttempts = 2

    // Explicit initial state for fields that were previously created lazily.
    this.failedFlushes = 0
    this.flushTimeout = null
    this.isSending = false
    this.isUnloading = false

    // Error monitoring starts immediately ...
    this.initCoreMonitoring()

    // ... non-critical features are initialised later.
    this.lazyInit()
  }

  /** Installs the core hooks: error listeners, Vue handler and unload flushing. */
  initCoreMonitoring() {
    this.setupErrorHandling()
    this.setupVueErrorHandler()
    // Previously never called, so the queue was not flushed when the page went away.
    this.setupUnloadHandler()
    this.isInitialized = true
  }

  /** Defers non-critical setup until the browser is idle (or 3s later as a fallback). */
  lazyInit() {
    const initDeferredFeatures = () => {
      this.setupPerformanceMonitoring()
      // Optional hook: this class does not define it, so only call it when
      // something else has provided an implementation.
      this.setupUserBehaviorTracking?.()
    }

    if ('requestIdleCallback' in window) {
      requestIdleCallback(initDeferredFeatures)
    } else {
      setTimeout(initDeferredFeatures, 3000)
    }
  }

  /** Registers the global `error` and `unhandledrejection` listeners. */
  setupErrorHandling() {
    window.addEventListener('error', this.handleWindowError.bind(this))
    window.addEventListener('unhandledrejection', this.handlePromiseError.bind(this))
  }

  /** Installs `Vue.config.errorHandler` when a global `window.Vue` is present. */
  setupVueErrorHandler() {
    if (window.Vue && window.Vue.config) {
      window.Vue.config.errorHandler = (err, vm, info) => {
        this.captureException(err, {
          type: 'VUE_ERROR',
          component: vm?.$options?.name,
          lifecycle: info,
          stack: err?.stack
        })
      }
    }
  }

  /** Lazily loads `web-vitals` and subscribes to the core metrics. */
  setupPerformanceMonitoring() {
    import('web-vitals').then((webVitals) => {
      const report = this.reportWebVital.bind(this)

      // Newer web-vitals releases export `onXXX`, older ones `getXXX`;
      // accept either and skip metrics the installed version does not export.
      for (const metric of ['CLS', 'FID', 'LCP', 'FCP', 'TTFB']) {
        const subscribe = webVitals[`on${metric}`] ?? webVitals[`get${metric}`]
        if (typeof subscribe === 'function') {
          subscribe(report)
        }
      }
    }).catch(() => {
      // A failed load must not affect the application.
      console.warn('Web Vitals 加载失败,性能监控不可用')
    })
  }

  /**
   * Queues one Web Vitals measurement.
   * @param {{name: string, value: number, rating?: string}} metric
   */
  reportWebVital(metric) {
    this.log({
      type: 'PERFORMANCE',
      name: metric.name,
      value: metric.value,
      rating: metric.rating,
      timestamp: Date.now()
    })
  }

  /**
   * Records an error and uploads it immediately.
   *
   * @param {*} error - Usually an Error, but thrown/rejected values can be
   *   anything (string, null, ...), so non-objects are stringified instead of
   *   being dereferenced.
   * @param {Object} [extra] - Extra fields; they override `type`, `message`
   *   and `stack` when present.
   */
  captureException(error, extra = {}) {
    const isObject = error !== null && typeof error === 'object'

    const errorInfo = {
      type: 'ERROR',
      message: isObject ? error.message : String(error ?? 'Unknown error'),
      stack: isObject ? error.stack : undefined,
      ...extra,
      url: window.location.href,
      userAgent: navigator.userAgent,
      timestamp: Date.now()
    }

    // `extra.type` replaces 'ERROR' (e.g. 'WINDOW_ERROR'), so the type check in
    // log() cannot detect errors; request the immediate upload explicitly.
    this.log(errorInfo, { immediate: true })
  }

  /**
   * Queues one entry (core method).
   *
   * @param {Object} data - Entry to record; must have a `type`.
   * @param {Object} [options]
   * @param {boolean} [options.immediate] - Upload right away instead of
   *   waiting for the timer. Defaults to `data.type === 'ERROR'`.
   */
  log(data, { immediate = data.type === 'ERROR' } = {}) {
    // Queue is full: drop the oldest entry.
    if (this.queue.length >= this.maxQueueSize) {
      this.queue.shift()
    }

    this.queue.push(data)

    if (immediate) {
      this.flushImmediate()
    } else {
      this.scheduleFlush()
    }
  }

  /** Arms the flush timer unless one is already pending. */
  scheduleFlush() {
    // Do not restart a pending timer: resetting it on every log() call would
    // postpone the flush indefinitely while events keep arriving.
    if (this.flushTimeout) return

    this.flushTimeout = setTimeout(() => {
      this.flushTimeout = null
      this.flush()
    }, this.flushInterval)
  }

  /** Uploads the whole queue right now (errors, page hide/unload). */
  flushImmediate() {
    if (this.queue.length === 0) return

    const batch = this.queue.splice(0, this.queue.length)

    // The rejection must be handled here: an unhandled rejection would be
    // picked up by our own `unhandledrejection` listener and logged as a new
    // error for every failed upload.
    Promise.resolve(this.sendBatch(batch)).catch((error) => {
      console.warn('日志上报失败:', error)
      if (!this.isUnloading) {
        this.queue.unshift(...batch)
        this.scheduleFlush()
      }
    })
  }

  /** Timer-driven flush: uploads up to 10 entries, retrying a limited number of times. */
  async flush() {
    if (this.queue.length === 0 || this.isSending) return

    this.isSending = true
    const batch = this.queue.splice(0, Math.min(this.queue.length, 10))

    try {
      await this.sendBatch(batch)
      this.failedFlushes = 0
    } catch (error) {
      console.warn('日志上报失败:', error)
      this.failedFlushes += 1

      if (this.failedFlushes <= this.retryAttempts) {
        // Put the batch back so the next flush retries it.
        this.queue.unshift(...batch)
      } else {
        // Retry budget exhausted: drop the batch instead of retrying forever.
        this.failedFlushes = 0
      }
    } finally {
      this.isSending = false

      // More data waiting: keep flushing.
      if (this.queue.length > 0) {
        this.scheduleFlush()
      }
    }
  }

  /**
   * Uploads one batch.
   *
   * @param {Object[]} batch
   * @returns {Promise<*>} Parsed JSON response body, `null` when the response
   *   has no JSON body, or `undefined` when sent through the Beacon API.
   * @throws {Error} On beacon rejection, timeout/network failure or a non-2xx status.
   */
  async sendBatch(batch) {
    const payload = JSON.stringify({ events: batch })

    // While unloading, prefer the Beacon API when it is available.
    if (this.isUnloading && 'sendBeacon' in navigator) {
      // A bare string would be sent as text/plain; wrap it in a Blob so the
      // beacon carries the same content type as the fetch path.
      const body = new Blob([payload], { type: 'application/json' })
      if (navigator.sendBeacon('/api/logs', body)) return
      throw new Error('Beacon failed')
    }

    // Regular upload: fetch with a 5s timeout and keepalive.
    const controller = new AbortController()
    const timeoutId = setTimeout(() => controller.abort(), 5000)

    try {
      const response = await fetch('/api/logs', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: payload,
        signal: controller.signal,
        keepalive: true // lets the request outlive the page
      })

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}`)
      }

      // A successful upload with an empty / non-JSON body (e.g. 204) is still
      // a success; do not turn it into a failure that triggers a re-upload.
      return await response.json().catch(() => null)
    } finally {
      clearTimeout(timeoutId)
    }
  }

  /** Handles the window `error` event. */
  handleWindowError(event) {
    // `event.error` can be null (the browser may only supply `event.message`),
    // so fall back to the message text.
    this.captureException(event.error ?? event.message, {
      type: 'WINDOW_ERROR',
      filename: event.filename,
      lineno: event.lineno,
      colno: event.colno
    })
  }

  /** Handles `unhandledrejection`; the reason may be any value. */
  handlePromiseError(event) {
    this.captureException(event.reason, {
      type: 'PROMISE_ERROR'
    })
  }

  /** Flushes the queue when the page is unloaded or hidden. */
  setupUnloadHandler() {
    this.isUnloading = false

    window.addEventListener('beforeunload', () => {
      this.isUnloading = true
      this.flushImmediate()
    })

    // Also flush when the page becomes hidden.
    document.addEventListener('visibilitychange', () => {
      if (document.visibilityState === 'hidden') {
        this.flushImmediate()
      }
    })
  }
}

// Create the shared singleton; the constructor runs as soon as this module is evaluated
const logger = new PerformanceLogger()

export default logger

2.2 Vue 插件封装

import logger from '@/utils/logger'

// Runs `task` when the browser is idle, falling back to a zero-delay timer in
// environments that do not provide requestIdleCallback.
const runWhenIdle = (task) => {
  if (typeof requestIdleCallback === 'function') {
    requestIdleCallback(task)
  } else {
    setTimeout(task, 0)
  }
}

/**
 * Vue 2 plugin that exposes the logger as `this.$logger` / `Vue.logger`,
 * records component lifecycle and performance entries through a global mixin,
 * and registers the `v-track` directive for interaction tracking.
 *
 * NOTE: the `options` argument is accepted but not consumed by this plugin.
 */
const LoggerPlugin = {
  install(Vue, options) {
    // Expose the logger on every component instance
    Vue.prototype.$logger = logger

    // Global mixin
    Vue.mixin({
      created() {
        // Record component creation (low priority)
        if (this.$options.name) {
          runWhenIdle(() => {
            logger.log({
              type: 'COMPONENT_CREATED',
              component: this.$options.name,
              timestamp: Date.now()
            })
          })
        }
      },

      mounted() {
        // Record performance entries whose name mentions this component
        if (!this.$options.name || !this.$el) return
        if (typeof PerformanceObserver === 'undefined') return

        const observer = new PerformanceObserver((list) => {
          list.getEntries().forEach((entry) => {
            if (entry.name.includes(this.$options.name)) {
              logger.log({
                type: 'COMPONENT_PERFORMANCE',
                component: this.$options.name,
                entryType: entry.entryType,
                duration: entry.duration,
                timestamp: Date.now()
              })
            }
          })
        })

        try {
          observer.observe({ entryTypes: ['measure', 'paint'] })
          // Keep a handle so the observer can be released in beforeDestroy.
          this._loggerObserver = observer
        } catch (error) {
          // Monitoring is best-effort and must never break the component.
          console.warn('PerformanceObserver could not be started:', error)
        }
      },

      beforeDestroy() {
        // Without this every named component instance leaked one observer.
        if (this._loggerObserver) {
          this._loggerObserver.disconnect()
          this._loggerObserver = null
        }
      },

      errorCaptured(err, vm, info) {
        // Report errors raised by descendant components
        logger.captureException(err, {
          type: 'VUE_ERROR_CAPTURED',
          component: vm?.$options?.name,
          info: info
        })

        // Returning false stops the error from propagating further up
        return false
      }
    })

    // Global accessor
    Vue.logger = logger

    // Directive: v-track[:event]="payload" logs a USER_INTERACTION entry
    Vue.directive('track', {
      bind(el, binding) {
        const eventName = binding.arg || 'click'
        el._trackData = binding.value

        const handler = () => {
          // Defer the logging work so it does not block the interaction
          runWhenIdle(() => {
            logger.log({
              type: 'USER_INTERACTION',
              event: eventName,
              element: el.tagName,
              ...el._trackData,
              timestamp: Date.now()
            })
          })
        }

        el.addEventListener(eventName, handler, { passive: true })
        el._trackHandler = handler
        // Remember the event so unbind removes the listener that was added.
        el._trackEvent = eventName
      },

      update(el, binding) {
        // Keep the payload current when the bound value changes.
        el._trackData = binding.value
      },

      unbind(el) {
        if (el._trackHandler) {
          // The listener used to be removed with a hard-coded 'click', which
          // leaked handlers bound to any other event.
          el.removeEventListener(el._trackEvent || 'click', el._trackHandler)
          el._trackHandler = null
          el._trackEvent = null
          el._trackData = null
        }
      }
    })
  }
}

export default LoggerPlugin

2.3 在项目中使用

import Vue from 'vue'
import App from './App.vue'
import LoggerPlugin from './plugins/logger'

// Register the logger plugin with its options
const loggerOptions = {
  enabled: process.env.NODE_ENV === 'production',
  sampleRate: 0.1 // 10% sampling rate
}
Vue.use(LoggerPlugin, loggerOptions)

// Create the root instance and mount it on #app
const app = new Vue({
  render(createElement) {
    return createElement(App)
  },
})
app.$mount('#app')
<template>
  <div>
    <!-- Use the tracking directive -->
    <button 
      v-track:click="{ action: 'purchase', product: 'premium' }"
      @click="handlePurchase"
    >
      购买
    </button>
    
    <button 
      v-track:mouseover="{ action: 'hover_feature', feature: 'tooltip' }"
      @click="showTooltip"
    >
      显示提示
    </button>
  </div>
</template>

<script>
/**
 * Example page component showing manual use of `this.$logger`.
 *
 * NOTE(review): `showError`, `loadInitialData`, `$api` and `orderId` are used
 * below but not defined in this snippet; they are presumably provided
 * elsewhere in the real component — confirm before reuse.
 */
export default {
  name: 'ProductPage',

  methods: {
    /** Processes the order and records the business event once it succeeded. */
    handlePurchase() {
      // Run the business logic first: 'purchase_completed' used to be logged
      // before processing started, even when the order then failed.
      const succeeded = this.processOrder()
      if (!succeeded) return

      // Manually record the key business event
      this.$logger.log({
        type: 'BUSINESS',
        action: 'purchase_completed',
        product: 'premium',
        amount: 99.99,
        currency: 'USD',
        timestamp: Date.now()
      })
    },

    /**
     * Runs the order logic and reports failures.
     * @returns {boolean} `true` when processing finished without throwing.
     */
    processOrder() {
      try {
        // Business code...
        return true
      } catch (error) {
        // Capture and report the business exception
        this.$logger.captureException(error, {
          type: 'BUSINESS_ERROR',
          context: 'order_processing',
          orderId: this.orderId
        })

        this.showError('订单处理失败')
        return false
      }
    },

    /** Loads user data and records the API timing for both outcomes. */
    async loadUserData() {
      try {
        const response = await this.$api.getUserData()

        // Record API performance
        this.$logger.log({
          type: 'API_PERFORMANCE',
          endpoint: '/api/user',
          duration: response.duration,
          status: 'success',
          timestamp: Date.now()
        })

        return response.data
      } catch (error) {
        this.$logger.log({
          type: 'API_PERFORMANCE',
          endpoint: '/api/user',
          duration: error.duration,
          status: 'error',
          error: error.message,
          timestamp: Date.now()
        })

        throw error
      }
    }
  },

  mounted() {
    // Component load-time monitoring
    const startTime = performance.now()

    this.loadInitialData()
      .then(() => {
        const loadTime = performance.now() - startTime

        this.$logger.log({
          type: 'COMPONENT_LOAD',
          component: 'ProductPage',
          loadTime: loadTime,
          timestamp: Date.now()
        })
      })
      .catch((error) => {
        // Report the failure instead of leaving the rejection unhandled.
        this.$logger.captureException(error, {
          type: 'COMPONENT_LOAD_ERROR',
          component: 'ProductPage'
        })
      })
  }
}
</script>

3. 页面加载优化策略

3.1 关键加载路径优化

/**
 * Page-load monitoring part of the logger: collects navigation, resource and
 * paint timings once the page has loaded and queues them through `this.log`.
 */
class PerformanceLogger {
  // ... other code ...

  /** Records page-load metrics once the document has finished loading. */
  setupPageLoadMonitoring() {
    if (document.readyState === 'complete') {
      this.recordPageLoadMetrics()
    } else {
      window.addEventListener('load', () => {
        // Defer with setTimeout so this runs after the load event has finished
        setTimeout(() => {
          this.recordPageLoadMetrics()
        }, 0)
      })
    }
  }

  /** Collects all timing groups without blocking the current task. */
  recordPageLoadMetrics() {
    const captureAll = () => {
      this.captureNavigationTiming()
      this.captureResourceTiming()
      this.capturePaintTiming()
    }

    if ('requestIdleCallback' in window) {
      requestIdleCallback(captureAll)
    } else {
      // Fallback: a real "next task". The previous Promise.resolve().then()
      // ran as a microtask, i.e. still inside the current task.
      setTimeout(captureAll, 0)
    }
  }

  /** Queues one NAVIGATION_TIMING entry derived from the navigation entry. */
  captureNavigationTiming() {
    const navigation = performance.getEntriesByType('navigation')[0]
    if (!navigation) return

    // Navigation entries expose `startTime` as their time origin.
    // `navigationStart` only exists on the legacy `performance.timing`
    // object, so subtracting it produced NaN.
    const origin = navigation.startTime

    this.log({
      type: 'NAVIGATION_TIMING',
      dnsLookup: navigation.domainLookupEnd - navigation.domainLookupStart,
      tcp: navigation.connectEnd - navigation.connectStart,
      ttfb: navigation.responseStart - navigation.requestStart,
      response: navigation.responseEnd - navigation.responseStart,
      domContentLoaded: navigation.domContentLoadedEventEnd - origin,
      load: navigation.loadEventEnd - origin,
      timestamp: Date.now()
    })
  }

  /** Queues RESOURCE_TIMING entries for resources named "critical" or slower than 1s. */
  captureResourceTiming() {
    // Only report the resources that matter
    const criticalResources = performance.getEntriesByType('resource').filter(entry =>
      entry.name.includes('critical') ||
      entry.duration > 1000
    )

    criticalResources.forEach(entry => {
      this.log({
        type: 'RESOURCE_TIMING',
        name: entry.name,
        duration: entry.duration,
        size: entry.transferSize,
        timestamp: Date.now()
      })
    })
  }

  /** Queues one PAINT_TIMING entry per paint entry. */
  capturePaintTiming() {
    const paints = performance.getEntriesByType('paint')
    paints.forEach(entry => {
      this.log({
        type: 'PAINT_TIMING',
        name: entry.name,
        value: entry.startTime,
        timestamp: Date.now()
      })
    })
  }
}

3.2 智能采样和节流

/**
 * Decides which events get reported.
 *
 * Each browser profile draws one random number in [0, 1) and keeps it in
 * localStorage; an event type is sampled when that number is below the type's
 * rate, so a given user is consistently in or out of the sample.
 */
class SmartSampler {
  /**
   * @param {Object<string, number>} [sampleRates] - Per-type rate overrides
   *   in [0, 1]; a rate of 0 disables the type.
   */
  constructor(sampleRates = {}) {
    this.sampleRates = {
      ERROR: 1.0,        // errors: always reported
      PERFORMANCE: 0.1,  // performance data: 10%
      USER_INTERACTION: 0.01, // user interactions: 1%
      BUSINESS: 0.5,     // business events: 50%
      ...sampleRates
    }

    this.userSampleRate = this.getUserSampleRate()
  }

  /**
   * Returns the stable per-user random value, creating it on first use.
   * Falls back to a per-session value when storage is unavailable.
   * @returns {number} A value in [0, 1).
   */
  getUserSampleRate() {
    const key = 'user_sample_rate'

    try {
      const stored = parseFloat(localStorage.getItem(key))
      // NaN (missing or corrupted entry) fails both comparisons and is
      // regenerated; a NaN rate would otherwise never match any sample rate.
      if (stored >= 0 && stored < 1) {
        return stored
      }

      const rate = Math.random()
      localStorage.setItem(key, rate.toString())
      return rate
    } catch {
      // localStorage can throw (disabled storage, quota); sampling must still work.
      return Math.random()
    }
  }

  /**
   * @param {string} eventType
   * @param {number} [customRate] - Overrides the configured rate; 0 is honoured.
   * @returns {boolean} Whether the event should be reported.
   */
  shouldSample(eventType, customRate) {
    // Error events are always reported
    if (eventType === 'ERROR') return true

    // `??` instead of `||`: an explicit rate of 0 means "never", it must not
    // fall through to the next default.
    const rate = customRate ?? this.sampleRates[eventType] ?? 0.1

    // Strict comparison so that a rate of 0 can never match.
    return this.userSampleRate < rate
  }

  /**
   * Creates a throttled logging function for high-frequency events.
   *
   * @param {{log: Function}} logger - Target logger.
   * @param {string} eventType - Type written to the logged entry.
   * @param {number} [interval=1000] - Minimum milliseconds between entries
   *   that share a key (`data.key`, or the event type when absent).
   * @returns {Function} `(data) => void`
   */
  createThrottledLogger(logger, eventType, interval = 1000) {
    const lastLogTimes = new Map()

    return (data) => {
      const key = data.key || eventType
      const lastTime = lastLogTimes.get(key) || 0
      const now = Date.now()

      if (now - lastTime >= interval) {
        lastLogTimes.set(key, now)
        logger.log({
          type: eventType,
          ...data,
          timestamp: now
        })
      }
    }
  }
}

// 在 logger 中使用
/**
 * Sampling/throttling part of the logger: `smartLog` drops events rejected by
 * the sampler and routes high-frequency event types through a per-type
 * throttled logger before they reach `this.log`.
 */
class PerformanceLogger {
  constructor() {
    this.sampler = new SmartSampler()
    this.throttledLoggers = new Map()
  }

  /** Logs `data` if it passes sampling; high-frequency types are throttled. */
  smartLog(data) {
    const sampled = this.sampler.shouldSample(data.type)

    if (sampled) {
      if (this.isHighFrequencyEvent(data.type)) {
        const throttled = this.getThrottledLogger(data.type)
        throttled(data)
      } else {
        this.log(data)
      }
    }
  }

  /** Returns true for event types that are expected to fire very often. */
  isHighFrequencyEvent(eventType) {
    const highFrequencyTypes = ['USER_INTERACTION', 'PERFORMANCE']
    return highFrequencyTypes.includes(eventType)
  }

  /** Returns the cached throttled logger for `eventType`, creating it on first use. */
  getThrottledLogger(eventType) {
    const cache = this.throttledLoggers

    if (cache.has(eventType)) {
      return cache.get(eventType)
    }

    const interval = this.getThrottleInterval(eventType)
    cache.set(eventType, this.sampler.createThrottledLogger(this, eventType, interval))
    return cache.get(eventType)
  }

  /** Returns the throttle interval in milliseconds for `eventType` (1000 by default). */
  getThrottleInterval(eventType) {
    const intervalByType = {
      USER_INTERACTION: 1000,    // 1 second
      PERFORMANCE: 5000,         // 5 seconds
      BUSINESS: 1000             // 1 second
    }
    const configured = intervalByType[eventType]
    return configured ? configured : 1000
  }
}

4. 假设不调用接口的上报方案

4.1 本地存储 + 延迟上报

/**
 * Offline fallback for a logger: batches that cannot be uploaded are kept in
 * localStorage and uploaded later through the logger's original transport.
 *
 * Call `enhanceLogger(mainLogger)` once to attach it; until then there is no
 * transport and `flushOfflineLogs` is a no-op.
 */
class OfflineLogger {
  constructor() {
    this.storageKey = 'offline_logs'
    this.maxOfflineSize = 100
    this.isOnline = navigator.onLine
    this.isFlushing = false
    this.setupOnlineHandler()
  }

  /** Tracks connectivity and uploads stored logs when the browser comes back online. */
  setupOnlineHandler() {
    window.addEventListener('online', () => {
      this.isOnline = true
      this.flushOfflineLogs()
    })

    window.addEventListener('offline', () => {
      this.isOnline = false
    })
  }

  /**
   * Persists one entry.
   * @param {Object} data
   * @returns {boolean} Whether the entry was written to localStorage.
   */
  storeLocally(data) {
    return this.storeBatch([data])
  }

  /**
   * Persists several entries with a single read/write, keeping at most
   * `maxOfflineSize` of the newest entries.
   * @param {Object[]} entries
   * @returns {boolean} Whether the entries were written to localStorage.
   */
  storeBatch(entries) {
    const storedAt = Date.now()
    const logs = this.getStoredLogs().concat(
      entries.map(entry => ({ ...entry, storedAt }))
    )

    // First try the configured cap; if the write fails (typically quota),
    // retry once keeping only the newest 50. The retry is bounded: the old
    // implementation called itself again and could recurse or throw when
    // storage kept failing.
    for (const limit of [this.maxOfflineSize, 50]) {
      try {
        const kept = logs.slice(Math.max(logs.length - limit, 0))
        localStorage.setItem(this.storageKey, JSON.stringify(kept))
        return true
      } catch {
        // try the smaller cap, then give up below
      }
    }

    console.warn('Unable to persist offline logs to localStorage')
    return false
  }

  /**
   * Reads the stored entries.
   * @returns {Object[]} Stored entries, or an empty array when nothing valid is stored.
   */
  getStoredLogs() {
    try {
      const parsed = JSON.parse(localStorage.getItem(this.storageKey) || '[]')
      // Anything that is not an array is treated as corrupted data.
      return Array.isArray(parsed) ? parsed : []
    } catch {
      return []
    }
  }

  /** Trims the stored entries to the newest 50. */
  clearOldLogs() {
    const recentLogs = this.getStoredLogs().slice(-50)
    try {
      localStorage.setItem(this.storageKey, JSON.stringify(recentLogs))
    } catch (error) {
      console.warn('Unable to trim offline logs:', error)
    }
  }

  /** Removes the entries that were just uploaded, keeping any stored meanwhile. */
  removeSentLogs(sentLogs) {
    const sentKeys = new Set(sentLogs.map(log => JSON.stringify(log)))
    const remaining = this.getStoredLogs().filter(
      log => !sentKeys.has(JSON.stringify(log))
    )

    try {
      if (remaining.length === 0) {
        localStorage.removeItem(this.storageKey)
      } else {
        localStorage.setItem(this.storageKey, JSON.stringify(remaining))
      }
    } catch (error) {
      console.warn('Unable to update offline logs:', error)
    }
  }

  /** Uploads the stored entries through the transport attached by `enhanceLogger`. */
  async flushOfflineLogs() {
    if (!this.isOnline || this.isFlushing) return
    // No transport yet: this class has no sendBatch of its own, so calling it
    // before enhanceLogger() used to fail on every attempt.
    if (typeof this.sendBatch !== 'function') return

    const logs = this.getStoredLogs()
    if (logs.length === 0) return

    this.isFlushing = true
    try {
      await this.sendBatch(logs)
      // Remove only what was sent; entries stored during the upload stay.
      this.removeSentLogs(logs)
    } catch (error) {
      console.warn('离线日志上报失败:', error)
      // Keep the logs and retry next time
    } finally {
      this.isFlushing = false
    }
  }

  /**
   * Wraps `mainLogger.sendBatch` with the offline fallback.
   *
   * After this call the wrapped `sendBatch`:
   *  - stores the batch locally while offline,
   *  - stores the batch locally when the upload fails and then resolves, so
   *    the caller does not re-queue (and later duplicate) the same entries,
   *  - rejects only when the batch could be neither uploaded nor stored.
   *
   * @param {{sendBatch: Function}} mainLogger
   */
  enhanceLogger(mainLogger) {
    const originalSendBatch = mainLogger.sendBatch.bind(mainLogger)

    // Stored logs are uploaded through the logger's original transport.
    this.sendBatch = originalSendBatch

    mainLogger.sendBatch = async (batch) => {
      if (!this.isOnline) {
        // Offline: keep the batch locally
        if (this.storeBatch(batch)) return
        throw new Error('Offline and unable to persist logs')
      }

      let result
      try {
        result = await originalSendBatch(batch)
      } catch (error) {
        // Upload failed: fall back to local storage
        if (this.storeBatch(batch)) return
        throw error
      }

      // The transport works again: opportunistically drain the backlog.
      // flushOfflineLogs handles its own errors, so it is safe not to await.
      this.flushOfflineLogs()
      return result
    }
  }
}

4.2 Service Worker 代理上报

// Activate the new service worker without waiting for old ones to go away.
self.addEventListener('install', (event) => {
  event.waitUntil(self.skipWaiting())
})

// Take control of already-open pages as soon as the worker activates.
self.addEventListener('activate', (event) => {
  event.waitUntil(self.clients.claim())
})

// Proxy log uploads: forward them to the network and, when the network
// fails, store the payload in IndexedDB and answer 202 instead.
self.addEventListener('fetch', (event) => {
  const { request } = event
  if (!request.url.includes('/api/logs')) return

  // Clone BEFORE fetch(): fetch consumes the request body, and cloning a
  // request whose body has already been used throws, which made the offline
  // fallback below fail every time.
  const fallbackCopy = request.clone()

  event.respondWith(
    (async () => {
      try {
        return await fetch(request)
      } catch (error) {
        // Network failure: store the payload in IndexedDB
        const logData = await fallbackCopy.json()
        await storeLogsInIDB(logData)
        return new Response(JSON.stringify({ success: true }), {
          status: 202,
          headers: { 'Content-Type': 'application/json' }
        })
      }
    })()
  )
})

/**
 * Stores log events in the `logs` object store of the `LogsDB` IndexedDB
 * database (created on first use).
 *
 * @param {{events: Object[]}|Object[]} logs - Upload payload (`{ events }`)
 *   or a plain array of events.
 * @returns {Promise<void>} Resolves once the transaction has completed;
 *   resolves immediately when there is nothing to store. Rejects when the
 *   database cannot be opened or the write fails.
 */
async function storeLogsInIDB(logs) {
  const events = Array.isArray(logs) ? logs : logs?.events
  if (!Array.isArray(events) || events.length === 0) return

  return new Promise((resolve, reject) => {
    const request = indexedDB.open('LogsDB', 1)

    request.onupgradeneeded = (event) => {
      const db = event.target.result
      if (!db.objectStoreNames.contains('logs')) {
        db.createObjectStore('logs', { autoIncrement: true })
      }
    }

    request.onsuccess = (event) => {
      const db = event.target.result
      let transaction

      try {
        transaction = db.transaction(['logs'], 'readwrite')
        const store = transaction.objectStore('logs')
        events.forEach(log => {
          store.add(log)
        })
      } catch (error) {
        // An exception thrown inside this callback would otherwise leave the
        // promise pending forever.
        db.close()
        reject(error)
        return
      }

      // Close the connection on every outcome so it is not leaked.
      transaction.oncomplete = () => {
        db.close()
        resolve()
      }
      const fail = () => {
        db.close()
        reject(transaction.error)
      }
      transaction.onerror = fail
      transaction.onabort = fail
    }

    request.onerror = () => reject(request.error)
  })
}

5. 性能影响评估和监控

/**
 * Watches the performance cost of logging: listens for long tasks, lowers
 * the sampling rates of the global `logger` when one is seen, and can wrap
 * methods to warn when a single call takes too long.
 */
class PerformanceGuard {
  constructor() {
    this.longTaskThreshold = 50 // 50ms
    // Lower bound for automatic rate reduction, so repeated halving cannot
    // silently switch monitoring off.
    this.minSampleRate = 0.001
    this.monitorLoggerPerformance()
  }

  /** Starts observing `longtask` entries when the browser supports it. */
  monitorLoggerPerformance() {
    if (typeof PerformanceObserver === 'undefined') return

    const observer = new PerformanceObserver((list) => {
      list.getEntries().forEach((entry) => {
        if (entry.duration > this.longTaskThreshold) {
          console.warn(`Logger 可能引起长任务: ${entry.duration}ms`)

          // Back off automatically
          this.adjustLoggingStrategy()
        }
      })
    })

    try {
      observer.observe({ entryTypes: ['longtask'] })
    } catch (error) {
      // Self-monitoring is optional; never let it break the page.
      console.warn('Long task monitoring is unavailable:', error)
    }
  }

  /** Halves the USER_INTERACTION and PERFORMANCE sample rates, down to `minSampleRate`. */
  adjustLoggingStrategy() {
    // `logger` is the shared logger instance defined outside this class.
    const sampler = typeof logger !== 'undefined' ? logger?.sampler : undefined
    const rates = sampler?.sampleRates
    if (!rates) return

    for (const type of ['USER_INTERACTION', 'PERFORMANCE']) {
      const current = rates[type]
      // Rates at or below the floor (including 0 = disabled) are left alone.
      if (typeof current !== 'number' || current <= this.minSampleRate) continue
      rates[type] = Math.max(current * 0.5, this.minSampleRate)
    }

    console.warn('检测到性能问题,自动降低采样率')
  }

  /**
   * Wraps `fn` so that calls taking longer than 10ms produce a warning.
   *
   * @param {string} methodName - Name used in the warning message.
   * @param {Function} fn - Function to wrap.
   * @returns {Function} Wrapper with the same arguments and return value,
   *   calling `fn` with the wrapper's own `this`.
   */
  measureExecution(methodName, fn) {
    // A regular function (not an arrow) is required here: the wrapper is
    // installed as a method on another object, and `fn` must run with that
    // object as `this`. An arrow function would call it with this guard.
    return function measured(...args) {
      const startTime = performance.now()
      try {
        return fn.apply(this, args)
      } finally {
        const elapsed = performance.now() - startTime
        if (elapsed > 10) { // warn above 10ms
          console.warn(`Logger.${methodName} 执行时间过长: ${elapsed}ms`)
        }
      }
    }
  }
}

// Wrap logger.log so every call is timed by the guard.
// The guard has to be created first (it was referenced without ever being
// instantiated), and the original method is bound to the logger so it keeps
// its `this` no matter how the wrapper invokes it.
const performanceGuard = new PerformanceGuard()
const originalLog = logger.log.bind(logger)
logger.log = performanceGuard.measureExecution('log', originalLog)

总的来说,为了确保日志上报系统在提供完整功能的同时,将对业务页面性能的影响降到最低,在实际项目的设计开发过程中,关键要实现以下几点:

  1. 异步化一切:使用 requestIdleCallback、setTimeout、Promise 等 API
  2. 懒加载:非核心功能延迟初始化
  3. 批量处理:合并请求,减少网络开销
  4. 智能采样:根据事件类型和用户特征采样
  5. 失败降级:网络失败时降级到本地存储
  6. 性能自监控:监控 logger 自身的性能影响