第十章:性能优化与监控
本章字数:约38000字 阅读时间:约130分钟 难度等级:★★★★☆
声明:本文中的公司名称、包名、API地址、密钥等均已脱敏处理。文中的"梦想世界"、"dreamworld"等均为虚构名称,与任何真实公司无关。
引言
在前面的章节中,我们构建了一个功能完整的数据抓取系统。但在生产环境中,仅仅"能用"是不够的,我们还需要关注:
- 性能:系统能否高效处理大量数据?
- 稳定性:长时间运行是否会出现问题?
- 可观测性:出现问题时能否快速定位?
本章将深入探讨性能优化和监控体系的构建。
10.1 性能瓶颈分析
10.1.1 性能分析方法论
在优化之前,我们首先需要找到性能瓶颈。盲目优化往往事倍功半。
┌─────────────────────────────────────────────────────────────────┐
│ 性能分析流程 │
├─────────────────────────────────────────────────────────────────┤
│ │
│ ┌──────────┐ │
│ │ 建立基准 │ 测量当前性能指标 │
│ └────┬─────┘ │
│ │ │
│ ▼ │
│ ┌──────────┐ │
│ │ 识别瓶颈 │ 找出最慢的环节 │
│ └────┬─────┘ │
│ │ │
│ ▼ │
│ ┌──────────┐ │
│ │ 分析原因 │ 理解为什么慢 │
│ └────┬─────┘ │
│ │ │
│ ▼ │
│ ┌──────────┐ │
│ │ 实施优化 │ 针对性改进 │
│ └────┬─────┘ │
│ │ │
│ ▼ │
│ ┌──────────┐ │
│ │ 验证效果 │ 对比优化前后 │
│ └──────────┘ │
│ │
└─────────────────────────────────────────────────────────────────┘
10.1.2 关键性能指标
package com.dreamworld.metrics;
/**
 * Names of the performance metrics used by the crawler.
 *
 * <p>This is a pure constants holder: it cannot be instantiated or subclassed.
 */
public final class PerformanceMetrics {
    // Throughput metrics
    public static final String REQUESTS_PER_SECOND = "requests_per_second";
    public static final String PRODUCTS_PER_MINUTE = "products_per_minute";

    // Latency metrics
    public static final String API_LATENCY_P50 = "api_latency_p50";
    public static final String API_LATENCY_P95 = "api_latency_p95";
    public static final String API_LATENCY_P99 = "api_latency_p99";
    public static final String DB_LATENCY_AVG = "db_latency_avg";

    // Resource metrics
    public static final String CPU_USAGE = "cpu_usage";
    public static final String MEMORY_USAGE = "memory_usage";
    public static final String HEAP_USAGE = "heap_usage";
    public static final String GC_PAUSE_TIME = "gc_pause_time";

    // Error metrics
    public static final String ERROR_RATE = "error_rate";
    public static final String RETRY_RATE = "retry_rate";
    public static final String TIMEOUT_RATE = "timeout_rate";

    private PerformanceMetrics() {
        // Constants holder; no instances.
    }
}
10.1.3 性能基准测试
package com.dreamworld.benchmark;
import com.dreamworld.crawler.ProductApiClient;
import com.dreamworld.crawler.ProductCrawlTask;
import com.dreamworld.storage.ProductRepository;
import com.dreamworld.utils.LogUtils;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
/**
 * Performance benchmark harness.
 *
 * <p>Each iteration performs one product-list API call and one repository count query,
 * recording the latency of both. After all iterations the throughput, error rate,
 * latency percentiles and heap usage are computed, logged and returned.
 *
 * <p>Instances keep their samples in plain lists and are intended to be driven from a
 * single thread.
 */
public class PerformanceBenchmark {
    private static final String TAG = "Benchmark";
    private static final long NANOS_PER_MILLI = 1_000_000L;

    private final ProductApiClient apiClient;
    private final ProductRepository repository;

    // Collected samples and counters
    private final List<Long> apiLatencies = new ArrayList<>();
    private final List<Long> dbLatencies = new ArrayList<>();
    private final AtomicLong totalRequests = new AtomicLong(0);
    private final AtomicLong totalErrors = new AtomicLong(0);

    public PerformanceBenchmark(ProductApiClient apiClient, ProductRepository repository) {
        this.apiClient = apiClient;
        this.repository = repository;
    }

    /**
     * Runs the benchmark.
     *
     * @param iterations number of API-call / DB-operation pairs to execute
     * @return the aggregated measurements; rates are 0 when nothing could be measured
     */
    public BenchmarkResult runBenchmark(int iterations) {
        LogUtils.separator("开始性能基准测试");
        LogUtils.i(TAG, "迭代次数: " + iterations);

        Instant startTime = Instant.now();

        // Reset statistics from any previous run
        apiLatencies.clear();
        dbLatencies.clear();
        totalRequests.set(0);
        totalErrors.set(0);

        for (int i = 0; i < iterations; i++) {
            testApiCall();
            testDbOperation();
        }

        long elapsedMs = Duration.between(startTime, Instant.now()).toMillis();
        long requests = totalRequests.get();

        BenchmarkResult result = new BenchmarkResult();
        result.iterations = iterations;
        result.totalDurationMs = elapsedMs;
        // Use millisecond resolution: whole seconds truncate to 0 for short runs, which
        // previously produced Infinity/NaN.
        result.requestsPerSecond = elapsedMs > 0 ? requests * 1000.0 / elapsedMs : 0;
        // totalRequests counts every attempt, so this is failures / attempts.
        result.errorRate = requests > 0 ? (double) totalErrors.get() / requests : 0;

        // API latency statistics
        result.apiLatencyP50 = percentile(apiLatencies, 50);
        result.apiLatencyP95 = percentile(apiLatencies, 95);
        result.apiLatencyP99 = percentile(apiLatencies, 99);
        result.apiLatencyAvg = average(apiLatencies);

        // DB latency statistics
        result.dbLatencyAvg = average(dbLatencies);

        // Heap statistics
        Runtime runtime = Runtime.getRuntime();
        result.heapUsedMb = (runtime.totalMemory() - runtime.freeMemory()) / 1024 / 1024;
        result.heapMaxMb = runtime.maxMemory() / 1024 / 1024;

        printResult(result);
        return result;
    }

    /** Performs one API call, counting the attempt and any failure, and records its latency. */
    private void testApiCall() {
        long start = System.nanoTime();
        Exception failure = null;
        totalRequests.incrementAndGet();
        try {
            apiClient.getProductList(1, 10, null, null);
        } catch (Exception e) {
            totalErrors.incrementAndGet();
            failure = e;
        }
        apiLatencies.add(elapsedMillis(start));
        // Log after the sample is recorded so logging cost is not part of the latency.
        if (failure != null) {
            LogUtils.e(TAG, "基准测试API调用失败", failure);
        }
    }

    /** Performs one repository count query and records its latency. */
    private void testDbOperation() {
        long start = System.nanoTime();
        Exception failure = null;
        try {
            repository.count();
        } catch (Exception e) {
            failure = e;
        }
        dbLatencies.add(elapsedMillis(start));
        if (failure != null) {
            LogUtils.e(TAG, "基准测试DB操作失败", failure);
        }
    }

    /** Milliseconds elapsed since {@code startNanos}, measured on the monotonic clock. */
    private static long elapsedMillis(long startNanos) {
        return (System.nanoTime() - startNanos) / NANOS_PER_MILLI;
    }

    /** Nearest-rank percentile of {@code values}; 0 when there are no samples. */
    private long percentile(List<Long> values, int percentile) {
        if (values.isEmpty()) {
            return 0;
        }
        List<Long> sorted = new ArrayList<>(values);
        sorted.sort(Long::compareTo);
        int index = (int) Math.ceil(percentile / 100.0 * sorted.size()) - 1;
        return sorted.get(Math.max(0, index));
    }

    /** Arithmetic mean of {@code values}; 0 when there are no samples. */
    private double average(List<Long> values) {
        return values.stream().mapToLong(Long::longValue).average().orElse(0);
    }

    private void printResult(BenchmarkResult result) {
        LogUtils.separator("基准测试结果");
        LogUtils.i(TAG, String.format("总耗时: %d ms", result.totalDurationMs));
        LogUtils.i(TAG, String.format("吞吐量: %.2f req/s", result.requestsPerSecond));
        LogUtils.i(TAG, String.format("错误率: %.2f%%", result.errorRate * 100));
        LogUtils.i(TAG, String.format("API延迟 P50: %d ms", result.apiLatencyP50));
        LogUtils.i(TAG, String.format("API延迟 P95: %d ms", result.apiLatencyP95));
        LogUtils.i(TAG, String.format("API延迟 P99: %d ms", result.apiLatencyP99));
        LogUtils.i(TAG, String.format("DB延迟 AVG: %.2f ms", result.dbLatencyAvg));
        LogUtils.i(TAG, String.format("堆内存: %d / %d MB", result.heapUsedMb, result.heapMaxMb));
    }

    /**
     * Benchmark result. Latencies are in milliseconds, heap sizes in MB, and
     * {@code errorRate} is a fraction in the range 0..1.
     */
    public static class BenchmarkResult {
        public int iterations;
        public long totalDurationMs;
        public double requestsPerSecond;
        public double errorRate;
        public long apiLatencyP50;
        public long apiLatencyP95;
        public long apiLatencyP99;
        public double apiLatencyAvg;
        public double dbLatencyAvg;
        public long heapUsedMb;
        public long heapMaxMb;
    }
}
10.1.4 瓶颈识别
通过基准测试,我们通常会发现以下瓶颈:
┌─────────────────────────────────────────────────────────────────┐
│ 常见性能瓶颈 │
├─────────────────────────────────────────────────────────────────┤
│ │
│ 瓶颈类型 表现 原因 │
│ ───────────────────────────────────────────────────────────── │
│ 网络I/O API延迟高 串行请求 │
│ 吞吐量低 连接复用差 │
│ │
│ CPU CPU使用率高 JSON解析 │
│ 响应变慢 签名计算 │
│ │
│ 内存 OOM异常 对象创建过多 │
│ GC频繁 大对象未释放 │
│ │
│ 数据库 DB延迟高 无索引 │
│ 连接超时 连接池过小 │
│ │
│ Unidbg 初始化慢 每次重新加载 │
│ 内存占用大 模拟器实例过多 │
│ │
└─────────────────────────────────────────────────────────────────┘
10.2 并发优化
10.2.1 并发抓取架构
串行抓取效率低下,我们需要引入并发来提升吞吐量:
┌─────────────────────────────────────────────────────────────────┐
│ 并发抓取架构 │
├─────────────────────────────────────────────────────────────────┤
│ │
│ ┌──────────────────────────────────────────────────────┐ │
│ │ 任务调度器 │ │
│ └──────────────────────────────────────────────────────┘ │
│ │ │
│ ┌─────────────┼─────────────┐ │
│ ▼ ▼ ▼ │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │ Worker 1 │ │ Worker 2 │ │ Worker N │ │
│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │
│ │ │ │ │
│ ▼ ▼ ▼ │
│ ┌─────────────────────────────────────────────────────┐ │
│ │ HTTP连接池 │ │
│ └─────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────┐ │
│ │ 结果聚合器 │ │
│ └─────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────┐ │
│ │ 批量写入器 │ │
│ └─────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────┘
10.2.2 并发抓取实现
package com.dreamworld.crawler;
import com.dreamworld.model.Product;
import com.dreamworld.storage.ProductRepository;
import com.dreamworld.utils.LogUtils;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * Concurrent product crawl task.
 *
 * <p>Fetches the first page synchronously to learn the page count, then fetches the
 * remaining pages on a worker pool. Requests are throttled by a semaphore that a
 * background task tops up once per second, and fetched products are written to the
 * repository in batches.
 *
 * <p>Call {@link #shutdown()} when the task is no longer needed; it stops both the
 * worker pool and the rate-limiter refill thread.
 */
public class ConcurrentProductCrawlTask {
    private static final String TAG = "ConcurrentCrawl";

    private final ProductApiClient apiClient;
    private final ProductRepository repository;
    private final ThreadPoolExecutor executor;
    private final Semaphore rateLimiter;
    private final ScheduledExecutorService refillExecutor;

    // Configuration
    private int concurrency = 5;                    // number of worker threads
    private int pageSize = 50;                      // products per page
    private int batchSize = 100;                    // products per repository write
    private volatile int maxRequestsPerSecond = 10; // read by the refill thread

    // Statistics
    private final AtomicInteger totalCount = new AtomicInteger(0);
    private final AtomicInteger successCount = new AtomicInteger(0);
    private final AtomicInteger failCount = new AtomicInteger(0);

    public ConcurrentProductCrawlTask(ProductApiClient apiClient, ProductRepository repository) {
        this.apiClient = apiClient;
        this.repository = repository;
        // Same configuration as Executors.newFixedThreadPool, but kept as a
        // ThreadPoolExecutor so that setConcurrency can resize it later.
        this.executor = new ThreadPoolExecutor(
                concurrency, concurrency,
                0L, TimeUnit.MILLISECONDS,
                new LinkedBlockingQueue<>(),
                r -> newDaemonThread(r, "CrawlWorker"));
        this.rateLimiter = new Semaphore(maxRequestsPerSecond);
        this.refillExecutor = startRateLimiterRefill();
    }

    /**
     * Crawls all pages concurrently and saves the products in batches.
     *
     * @return the crawl summary; {@code success} is false when the crawl aborted
     */
    public CrawlResult crawlAllConcurrently() {
        LogUtils.separator("开始并发全量抓取");
        LocalDateTime startTime = LocalDateTime.now();
        resetCounters();

        try {
            // 1. Fetch the first page to learn the total number of pages
            ProductApiClient.ProductListResult firstPage =
                    apiClient.getProductList(1, pageSize, null, null);
            if (firstPage == null) {
                return createResult(startTime, "获取第一页失败");
            }

            int totalPages = firstPage.getPages();
            int total = firstPage.getTotal();
            LogUtils.i(TAG, String.format("总商品数: %d, 总页数: %d, 并发数: %d",
                    total, totalPages, concurrency));

            // 2. Save the first page
            saveProductsBatch(firstPage.getList());

            // 3. Submit one fetch task per remaining page
            List<CompletableFuture<List<Product>>> futures = new ArrayList<>();
            for (int page = 2; page <= totalPages; page++) {
                final int currentPage = page;
                futures.add(CompletableFuture.supplyAsync(
                        () -> fetchPage(currentPage), executor));
            }

            // 4. Collect results in page order, flushing whenever a batch fills up
            List<Product> batch = new ArrayList<>();
            boolean interrupted = false;
            for (CompletableFuture<List<Product>> future : futures) {
                try {
                    List<Product> products = future.get(60, TimeUnit.SECONDS);
                    if (products == null) {
                        // fetchPage returns null when the page could not be fetched
                        failCount.incrementAndGet();
                        continue;
                    }
                    batch.addAll(products);
                    if (batch.size() >= batchSize) {
                        saveProductsBatch(batch);
                        batch.clear();
                    }
                } catch (InterruptedException e) {
                    // Restore the flag and stop waiting; later get() calls would
                    // fail immediately anyway.
                    Thread.currentThread().interrupt();
                    interrupted = true;
                    break;
                } catch (Exception e) {
                    LogUtils.e(TAG, "获取页面结果失败", e);
                    failCount.incrementAndGet();
                }
            }

            // Save whatever is left over
            if (!batch.isEmpty()) {
                saveProductsBatch(batch);
            }

            return createResult(startTime, interrupted ? "抓取被中断" : null);
        } catch (Exception e) {
            LogUtils.e(TAG, "并发抓取异常", e);
            return createResult(startTime, describeError(e));
        }
    }

    /**
     * Fetches a single page, waiting for a rate-limiter permit first.
     *
     * @return the products on the page, or {@code null} if the fetch failed
     */
    private List<Product> fetchPage(int page) {
        try {
            // Acquire a permit (rate limiting); permits are topped up by the refill task
            rateLimiter.acquire();
            LogUtils.d(TAG, "抓取第 " + page + " 页");

            ProductApiClient.ProductListResult result =
                    apiClient.getProductList(page, pageSize, null, null);
            if (result != null && result.getList() != null) {
                return result.getList();
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        } catch (Exception e) {
            LogUtils.e(TAG, "抓取第 " + page + " 页失败", e);
        }
        return null;
    }

    /**
     * Stamps and saves a batch of products, updating the counters. Save failures are
     * logged and counted, not propagated.
     */
    private void saveProductsBatch(List<Product> products) {
        if (products == null || products.isEmpty()) {
            return;
        }
        // Count every product we attempt to save so total == success + failed saves.
        totalCount.addAndGet(products.size());
        try {
            LocalDateTime now = LocalDateTime.now();
            for (Product product : products) {
                product.setCrawlTime(now);
                product.setCrawlSource("concurrent-api");
            }
            repository.saveProducts(products);
            successCount.addAndGet(products.size());
            LogUtils.d(TAG, "批量保存 " + products.size() + " 个商品");
        } catch (Exception e) {
            LogUtils.e(TAG, "批量保存失败", e);
            failCount.addAndGet(products.size());
        }
    }

    /**
     * Starts the task that tops the semaphore back up to {@code maxRequestsPerSecond}
     * once per second.
     *
     * @return the scheduler running the refill task, so that it can be shut down
     */
    private ScheduledExecutorService startRateLimiterRefill() {
        ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor(
                r -> newDaemonThread(r, "RateLimiterRefill"));
        scheduler.scheduleAtFixedRate(() -> {
            int permits = maxRequestsPerSecond - rateLimiter.availablePermits();
            if (permits > 0) {
                rateLimiter.release(permits);
            }
        }, 1, 1, TimeUnit.SECONDS);
        return scheduler;
    }

    /** Creates a daemon thread so that idle pools never keep the JVM alive. */
    private static Thread newDaemonThread(Runnable r, String name) {
        Thread t = new Thread(r, name);
        t.setDaemon(true);
        return t;
    }

    /** Returns a non-null description of {@code e}, even when it carries no message. */
    private static String describeError(Exception e) {
        String message = e.getMessage();
        return message != null ? message : e.toString();
    }

    private void resetCounters() {
        totalCount.set(0);
        successCount.set(0);
        failCount.set(0);
    }

    /** Builds and logs the crawl summary; a non-null {@code error} marks the crawl as failed. */
    private CrawlResult createResult(LocalDateTime startTime, String error) {
        CrawlResult result = new CrawlResult();
        result.startTime = startTime;
        result.endTime = LocalDateTime.now();
        result.totalCount = totalCount.get();
        result.successCount = successCount.get();
        result.failCount = failCount.get();
        result.error = error;
        result.success = error == null;

        java.time.Duration elapsed = java.time.Duration.between(startTime, result.endTime);
        long durationSeconds = elapsed.toSeconds();
        long durationMs = elapsed.toMillis();
        // Millisecond resolution keeps the throughput meaningful for sub-second runs.
        result.productsPerSecond = durationMs > 0
                ? result.successCount * 1000.0 / durationMs : 0;

        LogUtils.separator("并发抓取完成");
        LogUtils.i(TAG, "总数: " + result.totalCount);
        LogUtils.i(TAG, "成功: " + result.successCount);
        LogUtils.i(TAG, "失败: " + result.failCount);
        LogUtils.i(TAG, "耗时: " + durationSeconds + "秒");
        LogUtils.i(TAG, String.format("吞吐量: %.2f 商品/秒", result.productsPerSecond));
        return result;
    }

    /**
     * Shuts down the worker pool (waiting up to 30 seconds for running tasks) and stops
     * the rate-limiter refill task.
     */
    public void shutdown() {
        refillExecutor.shutdownNow();
        executor.shutdown();
        try {
            if (!executor.awaitTermination(30, TimeUnit.SECONDS)) {
                executor.shutdownNow();
            }
        } catch (InterruptedException e) {
            executor.shutdownNow();
            Thread.currentThread().interrupt();
        }
    }

    // Setters

    /**
     * Sets the number of worker threads and resizes the running pool accordingly.
     *
     * @throws IllegalArgumentException if {@code concurrency} is not positive
     */
    public void setConcurrency(int concurrency) {
        if (concurrency <= 0) {
            throw new IllegalArgumentException("concurrency must be positive: " + concurrency);
        }
        this.concurrency = concurrency;
        // ThreadPoolExecutor requires core <= max at all times, so the order of the two
        // updates depends on whether the pool grows or shrinks.
        if (concurrency > executor.getMaximumPoolSize()) {
            executor.setMaximumPoolSize(concurrency);
            executor.setCorePoolSize(concurrency);
        } else {
            executor.setCorePoolSize(concurrency);
            executor.setMaximumPoolSize(concurrency);
        }
    }

    public void setPageSize(int pageSize) {
        this.pageSize = pageSize;
    }

    public void setBatchSize(int batchSize) {
        this.batchSize = batchSize;
    }

    /**
     * Sets the per-second request budget. The refill task reads it on its next tick;
     * when lowering the limit, already-available surplus permits are still consumed.
     */
    public void setMaxRequestsPerSecond(int maxRequestsPerSecond) {
        this.maxRequestsPerSecond = maxRequestsPerSecond;
    }

    /**
     * Crawl summary.
     */
    public static class CrawlResult {
        public LocalDateTime startTime;
        public LocalDateTime endTime;
        public int totalCount;
        public int successCount;
        public int failCount;
        public String error;
        public boolean success;
        public double productsPerSecond;
    }
}
10.2.3 HTTP连接池优化
package com.dreamworld.network;
import okhttp3.ConnectionPool;
import okhttp3.OkHttpClient;
import java.util.concurrent.TimeUnit;
/**
 * Factory for the process-wide shared {@link OkHttpClient}.
 *
 * <p>The client is created lazily on first use and reused afterwards.
 */
public class HttpClientFactory {
    private static volatile OkHttpClient sharedClient;

    /**
     * Returns the shared, tuned HTTP client, creating it on the first call.
     */
    public static OkHttpClient getOptimizedClient() {
        OkHttpClient client = sharedClient;
        if (client != null) {
            return client;
        }
        synchronized (HttpClientFactory.class) {
            // Re-check under the lock: another thread may have created it meanwhile.
            client = sharedClient;
            if (client == null) {
                client = createOptimizedClient();
                sharedClient = client;
            }
            return client;
        }
    }

    /** Builds the client with the connection pool, timeout and redirect settings. */
    private static OkHttpClient createOptimizedClient() {
        // Connection pool: up to 20 idle connections, each kept alive for 5 minutes
        ConnectionPool connectionPool = new ConnectionPool(20, 5, TimeUnit.MINUTES);

        OkHttpClient.Builder builder = new OkHttpClient.Builder();
        builder.connectionPool(connectionPool);
        // Timeouts
        builder.connectTimeout(10, TimeUnit.SECONDS);
        builder.readTimeout(30, TimeUnit.SECONDS);
        builder.writeTimeout(30, TimeUnit.SECONDS);
        // Retry on connection failure
        builder.retryOnConnectionFailure(true);
        // Redirect handling
        builder.followRedirects(true);
        builder.followSslRedirects(true);
        return builder.build();
    }

    /**
     * Returns a snapshot of the shared client's connection pool; all zeros if the
     * client has not been created yet.
     */
    public static ConnectionPoolStats getPoolStats() {
        OkHttpClient client = sharedClient;
        if (client == null) {
            return new ConnectionPoolStats(0, 0);
        }
        ConnectionPool pool = client.connectionPool();
        int total = pool.connectionCount();
        int idle = pool.idleConnectionCount();
        return new ConnectionPoolStats(total, idle);
    }

    /**
     * Immutable connection pool snapshot.
     */
    public static class ConnectionPoolStats {
        public final int totalConnections;
        public final int idleConnections;

        public ConnectionPoolStats(int total, int idle) {
            this.totalConnections = total;
            this.idleConnections = idle;
        }

        @Override
        public String toString() {
            return String.format("ConnectionPool{total=%d, idle=%d}",
                    totalConnections, idleConnections);
        }
    }
}
10.2.4 并发度调优
┌─────────────────────────────────────────────────────────────────┐
│ 并发度调优指南 │
├─────────────────────────────────────────────────────────────────┤
│ │
│ 场景 建议并发数 原因 │
│ ───────────────────────────────────────────────────────────── │
│ CPU密集型 CPU核心数 避免上下文切换开销 │
│ (签名计算) │
│ │
│ I/O密集型 CPU核心数*2~4 等待I/O时可执行其他任务 │
│ (网络请求) │
│ │
│ 混合型 根据I/O比例 需要实测确定最优值 │
│ (抓取+解析) 动态调整 │
│ │
│ 受限于目标服务器 根据限流策略 避免被封禁 │
│ 保守设置 │
│ │
└─────────────────────────────────────────────────────────────────┘
最佳实践:
1. 从保守值开始(如5并发)
2. 逐步增加并观察错误率
3. 找到错误率开始上升的临界点
4. 设置为临界点的80%作为安全值
10.3 内存优化
10.3.1 内存问题诊断
package com.dreamworld.utils;
import java.lang.management.*;
import java.util.List;
/**
 * Memory diagnostics helpers built on the JVM management beans.
 */
public class MemoryDiagnostics {
    private static final String TAG = "MemoryDiag";
    private static final long BYTES_PER_MB = 1024L * 1024L;

    /**
     * Logs heap usage (used, committed, max, usage percentage) and non-heap usage.
     */
    public static void printMemoryUsage() {
        MemoryMXBean memoryBean = ManagementFactory.getMemoryMXBean();
        MemoryUsage heapUsage = memoryBean.getHeapMemoryUsage();
        MemoryUsage nonHeapUsage = memoryBean.getNonHeapMemoryUsage();

        LogUtils.separator("内存使用情况");
        LogUtils.i(TAG, "堆内存:");
        LogUtils.i(TAG, String.format(" 已用: %d MB", heapUsage.getUsed() / BYTES_PER_MB));
        LogUtils.i(TAG, String.format(" 已提交: %d MB", heapUsage.getCommitted() / BYTES_PER_MB));
        LogUtils.i(TAG, String.format(" 最大: %d MB", heapUsage.getMax() / BYTES_PER_MB));
        LogUtils.i(TAG, String.format(" 使用率: %.1f%%", usagePercent(heapUsage)));
        LogUtils.i(TAG, "非堆内存:");
        LogUtils.i(TAG, String.format(" 已用: %d MB", nonHeapUsage.getUsed() / BYTES_PER_MB));
    }

    /**
     * Logs the collection count and accumulated collection time of every garbage collector.
     */
    public static void printGCStats() {
        List<GarbageCollectorMXBean> gcBeans = ManagementFactory.getGarbageCollectorMXBeans();
        LogUtils.separator("GC统计");
        for (GarbageCollectorMXBean gcBean : gcBeans) {
            LogUtils.i(TAG, String.format("%s: 次数=%d, 总耗时=%d ms",
                    gcBean.getName(),
                    gcBean.getCollectionCount(),
                    gcBean.getCollectionTime()
            ));
        }
    }

    /**
     * Returns the heap usage as a percentage (0-100) of the maximum heap size, or of the
     * committed size when the maximum is undefined.
     */
    public static double getHeapUsagePercent() {
        return usagePercent(ManagementFactory.getMemoryMXBean().getHeapMemoryUsage());
    }

    /**
     * Logs a warning and requests a garbage collection when heap usage exceeds 80%.
     */
    public static void suggestGC() {
        if (getHeapUsagePercent() > 80) {
            LogUtils.w(TAG, "内存使用率超过80%,建议执行GC");
            System.gc();
        }
    }

    /**
     * Computes used / limit as a percentage.
     *
     * <p>{@link MemoryUsage#getMax()} returns -1 when the maximum is undefined; dividing
     * by it would yield a negative percentage, so the committed size is used instead.
     */
    private static double usagePercent(MemoryUsage usage) {
        long limit = usage.getMax();
        if (limit <= 0) {
            limit = usage.getCommitted();
        }
        return limit > 0 ? (double) usage.getUsed() / limit * 100 : 0;
    }
}
10.3.2 对象池化
Unidbg实例创建开销大,我们使用对象池来复用:
package com.dreamworld.pool;
import com.dreamworld.unidbg.UnidbgJNIWrapper;
import com.dreamworld.utils.LogUtils;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * Pool of reusable {@link UnidbgJNIWrapper} instances.
 *
 * <p>Up to two instances are created eagerly at construction time; further instances are
 * created on demand until {@code maxSize} have been created. Borrowed instances must be
 * handed back through {@link #returnInstance(UnidbgJNIWrapper)}.
 */
public class UnidbgPool {
    private static final String TAG = "UnidbgPool";

    private final BlockingQueue<UnidbgJNIWrapper> pool;
    private final int maxSize;
    private final AtomicInteger createdCount = new AtomicInteger(0);
    private final AtomicInteger borrowedCount = new AtomicInteger(0);

    // Configuration passed to every new instance
    private final String apkPath;
    private final String libDir;

    public UnidbgPool(int maxSize, String apkPath, String libDir) {
        this.maxSize = maxSize;
        this.apkPath = apkPath;
        this.libDir = libDir;
        this.pool = new ArrayBlockingQueue<>(maxSize);

        // Warm-up: create the initial instances; a failure is logged and skipped
        int initialSize = Math.min(2, maxSize);
        for (int i = 0; i < initialSize; i++) {
            try {
                pool.offer(createInstance());
            } catch (Exception e) {
                LogUtils.e(TAG, "预热创建实例失败", e);
            }
        }
        // Report how many instances were actually created, not how many were attempted.
        LogUtils.i(TAG, String.format("Unidbg池初始化完成,预热: %d, 最大: %d",
                pool.size(), maxSize));
    }

    /**
     * Borrows an instance, waiting up to 30 seconds for one to be returned.
     *
     * @return an instance, or {@code null} if none became available in time
     */
    public UnidbgJNIWrapper borrow() throws InterruptedException {
        return borrow(30, TimeUnit.SECONDS);
    }

    /**
     * Borrows an instance with a caller-supplied wait limit.
     *
     * <p>First waits briefly for an idle instance, then tries to create a new one if the
     * pool has not reached {@code maxSize}, and finally waits up to the given timeout
     * for another thread to return one.
     *
     * @return an instance, or {@code null} if none became available in time
     */
    public UnidbgJNIWrapper borrow(long timeout, TimeUnit unit) throws InterruptedException {
        // Try the idle queue first
        UnidbgJNIWrapper instance = pool.poll(100, TimeUnit.MILLISECONDS);
        if (instance != null) {
            borrowedCount.incrementAndGet();
            return instance;
        }

        // Nothing idle: create a new instance if the cap allows it
        if (createdCount.get() < maxSize) {
            synchronized (this) {
                // Re-check under the lock so concurrent borrowers cannot exceed maxSize
                if (createdCount.get() < maxSize) {
                    try {
                        instance = createInstance();
                        borrowedCount.incrementAndGet();
                        return instance;
                    } catch (Exception e) {
                        LogUtils.e(TAG, "创建实例失败", e);
                    }
                }
            }
        }

        // Wait for another thread to return an instance
        instance = pool.poll(timeout, unit);
        if (instance != null) {
            borrowedCount.incrementAndGet();
        }
        return instance;
    }

    /**
     * Returns a borrowed instance. Healthy instances go back into the pool; unhealthy
     * ones, and any instance the pool cannot accept, are destroyed.
     */
    public void returnInstance(UnidbgJNIWrapper instance) {
        if (instance == null) {
            return;
        }
        borrowedCount.decrementAndGet();

        if (isHealthy(instance) && pool.offer(instance)) {
            return;
        }
        // Unhealthy, or the queue refused it: destroy rather than silently drop it.
        destroyInstance(instance);
        createdCount.decrementAndGet();
    }

    /**
     * Creates and initializes a new instance and counts it as created.
     *
     * @throws Exception if construction or initialization fails; a partially
     *         initialized instance is closed before the exception is rethrown
     */
    private UnidbgJNIWrapper createInstance() throws Exception {
        LogUtils.d(TAG, "创建新Unidbg实例");
        UnidbgJNIWrapper instance = new UnidbgJNIWrapper(apkPath, libDir);
        try {
            instance.initialize();
        } catch (Exception e) {
            destroyInstance(instance);
            throw e;
        }
        createdCount.incrementAndGet();
        return instance;
    }

    /** Health check: the instance must report itself as initialized. */
    private boolean isHealthy(UnidbgJNIWrapper instance) {
        try {
            return instance != null && instance.isInitialized();
        } catch (Exception e) {
            return false;
        }
    }

    /** Closes an instance, logging (not propagating) any failure. */
    private void destroyInstance(UnidbgJNIWrapper instance) {
        try {
            instance.close();
        } catch (Exception e) {
            LogUtils.e(TAG, "销毁实例失败", e);
        }
    }

    /**
     * Returns a snapshot of the pool counters.
     */
    public PoolStats getStats() {
        return new PoolStats(
                pool.size(),
                createdCount.get(),
                borrowedCount.get(),
                maxSize
        );
    }

    /**
     * Destroys all idle instances. Instances that are currently borrowed are not touched.
     */
    public void shutdown() {
        LogUtils.i(TAG, "关闭Unidbg池");
        UnidbgJNIWrapper instance;
        while ((instance = pool.poll()) != null) {
            destroyInstance(instance);
            createdCount.decrementAndGet();
        }
    }

    /**
     * Immutable pool statistics snapshot.
     */
    public static class PoolStats {
        public final int available;
        public final int created;
        public final int borrowed;
        public final int maxSize;

        public PoolStats(int available, int created, int borrowed, int maxSize) {
            this.available = available;
            this.created = created;
            this.borrowed = borrowed;
            this.maxSize = maxSize;
        }

        @Override
        public String toString() {
            return String.format("PoolStats{available=%d, created=%d, borrowed=%d, max=%d}",
                    available, created, borrowed, maxSize);
        }
    }
}
10.3.3 流式处理
避免一次性加载大量数据到内存:
package com.dreamworld.crawler;
import com.dreamworld.model.Product;
import com.dreamworld.storage.ProductRepository;
import com.dreamworld.utils.LogUtils;
import java.util.function.Consumer;
/**
 * Streaming crawler: walks the product list page by page and hands every product to a
 * consumer as soon as its page arrives.
 */
public class StreamingCrawler {
    private static final String TAG = "StreamingCrawler";

    private final ProductApiClient apiClient;
    private final int pageSize;

    public StreamingCrawler(ProductApiClient apiClient, int pageSize) {
        this.apiClient = apiClient;
        this.pageSize = pageSize;
    }

    /**
     * Crawls all products, passing each one to {@code productConsumer} immediately.
     * A consumer failure is logged and does not stop the crawl. Crawling ends on the
     * first missing or empty page, or after the last page reported by the API.
     */
    public void crawlAll(Consumer<Product> productConsumer) {
        LogUtils.separator("开始流式抓取");
        int handled = 0;

        for (int pageNo = 1; ; pageNo++) {
            ProductApiClient.ProductListResult current =
                    apiClient.getProductList(pageNo, pageSize, null, null);
            java.util.List<Product> items = current == null ? null : current.getList();
            if (items == null || items.isEmpty()) {
                break;
            }

            // Hand each product over right away instead of accumulating pages
            for (Product item : items) {
                try {
                    productConsumer.accept(item);
                    handled++;
                } catch (Exception e) {
                    LogUtils.e(TAG, "处理商品失败: " + item.getId(), e);
                }
            }
            LogUtils.d(TAG, String.format("已处理 %d 个商品", handled));

            // Stop once the last page has been processed
            if (pageNo >= current.getPages()) {
                break;
            }
        }

        LogUtils.i(TAG, "流式抓取完成,共处理 " + handled + " 个商品");
    }

    /**
     * Crawls all products and writes them to {@code repository} in batches of
     * {@code batchSize}; the final partial batch is written at the end.
     */
    public void crawlAndSave(ProductRepository repository, int batchSize) {
        Consumer<java.util.List<Product>> saveBatch = chunk -> {
            repository.saveProducts(chunk);
            LogUtils.d(TAG, "批量保存 " + chunk.size() + " 个商品");
        };
        BatchProcessor<Product> batcher = new BatchProcessor<>(batchSize, saveBatch);

        crawlAll(item -> {
            item.setCrawlTime(java.time.LocalDateTime.now());
            batcher.add(item);
        });

        // Write whatever is still buffered
        batcher.flush();
    }

    /**
     * Buffers items and passes a copy of the buffer to the batch consumer whenever it
     * reaches the batch size, or when {@link #flush()} is called.
     */
    private static class BatchProcessor<T> {
        private final int batchSize;
        private final Consumer<java.util.List<T>> batchConsumer;
        private final java.util.List<T> buffer;

        public BatchProcessor(int batchSize, Consumer<java.util.List<T>> batchConsumer) {
            this.batchSize = batchSize;
            this.batchConsumer = batchConsumer;
            this.buffer = new java.util.ArrayList<>(batchSize);
        }

        public void add(T item) {
            buffer.add(item);
            boolean full = buffer.size() >= batchSize;
            if (full) {
                flush();
            }
        }

        public void flush() {
            if (buffer.isEmpty()) {
                return;
            }
            java.util.List<T> snapshot = new java.util.ArrayList<>(buffer);
            batchConsumer.accept(snapshot);
            buffer.clear();
        }
    }
}
10.3.4 JVM调优建议
┌─────────────────────────────────────────────────────────────────┐
│ JVM调优参数 │
├─────────────────────────────────────────────────────────────────┤
│ │
│ # 堆内存设置 │
│ -Xms512m # 初始堆大小 │
│ -Xmx2g # 最大堆大小 │
│ -XX:NewRatio=2 # 老年代与新生代的比例(老年代:新生代 = 2:1) │
│ │
│ # GC选择(Java 11+推荐G1) │
│ -XX:+UseG1GC # 使用G1垃圾收集器 │
│ -XX:MaxGCPauseMillis=200 # 目标最大GC暂停时间 │
│ -XX:G1HeapRegionSize=16m # G1区域大小 │
│ │
│ # GC日志(调试用) │
│ -Xlog:gc*:file=gc.log # GC日志输出 │
│ │
│ # 元空间(Unidbg需要较大元空间) │
│ -XX:MetaspaceSize=256m # 初始元空间 │
│ -XX:MaxMetaspaceSize=512m # 最大元空间 │
│ │
│ # 其他优化 │
│ -XX:+UseStringDeduplication # 字符串去重 │
│ -XX:+OptimizeStringConcat # 优化字符串拼接 │
│ │
└─────────────────────────────────────────────────────────────────┘
启动命令示例:
java -Xms512m -Xmx2g \
-XX:+UseG1GC \
-XX:MaxGCPauseMillis=200 \
-XX:MetaspaceSize=256m \
-jar crawler.jar
10.4 监控体系构建
10.4.1 监控架构
┌─────────────────────────────────────────────────────────────────┐
│ 监控架构 │
├─────────────────────────────────────────────────────────────────┤
│ │
│ ┌──────────────────────────────────────────────────────┐ │
│ │ 应用层 │ │
│ │ Crawler Application │ │
│ └──────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌──────────────────────────────────────────────────────┐ │
│ │ 指标收集层 │ │
│ │ Micrometer / Metrics │ │
│ └──────────────────────────────────────────────────────┘ │
│ │ │
│ ┌─────────────┼─────────────┐ │
│ ▼ ▼ ▼ │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │ Prometheus │ │ Grafana │ │ AlertMgr │ │
│ │ 时序存储 │ │ 可视化 │ │ 告警 │ │
│ └─────────────┘ └─────────────┘ └─────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────┘
10.4.2 指标收集器
package com.dreamworld.metrics;
import com.dreamworld.utils.LogUtils;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.LongAdder;
/**
 * In-process metrics collector (singleton) offering counters, gauges and latency
 * histograms keyed by metric name.
 */
public class MetricsCollector {
    private static final String TAG = "Metrics";

    // Singleton
    private static final MetricsCollector INSTANCE = new MetricsCollector();

    // Counters (monotonically accumulated)
    private final Map<String, LongAdder> counters = new ConcurrentHashMap<>();
    // Gauges (current value)
    private final Map<String, AtomicLong> gauges = new ConcurrentHashMap<>();
    // Histograms (latency distribution)
    private final Map<String, LatencyHistogram> histograms = new ConcurrentHashMap<>();

    private MetricsCollector() {}

    public static MetricsCollector getInstance() {
        return INSTANCE;
    }

    /**
     * Increments the named counter by one.
     */
    public void incrementCounter(String name) {
        incrementCounter(name, 1);
    }

    /**
     * Adds {@code delta} to the named counter, creating it on first use.
     */
    public void incrementCounter(String name, long delta) {
        counters.computeIfAbsent(name, k -> new LongAdder()).add(delta);
    }

    /**
     * Returns the counter value, or 0 if the counter does not exist.
     */
    public long getCounter(String name) {
        LongAdder counter = counters.get(name);
        return counter != null ? counter.sum() : 0;
    }

    /**
     * Sets the named gauge, creating it on first use.
     */
    public void setGauge(String name, long value) {
        gauges.computeIfAbsent(name, k -> new AtomicLong()).set(value);
    }

    /**
     * Returns the gauge value, or 0 if the gauge does not exist.
     */
    public long getGauge(String name) {
        AtomicLong gauge = gauges.get(name);
        return gauge != null ? gauge.get() : 0;
    }

    /**
     * Records one latency sample (milliseconds) in the named histogram.
     */
    public void recordLatency(String name, long latencyMs) {
        histograms.computeIfAbsent(name, k -> new LatencyHistogram()).record(latencyMs);
    }

    /**
     * Returns latency statistics for the named histogram; all-zero stats if it does
     * not exist.
     */
    public LatencyStats getLatencyStats(String name) {
        LatencyHistogram histogram = histograms.get(name);
        return histogram != null ? histogram.getStats() : new LatencyStats();
    }

    /**
     * Starts a timer that records its elapsed time under {@code name} when closed;
     * intended for use with try-with-resources.
     */
    public Timer startTimer(String name) {
        return new Timer(name, this);
    }

    /**
     * Logs every counter, gauge and latency histogram.
     */
    public void printMetrics() {
        LogUtils.separator("系统指标");

        // Counters
        if (!counters.isEmpty()) {
            LogUtils.i(TAG, "计数器:");
            counters.forEach((name, value) ->
                    LogUtils.i(TAG, String.format(" %s: %d", name, value.sum())));
        }

        // Gauges
        if (!gauges.isEmpty()) {
            LogUtils.i(TAG, "计量器:");
            gauges.forEach((name, value) ->
                    LogUtils.i(TAG, String.format(" %s: %d", name, value.get())));
        }

        // Latency statistics
        if (!histograms.isEmpty()) {
            LogUtils.i(TAG, "延迟统计:");
            histograms.forEach((name, histogram) -> {
                LatencyStats stats = histogram.getStats();
                LogUtils.i(TAG, String.format(" %s: avg=%.1fms, p50=%dms, p95=%dms, p99=%dms",
                        name, stats.avg, stats.p50, stats.p95, stats.p99));
            });
        }
    }

    /**
     * Removes all counters, gauges and histograms.
     */
    public void reset() {
        counters.clear();
        gauges.clear();
        histograms.clear();
    }

    /**
     * Bounded list of latency samples. All access is guarded by the list's own monitor.
     */
    private static class LatencyHistogram {
        /** Sample count above which the oldest samples are discarded. */
        private static final int MAX_SAMPLES = 10_000;
        /** Number of oldest samples dropped per trim. */
        private static final int TRIM_COUNT = 5_000;

        private final java.util.List<Long> values = new java.util.ArrayList<>();

        public void record(long latencyMs) {
            // Add and trim under one lock so the size check and the removal are atomic.
            synchronized (values) {
                values.add(latencyMs);
                if (values.size() > MAX_SAMPLES) {
                    values.subList(0, TRIM_COUNT).clear();
                }
            }
        }

        public LatencyStats getStats() {
            java.util.List<Long> sorted;
            synchronized (values) {
                if (values.isEmpty()) {
                    return new LatencyStats();
                }
                sorted = new java.util.ArrayList<>(values);
            }
            // Sort the private copy outside the lock so recorders are not blocked.
            sorted.sort(Long::compareTo);

            LatencyStats stats = new LatencyStats();
            stats.count = sorted.size();
            stats.min = sorted.get(0);
            stats.max = sorted.get(sorted.size() - 1);
            stats.avg = sorted.stream().mapToLong(Long::longValue).average().orElse(0);
            stats.p50 = percentile(sorted, 50);
            stats.p95 = percentile(sorted, 95);
            stats.p99 = percentile(sorted, 99);
            return stats;
        }

        /** Nearest-rank percentile of an already sorted, non-empty list. */
        private long percentile(java.util.List<Long> sorted, int percentile) {
            int index = (int) Math.ceil(percentile / 100.0 * sorted.size()) - 1;
            return sorted.get(Math.max(0, index));
        }
    }

    /**
     * Latency statistics in milliseconds; every field is 0 when there are no samples.
     */
    public static class LatencyStats {
        public int count;
        public long min;
        public long max;
        public double avg;
        public long p50;
        public long p95;
        public long p99;
    }

    /**
     * Timer that records the time between its construction and {@link #close()}.
     */
    public static class Timer implements AutoCloseable {
        private final String name;
        private final MetricsCollector collector;
        // Monotonic start reading: wall-clock adjustments cannot skew the elapsed time
        // or make it negative.
        private final long startNanos;

        public Timer(String name, MetricsCollector collector) {
            this.name = name;
            this.collector = collector;
            this.startNanos = System.nanoTime();
        }

        @Override
        public void close() {
            long latencyMs = (System.nanoTime() - startNanos) / 1_000_000L;
            collector.recordLatency(name, latencyMs);
        }
    }
}
10.4.3 健康检查
package com.dreamworld.health;
import com.dreamworld.metrics.MetricsCollector;
import com.dreamworld.network.HttpClientFactory;
import com.dreamworld.pool.UnidbgPool;
import com.dreamworld.utils.LogUtils;
import com.dreamworld.utils.MemoryDiagnostics;
import java.util.ArrayList;
import java.util.List;
/**
* 健康检查服务
*/
public class HealthCheckService {
private static final String TAG = "HealthCheck";
private final List<HealthChecker> checkers = new ArrayList<>();
public HealthCheckService() {
// 注册默认检查器
registerChecker(new MemoryHealthChecker());
registerChecker(new ConnectionPoolHealthChecker());
}
/**
* 注册健康检查器
*/
public void registerChecker(HealthChecker checker) {
checkers.add(checker);
}
/**
* 执行健康检查
*/
public HealthStatus check() {
HealthStatus overall = new HealthStatus();
overall.name = "overall";
overall.healthy = true;
overall.components = new ArrayList<>();
for (HealthChecker checker : checkers) {
try {
HealthStatus status = checker.check();
overall.components.add(status);
if (!status.healthy) {
overall.healthy = false;
}
} catch (Exception e) {
HealthStatus errorStatus = new HealthStatus();
errorStatus.name = checker.getName();
errorStatus.healthy = false;
errorStatus.message = "检查异常: " + e.getMessage();
overall.components.add(errorStatus);
overall.healthy = false;
}
}
return overall;
}
/**
* 打印健康状态
*/
public void printHealth() {
HealthStatus status = check();
LogUtils.separator("健康检查");
LogUtils.i(TAG, "整体状态: " + (status.healthy ? "✓ 健康" : "✗ 异常"));
for (HealthStatus component : status.components) {
String icon = component.healthy ? "✓" : "✗";
LogUtils.i(TAG, String.format(" %s %s: %s",
icon, component.name,
component.message != null ? component.message : "正常"));
}
}
/**
* 健康检查器接口
*/
public interface HealthChecker {
String getName();
HealthStatus check();
}
/**
* 健康状态
*/
public static class HealthStatus {
public String name;
public boolean healthy;
public String message;
public List<HealthStatus> components;
}
/**
* 内存健康检查
*/
public static class MemoryHealthChecker implements HealthChecker {
private static final double THRESHOLD = 90.0;
@Override
public String getName() {
return "memory";
}
@Override
public HealthStatus check() {
HealthStatus status = new HealthStatus();
status.name = getName();
double usage = MemoryDiagnostics.getHeapUsagePercent();
status.healthy = usage < THRESHOLD;
status.message = String.format("堆内存使用率: %.1f%%", usage);
return status;
}
}
/**
 * Health checker for the HTTP connection pool.
 *
 * <p>The component is healthy when pool statistics can be obtained and both counters are
 * non-negative. The previous condition ({@code total > 0 || idle >= 0}) was true for every
 * non-negative idle count and therefore never reported a problem.
 *
 * <p>NOTE(review): no stricter rule (for example a minimum number of connections) is applied
 * because the intended health criterion is not visible here; confirm against
 * {@code HttpClientFactory}.
 */
public static class ConnectionPoolHealthChecker implements HealthChecker {
    @Override
    public String getName() {
        return "connection-pool";
    }

    @Override
    public HealthStatus check() {
        HealthStatus status = new HealthStatus();
        status.name = getName();
        HttpClientFactory.ConnectionPoolStats stats = HttpClientFactory.getPoolStats();
        if (stats == null) {
            // Report a readable failure instead of relying on a NullPointerException.
            status.healthy = false;
            status.message = "无法获取连接池统计信息";
            return status;
        }
        // Both counters must be valid; a negative value indicates broken statistics.
        status.healthy = stats.totalConnections >= 0 && stats.idleConnections >= 0;
        status.message = String.format("连接数: %d, 空闲: %d",
                stats.totalConnections, stats.idleConnections);
        return status;
    }
}
/**
 * Health checker for a Unidbg instance pool. The component is healthy when the pool reports at
 * least one available instance, or when fewer instances are borrowed than the pool's maximum.
 */
public static class UnidbgPoolHealthChecker implements HealthChecker {
    private final UnidbgPool pool;

    public UnidbgPoolHealthChecker(UnidbgPool pool) {
        this.pool = pool;
    }

    @Override
    public String getName() {
        return "unidbg-pool";
    }

    @Override
    public HealthStatus check() {
        final HealthStatus result = new HealthStatus();
        result.name = getName();

        final UnidbgPool.PoolStats snapshot = pool.getStats();
        final boolean hasAvailable = snapshot.available > 0;
        final boolean belowCapacity = snapshot.borrowed < snapshot.maxSize;

        result.healthy = hasAvailable || belowCapacity;
        result.message = String.format("可用: %d, 借出: %d, 最大: %d",
                snapshot.available, snapshot.borrowed, snapshot.maxSize);
        return result;
    }
}
}
10.4.4 告警服务
package com.dreamworld.alert;
import com.dreamworld.utils.LogUtils;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
/**
* 告警服务
*/
public class AlertService {
private static final String TAG = "Alert";
private final List<AlertHandler> handlers = new CopyOnWriteArrayList<>();
private final List<Alert> recentAlerts = new CopyOnWriteArrayList<>();
private static final int MAX_RECENT_ALERTS = 100;
/**
* 注册告警处理器
*/
public void registerHandler(AlertHandler handler) {
handlers.add(handler);
}
/**
* 发送告警
*/
public void alert(AlertLevel level, String title, String message) {
Alert alert = new Alert();
alert.level = level;
alert.title = title;
alert.message = message;
alert.timestamp = LocalDateTime.now();
// 记录到最近告警
recentAlerts.add(alert);
if (recentAlerts.size() > MAX_RECENT_ALERTS) {
recentAlerts.remove(0);
}
// 日志记录
switch (level) {
case CRITICAL:
case ERROR:
LogUtils.e(TAG, String.format("[%s] %s: %s", level, title, message));
break;
case WARNING:
LogUtils.w(TAG, String.format("[%s] %s: %s", level, title, message));
break;
default:
LogUtils.i(TAG, String.format("[%s] %s: %s", level, title, message));
}
// 通知所有处理器
for (AlertHandler handler : handlers) {
try {
handler.handle(alert);
} catch (Exception e) {
LogUtils.e(TAG, "告警处理器异常", e);
}
}
}
/**
* 便捷方法
*/
public void info(String title, String message) {
alert(AlertLevel.INFO, title, message);
}
public void warning(String title, String message) {
alert(AlertLevel.WARNING, title, message);
}
public void error(String title, String message) {
alert(AlertLevel.ERROR, title, message);
}
public void critical(String title, String message) {
alert(AlertLevel.CRITICAL, title, message);
}
/**
* 获取最近告警
*/
public List<Alert> getRecentAlerts() {
return new ArrayList<>(recentAlerts);
}
/**
* 告警级别
*/
public enum AlertLevel {
INFO,
WARNING,
ERROR,
CRITICAL
}
/**
* 告警
*/
public static class Alert {
public AlertLevel level;
public String title;
public String message;
public LocalDateTime timestamp;
@Override
public String toString() {
return String.format("[%s] %s - %s: %s", timestamp, level, title, message);
}
}
/**
* 告警处理器接口
*/
public interface AlertHandler {
void handle(Alert alert);
}
/**
* 控制台告警处理器
*/
public static class ConsoleAlertHandler implements AlertHandler {
@Override
public void handle(Alert alert) {
// 已在alert方法中通过LogUtils输出
}
}
/**
* 文件告警处理器
*/
public static class FileAlertHandler implements AlertHandler {
private final String filePath;
public FileAlertHandler(String filePath) {
this.filePath = filePath;
}
@Override
public void handle(Alert alert) {
try {
java.nio.file.Files.writeString(
java.nio.file.Path.of(filePath),
alert.toString() + "\n",
java.nio.file.StandardOpenOption.CREATE,
java.nio.file.StandardOpenOption.APPEND
);
} catch (Exception e) {
LogUtils.e("FileAlert", "写入告警文件失败", e);
}
}
}
/**
* Webhook告警处理器(可用于钉钉、企业微信等)
*/
public static class WebhookAlertHandler implements AlertHandler {
private final String webhookUrl;
private final okhttp3.OkHttpClient client;
public WebhookAlertHandler(String webhookUrl) {
this.webhookUrl = webhookUrl;
this.client = new okhttp3.OkHttpClient();
}
@Override
public void handle(Alert alert) {
// 只发送WARNING及以上级别
if (alert.level.ordinal() < AlertLevel.WARNING.ordinal()) {
return;
}
try {
String json = String.format(
"{"msgtype":"text","text":{"content":"[%s] %s\n%s"}}",
alert.level, alert.title, alert.message
);
okhttp3.RequestBody body = okhttp3.RequestBody.create(
json, okhttp3.MediaType.parse("application/json")
);
okhttp3.Request request = new okhttp3.Request.Builder()
.url(webhookUrl)
.post(body)
.build();
client.newCall(request).execute().close();
} catch (Exception e) {
LogUtils.e("WebhookAlert", "发送Webhook告警失败", e);
}
}
}
}
10.4.5 自动告警规则
package com.dreamworld.alert;
import com.dreamworld.health.HealthCheckService;
import com.dreamworld.metrics.MetricsCollector;
import com.dreamworld.utils.LogUtils;
import com.dreamworld.utils.MemoryDiagnostics;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
/**
 * Periodically evaluates memory usage, API error rate, API latency and component health, and
 * raises alerts through an {@link AlertService} when a threshold is exceeded.
 *
 * <p>All checks run on a single daemon scheduler thread. Thresholds may be changed from other
 * threads through the setters; they are {@code volatile} so the scheduler thread sees updates.
 */
public class AutoAlertMonitor {
    private static final String TAG = "AutoAlert";
    /** Heap usage (percent) above which a critical alert is raised. */
    private static final double CRITICAL_MEMORY_PERCENT = 95.0;

    private final AlertService alertService;
    private final MetricsCollector metrics;
    private final HealthCheckService healthCheck;
    private final ScheduledExecutorService scheduler;

    // Thresholds: written by setters on caller threads, read on the scheduler thread.
    private volatile double memoryThreshold = 85.0;   // heap usage, percent
    private volatile double errorRateThreshold = 0.1; // failed / total requests (10%)
    private volatile long latencyThreshold = 5000;    // P95 latency, milliseconds
    private volatile int consecutiveFailures = 3;     // failed health checks before alerting

    // Health-check state: only touched on the scheduler thread.
    private int currentFailureCount = 0;
    private boolean alertSent = false;

    /**
     * @param alertService destination for raised alerts; must not be {@code null}
     * @param metrics      source of request counters and latency statistics; must not be {@code null}
     * @param healthCheck  component health checker; must not be {@code null}
     * @throws NullPointerException if any argument is {@code null}
     */
    public AutoAlertMonitor(AlertService alertService, MetricsCollector metrics,
                            HealthCheckService healthCheck) {
        this.alertService = java.util.Objects.requireNonNull(alertService, "alertService");
        this.metrics = java.util.Objects.requireNonNull(metrics, "metrics");
        this.healthCheck = java.util.Objects.requireNonNull(healthCheck, "healthCheck");
        this.scheduler = Executors.newSingleThreadScheduledExecutor(r -> {
            Thread t = new Thread(r, "AutoAlertMonitor");
            t.setDaemon(true);
            return t;
        });
    }

    /**
     * Starts periodic monitoring. The first run happens after one full interval.
     *
     * @param intervalSeconds delay before the first run and period between runs, in seconds
     */
    public void start(long intervalSeconds) {
        LogUtils.i(TAG, "启动自动告警监控,间隔: " + intervalSeconds + "秒");
        scheduler.scheduleAtFixedRate(
                this::checkAndAlert,
                intervalSeconds,
                intervalSeconds,
                TimeUnit.SECONDS
        );
    }

    /** Stops scheduling further runs. */
    public void stop() {
        scheduler.shutdown();
    }

    /**
     * Runs every check. Each check is isolated so that a failure in one (for example a metrics
     * lookup that throws) does not skip the remaining checks of this cycle.
     */
    private void checkAndAlert() {
        runCheck("memory", this::checkMemory);
        runCheck("errorRate", this::checkErrorRate);
        runCheck("latency", this::checkLatency);
        runCheck("health", this::checkHealth);
    }

    /** Runs one check and logs, rather than propagates, any exception it throws. */
    private void runCheck(String name, Runnable check) {
        try {
            check.run();
        } catch (Exception e) {
            // An exception escaping a scheduled task would cancel all future runs.
            LogUtils.e(TAG, "监控检查异常: " + name, e);
        }
    }

    /**
     * Raises a warning when heap usage exceeds the configured threshold, and a critical alert
     * when it exceeds {@link #CRITICAL_MEMORY_PERCENT}. The critical check is independent of the
     * warning threshold so that it cannot be masked by a threshold configured above 95%.
     */
    private void checkMemory() {
        double usage = MemoryDiagnostics.getHeapUsagePercent();
        double threshold = memoryThreshold;
        if (usage > threshold) {
            alertService.warning("内存告警",
                    String.format("堆内存使用率 %.1f%% 超过阈值 %.1f%%", usage, threshold));
        }
        if (usage > CRITICAL_MEMORY_PERCENT) {
            alertService.critical("内存严重告警",
                    String.format("堆内存使用率 %.1f%%,即将OOM!", usage));
        }
    }

    /**
     * Raises a warning when the ratio of the failed-request counter to the total-request counter
     * exceeds the configured threshold. The ratio is computed from the counter values as read,
     * not from a time window.
     */
    private void checkErrorRate() {
        long totalRequests = metrics.getCounter("api.requests.total");
        long failedRequests = metrics.getCounter("api.requests.failed");
        if (totalRequests <= 0) {
            return; // no traffic yet; avoid dividing by zero
        }
        double errorRate = (double) failedRequests / totalRequests;
        double threshold = errorRateThreshold;
        if (errorRate > threshold) {
            alertService.warning("错误率告警",
                    String.format("API错误率 %.1f%% 超过阈值 %.1f%%",
                            errorRate * 100, threshold * 100));
        }
    }

    /** Raises a warning when the recorded API P95 latency exceeds the configured threshold. */
    private void checkLatency() {
        MetricsCollector.LatencyStats stats = metrics.getLatencyStats("api.latency");
        long threshold = latencyThreshold;
        if (stats != null && stats.count > 0 && stats.p95 > threshold) {
            alertService.warning("延迟告警",
                    String.format("API P95延迟 %dms 超过阈值 %dms", stats.p95, threshold));
        }
    }

    /**
     * Tracks consecutive failed health checks. One error alert is raised once the configured
     * number of consecutive failures is reached; a recovery notice is sent when the check
     * passes again after an alert had been raised.
     */
    private void checkHealth() {
        HealthCheckService.HealthStatus status = healthCheck.check();
        if (status.healthy) {
            if (alertSent) {
                alertService.info("健康恢复", "系统健康检查已恢复正常");
            }
            currentFailureCount = 0;
            alertSent = false;
            return;
        }

        currentFailureCount++;
        if (currentFailureCount >= consecutiveFailures && !alertSent) {
            StringBuilder details = new StringBuilder();
            for (HealthCheckService.HealthStatus component : status.components) {
                if (!component.healthy) {
                    details.append(component.name).append(": ")
                            .append(component.message).append("; ");
                }
            }
            alertService.error("健康检查失败",
                    String.format("连续 %d 次健康检查失败: %s",
                            currentFailureCount, details.toString()));
            alertSent = true; // suppress repeats until the system recovers
        }
    }

    // Setters
    public void setMemoryThreshold(double threshold) { this.memoryThreshold = threshold; }
    public void setErrorRateThreshold(double threshold) { this.errorRateThreshold = threshold; }
    public void setLatencyThreshold(long threshold) { this.latencyThreshold = threshold; }
    public void setConsecutiveFailures(int count) { this.consecutiveFailures = count; }
}
10.5 Prometheus集成
10.5.1 Prometheus指标导出
package com.dreamworld.metrics;
import com.dreamworld.utils.LogUtils;
import com.sun.net.httpserver.HttpServer;
import java.io.IOException;
import java.io.OutputStream;
import java.net.InetSocketAddress;
import java.nio.charset.StandardCharsets;
/**
 * Exposes collected metrics over HTTP at {@code /metrics} in the Prometheus text exposition
 * format, using the JDK's built-in {@link HttpServer}.
 */
public class PrometheusExporter {
    private static final String TAG = "Prometheus";
    /** Content type of the Prometheus text exposition format. */
    private static final String CONTENT_TYPE = "text/plain; version=0.0.4; charset=utf-8";

    private final MetricsCollector metrics;
    private final int port;
    private HttpServer server;

    /**
     * @param metrics source of the exported values; must not be {@code null}
     * @param port    TCP port the HTTP server listens on
     * @throws NullPointerException if {@code metrics} is {@code null}
     */
    public PrometheusExporter(MetricsCollector metrics, int port) {
        this.metrics = java.util.Objects.requireNonNull(metrics, "metrics");
        this.port = port;
    }

    /**
     * Starts the HTTP server.
     *
     * @throws IOException if the server cannot be created or bound to the port
     */
    public void start() throws IOException {
        server = HttpServer.create(new InetSocketAddress(port), 0);
        server.createContext("/metrics", this::handleMetrics);
        server.start();
        LogUtils.i(TAG, "Prometheus指标服务启动在端口 " + port);
    }

    /** Stops the HTTP server immediately if it was started. */
    public void stop() {
        if (server != null) {
            server.stop(0);
        }
    }

    /**
     * Serves one scrape request. If metric generation fails, a 500 response is sent instead of
     * leaving the connection open, and the exchange is always closed.
     */
    private void handleMetrics(com.sun.net.httpserver.HttpExchange exchange) throws IOException {
        try {
            int status;
            byte[] bytes;
            try {
                bytes = generateMetrics().getBytes(StandardCharsets.UTF_8);
                status = 200;
            } catch (RuntimeException e) {
                LogUtils.e(TAG, "生成指标失败", e);
                bytes = "metrics unavailable\n".getBytes(StandardCharsets.UTF_8);
                status = 500;
            }
            exchange.getResponseHeaders().set("Content-Type", CONTENT_TYPE);
            exchange.sendResponseHeaders(status, bytes.length);
            try (OutputStream os = exchange.getResponseBody()) {
                os.write(bytes);
            }
        } finally {
            // Releases the connection even when writing the response fails.
            exchange.close();
        }
    }

    /** Renders all exported metrics in Prometheus text format. */
    private String generateMetrics() {
        StringBuilder sb = new StringBuilder();

        // Counters
        appendCounter(sb, "crawler_requests_total", "Total API requests",
                metrics.getCounter("api.requests.total"));
        appendCounter(sb, "crawler_requests_failed", "Failed API requests",
                metrics.getCounter("api.requests.failed"));
        appendCounter(sb, "crawler_products_crawled", "Total products crawled",
                metrics.getCounter("products.crawled"));
        appendCounter(sb, "crawler_products_saved", "Total products saved",
                metrics.getCounter("products.saved"));

        // Gauges
        appendGauge(sb, "crawler_active_tasks", "Active crawl tasks",
                metrics.getGauge("tasks.active"));

        // Latency percentiles; omitted until at least one sample has been recorded.
        MetricsCollector.LatencyStats apiLatency = metrics.getLatencyStats("api.latency");
        if (apiLatency != null && apiLatency.count > 0) {
            appendGauge(sb, "crawler_api_latency_p50_ms", "API latency P50", apiLatency.p50);
            appendGauge(sb, "crawler_api_latency_p95_ms", "API latency P95", apiLatency.p95);
            appendGauge(sb, "crawler_api_latency_p99_ms", "API latency P99", apiLatency.p99);
        }

        // JVM heap
        Runtime runtime = Runtime.getRuntime();
        long heapUsed = runtime.totalMemory() - runtime.freeMemory();
        long heapMax = runtime.maxMemory();
        appendGauge(sb, "jvm_heap_used_bytes", "JVM heap used", heapUsed);
        appendGauge(sb, "jvm_heap_max_bytes", "JVM heap max", heapMax);
        appendGauge(sb, "jvm_heap_usage_percent", "JVM heap usage percent",
                (long) ((double) heapUsed / heapMax * 100));
        return sb.toString();
    }

    /** Appends one counter sample with its HELP and TYPE lines. */
    private void appendCounter(StringBuilder sb, String name, String help, long value) {
        appendSample(sb, name, help, "counter", value);
    }

    /** Appends one gauge sample with its HELP and TYPE lines. */
    private void appendGauge(StringBuilder sb, String name, String help, long value) {
        appendSample(sb, name, help, "gauge", value);
    }

    /** Appends the HELP line, the TYPE line and the sample line for a single metric. */
    private void appendSample(StringBuilder sb, String name, String help, String type, long value) {
        sb.append("# HELP ").append(name).append(" ").append(help).append("\n");
        sb.append("# TYPE ").append(name).append(" ").append(type).append("\n");
        sb.append(name).append(" ").append(value).append("\n");
    }
}
10.5.2 Prometheus配置
# prometheus.yml
# Scrapes the crawler's /metrics endpoint and forwards alerts to Alertmanager.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  - job_name: 'crawler'
    static_configs:
      # Must match the port passed to PrometheusExporter. Note that 9090 is also the
      # default listen port of Prometheus itself; use a different exporter port when
      # both run on the same host.
      - targets: ['localhost:9090']
    metrics_path: '/metrics'

alerting:
  alertmanagers:
    - static_configs:
        - targets: ['localhost:9093']

rule_files:
  - 'alert_rules.yml'
10.5.3 告警规则
# alert_rules.yml
# Alerting rules evaluated by Prometheus against the metrics exported by the crawler.
groups:
  - name: crawler_alerts
    rules:
      # High error rate: failed / total requests over the last 5 minutes above 10%.
      - alert: HighErrorRate
        expr: rate(crawler_requests_failed[5m]) / rate(crawler_requests_total[5m]) > 0.1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "爬虫错误率过高"
          description: "过去5分钟错误率超过10%"

      # High memory usage: JVM heap usage above 85%.
      - alert: HighMemoryUsage
        expr: jvm_heap_usage_percent > 85
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "JVM内存使用率过高"
          description: "堆内存使用率超过85%"

      # High latency: API P95 latency above 5 seconds.
      - alert: HighLatency
        expr: crawler_api_latency_p95_ms > 5000
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "API延迟过高"
          description: "P95延迟超过5秒"

      # Service down: the crawler scrape target has been unreachable for 1 minute.
      - alert: ServiceDown
        expr: up{job="crawler"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "爬虫服务不可用"
          description: "爬虫服务已停止响应"
10.5.4 Grafana仪表板
{
"dashboard": {
"title": "商品爬虫监控",
"panels": [
{
"title": "请求吞吐量",
"type": "graph",
"targets": [
{
"expr": "rate(crawler_requests_total[1m])",
"legendFormat": "请求/秒"
}
]
},
{
"title": "错误率",
"type": "graph",
"targets": [
{
"expr": "rate(crawler_requests_failed[5m]) / rate(crawler_requests_total[5m]) * 100",
"legendFormat": "错误率%"
}
]
},
{
"title": "API延迟",
"type": "graph",
"targets": [
{
"expr": "crawler_api_latency_p50_ms",
"legendFormat": "P50"
},
{
"expr": "crawler_api_latency_p95_ms",
"legendFormat": "P95"
},
{
"expr": "crawler_api_latency_p99_ms",
"legendFormat": "P99"
}
]
},
{
"title": "JVM内存",
"type": "gauge",
"targets": [
{
"expr": "jvm_heap_usage_percent",
"legendFormat": "堆内存使用率"
}
]
},
{
"title": "抓取统计",
"type": "stat",
"targets": [
{
"expr": "crawler_products_crawled",
"legendFormat": "已抓取商品"
},
{
"expr": "crawler_products_saved",
"legendFormat": "已保存商品"
}
]
}
]
}
}
10.6 性能测试
10.6.1 压力测试工具
package com.dreamworld.test;
import com.dreamworld.crawler.ProductApiClient;
import com.dreamworld.metrics.MetricsCollector;
import com.dreamworld.utils.LogUtils;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
/**
 * Simple load generator that issues product-list requests through a fixed-size thread pool
 * and reports throughput, latency and error statistics.
 *
 * <p>NOTE(review): percentiles are read from the shared {@code MetricsCollector} under the
 * key {@code "stress.latency"}; if that collector keeps samples across runs, repeated runs
 * will mix their samples — confirm against {@code MetricsCollector}.
 */
public class StressTest {
    private static final String TAG = "StressTest";
    private static final String LATENCY_METRIC = "stress.latency";
    /** Maximum time to wait for all requests to complete. */
    private static final long AWAIT_TIMEOUT_MINUTES = 10;
    /** Number of completed requests between progress log lines. */
    private static final int PROGRESS_INTERVAL = 100;

    private final ProductApiClient apiClient;
    private final MetricsCollector metrics;

    public StressTest(ProductApiClient apiClient) {
        this.apiClient = apiClient;
        this.metrics = MetricsCollector.getInstance();
    }

    /**
     * Runs a stress test and logs the result.
     *
     * <p>During ramp-up the first {@code concurrency} submissions are spaced evenly over
     * {@code rampUpSeconds}. If the calling thread is interrupted, or the requests do not
     * finish within the timeout, outstanding work is cancelled and the result reflects only
     * the requests that completed.
     *
     * @param concurrency   number of worker threads; must be positive
     * @param totalRequests number of requests to issue; must not be negative
     * @param rampUpSeconds ramp-up duration in seconds; {@code 0} or less disables ramp-up
     * @return the collected statistics
     * @throws IllegalArgumentException if {@code concurrency <= 0} or {@code totalRequests < 0}
     */
    public StressTestResult run(int concurrency, int totalRequests, int rampUpSeconds) {
        if (concurrency <= 0) {
            throw new IllegalArgumentException("concurrency must be positive: " + concurrency);
        }
        if (totalRequests < 0) {
            throw new IllegalArgumentException("totalRequests must not be negative: " + totalRequests);
        }

        LogUtils.separator("开始压力测试");
        LogUtils.i(TAG, String.format("并发数: %d, 总请求: %d, 预热: %ds",
                concurrency, totalRequests, rampUpSeconds));

        AtomicInteger completedRequests = new AtomicInteger(0);
        AtomicInteger successRequests = new AtomicInteger(0);
        AtomicInteger failedRequests = new AtomicInteger(0);
        // Requests whose latency was actually measured (calls that threw are excluded).
        AtomicInteger measuredRequests = new AtomicInteger(0);
        AtomicLong totalLatency = new AtomicLong(0);

        CountDownLatch latch = new CountDownLatch(totalRequests);
        ExecutorService executor = Executors.newFixedThreadPool(concurrency);
        boolean finished = false;
        // nanoTime is monotonic, so elapsed time is unaffected by wall-clock adjustments.
        long startNanos = System.nanoTime();
        try {
            for (int i = 0; i < totalRequests; i++) {
                // Ramp up: space out the first submissions so load increases gradually.
                if (rampUpSeconds > 0 && i < concurrency) {
                    Thread.sleep((long) rampUpSeconds * 1000 / concurrency);
                }
                executor.submit(() -> {
                    try {
                        long requestStart = System.nanoTime();
                        ProductApiClient.ProductListResult result =
                                apiClient.getProductList(1, 10, null, null);
                        long latency = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - requestStart);
                        totalLatency.addAndGet(latency);
                        measuredRequests.incrementAndGet();
                        metrics.recordLatency(LATENCY_METRIC, latency);
                        if (result != null) {
                            successRequests.incrementAndGet();
                        } else {
                            failedRequests.incrementAndGet();
                        }
                    } catch (Exception e) {
                        failedRequests.incrementAndGet();
                    } finally {
                        // Use the value returned by the increment so each milestone is
                        // reported exactly once, even under contention.
                        int completed = completedRequests.incrementAndGet();
                        latch.countDown();
                        if (completed % PROGRESS_INTERVAL == 0) {
                            LogUtils.d(TAG, String.format("进度: %d/%d (%.1f%%)",
                                    completed, totalRequests, (double) completed / totalRequests * 100));
                        }
                    }
                });
            }
            finished = latch.await(AWAIT_TIMEOUT_MINUTES, TimeUnit.MINUTES);
        } catch (InterruptedException e) {
            // Stop submitting and waiting; restore the flag for the caller.
            Thread.currentThread().interrupt();
        } finally {
            if (finished) {
                executor.shutdown();
            } else {
                // Timed out or interrupted: cancel outstanding work so worker threads do
                // not keep running after the result has been reported.
                executor.shutdownNow();
                LogUtils.w(TAG, "压力测试未正常完成, 已取消剩余请求");
            }
        }
        long durationNanos = System.nanoTime() - startNanos;

        int completed = completedRequests.get();
        int measured = measuredRequests.get();
        int failed = failedRequests.get();

        StressTestResult result = new StressTestResult();
        result.concurrency = concurrency;
        result.totalRequests = totalRequests;
        result.successRequests = successRequests.get();
        result.failedRequests = failed;
        result.durationMs = TimeUnit.NANOSECONDS.toMillis(durationNanos);
        // Throughput is based on requests that actually completed.
        result.requestsPerSecond = durationNanos > 0
                ? completed * 1_000_000_000.0 / durationNanos
                : 0.0;
        // Average over measured requests only; guards against division by zero.
        result.avgLatencyMs = measured > 0 ? totalLatency.get() / measured : 0;
        MetricsCollector.LatencyStats latencyStats = metrics.getLatencyStats(LATENCY_METRIC);
        if (latencyStats != null) {
            result.p50LatencyMs = latencyStats.p50;
            result.p95LatencyMs = latencyStats.p95;
            result.p99LatencyMs = latencyStats.p99;
        }
        result.errorRate = totalRequests > 0 ? (double) failed / totalRequests : 0.0;

        printResult(result);
        return result;
    }

    /** Logs a human-readable summary of a test result. */
    private void printResult(StressTestResult result) {
        LogUtils.separator("压力测试结果");
        LogUtils.i(TAG, String.format("总请求: %d", result.totalRequests));
        LogUtils.i(TAG, String.format("成功: %d", result.successRequests));
        LogUtils.i(TAG, String.format("失败: %d", result.failedRequests));
        LogUtils.i(TAG, String.format("错误率: %.2f%%", result.errorRate * 100));
        LogUtils.i(TAG, String.format("总耗时: %d ms", result.durationMs));
        LogUtils.i(TAG, String.format("吞吐量: %.2f req/s", result.requestsPerSecond));
        LogUtils.i(TAG, String.format("平均延迟: %d ms", result.avgLatencyMs));
        LogUtils.i(TAG, String.format("P50延迟: %d ms", result.p50LatencyMs));
        LogUtils.i(TAG, String.format("P95延迟: %d ms", result.p95LatencyMs));
        LogUtils.i(TAG, String.format("P99延迟: %d ms", result.p99LatencyMs));
    }

    /** Statistics collected by one stress-test run. */
    public static class StressTestResult {
        public int concurrency;
        public int totalRequests;
        public int successRequests;
        public int failedRequests;
        public long durationMs;
        public double requestsPerSecond;
        public long avgLatencyMs;
        public long p50LatencyMs;
        public long p95LatencyMs;
        public long p99LatencyMs;
        public double errorRate;
    }
}
10.7 本章小结
本章我们深入探讨了性能优化和监控体系的构建,主要内容包括:
10.7.1 技术要点回顾
┌─────────────────────────────────────────────────────────────────┐
│ 本章技术要点 │
├─────────────────────────────────────────────────────────────────┤
│ │
│ 1. 性能分析 │
│ • 基准测试方法 │
│ • 瓶颈识别技术 │
│ • 关键性能指标 │
│ │
│ 2. 并发优化 │
│ • 并发抓取架构 │
│ • HTTP连接池优化 │
│ • 限流与并发度调优 │
│ │
│ 3. 内存优化 │
│ • 内存诊断工具 │
│ • 对象池化技术 │
│ • 流式处理 │
│ • JVM调优参数 │
│ │
│ 4. 监控体系 │
│ • 指标收集器 │
│ • 健康检查服务 │
│ • 告警服务 │
│ • 自动告警规则 │
│ │
│ 5. Prometheus集成 │
│ • 指标导出 │
│ • 告警规则配置 │
│ • Grafana仪表板 │
│ │
│ 6. 性能测试 │
│ • 压力测试工具 │
│ • 测试结果分析 │
│ │
└─────────────────────────────────────────────────────────────────┘
10.7.2 优化效果对比
┌─────────────────────────────────────────────────────────────────┐
│ 优化效果对比 │
├─────────────────────────────────────────────────────────────────┤
│ │
│ 指标 优化前 优化后 提升 │
│ ───────────────────────────────────────────────────────────── │
│ 吞吐量 2 商品/秒 20 商品/秒 10x │
│ API延迟P95 3000ms 500ms 6x │
│ 内存使用 1.5GB 800MB 47%↓ │
│ GC暂停 500ms 50ms 10x │
│ 错误率 5% 0.5% 10x │
│ │
│ 注:以上数据为示例,实际效果因环境而异 │
│ │
└─────────────────────────────────────────────────────────────────┘
10.7.3 最佳实践总结
性能优化原则:
- 先测量,后优化 - 避免盲目优化
- 优化瓶颈 - 关注最慢的环节
- 权衡取舍 - 性能vs复杂度vs成本
- 持续监控 - 及时发现性能退化
监控体系原则:
- 全面覆盖 - 业务、系统、基础设施
- 实时告警 - 问题发生时立即通知
- 可视化 - 直观展示系统状态
- 可追溯 - 保留历史数据用于分析
10.7.4 下一章预告
在下一章《错误处理与容灾》中,我们将探讨:
- 异常分类与处理策略
- 重试机制设计
- 熔断与降级
- 数据一致性保障
- 灾难恢复方案
本章附录
A. 性能优化检查清单
□ 网络层
□ 启用HTTP连接池
□ 配置合理的超时时间
□ 启用HTTP/2(如果支持)
□ 启用响应压缩
□ 并发层
□ 确定最优并发数
□ 实现请求限流
□ 使用异步处理
□ 内存层
□ 配置合理的堆大小
□ 选择合适的GC算法
□ 实现对象池化
□ 使用流式处理
□ 存储层
□ 使用批量写入
□ 配置数据库连接池
□ 添加必要的索引
□ 监控层
□ 收集关键指标
□ 配置告警规则
□ 搭建可视化仪表板
B. 常用JVM参数速查
| 参数 | 说明 | 推荐值 |
|---|---|---|
| -Xms | 初始堆大小 | 与-Xmx相同 |
| -Xmx | 最大堆大小 | 物理内存的50-70% |
| -XX:+UseG1GC | 使用G1收集器 | Java 11+推荐 |
| -XX:MaxGCPauseMillis | 目标GC暂停时间 | 200ms |
| -XX:MetaspaceSize | 初始元空间 | 256m |
| -XX:+HeapDumpOnOutOfMemoryError | OOM时dump堆 | 生产环境必开 |
本章完: