SQL Server 管理系列(第六期):性能监控与调优——DMV 与性能计数器的实战应用
前五期我们完成了安装配置、安全加固、备份恢复、高可用和自动化运维。这一期,我们进入 DBA 的核心技能——性能监控与调优。你是否遇到过:用户反馈系统慢,你却不知道从哪里开始排查?服务器 CPU 飙升,却找不到是哪个查询导致的?基线是什么?如何判断是正常波动还是异常?这一期,我们用 DMV 和性能计数器构建一套完整的性能监控体系。
一、性能监控架构概览
┌─────────────────────────────────────────────────────────────┐
│ 性能监控金字塔 │
│ │
│ ┌─────────────────────────────────────────────────────┐ │
│ │ 第5层:等待统计 (sys.dm_os_wait_stats) │ │
│ │ 第4层:查询统计 (sys.dm_exec_query_stats) │ │
│ │ 第3层:会话/请求 (sys.dm_exec_requests/sessions) │ │
│ │ 第2层:性能计数器 (sys.dm_os_performance_counters) │ │
│ │ 第1层:系统级指标 (CPU/内存/磁盘) │ │
│ └─────────────────────────────────────────────────────┘ │
│ │
│ 排查顺序:从顶到底(先看等待统计定位方向,再深入查询) │
└─────────────────────────────────────────────────────────────┘
二、性能计数器(Performance Counters)
2.1 核心计数器解读
| 计数器 | 正常值 | 告警阈值 | 说明 |
|---|---|---|---|
| Buffer cache hit ratio | > 95% | < 90% | 数据页在内存中的命中率 |
| Page life expectancy | > 300秒 | < 300秒 | 页在内存中的停留时间 |
| Lazy writes/sec | < 20 | > 20 | 内存压力刷写页面的频率 |
| SQL Compilations/sec | 视负载 | 突然翻倍 | SQL 编译次数 |
| SQL Re-Compilations/sec | < 10% of Compilations | > 20% | 重编译比例过高 |
| User Connections | 稳定 | 突增/突降 | 用户连接数 |
| Batch Requests/sec | 稳定 | 突降 | 批处理请求数 |
| Average Latch Wait Time (ms) | < 100ms | > 500ms | 闩锁等待 |
| Lock Waits/sec | < 5 | > 10 | 锁等待频率 |
2.2 收集性能计数器
-- 创建性能数据收集表
CREATE TABLE DBA_PerfCounters (
CollectionID BIGINT IDENTITY PRIMARY KEY,
CollectionTime DATETIME DEFAULT GETDATE(),
CounterName NVARCHAR(200),
CounterValue BIGINT,
InstanceName NVARCHAR(200)
);
-- 创建收集存储过程
CREATE PROC dbo.CollectPerfCounters
AS
BEGIN
INSERT INTO DBA_PerfCounters (CounterName, CounterValue, InstanceName)
SELECT
counter_name,
cntr_value,
instance_name
FROM sys.dm_os_performance_counters
WHERE counter_name IN (
'Buffer cache hit ratio',
'Page life expectancy',
'Lazy writes/sec',
'SQL Compilations/sec',
'SQL Re-Compilations/sec',
'User Connections',
'Batch Requests/sec',
'Lock Waits/sec',
'Average Latch Wait Time (ms)'
);
-- 清理7天前的数据
DELETE FROM DBA_PerfCounters
WHERE CollectionTime < DATEADD(day, -7, GETDATE());
END
-- 创建 SQL Agent 作业,每15分钟执行一次
2.3 实时监控脚本
-- 快速健康检查(一键诊断)
SELECT
'Buffer cache hit ratio' AS Metric,
CAST(cntr_value AS VARCHAR) + '%' AS Value,
CASE WHEN cntr_value < 90 THEN '⚠️ 警告:内存可能不足' ELSE '✅ 正常' END AS Status
FROM sys.dm_os_performance_counters
WHERE counter_name = 'Buffer cache hit ratio'
UNION ALL
SELECT
'Page life expectancy',
CAST(cntr_value AS VARCHAR) + ' 秒',
CASE WHEN cntr_value < 300 THEN '⚠️ 警告:内存压力大' ELSE '✅ 正常' END
FROM sys.dm_os_performance_counters
WHERE counter_name = 'Page life expectancy'
UNION ALL
SELECT
'Lazy writes/sec',
CAST(cntr_value AS VARCHAR),
CASE WHEN cntr_value > 20 THEN '⚠️ 警告:内存压力' ELSE '✅ 正常' END
FROM sys.dm_os_performance_counters
WHERE counter_name = 'Lazy writes/sec'
UNION ALL
SELECT
'SQL Compilations/sec',
CAST(cntr_value AS VARCHAR),
CASE WHEN cntr_value > 100 THEN '⚠️ 警告:编译频率过高' ELSE '✅ 正常' END
FROM sys.dm_os_performance_counters
WHERE counter_name = 'SQL Compilations/sec';
三、等待统计(Wait Stats)
3.1 等待类型分类
| 类别 | 等待类型 | 含义 | 解决方案 |
|---|---|---|---|
| I/O | PAGEIOLATCH_* | 从磁盘读取数据 | 加内存、优化查询、换SSD |
| 锁 | LCK_* | 锁争用 | 优化事务、调整隔离级别 |
| CPU | SOS_SCHEDULER_YIELD | CPU 压力 | 优化查询、增加CPU |
| 日志 | WRITELOG | 日志写入慢 | 分离日志盘、批量提交 |
| 网络 | ASYNC_NETWORK_IO | 网络或客户端慢 | 减少返回行数、检查网络 |
| 并行 | CXCONSUMER | 并行查询倾斜 | 调整 MAXDOP |
| 内存 | RESOURCE_SEMAPHORE | 内存不足 | 加内存、优化大查询 |
3.2 等待统计分析脚本
-- 获取当前等待统计(排除无害等待)
WITH Waits AS (
SELECT
wait_type,
wait_time_ms,
waiting_tasks_count,
signal_wait_time_ms,
wait_time_ms - signal_wait_time_ms AS resource_wait_ms,
CAST(100.0 * wait_time_ms / SUM(wait_time_ms) OVER() AS DECIMAL(5,2)) AS percentage
FROM sys.dm_os_wait_stats
WHERE wait_type NOT IN (
'BROKER_EVENTHANDLER', 'BROKER_RECEIVE_WAITFOR', 'BROKER_TASK_STOP',
'BROKER_TO_FLUSH', 'BROKER_TRANSMITTER', 'CHECKPOINT_QUEUE',
'CHKPT', 'CLR_AUTO_EVENT', 'CLR_MANUAL_EVENT', 'CLR_SEMAPHORE',
'DBMIRROR_DBM_EVENT', 'DBMIRROR_EVENTS_QUEUE', 'DBMIRROR_WORKER_QUEUE',
'DBMIRRORING_CMD', 'DIRTY_PAGE_POLL', 'DISPATCHER_QUEUE_SEMAPHORE',
'EXECSYNC', 'FSAGENT', 'FT_IFTS_SCHEDULER_IDLE_WAIT', 'FT_IFTSHC_MUTEX',
'HADR_CLUSAPI_CALL', 'HADR_FILESTREAM_IOMGR_IOCOMPLETION', 'HADR_LOGCAPTURE_WAIT',
'HADR_NOTIFICATION_DEQUEUE', 'HADR_TIMER_TASK', 'HADR_WORK_QUEUE',
'KSOURCE_WAKEUP', 'LAZYWRITER_SLEEP', 'LOGMGR_QUEUE',
'MEMORY_ALLOCATION_EXT', 'ONDEMAND_TASK_QUEUE', 'PREEMPTIVE_OS_LIBRARYOPS',
'PREEMPTIVE_OS_COMOPS', 'PREEMPTIVE_OS_CRYPTOPS', 'PREEMPTIVE_OS_PIPEOPS',
'PREEMPTIVE_OS_AUTHENTICATIONOPS', 'PREEMPTIVE_OS_GENERICOPS', 'PREEMPTIVE_OS_VERIFYTRUST',
'PREEMPTIVE_OS_FILEOPS', 'PREEMPTIVE_OS_DEVICEOPS', 'PREEMPTIVE_OS_QUERYREGISTRY',
'PREEMPTIVE_OS_WRITEFILE', 'PWAIT_ALL_COMPONENTS_INITIALIZED', 'PWAIT_DIRECTLOGCONSUMER_GETNEXT',
'QDS_PERSIST_TASK_MAIN_LOOP_SLEEP', 'QDS_ASYNC_QUEUE', 'QDS_CLEANUP_STALE_QUERIES_TASK_MAIN_LOOP_SLEEP',
'REQUEST_FOR_DEADLOCK_SEARCH', 'RESOURCE_QUEUE', 'SERVER_IDLE_CHECK',
'SLEEP_BPOOL_FLUSH', 'SLEEP_DBSTARTUP', 'SLEEP_DCOMSTARTUP', 'SLEEP_MASTERDBREADY',
'SLEEP_MASTERMDREADY', 'SLEEP_MASTERUPGRADED', 'SLEEP_MSDBSTARTUP', 'SLEEP_SYSTEMTASK',
'SLEEP_TASK', 'SLEEP_TEMPDBSTARTUP', 'SNI_HTTP_ACCEPT', 'SP_SERVER_DIAGNOSTICS_SLEEP',
'SQLTRACE_BUFFER_FLUSH', 'SQLTRACE_INCREMENTAL_FLUSH_SLEEP', 'SQLTRACE_WAIT_ENTRIES',
'WAIT_FOR_RESULTS', 'WAITFOR', 'WAITFOR_TASKSHUTDOWN', 'WAIT_XTP_RECOVERY',
'WAIT_XTP_HOST_WAIT', 'WAIT_XTP_OFFLINE_CKPT_NEW_LOG', 'WAIT_XTP_CKPT_CLOSE',
'XE_DISPATCHER_JOIN', 'XE_DISPATCHER_WAIT', 'XE_TIMER_EVENT'
)
)
SELECT TOP 10
wait_type,
wait_time_ms / 1000 AS wait_sec,
waiting_tasks_count,
resource_wait_ms / 1000 AS resource_wait_sec,
percentage,
REPLICATE('█', CAST(percentage / 5 AS INT)) AS bar
FROM Waits
WHERE wait_time_ms > 0
ORDER BY wait_time_ms DESC;
-- 计算信号等待比例(反映 CPU 压力)
SELECT
SUM(signal_wait_time_ms) * 100.0 / SUM(wait_time_ms) AS signal_wait_percentage
FROM sys.dm_os_wait_stats
WHERE wait_type NOT LIKE '%SLEEP%';
-- signal_wait_percentage > 15% 表示 CPU 压力
3.3 等待统计历史趋势
-- 创建等待统计历史表
CREATE TABLE DBA_WaitStatsHistory (
CollectionTime DATETIME DEFAULT GETDATE(),
wait_type NVARCHAR(120),
wait_time_ms BIGINT,
waiting_tasks_count BIGINT,
signal_wait_time_ms BIGINT
);
-- 创建收集存储过程
CREATE PROC dbo.CollectWaitStats
AS
BEGIN
INSERT INTO DBA_WaitStatsHistory (wait_type, wait_time_ms, waiting_tasks_count, signal_wait_time_ms)
SELECT
wait_type,
wait_time_ms,
waiting_tasks_count,
signal_wait_time_ms
FROM sys.dm_os_wait_stats
WHERE wait_type NOT LIKE '%SLEEP%'
AND wait_type NOT LIKE '%IDLE%';
-- 清理30天前数据
DELETE FROM DBA_WaitStatsHistory
WHERE CollectionTime < DATEADD(day, -30, GETDATE());
END
-- 查看等待趋势(最近7天 vs 上周同期)
WITH CurrentWeek AS (
SELECT
wait_type,
AVG(wait_time_ms) AS avg_wait_ms
FROM DBA_WaitStatsHistory
WHERE CollectionTime >= DATEADD(day, -7, GETDATE())
GROUP BY wait_type
),
LastWeek AS (
SELECT
wait_type,
AVG(wait_time_ms) AS avg_wait_ms
FROM DBA_WaitStatsHistory
WHERE CollectionTime >= DATEADD(day, -14, GETDATE())
AND CollectionTime < DATEADD(day, -7, GETDATE())
GROUP BY wait_type
)
SELECT
c.wait_type,
c.avg_wait_ms AS current_avg_ms,
l.avg_wait_ms AS last_week_avg_ms,
(c.avg_wait_ms - l.avg_wait_ms) * 100.0 / NULLIF(l.avg_wait_ms, 0) AS increase_pct
FROM CurrentWeek c
LEFT JOIN LastWeek l ON c.wait_type = l.wait_type
WHERE c.avg_wait_ms > 1000 -- 超过1秒
ORDER BY c.avg_wait_ms DESC;
四、DMV 核心查询模板
4.1 当前活动会话监控
-- 查看当前正在执行的查询
SELECT
r.session_id,
s.login_name,
s.host_name,
s.program_name,
r.status,
r.blocking_session_id,
r.wait_type,
r.wait_time / 1000 AS wait_sec,
r.last_wait_type,
r.cpu_time,
r.total_elapsed_time / 1000 AS elapsed_sec,
r.logical_reads,
r.writes,
SUBSTRING(st.text, (r.statement_start_offset/2)+1,
((CASE r.statement_end_offset
WHEN -1 THEN DATALENGTH(st.text)
ELSE r.statement_end_offset
END - r.statement_start_offset)/2) + 1) AS query_text,
qp.query_plan
FROM sys.dm_exec_requests r
JOIN sys.dm_exec_sessions s ON r.session_id = s.session_id
CROSS APPLY sys.dm_exec_sql_text(r.sql_handle) st
OUTER APPLY sys.dm_exec_query_plan(r.plan_handle) qp
WHERE r.session_id > 50 -- 排除系统会话
AND r.status NOT IN ('background', 'sleeping')
ORDER BY r.total_elapsed_time DESC;
4.2 阻塞链分析
-- 查找阻塞树
WITH BlockingTree AS (
SELECT
session_id,
blocking_session_id,
wait_type,
wait_time,
last_wait_type,
CAST(session_id AS VARCHAR(MAX)) AS tree_path,
0 AS level
FROM sys.dm_exec_requests
WHERE blocking_session_id > 0
UNION ALL
SELECT
r.session_id,
r.blocking_session_id,
r.wait_type,
r.wait_time,
r.last_wait_type,
bt.tree_path + ' -> ' + CAST(r.session_id AS VARCHAR),
bt.level + 1
FROM sys.dm_exec_requests r
JOIN BlockingTree bt ON r.blocking_session_id = bt.session_id
)
SELECT
session_id AS blocked_session,
blocking_session_id AS blocking_session,
wait_type,
wait_time / 1000 AS wait_sec,
tree_path,
level,
(SELECT TEXT FROM sys.dm_exec_sql_text(sql_handle)) AS query_text
FROM BlockingTree
ORDER BY level, blocking_session_id;
-- 查找锁持有者详情
SELECT
request_session_id,
resource_type,
resource_database_id,
resource_description,
request_mode,
request_status,
request_owner_type
FROM sys.dm_tran_locks
WHERE resource_database_id = DB_ID()
AND request_status = 'GRANT'
ORDER BY request_session_id;
4.3 历史查询性能统计
-- Top 10 最耗 CPU 的查询
SELECT TOP 10
qs.total_worker_time / 1000 AS total_cpu_ms,
qs.execution_count,
qs.total_worker_time / qs.execution_count / 1000 AS avg_cpu_ms,
qs.total_logical_reads,
qs.total_logical_reads / qs.execution_count AS avg_logical_reads,
qs.total_elapsed_time / 1000 AS total_elapsed_ms,
SUBSTRING(st.text, (qs.statement_start_offset/2)+1,
((CASE qs.statement_end_offset
WHEN -1 THEN DATALENGTH(st.text)
ELSE qs.statement_end_offset
END - qs.statement_start_offset)/2) + 1) AS query_text,
qp.query_plan
FROM sys.dm_exec_query_stats qs
CROSS APPLY sys.dm_exec_sql_text(qs.sql_handle) st
OUTER APPLY sys.dm_exec_query_plan(qs.plan_handle) qp
WHERE st.dbid = DB_ID()
ORDER BY qs.total_worker_time DESC;
-- Top 10 最耗 I/O 的查询
SELECT TOP 10
total_logical_reads + total_logical_writes AS total_io,
execution_count,
(total_logical_reads + total_logical_writes) / execution_count AS avg_io,
total_physical_reads,
SUBSTRING(st.text, (qs.statement_start_offset/2)+1,
((CASE qs.statement_end_offset
WHEN -1 THEN DATALENGTH(st.text)
ELSE qs.statement_end_offset
END - qs.statement_start_offset)/2) + 1) AS query_text
FROM sys.dm_exec_query_stats qs
CROSS APPLY sys.dm_exec_sql_text(qs.sql_handle) st
ORDER BY total_logical_reads + total_logical_writes DESC;
-- Top 10 执行次数最多的查询
SELECT TOP 10
execution_count,
total_worker_time / 1000 AS total_cpu_ms,
total_worker_time / execution_count / 1000 AS avg_cpu_ms,
total_logical_reads / execution_count AS avg_logical_reads,
SUBSTRING(st.text, (qs.statement_start_offset/2)+1,
((CASE qs.statement_end_offset
WHEN -1 THEN DATALENGTH(st.text)
ELSE qs.statement_end_offset
END - qs.statement_start_offset)/2) + 1) AS query_text
FROM sys.dm_exec_query_stats qs
CROSS APPLY sys.dm_exec_sql_text(qs.sql_handle) st
ORDER BY execution_count DESC;
4.4 缺失索引与冗余索引
-- 缺失索引建议(按影响程度排序)
SELECT
migs.avg_total_user_cost * migs.avg_user_impact * (migs.user_seeks + migs.user_scans) AS Score,
mid.statement AS TableName,
mid.equality_columns,
mid.inequality_columns,
mid.included_columns,
migs.user_seeks,
migs.user_scans,
migs.avg_total_user_cost,
migs.avg_user_impact
FROM sys.dm_db_missing_index_groups mig
JOIN sys.dm_db_missing_index_group_stats migs ON migs.group_handle = mig.index_handle
JOIN sys.dm_db_missing_index_details mid ON mig.index_handle = mid.index_handle
WHERE mid.database_id = DB_ID()
ORDER BY Score DESC;
-- 查找从未使用过的索引
SELECT
OBJECT_NAME(i.object_id) AS TableName,
i.name AS IndexName,
i.type_desc,
s.user_seeks,
s.user_scans,
s.user_lookups,
s.user_updates,
s.last_user_seek,
s.last_user_scan,
s.last_user_lookup
FROM sys.indexes i
LEFT JOIN sys.dm_db_index_usage_stats s
ON i.object_id = s.object_id AND i.index_id = s.index_id
WHERE OBJECTPROPERTY(i.object_id, 'IsUserTable') = 1
AND (s.user_seeks + s.user_scans + s.user_lookups = 0 OR s.user_seeks IS NULL)
AND i.name IS NOT NULL
AND i.type_desc != 'HEAP'
ORDER BY ISNULL(s.user_updates, 0) DESC;
-- 查找重复索引
WITH IndexColumns AS (
SELECT
OBJECT_NAME(i.object_id) AS TableName,
i.name AS IndexName,
STUFF((
SELECT ',' + c.name
FROM sys.index_columns ic
JOIN sys.columns c ON ic.object_id = c.object_id AND ic.column_id = c.column_id
WHERE ic.object_id = i.object_id
AND ic.index_id = i.index_id
AND ic.is_included_column = 0
ORDER BY ic.key_ordinal
FOR XML PATH('')
), 1, 1, '') AS KeyColumns,
STUFF((
SELECT ',' + c.name
FROM sys.index_columns ic
JOIN sys.columns c ON ic.object_id = c.object_id AND ic.column_id = c.column_id
WHERE ic.object_id = i.object_id
AND ic.index_id = i.index_id
AND ic.is_included_column = 1
ORDER BY ic.key_ordinal
FOR XML PATH('')
), 1, 1, '') AS IncludedColumns
FROM sys.indexes i
WHERE i.type_desc IN ('NONCLUSTERED', 'CLUSTERED')
)
SELECT
t1.TableName,
t1.IndexName AS Index1,
t2.IndexName AS Index2,
t1.KeyColumns
FROM IndexColumns t1
JOIN IndexColumns t2 ON t1.TableName = t2.TableName
AND t1.KeyColumns = t2.KeyColumns
AND t1.IndexName < t2.IndexName
ORDER BY t1.TableName, t1.KeyColumns;
五、基线建立与异常检测
5.1 创建性能基线
-- 创建基线表
CREATE TABLE DBA_PerformanceBaseline (
BaselineID INT IDENTITY PRIMARY KEY,
CollectionTime DATETIME DEFAULT GETDATE(),
MetricName NVARCHAR(100),
MetricValue DECIMAL(20,2),
DayOfWeek TINYINT,
HourOfDay TINYINT
);
-- 收集基线数据(每小时执行)
CREATE PROC dbo.CollectPerformanceBaseline
AS
BEGIN
-- CPU 使用率
INSERT INTO DBA_PerformanceBaseline (MetricName, MetricValue, DayOfWeek, HourOfDay)
SELECT
'CPU_Usage',
(SELECT cntr_value FROM sys.dm_os_performance_counters
WHERE counter_name = '% Processor Time' AND instance_name = '_Total'),
DATEPART(WEEKDAY, GETDATE()),
DATEPART(HOUR, GETDATE());
-- 批处理请求数/秒
INSERT INTO DBA_PerformanceBaseline (MetricName, MetricValue, DayOfWeek, HourOfDay)
SELECT
'BatchRequests',
(SELECT cntr_value FROM sys.dm_os_performance_counters
WHERE counter_name = 'Batch Requests/sec'),
DATEPART(WEEKDAY, GETDATE()),
DATEPART(HOUR, GETDATE());
-- Page Life Expectancy
INSERT INTO DBA_PerformanceBaseline (MetricName, MetricValue, DayOfWeek, HourOfDay)
SELECT
'PLE',
(SELECT cntr_value FROM sys.dm_os_performance_counters
WHERE counter_name = 'Page life expectancy'),
DATEPART(WEEKDAY, GETDATE()),
DATEPART(HOUR, GETDATE());
-- 清理90天前数据
DELETE FROM DBA_PerformanceBaseline
WHERE CollectionTime < DATEADD(day, -90, GETDATE());
END
-- 异常检测(当前值 vs 历史基线)
WITH BaselineStats AS (
SELECT
MetricName,
HourOfDay,
AVG(MetricValue) AS AvgValue,
STDEV(MetricValue) AS StdDev
FROM DBA_PerformanceBaseline
WHERE CollectionTime > DATEADD(day, -30, GETDATE())
GROUP BY MetricName, HourOfDay
)
SELECT
b.MetricName,
b.HourOfDay,
b.AvgValue,
b.StdDev,
c.CurrentValue,
CASE
WHEN c.CurrentValue > b.AvgValue + 3 * b.StdDev THEN '严重异常'
WHEN c.CurrentValue > b.AvgValue + 2 * b.StdDev THEN '警告'
ELSE '正常'
END AS Status
FROM BaselineStats b
CROSS APPLY (
SELECT cntr_value AS CurrentValue
FROM sys.dm_os_performance_counters
WHERE counter_name = CASE
WHEN b.MetricName = 'CPU_Usage' THEN '% Processor Time'
WHEN b.MetricName = 'BatchRequests' THEN 'Batch Requests/sec'
WHEN b.MetricName = 'PLE' THEN 'Page life expectancy'
END
) c
WHERE b.AvgValue > 0;
六、自动化性能报告
6.1 每日性能报告邮件
CREATE PROC dbo.SendDailyPerformanceReport
AS
BEGIN
DECLARE @html NVARCHAR(MAX);
-- 生成 HTML 报告
SET @html = N'
<html>
<head><style>
th {background-color: #4CAF50; color: white; padding: 8px;}
td {border: 1px solid #ddd; padding: 6px;}
.warning {color: orange;}
.critical {color: red;}
</style></head>
<body>
<h2>SQL Server 性能日报 - ' + CONVERT(NVARCHAR, GETDATE(), 111) + N'</h2>
<h3>1. 关键性能指标</h3>
<table>
<tr><th>指标</th><th>当前值</th><th>状态</th></tr>';
-- 添加指标行
SELECT @html = @html + N'
<tr>
<td>' + MetricName + N'</td>
<td>' + CAST(Value AS NVARCHAR) + N'</td>
<td class="' + Status + N'">' + StatusText + N'</td>
</tr>'
FROM (
SELECT 'Buffer Cache Hit Ratio' AS MetricName,
CAST(cntr_value AS NVARCHAR) + '%' AS Value,
CASE WHEN cntr_value < 90 THEN 'critical' WHEN cntr_value < 95 THEN 'warning' ELSE '' END AS Status,
CASE WHEN cntr_value < 90 THEN '⚠️ 严重' WHEN cntr_value < 95 THEN '⚠️ 警告' ELSE '✅ 正常' END AS StatusText
FROM sys.dm_os_performance_counters WHERE counter_name = 'Buffer cache hit ratio'
UNION ALL
SELECT 'Page Life Expectancy',
CAST(cntr_value AS NVARCHAR) + '秒',
CASE WHEN cntr_value < 300 THEN 'critical' ELSE '' END,
CASE WHEN cntr_value < 300 THEN '⚠️ 内存压力' ELSE '✅ 正常' END
FROM sys.dm_os_performance_counters WHERE counter_name = 'Page life expectancy'
) t;
SET @html = @html + N'
</table>
<h3>2. Top 5 耗时查询</h3>
<table>
<tr><th>平均CPU(ms)</th><th>执行次数</th><th>查询文本</th></tr>';
SELECT @html = @html + N'
<tr>
<td>' + CAST(avg_cpu_ms AS NVARCHAR) + N'</td>
<td>' + CAST(execution_count AS NVARCHAR) + N'</td>
<td>' + LEFT(query_text, 100) + N'</td>
</tr>'
FROM (
SELECT TOP 5
total_worker_time / execution_count / 1000 AS avg_cpu_ms,
execution_count,
SUBSTRING(st.text, (qs.statement_start_offset/2)+1, 100) AS query_text
FROM sys.dm_exec_query_stats qs
CROSS APPLY sys.dm_exec_sql_text(qs.sql_handle) st
ORDER BY total_worker_time / execution_count DESC
) t;
SET @html = @html + N'
</table>
<h3>3. 等待统计 Top 5</h3>
<table>
<tr><th>等待类型</th><th>等待时间(秒)</th><th>占比</th></tr>';
SELECT @html = @html + N'
<tr>
<td>' + wait_type + N'</td>
<td>' + CAST(wait_sec AS NVARCHAR) + N'</td>
<td>' + CAST(percentage AS NVARCHAR) + N'%</td>
</tr>'
FROM (
SELECT TOP 5
wait_type,
wait_time_ms / 1000 AS wait_sec,
CAST(100.0 * wait_time_ms / SUM(wait_time_ms) OVER() AS DECIMAL(5,2)) AS percentage
FROM sys.dm_os_wait_stats
WHERE wait_type NOT LIKE '%SLEEP%'
ORDER BY wait_time_ms DESC
) t;
SET @html = @html + N'
</table>
</body></html>';
-- 发送邮件
EXEC msdb.dbo.sp_send_dbmail
@profile_name = 'SQL_Alert_Profile',
@recipients = 'dba@company.com',
@subject = N'SQL Server 性能日报 - ' + CONVERT(NVARCHAR, GETDATE(), 111),
@body = @html,
@body_format = 'HTML';
END
七、核心总结
| 知识点 | 核心要点 |
|---|---|
| 性能计数器 | PLE、Lazy writes、Hit Ratio 三件套 |
| 等待统计 | PAGEIOLATCH→I/O、LCK→锁、SOS_SCHEDULER→CPU |
| 查询统计 | top CPU、top I/O、top executions |
| 阻塞分析 | blocking_session_id、锁资源 |
| 缺失索引 | 从 DMV 获取优化建议 |
| 冗余索引 | 定期清理未使用索引 |
| 性能基线 | 历史对比,识别异常 |
一句话记住本期内容:
性能监控从等待统计看方向,从查询统计看细节,从性能计数器看趋势——三者结合才能精准定位瓶颈。
动手练习
-- 场景:用户反馈系统在每天下午 2-3 点变慢
-- 问题1:如何确认是 CPU、I/O 还是锁的问题?
-- 问题2:如何找到导致慢的具体查询?
-- 问题3:如何验证优化效果?
点击查看参考答案
问题1:查看等待统计
SELECT TOP 3 wait_type, wait_time_ms
FROM sys.dm_os_wait_stats
WHERE wait_type NOT LIKE '%SLEEP%'
ORDER BY wait_time_ms DESC;
-- PAGEIOLATCH→I/O, LCK→锁, SOS_SCHEDULER→CPU
问题2:查询历史统计
SELECT TOP 5
total_worker_time/execution_count AS avg_cpu,
query_text
FROM sys.dm_exec_query_stats
CROSS APPLY sys.dm_exec_sql_text(sql_handle)
ORDER BY total_worker_time/execution_count DESC;
问题3:建立基线对比
-- 优化前后执行同一查询
SET STATISTICS TIME ON;
SET STATISTICS IO ON;
-- 对比逻辑读取和执行时间
下一期预告
故障排查实战——从报警到解决的完整流程
- 常见故障场景:CPU飙升、内存耗尽、磁盘爆满、阻塞死锁
- 故障排查决策树
- 根因分析方法
- 应急预案与 Runbook
- 案例复盘:真实生产故障排查
📌 本文 DMV 查询适用于 SQL Server 2016-2022。部分 DMV 需要 VIEW SERVER STATE 权限。
管理系列持续更新中,点击关注不错过第七期(系列收官之作)。