Process Hacker 获取 Windows GPU 性能数据的方法

1,684 阅读2分钟

本文已参与「新人创作礼」活动,一起开启掘金创作之路。

processhacker 源码

processhacker 中对于 GPU 性能数据获取的主要逻辑在 /plugins/ExtendedTools/gpumon.c 文件中。

性能数据通过extern修饰的全局变量的形式存在,例如:

...
// Excerpt of the global GPU statistics variables (`...` marks elided declarations).
ULONG EtGpuTotalNodeCount = 0;      //  total number of GPU nodes
ULONG EtGpuTotalSegmentCount = 0;
ULONG EtGpuNextNodeIndex = 0;
...
ULONG64 EtGpuDedicatedLimit = 0;
ULONG64 EtGpuDedicatedUsage = 0;    //  GPU dedicated memory
ULONG64 EtGpuSharedLimit = 0;
ULONG64 EtGpuSharedUsage = 0;       //  GPU shared memory
FLOAT EtGpuPowerUsageLimit = 100.0f;
...

依赖项

perflib:

#include <perflib.h>

链接 AdvAPI32.dll

PerfAddCounters(...);
PerfQueryCounterData(...);

DXGI:

PhLoadLibrary(L"dxgi.dll");     //  DirectX Graphics

D3DKMTQueryAdapterInfo(&queryAdapterInfo);

数据流

Process Hacker 有两套获取 GPU 性能数据的逻辑,通过 EtD3DEnabled 来标识,该标识的设置逻辑如下:


// Initializes the GPU monitoring flags (excerpt; `...` marks elided upstream code).
// Nothing in the visible portion runs unless the SETTING_NAME_ENABLE_GPU_MONITOR
// setting is non-zero.
VOID EtGpuMonitorInitialization(
    VOID
    )
{
    if (PhGetIntegerSetting(SETTING_NAME_ENABLE_GPU_MONITOR))
    {
        // GPU support requires Windows 10 RS4 or later.
        EtGpuSupported = PhWindowsVersion >= WINDOWS_10_RS4;
        // EtD3DEnabled additionally requires the SETTING_NAME_ENABLE_GPUPERFCOUNTERS setting;
        // `!!` normalizes the integer setting to 0/1.
        EtD3DEnabled = EtGpuSupported && !!PhGetIntegerSetting(SETTING_NAME_ENABLE_GPUPERFCOUNTERS);
        // Adapter list created with an initial capacity argument of 4.
        EtpGpuAdapterList = PhCreateList(4);
        // Monitoring is marked enabled only when D3D statistics initialization succeeds.
        if (EtpInitializeD3DStatistics())
            EtGpuEnabled = TRUE;
    }
    ...
}

EtD3DEnabled == true

graph TD
PerfQueryCounterData[PerfQueryCounterData] -->|buffer| EtPerfCounterGetCounterData(EtPerfCounterGetCounterData)

EtPerfCounterGetCounterData[EtPerfCounterGetCounterData] -->|perfQueryBuffer|EtUpdatePerfCounterData(EtUpdatePerfCounterData)

EtUpdatePerfCounterData[EtUpdatePerfCounterData] -->|counter| EtPerfCounterProcessGpuEngineUtilizationCounter(EtPerfCounterProcessGpuEngineUtilizationCounter)

EtUpdatePerfCounterData[EtUpdatePerfCounterData] -->|counter|EtPerfCounterGpuProcessUtilizationCounter(EtPerfCounterGpuProcessUtilizationCounter)

EtUpdatePerfCounterData[EtUpdatePerfCounterData] -->|counter|EtPerfCounterGpuAdapterDedicatedCounter(EtPerfCounterGpuAdapterDedicatedCounter)

EtPerfCounterProcessGpuEngineUtilizationCounter[EtPerfCounterProcessGpuEngineUtilizationCounter]-.->
EtGpuRunningTimeHashTable(EtGpuRunningTimeHashTable)

EtPerfCounterGpuProcessUtilizationCounter[EtPerfCounterGpuProcessUtilizationCounter]-.->
EtGpuProcessCounterHashTable(EtGpuProcessCounterHashTable)

EtPerfCounterGpuAdapterDedicatedCounter[EtPerfCounterGpuAdapterDedicatedCounter]-.->
EtGpuAdapterDedicatedHashTable(EtGpuAdapterDedicatedHashTable)

EtGpuRunningTimeHashTable(EtGpuRunningTimeHashTable) --> EtLookupTotalGpuUtilization(EtLookupTotalGpuUtilization)

EtGpuAdapterDedicatedHashTable(EtGpuAdapterDedicatedHashTable) --> 
EtLookupTotalGpuDedicated(EtLookupTotalGpuDedicated)

EtGpuProcessCounterHashTable(EtGpuProcessCounterHashTable) --> 
EtLookupTotalGpuShared(EtLookupTotalGpuShared)


EtLookupTotalGpuUtilization(EtLookupTotalGpuUtilization) --> 
EtGpuProcessesUpdatedCallback(EtGpuProcessesUpdatedCallback)


EtLookupTotalGpuDedicated(EtLookupTotalGpuDedicated) --> 
EtGpuProcessesUpdatedCallback(EtGpuProcessesUpdatedCallback)

EtLookupTotalGpuShared(EtLookupTotalGpuShared) --> 
EtGpuProcessesUpdatedCallback(EtGpuProcessesUpdatedCallback)

从原始数据到最后被monitor 拿到的数据:

  1. EtPerfCounterGetCounterData 调用系统API PerfQueryCounterData 拿到原始数据,并将原始数据存入 buffer中:
// Queries the raw counter data for CounterHandle through PerfQueryCounterData
// (excerpt; `...` marks elided code, including the declarations of buffer,
// bufferSize, status and initialBufferSize and the code that produces the
// return value / CounterBuffer output).
_Success_(return)
BOOLEAN EtPerfCounterGetCounterData(
    _In_ HANDLE CounterHandle,
    _Out_ PPERF_DATA_HEADER *CounterBuffer
    )
{
    ...
    // First attempt with the current bufferSize. &bufferSize is passed as the
    // last argument, so the call may update it (presumably to the size actually
    // required — see the PerfQueryCounterData documentation).
    buffer = PhAllocate(bufferSize);
    status = PerfQueryCounterData(
        CounterHandle,
        buffer,
        bufferSize,
        &bufferSize
        );
    // Buffer too small: retry exactly once with the updated bufferSize.
    if (status == ERROR_NOT_ENOUGH_MEMORY)
    {
        // Remember the larger size in initialBufferSize when it grew.
        if (initialBufferSize < bufferSize)
            initialBufferSize = bufferSize;
        PhFree(buffer);
        buffer = PhAllocate(bufferSize);
        status = PerfQueryCounterData(
            CounterHandle,
            buffer,
            bufferSize,
            &bufferSize
            );
    }
    ...
}
  2. EtUpdatePerfCounterData 通过偏移拿取 PPERF_COUNTER_HEADER, PPERF_DATA_HEADER 等数据,再把这些数据进行分类,将不同类型的数据放入不同的 counter 中。
  3. EtPerfCounterGpuAdapterDedicatedCounter, EtPerfCounterGpuProcessUtilizationCounter, EtPerfCounterProcessGpuEngineUtilizationCounter 函数,将 counter 的数据存储到该类型counter 相对应的 hashtable 中。
  4. EtGpuProcessesUpdatedCallback 获取gpu相关数据时,调用 EtLookupTotalGpuUtilization, EtLookupTotalGpuDedicated, EtLookupTotalGpuShared 方法,从每一个 counter 相对应的 hashtable 中拿到数据。

EtD3DEnabled == false

graph TD
D3DKMTQueryStatistics[D3DKMTQueryStatistics] -->|D3DKMT_QUERYSTATISTICS_SEGMENT|EtpUpdateSystemSegmentInformation(EtpUpdateSystemSegmentInformation)

D3DKMTQueryStatistics[D3DKMTQueryStatistics] -->|D3DKMT_QUERYSTATISTICS_NODE|EtpUpdateSystemNodeInformation(EtpUpdateSystemNodeInformation)

EtpUpdateSystemSegmentInformation[EtpUpdateSystemSegmentInformation]-.->EtGpuDedicatedUsage(EtGpuDedicatedUsage)

EtpUpdateSystemSegmentInformation[EtpUpdateSystemSegmentInformation]-.->EtGpuSharedUsage(EtGpuSharedUsage)

EtpUpdateSystemNodeInformation[EtpUpdateSystemNodeInformation]-.->
EtGpuNodesTotalRunningTimeDelta(EtGpuNodesTotalRunningTimeDelta)

EtGpuDedicatedUsage(EtGpuDedicatedUsage) --> 
EtGpuProcessesUpdatedCallback(EtGpuProcessesUpdatedCallback)

EtGpuSharedUsage(EtGpuSharedUsage) --> 
EtGpuProcessesUpdatedCallback(EtGpuProcessesUpdatedCallback)

EtGpuNodesTotalRunningTimeDelta(EtGpuNodesTotalRunningTimeDelta) --> 
EtGpuProcessesUpdatedCallback(EtGpuProcessesUpdatedCallback)

源码

gpumon.c (EtGpuProcessesUpdatedCallback)

// Callback that refreshes the system-wide GPU statistics, the adapter sensor
// values and the per-process GPU statistics, then appends them to the history
// buffers (excerpt; `...` marks elided code, which includes the declarations of
// tempGpuUsage, elapsedTime, maxNodeValue, maxNodeBlock, listEntry, i and runCount).
// Parameter and Context are not referenced in the visible portion.
VOID NTAPI EtGpuProcessesUpdatedCallback(
    _In_opt_ PVOID Parameter,
    _In_opt_ PVOID Context
    )
{
    ...
    // When EtD3DEnabled is set, use the perf-counter based path.
    if (EtD3DEnabled)
    {
        FLOAT gpuTotal;
        ULONG64 dedicatedTotal;
        ULONG64 sharedTotal;
        // Refresh the counter data, then read the system-wide totals.
        EtUpdatePerfCounterData();
        gpuTotal = EtLookupTotalGpuUtilization();
        dedicatedTotal = EtLookupTotalGpuDedicated();
        sharedTotal = EtLookupTotalGpuShared();
        // Cap the total utilization at 1.
        if (gpuTotal > 1)
            gpuTotal = 1;
        // Keep the larger of the existing tempGpuUsage and the new total.
        if (gpuTotal > tempGpuUsage)
            tempGpuUsage = gpuTotal;
        EtGpuNodeUsage = tempGpuUsage;
        EtGpuDedicatedUsage = dedicatedTotal;
        EtGpuSharedUsage = sharedTotal;
    }
    else    //  D3D path not enabled: use D3DKMTQueryStatistics with the different D3DKMT_QUERYSTATISTICS types to fetch the raw data, then compute the derived values
    {
        EtpUpdateSystemSegmentInformation();
        EtpUpdateSystemNodeInformation();
        // Scale the clock delta by 10000000 / frequency (presumably yielding 100ns units — TODO confirm).
        elapsedTime = (DOUBLE)EtClockTotalRunningTimeDelta.Delta * 10000000 / EtClockTotalRunningTimeFrequency.QuadPart;
        // Skip the ratio computation when no time elapsed (avoids dividing by zero).
        if (elapsedTime != 0)
        {
            // System usage is the maximum per-node ratio of running-time delta
            // to elapsed time, with each ratio capped at 1.
            for (i = 0; i < EtGpuTotalNodeCount; i++)
            {
                FLOAT usage = (FLOAT)(EtGpuNodesTotalRunningTimeDelta[i].Delta / elapsedTime);
                if (usage > 1)
                    usage = 1;
                if (usage > tempGpuUsage)
                    tempGpuUsage = usage;
            }
        }
        EtGpuNodeUsage = tempGpuUsage;
    }
    // Adapter sensor values (power, temperature, fan) are gathered only when
    // EtGpuSupported is set and the adapter list is non-empty.
    if (EtGpuSupported && EtpGpuAdapterList->Count)
    {
        FLOAT powerUsage;
        FLOAT temperature;
        ULONG64 fanRpm;
        powerUsage = 0.0f;
        temperature = 0.0f;
        fanRpm = 0;
        for (ULONG i = 0; i < EtpGpuAdapterList->Count; i++)
        {
            PETP_GPU_ADAPTER gpuAdapter;
            D3DKMT_HANDLE adapterHandle;
            D3DKMT_ADAPTER_PERFDATA adapterPerfData;
            gpuAdapter = EtpGpuAdapterList->Items[i];
            //
            // jxy-s: we open this frequently, consider opening this once in the list
            //
            // NOTE(review): adapters that fail to open (or to answer the query below)
            // contribute nothing to the sums but still count in the divisor used
            // for the averages after the loop.
            if (!NT_SUCCESS(EtOpenAdapterFromDeviceName(&adapterHandle, PhGetString(gpuAdapter->DeviceInterface))))
                continue;
            memset(&adapterPerfData, 0, sizeof(D3DKMT_ADAPTER_PERFDATA));
            if (NT_SUCCESS(EtQueryAdapterInformation(
                adapterHandle,
                KMTQAITYPE_ADAPTERPERFDATA,
                &adapterPerfData,
                sizeof(D3DKMT_ADAPTER_PERFDATA)
                )))
            {
                // Power and Temperature are scaled by /1000*100 (net /10); the raw
                // units are presumably tenths — TODO confirm against D3DKMT_ADAPTER_PERFDATA.
                powerUsage += (((FLOAT)adapterPerfData.Power / 1000) * 100);
                temperature += (((FLOAT)adapterPerfData.Temperature / 1000) * 100);
                fanRpm += adapterPerfData.FanRPM;
            }
            // The handle is closed on every iteration that opened it.
            EtCloseAdapterHandle(adapterHandle);
        }
        // Average over all listed adapters (Count is non-zero per the guard above).
        EtGpuPowerUsage = powerUsage / EtpGpuAdapterList->Count;
        EtGpuTemperature = temperature / EtpGpuAdapterList->Count;
        EtGpuFanRpm = fanRpm / EtpGpuAdapterList->Count;
        //
        // Update the limits if we see higher values
        //
        if (EtGpuPowerUsage > EtGpuPowerUsageLimit)
        {
            //
            // Possibly over-clocked power limit
            //
            EtGpuPowerUsageLimit = EtGpuPowerUsage;
        }
        if (EtGpuTemperature > EtGpuTemperatureLimit)
        {
            //
            // Damn that card is hawt
            //
            EtGpuTemperatureLimit = EtGpuTemperature;
        }
        if (EtGpuFanRpm > EtGpuFanRpmLimit)
        {
            //
            // Fan go brrrrrr
            //
            EtGpuFanRpmLimit = EtGpuFanRpm;
        }
    }
    // Update per-process statistics.
    // Note: no lock is needed because we only ever modify the list on this same thread.
    listEntry = EtProcessBlockListHead.Flink;
    while (listEntry != &EtProcessBlockListHead)
    {
        PET_PROCESS_BLOCK block;
        block = CONTAINING_RECORD(listEntry, ET_PROCESS_BLOCK, ListEntry);
        // Skip blocks whose process item is flagged as removed.
        if (block->ProcessItem->State & PH_PROCESS_ITEM_REMOVED)
        {
            listEntry = listEntry->Flink;
            continue;
        }
        // Per-process values come from the perf-counter lookups when EtD3DEnabled is set.
        if (EtD3DEnabled)
        {
            ULONG64 sharedUsage;
            ULONG64 dedicatedUsage;
            ULONG64 commitUsage;
            block->GpuNodeUtilization = EtLookupProcessGpuUtilization(block->ProcessItem->ProcessId);
            if (EtLookupProcessGpuMemoryCounters(
                block->ProcessItem->ProcessId,
                &sharedUsage,
                &dedicatedUsage,
                &commitUsage
                ))
            {
                block->GpuSharedUsage = sharedUsage;
                block->GpuDedicatedUsage = dedicatedUsage;
                block->GpuCommitUsage = commitUsage;
            }
            else
            {
                // Lookup failed for this process: report zero memory usage.
                block->GpuSharedUsage = 0;
                block->GpuDedicatedUsage = 0;
                block->GpuCommitUsage = 0;
            }
            // History is only recorded once runCount is non-zero
            // (i.e. not on the first run, assuming runCount starts at 0).
            if (runCount != 0)
            {
                block->CurrentGpuUsage = block->GpuNodeUtilization;
                // Memory values are divided by PAGE_SIZE and truncated to ULONG for the history buffers.
                block->CurrentMemUsage = (ULONG)(block->GpuDedicatedUsage / PAGE_SIZE);
                block->CurrentMemSharedUsage = (ULONG)(block->GpuSharedUsage / PAGE_SIZE);
                block->CurrentCommitUsage = (ULONG)(block->GpuCommitUsage / PAGE_SIZE);
                PhAddItemCircularBuffer_FLOAT(&block->GpuHistory, block->CurrentGpuUsage);
                PhAddItemCircularBuffer_ULONG(&block->MemoryHistory, block->CurrentMemUsage);
                PhAddItemCircularBuffer_ULONG(&block->MemorySharedHistory, block->CurrentMemSharedUsage);
                PhAddItemCircularBuffer_ULONG(&block->GpuCommittedHistory, block->CurrentCommitUsage);
            }
        }
        else
        {
            // Otherwise update the block's segment/node information directly.
            EtpUpdateProcessSegmentInformation(block);
            EtpUpdateProcessNodeInformation(block);
            // In this path both the utilization and the history entries are
            // updated only when some time has elapsed.
            if (elapsedTime != 0)
            {
                block->GpuNodeUtilization = (FLOAT)(block->GpuRunningTimeDelta.Delta / elapsedTime);
                // HACK
                // Cap the per-process value at the system-wide node usage.
                if (block->GpuNodeUtilization > EtGpuNodeUsage)
                    block->GpuNodeUtilization = EtGpuNodeUsage;
                //for (i = 0; i < EtGpuTotalNodeCount; i++)
                //{
                //    FLOAT usage = (FLOAT)(block->GpuTotalRunningTimeDelta[i].Delta / elapsedTime);
                //
                //    if (usage > block->GpuNodeUtilization)
                //    {
                //        block->GpuNodeUtilization = usage;
                //    }
                //}
                if (block->GpuNodeUtilization > 1)
                    block->GpuNodeUtilization = 1;
                if (runCount != 0)
                {
                    block->CurrentGpuUsage = block->GpuNodeUtilization;
                    block->CurrentMemUsage = (ULONG)(block->GpuDedicatedUsage / PAGE_SIZE);
                    block->CurrentMemSharedUsage = (ULONG)(block->GpuSharedUsage / PAGE_SIZE);
                    block->CurrentCommitUsage = (ULONG)(block->GpuCommitUsage / PAGE_SIZE);
                    PhAddItemCircularBuffer_FLOAT(&block->GpuHistory, block->CurrentGpuUsage);
                    PhAddItemCircularBuffer_ULONG(&block->MemoryHistory, block->CurrentMemUsage);
                    PhAddItemCircularBuffer_ULONG(&block->MemorySharedHistory, block->CurrentMemSharedUsage);
                    PhAddItemCircularBuffer_ULONG(&block->GpuCommittedHistory, block->CurrentCommitUsage);
                }
            }
        }
        // Track the block with the highest utilization seen in this pass.
        if (maxNodeValue < block->GpuNodeUtilization)
        {
            maxNodeValue = block->GpuNodeUtilization;
            maxNodeBlock = block;
        }
        listEntry = listEntry->Flink;
    }
    // Update history buffers.
    if (runCount != 0)
    {
        PhAddItemCircularBuffer_FLOAT(&EtGpuNodeHistory, EtGpuNodeUsage);
        PhAddItemCircularBuffer_ULONG64(&EtGpuDedicatedHistory, EtGpuDedicatedUsage);
        PhAddItemCircularBuffer_ULONG64(&EtGpuSharedHistory, EtGpuSharedUsage);
        if (EtGpuSupported)
        {
            PhAddItemCircularBuffer_FLOAT(&EtGpuPowerUsageHistory, EtGpuPowerUsage);
            PhAddItemCircularBuffer_FLOAT(&EtGpuTemperatureHistory, EtGpuTemperature);
            PhAddItemCircularBuffer_ULONG64(&EtGpuFanRpmHistory, EtGpuFanRpm);
        }
        // Per-node history: perf-counter lookup when EtD3DEnabled is set, otherwise
        // the running-time ratio (or 0 when no time elapsed); values are capped at 1.
        if (EtD3DEnabled)
        {
            for (i = 0; i < EtGpuTotalNodeCount; i++)
            {
                FLOAT usage;
                usage = EtLookupTotalGpuEngineUtilization(i);
                if (usage > 1)
                    usage = 1;
                PhAddItemCircularBuffer_FLOAT(&EtGpuNodesHistory[i], usage);
            }
        }
        else
        {
            if (elapsedTime != 0)
            {
                for (i = 0; i < EtGpuTotalNodeCount; i++)
                {
                    FLOAT usage;
                    usage = (FLOAT)(EtGpuNodesTotalRunningTimeDelta[i].Delta / elapsedTime);
                    if (usage > 1)
                        usage = 1;
                    PhAddItemCircularBuffer_FLOAT(&EtGpuNodesHistory[i], usage);
                }
            }
            else
            {
                for (i = 0; i < EtGpuTotalNodeCount; i++)
                    PhAddItemCircularBuffer_FLOAT(&EtGpuNodesHistory[i], 0);
            }
        }
        // Record the process id and utilization of the top block found above,
        // or zeros when no block qualified.
        if (maxNodeBlock)
        {
            PhAddItemCircularBuffer_ULONG(&EtMaxGpuNodeHistory, HandleToUlong(maxNodeBlock->ProcessItem->ProcessId));
            PhAddItemCircularBuffer_FLOAT(&EtMaxGpuNodeUsageHistory, maxNodeBlock->GpuNodeUtilization);
            PhReferenceProcessRecordForStatistics(maxNodeBlock->ProcessItem->Record);
        }
        else
        {
            PhAddItemCircularBuffer_ULONG(&EtMaxGpuNodeHistory, 0);
            PhAddItemCircularBuffer_FLOAT(&EtMaxGpuNodeUsageHistory, 0);
        }
    }
    // Count this invocation; the history sections above are gated on runCount != 0.
    runCount++;
}