APM - iOS 卡顿监控 Matrix实现原理
一、简介
卡顿类型
// Define the type of the lag
typedef NS_ENUM(NSUInteger, EDumpType) {
EDumpType_Unlag = 2000,
EDumpType_MainThreadBlock = 2001, // foreground main thread block
EDumpType_BackgroundMainThreadBlock = 2002, // background main thread block
EDumpType_CPUBlock = 2003, // CPU too high
//EDumpType_FrameDropBlock = 2004, // frame drop too much,no use currently
EDumpType_SelfDefinedDump = 2005, // no use currently
//EDumpType_B2FBlock = 2006, // no use currently
EDumpType_LaunchBlock = 2007, // main thread block during the launch of the app
//EDumpType_CPUIntervalHigh = 2008, // CPU too high within a time period
EDumpType_BlockThreadTooMuch = 2009, // main thread block and the thread is too much. (more than 64 threads)
EDumpType_BlockAndBeKilled = 2010, // main thread block and killed by the system
//EDumpType_JSStack = 2011, // no use currently
EDumpType_PowerConsume = 2011, // battery cost stack report
EDumpType_DiskIO = 2013, // disk io too much
EDumpType_FPS = 2014, // FPS
EDumpType_Test = 10000,
};
卡顿判断依据
-
Runloop
- CommonMode
- UIInitializationMode
-
启动状态
-
前后台切换
-
CPU
-
Memory
-
线程信息
-
电量信息
二、卡顿监控SDK
1. RunLoop Observer
添加了4个Observer
-
kCFRunLoopCommonModes
- BeginObserver
- EndObserver
-
UIInitializationRunLoopMode
- BeginObserver
- EndObserver
- kCFRunLoopCommonModes用于观察APP通常情况的卡顿
- UIInitializationRunLoopMode用于观察APP启动时的卡顿,该RunloopMode只执行1次就退出
添加Begin和EndObserver分别对应的是最高和最终优先级,类似于AutoreleasePool,需要在最前和最后执行回调,把其中执行的函数都包含在中间。
- (void)addRunloopObserver {
NSRunLoop *curRunLoop = [NSRunLoop currentRunLoop];
CFRunLoopObserverContext context = {0, (__bridge void *) self, NULL, NULL, NULL};
CFRunLoopObserverRef beginObserver = CFRunLoopObserverCreate(kCFAllocatorDefault, kCFRunLoopAllActivities, YES, LONG_MIN, &myRunLoopBeginCallback, &context);
CFRetain(beginObserver);
runloopBeginObserver = beginObserver;
CFRunLoopObserverRef endObserver = CFRunLoopObserverCreate(kCFAllocatorDefault, kCFRunLoopAllActivities, YES, LONG_MAX, &myRunLoopEndCallback, &context);
CFRetain(endObserver);
runloopEndObserver = endObserver;
CFRunLoopRef runloop = [curRunLoop getCFRunLoop];
CFRunLoopAddObserver(runloop, beginObserver, kCFRunLoopCommonModes);
CFRunLoopAddObserver(runloop, endObserver, kCFRunLoopCommonModes);
CFRunLoopObserverContext initializationContext = {0, (__bridge void *) self, NULL, NULL, NULL};
initializationBeginObserver = CFRunLoopObserverCreate(kCFAllocatorDefault, kCFRunLoopAllActivities, YES, LONG_MIN, &myInitializetionRunLoopBeginCallback, &initializationContext);
CFRetain(initializationBeginObserver);
initializationEndObserver = CFRunLoopObserverCreate(kCFAllocatorDefault, kCFRunLoopAllActivities, YES, LONG_MAX, &myInitializetionRunLoopEndCallback, &initializationContext);
CFRetain(initializationEndObserver);
CFRunLoopAddObserver(runloop, initializationBeginObserver, (CFRunLoopMode)@"UIInitializationRunLoopMode");
CFRunLoopAddObserver(runloop, initializationEndObserver, (CFRunLoopMode)@"UIInitializationRunLoopMode");
}
2. 建立Check Thread
创建线程
- (void)addThread {
thread = [[NSThread alloc] initWithTarget:self selector:@selector(threadProc) object:nil];
[thread start];
}
线程处理
- (void)threadProc {
sleep(5);
while (YES) {
NSLog(@"threadProc");
BOOL tmp_g_bRun = g_bRun;
struct timeval tmp_g_tvRun = g_tvRun;
struct timeval tvCur;
gettimeofday(&tvCur, NULL);
unsigned long long diff = [SLLagManager diffTime:&tmp_g_tvRun endTime:&tvCur];
if (tmp_g_bRun && diff > 2000000) {
NSLog(@"超时了");
}
usleep(1000000);
}
}
阈值计算
阈值设定是2秒,检测线程的间隔是1秒。
通过检测当时的时间和g_tvRun全局变量的时间做差值,超过阈值就记录卡顿。
g_tvRun的时间,是上一次AfterWaiting之后更新的时间戳。Runloop每次循环之后,都会更新g_tvRun的数值,如果一直不更新,该数值跟当前的系统时间的差值就会超过阈值,代表当前的Runloop已经卡顿在某一个loop内。
g_tvRun赋值逻辑
void myRunLoopBeginCallback(CFRunLoopObserverRef observer, CFRunLoopActivity activity, void *info) {
NSLog(@"----------- myRunLoopBeginCallback ------------");
switch (activity) {
case kCFRunLoopEntry:
g_bRun = YES;
NSLog(@"kCFRunLoopEntry");
break;
case kCFRunLoopBeforeTimers:
if (g_bRun == NO) {
gettimeofday(&g_tvRun, NULL);
}
g_bRun = YES;
NSLog(@"kCFRunLoopBeforeTimers");
break;
case kCFRunLoopBeforeSources:
if (g_bRun == NO) {
gettimeofday(&g_tvRun, NULL);
}
g_bRun = YES;
NSLog(@"kCFRunLoopBeforeSources");
break;
case kCFRunLoopAfterWaiting:
if (g_bRun == NO) {
gettimeofday(&g_tvRun, NULL);
}
g_bRun = YES;
NSLog(@"kCFRunLoopAfterWaiting");
break;
case kCFRunLoopAllActivities:
break;
default:
break;
}
}
void myRunLoopEndCallback(CFRunLoopObserverRef observer, CFRunLoopActivity activity, void *info) {
NSLog(@"----------- myRunLoopEndCallback ------------");
switch (activity) {
case kCFRunLoopBeforeWaiting:
gettimeofday(&g_tvRun, NULL);
g_bRun = NO;
NSLog(@"kCFRunLoopBeforeWaiting");
break;
case kCFRunLoopExit:
g_bRun = NO;
NSLog(@"kCFRunLoopExit");
break;
case kCFRunLoopAllActivities:
NSLog(@"kCFRunLoopAllActivities");
break;
default:
break;
}
}
- (void)threadProc {
sleep(5);
while (YES) {
NSLog(@"threadProc");
BOOL tmp_g_bRun = g_bRun;
struct timeval tmp_g_tvRun = g_tvRun;
struct timeval tvCur;
gettimeofday(&tvCur, NULL);
unsigned long long diff = [SLLagManager diffTime:&tmp_g_tvRun endTime:&tvCur];
if (tmp_g_bRun && diff > 2000000) {
NSLog(@"超时了");
}
usleep(1000000);
}
}
如果kCFRunLoopBeforeTimers,kCFRunLoopBeforeSources,kCFRunLoopAfterWaiting状态,超过了阈值时间这些状态还没发生改变的话,会触发超时监控。
+ (unsigned long long)diffTime:(struct timeval *)tvStart endTime:(struct timeval *)tvEnd
{
return 1000000 * (tvEnd->tv_sec - tvStart->tv_sec) + tvEnd->tv_usec - tvStart->tv_usec;
}
3. 确定Dump类型
结合了APP前后台状态,是否处于启动状态,CPU,Memory等信息,判断出Lag属于枚举中的哪个类型,从而确定DumpType。
if (EDumpType_BackgroundMainThreadBlock == dumpType || EDumpType_MainThreadBlock == dumpType) {
//主线程卡顿的情况下,抓取堆栈,通过循环队列和栈顶函数重复次数来计算最近最耗时堆栈
} else if (EDumpType_CPUBlock == dumpType) {
//thread的数组,thread对应消耗CPU的数组,thread对应堆栈信息
}
4. 退火算法
过滤类型
typedef NS_ENUM(NSUInteger, EFilterType) {
EFilterType_None = 0,
EFilterType_Meaningless = 1, // the adress count of the stack is too little
EFilterType_Annealing = 2, // the Annealing algorithm, filter the continuous same stack
EFilterType_TrigerByTooMuch = 3, // filter the stack that appear too much one day
};
其中包含了堆栈过短,退火算法,当天出现触发过多卡顿等情况。
if (bIsSame) {
NSUInteger lastTimeInterval = m_nIntervalTime;
m_nIntervalTime = m_nLastTimeInterval + m_nIntervalTime;
m_nLastTimeInterval = lastTimeInterval;
MatrixInfo(@"call stack same timeinterval = %lu", (unsigned long) m_nIntervalTime);
return EFilterType_Annealing;
}
5. 堆栈获取
获取主线程堆栈,间隔是usleep(g_PerStackInterval),默认是50ms
for (int index = 0; index < intervalCount && !m_bStop; index++) {
usleep(g_PerStackInterval);
size_t stackBytes = sizeof(uintptr_t) * g_StackMaxCount;
uintptr_t *stackArray = (uintptr_t *) malloc(stackBytes);
if (stackArray == NULL) {
continue;
}
__block size_t nSum = 0;
memset(stackArray, 0, stackBytes);
[WCGetMainThreadUtil getCurrentMainThreadStack:^(NSUInteger pc) {
stackArray[nSum] = (uintptr_t) pc;
nSum++;
}
withMaxEntries:g_StackMaxCount
withThreadCount:g_CurrentThreadCount];
[m_pointMainThreadHandler addThreadStack:stackArray andStackCount:nSum];
}
堆栈获取的实现
+ (int)getCurrentMainThreadStack:(void (^)(NSUInteger pc))saveResultBlock
withMaxEntries:(NSUInteger)maxEntries
withThreadCount:(NSUInteger &)retThreadCount
{
thread_act_array_t threads;
mach_msg_type_number_t thread_count;
if (task_threads(mach_task_self(), &threads, &thread_count) != KERN_SUCCESS) {
return 0;
}
thread_t mainThread = threads[0];
KSThread currentThread = ksthread_self();
if (mainThread == currentThread) {
return 0;
}
if (thread_suspend(mainThread) != KERN_SUCCESS) {
return 0;
}
uintptr_t backtraceBuffer[maxEntries];
int backTraceLength = kssc_backtraceCurrentThread(mainThread, backtraceBuffer, (int) maxEntries);
for (int i = 0; i < backTraceLength; i++) {
NSUInteger pc = backtraceBuffer[i];
saveResultBlock(pc);
}
retThreadCount = thread_count;
thread_resume(mainThread);
for (mach_msg_type_number_t i = 0; i < thread_count; i++) {
mach_port_deallocate(mach_task_self(), threads[i]);
}
vm_deallocate(mach_task_self(), (vm_address_t) threads, sizeof(thread_t) * thread_count);
return backTraceLength;
}
- (void)addThreadStack:(uintptr_t *)stackArray andStackCount:(size_t)stackCount
{
if (stackArray == NULL) {
return;
}
if (m_mainThreadStackCycleArray == NULL || m_mainThreadStackCount == NULL) {
return;
}
pthread_mutex_lock(&m_threadLock);
if (m_mainThreadStackCycleArray[m_tailPoint] != NULL) {
free(m_mainThreadStackCycleArray[m_tailPoint]);
}
m_mainThreadStackCycleArray[m_tailPoint] = stackArray;
m_mainThreadStackCount[m_tailPoint] = stackCount;
uint64_t lastTailPoint = (m_tailPoint + m_cycleArrayCount - 1) % m_cycleArrayCount;
uintptr_t lastTopStack = 0;
if (m_mainThreadStackCycleArray[lastTailPoint] != NULL) {
lastTopStack = m_mainThreadStackCycleArray[lastTailPoint][0];
}
uintptr_t currentTopStackAddr = stackArray[0];
if (lastTopStack == currentTopStackAddr) {
size_t lastRepeatCount = m_topStackAddressRepeatArray[lastTailPoint];
m_topStackAddressRepeatArray[m_tailPoint] = lastRepeatCount + 1;
} else {
m_topStackAddressRepeatArray[m_tailPoint] = 0;
}
m_tailPoint = (m_tailPoint + 1) % m_cycleArrayCount;
pthread_mutex_unlock(&m_threadLock);
}
6. 最近最耗时堆栈数据处理
获取循环队列和计算最近最耗时堆栈中每个栈帧的耗时(重复次数)
- (KSStackCursor *)getPointStackCursor
{
pthread_mutex_lock(&m_threadLock);
size_t maxValue = 0;
BOOL trueStack = NO;
for (int i = 0; i < m_cycleArrayCount; i++) {
size_t currentValue = m_topStackAddressRepeatArray[i];
int stackCount = (int) m_mainThreadStackCount[i];
if (currentValue >= maxValue && stackCount > SHORTEST_LENGTH_OF_STACK) {
maxValue = currentValue;
trueStack = YES;
}
}
if (!trueStack) {
return NULL;
}
size_t currentIndex = (m_tailPoint + m_cycleArrayCount - 1) % m_cycleArrayCount;
for (int i = 0; i < m_cycleArrayCount; i++) {
int trueIndex = (m_tailPoint + m_cycleArrayCount - i - 1) % m_cycleArrayCount;
int stackCount = (int) m_mainThreadStackCount[trueIndex];
if (m_topStackAddressRepeatArray[trueIndex] == maxValue && stackCount > SHORTEST_LENGTH_OF_STACK) {
currentIndex = trueIndex;
break;
}
}
// current count of point stack
size_t stackCount = m_mainThreadStackCount[currentIndex];
size_t pointThreadSize = sizeof(uintptr_t) * stackCount;
uintptr_t *pointThreadStack = (uintptr_t *) malloc(pointThreadSize);
size_t repeatCountArrayBytes = stackCount * sizeof(int);
m_mainThreadStackRepeatCountArray = (int *) malloc(repeatCountArrayBytes);
if (m_mainThreadStackRepeatCountArray != NULL) {
memset(m_mainThreadStackRepeatCountArray, 0, repeatCountArrayBytes);
}
// calculate the repeat count
for (size_t i = 0; i < stackCount; i++) {
for (int innerIndex = 0; innerIndex < m_cycleArrayCount; innerIndex++) {
size_t innerStackCount = m_mainThreadStackCount[innerIndex];
for (size_t idx = 0; idx < innerStackCount; idx++) {
// point stack i_th address compare to others
if (m_mainThreadStackCycleArray[currentIndex][i] == m_mainThreadStackCycleArray[innerIndex][idx]) {
m_mainThreadStackRepeatCountArray[i] += 1;
}
}
}
}
if (pointThreadStack != NULL) {
memset(pointThreadStack, 0, pointThreadSize);
for (size_t idx = 0; idx < stackCount; idx++) {
pointThreadStack[idx] = m_mainThreadStackCycleArray[currentIndex][idx];
}
KSStackCursor *pointCursor = (KSStackCursor *) malloc(sizeof(KSStackCursor));
kssc_initWithBacktrace(pointCursor, pointThreadStack, (int) stackCount, 0);
pthread_mutex_unlock(&m_threadLock);
return pointCursor;
}
pthread_mutex_unlock(&m_threadLock);
return NULL;
}
7. 写入文件
防止对内存产生影响
调用关系
WCDumpInterface中封装了多个方法,最后由KSCrash做对应实现。
下面代码是写入文件,其中int* stackRepeatCountArray = g_pointThreadRepeatNumberWriteCallback();是写入了最近最耗时堆栈中栈帧的耗时(重复次数)信息。
static void writeBacktraceWithCount(const KSCrashReportWriter* const writer,
const char* const key,
KSStackCursor* stackCursor,
bool isRepeatCount)
{
int* stackRepeatCountArray = g_pointThreadRepeatNumberWriteCallback();
writer->beginObject(writer, key);
{
writer->beginArray(writer, KSCrashField_Contents);
{
g_tailPoint = 0;
while(stackCursor->advanceCursor(stackCursor))
{
if (g_tailPoint == 100) {
g_lastFirstAddr = stackCursor->stackEntry.address;
}
if (g_tailPoint == 101) {
g_lastSecndAddr = stackCursor->stackEntry.address;
}
if (g_tailPoint == 102) {
g_lastThirdAddr = stackCursor->stackEntry.address;
}
if (g_tailPoint == 103) {
g_lastForthAddr = stackCursor->stackEntry.address;
}
if (g_tailPoint == 104) {
g_lastFifthAddr = stackCursor->stackEntry.address;
}
g_tailPoint++;
if (g_tailPoint > 105) {
if (stackCursor->stackEntry.address == g_lastFirstAddr ||
stackCursor->stackEntry.address == g_lastSecndAddr ||
stackCursor->stackEntry.address == g_lastThirdAddr ||
stackCursor->stackEntry.address == g_lastForthAddr ||
stackCursor->stackEntry.address == g_lastFifthAddr) {
continue;
}
}
writer->beginObject(writer, NULL);
{
if(stackCursor->symbolicate(stackCursor))
{
if(stackCursor->stackEntry.imageName != NULL)
{
writer->addStringElement(writer, KSCrashField_ObjectName, ksfu_lastPathEntry(stackCursor->stackEntry.imageName));
}
writer->addUIntegerElement(writer, KSCrashField_ObjectAddr, stackCursor->stackEntry.imageAddress);
if(stackCursor->stackEntry.symbolName != NULL)
{
writer->addStringElement(writer, KSCrashField_SymbolName, stackCursor->stackEntry.symbolName);
}
writer->addUIntegerElement(writer, KSCrashField_SymbolAddr, stackCursor->stackEntry.symbolAddress);
}
writer->addUIntegerElement(writer, KSCrashField_InstructionAddr, stackCursor->stackEntry.address);
if (isRepeatCount) {
writer->addIntegerElement(writer, KSCrashField_RepeatCount, *(stackRepeatCountArray + g_tailPoint - 1));
}
}
writer->endContainer(writer);
}
}
writer->endContainer(writer);
writer->addIntegerElement(writer, KSCrashField_Skipped, 0);
}
writer->endContainer(writer);
}
三、数据格式
"name":"HandledThread"是对应的添加过耗时(重复信息)的最近最耗时堆栈。
"index":0是对应的主线程堆栈。
以下JSON数据有精简
{
"backtrace":{
"skipped":0,
"contents":[
{
"repeat_count":10,
"symbol_name":"kevent_id",
"symbol_addr":7722398112,
"instruction_addr":7722398120,
"object_name":"libsystem_kernel.dylib",
"object_addr":7722221568
},
{
"repeat_count":10,
"symbol_name":"<redacted>",
"symbol_addr":6945361632,
"instruction_addr":6945361864,
"object_name":"libdispatch.dylib",
"object_addr":6945206272
},
{
"repeat_count":10,
"symbol_name":"<redacted>",
"symbol_addr":6945364172,
"instruction_addr":6945364612,
"object_name":"libdispatch.dylib",
"object_addr":6945206272
},
{
"repeat_count":10,
"symbol_name":"<redacted>",
"symbol_addr":6945286800,
"instruction_addr":6945287148,
"object_name":"libdispatch.dylib",
"object_addr":6945206272
}
]
},
"current_thread":false,
"crashed":false,
"name":"HandledThread",
"index":0
}
四、符号化
符号化解析我在另一篇文章中有介绍,有兴趣的同学移步看一下。
五、问题和总结
1. 无法检测非栈顶栈帧函数耗时
Matrix获取是最近最耗时堆栈信息,但是如果耗时函数不是栈顶栈帧,是业务代码在堆栈中间的栈帧,那么栈顶栈帧耗时可能都没有超过阈值,但是堆栈中层的栈帧可能会超过阈值。该方案无法检测到这种情况。
我们可以通过上报堆栈快照的数组,来绘制火焰图信息。
2. 堆栈信息冗余
由于是基于KSCrash,而KSCrash中为了兼容可转换成苹果Crash Log的信息格式,造成堆栈信息也携带了过多信息,比如Image List等,导致一次卡顿的堆栈信息在700kb左右。
Matrix中获取是所有线程的信息,其实卡顿的情况主要是主线程的问题,当然也有子线程消耗CPU过多的情况,所以我们可以适当精简到只获取主线程信息。
3. 过早的对卡顿类型进行判断
在得到对应的卡顿类型之后,对元数据做了处理和筛选,在后期迭代变化的过程中,导致部分有效元数据的缺失。