卡顿监控 - ANR 底层机制源码分析

1,840 阅读3分钟

ANR 可能是大家平时开发过程中,相对比较少的一类的问题,从考核过程中就能看出来,很少有同学能够回答完善。既然我们在开发中会遇到,那么我们至少需要知道 Framework 层的原理,ANR 怎么监控?ANR 怎么分析解决?这三个维度。

ANR 大致分为这么两种:四大组件启动超时和 Input 响应超时,它们之间的底层实现原理也是有些差别的,四大组件启动超时 ANR 是一个埋炸弹拆炸弹的过程,而 Input 响应超时 ANR 是一个排雷的过程。四大组件启动超时的原理我相信大家都能看懂,但 Input 响应超时可能就未必了,因为它需要一定的 C++ 基础。

1. 四大组件启动超时 ANR

我们在 Framework 源码分析篇中详细介绍过 Service 启动流程。其中在 Service 进程 attach 到 system_server 进程的过程中会调用 realStartServiceLocked() 方法来埋下炸弹.

private final void realStartServiceLocked(ServiceRecord r, ProcessRecord app, boolean execInFg) throws RemoteException {
    ...
    // 发送delay消息 (SERVICE_TIMEOUT_MSG) 
    bumpServiceExecutingLocked(r, execInFg, "create");
    try {
        ...
        // 最终创建 Service 并执行服务的 onCreate() 方法
        app.thread.scheduleCreateService(r, r.serviceInfo,
                mAm.compatibilityInfoForPackageLocked(r.serviceInfo.applicationInfo),
                app.repProcState);
    } catch (DeadObjectException e) {
        mAm.appDiedLocked(app);
        throw e;
    } finally {
        ...
    }
}

private final void bumpServiceExecutingLocked(ServiceRecord r, boolean fg, String why) {
    ... 
    scheduleServiceTimeoutLocked(r.app);
}

void scheduleServiceTimeoutLocked(ProcessRecord proc) {
    if (proc.executingServices.size() == 0 || proc.thread == null) {
        return;
    }
    long now = SystemClock.uptimeMillis();
    Message msg = mAm.mHandler.obtainMessage(
            ActivityManagerService.SERVICE_TIMEOUT_MSG);
    msg.obj = proc;
    
    // 当超时后仍没有 remove 该 SERVICE_TIMEOUT_MSG 消息,则执行 service Timeout 流程
    mAm.mHandler.sendMessageAtTime(msg,
        proc.execServicesFg ? (now+SERVICE_TIMEOUT) : (now+ SERVICE_BACKGROUND_TIMEOUT));
}

该方法的主要工作发送 delay 消息 (SERVICE_TIMEOUT_MSG)。炸弹已埋下, 我们并不希望炸弹被引爆, 那么就需要在炸弹爆炸之前拆除炸弹。

    private void handleCreateService(CreateServiceData data) {
        ...
        // 反射创建 Service 对象
        java.lang.ClassLoader cl = packageInfo.getClassLoader();
        Service service = (Service) cl.loadClass(data.info.name).newInstance();
        ...

        try {
            // 创建 ContextImpl 对象
            ContextImpl context = ContextImpl.createAppContext(this, packageInfo);
            context.setOuterContext(service);
            // 创建 Application 对象
            Application app = packageInfo.makeApplication(false, mInstrumentation);
            service.attach(context, this, data.info.name, data.token, app,
                    ActivityManagerNative.getDefault());
            // 调用服务 onCreate() 方法 
            service.onCreate();
            
            // 拆除炸弹引线 
            ActivityManagerNative.getDefault().serviceDoneExecuting(
                    data.token, SERVICE_DONE_EXECUTING_ANON, 0, 0);
        } catch (Exception e) {
            ...
        }
    }

    private void serviceDoneExecutingLocked(ServiceRecord r, boolean inDestroying, boolean finishing) {
      ...
      if (r.executeNesting <= 0) {
        if (r.app != null) {
            r.app.execServicesFg = false;
            r.app.executingServices.remove(r);
            if (r.app.executingServices.size() == 0) {
                //当前服务所在进程中没有正在执行的service
                mAm.mHandler.removeMessages(ActivityManagerService.SERVICE_TIMEOUT_MSG, r.app);
          ...
        }
      ...
    }

如果整个启动过程中一切正常那么就会 removeMessages ,如果不正常则会引爆炸弹

final class MainHandler extends Handler {
    public void handleMessage(Message msg) {
        switch (msg.what) {
            case SERVICE_TIMEOUT_MSG: {
                ...
                mServices.serviceTimeout((ProcessRecord)msg.obj);
            } break;
            ...
        }
        ...
    }
}

void serviceTimeout(ProcessRecord proc) {
    String anrMessage = null;

    synchronized(mAm) {
        if (proc.executingServices.size() == 0 || proc.thread == null) {
            return;
        }
        final long now = SystemClock.uptimeMillis();
        final long maxTime =  now -
                (proc.execServicesFg ? SERVICE_TIMEOUT : SERVICE_BACKGROUND_TIMEOUT);
        ServiceRecord timeout = null;
        long nextTime = 0;
        for (int i=proc.executingServices.size()-1; i>=0; i--) {
            ServiceRecord sr = proc.executingServices.valueAt(i);
            if (sr.executingStart < maxTime) {
                timeout = sr;
                break;
            }
            if (sr.executingStart > nextTime) {
                nextTime = sr.executingStart;
            }
        }
        if (timeout != null && mAm.mLruProcesses.contains(proc)) {
            Slog.w(TAG, "Timeout executing service: " + timeout);
            StringWriter sw = new StringWriter();
            PrintWriter pw = new FastPrintWriter(sw, false, 1024);
            pw.println(timeout);
            timeout.dump(pw, " ");
            pw.close();
            mLastAnrDump = sw.toString();
            mAm.mHandler.removeCallbacks(mLastAnrDumpClearer);
            mAm.mHandler.postDelayed(mLastAnrDumpClearer, LAST_ANR_LIFETIME_DURATION_MSECS);
            anrMessage = "executing service " + timeout.shortName;
        }
    }

    if (anrMessage != null) {
        // 当存在timeout的service,则执行appNotResponding
        mAm.appNotResponding(proc, null, null, false, anrMessage);
    }
}

2. Input 响应超时 ANR

在正式开始分析 Input 事件的 ANR 触发原理以及触发场景之前,大家需要先回顾一下 Input 事件的分发流程,这个也在 Framework 源码分析篇中。

Input ANR 时间区间便是指当前这次的事件 dispatch 过程中执行 findFocusedWindowTargetsLocked() 方法到下一次执行 resetANRTimeoutsLocked() 的时间区间。看源码有 5 个时机会 reset。都位于InputDispatcher.cpp 文件:

void InputDispatcher::dispatchOnceInnerLocked(nsecs_t* nextWakeupTime) {
    
    // Ready to start a new event.
    // If we don't already have a pending event, go grab one.
    if (! mPendingEvent) {
        ......
        // Get ready to dispatch the event.
        resetANRTimeoutsLocked();
    }
    ......
    switch (mPendingEvent->type) {
    
    case EventEntry::TYPE_MOTION: {
        ......
        done = dispatchMotionLocked(currentTime, typedEntry,
                &dropReason, nextWakeupTime);
        break;
    }

    ......
}

int32_t InputDispatcher::findFocusedWindowTargetsLocked(nsecs_t currentTime,
        const EventEntry* entry, Vector<InputTarget>& inputTargets, nsecs_t* nextWakeupTime) {
    int32_t injectionResult;
    String8 reason;

    // If there is no currently focused window and no focused application
    // then drop the event.
    if (mFocusedWindowHandle == NULL) {
        if (mFocusedApplicationHandle != NULL) {
            injectionResult = handleTargetsNotReadyLocked(currentTime, entry,
                    mFocusedApplicationHandle, NULL, nextWakeupTime,
                    "Waiting because no window has focus but there is a "
                    "focused application that may eventually add a window "
                    "when it finishes starting up.");
            goto Unresponsive;
        }
        goto Failed;
    }
    ......
    return injectionResult;
}

int32_t InputDispatcher::handleTargetsNotReadyLocked(nsecs_t currentTime,
        const EventEntry* entry,
        const sp<InputApplicationHandle>& applicationHandle,
        const sp<InputWindowHandle>& windowHandle,
        nsecs_t* nextWakeupTime, const char* reason) {
    if (applicationHandle == NULL && windowHandle == NULL) {
        ...
    } else {
        if (mInputTargetWaitCause != INPUT_TARGET_WAIT_CAUSE_APPLICATION_NOT_READY) {
            if (windowHandle != NULL) {
                timeout = windowHandle->getDispatchingTimeout(DEFAULT_INPUT_DISPATCHING_TIMEOUT);
            } else if (applicationHandle != NULL) {
                timeout = applicationHandle->getDispatchingTimeout(
                        DEFAULT_INPUT_DISPATCHING_TIMEOUT);
            } else {
                timeout = DEFAULT_INPUT_DISPATCHING_TIMEOUT;
            }

            mInputTargetWaitCause = INPUT_TARGET_WAIT_CAUSE_APPLICATION_NOT_READY;
            mInputTargetWaitStartTime = currentTime;
            // 这个就是超时时间了
            mInputTargetWaitTimeoutTime = currentTime + timeout;
            mInputTargetWaitTimeoutExpired = false;
            mInputTargetWaitApplicationHandle.clear();

            if (windowHandle != NULL) {
                mInputTargetWaitApplicationHandle = windowHandle->inputApplicationHandle;
            }
            if (mInputTargetWaitApplicationHandle == NULL && applicationHandle != NULL) {
                mInputTargetWaitApplicationHandle = applicationHandle;
            }
        }
    }

    if (mInputTargetWaitTimeoutExpired) {
        return INPUT_EVENT_INJECTION_TIMED_OUT;
    }
}

视频链接: pan.baidu.com/s/1bYHPvx3I…
视频密码: l5df