Android知识点3--Android 的Handler机制

Handler是Android消息机制的上层接口，Handler的运行需要底层的MessageQueue和Looper的支撑。

MessageQueue 采用单链表的数据结构来存储消息列表，称为消息队列，它按照Message的时间when来进行排序，是个有序队列。MessageQueue只是一个消息存储单元，它不能去处理消息，这就需要Looper来实现了。Looper会以无限循环的方式查看队列中是否有新消息，有的话就去处理，没有的话就等待。

Handler的主要作用就是将一个任务切换到指定线程中去执行。Android规定UI操作只能在主线程中进行，ViewRootImpl会对UI操作进行线程验证，这个验证是通过ViewRootImpl的checkThread方法来完成的。

Android的UI控件不是线程安全的，而如果采用锁机制，既会降低UI访问的效率，又会阻塞某些线程的执行，所以采用单线程模型来处理UI操作。

一个线程可以有多个Handler，每个Handler只有一个Looper，一个Looper只有一个MessageQueue。

// The Looper this Handler is bound to; assigned once in the constructor.
final Looper mLooper;
// The message queue taken from mLooper (mQueue = mLooper.mQueue in the constructor).
final MessageQueue mQueue;

/** Creates a Handler with a null Callback and the async flag set to false. */
public Handler() {
    this(null , false);
}

/**
 * Binds this Handler to the Looper of the thread that is constructing it
 * (excerpt; "..." marks elided code).
 *
 * @param callback optional callback stored in mCallback; dispatchMessage consults it
 *                 before falling back to handleMessage
 * @param async    stored in mAsynchronous; when true, enqueueMessage marks every
 *                 message sent through this Handler as asynchronous
 * @throws RuntimeException if the calling thread has no Looper
 */
public Handler(Callback callback, boolean async){

        ...
        // Looper.myLooper() reads the calling thread's Looper from a ThreadLocal.
        mLooper = Looper.myLooper();
        if (mLooper == null) {
            throw new RuntimeException(
                "Can't create handler inside thread " + Thread.currentThread()
                        + " that has not called Looper.prepare()");
        }
        // Use the Looper's own queue so messages sent here are read by that Looper.
        mQueue = mLooper.mQueue;
        mCallback = callback;
        mAsynchronous = async;
}

// Per-thread storage for the Looper: written by prepare(), read by myLooper().
static final ThreadLocal<Looper> sThreadLocal = new ThreadLocal<Looper>();

/**
 * Returns the Looper stored for the calling thread in sThreadLocal,
 * or null if no Looper has been set for this thread.
 */
public static @Nullable Looper myLooper() {
    return sThreadLocal.get();
}

Handler中使用的Looper是通过ThreadLocal获取的当前线程的Looper对象。ThreadLocal可以在不同线程中互不干扰地存储并提供数据。线程默认是没有Looper的，所以如果要使用Handler，就必须先为线程创建Looper。而我们常说的主线程，其入口就是ActivityThread的main方法：该方法通过Looper.prepareMainLooper()初始化主线程的Looper对象，并通过Looper.loop()开启无限循环，所以主线程默认可以使用Handler。

/**
 * Creates the Looper for the calling thread (passing quitAllowed = false to prepare)
 * and records it as the main Looper.
 *
 * @throws IllegalStateException if a main Looper has already been recorded
 */
public static void prepareMainLooper() {
    prepare(false);
    synchronized (Looper.class) {
        if (sMainLooper == null) {
            // First call: remember the Looper that prepare() just created for this thread.
            sMainLooper = myLooper();
        } else {
            // The main thread's Looper was already created earlier.
            throw new IllegalStateException("The main Looper has already been prepared.");
        }
    }
}

/**
 * Creates a Looper for the calling thread and stores it in sThreadLocal.
 *
 * @param quitAllowed forwarded to the Looper constructor
 * @throws RuntimeException if the calling thread already has a Looper
 */
private static void prepare(boolean quitAllowed) {
    final Looper existing = sThreadLocal.get();
    if (existing != null) {
        // A thread may own at most one Looper.
        throw new RuntimeException("Only one Looper may be created per thread");
    }
    // Publish the new Looper in the thread-local slot of the calling thread.
    final Looper created = new Looper(quitAllowed);
    sThreadLocal.set(created);
}

/**
 * Creates the Looper together with its MessageQueue and records the creating thread.
 *
 * @param quitAllowed forwarded unchanged to the MessageQueue constructor
 */
private Looper(boolean quitAllowed) {
    mQueue = new MessageQueue(quitAllowed);
    mThread = Thread.currentThread();
}

/**
 * @param quitAllowed stored in mQuitAllowed
 */
MessageQueue(boolean quitAllowed) {
    mQuitAllowed = quitAllowed;
    // The waiting/wake-up part of MessageQueue lives in the native layer: nativeInit()
    // creates the matching NativeMessageQueue object and returns its pointer, kept in mPtr.
    mPtr = nativeInit();
}

/**
 * Returns the calling thread's Looper as stored in sThreadLocal (null when the thread
 * has none). This is what prepareMainLooper() records as the main Looper.
 */
public static @Nullable Looper myLooper() {
    return sThreadLocal.get();
}

/**
 * Runs the message loop (excerpt; "..." marks elided code): repeatedly takes the next
 * message from the queue and hands it to the Handler stored in msg.target.
 */
public static void loop() {
    ...
    for(;;){
        // queue.next() may block inside nativePollOnce until a message is due.
        Message msg = queue.next();
        if(msg == null){
            // A null message ends the loop.
            return;
        }

        try{
            // msg.target is the Handler that enqueued this message
            msg.target.dispatchMessage(msg);
            ...
        } finally {
            ...
        }
    }
}

/**
 * Routes a message taken from the queue to the code that should consume it.
 * Order of precedence: the message's own Runnable callback, then this Handler's
 * mCallback, then handleMessage.
 *
 * @param msg the message being delivered
 */
public void dispatchMessage(Message msg) {
    // Messages created by post(Runnable) carry their Runnable in msg.callback.
    if (msg.callback != null) {
        handleCallback(msg);
        return;
    }

    // Messages sent via sendMessage: mCallback gets the first chance and may
    // consume the message by returning true.
    final boolean consumed = mCallback != null && mCallback.handleMessage(msg);
    if (!consumed) {
        handleMessage(msg);
    }
}

// Empty hook: override this when creating a Handler in order to receive messages.
public void handleMessage (Message msg) {}

3.1. Handler的 sendMessage 系列方法

sendMessage、sendMessageDelayed最终都会调用到sendMessageAtTime，再由sendMessageAtTime调用enqueueMessage()。


/**
 * Stamps the message with this Handler and hands it to the given queue.
 *
 * @param queue        the queue to insert into
 * @param msg          the message to send
 * @param uptimeMillis delivery time, passed to the queue as the message's "when"
 * @return whatever MessageQueue.enqueueMessage returns
 */
private boolean enqueueMessage(@NonNull MessageQueue queue, @NonNull Message msg,
        long uptimeMillis) {
    // Record this Handler in msg.target; Looper.loop() later calls msg.target.dispatchMessage.
    msg.target = this;
    msg.workSourceUid = ThreadLocalWorkSource.getUid();

    // A Handler constructed with async = true marks every message it sends as asynchronous.
    if (mAsynchronous) {
        msg.setAsynchronous(true);
    }
    return queue.enqueueMessage(msg, uptimeMillis);
}

/**
 * Inserts msg into the queue's singly linked list, keeping the list ordered by
 * ascending "when", and wakes the blocked reader when needed.
 *
 * @param msg  the message to insert; its target must already be set
 * @param when delivery time stored into msg.when
 * @return true if the message was queued, false if the queue is quitting
 * @throws IllegalArgumentException if msg.target is null
 * @throws IllegalStateException    if msg is already in use
 */
boolean enqueueMessage(Message msg, long when) {
    if (msg.target == null) {
        throw new IllegalArgumentException("Message must have a target.");
    }

    synchronized (this) {
        if (msg.isInUse()) {
            throw new IllegalStateException(msg + " This message is already in use.");
        }

        if (mQuitting) {
            // The queue is quitting: log a warning, recycle the message and report failure.
            IllegalStateException e = new IllegalStateException(
                    msg.target + " sending message to a Handler on a dead thread");
            Log.w(TAG, e.getMessage(), e);
            msg.recycle();
            return false;
        }

        msg.markInUse();
        msg.when = when;
        Message p = mMessages;
        boolean needWake;
        // Keep the list sorted by delivery time
        if (p == null || when == 0 || when < p.when) {
            // New head, wake up the event queue if blocked.
            msg.next = p;
            mMessages = msg;
            needWake = mBlocked;
        } else {
           
            // Inserting behind the head: a wake-up is only considered when the reader is
            // blocked, the head has no target and msg is asynchronous.
            // NOTE(review): a target-less head looks like a sync barrier -- confirm.
            needWake = mBlocked && p.target == null && msg.isAsynchronous();
            Message prev;
            for (;;) {
                // Advance until the end of the list or the first message due later than msg.
                prev = p;
                p = p.next;
                if (p == null || when < p.when) {
                    break;
                }
                // An asynchronous message already queued ahead of msg cancels the wake-up.
                if (needWake && p.isAsynchronous()) {
                    needWake = false;
                }
            }
            msg.next = p; // invariant: p == prev.next
            prev.next = msg;
        }

        // We can assume mPtr != 0 because mQuitting is false.
        if (needWake) {
            // Call into native code: the reader may be blocked in nativePollOnce while
            // this message is being added.
            nativeWake(mPtr);
        }
    }
    return true;
}

3.2. Handler 的post(Runnable) 与 sendMessage 有什么区别？


/**
 * Wraps the Runnable in a Message and sends it with a delay of 0.
 *
 * @param r the Runnable to run when the message is dispatched
 * @return the result of sendMessageDelayed
 */
public final boolean post(@NonNull Runnable r) {
    final Message wrapped = getPostMessage(r);
    return sendMessageDelayed(wrapped, 0);
}

/**
 * Obtains a Message and stores the Runnable in its callback field, which is the
 * field dispatchMessage checks first.
 *
 * @param r the Runnable to attach
 * @return the Message carrying r as its callback
 */
private static Message getPostMessage(Runnable r) {
    final Message carrier = Message.obtain();
    carrier.callback = r;
    return carrier;
}

post(Runnable)会将Runnable赋值到Message的callback变量中，这个Runnable会在Looper从MessageQueue中取出Message后，调用dispatchMessage的时候处理。

3.3. Handler的sendMessageDelayed 或者 postDelayed 是如何实现的？

消息延时处理的核心就是在获取Message的阶段。


/**
 * Returns the next message that is due (excerpt; "..." marks elided code, including
 * the end of the synchronized block). Delayed delivery is implemented here: when the
 * candidate message is not yet due, the loop computes how long to wait and blocks in
 * nativePollOnce with that timeout.
 */
Message next() {
    
    ...
    int nextPollTimeoutMillis = 0;
    for (;;) {
        if (nextPollTimeoutMillis != 0) {
            Binder.flushPendingCommands();
        }

        // May block in native code: the timeout is 0 on the first pass, the time until
        // the next message when one is pending, or -1 when the queue is empty.
        nativePollOnce(ptr, nextPollTimeoutMillis);

        synchronized (this) {
            ...
            if (msg != null) {
                // The message is not due yet: compute the timeout for the next poll
                if (now < msg.when) {
                    // Next message is not ready.  Set a timeout to wake up when it is ready.
                    nextPollTimeoutMillis = (int) Math.min(msg.when - now, Integer.MAX_VALUE);
                } else {
                    // Got a message.
                    mBlocked = false;
                    // Unlink msg from the list before returning it.
                    if (prevMsg != null) {
                        prevMsg.next = msg.next;
                    } else {
                        mMessages = msg.next;
                    }
                    msg.next = null;
                    if (DEBUG) Log.v(TAG, "Returning message: " + msg);
                    msg.markInUse();
                    return msg;
                }
            } else {
                // No more messages.
                nextPollTimeoutMillis = -1;
            }
        ...
        nextPollTimeoutMillis = 0;
    }
}

延迟消息的执行时间并不是精确的：when只能保证消息不会早于该时间被取出，而消息是在同一个循环中依次处理的，如果前面的消息处理耗时较长，后面的延迟消息就会被推迟执行。

3.4. Looper.loop() 为什么不会阻塞主线程？

先来看一下MessageQueue.java中定义的native层方法都有哪些？


// Native methods declared in MessageQueue.java.
// nativeInit: called from the MessageQueue constructor; its result is stored in mPtr.
private native static long nativeInit();
private native static void nativeDestroy(long ptr);
// nativePollOnce: called from next(); this is where the reading thread waits.
@UnsupportedAppUsage
private native void nativePollOnce(long ptr, int timeoutMillis); /*non-static for callbacks*/
// nativeWake: called from enqueueMessage() when the reader needs to be woken.
private native static void nativeWake(long ptr);
private native static boolean nativeIsPolling(long ptr);
private native static void nativeSetFileDescriptorEvents(long ptr, int fd, int events);


// Java-layer next(), abridged to show only the call into the native layer
// ("..." marks elided code).
Message next() {
    
    ...
    int nextPollTimeoutMillis = 0;
    for (;;) {
        if (nextPollTimeoutMillis != 0) {
            Binder.flushPendingCommands();
        }

        // Calls into the native layer (android_os_MessageQueue_nativePollOnce)
        nativePollOnce(ptr, nextPollTimeoutMillis);
    }
    ...
}

上面MessageQueue.java 的构造方法中，调用了nativeInit方法。

// MessageQueue constructor (repeated here): mPtr receives the value returned by the
// native nativeInit() call.
MessageQueue(boolean quitAllowed) {
    mQuitAllowed = quitAllowed;
    mPtr = nativeInit();
}


android_os_MessageQueue.cpp

// Native-layer implementation of MessageQueue.nativeInit().
static jlong android_os_MessageQueue_nativeInit(JNIEnv* env, jclass clazz) {
    // NativeMessageQueue is the native-layer counterpart of the Java MessageQueue.
    NativeMessageQueue* queue = new NativeMessageQueue();
    if (queue) {
        queue->incStrong(env);
        // Hand the pointer to the Java layer as a jlong (stored there in mPtr).
        return reinterpret_cast<jlong>(queue);
    }

    jniThrowRuntimeException(env, "Unable to allocate native queue");
    return 0;
}

// NativeMessageQueue中生成了Looper对象
> 注意这是Native层Looper，它几乎重写了Java层的Looper逻辑
// Binds the queue to the calling thread's native Looper, creating and registering
// one for the thread when none exists yet.
NativeMessageQueue::NativeMessageQueue() :
        mPollEnv(NULL), mPollObj(NULL), mExceptionObj(NULL) {
    mLooper = Looper::getForThread();
    if (mLooper != NULL) {
        // The thread already has a native Looper; reuse it.
        return;
    }

    mLooper = new Looper(false);
    Looper::setForThread(mLooper);
}

// Native Looper constructor: creates the wake-up eventfd, then rebuilds the epoll
// state under mLock.
// NOTE(review): rebuildEpollLocked() is not shown here; presumably it creates the epoll
// instance (mEpollFd) and registers mWakeEventFd with it -- confirm.
Looper::Looper(bool allowNonCallbacks) :
        mAllowNonCallbacks(allowNonCallbacks), mSendingMessage(false),
        mPolling(false), mEpollFd(-1), mEpollRebuildRequired(false),
        mNextRequestSeq(0), mResponseIndex(0), mNextMessageUptime(LLONG_MAX) {
    // eventfd() creates an event object and returns a file descriptor for it; the
    // descriptor is kept in mWakeEventFd, written by wake() and read by awoken().
    mWakeEventFd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
    LOG_ALWAYS_FATAL_IF(mWakeEventFd < 0, "Could not make wake event fd: %s",
                        strerror(errno));

    AutoMutex _l(mLock);
    // Rebuild the epoll registration
    rebuildEpollLocked();
}

// TLS (thread-local storage) can be thought of as the native counterpart of Java's
// ThreadLocal. This function stores the given Looper in the calling thread's TLS slot.
void Looper::setForThread(const sp<Looper>& looper) {
    sp<Looper> old = getForThread(); // also has side-effect of initializing TLS

    if (looper != NULL) {
        // Take a strong reference for the TLS slot before the raw pointer is stored.
        looper->incStrong((void*)threadDestructor);
    }

    pthread_setspecific(gTLSKey, looper.get());

    if (old != NULL) {
        // Release the strong reference that was held for the Looper being replaced.
        old->decStrong((void*)threadDestructor);
    }
}

// Call chain: the endless loop in the Java Looper.loop() calls MessageQueue.next(),
// which calls nativePollOnce and arrives here. ptr is converted back to the
// NativeMessageQueue pointer and the poll is forwarded to it.
static void android_os_MessageQueue_nativePollOnce(JNIEnv* env, jobject obj,
        jlong ptr, jint timeoutMillis) {
    reinterpret_cast<NativeMessageQueue*>(ptr)->pollOnce(env, obj, timeoutMillis);
}

// NativeMessageQueue::pollOnce: forwards the poll to the native Looper and rethrows
// any exception recorded while polling.
void NativeMessageQueue::pollOnce(JNIEnv* env, jobject pollObj, int timeoutMillis) {
    // Make the JNI environment and the Java object available for the duration of the poll.
    mPollEnv = env;
    mPollObj = pollObj;
    mLooper->pollOnce(timeoutMillis);
    mPollObj = NULL;
    mPollEnv = NULL;

    // If an exception object was recorded in mExceptionObj, throw it into Java and clear it.
    if (mExceptionObj) {
        env->Throw(mExceptionObj);
        env->DeleteLocalRef(mExceptionObj);
        mExceptionObj = NULL;
    }
}

// Looper.cpp (excerpt; "..." marks elided code, including the loop's exit paths)
int Looper::pollOnce(int timeoutMillis, int* outFd, int* outEvents, void** outData) {
    int result = 0;
    for (;;) {
        ...
        // timeoutMillis is the timeout chosen by the Java layer
        // (nextPollTimeoutMillis in MessageQueue.next())
        result = pollInner(timeoutMillis);
    }
}

// Looper.cpp (excerpt; "..." marks elided code)
int Looper::pollInner(int timeoutMillis) {

    ...
    // Poll.
    int result = POLL_WAKE;

    ...
    // Key call: waits until an event occurs on a registered descriptor or the timeout
    // elapses. A write to mWakeEventFd by Looper::wake() (reached from nativeWake) makes
    // it return, assuming mWakeEventFd is registered with mEpollFd (registration not shown).
    int eventCount = epoll_wait(mEpollFd, eventItems, EPOLL_MAX_EVENTS, timeoutMillis);
    
    ...
    // Woken up from epoll_wait(): read (drain) the wake-up descriptor
    awoken();
            
    ...
    return result;
}

// Reads the 64-bit counter from mWakeEventFd to consume the wake-up signal.
// Only the counter is read here; no message is retrieved by this function.
void Looper::awoken() {
#if DEBUG_POLL_AND_WAKE
    ALOGD("%p ~ awoken", this);
#endif

    uint64_t counter;
    TEMP_FAILURE_RETRY(read(mWakeEventFd, &counter, sizeof(uint64_t)));
}

// nativeWake()方法在MessageQueue.java的enqueueMessage()中被调用
android_os_MessageQueue.cpp

 // JNI entry point for MessageQueue.nativeWake(): converts ptr back to the
 // NativeMessageQueue pointer and asks it to wake its Looper.
 static void android_os_MessageQueue_nativeWake(JNIEnv* env, jclass clazz, jlong ptr) {
    reinterpret_cast<NativeMessageQueue*>(ptr)->wake();
}

// Delegates the wake-up request to the native Looper owned by this queue.
void NativeMessageQueue::wake() {
    mLooper->wake();
}

// Looper.cpp
// 如何唤醒的呢？
// 通过调用write()向管道写入一个整数1，TEMP_FAILURE_RETRY就是失败不断的重试，直到成功唤醒为止，
// 成功写入后，管道的另一端就会接收到，并从阻塞状态结束，即从epoll_wait()返回，执行后面代码

void Looper::wake() {
#if DEBUG_POLL_AND_WAKE
    ALOGD("%p ~ wake", this);
#endif

    uint64_t inc = 1;
    ssize_t nWrite = TEMP_FAILURE_RETRY(write(mWakeEventFd, &inc, sizeof(uint64_t)));
    if (nWrite != sizeof(uint64_t)) {
        if (errno != EAGAIN) {
            LOG_ALWAYS_FATAL("Could not write wake signal to fd %d: %s",
                    mWakeEventFd, strerror(errno));
        }
    }
}

在enqueueMessage() 和 next()方法执行时可能会发生阻塞，但是并没有发生ANR，原因就在于两个native方法：nativeWake 和 nativePollOnce。它们的本质是基于Linux文件描述符的等待/唤醒机制：早期版本使用的是管道(pipe)，而上面贴出的代码版本使用的是eventfd(见Looper构造函数中的mWakeEventFd)，两者的原理类似。以管道为例，其本质也是文件，但又和普通的文件有所不同：管道缓冲区大小一般为1页，即4K字节。管道分为读端和写端，读端负责从管道拿数据，当数据为空时则阻塞；写端向管道写数据，当管道缓冲区满时则阻塞。

在主线程的MessageQueue没有消息时，主线程会阻塞在next方法中的nativePollOnce方法上，此时主线程会释放CPU资源进入休眠状态，直到下一个消息到达或者有事务发生时，再通过向用于唤醒的文件描述符(早期版本为pipe管道的写端，上文代码中为eventfd)写入数据来唤醒主线程工作。这里采用的是Linux的epoll机制。

写了这么多，参考下面这张图来看一下过程

注意：在子线程中创建了Looper后，在所有事情都完成以后应该调用quit方法来终止消息循环，否则这个子线程就会一直处于等待(阻塞)状态。退出Looper之后线程就会终止，所以在不需要的时候要及时终止Looper。