Android AOSP 6.0.1 Process start 流程分析(二)

300 阅读8分钟

Process 类 startViaZygote 方法中,最后一步调用了 zygoteSendArgsAndGetResult,此函数以 openZygoteSocketIfNeeded 返回的 ZygoteState 对象,和很多组织在 ArrayList 中的参数作为实参传入。

frameworks/base/core/java/android/os/Process.java

public class Process {
    ......
    /**
     * Assembles the zygote argument list for a new process and hands it to
     * {@code zygoteSendArgsAndGetResult}, using the zygote connection selected
     * by {@code openZygoteSocketIfNeeded(abi)}.
     *
     * Argument order is significant and is kept as follows: runtime/uid/gid
     * switches, debug switches, mount mode, target SDK, supplementary groups,
     * optional name/seinfo/instruction-set/data-dir, then the class to start
     * followed by any extra arguments.
     *
     * @param processClass class name appended after all switches
     * @param niceName optional process name, skipped when null
     * @param uid value for {@code --setuid=}
     * @param gid value for {@code --setgid=}
     * @param gids optional supplementary groups, sent comma-separated
     * @param debugFlags bit mask tested against the {@code Zygote.DEBUG_*} bits
     * @param mountExternal compared against the {@code Zygote.MOUNT_EXTERNAL_*} values
     * @param targetSdkVersion value for {@code --target-sdk-version=}
     * @param seInfo optional value for {@code --seinfo=}
     * @param abi passed to {@code openZygoteSocketIfNeeded}
     * @param instructionSet optional value for {@code --instruction-set=}
     * @param appDataDir optional value for {@code --app-data-dir=}
     * @param extraArgs optional trailing arguments, appended verbatim
     * @return whatever {@code zygoteSendArgsAndGetResult} returns
     * @throws ZygoteStartFailedEx propagated from the calls made here
     */
    private static ProcessStartResult startViaZygote(final String processClass,
                                  final String niceName,
                                  final int uid, final int gid,
                                  final int[] gids,
                                  int debugFlags, int mountExternal,
                                  int targetSdkVersion,
                                  String seInfo,
                                  String abi,
                                  String instructionSet,
                                  String appDataDir,
                                  String[] extraArgs)
                                  throws ZygoteStartFailedEx {
        synchronized(Process.class) {
            final ArrayList<String> zygoteArgs = new ArrayList<String>();

            // These three lead the list, exactly as in the original ordering.
            zygoteArgs.add("--runtime-args");
            zygoteArgs.add("--setuid=" + uid);
            zygoteArgs.add("--setgid=" + gid);

            // Debug bits and the switch each one maps to, in emission order.
            final int[] debugBits = {
                Zygote.DEBUG_ENABLE_JNI_LOGGING,
                Zygote.DEBUG_ENABLE_SAFEMODE,
                Zygote.DEBUG_ENABLE_DEBUGGER,
                Zygote.DEBUG_ENABLE_CHECKJNI,
                Zygote.DEBUG_ENABLE_JIT,
                Zygote.DEBUG_GENERATE_DEBUG_INFO,
                Zygote.DEBUG_ENABLE_ASSERT,
            };
            final String[] debugSwitches = {
                "--enable-jni-logging",
                "--enable-safemode",
                "--enable-debugger",
                "--enable-checkjni",
                "--enable-jit",
                "--generate-debug-info",
                "--enable-assert",
            };
            for (int idx = 0; idx < debugBits.length; idx++) {
                if ((debugFlags & debugBits[idx]) != 0) {
                    zygoteArgs.add(debugSwitches[idx]);
                }
            }

            // At most one mount switch; any other mode adds nothing.
            String mountSwitch = null;
            if (mountExternal == Zygote.MOUNT_EXTERNAL_DEFAULT) {
                mountSwitch = "--mount-external-default";
            } else if (mountExternal == Zygote.MOUNT_EXTERNAL_READ) {
                mountSwitch = "--mount-external-read";
            } else if (mountExternal == Zygote.MOUNT_EXTERNAL_WRITE) {
                mountSwitch = "--mount-external-write";
            }
            if (mountSwitch != null) {
                zygoteArgs.add(mountSwitch);
            }

            zygoteArgs.add("--target-sdk-version=" + targetSdkVersion);

            // TODO: optionally enable the debugger here.

            // --setgroups carries a comma-separated list of group ids.
            if (gids != null && gids.length > 0) {
                final StringBuilder groups = new StringBuilder("--setgroups=");
                groups.append(gids[0]);
                for (int g = 1; g < gids.length; g++) {
                    groups.append(',').append(gids[g]);
                }
                zygoteArgs.add(groups.toString());
            }

            if (niceName != null) {
                zygoteArgs.add("--nice-name=" + niceName);
            }
            if (seInfo != null) {
                zygoteArgs.add("--seinfo=" + seInfo);
            }
            if (instructionSet != null) {
                zygoteArgs.add("--instruction-set=" + instructionSet);
            }
            if (appDataDir != null) {
                zygoteArgs.add("--app-data-dir=" + appDataDir);
            }

            // The class to start comes after every switch, then the extras.
            zygoteArgs.add(processClass);
            if (extraArgs != null) {
                for (int e = 0; e < extraArgs.length; e++) {
                    zygoteArgs.add(extraArgs[e]);
                }
            }

            return zygoteSendArgsAndGetResult(openZygoteSocketIfNeeded(abi), zygoteArgs);
        }
    }
    ......
}

zygoteSendArgsAndGetResult 函数向 zygote 进程发送一个参数列表,该进程启动一个新的子进程并返回子进程的 pid。

frameworks/base/core/java/android/os/Process.java

public class Process {
    ......
    /**
     * Writes an argument list to the zygote connection and reads back the
     * reply: an int pid followed by a boolean "using wrapper" flag.
     *
     * Wire format written here: the argument count on its own line, then one
     * argument per line. That is why an argument containing a newline is
     * rejected before anything is written.
     *
     * @param zygoteState supplies the {@code writer} and {@code inputStream} used
     * @param args arguments to send
     * @return the result holding the pid and wrapper flag read from the stream
     * @throws ZygoteStartFailedEx if an argument contains a newline, the
     *         returned pid is negative, or an I/O error occurs (in which case
     *         {@code zygoteState} is closed first)
     */
    private static ProcessStartResult zygoteSendArgsAndGetResult(
            ZygoteState zygoteState, ArrayList<String> args)
            throws ZygoteStartFailedEx {
        try {
            // Reject any argument that would break the line-based framing.
            for (String candidate : args) {
                if (candidate.contains("\n")) {
                    throw new ZygoteStartFailedEx("embedded newlines not allowed");
                }
            }

            // See com.android.internal.os.ZygoteInit.readArgumentList()
            final BufferedWriter out = zygoteState.writer;
            final DataInputStream in = zygoteState.inputStream;

            // Count first, then each argument on its own line.
            out.write(Integer.toString(args.size()));
            out.newLine();
            for (String line : args) {
                out.write(line);
                out.newLine();
            }
            out.flush();

            // Should there be a timeout on this?
            final ProcessStartResult reply = new ProcessStartResult();

            // Always read the whole reply so no bytes are left in the stream
            // for a later request to trip over.
            reply.pid = in.readInt();
            reply.usingWrapper = in.readBoolean();

            if (reply.pid >= 0) {
                return reply;
            }
            throw new ZygoteStartFailedEx("fork() failed");
        } catch (IOException ex) {
            zygoteState.close();
            throw new ZygoteStartFailedEx(ex);
        }
    }
    ......
}

启动进程需要的必要参数已经写入到套接字,接下来 zygote 进程会接收并处理从套接字发送来的消息。从注释不难看出 ZygoteInit.readArgumentList 是获取参数的方法,但在 ZygoteInit 类中根本找不到 readArgumentList 方法,线索中断了。既然是通过套接字连接,那么可以猜测套接字服务端就位于 Zygote 进程中。

先上时序图:
在这里插入图片描述

我们来分析其 main 函数找找答案。先来说说 ZygoteInit 类的作用:

它是 zygote 进程的启动类。预初始化一些类,然后在 UNIX 域套接字上等待命令。 基于这些命令,派生继承虚拟机初始状态的子进程。

frameworks/base/core/java/com/android/internal/os/ZygoteInit.java

public class ZygoteInit {
    ......
    /**
     * Entry point shown as an excerpt; the dotted lines are elided code.
     *
     * The visible part parses the command line, registers the server socket
     * and enters the select loop. A {@code MethodAndArgsCaller} thrown out of
     * the try block is handled by calling its {@code run()} method.
     *
     * @param argv command-line arguments; index 0 is not inspected here
     */
    public static void main(String argv[]) {
        try {
            ......
            String socketName = "zygote";
            String abiList = null;
            // Parsing starts at index 1; any unrecognised argument is fatal.
            for (int i = 1; i < argv.length; i++) {
                if ("start-system-server".equals(argv[i])) {
                    startSystemServer = true;
                } else if (argv[i].startsWith(ABI_LIST_ARG)) {
                    abiList = argv[i].substring(ABI_LIST_ARG.length());
                } else if (argv[i].startsWith(SOCKET_NAME_ARG)) {
                    socketName = argv[i].substring(SOCKET_NAME_ARG.length());
                } else {
                    throw new RuntimeException("Unknown command line argument: " + argv[i]);
                }
            }

            // The ABI list has no default, unlike the socket name.
            if (abiList == null) {
                throw new RuntimeException("No ABI list supplied.");
            }

            registerZygoteSocket(socketName);
            .......
            Log.i(TAG, "Accepting command socket connections");
            // The loop body is while(true); it is left only via an exception.
            runSelectLoop(abiList);

            closeServerSocket();
        } catch (MethodAndArgsCaller caller) {
            // Run the carried method from this frame, after the stack has unwound.
            caller.run();
        } catch (RuntimeException ex) {
            Log.e(TAG, "Zygote died with exception", ex);
            closeServerSocket();
            throw ex;
        }
    }
    ......
}

在我的设备上,abiList="armeabi-v7a,armeabi",socketName="zygote"。registerZygoteSocket 函数实现套接字的注册。首先调用 System 类的静态方法 getenv 查找以 ANDROID_SOCKET_zygote 字符串为 key 对应的值,然后将该字符串解析为整型值,接着创建 FileDescriptor 文件描述符对象,并将刚刚得到的整型值通过 setInt$ 方法设置到 FileDescriptor 文件描述符对象上。最后将文件描述符对象 fd 作为入参传入 LocalServerSocket 构造函数,创建 LocalServerSocket 对象。

frameworks/base/core/java/com/android/internal/os/ZygoteInit.java

public class ZygoteInit {
    ......
    /**
     * Creates the server socket from a file descriptor number published in
     * the environment. Does nothing when {@code sServerSocket} is already set.
     *
     * @param socketName suffix appended to {@code ANDROID_SOCKET_PREFIX} to
     *        form the environment variable name
     * @throws RuntimeException if the variable is missing or not an integer,
     *         or if constructing the {@code LocalServerSocket} fails
     */
    private static void registerZygoteSocket(String socketName) {
        if (sServerSocket != null) {
            return;
        }

        final String envKey = ANDROID_SOCKET_PREFIX + socketName;
        int inheritedFd;
        try {
            // An unset variable yields null and fails in parseInt as well.
            inheritedFd = Integer.parseInt(System.getenv(envKey));
        } catch (RuntimeException ex) {
            throw new RuntimeException(envKey + " unset or invalid", ex);
        }

        try {
            final FileDescriptor descriptor = new FileDescriptor();
            descriptor.setInt$(inheritedFd);
            sServerSocket = new LocalServerSocket(descriptor);
        } catch (IOException ex) {
            throw new RuntimeException(
                    "Error binding to local socket '" + inheritedFd + "'", ex);
        }
    }
    ......
}

在 LocalServerSocket 构造函数中,先创建 LocalSocketImpl 对象,然后调用其 listen 方法,最后通过 LocalSocketImpl 对象 impl getSockAddress 方法获取到 LocalSocketAddress 对象,赋给 LocalServerSocket 类成员变量 localAddress。

frameworks/base/core/java/android/net/LocalServerSocket.java

public class LocalServerSocket {
    private final LocalSocketImpl impl;
    private final LocalSocketAddress localAddress;
    ......
    /**
     * Wraps an existing file descriptor as a server socket: builds the
     * implementation object around it, starts listening with
     * {@code LISTEN_BACKLOG}, and records the socket address it reports.
     *
     * @param fd descriptor handed to {@code LocalSocketImpl}
     * @throws IOException propagated from the implementation calls
     */
    public LocalServerSocket(FileDescriptor fd) throws IOException
    {
        impl = new LocalSocketImpl(fd);
        impl.listen(LISTEN_BACKLOG);
        localAddress = impl.getSockAddress();
    }
    ......
}

listen 方法内部调用了 jni 方法 listen_native。

frameworks/base/core/java/android/net/LocalSocketImpl.java

class LocalSocketImpl
{
    ......
    /** Native counterpart of {@link #listen(int)}; implemented in JNI. */
    private native void listen_native(FileDescriptor fd, int backlog)
            throws IOException;
    ......
    /**
     * Starts listening on this socket's descriptor.
     *
     * @param backlog forwarded unchanged to {@code listen_native}
     * @throws IOException if no descriptor is set, or from the native call
     */
    protected void listen(int backlog) throws IOException
    {
        if (fd == null) {
            throw new IOException("socket not created");
        }

        listen_native(fd, backlog);
    }
    ......
}

转入 listen_native jni 方法的 native 实现。首先从 java 对象中解出文件描述符整型值 fd,这是使用 jniGetFDFromFileDescriptor 方法实现的。接着调用 listen 函数实现监听客户端套接字连接。

frameworks/base/core/jni/android_net_LocalSocketImpl.cpp

/*
 * JNI implementation of:
 *   private native void listen_native(FileDescriptor fd, int backlog) throws IOException;
 *
 * Extracts the integer descriptor from the Java FileDescriptor and calls
 * listen() on it. A failure is reported as a Java IOException built from errno.
 */
static void
socket_listen (JNIEnv *env, jobject object, jobject fileDescriptor, jint backlog)
{
    int ret;
    int fd;

    fd = jniGetFDFromFileDescriptor(env, fileDescriptor);

    // Bail out if extracting the descriptor left a Java exception pending.
    if (env->ExceptionCheck()) {
        return;
    }

    ret = listen(fd, backlog);

    if (ret < 0) {
        jniThrowIOException(env, errno);
        return;
    }
}

listen 方法定义在头文件 sys/socket.h 中。

sockfd----绑定了地址的 socket 文件描述符;

backlog----服务器负载,提示系统进程所要入队的未完成请求数量。

int listen(int sockfd, int backlog);

现在回到 ZygoteInit 类 main 方法中接下来的调用 runSelectLoop 函数中。runSelectLoop 函数作用是运行 zygote 进程的 poll 循环。接受新的连接,并从连接中读取命令,每次产生一个请求。下面详细的阐述了 poll 方法,poll 函数传入的第二个参数是 -1,代表一直阻塞到 fds 数组中有一个达到就绪态或者捕获到一个信号。监听的事件位掩码设置为 POLLIN,可以监听描述符上发生的事件,代码中 i 等于 0 (代表监听 socket 描述符发生的事件,说明有个客户端连接上来了)的时候建立一个新的连接,这个时候会返回对应的已连接的 socket 描述符, 如果事件发生在已连接的 socket 描述符上,说明客户端有消息发送到了服务端,如果消息处理结束,则将这个已连接的 socket 描述符从 poll 监听列表中移除。

frameworks/base/core/java/com/android/internal/os/ZygoteInit.java

public class ZygoteInit {
    ......
    /**
     * Poll loop: waits for readable descriptors, accepts new connections on
     * the server socket and runs one command on each readable connection.
     *
     * {@code fds} and {@code peers} are parallel lists. Index 0 holds the
     * server socket's descriptor paired with a {@code null} peer; every later
     * index holds an accepted connection and its descriptor.
     *
     * @param abiList passed to each new connection via {@code acceptCommandPeer}
     * @throws MethodAndArgsCaller propagated from {@code runOnce()}
     */
    private static void runSelectLoop(String abiList) throws MethodAndArgsCaller {
        ArrayList<FileDescriptor> fds = new ArrayList<FileDescriptor>();
        ArrayList<ZygoteConnection> peers = new ArrayList<ZygoteConnection>();

        fds.add(sServerSocket.getFileDescriptor());
        peers.add(null);

        while (true) {
            // Rebuild the poll set each round, since fds changes as peers come and go.
            StructPollfd[] pollFds = new StructPollfd[fds.size()];
            for (int i = 0; i < pollFds.length; ++i) {
                pollFds[i] = new StructPollfd();
                pollFds[i].fd = fds.get(i);
                pollFds[i].events = (short) POLLIN;
            }
            try {
                // A timeout of -1 is passed to the poll call.
                Os.poll(pollFds, -1);
            } catch (ErrnoException ex) {
                throw new RuntimeException("poll failed", ex);
            }
            // Walk from the highest index down, so removing entry i never
            // shifts an entry still to be visited. Peers accepted in this pass
            // are appended past pollFds.length and are first polled next round.
            for (int i = pollFds.length - 1; i >= 0; --i) {
                if ((pollFds[i].revents & POLLIN) == 0) {
                    continue;
                }
                if (i == 0) {
                    // Readable server socket: accept the pending connection.
                    ZygoteConnection newPeer = acceptCommandPeer(abiList);
                    peers.add(newPeer);
                    fds.add(newPeer.getFileDesciptor());
                } else {
                    // Readable connection: run one command; drop it when done.
                    boolean done = peers.get(i).runOnce();
                    if (done) {
                        peers.remove(i);
                        fds.remove(i);
                    }
                }
            }
        }
    }

    ......
}

一、poll 函数分析

先来理解 poll 函数。从上一节可知调用 Os类 静态方法 poll 最终委托给 Posix 类 poll 方法实现。

libcore/luni/src/main/java/libcore/io/Posix.java

public final class Posix implements Os {
    ......
    /**
     * Native poll over the given descriptor array.
     *
     * @param fds descriptors with their requested events
     * @param timeoutMs timeout forwarded to the native implementation
     * @return the native call's result
     * @throws ErrnoException declared by the native method
     */
    public native int poll(StructPollfd[] fds, int timeoutMs) throws ErrnoException;
    ......
}

再来看它的 Native 实现。重点关注定义在头文件 poll.h 中的 poll 方法。

libcore/luni/src/main/native/libcore_io_Posix.cpp

// JNI implementation of Posix.poll (excerpt; dotted lines are elided code).
// The visible part calls poll() in a loop and returns its result.
static jint Posix_poll(JNIEnv* env, jobject, jobjectArray javaStructs, jint timeoutMs) {
    ......

    int rc;
    while (true) {
        ......

        rc = poll(fds.get(), count, timeoutMs);
        // Stop on success or on any error other than EINTR; only an
        // interrupted call goes around the loop again.
        if (rc >= 0 || errno != EINTR) {
            break;
        }

        ......
    }

    ......
    return rc;
}

select 与 poll 工作原理:

  1. select 主要是采用轮询的方式来实现对就绪的 fd 处理;
  2. poll 和 select 基本相同,主要不同在于 poll 没有对 fd 数量限制。
    在这里插入图片描述
    下面是定义在 poll.h 中的函数原型。
int poll(struct pollfd fds[], nfds_t nfds, int timeout)

fds----一个struct pollfd结构类型的数组,列出了我们需要 poll() 检查的文件描述符,其定义如下:

/* One entry of the array passed to poll(). */
typedef struct pollfd {
        int fd;           /* file descriptor to be checked */
        short events;     /* events of interest on fd */
        short revents;    /* events that actually occurred on fd */
} pollfd_t;

pollfd 结构体中的 events 和 revents 字段都是掩码。调用者初始化 events 来指定需要为描述符 fd 做检查的事件。当 poll() 返回时,revents 被设定以此来表示该文件描述符上实际发生的事件。

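| 位掩码 | events 中的输入 | 返回到 revents | 描述 |
| --- | --- | --- | --- |
| POLLIN | ● | ● | 可读取非高优先级数据 |
| POLLRDNORM | ● | ● | 等同于 POLLIN |
| POLLRDBAND | ● | ● | 可读取优先级数据(Linux 中不可用) |
| POLLPRI | ● | ● | 可读取高优先级数据 |
| POLLRDHUP | ● | ● | 对端套接字关闭 |
| POLLOUT | ● | ● | 普通数据可写 |
| POLLWRNORM | ● | ● | 等同于 POLLOUT |
| POLLWRBAND | ● | ● | 优先级数据可写入 |
| POLLERR |  | ● | 有错误发生 |
| POLLHUP |  | ● | 出现挂断 |
| POLLNVAL |  | ● | 文件描述符未打开 |
| POLLMSG |  |  | Linux 中不可用 |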

上表列出了 events 和 revents 字段中的掩码。

nfds----指定了 fds 中元素的个数,nfds_t 为无符号整型

timeout----决定阻塞行为,一般如下:

-1:一直阻塞到 fds 数组中有一个达到就绪态或者捕获到一个信号

0:不会阻塞,立即返回

> 0:阻塞时间

和 select() 一样,timeout 的精度受软件时钟粒度的影响,如果不是时间粒度整数倍,向上取整。

返回值

> 0:数组 fds 中准备好读、写或出错状态的那些 socket 描述符的总数量;

== 0:数组 fds 中没有任何 socket 描述符准备好读、写,或出错;此时 poll 超时

-1:poll 函数调用失败,同时会自动设置全局变量 errno 为下列值之一:

EBADF:一个或多个结构体中指定的文件描述符无效;

EFAULT:fds 指针指向的地址超出进程的地址空间;

EINTR:请求的事件之前产生一个信号,调用可以重新发起;

EINVAL:nfds 参数超出 RLIMIT_NOFILE 值;

ENOMEM:可用内存不足,无法完成请求。

二、acceptCommandPeer 函数分析

acceptCommandPeer 函数中,首先调用服务端套接字接受客户端的连接,实际上返回的是一个 LocalSocket 对象,然后创建 ZygoteConnection 对象。

frameworks/base/core/java/com/android/internal/os/ZygoteInit.java

public class ZygoteInit {
    ......
    /**
     * Accepts one connection on the server socket and wraps it in a
     * {@code ZygoteConnection}.
     *
     * @param abiList passed through to the new connection
     * @return the connection built around the accepted socket
     * @throws RuntimeException wrapping any {@code IOException} raised here
     */
    private static ZygoteConnection acceptCommandPeer(String abiList) {
        try {
            return new ZygoteConnection(sServerSocket.accept(), abiList);
        } catch (IOException ex) {
            throw new RuntimeException(
                    "IOException during accept()", ex);
        }
    }
    ......
}

sServerSocket 实际是一个 LocalServerSocket 对象,调用其 accept 方法其实就是调用了 LocalSocketImpl 的 accept 方法。最后返回一个 LocalSocket 对象,其内部封装了一个 LocalSocketImpl 对象。

frameworks/base/core/java/android/net/LocalServerSocket.java

public class LocalServerSocket {
    private final LocalSocketImpl impl;
    private final LocalSocketAddress localAddress;
    ......
    /**
     * Accepts a connection: creates an empty implementation object, lets the
     * listening implementation fill it in, and wraps it in a
     * {@code LocalSocket}.
     *
     * @return a socket wrapping the accepted implementation, created with
     *         {@code LocalSocket.SOCKET_UNKNOWN}
     * @throws IOException propagated from {@code impl.accept}
     */
    public LocalSocket accept() throws IOException
    {
        LocalSocketImpl acceptedImpl = new LocalSocketImpl();

        impl.accept (acceptedImpl);

        return new LocalSocket(acceptedImpl, LocalSocket.SOCKET_UNKNOWN);
    }
    ......
}

LocalSocketImpl 类中 accept 方法实现最终调用了重载版本的 jni 实现。

frameworks/base/core/java/android/net/LocalSocketImpl.java

class LocalSocketImpl
{
    ......
    /** Descriptor backing this socket; null means no socket has been created. */
    private FileDescriptor fd;
    ......
    /** Native accept; returns the descriptor of the accepted connection. */
    private native FileDescriptor accept
            (FileDescriptor fd, LocalSocketImpl s) throws IOException;
    ......
    /**
     * Accepts a connection on this socket and stores the resulting
     * descriptor in {@code s}.
     *
     * @param s implementation object that receives the accepted descriptor
     * @throws IOException if this socket has no descriptor, or from the
     *         native call
     */
    protected void accept(LocalSocketImpl s) throws IOException
    {
        if (fd == null) {
            throw new IOException("socket not created");
        }

        s.fd = accept(fd, s);
        // Flag the descriptor on s as one created by this class.
        s.mFdCreatedInternally = true;
    }

    ......
}

accept 方法定义在头文件 sys/socket.h 中。

int accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen);

accept 系统调用主要用在基于连接的套接字类型,比如 SOCK_STREAM 和 SOCK_SEQPACKET。它提取出所监听套接字的等待连接队列中第一个连接请求,创建一个新的套接字,并返回指向该套接字的文件描述符。

sockfd----利用系统调用 socket 建立的套接字描述符,通过 bind 绑定到一个本地地址(一般为服务器的套接字),并且通过 listen 一直在监听连接;

addr----指向 struct sockaddr 的指针,该结构用通讯层服务器对等套接字的地址(一般为客户端地址)填写,返回地址 addr 的确切格式由套接字的地址类别(比如 TCP 或 UDP)决定;若 addr 为 NULL,没有有效地址填写,这种情况下,addrlen 也不使用,应该置为 NULL;

addrlen----一个值-结果参数,调用前必须初始化为 addr 所指向结构的大小,函数返回时其中存放对端地址(一般为客户端地址)的实际长度。

frameworks/base/core/jni/android_net_LocalSocketImpl.cpp

/*
 * JNI implementation of:
 *    private native FileDescriptor
 *    accept (FileDescriptor fd, LocalSocketImpl s)
 *                                   throws IOException;
 *
 * Accepts one connection on the listening descriptor, retrying while the
 * call is interrupted (EINTR), and returns the new descriptor wrapped in a
 * Java FileDescriptor. Returns NULL with a Java exception pending on failure.
 */
static jobject
socket_accept (JNIEnv *env, jobject object, jobject fileDescriptor, jobject s)
{
    // Peer address storage, addressable generically or as a UNIX-domain address.
    union {
        struct sockaddr address;
        struct sockaddr_un un_address;
    } peer;

    if (s == NULL) {
        jniThrowNullPointerException(env, NULL);
        return NULL;
    }

    const int listenFd = jniGetFDFromFileDescriptor(env, fileDescriptor);
    if (env->ExceptionCheck()) {
        return NULL;
    }

    int acceptedFd;
    for (;;) {
        // Reset the length before every attempt; accept() overwrites it.
        socklen_t peerLen = sizeof(peer);
        acceptedFd = accept(listenFd, &(peer.address), &peerLen);
        if (acceptedFd >= 0 || errno != EINTR) {
            break;
        }
    }

    if (acceptedFd < 0) {
        jniThrowIOException(env, errno);
        return NULL;
    }

    return jniCreateFileDescriptor(env, acceptedFd);
}

在 ZygoteConnection 构造函数中,从 LocalSocket 获取套接字的输入、输出流,并设置了连接超时为 1000 ms。如此就可以实现和客户端的交互。

frameworks/base/core/java/com/android/internal/os/ZygoteConnection.java

class ZygoteConnection {
    private static final String TAG = "Zygote";

    /** Value passed to {@code setSoTimeout} on each command socket. */
    private static final int CONNECTION_TIMEOUT_MILLIS = 1000;

    private final LocalSocket mSocket;
    private final DataOutputStream mSocketOutStream;
    private final BufferedReader mSocketReader;
    private final Credentials peer;
    private final String abiList;

    /**
     * Wraps an accepted command socket: sets up the output stream and the
     * buffered reader, applies the socket timeout, and records the peer's
     * credentials.
     *
     * @param socket the connected socket
     * @param abiList ABI list stored on this connection
     * @throws IOException if a stream cannot be obtained or the peer
     *         credentials cannot be read (the latter is logged first)
     */
    ZygoteConnection(LocalSocket socket, String abiList) throws IOException {
        this.mSocket = socket;
        this.abiList = abiList;

        this.mSocketOutStream = new DataOutputStream(socket.getOutputStream());
        this.mSocketReader =
                new BufferedReader(new InputStreamReader(socket.getInputStream()), 256);

        socket.setSoTimeout(CONNECTION_TIMEOUT_MILLIS);

        final Credentials credentials;
        try {
            credentials = socket.getPeerCredentials();
        } catch (IOException ex) {
            Log.e(TAG, "Cannot read peer credentials", ex);
            throw ex;
        }
        this.peer = credentials;
    }
    ......
}

三、runOnce 函数分析

最后来看 runOnce 函数干了什么?runOnce 定义在 ZygoteConnection 类中。首先调用 readArgumentList 函数得到客户端发来的启动进程参数,接着进行权限检查,然后调用 forkAndSpecialize 创建新进程,返回 pid 等于 0,开始调用 handleChildProc 处理子进程;返回 pid > 0,开始调用 handleParentProc 处理父进程。

frameworks/base/core/java/com/android/internal/os/ZygoteConnection.java

class ZygoteConnection {
    ......
        /**
         * Reads one request from the command socket, forks, and dispatches to
         * the child or parent handler.
         *
         * Returns {@code true} after closing the socket when the request could
         * not be read (I/O error or end of stream). Otherwise the return value
         * comes from {@code handleAbiListQuery} or {@code handleParentProc}.
         *
         * If argument parsing, a security check or pipe creation throws one
         * of the caught exceptions, the error is logged and {@code pid} stays
         * -1, so the parent branch below runs with that value.
         *
         * @throws ZygoteInit.MethodAndArgsCaller declared for the child path
         */
        boolean runOnce() throws ZygoteInit.MethodAndArgsCaller {

        String args[];
        Arguments parsedArgs = null;
        FileDescriptor[] descriptors;

        try {
            args = readArgumentList();
            descriptors = mSocket.getAncillaryFileDescriptors();
        } catch (IOException ex) {
            Log.w(TAG, "IOException on command socket " + ex.getMessage());
            closeSocket();
            return true;
        }

        if (args == null) {
            // EOF reached.
            closeSocket();
            return true;
        }

        /** the stderr of the most recent request, if avail */
        PrintStream newStderr = null;

        // Index 2 of the received descriptors is used as the error stream.
        if (descriptors != null && descriptors.length >= 3) {
            newStderr = new PrintStream(
                    new FileOutputStream(descriptors[2]));
        }

        int pid = -1;
        FileDescriptor childPipeFd = null;
        FileDescriptor serverPipeFd = null;

        try {
            parsedArgs = new Arguments(args);

            if (parsedArgs.abiListQuery) {
                return handleAbiListQuery();
            }

            // Requests that ask for capabilities are refused outright.
            if (parsedArgs.permittedCapabilities != 0 || parsedArgs.effectiveCapabilities != 0) {
                throw new ZygoteSecurityException("Client may not specify capabilities: " +
                        "permitted=0x" + Long.toHexString(parsedArgs.permittedCapabilities) +
                        ", effective=0x" + Long.toHexString(parsedArgs.effectiveCapabilities));
            }

            applyUidSecurityPolicy(parsedArgs, peer);
            applyInvokeWithSecurityPolicy(parsedArgs, peer);

            applyDebuggerSystemProperty(parsedArgs);
            applyInvokeWithSystemProperty(parsedArgs);

            int[][] rlimits = null;

            if (parsedArgs.rlimits != null) {
                rlimits = parsedArgs.rlimits.toArray(intArray2d);
            }

            // A pipe is created only for invoke-with requests. The child's end
            // has its descriptor flags reset to 0 right after creation.
            if (parsedArgs.invokeWith != null) {
                FileDescriptor[] pipeFds = Os.pipe2(O_CLOEXEC);
                childPipeFd = pipeFds[1];
                serverPipeFd = pipeFds[0];
                Os.fcntlInt(childPipeFd, F_SETFD, 0);
            }

            /**
             * In order to avoid leaking descriptors to the Zygote child,
             * the native code must close the two Zygote socket descriptors
             * in the child process before it switches from Zygote-root to
             * the UID and privileges of the application being launched.
             *
             * In order to avoid "bad file descriptor" errors when the
             * two LocalSocket objects are closed, the Posix file
             * descriptors are released via a dup2() call which closes
             * the socket and substitutes an open descriptor to /dev/null.
             */

            // Slot 0: this connection's socket; slot 1: the server socket.
            // A slot stays -1 when the corresponding descriptor is null.
            int [] fdsToClose = { -1, -1 };

            FileDescriptor fd = mSocket.getFileDescriptor();

            if (fd != null) {
                fdsToClose[0] = fd.getInt$();
            }

            fd = ZygoteInit.getServerSocketFileDescriptor();

            if (fd != null) {
                fdsToClose[1] = fd.getInt$();
            }

            fd = null;

            pid = Zygote.forkAndSpecialize(parsedArgs.uid, parsedArgs.gid, parsedArgs.gids,
                    parsedArgs.debugFlags, rlimits, parsedArgs.mountExternal, parsedArgs.seInfo,
                    parsedArgs.niceName, fdsToClose, parsedArgs.instructionSet,
                    parsedArgs.appDataDir);
        } catch (ErrnoException ex) {
            logAndPrintError(newStderr, "Exception creating pipe", ex);
        } catch (IllegalArgumentException ex) {
            logAndPrintError(newStderr, "Invalid zygote arguments", ex);
        } catch (ZygoteSecurityException ex) {
            logAndPrintError(newStderr,
                    "Zygote security policy prevents request: ", ex);
        }

        try {
            if (pid == 0) {
                // in child
                IoUtils.closeQuietly(serverPipeFd);
                serverPipeFd = null;
                handleChildProc(parsedArgs, descriptors, childPipeFd, newStderr);

                // should never get here, the child is expected to either
                // throw ZygoteInit.MethodAndArgsCaller or exec().
                return true;
            } else {
                // in parent...pid of < 0 means failure
                IoUtils.closeQuietly(childPipeFd);
                childPipeFd = null;
                return handleParentProc(pid, descriptors, serverPipeFd, parsedArgs);
            }
        } finally {
            // Runs on every exit path, including an exception from the child handler.
            IoUtils.closeQuietly(childPipeFd);
            IoUtils.closeQuietly(serverPipeFd);
        }
    }
    ......
}

forkAndSpecialize 仅仅将参数传递给 nativeForkAndSpecialize 方法,它是一个 jni 函数。

frameworks/base/core/java/com/android/internal/os/Zygote.java

public final class Zygote {
    ......
    /**
     * Forks a new process by delegating to {@code nativeForkAndSpecialize},
     * bracketed by the {@code VM_HOOKS} pre-fork and post-fork callbacks.
     * All parameters are forwarded unchanged to the native method.
     *
     * @return the value returned by the native fork; when it is 0, tracing is
     *         enabled and a "PostFork" trace section is opened before returning
     */
    public static int forkAndSpecialize(int uid, int gid, int[] gids, int debugFlags,
          int[][] rlimits, int mountExternal, String seInfo, String niceName, int[] fdsToClose,
          String instructionSet, String appDataDir) {
        VM_HOOKS.preFork();
        int pid = nativeForkAndSpecialize(
                  uid, gid, gids, debugFlags, rlimits, mountExternal, seInfo, niceName, fdsToClose,
                  instructionSet, appDataDir);
        // Enable tracing as soon as possible for the child process.
        if (pid == 0) {
            Trace.setTracingEnabled(true);

            // Note that this event ends at the end of handleChildProc,
            Trace.traceBegin(Trace.TRACE_TAG_ACTIVITY_MANAGER, "PostFork");
        }
        // Called on every return path, whatever the pid value.
        VM_HOOKS.postForkCommon();
        return pid;
    }

    /** JNI method that performs the fork; see the native source for details. */
    native private static int nativeForkAndSpecialize(int uid, int gid, int[] gids,int debugFlags,
          int[][] rlimits, int mountExternal, String seInfo, String niceName, int[] fdsToClose,
          String instructionSet, String appDataDir);    
    ......
}

其 Native 实现定义在 com_android_internal_os_Zygote.cpp 中。nativeForkAndSpecialize 调用了 ForkAndSpecializeCommon 函数。

frameworks/base/core/jni/com_android_internal_os_Zygote.cpp

// JNI entry for Zygote.nativeForkAndSpecialize. Computes the capability mask
// and forwards everything to ForkAndSpecializeCommon.
static jint com_android_internal_os_Zygote_nativeForkAndSpecialize(
        JNIEnv* env, jclass, jint uid, jint gid, jintArray gids,
        jint debug_flags, jobjectArray rlimits,
        jint mount_external, jstring se_info, jstring se_name,
        jintArray fdsToClose, jstring instructionSet, jstring appDataDir) {
    // Grant CAP_WAKE_ALARM to the Bluetooth process.
    jlong capabilities = 0;
    if (uid == AID_BLUETOOTH) {
        capabilities |= (1LL << CAP_WAKE_ALARM);
    }

    // The same mask is passed for both capability parameters, and the
    // is_system_server argument is false.
    return ForkAndSpecializeCommon(env, uid, gid, gids, debug_flags,
            rlimits, capabilities, capabilities, mount_external, se_info,
            se_name, false, fdsToClose, instructionSet, appDataDir);
}

ForkAndSpecializeCommon 函数中看到了熟悉的 fork 系统调用,这个函数会返回两次,一次返回到子进程,一次返回到父进程。

frameworks/base/core/jni/com_android_internal_os_Zygote.cpp

// Shared fork routine (excerpt; dotted lines are elided code). The only
// statement shown is the fork() call itself.
static pid_t ForkAndSpecializeCommon(JNIEnv* env, uid_t uid, gid_t gid, jintArray javaGids,
                                     jint debug_flags, jobjectArray javaRlimits,
                                     jlong permittedCapabilities, jlong effectiveCapabilities,
                                     jint mount_external,
                                     jstring java_se_info, jstring java_se_name,
                                     bool is_system_server, jintArray fdsToClose,
                                     jstring instructionSet, jstring dataDir) {
    ......
    pid_t pid = fork();
    ......
}

fork 调用的一个奇妙之处就是它仅仅被调用一次,却能够返回两次,它可能有三种不同的返回值:

1.在父进程中,fork返回新创建子进程的进程ID;

2.在子进程中,fork返回0;

3.如果出现错误,fork返回一个负值。

在 fork 函数执行完毕后,如果创建新进程成功,则出现两个进程,一个是子进程,一个是父进程。在子进程中,fork 函数返回 0,在父进程中,fork 返回新创建子进程的进程 ID。我们可以通过 fork 返回的值来判断当前进程是子进程还是父进程。

最后来分析 handleChildProc 函数,由于 --invoke-with 参数客户端没有传入,我们重点分析 RuntimeInit 类的静态方法 zygoteInit 即可。

frameworks/base/core/java/com/android/internal/os/ZygoteConnection.java

class ZygoteConnection {
    ......
    /**
     * Finishes setup inside the forked child: closes the inherited socket
     * objects, optionally rebinds stdin/stdout/stderr to the descriptors sent
     * with the request, sets the process name, and then either execs through
     * a wrapper or continues into {@code RuntimeInit.zygoteInit}.
     *
     * @param parsedArgs parsed request arguments
     * @param descriptors descriptors received with the request, or null
     * @param pipeFd pipe end forwarded to the wrapper path
     * @param newStderr error stream for the request; reassigned locally only
     * @throws ZygoteInit.MethodAndArgsCaller declared for the non-wrapper path
     */
    private void handleChildProc(Arguments parsedArgs,
            FileDescriptor[] descriptors, FileDescriptor pipeFd, PrintStream newStderr)
            throws ZygoteInit.MethodAndArgsCaller {
        /**
         * By the time we get here, the native code has closed the two actual
         * Zygote socket connections and substituted /dev/null in their place.
         * The LocalSocket objects still need to be closed properly.
         */

        closeSocket();
        ZygoteInit.closeServerSocket();

        // NOTE(review): indexes 0..2 are read after only a null check, so a
        // shorter array would throw here. Confirm callers always send three.
        if (descriptors != null) {
            try {
                Os.dup2(descriptors[0], STDIN_FILENO);
                Os.dup2(descriptors[1], STDOUT_FILENO);
                Os.dup2(descriptors[2], STDERR_FILENO);

                // The originals are closed once duplicated onto the standard fds.
                for (FileDescriptor fd: descriptors) {
                    IoUtils.closeQuietly(fd);
                }
                newStderr = System.err;
            } catch (ErrnoException ex) {
                Log.e(TAG, "Error reopening stdio", ex);
            }
        }

        if (parsedArgs.niceName != null) {
            Process.setArgV0(parsedArgs.niceName);
        }

        // End of the postFork event.
        Trace.traceEnd(Trace.TRACE_TAG_ACTIVITY_MANAGER);
        if (parsedArgs.invokeWith != null) {
            WrapperInit.execApplication(parsedArgs.invokeWith,
                    parsedArgs.niceName, parsedArgs.targetSdkVersion,
                    VMRuntime.getCurrentInstructionSet(),
                    pipeFd, parsedArgs.remainingArgs);
        } else {
            RuntimeInit.zygoteInit(parsedArgs.targetSdkVersion,
                    parsedArgs.remainingArgs, null /* classLoader */);
        }
    }
    ......
}

RuntimeInit 类的 zygoteInit 静态方法依次调用了四个方法:redirectLogStreams 将 System.out 和 System.err 重定向到 Android 日志,commonInit 完成通用的初始化工作,nativeZygoteInit 完成 native 层的初始化工作,最后的 applicationInit 解析参数并最终调用目标类的静态 main 方法。

frameworks/base/core/java/com/android/internal/os/RuntimeInit.java

public class RuntimeInit {
    ......
    /**
     * Runs the initialisation steps for a process forked from the zygote, in
     * order: {@code redirectLogStreams}, {@code commonInit},
     * {@code nativeZygoteInit}, then {@code applicationInit}.
     *
     * @param targetSdkVersion forwarded to {@code applicationInit}
     * @param argv forwarded to {@code applicationInit}
     * @param classLoader forwarded to {@code applicationInit}
     * @throws ZygoteInit.MethodAndArgsCaller propagated from {@code applicationInit}
     */
    public static final void zygoteInit(int targetSdkVersion, String[] argv, ClassLoader classLoader)
            throws ZygoteInit.MethodAndArgsCaller {
        if (DEBUG) Slog.d(TAG, "RuntimeInit: Starting application from zygote");

        Trace.traceBegin(Trace.TRACE_TAG_ACTIVITY_MANAGER, "RuntimeInit");
        redirectLogStreams();

        commonInit();
        nativeZygoteInit();
        applicationInit(targetSdkVersion, argv, classLoader);
    }
    ......
}

applicationInit 方法中最终调用了 invokeStaticMain 方法。

frameworks/base/core/java/com/android/internal/os/RuntimeInit.java

public class RuntimeInit {
    ......
    /**
     * Parses the argument vector and hands the start class and its arguments
     * to {@code invokeStaticMain} (excerpt; dotted lines are elided code).
     *
     * Returns without invoking anything if the arguments are rejected with
     * an {@code IllegalArgumentException}; the message is logged.
     *
     * @throws ZygoteInit.MethodAndArgsCaller propagated from {@code invokeStaticMain}
     */
    private static void applicationInit(int targetSdkVersion, String[] argv, ClassLoader classLoader)
            throws ZygoteInit.MethodAndArgsCaller {
        ......
        final Arguments args;
        try {
            args = new Arguments(argv);
        } catch (IllegalArgumentException ex) {
            Slog.e(TAG, ex.getMessage());
            // let the process exit
            return;
        }

        ......
        invokeStaticMain(args.startClass, args.startArgs, classLoader);
    }

    ......
}

invokeStaticMain 方法中通过反射的方式获取到对应的 Class 和 Method,然后抛出一个 ZygoteInit 类的静态内部类 MethodAndArgsCaller 类型的异常,这个异常类同时实现了 Runnable 接口。最终这个异常被 ZygoteInit.main() 捕获,并通过调用该异常的 run() 方法进行响应。

frameworks/base/core/java/com/android/internal/os/RuntimeInit.java

public class RuntimeInit {
    ......
    /**
     * Resolves {@code className}'s {@code main(String[])} by reflection,
     * checks that it is public and static, and throws a
     * {@code ZygoteInit.MethodAndArgsCaller} carrying the method and its
     * arguments. This method never returns normally.
     *
     * @param className fully qualified name of the class to load
     * @param argv arguments carried along for the main method
     * @param classLoader loader used to resolve and initialise the class
     * @throws ZygoteInit.MethodAndArgsCaller always, once validation succeeds
     * @throws RuntimeException if the class or method is missing,
     *         inaccessible, or not public and static
     */
    private static void invokeStaticMain(String className, String[] argv, ClassLoader classLoader)
            throws ZygoteInit.MethodAndArgsCaller {
        final Class<?> targetClass;
        try {
            targetClass = Class.forName(className, true, classLoader);
        } catch (ClassNotFoundException ex) {
            throw new RuntimeException(
                    "Missing class when invoking static main " + className,
                    ex);
        }

        final Method entryPoint;
        try {
            entryPoint = targetClass.getMethod("main", String[].class);
        } catch (NoSuchMethodException ex) {
            throw new RuntimeException(
                    "Missing static main on " + className, ex);
        } catch (SecurityException ex) {
            throw new RuntimeException(
                    "Problem getting static main on " + className, ex);
        }

        final int flags = entryPoint.getModifiers();
        if (!Modifier.isStatic(flags) || !Modifier.isPublic(flags)) {
            throw new RuntimeException(
                    "Main method is not public and static on " + className);
        }

        /*
         * This throw gets caught in ZygoteInit.main(), which responds
         * by invoking the exception's run() method. This arrangement
         * clears up all the stack frames that were required in setting
         * up the process.
         */
        throw new ZygoteInit.MethodAndArgsCaller(entryPoint, argv);
    }
    ......
}

我们看到 MethodAndArgsCaller run 方法中调用了上一步传入的 Method 对象代表的方法。这个方法实际上就是 android.app.ActivityThread 的静态 main 方法。

frameworks/base/core/java/com/android/internal/os/ZygoteInit.java

    /**
     * Exception that is also a {@link Runnable}: it carries a method and a
     * string array, and {@link #run()} invokes that method with a null
     * receiver and the array as its single argument.
     */
    public static class MethodAndArgsCaller extends Exception
            implements Runnable {
        /** Method to invoke from {@link #run()}. */
        private final Method mMethod;

        /** Array passed to the method as its one parameter. */
        private final String[] mArgs;

        public MethodAndArgsCaller(Method method, String[] args) {
            this.mMethod = method;
            this.mArgs = args;
        }

        /**
         * Invokes the stored method. A {@code RuntimeException} or
         * {@code Error} thrown by the target is rethrown as is; any other
         * failure is wrapped in a {@code RuntimeException}.
         */
        @Override
        public void run() {
            try {
                // The cast makes the array a single argument, not the varargs list.
                mMethod.invoke(null, (Object) mArgs);
            } catch (IllegalAccessException ex) {
                throw new RuntimeException(ex);
            } catch (InvocationTargetException ex) {
                final Throwable thrown = ex.getCause();
                if (thrown instanceof RuntimeException) {
                    throw (RuntimeException) thrown;
                }
                if (thrown instanceof Error) {
                    throw (Error) thrown;
                }
                throw new RuntimeException(ex);
            }
        }
    }

在 ZygoteInit 类 main 方法中捕获了 MethodAndArgsCaller 异常,并调用了其 run 方法。至此流程就全部理顺了!之所以要这样做,结合注释略知其用意一二,这种安排清除了请求设置进程所需的所有栈帧。

frameworks/base/core/java/com/android/internal/os/ZygoteInit.java

public class ZygoteInit {
    ......
    // Excerpt of main() showing only its exception handling; dotted lines
    // are elided code.
    public static void main(String argv[]) {
        try {
            ......
        } catch (MethodAndArgsCaller caller) {
            // Run the method carried by the exception from this frame.
            caller.run();
        } catch (RuntimeException ex) {
            ......
        }
    }
    ......
}

最后回到前面留下的疑问:readArgumentList 方法在 ZygoteInit 类中找不到,它其实定义在 ZygoteConnection 类中,并在 runOnce 方法中被调用。它的实现非常简单:解析客户端发来的消息,首先读出参数个数,然后逐行读出各个参数。

frameworks/base/core/java/com/android/internal/os/ZygoteConnection.java

class ZygoteConnection {
    ......
    /**
     * Reads one argument list from the command socket.
     *
     * Wire format: a line holding the argument count, followed by that many
     * lines with one argument each.
     *
     * @return the arguments, or {@code null} if end of stream is reached
     *         before the count line
     * @throws IOException if the count is not an integer, is negative,
     *         exceeds {@code MAX_ZYGOTE_ARGC}, or the stream ends before all
     *         arguments have been read
     */
    private String[] readArgumentList()
            throws IOException {

        int argc;

        try {
            String s = mSocketReader.readLine();

            if (s == null) {
                // EOF reached.
                return null;
            }
            argc = Integer.parseInt(s);
        } catch (NumberFormatException ex) {
            Log.e(TAG, "invalid Zygote wire format: non-int at argc");
            // Keep the parse failure as the cause for easier diagnosis.
            throw new IOException("invalid wire format", ex);
        }

        // A negative count would otherwise reach the array allocation below
        // and fail with an unchecked NegativeArraySizeException, rather than
        // the IOException callers handle for malformed requests.
        if (argc < 0) {
            throw new IOException("invalid wire format: negative arg count " + argc);
        }

        // See bug 1092107: large argc can be used for a DOS attack
        if (argc > MAX_ZYGOTE_ARGC) {
            throw new IOException("max arg count exceeded");
        }

        String[] result = new String[argc];
        for (int i = 0; i < argc; i++) {
            result[i] = mSocketReader.readLine();
            if (result[i] == null) {
                // We got an unexpected EOF.
                throw new IOException("truncated request");
            }
        }

        return result;
    }
    ......
}