系统启动流程分析之Zygote进程启动流程分析

1,005 阅读4分钟

「这是我参与2022首次更文挑战的第17天,活动详情查看:2022首次更文挑战」。

zygote的启动流程需要从安卓系统的启动流程说起。在系统启动过程中,配置文件解析完成后,/system/core/init/init.cpp文件的SecondStageMain函数会运行所有的Service/Action等。对于zygote来说,正如此前分析解析工具的文章所述,init会为它生成一个Service,并以此来启动这个服务;而Service的启动,是通过调用其ExecStart函数完成的(对具体过程感兴趣的话可以自行分析一下),因此我们从ExecStart函数开始分析:

// system/core/init/service.cpp
// Excerpt of Service::ExecStart() (unrelated parts elided by the article).
// Sets SVC_ONESHOT, delegates to Start(), and on success sets SVC_EXEC and
// is_exec_service_running_. If Start() fails, its result is returned unchanged;
// otherwise an empty Result is returned.
Result<void> Service::ExecStart() {
    // ...... unrelated code omitted
    // Set the SVC_ONESHOT flag
    flags_ |= SVC_ONESHOT;
    // Call Start() to launch the Service
    if (auto result = Start(); !result.ok()) {
        return result;
    }
    // Record the exec state in flags_
    flags_ |= SVC_EXEC;
    is_exec_service_running_ = true;

    // ...... unrelated code omitted
    return {};
}

// Excerpt of Service::Start() (several parts, including the tail of the
// function, are elided by the article). The visible part:
//   1. clears the disabled/reset/restart flags,
//   2. checks that the executable args_[0] exists,
//   3. computes a context string (scon) from the executable,
//   4. creates the sockets declared for the service,
//   5. forks (or clones, when namespace flags are set) a child, and
//   6. in the child, sets up the environment and calls ExpandArgsAndExecv().
Result<void> Service::Start() {
    // ...... unrelated code omitted
    // For zygote, the configuration file does not put it in the DISABLED or
    // RESET state. Both states are checked here; the bootanim process analysed
    // in an earlier article is configured as DISABLED, so it can only be
    // started by other means and is not auto-started here.
    bool disabled = (flags_ & (SVC_DISABLED | SVC_RESET));
    // Starting a service removes it from the disabled or reset state and
    // immediately takes it out of the restarting state if it was in there.
    flags_ &= (~(SVC_DISABLED|SVC_RESTARTING|SVC_RESET|SVC_RESTART|SVC_DISABLED_START));

    // ...... unrelated code omitted

    struct stat sb;
    // args_ is assigned when the service is initialised.
    // For the zygote process, args_[0] = "/system/bin/app_process"
    if (stat(args_[0].c_str(), &sb) == -1) {
        flags_ |= SVC_DISABLED;
        return ErrnoError() << "Cannot find '" << args_[0] << "'";
    }

    std::string scon;
    // ...... branch whose condition is not met is omitted
    {
        auto result = ComputeContextFromExecutable(args_[0]);
        if (!result.ok()) {
            return result.error();
        }
        scon = *result;
    }

    if (!AreRuntimeApexesReady() && !pre_apexd_) {
        // If this service is started before the Runtime and ART APEXes get
        // available, mark it as pre-apexd one. Note that this marking is
        // permanent. So for example, if the service is re-launched (e.g., due
        // to crash), it is still recognized as pre-apexd... for consistency.
        pre_apexd_ = true;
    }

    post_data_ = ServiceList::GetInstance().IsPostData();

    LOG(INFO) << "starting service '" << name_ << "'...";

    std::vector<Descriptor> descriptors;
    // Create the sockets for the Service. According to its configuration file,
    // zygote has two sockets, and they are created here. A failed creation is
    // only logged; the loop continues with the next socket.
    for (const auto& socket : sockets_) {
        if (auto result = socket.Create(scon); result.ok()) {
            descriptors.emplace_back(std::move(*result));
        } else {
            LOG(INFO) << "Could not create socket '" << socket.name << "': " << result.error();
        }
    }

    // ...... code omitted

    pid_t pid = -1;
    if (namespaces_.flags) {
        pid = clone(nullptr, nullptr, namespaces_.flags | SIGCHLD, nullptr);
    } else {
        // fork a new process
        pid = fork();
    }

    // pid == 0: this is the newly created child process.
    if (pid == 0) {
        umask(077);

        if (auto result = EnterNamespaces(namespaces_, name_, pre_apexd_); !result.ok()) {
            LOG(FATAL) << "Service '" << name_
                       << "' failed to set up namespaces: " << result.error();
        }

        for (const auto& [key, value] : environment_vars_) {
            setenv(key.c_str(), value.c_str(), 1);
        }

        for (const auto& descriptor : descriptors) {
            descriptor.Publish();
        }
        // Write the pid to the configured files
        if (auto result = WritePidToFiles(&writepid_files_); !result.ok()) {
            LOG(ERROR) << "failed to write pid to files: " << result.error();
        }

        if (task_profiles_.size() > 0 && !SetTaskProfiles(getpid(), task_profiles_)) {
            LOG(ERROR) << "failed to set task profiles";
        }

        // As requested, set our gid, supplemental gids, uid, context, and
        // priority. Aborts on failure.
        SetProcessAttributesAndCaps();
        // Call ExpandArgsAndExecv() to run the service binary
        if (!ExpandArgsAndExecv(args_, sigstop_)) {
            PLOG(ERROR) << "cannot execv('" << args_[0]
                        << "'). See the 'Debugging init' section of init's README.md for tips";
        }

        // Reached only if ExpandArgsAndExecv() returned instead of replacing
        // the process image.
        _exit(127);
    }

    // ......
}

可以看到,上述Start函数根据zygote配置文件中的各项参数完成初始化和创建(如socket)等工作,在fork出子进程之后,最终在子进程中通过ExpandArgsAndExecv函数来运行服务:

// Builds a NULL-terminated argv from `args` and execv()s args[0].
//   args    - command line; args[0] is used as-is, every later element is
//             passed through ExpandProps() first.
//   sigstop - when true, SIGSTOP is sent to the current process before execv().
// Returns whether execv() returned 0. Per POSIX, execv() only returns on
// failure, so a return from this function is expected to mean the exec failed.
static bool ExpandArgsAndExecv(const std::vector<std::string>& args, bool sigstop) {
    std::vector<std::string> expanded_args;
    std::vector<char*> c_strings;
    // Size expanded_args up front: c_strings stores raw pointers into its
    // elements, so it is not grown inside the loop. args[0] is not expanded
    // and is pushed directly.
    expanded_args.resize(args.size());
    c_strings.push_back(const_cast<char*>(args[0].data()));
    for (std::size_t i = 1; i < args.size(); ++i) {
        auto expanded_arg = ExpandProps(args[i]);
        if (!expanded_arg.ok()) {
            LOG(FATAL) << args[0] << ": cannot expand arguments': " << expanded_arg.error();
        }
        expanded_args[i] = *expanded_arg;
        c_strings.push_back(expanded_args[i].data());
    }
    // execv() requires the argument array to end with a null pointer.
    c_strings.push_back(nullptr);
    // If sigstop is set, send SIGSTOP to this very process (getpid()).
    // kill() only delivers the signal; SIGSTOP stops the process, it does not
    // terminate it.
    if (sigstop) {
        kill(getpid(), SIGSTOP);
    }
    // Finally run c_strings[0], which for zygote is "/system/bin/app_process"
    return execv(c_strings[0], c_strings.data()) == 0;
}

在这个函数中,首先准备好参数数组,然后通过execv函数来运行/system/bin/app_process应用程序。app_process应用程序的源代码位于/frameworks/base/cmds/app_process/下:

// frameworks/base/cmds/app_process/app_main.cpp
// Excerpt of app_process's main() (parts elided by the article).
// Creates an AppRuntime, forwards the leading '-' options to it, parses the
// remaining arguments (--zygote, --start-system-server), builds the argument
// list for the Java side and, in zygote mode, starts
// com.android.internal.os.ZygoteInit through runtime.start().
int main(int argc, char* const argv[])
{
    // ......
    // 1. Create an AppRuntime object
    AppRuntime runtime(argv[0], computeArgBlockSize(argc, argv));
    // Process command line arguments
    // ignore argv[0]
    // Drop argv[0], which here is /system/bin/app_process; the program is
    // already running, so this argument is no longer needed.
    argc--;
    argv++;
    // Remaining argv = { "-Xzygote", "/system/bin", "--zygote", "--start-system-server" }

    // ......

    int i;
    for (i = 0; i < argc; i++) {
        // ......

        if (argv[i][0] != '-') {
            break;
        }
        if (argv[i][1] == '-' && argv[i][2] == 0) {
            ++i; // Skip --.
            break;
        }
        // Add "-Xzygote" to the AppRuntime options
        runtime.addOption(strdup(argv[i]));
        // ......
    }
    // After the for loop above, i = 1

    // Parse runtime arguments.  Stop at first unrecognized option.
    bool zygote = false;
    bool startSystemServer = false;
    bool application = false;
    String8 niceName;
    String8 className;
    // ++i makes i = 2, i.e. the second argument "/system/bin" is skipped
    ++i;  // Skip unused "parent dir" argument.
    while (i < argc) {
        const char* arg = argv[i++];
        if (strcmp(arg, "--zygote") == 0) {
            // Matches argv[2]; niceName is set to ZYGOTE_NICE_NAME
            zygote = true;
            niceName = ZYGOTE_NICE_NAME;
        } else if (strcmp(arg, "--start-system-server") == 0) {
            // Matches argv[3]
            startSystemServer = true;
        }
        // ...... branches whose conditions cannot be met are omitted
    }

    Vector<String8> args;
    // className was never set above, so it is empty here
    if (!className.isEmpty()) {
        // ...... branch whose condition cannot be met is omitted
    } else {
        // We're in zygote mode.
        maybeCreateDalvikCache();
        // Add the start-system-server argument
        if (startSystemServer) {
            args.add(String8("start-system-server"));
        }

        char prop[PROP_VALUE_MAX];
        // Read the system ABI list property
        if (property_get(ABI_LIST_PROPERTY, prop, NULL) == 0) {
            LOG_ALWAYS_FATAL("app_process: Unable to determine ABI list from property %s.",
                ABI_LIST_PROPERTY);
            return 11;
        }

        String8 abiFlag("--abi-list=");
        abiFlag.append(prop);
        // Add the ABI list argument
        args.add(abiFlag);

        // In zygote mode, pass all remaining arguments to the zygote
        // main() method.
        // After the while loop above, i = 4, which equals argc, so nothing
        // more is added here.
        for (; i < argc; ++i) {
            args.add(String8(argv[i]));
        }
    }
    // Set the process name
    if (!niceName.isEmpty()) {
        runtime.setArgv0(niceName.string(), true /* setProcName */);
    }

    if (zygote) {
        // 2. Call AppRuntime's start(), which launches the Java class named by
        //    the first argument
        runtime.start("com.android.internal.os.ZygoteInit", args, zygote);
    }
    // ......
}

从上述函数的代码来看,主要是先初始化了一个AppRuntime对象,AppRuntime类继承自AndroidRuntime类:

classDiagram
AndroidRuntime <|-- AppRuntime
class AppRuntime {
    +onVmCreated(JNIEnv* env)
    +onStarted()
    +onZygoteInit()
    +onExit(int code)
    +mClassName : String8
    +mArgs : Vector<String8>
    +mClass : jclass
}
class AndroidRuntime {
    -startReg(JNIEnv* env)
    -startVm(JavaVM** pJavaVM, JNIEnv** pEnv, bool zygote, bool primary_zygote)
    -mOptions : Vector<JavaVMOption>
    -mJavaVM : JavaVM*
    -javaCreateThreadEtc(android_thread_func_t, void*, const char*, int32_t, size_t, android_thread_id_t*)
    +onVmCreated(JNIEnv* env)
    +onStarted()
    +onZygoteInit()
    +onExit(int /*code*/)
    +getJavaVM()
    +getJNIEnv()
    +start(const char *, const Vector<String8>&, bool)
}

然后为AppRuntime对象的start函数准备参数,最终调用AppRuntime的start函数。从上述类图可以看到,AppRuntime自身并未定义start函数,因此此处调用的是其父类AndroidRuntime的start函数:

// Excerpt of AndroidRuntime::start() (parts elided by the article).
//   className - dotted name of the Java class whose static main() is invoked.
//   options   - extra arguments passed to that main() after the class name.
//   zygote    - forwarded to startVm().
// The visible part validates the ANDROID_* root environment variables, starts
// the VM, registers native functions, packs className + options into a Java
// String[] and calls the class's static main(String[]).
void AndroidRuntime::start(const char* className, const Vector<String8>& options, bool zygote)
{
    // ......
    static const String8 startSystemServer("start-system-server");
    // Whether this is the primary zygote, meaning the zygote which will fork system server.
    bool primary_zygote = false;

    /*
     * 'startSystemServer == true' means runtime is obsolete and not run from
     * init.rc anymore, so we print out the boot start event here.
     */
    for (size_t i = 0; i < options.size(); ++i) {
        // Given the arguments built in app_process's main(), this condition
        // holds, so primary_zygote = true
        if (options[i] == startSystemServer) {
            primary_zygote = true;
           /* track our progress through the boot sequence */
           const int LOG_BOOT_PROGRESS_START = 3000;
           LOG_EVENT_LONG(LOG_BOOT_PROGRESS_START,  ns2ms(systemTime(SYSTEM_TIME_MONOTONIC)));
        }
    }

    // ANDROID_ROOT falls back to "/system" when unset; the other three root
    // variables below have no fallback and cause an early return when missing.
    const char* rootDir = getenv("ANDROID_ROOT");
    if (rootDir == NULL) {
        rootDir = "/system";
        if (!hasDir("/system")) {
            LOG_FATAL("No root directory specified, and /system does not exist.");
            return;
        }
        setenv("ANDROID_ROOT", rootDir, 1);
    }

    const char* artRootDir = getenv("ANDROID_ART_ROOT");
    if (artRootDir == NULL) {
        LOG_FATAL("No ART directory specified with ANDROID_ART_ROOT environment variable.");
        return;
    }

    const char* i18nRootDir = getenv("ANDROID_I18N_ROOT");
    if (i18nRootDir == NULL) {
        LOG_FATAL("No runtime directory specified with ANDROID_I18N_ROOT environment variable.");
        return;
    }

    const char* tzdataRootDir = getenv("ANDROID_TZDATA_ROOT");
    if (tzdataRootDir == NULL) {
        LOG_FATAL("No tz data directory specified with ANDROID_TZDATA_ROOT environment variable.");
        return;
    }
    // ......
    /* start the virtual machine */
    JniInvocation jni_invocation;
    jni_invocation.Init(NULL);
    JNIEnv* env;
    // Start the Java virtual machine
    if (startVm(&mJavaVM, &env, zygote, primary_zygote) != 0) {
        return;
    }
    // Invoke the onVmCreated() hook with the new JNIEnv
    onVmCreated(env);

    /*
     * Register android functions.
     */
    // Register the JNI functions. According to the article, the ZygoteInit
    // natives analysed later are registered inside this call, which ties the
    // Java-layer ZygoteInit to the native layer.
    if (startReg(env) < 0) {
        ALOGE("Unable to register all android natives\n");
        return;
    }

    /*
     * We want to call main() with a String array with arguments in it.
     * At present we have two arguments, the class name and an option string.
     * Create an array to hold them.
     */
    jclass stringClass;
    jobjectArray strArray;
    jstring classNameStr;

    stringClass = env->FindClass("java/lang/String");
    assert(stringClass != NULL);
    strArray = env->NewObjectArray(options.size() + 1, stringClass, NULL);
    assert(strArray != NULL);
    classNameStr = env->NewStringUTF(className);
    assert(classNameStr != NULL);
    // Store the className argument as element 0 of strArray
    env->SetObjectArrayElement(strArray, 0, classNameStr);
    // Store the remaining options in strArray, starting at index 1
    for (size_t i = 0; i < options.size(); ++i) {
        jstring optionsStr = env->NewStringUTF(options.itemAt(i).string());
        assert(optionsStr != NULL);
        env->SetObjectArrayElement(strArray, i + 1, optionsStr);
    }

    /*
     * Start VM.  This thread becomes the main thread of the VM, and will
     * not return until the VM exits.
     */
    // Look up the Java-layer class (ZygoteInit in this flow)
    char* slashClassName = toSlashClassName(className != NULL ? className : "");
    jclass startClass = env->FindClass(slashClassName);
    if (startClass == NULL) {
        ALOGE("JavaVM unable to locate class '%s'\n", slashClassName);
        /* keep going */
    } else {
        // Look up its static main(String[]) method
        jmethodID startMeth = env->GetStaticMethodID(startClass, "main",
            "([Ljava/lang/String;)V");
        if (startMeth == NULL) {
            ALOGE("JavaVM unable to find main() in '%s'\n", className);
            /* keep going */
        } else {
            // Call ZygoteInit.main(). Note that the argument is strArray, which
            // in this flow holds three strings (class name plus two options).
            env->CallStaticVoidMethod(startClass, startMeth, strArray);
            // ......
        }
    }
    // ...... cleanup work
}

如上注释所述,上述代码主要是初始化zygote进程的AppRuntime对象,然后通过其start函数调用AndroidRuntime::start,从而启动Java虚拟机、注册对应的JNI函数等,最终调用Java层的ZygoteInit.main函数:

// Excerpt of ZygoteInit.main() (parts elided by the article).
// argv[0] is the class name passed from the native side; parsing therefore
// starts at index 1. The visible part parses the arguments, creates the
// ZygoteServer, optionally forks system_server, and otherwise enters the
// select loop. A non-null Runnable (from forkSystemServer or runSelectLoop)
// is run in the child process.
public static void main(String argv[]) {
    ZygoteServer zygoteServer = null;

    // Mark zygote start. This ensures that thread creation will throw
    // an error.
    ZygoteHooks.startZygoteNoThreadCreation();

    // Zygote goes into its own process group.
    try {
        // Put zygote into its own process group via setpgid(0, 0)
        Os.setpgid(0, 0);
    } catch (ErrnoException ex) {
        throw new RuntimeException("Failed to setpgid(0,0)", ex);
    }

    Runnable caller;
    try {
        // Store now for StatsLogging later.
        final long startTime = SystemClock.elapsedRealtime();
        final boolean isRuntimeRestarted = "1".equals(
                SystemProperties.get("sys.boot_completed"));

        String bootTimeTag = Process.is64Bit() ? "Zygote64Timing" : "Zygote32Timing";
        TimingsTraceLog bootTimingsTraceLog = new TimingsTraceLog(bootTimeTag,
                Trace.TRACE_TAG_DALVIK);
        bootTimingsTraceLog.traceBegin("ZygoteInit");
        RuntimeInit.preForkInit();

        boolean startSystemServer = false;
        String zygoteSocketName = "zygote";
        String abiList = null;
        boolean enableLazyPreload = false;
        for (int i = 1; i < argv.length; i++) {
            if ("start-system-server".equals(argv[i])) {
                // Condition holds in this flow, so startSystemServer = true
                startSystemServer = true;
            } else if ("--enable-lazy-preload".equals(argv[i])) {
                enableLazyPreload = true;
            } else if (argv[i].startsWith(ABI_LIST_ARG)) {
                // Record the ABI list passed from the native side
                abiList = argv[i].substring(ABI_LIST_ARG.length());
            }
            // ......
        }
        // isPrimaryZygote = true in this flow (per the article)
        final boolean isPrimaryZygote = zygoteSocketName.equals(Zygote.PRIMARY_SOCKET_NAME);
        if (!isRuntimeRestarted) {
            if (isPrimaryZygote) {
                FrameworkStatsLog.write(FrameworkStatsLog.BOOT_TIME_EVENT_ELAPSED_TIME_REPORTED,
                        BOOT_TIME_EVENT_ELAPSED_TIME__EVENT__ZYGOTE_INIT_START,
                        startTime);
            } else if (zygoteSocketName.equals(Zygote.SECONDARY_SOCKET_NAME)) {
                FrameworkStatsLog.write(FrameworkStatsLog.BOOT_TIME_EVENT_ELAPSED_TIME_REPORTED,
                        BOOT_TIME_EVENT_ELAPSED_TIME__EVENT__SECONDARY_ZYGOTE_INIT_START,
                        startTime);
            }
        }
        // ......
        // Do an initial gc to clean up after startup
        bootTimingsTraceLog.traceBegin("PostZygoteInitGC");
        gcAndFinalize();
        bootTimingsTraceLog.traceEnd(); // PostZygoteInitGC

        bootTimingsTraceLog.traceEnd(); // ZygoteInit

        Zygote.initNativeState(isPrimaryZygote);

        ZygoteHooks.stopZygoteNoThreadCreation();

        zygoteServer = new ZygoteServer(isPrimaryZygote);

        if (startSystemServer) {
            // Fork the system_server process
            Runnable r = forkSystemServer(abiList, zygoteSocketName, zygoteServer);

            // {@code r == null} in the parent (zygote) process, and {@code r != null} in the
            // child (system_server) process.
            if (r != null) {
                r.run();
                return;
            }
        }

        Log.i(TAG, "Accepting command socket connections");

        // The select loop returns early in the child process after a fork and
        // loops forever in the zygote.
        caller = zygoteServer.runSelectLoop(abiList);
    } catch (Throwable ex) {
        Log.e(TAG, "System zygote died with exception", ex);
        throw ex;
    } finally {
        if (zygoteServer != null) {
            zygoteServer.closeServerSocket();
        }
    }

    // We're in the child process and have exited the select loop. Proceed to execute the
    // command.
    if (caller != null) {
        caller.run();
    }
}

如上述代码,最终会通过调用forkSystemServer函数来fork一个system_server进程。关于system_server进程的启动情况,我们下一篇中再行分析,此篇不作过多分析。上述代码流程的时序图如下:

图片.png