背景
按下开机键后,会进入BootLoader引导扇区,拉起OS。然后创建了第一个内核进程swapper(),pid=0。
swapper(0) 0号进程在kernel层fork出kthread进程后。从内核层来到用户空间,在用户空间的native层,fork()出init进程。 init进程是操作系统在用户空间的第一个进程!
开始native层的工作:
- 开机动画
- HALL 硬件抽象层接口
- audio、media等功能
- native daemons 守护进程
- servicemanager binder服务管理者
Android 10对init进程的入口函数做了调整:
之前是在init.cpp的main()方法中。现在调整到main.cpp的main()方法中,分阶段启动,职责更加清晰。
一、main.cpp
system/core/init/main.cpp
int main(int argc, char** argv) {
#if __has_feature(address_sanitizer)
__asan_set_error_report_callback(AsanReportCallback);
#elif __has_feature(hwaddress_sanitizer)
__hwasan_set_error_report_callback(AsanReportCallback);
#endif
// Boost prio which will be restored later
setpriority(PRIO_PROCESS, 0, -20);
if (!strcmp(basename(argv[0]), "ueventd")) {
return ueventd_main(argc, argv);
}
if (argc > 1) {
if (!strcmp(argv[1], "subcontext")) {
// 初始化日志
android::base::InitLogging(argv, &android::base::KernelLogger);
const BuiltinFunctionMap& function_map = GetBuiltinFunctionMap();
return SubcontextMain(argc, argv, &function_map);
}
// 2 第二阶段: 增强linux功能
if (!strcmp(argv[1], "selinux_setup")) {
return SetupSelinux(argv);
}
//3 第三阶段: 解析init.rc
if (!strcmp(argv[1], "second_stage")) {
return SecondStageMain(argc, argv);
}
}
// 1 第一阶段 : 挂载相关文件系统
return FirstStageMain(argc, argv);
}
分为三个主要的阶段:
- FirstStageMain
- SetupSelinux
- SecondStageMain
1.1 FirstStageMain
system/core/init/first_stage_init.cpp
int FirstStageMain(int argc, char** argv) {
if (REBOOT_BOOTLOADER_ON_PANIC) {
InstallRebootSignalHandlers();
}
// 第一阶段必须要挂载的文件目录
boot_clock::time_point start_time = boot_clock::now();
std::vector<std::pair<std::string, int>> errors;
#define CHECKCALL(x) \
if ((x) != 0) errors.emplace_back(#x " failed", errno);
// Clear the umask.
umask(0);
CHECKCALL(clearenv());
CHECKCALL(setenv("PATH", _PATH_DEFPATH, 1));
// Get the basic filesystem setup we need put together in the initramdisk
// on / and then we'll let the rc file figure out the rest.
CHECKCALL(mount("tmpfs", "/dev", "tmpfs", MS_NOSUID, "mode=0755"));
CHECKCALL(mkdir("/dev/pts", 0755));
CHECKCALL(mkdir("/dev/socket", 0755));
CHECKCALL(mkdir("/dev/dm-user", 0755));
CHECKCALL(mount("devpts", "/dev/pts", "devpts", 0, NULL));
#define MAKE_STR(x) __STRING(x)
CHECKCALL(mount("proc", "/proc", "proc", 0, "hidepid=2,gid=" MAKE_STR(AID_READPROC)));
#undef MAKE_STR
// Don't expose the raw commandline to unprivileged processes.
CHECKCALL(chmod("/proc/cmdline", 0440));
std::string cmdline;
android::base::ReadFileToString("/proc/cmdline", &cmdline);
// Don't expose the raw bootconfig to unprivileged processes.
chmod("/proc/bootconfig", 0440);
std::string bootconfig;
android::base::ReadFileToString("/proc/bootconfig", &bootconfig);
gid_t groups[] = {AID_READPROC};
CHECKCALL(setgroups(arraysize(groups), groups));
CHECKCALL(mount("sysfs", "/sys", "sysfs", 0, NULL));
CHECKCALL(mount("selinuxfs", "/sys/fs/selinux", "selinuxfs", 0, NULL));
CHECKCALL(mknod("/dev/kmsg", S_IFCHR | 0600, makedev(1, 11)));
if constexpr (WORLD_WRITABLE_KMSG) {
CHECKCALL(mknod("/dev/kmsg_debug", S_IFCHR | 0622, makedev(1, 11)));
}
CHECKCALL(mknod("/dev/random", S_IFCHR | 0666, makedev(1, 8)));
CHECKCALL(mknod("/dev/urandom", S_IFCHR | 0666, makedev(1, 9)));
// This is needed for log wrapper, which gets called before ueventd runs.
CHECKCALL(mknod("/dev/ptmx", S_IFCHR | 0666, makedev(5, 2)));
CHECKCALL(mknod("/dev/null", S_IFCHR | 0666, makedev(1, 3)));
// These below mounts are done in first stage init so that first stage mount can mount
// subdirectories of /mnt/{vendor,product}/. Other mounts, not required by first stage mount,
// should be done in rc files.
// Mount staging areas for devices managed by vold
// See storage config details at http://source.android.com/devices/storage/
CHECKCALL(mount("tmpfs", "/mnt", "tmpfs", MS_NOEXEC | MS_NOSUID | MS_NODEV,
"mode=0755,uid=0,gid=1000"));
// /mnt/vendor is used to mount vendor-specific partitions that can not be
// part of the vendor partition, e.g. because they are mounted read-write.
CHECKCALL(mkdir("/mnt/vendor", 0755));
// /mnt/product is used to mount product-specific partitions that can not be
// part of the product partition, e.g. because they are mounted read-write.
CHECKCALL(mkdir("/mnt/product", 0755));
// /debug_ramdisk is used to preserve additional files from the debug ramdisk
CHECKCALL(mount("tmpfs", "/debug_ramdisk", "tmpfs", MS_NOEXEC | MS_NOSUID | MS_NODEV,
"mode=0755,uid=0,gid=0"));
// /second_stage_resources is used to preserve files from first to second
// stage init
CHECKCALL(mount("tmpfs", kSecondStageRes, "tmpfs", MS_NOEXEC | MS_NOSUID | MS_NODEV,
"mode=0755,uid=0,gid=0"))
// First stage init stores Mainline sepolicy here.
CHECKCALL(mkdir("/dev/selinux", 0744));
#undef CHECKCALL
SetStdioToDevNull(argv);
// Now that tmpfs is mounted on /dev and we have /dev/kmsg, we can actually
// talk to the outside world...
InitKernelLogging(argv);
if (!errors.empty()) {
for (const auto& [error_string, error_errno] : errors) {
LOG(ERROR) << error_string << " " << strerror(error_errno);
}
LOG(FATAL) << "Init encountered errors starting first stage, aborting";
}
LOG(INFO) << "init first stage started!";
auto old_root_dir = std::unique_ptr<DIR, decltype(&closedir)>{opendir("/"), closedir};
if (!old_root_dir) {
PLOG(ERROR) << "Could not opendir("/"), not freeing ramdisk";
}
struct stat old_root_info;
if (stat("/", &old_root_info) != 0) {
PLOG(ERROR) << "Could not stat("/"), not freeing ramdisk";
old_root_dir.reset();
}
//...省略
const char* path = "/system/bin/init";
const char* args[] = {path, "selinux_setup", nullptr};
auto fd = open("/dev/kmsg", O_WRONLY | O_CLOEXEC);
dup2(fd, STDOUT_FILENO);
dup2(fd, STDERR_FILENO);
close(fd);
//再次执行init程序,并传入参数: selinux_setup
execv(path, const_cast<char**>(args));
// execv() only returns if an error happened, in which case we
// panic and never fall through this conditional.
PLOG(FATAL) << "execv("" << path << "") failed";
return 1;
}
- 挂载所需要的文件系统 tempfs、devpts、proc、sysfs 四类虚拟文件系统,只存在于RAM中。
- 通过execv系统调用,再次执行main.cpp中的main方法 本质是执行 SetupSelinux(args) 第二阶段。
1.2 SetupSelinux
system/core/init/selinux.cpp
int SetupSelinux(char** argv) {
SetStdioToDevNull(argv);
InitKernelLogging(argv);
if (REBOOT_BOOTLOADER_ON_PANIC) {
InstallRebootSignalHandlers();
}
boot_clock::time_point start_time = boot_clock::now();
MountMissingSystemPartitions();
SelinuxSetupKernelLogging();
LOG(INFO) << "Opening SELinux policy";
PrepareApexSepolicy();
// Read the policy before potentially killing snapuserd.
std::string policy;
ReadPolicy(&policy);
CleanupApexSepolicy();
auto snapuserd_helper = SnapuserdSelinuxHelper::CreateIfNeeded();
if (snapuserd_helper) {
// Kill the old snapused to avoid audit messages. After this we cannot
// read from /system (or other dynamic partitions) until we call
// FinishTransition().
snapuserd_helper->StartTransition();
}
LoadSelinuxPolicy(policy);
if (snapuserd_helper) {
// Before enforcing, finish the pending snapuserd transition.
snapuserd_helper->FinishTransition();
snapuserd_helper = nullptr;
}
// This restorecon is intentionally done before SelinuxSetEnforcement because the permissions
// needed to transition files from tmpfs to *_contexts_file context should not be granted to
// any process after selinux is set into enforcing mode.
if (selinux_android_restorecon("/dev/selinux/", SELINUX_ANDROID_RESTORECON_RECURSE) == -1) {
PLOG(FATAL) << "restorecon failed of /dev/selinux failed";
}
SelinuxSetEnforcement();
// We're in the kernel domain and want to transition to the init domain. File systems that
// store SELabels in their xattrs, such as ext4 do not need an explicit restorecon here,
// but other file systems do. In particular, this is needed for ramdisks such as the
// recovery image for A/B devices.
if (selinux_android_restorecon("/system/bin/init", 0) == -1) {
PLOG(FATAL) << "restorecon failed of /system/bin/init failed";
}
setenv(kEnvSelinuxStartedAt, std::to_string(start_time.time_since_epoch().count()).c_str(), 1);
const char* path = "/system/bin/init";
const char* args[] = {path, "second_stage", nullptr};
// 开始执行第三阶段
execv(path, const_cast<char**>(args));
// execv() only returns if an error happened, in which case we
// panic and never return from this function.
PLOG(FATAL) << "execv("" << path << "") failed";
return 1;
}
SELinux模块加载 是linux一个安全模块。提高安全性,进一步约束权限。 开启第三阶段 SecondStageMain
1.3 SecondStageMain
system/core/init/init.cpp
int SecondStageMain(int argc, char** argv) {
if (REBOOT_BOOTLOADER_ON_PANIC) {
// 简体重启信号
InstallRebootSignalHandlers();
}
boot_clock::time_point start_time = boot_clock::now();
trigger_shutdown = [](const std::string& command) { shutdown_state.TriggerShutdown(command); };
SetStdioToDevNull(argv);
InitKernelLogging(argv);
LOG(INFO) << "init second stage started!";
// Update $PATH in the case the second stage init is newer than first stage init, where it is
// first set.
if (setenv("PATH", _PATH_DEFPATH, 1) != 0) {
PLOG(FATAL) << "Could not set $PATH to '" << _PATH_DEFPATH << "' in second stage";
}
// Init should not crash because of a dependence on any other process, therefore we ignore
// SIGPIPE and handle EPIPE at the call site directly. Note that setting a signal to SIG_IGN
// is inherited across exec, but custom signal handlers are not. Since we do not want to
// ignore SIGPIPE for child processes, we set a no-op function for the signal handler instead.
{
struct sigaction action = {.sa_flags = SA_RESTART};
action.sa_handler = [](int) {};
sigaction(SIGPIPE, &action, nullptr);
}
// Set init and its forked children's oom_adj.
if (auto result =
WriteFile("/proc/1/oom_score_adj", StringPrintf("%d", DEFAULT_OOM_SCORE_ADJUST));
!result.ok()) {
LOG(ERROR) << "Unable to write " << DEFAULT_OOM_SCORE_ADJUST
<< " to /proc/1/oom_score_adj: " << result.error();
}
// Set up a session keyring that all processes will have access to. It
// will hold things like FBE encryption keys. No process should override
// its session keyring.
keyctl_get_keyring_ID(KEY_SPEC_SESSION_KEYRING, 1);
// Indicate that booting is in progress to background fw loaders, etc.
close(open("/dev/.booting", O_WRONLY | O_CREAT | O_CLOEXEC, 0000));
// See if need to load debug props to allow adb root, when the device is unlocked.
const char* force_debuggable_env = getenv("INIT_FORCE_DEBUGGABLE");
bool load_debug_prop = false;
if (force_debuggable_env && AvbHandle::IsDeviceUnlocked()) {
load_debug_prop = "true"s == force_debuggable_env;
}
unsetenv("INIT_FORCE_DEBUGGABLE");
// Umount the debug ramdisk so property service doesn't read .prop files from there, when it
// is not meant to.
if (!load_debug_prop) {
UmountDebugRamdisk();
}
// 属性初始化
PropertyInit();
// Umount second stage resources after property service has read the .prop files.
UmountSecondStageRes();
// Umount the debug ramdisk after property service has read the .prop files when it means to.
if (load_debug_prop) {
UmountDebugRamdisk();
}
// Mount extra filesystems required during second stage init 挂在第二阶段需要的问题件系统
MountExtraFilesystems();
// Now set up SELinux for second stage.
SelinuxSetupKernelLogging();
SelabelInitialize();
SelinuxRestoreContext();
Epoll epoll;
if (auto result = epoll.Open(); !result.ok()) {
PLOG(FATAL) << result.error();
}
//fd信号处理
InstallSignalFdHandler(&epoll);
//
InstallInitNotifier(&epoll);
//开启服务
StartPropertyService(&property_fd);
// Make the time that init stages started available for bootstat to log.
RecordStageBoottimes(start_time);
// Set libavb version for Framework-only OTA match in Treble build.
if (const char* avb_version = getenv("INIT_AVB_VERSION"); avb_version != nullptr) {
SetProperty("ro.boot.avb_version", avb_version);
}
unsetenv("INIT_AVB_VERSION");
fs_mgr_vendor_overlay_mount_all();
export_oem_lock_status();
MountHandler mount_handler(&epoll);
SetUsbController();
SetKernelVersion();
const BuiltinFunctionMap& function_map = GetBuiltinFunctionMap();
Action::set_function_map(&function_map);
if (!SetupMountNamespaces()) {
PLOG(FATAL) << "SetupMountNamespaces failed";
}
InitializeSubcontext();
ActionManager& am = ActionManager::GetInstance();
ServiceList& sm = ServiceList::GetInstance();
// 解析脚本init.rc
LoadBootScripts(am, sm);
// Turning this on and letting the INFO logging be discarded adds 0.2s to
// Nexus 9 boot time, so it's disabled by default.
if (false) DumpState();
// Make the GSI status available before scripts start running.
auto is_running = android::gsi::IsGsiRunning() ? "1" : "0";
SetProperty(gsi::kGsiBootedProp, is_running);
auto is_installed = android::gsi::IsGsiInstalled() ? "1" : "0";
SetProperty(gsi::kGsiInstalledProp, is_installed);
am.QueueBuiltinAction(SetupCgroupsAction, "SetupCgroups");
am.QueueBuiltinAction(SetKptrRestrictAction, "SetKptrRestrict");
am.QueueBuiltinAction(TestPerfEventSelinuxAction, "TestPerfEventSelinux");
am.QueueBuiltinAction(ConnectEarlyStageSnapuserdAction, "ConnectEarlyStageSnapuserd");
am.QueueEventTrigger("early-init");
// Queue an action that waits for coldboot done so we know ueventd has set up all of /dev...
am.QueueBuiltinAction(wait_for_coldboot_done_action, "wait_for_coldboot_done");
// ... so that we can start queuing up actions that require stuff from /dev.
am.QueueBuiltinAction(SetMmapRndBitsAction, "SetMmapRndBits");
Keychords keychords;
am.QueueBuiltinAction(
[&epoll, &keychords](const BuiltinArguments& args) -> Result<void> {
for (const auto& svc : ServiceList::GetInstance()) {
keychords.Register(svc->keycodes());
}
keychords.Start(&epoll, HandleKeychord);
return {};
},
"KeychordInit");
// Trigger all the boot actions to get us started.
am.QueueEventTrigger("init");
// Don't mount filesystems or start core system services in charger mode.
std::string bootmode = GetProperty("ro.bootmode", "");
if (bootmode == "charger") {
am.QueueEventTrigger("charger");
} else {
am.QueueEventTrigger("late-init");
}
// Run all property triggers based on current state of the properties.
am.QueueBuiltinAction(queue_property_triggers_action, "queue_property_triggers");
// Restore prio before main loop
setpriority(PRIO_PROCESS, 0, 0);
while (true) {
// By default, sleep until something happens.
auto epoll_timeout = std::optional<std::chrono::milliseconds>{kDiagnosticTimeout};
auto shutdown_command = shutdown_state.CheckShutdown();
if (shutdown_command) {
LOG(INFO) << "Got shutdown_command '" << *shutdown_command
<< "' Calling HandlePowerctlMessage()";
HandlePowerctlMessage(*shutdown_command);
shutdown_state.set_do_shutdown(false);
}
if (!(prop_waiter_state.MightBeWaiting() || Service::is_exec_service_running())) {
am.ExecuteOneCommand();
}
if (!IsShuttingDown()) {
auto next_process_action_time = HandleProcessActions();
// If there's a process that needs restarting, wake up in time for that.
if (next_process_action_time) {
epoll_timeout = std::chrono::ceil<std::chrono::milliseconds>(
*next_process_action_time - boot_clock::now());
if (*epoll_timeout < 0ms) epoll_timeout = 0ms;
}
}
if (!(prop_waiter_state.MightBeWaiting() || Service::is_exec_service_running())) {
// If there's more work to do, wake up again immediately.
if (am.HasMoreCommands()) epoll_timeout = 0ms;
}
auto pending_functions = epoll.Wait(epoll_timeout);
if (!pending_functions.ok()) {
LOG(ERROR) << pending_functions.error();
} else if (!pending_functions->empty()) {
// We always reap children before responding to the other pending functions. This is to
// prevent a race where other daemons see that a service has exited and ask init to
// start it again via ctl.start before init has reaped it.
ReapAnyOutstandingChildren();
for (const auto& function : *pending_functions) {
(*function)();
}
} else if (Service::is_exec_service_running()) {
std::chrono::duration<double> waited =
std::chrono::steady_clock::now() - Service::exec_service_started();
if (waited >= kDiagnosticTimeout) {
LOG(ERROR) << "Exec service is hung? Waited " << waited.count()
<< " without SIGCHLD";
}
}
if (!IsShuttingDown()) {
HandleControlMessages();
SetUsbController();
}
}
return 0;
}
主要职责:
- 初始化single句柄 : 新建epoll对象注册句柄,注册子进程死亡信号处理逻辑,防止出现僵尸进程。 每一个进程在proc下面对应一个文件。
- 开启属性服务:通过open打开属性文件dev/properties 通过mmap映射到init进程。并创建非阻塞的socket服务端,listen当前fd。通过epoll注册,当监听到子进程修改属性时,init进程来修改。
- 解析init.rc脚本 启动各种rc服务。
- 循环开启epoll.wait等待,只要有事件就会触发回调
二、Zygote服务启动
init在解析各种.rc文件阶段,会解析 init.zygote.rc文件。
system/core/rootdir/init.zygote64.rc
service zygote /system/bin/app_process64 -Xzygote /system/bin --zygote --start-system-server
class main
priority -20
user root
group root readproc reserved_disk
socket zygote stream 660 root system
socket usap_pool_primary stream 660 root system
onrestart exec_background - system system -- /system/bin/vdc volume abort_fuse
onrestart write /sys/power/state on
onrestart restart audioserver
onrestart restart cameraserver
onrestart restart media
onrestart restart media.tuner
onrestart restart netd
onrestart restart wificond
task_profiles ProcessCapacityHigh
critical window=${zygote.critical_window.minute:-off} target=zygote-fatal
职责:
- 创建zygote service服务
- 执行该服务对应的可执行文件
/system/bin/app_process/app_main.cpp - 通过fork()开启进程,创建socket服务。通过exev系统调用,进入到app_main.cpp的main()方法中
- 在main方法中调用runtime的start函数启动java测的 zygoteInit
// ....
if (zygote) {
runtime.start("com.android.internal.os.ZygoteInit", args, zygote);
} else if (className) {
runtime.start("com.android.internal.os.RuntimeInit", args, zygote);
} else {
fprintf(stderr, "Error: no class name or --zygote supplied.\n");
app_usage();
LOG_ALWAYS_FATAL("app_process: no class name or --zygote supplied.");
}
三、总结
init进程的核心职责:
- 属性值修改 通过开辟一块共享内存区域,提供socket服务端
- 回收僵尸进程 通过epoll机制
- 解析各种init.rc文件 启动zygote
参考: