前言
在dyld加载流程中,我们分析了dyld加载生成image(machO)的过程,但是现在还没有加载到内存,只有加载到内存我们才能去使用,那么这个过程是怎样的,接下来去分析下。
_objc_init
- 在
dyld加载流程研究中,我们核心流程是走到_objc_init,我们大致研究了_dyld_objc_notify_register,接下来分析下_objc_init的流程:
// Bootstrap entry point of the runtime: runs each init step below once, then
// registers the image callbacks with dyld. Later calls return immediately.
void _objc_init(void)
{
// Guard so the body only ever runs on the first call.
static bool initialized = false;
if (initialized) return;
initialized = true;
// fixme defer initialization until an objc-using image is found?
environ_init(); // read the configured environment variables
tls_init(); // set up the thread-local storage key
static_init(); // run C++ static constructor functions
runtime_init(); // runtime environment initialization
exception_init(); // exception system initialization
#if __OBJC2__
cache_t::init(); // cache initialization
#endif
_imp_implementationWithBlock_init(); // start the callback (trampoline) mechanism
_dyld_objc_notify_register(&map_images, load_images, unmap_image); // register the three image callbacks with dyld
#if __OBJC2__
// Record that the dyld registration above has been made.
didCallDyldNotifyRegister = true;
#endif
}
environ_init
environ_init是读取环境变量,将打印环境变量的方法拉出来,然后去掉限制条件:
打印查看
再运行查看所有的环境变量,可以看到有很多的:
可以看到设置了很多的环境变量,其中看到了比较熟悉的nonpointer_isa,环境变量的用法我们在开发中其实已经用到了,例如设置环境变量对NSLog的输出进行限制。首先nonpointer_isa代表是不是纯isa,我们来添加OBJC_DISABLE_NONPOINTER_ISA来打印下看看,
测试环境变量
-
cmd+shift+,打开Edit Scheme,选择Run->Arguments,然后在Environment Variables中添加OBJC_DISABLE_NONPOINTER_ISA,Value为YES,先 不勾选环境变量 -
然后打印
WSPerson的isa: -
再勾选环境变量,重新跑一下再打印:
-
发现设置了环境变量后,
isa是一个纯isa,里面只有class信息。 -
也可以设置
OBJC_PRINT_LOAD_METHODS来查看哪些地方调用+load方法,来处理项目中的一些问题等。
终端命令查看
- 也可以通过终端命令来
export OBJC_HELP=1来查看环境变量:
tls_init
tls_init主要是对本地线程设置key,代码如下
// Sets up the runtime's thread-local storage key. Both configurations pass
// _objc_pthread_destroyspecific (presumably the per-thread cleanup callback —
// confirm against its definition).
void tls_init(void)
{
#if SUPPORT_DIRECT_THREAD_KEYS
// Direct-key configuration: initialize the fixed key TLS_DIRECT_KEY.
pthread_key_init_np(TLS_DIRECT_KEY, &_objc_pthread_destroyspecific);
#else
// Otherwise create a key and keep it in _objc_pthread_key.
_objc_pthread_key = tls_create(&_objc_pthread_destroyspecific);
#endif
}
static_init
static_init主要是执行C++的静态构造函数。libc会在dyld调用这些静态构造函数之前就调用_objc_init,所以libobjc需要自己来执行它们,也就是说系统(libobjc)的C++静态构造函数会优先于其他自定义的构造函数被调用。
/***********************************************************************
* static_init
* Run C++ static constructor functions.
* libc calls _objc_init() before dyld would call our static constructors,
* so we have to do it ourselves.
**********************************************************************/
static void static_init()
{
size_t count;
// First list: initializers returned as directly callable entries; invoke
// each one in order.
auto inits = getLibobjcInitializers(&_mh_dylib_header, &count);
for (size_t i = 0; i < count; i++) {
inits[i]();
}
// Second list: initializers returned as offsets; each one is wrapped in an
// UnsignedInitializer before being invoked. `count` is reused for this list.
auto offsets = getLibobjcInitializerOffsets(&_mh_dylib_header, &count);
for (size_t i = 0; i < count; i++) {
UnsignedInitializer init(offsets[i]);
init();
}
}
- 可以模拟下场景,在
_objc_init方法上面模拟系统函数写一个c++构造函数:
// Demo constructor: runs automatically as part of the image's initializers and
// prints its own name. The trailing newline keeps the output on its own line
// and lets line-buffered stdout flush it at once, so it shows up while
// stepping in the debugger.
__attribute__((constructor)) void ws_func(void) {
    printf("🎉🎉🎉 %s\n", __func__);
}
- 然后在
static_init中打断点,Step Over往下走,就看到函数已经被调用了,此时WSPerson的load还没走:
runtime_init
runtime_init主要是进行两个步骤:分类的初始化,和初始化一张类的表
// Initializes two runtime containers: the unattached-categories storage and
// the allocated-classes table.
void runtime_init(void)
{
objc::unattachedCategories.init(32); // category storage init (32 is presumably an initial capacity — confirm)
objc::allocatedClasses.init(); // initialize the table of allocated classes
}
exception_init
exception_init是异常系统初始化
分析
- 主要是
注册异常的回调,当下层程序发现错误时,会触发这个回调,从而抛出异常,源码如下
// Installs _objc_terminate as the C++ terminate handler and keeps the handler
// it replaced in old_terminate.
void exception_init(void)
{
old_terminate = std::set_terminate(&_objc_terminate);
}
- 源码中主要实现在
_objc_terminate中:
// Previously installed terminate handler; called by _objc_terminate below.
static void (*old_terminate)(void) = nil;
// Terminate handler: when an Objective-C exception is in flight it is passed to
// uncaught_handler first; every path ends by calling old_terminate.
static void _objc_terminate(void)
{
if (PrintExceptions) {
_objc_inform("EXCEPTIONS: terminating");
}
if (! __cxa_current_exception_type()) {
// No current exception.
(*old_terminate)();
}
else {
// There is a current exception. Check if it's an objc exception.
@try {
__cxa_rethrow(); // rethrow the current exception so the @catch clauses below can classify it
} @catch (id e) {
// It's an objc object. Call Foundation's handler, if any.
(*uncaught_handler)((id)e); // hand the exception object e to the registered uncaught_handler callback
(*old_terminate)();
} @catch (...) {
// It's not an objc object. Continue to C++ terminate.
(*old_terminate)();
}
}
}
- 整体是一个
try-catch运行,下层程序发现异常时,uncaught_handler会回调异常内容e uncaught_handler的源码如下:
// Default uncaught-exception handler: empty body, does nothing.
static void _objc_default_uncaught_exception_handler(id exception)
{
}
// Currently installed handler; starts out as the no-op default above.
static objc_uncaught_exception_handler uncaught_handler = _objc_default_uncaught_exception_handler;
// objc_setUncaughtExceptionHandler source:
// Installs fn as the uncaught-exception handler and returns the handler it
// replaced.
objc_uncaught_exception_handler
objc_setUncaughtExceptionHandler(objc_uncaught_exception_handler fn)
{
objc_uncaught_exception_handler result = uncaught_handler;
uncaught_handler = fn;
return result;
}
- 通过分析,
objc_setUncaughtExceptionHandler传入一个参数fn,将fn赋值给uncaught_handler,当异常时,uncaught_handler将异常内容再传给外界使用
捕获异常
根据原理,我们可以模拟下这个场景,去捕获异常:
-
- 先定义个
UncaughtExceptionHanlder类,再定义好捕获的代码:
- 先定义个
// .h
/// Helper that registers a process-wide logger for uncaught Objective-C exceptions.
@interface UncaughtExceptionHanlder : NSObject

/// Registers ws_exceptionHander through NSSetUncaughtExceptionHandler.
+ (void)installHandler;

@end

// .m
/// Callback invoked with the uncaught exception: logs it, then logs a marker line.
void ws_exceptionHander(NSException *exception) {
    NSLog(@"\n捕获到异常啦 🎉🎉🎉 %@\n", exception);
    NSLog(@" 来啦,老弟~ ");
}

@implementation UncaughtExceptionHanlder

+ (void)installHandler {
    // A function name decays to a function pointer, so no explicit `&` is needed.
    NSUncaughtExceptionHandler *handler = ws_exceptionHander;
    NSSetUncaughtExceptionHandler(handler);
}

@end
-
- 然后再
AppDelegate中调用installHandler方法[UncaughtExceptionHanlder installHandler];
- 然后再
-
- 在
ViewController中写一段会触发异常的代码
- 在
@interface ViewController ()
/// Sample names shown by the demo. Declared `copy` so a mutable array assigned
/// by a caller cannot be mutated behind this controller's back.
@property (nonatomic, copy) NSArray<NSString *> *dataSource;
@end

@implementation ViewController

/// Fills dataSource with five sample names (valid indexes 0...4).
- (void)viewDidLoad {
    [super viewDidLoad];
    self.dataSource = @[@"Kobe", @"Garnett", @"O'Neal", @"Iversen", @"Duncan"];
}

/// Demo trigger: any touch deliberately reads one element past the end of
/// dataSource so that an uncaught exception is raised.
- (void)touchesBegan:(NSSet<UITouch *> *)touches withEvent:(UIEvent *)event {
    // Intentional out-of-bounds access; do not "fix" the index, the crash is
    // the point of the demo.
    NSString *name = self.dataSource[5];
    (void)name; // the value is never used; silence the unused-variable warning
}

@end
-
- 断点在
ws_exceptionHander中第二个断点处,然后运行点击触发异常:
- 断点在
- 于是就捕获到了异常,证明之前的分析成立。
cache_t::init
cache_t::init是缓存初始化,源码如下
// Registers the objc_restartableRanges table for the current task. The whole
// body is compiled only when HAVE_TASK_RESTARTABLE_RANGES is set; otherwise
// the function is empty.
void cache_t::init()
{
#if HAVE_TASK_RESTARTABLE_RANGES
mach_msg_type_number_t count = 0;
kern_return_t kr;
// Count entries up to the first one whose location is zero.
while (objc_restartableRanges[count].location) {
count++;
}
// Register the set of restartable ranges for the current task.
kr = task_restartable_ranges_register(mach_task_self(),
objc_restartableRanges, count);
// Success returns; any other result is reported as fatal.
if (kr == KERN_SUCCESS) return;
_objc_fatal("task_restartable_ranges_register failed (result 0x%x: %s)", kr, mach_error_string(kr));
#endif // HAVE_TASK_RESTARTABLE_RANGES
}
_imp_implementationWithBlock_init
_imp_implementationWithBlock_init:启动回调机制。通常这不会做什么,因为所有的初始化都是惰性的,但是对于某些进程,我们会迫不及待地加载trampolines dylib, 源码如下:
// On macOS only: eagerly calls Trampolines.Initialize() when the process name
// is one of the two listed below; does nothing on other targets or processes.
void
_imp_implementationWithBlock_init(void)
{
#if TARGET_OS_OSX
// Eagerly load libobjc-trampolines.dylib in certain processes. Some
// programs (most notably QtWebEngineProcess used by older versions of
// embedded Chromium) enable a highly restrictive sandbox profile which
// blocks access to that dylib. If anything calls
// imp_implementationWithBlock (as AppKit has started doing) then we'll
// crash trying to load it. Loading it here sets it up before the sandbox
// profile is enabled and blocks it.
//
// This fixes EA Origin (rdar://problem/50813789)
// and Steam (rdar://problem/55286131)
if (__progname &&
(strcmp(__progname, "QtWebEngineProcess") == 0 ||
strcmp(__progname, "Steam Helper") == 0)) {
Trampolines.Initialize();
}
#endif
}
_dyld_objc_notify_register
_dyld_objc_notify_register是注册通知,主要是传入的三个参数_dyld_objc_notify_register(&map_images, load_images, unmap_image);
load_images
- 在上一篇中分析了
load_images的作用,主要是调用+load方法。
unmap_image
dyld移除image时会调用该函数
map_images
- 这个参数与其他两个不一样,它是
指针传递,需要实时变化的,macho加载到内存时会调用该方法。先来看看它的源码实现:
// Acquires runtimeLock for the duration of the call, then forwards all three
// arguments unchanged to map_images_nolock.
void
map_images(unsigned count, const char * const paths[],
const struct mach_header * const mhdrs[])
{
mutex_locker_t lock(runtimeLock);
return map_images_nolock(count, paths, mhdrs);
}
- 实现里明显返回值
map_images_nolock是重点,它的方法如下:
- 我们知道
MachO加载到内存后,才能被读取,所以根据主线走,这里可以断定重点是_read_images方法
_read_images分析
- 方法里的代码比较长,将代码折叠后,发现对应的都是一些
log:
// Outline of _read_images with every block body folded to `{ ... }`; the
// ts.log strings name the phase that has just finished.
void _read_images(header_info **hList, uint32_t hCount, int totalClasses, int unoptimizedTotalClasses)
{
#define EACH_HEADER \
hIndex = 0; \
hIndex < hCount && (hi = hList[hIndex]); \
hIndex++
// 1. First-time-only setup, guarded by doneOnce
if (!doneOnce) { ... }
// Fix up @selector references
// 2. Fix selector references that got mixed up at precompile time
static size_t UnfixedSelectors;
{ ... }
ts.log("IMAGE TIMES: fix up selector references");
// Discover classes. Fix up unresolved future classes. Mark bundle classes.
// 3. Handle wrong / mixed-up classes
bool hasDyldRoots = dyld_shared_cache_some_image_overridden();
for (EACH_HEADER) { ... }
ts.log("IMAGE TIMES: discover classes");
// 4. Remap classes that were not loaded from the image file
if (!noClassesRemapped()) { ... }
ts.log("IMAGE TIMES: remap classes");
#if SUPPORT_FIXUP
// Fix up old objc_msgSend_fixup call sites
// 5. Fix up some messages
for (EACH_HEADER) { ... }
ts.log("IMAGE TIMES: fix up objc_msgSend_fixup");
#endif
// Discover protocols. Fix up protocol refs.
// 6. Read protocols when classes declare them
for (EACH_HEADER) { ... }
ts.log("IMAGE TIMES: discover protocols");
// 7. Fix up protocols that were not loaded
for (EACH_HEADER) { ... }
ts.log("IMAGE TIMES: fix up @protocol references");
// 8. Category handling
if (didInitialAttachCategories) { ... }
ts.log("IMAGE TIMES: discover categories");
// 9. Class loading (realize non-lazy classes)
for (EACH_HEADER) { ... }
ts.log("IMAGE TIMES: realize non-lazy classes");
// 10. Classes not handled yet: realize the resolved future classes
if (resolvedFutureClasses) { ... }
ts.log("IMAGE TIMES: realize future classes");
if (DebugNonFragileIvars) { ... }
// Print preoptimization statistics
// Some diagnostic printing
if (PrintPreopt) { ... }
#undef EACH_HEADER
}
- 根据
log信息,可以将_read_images分为以下步骤:-
- 条件控制,进行第一次加载
-
- 修复预编译时@selector混乱问题
-
- 错误混乱的类处理
-
- 修复重映射⼀些没有被镜像⽂件加载进来的类
-
- 修复一些消息
-
- 当我们类⾥⾯有协议的时候 读取协议
-
- 修复没有被加载的协议
-
- 分类处理
-
- 类的加载处理
-
- 没有被处理的类 优化那些被侵犯的类
-
if (!doneOnce)
- 它主要进行变量状态的改变,从而达到控制第一次加载,源码如下
// 1. First-time-only setup, guarded by doneOnce
if (!doneOnce) {
doneOnce = YES;
launchTime = YES;
#if SUPPORT_NONPOINTER_ISA
// Disable non-pointer isa under some conditions.
# if SUPPORT_INDEXED_ISA
// Disable nonpointer isa if any image contains old Swift code
for (EACH_HEADER) { ... }
# endif
# if TARGET_OS_OSX
for (EACH_HEADER) { ... }
# endif
#endif
if (DisableTaggedPointers) {
disableTaggedPointers();
}
initializeTaggedPointerObfuscator(); // tagged pointer (small object) obfuscation
if (PrintConnecting) { ... }
// namedClasses
// Preoptimized classes don't go in this table.
// 4/3 is NXMapTable's load factor
// Size the table at 4/3 of the class count that will be stored in it.
int namedClassesSize =
(isPreoptimized() ? unoptimizedTotalClasses : totalClasses) * 4 / 3;
gdb_objc_realized_classes =
NXCreateMapTable(NXStrValueMapPrototype, namedClassesSize);
ts.log("IMAGE TIMES: first time tasks");
}
- 这里
NXCreateMapTable是创建一个表,表的大小里有个4/3,在之前的文章中提到了bucket的3/4扩容,这儿为什么是4/3呢?原因很简单,可以用一个公式表示:开辟大小 * 3 / 4 = class占用大小,所以创建表时肯定是需要开辟的大小,也就是class占用大小 * 4 / 3。 - 上面也提到了一个表
allocatedClasses,二者什么区别呢?allocatedClasses是存的是已加载的类,而NXCreateMapTable创建的是一张总表
size_t UnfixedSelectors
- 主要是修复预编译时@selector混乱问题,它的源码如下
// 2. Fix selector references that got mixed up at precompile time
static size_t UnfixedSelectors;
{
mutex_locker_t lock(selLock);
for (EACH_HEADER) {
// Images whose selectors are already preoptimized need no fix-up.
if (hi->hasPreoptimizedSelectors()) continue;
bool isBundle = hi->isBundle();
SEL *sels = _getObjc2SelectorRefs(hi, &count);
UnfixedSelectors += count;
for (i = 0; i < count; i++) {
// Register the selector by name and overwrite the image's reference
// when the registered SEL differs from the one stored in the image.
const char *name = sel_cname(sels[i]);
SEL sel = sel_registerNameNoLock(name, isBundle);
if (sels[i] != sel) {
sels[i] = sel;
}
}
}
}
sel是名字+地址,这里判断当取出来的sel不同时,会进行局部处理。_getObjc2SelectorRefs是从MachO取出来的sel,而sel_registerNameNoLock查看源码得知,是从dyld中取出来的sel,方法相同时,地址不一定一样,因为相对的位置不一样,所以要进行处理下,以dyld中的sel为准。
Discover classes
- 错误混乱类这一块,源码如下:
// 3. Discover classes: run readClass over each image's class list and collect
// every class that readClass replaced with a different non-nil class.
bool hasDyldRoots = dyld_shared_cache_some_image_overridden();
for (EACH_HEADER) {
if (! mustReadClasses(hi, hasDyldRoots)) {
// Image is sufficiently optimized that we need not call readClass()
continue;
}
classref_t const *classlist = _getObjc2ClassList(hi, &count);
bool headerIsBundle = hi->isBundle();
bool headerIsPreoptimized = hi->hasPreoptimizedClasses();
for (i = 0; i < count; i++) {
Class cls = (Class)classlist[i];
Class newCls = readClass(cls, headerIsBundle, headerIsPreoptimized);
if (newCls != cls && newCls) {
// Class was moved but not deleted. Currently this occurs
// only when the new class resolved a future class.
// Non-lazily realize the class below.
// Grow the array by one slot and append newCls.
resolvedFutureClasses = (Class *)
realloc(resolvedFutureClasses,
(resolvedFutureClassCount+1) * sizeof(Class));
resolvedFutureClasses[resolvedFutureClassCount++] = newCls;
}
}
}
- 通过断点跟进,发现不走
if判断,根据注释说:类被移动但还未删除,只有新类解析了未来类时才会发生这个情况,所以这个可以不用去关注 - 然后通过断点在
readClass前后分别打印cls:
- 在
readClass之前,class是从MachO中取出来只有地址,但经过readClass后,就关联了名字,具体是怎么实现的呢,再去看看它的源码实现:
- 可以看到最终得到的是一个
cls,而if (missingWeakSuperclass(cls))得到是nil,所以直接不看- 再看看
if (mangledName != nullptr),它里面有写ro和rw相关的内容,难道要找的就是这里吗?接下来去用LGPerson类去验证下,首先写个代码筛选LGPerson: strcmp是比较的意思,当相同时== 0,会进去。断点到printf,再运行,当走到断点后,说明操作的是LGPerson,再Step Over往下走,发现根本没走到if (mangledName != nullptr)判断,而是走了addNamedClass和addClassTableEntry(cls);方法,再来看看这两个方法
- 再看看
addNamedClass
addNamedClass源码如下:
// Records cls under name. If a different class (other than `replacing`) is
// already found for that name, the duplicate is reported and cls is added via
// addNonMetaClass instead; otherwise name -> cls is inserted into
// gdb_objc_realized_classes. Must be called with runtimeLock held.
static void addNamedClass(Class cls, const char *name, Class replacing = nil)
{
runtimeLock.assertLocked();
Class old;
if ((old = getClassExceptSomeSwift(name)) && old != replacing) {
inform_duplicate(name, old, cls);
// getMaybeUnrealizedNonMetaClass uses name lookups.
// Classes not found by name lookup must be in the
// secondary meta->nonmeta table.
addNonMetaClass(cls);
} else {
NXMapInsert(gdb_objc_realized_classes, name, cls);
}
// cls must not carry the RO_META flag, i.e. it is not a metaclass.
ASSERT(!(cls->data()->flags & RO_META));
// wrong: constructed classes are already realized when they get here
// ASSERT(!cls->isRealized());
}
- 断点往下走,发现最终走了
NXMapInsert这个哈希map表,在NXMapInsert前后,打印cls发现,是NXMapInsert将地址和类的名字关联。
addClassTableEntry
- 源码如下:
// Adds cls to the allocatedClasses set unless isKnownClass(cls) is already
// true. With addMeta == true (the default) the class's metaclass is added as
// well. Must be called with runtimeLock held.
static void
addClassTableEntry(Class cls, bool addMeta = true)
{
runtimeLock.assertLocked();
// This class is allowed to be a known class via the shared cache or via
// data segments, but it is not allowed to be in the dynamic table already.
auto &set = objc::allocatedClasses.get(); // fetch the already-created allocated-classes set
ASSERT(set.find(cls) == set.end());
if (!isKnownClass(cls)) // not a known class yet: record it in the set
set.insert(cls);
if (addMeta) // the caller asked for the metaclass to be added too
addClassTableEntry(cls->ISA(), false); // recurse once on cls->ISA(); addMeta=false stops further recursion
}
- 主要是处理类加载到表,如果不是已知类,就会插入到
allocatedClasses表;如果addMeta为true(默认值),还会取出该类的元类(cls->ISA()),以addMeta = false再走一遍这个流程。
虽然
readClass中提到rw和ro,但根据断点发现,它并没有走进去,说明这里只是进行了类的地址和类名关联然后插入到表。
remapClassRef
// 4. Remap classes that were not loaded from the image file
// (runs only when noClassesRemapped() is false)
if (!noClassesRemapped()) {
for (EACH_HEADER) {
Class *classrefs = _getObjc2ClassRefs(hi, &count); // class references read from this image
for (i = 0; i < count; i++) {
remapClassRef(&classrefs[i]);
}
// fixme why doesn't test future1 catch the absence of this?
// Same pass over the image's super references.
classrefs = _getObjc2SuperRefs(hi, &count);
for (i = 0; i < count; i++) {
remapClassRef(&classrefs[i]);
}
}
}
- 主要通过
remapClassRef方法检测一些没有加载进来的类,并加载
objc_msgSend_fixup
- 主要是修复一些消息,代码如下:
// Fix up message references: run fixupMessageRef over every entry of each
// image's message-ref list; images with no entries are skipped.
for (EACH_HEADER) {
message_ref_t *refs = _getObjc2MessageRefs(hi, &count);
if (count == 0) continue;
if (PrintVtables) {
_objc_inform("VTABLES: repairing %zu unsupported vtable dispatch "
"call sites in %s", count, hi->fname());
}
for (i = 0; i < count; i++) {
fixupMessageRef(refs+i);
}
}
discover protocols
- 当我们类⾥⾯有协议的时候 读取协议
// Discover protocols: pass every entry of each image's protocol list to
// readProtocol, except for preoptimized images during launch.
for (EACH_HEADER) {
extern objc_class OBJC_CLASS_$_Protocol;
Class cls = (Class)&OBJC_CLASS_$_Protocol;
ASSERT(cls);
NXMapTable *protocol_map = protocols();
bool isPreoptimized = hi->hasPreoptimizedProtocols();
// Skip reading protocols if this is an image from the shared cache
// and we support roots
// Note, after launch we do need to walk the protocol as the protocol
// in the shared cache is marked with isCanonical() and that may not
// be true if some non-shared cache binary was chosen as the canonical
// definition
if (launchTime && isPreoptimized) {
if (PrintProtocols) {
_objc_inform("PROTOCOLS: Skipping reading protocols in image: %s",
hi->fname());
}
continue;
}
bool isBundle = hi->isBundle();
protocol_t * const *protolist = _getObjc2ProtocolList(hi, &count);
for (i = 0; i < count; i++) {
readProtocol(protolist[i], cls, protocol_map,
isPreoptimized, isBundle);
}
}
- 主要从
MachO中获取协议数组,然后通过readProtocol方法读取
fix up @protocol
// Fix up @protocol references: run remapProtocolRef over every protocol
// reference of each image, except for preoptimized images during launch.
for (EACH_HEADER) {
// At launch time, we know preoptimized image refs are pointing at the
// shared cache definition of a protocol. We can skip the check on
// launch, but have to visit @protocol refs for shared cache images
// loaded later.
if (launchTime && hi->isPreoptimized())
continue;
protocol_t **protolist = _getObjc2ProtocolRefs(hi, &count);
for (i = 0; i < count; i++) {
remapProtocolRef(&protolist[i]);
}
}
- 通过
remapProtocolRef检测协议有没有被加载,如果没有则会再加载进去
discover categories
// Discover categories: runs only when didInitialAttachCategories is set, and
// then calls load_categories_nolock for every image header.
if (didInitialAttachCategories) {
for (EACH_HEADER) {
load_categories_nolock(hi);
}
}
- 如果有分类,则对分类进行加载处理:分为二步:
-
- 注册类对应的分类
-
- 如果类被实现,则重构类的方法列表
-
realize non-lazy classes
- 类的加载处理
// Realize non-lazy classes: for each entry of an image's nlclslist, remap it,
// add it to the class table, then realize it with realizeClassWithoutSwift.
for (EACH_HEADER) {
classref_t const *classlist = hi->nlclslist(&count);
for (i = 0; i < count; i++) {
Class cls = remapClass(classlist[i]);
// remapClass returned nil: nothing to realize for this entry.
if (!cls) continue;
addClassTableEntry(cls);
if (cls->isSwiftStable()) {
if (cls->swiftMetadataInitializer()) {
_objc_fatal("Swift class %s with a metadata initializer "
"is not allowed to be non-lazy",
cls->nameForLogging());
}
// fixme also disallow relocatable classes
// We can't disallow all Swift classes because of
// classes like Swift.__EmptyArrayStorage
}
realizeClassWithoutSwift(cls, nil);
}
}
- 通过断点走,会进入
addClassTableEntry和realizeClassWithoutSwift两个函数 addClassTableEntry作用:向所有类的表中添加一个类;如果addMeta为true(默认值),还会自动把该类的元类也添加进去,代码如下:
// Adds cls to the allocatedClasses set unless isKnownClass(cls) is already
// true; with addMeta == true (the default) it then does the same for the
// class's metaclass. (The return-type line is omitted in this excerpt.)
addClassTableEntry(Class cls, bool addMeta = true)
{
runtimeLock.assertLocked();
// This class is allowed to be a known class via the shared cache or via
// data segments, but it is not allowed to be in the dynamic table already.
auto &set = objc::allocatedClasses.get();
ASSERT(set.find(cls) == set.end());
if (!isKnownClass(cls))
set.insert(cls);
// Recurse once on the metaclass; addMeta=false stops further recursion.
if (addMeta)
addClassTableEntry(cls->ISA(), false);
}
realizeClassWithoutSwift:rw和ro处理,下篇文章将详细讲解
realize future classes
// Realize future classes: realize every class collected in
// resolvedFutureClasses, call setInstancesRequireRawIsaRecursively on it,
// then free the array.
if (resolvedFutureClasses) {
for (i = 0; i < resolvedFutureClassCount; i++) {
Class cls = resolvedFutureClasses[i];
if (cls->isSwiftStable()) {
_objc_fatal("Swift class is not allowed to be future");
}
realizeClassWithoutSwift(cls, nil);
cls->setInstancesRequireRawIsaRecursively(false/*inherited*/);
}
free(resolvedFutureClasses);
}
- 没有被处理的类,并通过
setInstancesRequireRawIsaRecursively方法将被侵犯的类及其所有子类标记为原始的isa指针。