最近看了 抖音 Android 性能优化系列:新一代全能型性能分析工具 Rhea,也心痒痒的想自己做个demo,先从 dl 开始吧,搞了几天,写了一个demo so库来练手时,一点毛病都没,一到设备上 dl libart 时却各种崩溃,现在终于没崩溃了,记录一下,虽然就几行代码。
0 实验环境
Android N
[ro.build.version.sdk]: [24]
使用 kwai-linker 绕过系统 dlopen so 限制
1 调用 int32_t Runtime::GetStat(int kind)
从系统中拉取 libart.so
使用 nm
命令查看符号表
nm libart.so |grep _ZN3art7Runtime
readelf
也可查看
readelf -Ws libart.so |grep _ZN3art7Runtime
从符号表找到 Runtime
实例和 GetStat
方法,直接开撸
void *libHandle = kwai::linker::DlFcn::dlopen("libart.so", RTLD_NOW);
// static Runtime* instance_;
void** runtime = (void**) kwai::linker::DlFcn::dlsym(libHandle, "_ZN3art7Runtime9instance_E");
ALOG("runtime addr is , %x", *runtime);
// int32_t Runtime::GetStat(int kind)
int (*GetStat)(void*, int);
GetStat = (int (*)(void*, int)) kwai::linker::DlFcn::dlsym(libHandle, "_ZN3art7Runtime7GetStatEi");
ALOG("GetStat addr is , %x", GetStat);
for (int i = 0; i <= 6; ++i) {
int c = GetStat(*runtime, 1<<i);
ALOG("GetStat %d, %d", i, c);
}
// 也可以从 JavaVM 里获取 Runtime 对象(epic代码里发现的)
struct JavaVMExt {
void* functions;
void* runtime;
};
JavaVM* javaVM;
env->GetJavaVM(&javaVM);
JavaVMExt* javaVMExt = (JavaVMExt*)javaVM;
void* runtime = javaVMExt->runtime;
2 修改 gLogVerbosity->systrace_lock_logging
为了能抓取 thinlock 的 trace,需要修改 gLogVerbosity->systrace_lock_logging
的值
在符号表找到后直接转型为 LogVerbosity
即可修改,LogVerbosity
是从源码直接复制
// Local mirror of ART's LogVerbosity flags. The surrounding notes state it was copied
// directly from the ART sources, and the pointer returned for the symbol
// "_ZN3art13gLogVerbosityE" is cast to this type. Because the struct is overlaid on
// memory owned by libart, the number and order of the bool fields must not change.
// NOTE(review): the article targets Android N (API 24); confirm the field list before
// using it on another version.
struct LogVerbosity {
bool class_linker; // Enabled with "-verbose:class".
bool collector;
bool compiler;
bool deopt;
bool gc;
bool heap;
bool jdwp;
bool jit;
bool jni;
bool monitor;
bool oat;
bool profiler;
bool signals;
bool simulator;
bool startup;
bool third_party_jni; // Enabled with "-verbose:third-party-jni".
bool threads;
bool verifier;
bool image;
// Last field; this is the flag the article flips to get thin-lock traces.
bool systrace_lock_logging; // Enabled with "-verbose:sys-locks".
};
LogVerbosity* logVerbosity = (LogVerbosity*)kwai::linker::DlFcn::dlsym(libHandle, "_ZN3art13gLogVerbosityE");
ALOG("LogVerbosity %x", logVerbosity);
ALOG("LogVerbosity %d", logVerbosity->systrace_lock_logging);
logVerbosity->systrace_lock_logging = true;
ALOG("LogVerbosity %d", logVerbosity->systrace_lock_logging);
3 获取 Art Thread
对象
native Thread
会把地址保存在 java Thread
的 nativePeer
里。
同时 JNIEnvExt
里也保存着 Thread
对象,可以强转 JNIEnv
得到 JNIEnvExt
对象。
还可以从 Thread
的静态方法获取当前的 Thread
对象
// 1. Read the native Thread address from the `nativePeer` field of the current
//    java.lang.Thread object.
jclass javaThreadClass = env->FindClass("java/lang/Thread");
// Thread.currentThread() -> the Java object for the calling thread.
jobject selfThread = env->CallStaticObjectMethod(
        javaThreadClass,
        env->GetStaticMethodID(javaThreadClass, "currentThread", "()Ljava/lang/Thread;"));
// `nativePeer` is declared as a long ("J"); its value is the native Thread address.
jlong nativePeer = env->GetLongField(
        selfThread,
        env->GetFieldID(javaThreadClass, "nativePeer", "J"));
// 2. Obtain it from the JNIEnv.
// Two-pointer overlay for a JNIEnv*: the first word is the function table and the second
// word is read as the owning native Thread. Presumably mirrors the start of ART's
// JNIEnvExt -- verify per Android version.
struct JNIEnvExt {
    void* functions;
    void* thread;  // self
};  // the terminating ';' is required after a struct definition
JNIEnvExt* envExt = (JNIEnvExt*)env;
void* artThread = envExt->thread;
// 3 从 Thread 的静态方法获取
// static Thread* CurrentFromGdb()
void* (*currentThread)();
currentThread = reinterpret_cast<void* (*)()>(kwai::linker::DlFcn::dlsym(
libHandle, "_ZN3art6Thread14CurrentFromGdbEv"));
// dump thread
// void ShortDump(std::ostream& os) const;
// Thread::ShortDump is called through a plain function pointer, with the Thread passed
// as the explicit first argument. The mangled name refers to std::__1::basic_ostream,
// so the caller's std::ostream presumably has to come from the same libc++ ABI as
// libart -- TODO confirm.
void (*dumpThread)(void* thread, std::ostream& os);
dumpThread = reinterpret_cast<void (*)(void*, std::ostream&)>(kwai::linker::DlFcn::dlsym(
        libHandle, "_ZNK3art6Thread9ShortDumpERNSt3__113basic_ostreamIcNS1_11char_traitsIcEEEE"));

// Logs the short dump of `thread` (a native art::Thread pointer).
// If ShortDump could not be resolved or `thread` is null, only the pointer is logged
// instead of jumping through or into a null pointer.
void printThread(void* thread) {
    if (dumpThread == nullptr || thread == nullptr) {
        ALOGE("dumpThread %p %s", thread, "<unavailable>");
        return;
    }
    std::ostringstream oss;
    dumpThread(thread, oss);
    // %p is the correct conversion for a pointer; %x would truncate it on 64-bit targets.
    ALOGE("dumpThread %p %s", thread, oss.str().c_str());
}
4 获取 Heap
对象
第一种是将 runtime.h
的字段定义复制出来,只需要到 heap_
字段就行。然后将 runtime
强转成 Runtime_7X
,就可以直接获取到 heap
。这段代码是 epic
代码里发现的,这里只复制了N版本的
// Overlay of the leading fields of the Runtime object, declared only as far as heap_.
// The article says the field list was copied from runtime.h (found via the epic code)
// and that this copy is the Android N variant. The struct exists purely so that a
// Runtime pointer can be cast to it and heap_ read at the right offset, so field
// order, types and count must stay exactly as they are.
// NOTE(review): the std::string / std::vector members make the offsets depend on the
// C++ standard library this is compiled against -- confirm it matches libart's.
struct Runtime_7X {
uint64_t callee_save_methods_[3];
void* pre_allocated_OutOfMemoryError_;
void* pre_allocated_NoClassDefFoundError_;
void* resolution_method_;
void* imt_conflict_method_;
void* imt_unimplemented_method_;
void* sentinel_;
int instruction_set_;
uint32_t callee_save_method_frame_infos_[9];
void* compiler_callbacks_;
bool is_zygote_;
bool must_relocate_;
bool is_concurrent_gc_enabled_;
bool is_explicit_gc_disabled_;
bool dex2oat_enabled_;
bool image_dex2oat_enabled_;
std::string compiler_executable_;
std::string patchoat_executable_;
std::vector<std::string> compiler_options_;
std::vector<std::string> image_compiler_options_;
std::string image_location_;
std::string boot_class_path_string_;
std::string class_path_string_;
std::vector<std::string> properties_;
// The default stack size for managed threads created by the runtime.
size_t default_stack_size_;
// The only field this overlay is read for; nothing after it is declared.
void* heap_;
};
// View the Runtime instance through the overlay and read the Heap pointer.
// NOTE(review): runtime_instance_ is not defined in this snippet; presumably it is the
// Runtime* obtained in the earlier sections -- confirm.
Runtime_7X* runtime7X = (Runtime_7X*)runtime_instance_;
void* heap = runtime7X->heap_;
第二种是根据 runtime.h
里字段的定义顺序,找到 JavaVMExt* java_vm_
的偏移量后,再偏移一定距离找到 heap_
。此代码是从 whale
代码里看到的。
// Excerpt of the field declarations in runtime.h, from heap_ through java_vm_.
// The heap lookup code relies on this declaration order.
gc::Heap* heap_;
std::unique_ptr<ArenaPool> jit_arena_pool_;
std::unique_ptr<ArenaPool> arena_pool_;
std::unique_ptr<ArenaPool> low_4gb_arena_pool_;
std::unique_ptr<LinearAlloc> linear_alloc_;
size_t max_spins_before_thin_lock_inflation_;
MonitorList* monitor_list_;
MonitorPool* monitor_pool_;
ThreadList* thread_list_;
InternTable* intern_table_;
ClassLinker* class_linker_;
SignalCatcher* signal_catcher_;
std::string stack_trace_file_; // counted as 3 pointer-sized words (not 3 bytes) by the offset code
JavaVMExt* java_vm_; // find the offset of this field, then walk backwards from it
// Returns the address `offset` bytes past `ptr`, converted to the requested type R.
template<typename R>
static inline R OffsetOf(void* ptr, size_t offset) {
    const auto base = reinterpret_cast<intptr_t>(ptr);
    // `auto` keeps the exact arithmetic type of (intptr_t + size_t).
    const auto target = base + offset;
    return (R) target;
}

// Reads and returns, by value, the T stored `offset` bytes past `ptr`.
template<typename T>
static inline T MemberOf(void* ptr, size_t offset) {
    T* field = OffsetOf<T*>(ptr, offset);
    return *field;
}
// Locate Runtime::heap_ without a full struct definition: find the slot inside the
// Runtime object that holds the JavaVM pointer, then step backwards by fixed
// pointer-sized distances to heap_. `heap` stays nullptr if no slot matches.
// NOTE(review): `vm` and `ptr_t` are not defined in this snippet; presumably the
// JavaVM pointer and a pointer-sized type from the whale code -- confirm.
// 32bit
int kPointerSize = 4;
// Read the Runtime pointer out of the JavaVmExt: it is the word at offset kPointerSize.
auto runtime = MemberOf<void*>(vm, kPointerSize);
void* heap = nullptr;
// Scan 100 pointer-sized slots, starting at byte offset 200 inside the Runtime object.
size_t start = 200;
size_t end = start + (100 * kPointerSize);
for (size_t offset = start; offset != end; offset += kPointerSize) {
// The slot whose value equals vm is taken to be java_vm_; `offset` is its offset in runtime.
if (MemberOf<ptr_t>(runtime, offset) == vm) {
// Step back over the std::string (3 words), then SignalCatcher* and ClassLinker*
// (1 word each); the result is the offset of class_linker_.
size_t class_linker_offset = offset - (kPointerSize * 3) - (2 * kPointerSize);
// intern_table_ and thread_list_ each sit one word further back.
size_t intern_table_offset = class_linker_offset - kPointerSize;
size_t thread_list_Offset = intern_table_offset - kPointerSize;
// heap_ lies 8 * kPointerSize before thread_list_.
size_t heap_offset = thread_list_Offset - (8 * kPointerSize);
heap = MemberOf<void*>(runtime, heap_offset);
// Stop at the first match.
break;
}
}
第三种方式是创建一个 partial Runtime
,只包含 runtime
的部分字段,从 heap_
到 java_vm_
,然后计算 offset
,原理和第二种类似。
// Overlay of the slice of the Runtime object that runs from heap_ to java_vm_.
// It is only used with offsetof(PartialRuntimeN, java_vm_): once the address of
// java_vm_ inside the real Runtime is known, subtracting that offset gives the address
// of heap_. Field order and sizes therefore must not change.
// The smart-pointer members of the real class are declared here as plain void*.
// NOTE(review): assumes each of them is one pointer wide in libart -- confirm.
struct PartialRuntimeN {
void* heap_;
void* jit_arena_pool_;
void* arena_pool_;
void* low_4gb_arena_pool_;
void* linear_alloc_;
size_t max_spins_before_thin_lock_inflation_;
void* monitor_list_;
void* monitor_pool_;
void* thread_list_;
void* intern_table_;
void* class_linker_; // presumably 1 word x 4 bytes on 32-bit
void* signal_catcher_; // presumably 1 word x 4 bytes on 32-bit
std::string stack_trace_file_; // presumably 3 words x 4 bytes on 32-bit
void* java_vm_;
};
int kPointerSize = 4;
auto runtime = MemberOf<void*>(vm, kPointerSize);
void* heap = nullptr;
size_t start = 200;
size_t end = start + (100 * kPointerSize);
for (size_t offset = start; offset != end; offset += kPointerSize) {
if (MemberOf<ptr_t>(runtime, offset) == vm) {
PartialRuntimeN* partialRuntimeN = reinterpret_cast<PartialRuntimeN*>(reinterpret_cast<char*>(runtime) + offset - offsetof(PartialRuntimeN, java_vm_));
heap = partialRuntimeN->heap_;
break;
}
}
5 请求 concurrent gc
// void Heap::RequestConcurrentGC(Thread* self, bool force_full)
void (*requestConcurrentGC)(void* heap, void* self, bool force_full);
requestConcurrentGC = reinterpret_cast<void (*)(void*, void*, bool)>(kwai::linker::DlFcn::dlsym(
libHandle, "_ZN3art2gc4Heap19RequestConcurrentGCEPNS_6ThreadEb"));
requestConcurrentGC(heap, currentThread(), true);
也可以直接通过 VMRuntime 来调用
// Reflectively fetch the dalvik.system.VMRuntime singleton and call its
// requestConcurrentGC() method.
val vmRuntimeClass = Class.forName("dalvik.system.VMRuntime")
val vmRuntime = vmRuntimeClass.getDeclaredMethod("getRuntime").invoke(null)
vmRuntimeClass.getDeclaredMethod("requestConcurrentGC").invoke(vmRuntime)
6 打印 gc log
LogGC
只有在 GcCause
是 kGcCauseExplicit
或者 gc pause 超过阈值才打印。因此传入 kGcCauseExplicit
,强制打印最后一次的 log
// Local copy of the GC cause enumeration that is passed by value to libart functions
// resolved with dlsym. Only the numeric value crosses the boundary, so the enumerator
// order (0, 1, 2, ...) must not be changed.
// NOTE(review): presumably mirrors art::gc::GcCause on Android N -- verify against the
// ART sources for the target version.
enum GcCause {
kGcCauseForAlloc,
kGcCauseBackground,
kGcCauseExplicit,
kGcCauseForNativeAlloc,
kGcCauseCollectorTransition,
kGcCauseDisableMovingGc,
kGcCauseTrim,
kGcCauseInstrumentation,
kGcCauseAddRemoveAppImageSpace,
kGcCauseHomogeneousSpaceCompact,
kGcCauseClassLinker,
};
// Local copy of the GC type enumeration that is passed to, and returned from, libart
// functions resolved with dlsym. Enumerator order defines the numeric values and must
// not be changed.
// NOTE(review): presumably mirrors art::gc::collector::GcType on Android N -- verify.
enum GcType {
kGcTypeNone,
kGcTypeSticky,
kGcTypePartial,
kGcTypeFull,
kGcTypeMax,
};
//void LogGC(GcCause gc_cause, collector::GarbageCollector* collector);
void (*logGc)(void* heap, GcCause, void* collector);
logGc = reinterpret_cast<void (*)(void*, GcCause, void*)>(kwai::linker::DlFcn::dlsym(
libHandle, "_ZN3art2gc4Heap5LogGCENS0_7GcCauseEPNS0_9collector16GarbageCollectorE"));
//collector::GarbageCollector* FindCollectorByGcType(collector::GcType gc_type);
void* (*findCollectorByGcType)(void* heap, GcType);
findCollectorByGcType = reinterpret_cast<void* (*)(void*, GcType)>(kwai::linker::DlFcn::dlsym(
libHandle, "_ZN3art2gc4Heap21FindCollectorByGcTypeENS0_9collector6GcTypeE"));
void* collector = findCollectorByGcType(heap, kGcTypeFull);
logGc(heap, kGcCauseExplicit, collector);
7 dump heap spaces
打印 art 里管理的所有 space
//std::string Heap::DumpSpaces() const
std::string (*dumpSpace)(void *);
dumpSpace = reinterpret_cast<std::string (*)(void *)>(kwai::linker::DlFcn::dlsym(
libHandle, "_ZNK3art2gc4Heap10DumpSpacesEv"));
if (dumpSpace != nullptr) {
std::string spaces(dumpSpace(heap));
std::string token;
std::istringstream tokenStream(spaces);
while (std::getline(tokenStream, token, '\n')) {
ALOG("%s", token.c_str());
}
}
8 请求内存 Compact
请求进行内存整理,在 N 版本? 一般在应用退到后台或者oom前才会去做 Compact 操作
// Local copy of the result code returned by Heap::PerformHomogeneousSpaceCompact when it
// is called through a dlsym'd function pointer. Enumerator order defines the numeric
// values and must not be changed.
// NOTE(review): presumably mirrors the ART enum of the same name on Android N -- verify.
enum HomogeneousSpaceCompactResult {
kSuccess,
kErrorReject,
kErrorUnsupported,
kErrorVMShuttingDown,
};
// HomogeneousSpaceCompactResult PerformHomogeneousSpaceCompact()
HomogeneousSpaceCompactResult (*performHomogeneousSpaceCompact)(void* heap);
performHomogeneousSpaceCompact = reinterpret_cast<HomogeneousSpaceCompactResult (*)(void*)>(kwai::linker::DlFcn::dlsym(
libHandle, "_ZN3art2gc4Heap30PerformHomogeneousSpaceCompactEv"));
HomogeneousSpaceCompactResult hscResult = performHomogeneousSpaceCompact(heap);
// 调用成功时会打印以下log
// I/art: Starting a blocking GC HomogeneousSpaceCompact
// I/art: HomogeneousSpaceCompact marksweep + semispace GC freed 1882(51KB) AllocSpace objects, 0(0B) LOS objects, 39% free, 5MB/9MB, paused 44.782ms total 44.782ms
9 直接调用 gc
直接调用 gc 操作,可以传入不同的 GcType
//collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, GcCause gc_cause, bool clear_soft_references)
GcType (*collectGarbageInternal)(void* heap, GcType, GcCause, bool clear_soft_references);
collectGarbageInternal = reinterpret_cast<GcType (*)(void*, GcType, GcCause, bool )>(kwai::linker::DlFcn::dlsym(
libHandle, "_ZN3art2gc4Heap22CollectGarbageInternalENS0_9collector6GcTypeENS0_7GcCauseEb"));
GcType gcType = collectGarbageInternal(heap, kGcTypeFull, kGcCauseExplicit, false);
// I/art: Starting a blocking GC Explicit
// I/art: Explicit concurrent mark sweep GC freed 3925(176KB) AllocSpace objects, 0(0B) LOS objects, 40% free, 5MB/8MB, paused 446us total 30.759ms