关于梆梆加固应用收集堆栈崩溃

401 阅读5分钟

问题背景

近期收到某大厂反馈,他们的应用程序在我们的机型中频繁出现如下崩溃信息。这个问题前期是由其他同事处理的,后来同事来咨询如何跟踪某个管道 FD 被关闭的堆栈,看了他们的文档介绍后,一看这东西咋那么眼熟。

 pid: 21992, tid: 28371, name: Signal Catcher  >>> a.b.c <<<
 uid: 10283
 tagged_addr_ctrl: 0000000000000001 (PR_TAGGED_ADDR_ENABLE)
 pac_enabled_keys: 000000000000000f (PR_PAC_APIAKEY, PR_PAC_APIBKEY, PR_PAC_APDAKEY, PR_PAC_APDBKEY)
 esr: 0000000092000006 (Data Abort Exception 0x24)
 signal 6 (SIGABRT), code -1 (SI_QUEUE), fault addr --------
 Abort message: 'Check failed: niceness != -1 || errno == 0  No such process tid:28373' 
     x0  0000000000000000  x1  0000000000006ed3  x2  0000000000000006  x3  000000774e92bd40
     x4  00000000000efe20  x5  00000000000efe20  x6  00000000000efe20  x7  0000000000000001
     x8  00000000000000f0  x9  000001ff00000020  x10 000000ff00000020  x11 000000000012800c
     x12 000000000000cc32  x13 000000005fff4000  x14 0000000000000001  x15 00000000000000f0
     x16 0000007a1e58a170  x17 0000007a1e570840  x18 00000076dde34000  x19 00000000000055e8
     x20 0000000000006ed3  x21 00000000ffffffff  x22 0000007761e146c8  x23 0000007855603010
     x24 0000007761e13000  x25 0000000000fffff9  x26 000000005a000000  x27 0000000000000000
     x28 b4000077e5616d10  x29 000000774e92bdc0
     lr  0000007a1e510c3c  sp  000000774e92bd40  pc  0000007a1e510c60  pst 0000000000001000
     esr 0000000092000006
 26 total frames
 backtrace:
       #00 pc 0000000000075c60  /apex/com.android.runtime/lib64/bionic/libc.so (abort+160) (BuildId: c56349c5b531d74a1e2fa0bafba78e5c)
       #01 pc 000000000091ee30  /apex/com.android.art/lib64/libart.so (art::Runtime::Abort(char const*)+1008) (BuildId: 358ba270e15145fce121992683a577bc)
       #02 pc 0000000000016134  /apex/com.android.art/lib64/libbase.so (android::base::SetAborter(std::__1::function<void (char const*)>&&)::$_0::__invoke(char const*)+68) (BuildId: e3a4bdc6221ada5240a090255fdc022d)
       #03 pc 0000000000015690  /apex/com.android.art/lib64/libbase.so (android::base::LogMessage::~LogMessage()+540) (BuildId: e3a4bdc6221ada5240a090255fdc022d)
       #04 pc 00000000004b0264  /apex/com.android.art/lib64/libart.so (art::Thread::GetNativeNiceness() const+412) (BuildId: 358ba270e15145fce121992683a577bc)
       #05 pc 0000000000456f24  /apex/com.android.art/lib64/libart.so (art::Thread::DumpState(std::__1::basic_ostream<char, std::__1::char_traits<char>>&, art::Thread const*, int)+3768) (BuildId: 358ba270e15145fce121992683a577bc)
       #06 pc 0000000000455a04  /apex/com.android.art/lib64/libart.so (art::Thread::Dump(std::__1::basic_ostream<char, std::__1::char_traits<char>>&, unwindstack::AndroidLocalUnwinder&, bool, bool) const+56) (BuildId: 358ba270e15145fce121992683a577bc)
       #07 pc 00000000004558f4  /apex/com.android.art/lib64/libart.so (art::DumpCheckpoint::Run(art::Thread*)+116) (BuildId: 358ba270e15145fce121992683a577bc) 
       #08 pc 00000000002c1a14  /apex/com.android.art/lib64/libart.so (art::ThreadList::RunCheckpoint(art::Closure*, art::Closure*, bool, bool)+1140) (BuildId: 358ba270e15145fce121992683a577bc)
       #09 pc 0000000000459fc8  /apex/com.android.art/lib64/libart.so (art::ThreadList::Dump(std::__1::basic_ostream<char, std::__1::char_traits<char>>&, bool)+216) (BuildId: 358ba270e15145fce121992683a577bc)
       #10 pc 0000000000922b50  /apex/com.android.art/lib64/libart.so (art::AbortState::Dump(std::__1::basic_ostream<char, std::__1::char_traits<char>>&) const+208) (BuildId: 358ba270e15145fce121992683a577bc)
       #11 pc 000000000091ee78  /apex/com.android.art/lib64/libart.so (art::Runtime::Abort(char const*)+1080) (BuildId: 358ba270e15145fce121992683a577bc)
       #12 pc 0000000000016134  /apex/com.android.art/lib64/libbase.so (android::base::SetAborter(std::__1::function<void (char const*)>&&)::$_0::__invoke(char const*)+68) (BuildId: e3a4bdc6221ada5240a090255fdc022d)
       #13 pc 0000000000015690  /apex/com.android.art/lib64/libbase.so (android::base::LogMessage::~LogMessage()+540) (BuildId: e3a4bdc6221ada5240a090255fdc022d)
       #14 pc 00000000004b0264  /apex/com.android.art/lib64/libart.so (art::Thread::GetNativeNiceness() const+412) (BuildId: 358ba270e15145fce121992683a577bc)
       #15 pc 0000000000456b3c  /apex/com.android.art/lib64/libart.so (art::Thread::DumpState(std::__1::basic_ostream<char, std::__1::char_traits<char>>&, art::Thread const*, int)+2768) (BuildId: 358ba270e15145fce121992683a577bc)
       #16 pc 0000000000455a04  /apex/com.android.art/lib64/libart.so (art::Thread::Dump(std::__1::basic_ostream<char, std::__1::char_traits<char>>&, unwindstack::AndroidLocalUnwinder&, bool, bool) const+56) (BuildId: 358ba270e15145fce121992683a577bc)
       #17 pc 00000000004558f4  /apex/com.android.art/lib64/libart.so (art::DumpCheckpoint::Run(art::Thread*)+116) (BuildId: 358ba270e15145fce121992683a577bc)
       #18 pc 00000000002c1a14  /apex/com.android.art/lib64/libart.so (art::ThreadList::RunCheckpoint(art::Closure*, art::Closure*, bool, bool)+1140) (BuildId: 358ba270e15145fce121992683a577bc)
       #19 pc 0000000000459fc8  /apex/com.android.art/lib64/libart.so (art::ThreadList::Dump(std::__1::basic_ostream<char, std::__1::char_traits<char>>&, bool)+216) (BuildId: 358ba270e15145fce121992683a577bc)
       #20 pc 0000000000459a58  /apex/com.android.art/lib64/libart.so (art::ThreadList::DumpForSigQuit(std::__1::basic_ostream<char, std::__1::char_traits<char>>&)+376) (BuildId: 358ba270e15145fce121992683a577bc)
       #21 pc 0000000000458cd0  /apex/com.android.art/lib64/libart.so (art::Runtime::DumpForSigQuit(std::__1::basic_ostream<char, std::__1::char_traits<char>>&)+48) (BuildId: 358ba270e15145fce121992683a577bc)
       #22 pc 00000000005097e0  /apex/com.android.art/lib64/libart.so (art::SignalCatcher::HandleSigQuit()+632) (BuildId: 358ba270e15145fce121992683a577bc)
       #23 pc 00000000004713a4  /apex/com.android.art/lib64/libart.so (art::SignalCatcher::Run(void*)+1388) (BuildId: 358ba270e15145fce121992683a577bc)
       #24 pc 0000000000086f60  /apex/com.android.runtime/lib64/bionic/libc.so (__pthread_start(void*) (.__uniq.67847048707805468364044055584648682506)+184) (BuildId: c56349c5b531d74a1e2fa0bafba78e5c)
       #25 pc 0000000000079690  /apex/com.android.runtime/lib64/bionic/libc.so (__start_thread+68) (BuildId: c56349c5b531d74a1e2fa0bafba78e5c)

其 abort 的原因是:函数 GetNativeNiceness 发现该 art::Thread 对应的线程(tid 28373)已经不存在。Signal Catcher 的线程号是 28371,由此基本可以判断线程号 28373 是虚拟机创建的线程,诸如“Metrics Background Reporting Thread”,“perfetto_hprof_listener”,“ADB-JDWP Connection Control Thread” 等。

img_v3_02ue_63d643aa-ede9-486b-986d-74d08548a95g.jpg 该函数 art::Thread::GetNativeNiceness() 替代了以前的 art::Thread::GetNativePriority()。

问题分析

安装反馈方提供的应用程序并启动后,用 core-parser 抓取现场,可以看到线程 perfetto_hprof_listener 已经不存在了,但是它的 art::Thread 对象依旧挂在 thread_list_ 列表中,意味着该线程退出前,没有通过 Unregister() 从列表中移除,于是调用 GetNativeNiceness 函数时触发 abort 中断程序。

core-parser> t -a
 ID   TID    STATUS                          ADDRESS              NAME
*1    11589  Native                          0xb400007472617010   "main" 
 2    12468  WaitingInMainSignalCatcherLoop  0x747263b820         "Signal Catcher" 
 3    12469  Native                          0x747261fb20         "perfetto_hprof_listener" (NOT EXIST THREAD)
 4    12470  WaitingInMainDebuggerLoop       0x747261c380         "ADB-JDWP Connection Control Thread" 
 5    12472  Native                          0x747261a7b0         "Jit thread pool worker thread 0" 
 6    12474  Waiting                         0x7472628630         "ReferenceQueueDaemon" 
 7    12473  WaitingForTaskProcessor         0x74726216f0         "HeapTaskDaemon" 
 8    12475  Waiting                         0x74726232c0         "FinalizerDaemon" 
 9    12476  Sleeping                        0x7472639c50         "FinalizerWatchdogDaemon"

img_v3_02ue_f5cb46e6-0349-4275-8cd4-83a282ab3dfg.jpg

可以看到线程 perfetto_hprof_listener 的代码仅调用了 AttachCurrentThread 函数将线程添加到虚拟机管理,没有在异常退出处调用 DetachCurrentThread 进行反注册,将其从 thread_list_ 列表中移除。

img_v3_02ue_1dff549a-7782-41b1-b1fc-663fc6338fdg.jpg

从代码的角度,不难发现只有在写管道 g_signal_pipe_fds[1] 被关闭的情况下,才会满足条件退出循环。看到这个变量符号那一刻就想起来了:这不就是当时在「关于 Android15 GKI2407R40 导致梆梆加固软件崩溃」这个问题里看到的代码片段吗……

image.png

调试取证

1 年前已经存在这样的逻辑,我想现在发生类似的事情,这部分代码应该还在,于是抓个现场证据看看。已知条件是:该逻辑通过读取 g_signal_pipe_fds 符号地址,获取写管道 fd 的值,再调用 close 进行关闭操作。

注入 libopencore.so

# setenforce 0
./data/core-parser -p `pidof zygote64`

core-parser> remote hook --inject -l /data/libopencore.so
arm64: hook inject "/data/libopencore.so"
arm64: hook found "dlopen" address: 0x733a131020
arm64: target process current sp: 0x7ffe9f02a0
arm64: call dlopen(0x7ffe9f0280 "/data/libopencore.so", 0x2)
arm64: return 0xa4eeb80a85c4950f

修改 zygote64 内存

我们可以对 close 函数进行改造,如果 fd = g_signal_pipe_fds[1],则 segv 触发 core。

core-parser> disas _ZN14perfetto_hprof17g_signal_pipe_fdsE
LIB: /apex/com.android.art/lib64/libperfetto_hprof.so
SYMBOL: _ZN14perfetto_hprof17g_signal_pipe_fdsE
  * perfetto_hprof::g_signal_pipe_fds: 0x706ed634b8
  
core-parser> rd 0x706ed634b8
706ed634b8: 0000000000000000  ........
core-parser>

新增的汇编逻辑如下,将改造后的 close 函数汇编代码写入 zygote64 内存中。

mov x2, 0x34b8
movk x2, 0x6ed6, lsl #16
movk x2, 0x0070, lsl #32
ldr w3, [x2, #4]
cmp w0, w3
b.ne 0x8
ldr x1, [x1] // segv dead
core-parser> rd 7343fc6100 -e 7343fc6164 -i
0x7343fc6100: a9be7bfd | stp x29, x30, [sp, #-0x20]!
0x7343fc6104: f9000bf3 | str x19, [sp, #0x10]
0x7343fc6108: 910003fd | mov x29, sp
0x7343fc610c: aa1f03e1 | mov x1, xzr
0x7343fc6110: d2869702 | mov x2, #0x34b8
0x7343fc6114: f2addac2 | movk x2, #0x6ed6, lsl #16
0x7343fc6118: f2c00e02 | movk x2, #0x70, lsl #32
0x7343fc611c: b9400443 | ldr w3, [x2, #4]
0x7343fc6120: 6b03001f | cmp w0, w3
0x7343fc6124: 54000041 | b.ne 0x7343fc612c
0x7343fc6128: f9400021 | ldr x1, [x1]
0x7343fc612c: 97fffa35 | bl 0x7343fc4a00
0x7343fc6130: 3100041f | cmn w0, #1
0x7343fc6134: 54000121 | b.ne 0x7343fc6158
0x7343fc6138: 2a0003f3 | mov w19, w0
0x7343fc613c: 97fff7fd | bl 0x7343fc4130
0x7343fc6140: aa0003e8 | mov x8, x0
0x7343fc6144: 2a1303e0 | mov w0, w19
0x7343fc6148: b9400108 | ldr w8, [x8]
0x7343fc614c: 7100111f | cmp w8, #4
0x7343fc6150: 54000041 | b.ne 0x7343fc6158
0x7343fc6154: 2a1f03e0 | mov w0, wzr
0x7343fc6158: f9400bf3 | ldr x19, [sp, #0x10]
0x7343fc615c: a8c27bfd | ldp x29, x30, [sp], #0x20
0x7343fc6160: d65f03c0 | ret 
...
core-parser> disas close
LIB: /apex/com.android.runtime/lib64/bionic/libc.so
close: [7343f4daf0, 7343f4db38]
  0x7343f4daf0: 1401e184 | b 0x7343fc6100
  0x7343f4daf4: f9000bf3 | str x19, [sp, #0x10]
  0x7343f4daf8: 910003fd | mov x29, sp
  0x7343f4dafc: aa1f03e1 | mov x1, xzr
  0x7343f4db00: 9401dbc0 | bl 0x7343fc4a00
  0x7343f4db04: 3100041f | cmn w0, #1
  0x7343f4db08: 54000121 | b.ne 0x7343f4db2c
  0x7343f4db0c: 2a0003f3 | mov w19, w0
  0x7343f4db10: 9401d988 | bl 0x7343fc4130
  0x7343f4db14: aa0003e8 | mov x8, x0
  0x7343f4db18: 2a1303e0 | mov w0, w19
  0x7343f4db1c: b9400108 | ldr w8, [x8]
  0x7343f4db20: 7100111f | cmp w8, #4
  0x7343f4db24: 54000041 | b.ne 0x7343f4db2c
  0x7343f4db28: 2a1f03e0 | mov w0, wzr
  0x7343f4db2c: f9400bf3 | ldr x19, [sp, #0x10]
  0x7343f4db30: a8c27bfd | ldp x29, x30, [sp], #0x20
  0x7343f4db34: d65f03c0 | ret

修改 close 第一行机器码直接跳转到 0x7343fc6100 hook_close 位置。

启动应用程序

# killall -9 usap64  // 删掉 app 进程缓存
01-28 18:58:15.649 13270 13270 I opencore: Init inject opencore-1.4.16 environment..
01-28 19:00:24.215  5895  5895 I opencore: Wait (5984) coredump
01-28 19:00:24.217  5984  5984 I opencore: Coredump /sdcard/Android/data/a.b.c/files/core.a.b.c_5895_1769598024 ...
01-28 19:00:29.501  5984  5984 I opencore: Finish done.
core-parser> bt
"main" sysTid=5895 Native
  | group="main" daemon=0 prio=5 target=0x0 uncaught_exception=0x0
  | tid=1 sCount=0 flags=0 obj=0x728b83e0 self=0xb40000712140d7b0 env=0xb4000071b1414710
  | stack=0x7ffe1f5000-0x7ffe1f7000 stackSize=0x7ff000 handle=0x7357535118
  | mutexes=0xb40000712140df50 held=
  x0  0x000000000000005d  x1  0x0000000000000000  x2  0x000000706ed634b8  x3  0x000000000000005d  
  x4  0xffffffffffffffff  x5  0x0000007ffe9ed650  x6  0x0000000000000039  x7  0x7f7f7f7f7f7f7f7f  
  x8  0x0000007ffe9ed243  x9  0x0000000000000065  x10 0x0000000000000033  x11 0x0000000000000002  
  x12 0x0000007ffe9ecd14  x13 0xffffff80ffffffd8  x14 0x0000000000000010  x15 0x00000000ffffffa5  
  x16 0x0000006ff4a2ea68  x17 0x0000007343f4daf0  x18 0x0000007356464000  x19 0x0000006ff4a39930  
  x20 0x0000006ff4a39000  x21 0x000000706ed634b8  x22 0x000000000000005d  x23 0x0000000000000045  
  x24 0x0000007ffe9ed240  x25 0x0000006ff4a39000  x26 0x0000006ff4a39000  x27 0x0000006ff4a30000  
  x28 0x0000006ff4a393e0  fp  0x0000007ffe9ed0d0  
  lr  0x0000006ff4967de4  sp  0x0000007ffe9ed0d0  pc  0x0000007343fc6128  pst 0x0000000060001000  
  Native: #0  0000007343fc6128  /apex/com.android.runtime/lib64/bionic/libc.so+0xf2128
  Native: #1  0000006ff4967de0  
  Native: #2  0000006ff494e9bc  
  Native: #3  0000006ff49482d4  
  Native: #4  000000707d53966c  art::JavaVMExt::LoadNativeLibrary(_JNIEnv*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, _jobject*, _jclass*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >*)+0x4ec
  Native: #5  000000706b8ee480  JVM_NativeLoad+0x170
  Native: #6  00000000715d2078  art_jni_trampoline+0x98
  JavaKt: #00  0000000000000000  java.lang.Runtime.nativeLoad
  JavaKt: #01  000000707cb0c56c  java.lang.Runtime.loadLibrary0
  JavaKt: #02  000000707cb0c4f8  java.lang.Runtime.loadLibrary0
  JavaKt: #03  000000707cb17b90  java.lang.System.loadLibrary
  JavaKt: #04  0000006ff947cafc  com.AppGuard.AppGuard.XLVQB.load
  JavaKt: #05  0000006ff947b74e  com.AppGuard.AppGuard.NZMXG.instantiateClassLoader
  JavaKt: #06  000000707c1516c6  android.app.LoadedApk.createOrUpdateClassLoaderLocked
  JavaKt: #07  000000707c150378  android.app.LoadedApk.getResources
  JavaKt: #08  000000707c0e0b30  android.app.ContextImpl.createAppContext
  JavaKt: #09  000000707c0ae14e  android.app.ActivityThread.handleBindApplication
  JavaKt: #10  000000707c0aa8fc  android.app.ActivityThread.-$$Nest$mhandleBindApplication
  JavaKt: #11  000000707c0a5780  android.app.ActivityThread$H.handleMessage
  JavaKt: #12  000000707afaabae  android.os.Handler.dispatchMessage
  JavaKt: #13  000000707afd3ed2  android.os.Looper.loopOnce
  JavaKt: #14  000000707afd4800  android.os.Looper.loop
  JavaKt: #15  000000707c0b3072  android.app.ActivityThread.main
  JavaKt: #16  0000000000000000  java.lang.reflect.Method.invoke
  JavaKt: #17  0000007079f6bbfa  com.android.internal.os.RuntimeInit$MethodAndArgsCaller.run
  JavaKt: #18  0000007079f70ae0  com.android.internal.os.ZygoteInit.main
core-parser> 
core-parser> disas _ZN14perfetto_hprof17g_signal_pipe_fdsE
LIB: /apex/com.android.art/lib64/libperfetto_hprof.so
SYMBOL: _ZN14perfetto_hprof17g_signal_pipe_fdsE
  * perfetto_hprof::g_signal_pipe_fds: 0x706ed634b8
  
core-parser> rd 0x706ed634b8
706ed634b8: 0000005d00000045  E...]...
core-parser>
core-parser> f 1
  JavaKt: #01  000000707cb0c56c  java.lang.Runtime.loadLibrary0(java.lang.ClassLoader, java.lang.Class, java.lang.String)
  {
      Location: /apex/com.android.art/javalib/core-oj.jar
      art::ArtMethod: 0x6fc7bcf0
      dex_pc_ptr: 0x707cb0c56c
      quick_frame: 0x7ffe9ef810
      frame_pc: 0x71384bd8
      method_header: 0x71384a8c

      DEX CODE:
      0x707cb0c564: 020c                     | move-result-object v2
      0x707cb0c566: 2107                     | move-object v1, v2
      0x707cb0c568: 0138 0010                | if-eqz v1, 0x707cb0c588 //+16
      0x707cb0c56c: 3071 0dbb 0761           | invoke-static {v1, v6, v7}, java.lang.String java.lang.Runtime.nativeLoad(java.lang.String, java.lang.ClassLoader, java.lang.Class) // method@3515
      {
          v0 = w24    v1 = w26    v5 = w25    v6 = w22    
          v7 = w23    v8 = w24
      }

      OAT CODE:
      0x71384bb4: 14000002 | b 0x71384bbc
      0x71384bb8: aa0003fa | mov x26, x0
      0x71384bbc: 340003da | cbz w26, 0x71384c34
      0x71384bc0: aa1603e2 | mov x2, x22
      0x71384bc4: aa1703e3 | mov x3, x23
      0x71384bc8: aa1a03e1 | mov x1, x26
      0x71384bcc: f0ff47a0 | adrp x0, 0x6fc7b000
      0x71384bd0: 9135c000 | add x0, x0, #0xd70
      0x71384bd4: f9400c1e | ldr x30, [x0, #0x18]
      0x71384bd8: d63f03c0 | blr x30
      0x71384bdc: 350000c0 | cbnz w0, 0x71384bf4
      0x71384be0: aa0003fb | mov x27, x0
      {
          x19 = 0xb40000712140d7b0    x20 = 0x0000000000000000    x21 = 0xb40000712140d870    x22 = 0x00000000023bcc70    
          x23 = 0x00000000af2bdde0    x24 = 0x00000000023bec58    x25 = 0x000000006fa5cd10    x26 = 0x00000000023bef70    
          x27 = 0x0000000000000018    x28 = 0x0000007ffe9ef990    fp = 0x0000007ffe9ef96c    lr = 0x0000000071384bdc
      }
  }
core-parser> p 0x00000000023bef70
Size: 0x70
Object Name: java.lang.String
    [0x10] virutal char[] values = "/data/app/~~gKv2P9XhKhqKOOzrwlg7NQ==/a.b.c-SUPK3pUVM0I0SmBiF2mlGg==/lib/arm64/libAppGuard.so"
    [0x0c] private int hash = 0
    [0x08] private final int count = 96
  // extends java.lang.Object
    [0x04] private transient int shadow$_monitor_ = 0
    [0x00] private transient java.lang.Class shadow$_klass_ = 0x6faa0a80
core-parser> 
core-parser> vtor 0000006ff4967de0
  * VIRTUAL: 0x6ff4967de0
  * PHYSICAL: 0x9e501de0
  * OFFSET: 0x34de0
  * OR: 0x74bb701de0
  * MMAP: 0x0
  * OVERLAY: 0x0
[6ff4933000, 6ff4a24000)  rwx  00000f1000  00000f1000  [] [*]
core-parser>

可以看到在加载 libAppGuard.so 的过程中,在匿名代码段 <anonymous:6ff4933000> 中调用了 close 函数关闭了 g_signal_pipe_fds[1] 管道,于是 perfetto_hprof_listener 线程异常退出。

匿名代码段

core-parser> rd 6ff4933000 -e 6ff4a24000 -f /data/6ff4933000.bin
Saved [/data/6ff4933000.bin].
core-parser>

img_v3_02ue_afa09a21-5451-45df-9335-dd3efd0864eg.jpg

该代码片段和一年前的一模一样,埋下了这个隐患,直到 ART 代码更新,新增 GetNativeNiceness 函数后开始发生错误。

后记

perfetto_hprof_listener 线程异常退出时没有从 thread_list_ 列表中移除,这属于 ART 的 BUG,但是修复依赖 Google Mainline 的更新,周期一般都挺长的;应用端更新相对较快,期望更多开发者能尽快更新。转达不易,感谢相互告知!