如何利用内存访问权限定位内存踩踏

1,692 阅读11分钟

前言

MTE、HWASAN 等内存检测方案通过给指针和内存打标记(tag)来检测内存踩踏,但这种设计存在一个弊端:当踩踏方指针的标记与被踩内存的标记恰好相同时,就无法检测出来。这种情况下,可以结合业务场景判断能否改用内存访问权限来定位,下面介绍如何通过数据结构膨胀来设置访问权限。

// Example structure used by the article: three 8-byte members laid out back to back.
struct S {
    uint64_t A;
    uint64_t B;  // the member whose memory keeps getting corrupted in the scenario described below
    uint64_t C;
};

例如,现有一个数据结构 S,其中有数据成员 A、B、C,现在发现成员 B 的内存总是被破坏,我们需要找出改写成员 B 的位置。

结构膨胀

假设数据结构 S 的大小小于 4K,那么它在内存上有两种排布:落在同一个内存页上,或横跨两个内存页。由于访问权限只能以页为单位设置,需要在被保护的成员前后各填充一页大小的数据,保证该成员所在的页内不包含其它成员。

5.png

内存保护

假设 B 的地址为 0x7F,1234,1234,那么它所在内存页的起始地址为 0x7F,1234,1234 & ~0xFFF = 0x7F,1234,1000,B 的结束地址则是 0x7F,1234,1234 + sizeof(B)。需要保护的内存范围取决于 sizeof(B) 的值,即 B 是否落在同一个内存页内,因此保护范围的大小等于“结束地址向上对齐到页边界”减去“起始地址向下对齐到页边界”。

AlignUp(0x7F,1234,1234 + sizeof(B), 4096) - AlignDown(0x7F,1234,1234, 4096)
Size = AlignUp(Ptr + sizeof(B), 4K) - AlignDown(Ptr, 4K)

// int mprotect(void *addr, size_t len, int prot);

// Read only
mprotect(AlignDown(Ptr, 4K), Size, PROT_READ)

// Read / Write
mprotect(AlignDown(Ptr, 4K), Size, PROT_READ | PROT_WRITE)

例子

// main.cpp

#include "GlobalLog.h"
#include <thread>
#include <dlfcn.h>

// Logging thread body: records one entry into the global log every 10 ms, forever.
void push() {
    const auto interval = std::chrono::milliseconds(10);
    for (;;) {
        gLog.record();
        std::this_thread::sleep_for(interval);
    }
}

// Loader thread body: once per second, loads libsecond.so, calls its
// unused_second() entry point and unloads the library again, forever.
//
// A failed dlopen() or dlsym() simply skips the call for this round; the
// symbol pointer is checked before use so a missing symbol cannot turn into
// a jump through a null function pointer.
void load() {
    using unused_second = void (*)();
    while (true) {
        std::this_thread::sleep_for(std::chrono::milliseconds(1000));
        void *handle = dlopen("/data/local/tmp/libsecond.so", RTLD_LAZY);
        if (handle == nullptr) {
            continue;  // library not loadable this round; try again after the next sleep
        }
        // Itanium-mangled name of `void unused_second()` from second.cpp.
        void *sym = dlsym(handle, "_Z13unused_secondv");
        if (sym != nullptr) {
            reinterpret_cast<unused_second>(sym)();
        }
        dlclose(handle);
    }
}

int main() {
    std::thread t1(push);
    std::thread t2(load);
    t1.join();
    t2.join();
    return 0;
}
// GlobalLog.h

#ifndef PADDINGSTRUCTTEST_GLOBALLOG_H
#define PADDINGSTRUCTTEST_GLOBALLOG_H

#include <string>
#include <deque>
#include <iostream>

// #define PADDING_CHECK

// Log container used by the demo.
//
// When PADDING_CHECK is defined, mEntries is surrounded by two 0x1000-byte
// padding members. With that much padding on each side, the 0x1000-aligned
// range covering mEntries (the range record() passes to mprotect) contains
// only padding besides mEntries itself, so changing its protection does not
// affect `name` or anything outside this object.
// NOTE(review): this assumes 4 KiB pages and sizeof(mEntries) < 0x1000 — confirm on the target.
// The member order below is therefore significant and must not be changed.
class GlobalLog {
public:
    // Appends one entry to mEntries; defined in GlobalLog.cpp.
    void record();
    // Stores a copy of n as the log's name.
    GlobalLog(std::string n) : name(n) {}
private:
    std::string name;
#ifdef PADDING_CHECK
    // Leading padding: first byte is 's', the remaining bytes are zero-initialized.
    __attribute__((unused)) const char padding_start[0x1000] = {'s'};
#endif
    // The member being watched for unexpected writes.
    std::deque<std::string> mEntries;
#ifdef PADDING_CHECK
    // Trailing padding: first byte is 'e', the remaining bytes are zero-initialized.
    __attribute__((unused)) const char padding_end[0x1000] = {'e'};
#endif
};

// The single global log instance used by main.cpp and second.cpp; defined in GlobalLog.cpp.
extern GlobalLog gLog;

#endif //PADDINGSTRUCTTEST_GLOBALLOG_H
// GlobalLog.cpp

#include "GlobalLog.h"

#include <stdint.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <iostream>

// Definition of the global log. Its constructor runs from this translation
// unit's static initializer (the _GLOBAL__sub_I_GlobalLog.cpp frame in the
// second tombstone).
GlobalLog gLog("GlobalLog");

// Rounds x down to a multiple of n by clearing the low bits.
// The mask only yields a true multiple when n is a power of two.
uint64_t align_down(uint64_t x, uint64_t n) {
    const uint64_t mask = ~(n - 1);  // identical bit pattern to -n in unsigned arithmetic
    return x & mask;
}

// Rounds x up to a multiple of n (n is expected to be a power of two).
uint64_t align_up(uint64_t x, uint64_t n) {
    // Adding n - 1 moves any unaligned x past the next boundary, while an
    // already aligned x stays below it; rounding down then gives the result.
    const uint64_t bumped = x + n - 1;
    return align_down(bumped, n);
}

// Appends one "test_msg" entry to mEntries.
//
// With PADDING_CHECK defined, the 0x1000-aligned range covering mEntries is
// made writable only for the duration of the push_back and is read-only the
// rest of the time, so any other write into that range faults at the
// offending instruction. A failing mprotect() is reported on stderr rather
// than ignored: a guard that silently failed to install detects nothing.
void GlobalLog::record() {
#ifdef PADDING_CHECK
    // NOTE(review): 0x1000 assumes 4 KiB pages and matches the padding size
    // declared in GlobalLog.h — confirm for the target device.
    constexpr uint64_t kGuardGranule = 0x1000;
    const uint64_t entriesBegin = reinterpret_cast<uintptr_t>(&mEntries);
    const uint64_t guardBegin = align_down(entriesBegin, kGuardGranule);
    const uint64_t guardEnd = align_up(entriesBegin + sizeof(mEntries), kGuardGranule);
    void *const guardAddr = reinterpret_cast<void *>(guardBegin);
    const size_t guardLen = guardEnd - guardBegin;

    // Open the window: allow writes while this function updates the deque.
    if (mprotect(guardAddr, guardLen, PROT_READ | PROT_WRITE) != 0) {
        std::cerr << "mprotect(PROT_READ | PROT_WRITE) failed\n";
    }
#endif
    mEntries.push_back("test_msg");

#ifdef PADDING_CHECK
    // Close the window: from here on, any write to the range raises SIGSEGV.
    if (mprotect(guardAddr, guardLen, PROT_READ) != 0) {
        std::cerr << "mprotect(PROT_READ) failed\n";
    }
#endif
}
// second.cpp

#include "GlobalLog.h"
#include <iostream>

// Entry point that main.cpp looks up via dlsym(); prints the address of gLog
// in hexadecimal followed by a newline and a flush.
__attribute__((unused)) void unused_second() {
    std::cout << std::hex;
    std::cout << &gLog << std::endl;
}
// CMakeLists.txt

# Build script for the demo: one static library, one shared library, one executable.
cmake_minimum_required(VERSION 3.21.1)
project(padding_tester)

# GlobalLog.cpp (which defines gLog) is built as a static archive.
add_library(global-lib STATIC GlobalLog.cpp)

# libsecond.so links the archive, so it carries its own copy of GlobalLog.cpp's
# static initializer (visible as _GLOBAL__sub_I_GlobalLog.cpp in the tombstone).
add_library(second SHARED second.cpp)
target_link_libraries(second global-lib)

# NOTE(review): -rdynamic is presumably what makes the executable's gLog visible
# to libsecond.so so both resolve to the same address — confirm with the linker docs.
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic -ldl -pthread")
add_executable(padding_tester main.cpp)
# The executable links the same archive as libsecond.so.
target_link_libraries(padding_tester global-lib)
# Configure an Android arm64-v8a build using the NDK's CMake toolchain file,
# generating the build tree into ./android.
# Expects ANDROID_NDK and BUILD_TYPE to be set in the environment.
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
      -DANDROID_ABI="arm64-v8a" \
      -DANDROID_NDK=$ANDROID_NDK \
      -DANDROID_PLATFORM=android-30 \
      -DCMAKE_BUILD_TYPE=$BUILD_TYPE \
      -DCMAKE_BUILD_TARGET=android \
      CMakeLists.txt \
      -B android

# Build everything in the generated tree with 8 parallel jobs.
make -C android -j8

测试

139|matisse:/data/local/tmp # ./padding_tester
0x56b50af388
0x56b50af388
0x56b50af388
0x56b50af388
0x56b50af388
0x56b50af388
0x56b50af388
0x56b50af388
Segmentation fault

pid: 19598, tid: 19599, name: padding_tester  >>> ./padding_tester <<<
uid: 0
tagged_addr_ctrl: 0000000000000001
signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0x10
Cause: null pointer dereference
    x0  00000056b50af388  x1  0000007066416bb8  x2  00000070e6a029c8  x3  00000070e6a029d8
    x4  0000000000000000  x5  0000000000000000  x6  0000000000000001  x7  00000000001f5000
    x8  0000000000000000  x9  0000000000989680  x10 0000000000000055  x11 0000000000000055
    x12 0000000000000000  x13 0000000000000000  x14 0000000000000800  x15 000000001a4820ee
    x16 00000056b50ae1a0  x17 00000070e6f83370  x18 0000007065e06000  x19 00000056b50af388
    x20 0000007066417000  x21 0000007066416cb0  x22 0000000000004c8e  x23 0000000000004c8e
    x24 0000007066416cb0  x25 0000007066416cb0  x26 0000007066416ff8  x27 00000000000fc000
    x28 00000000000fe000  x29 0000007066416bd0
    lr  00000056b5081e04  sp  0000007066416bb0  pc  00000056b50821f8  pst 0000000060001000

backtrace:
      #00 pc 00000000000241f8  /data/local/tmp/padding_tester (GlobalLog::record()+188) (BuildId: c37bc1b67c1d1a283ac77a3ed8a7335c48fa0b65)
      #01 pc 0000000000023e00  /data/local/tmp/padding_tester (push()+36) (BuildId: c37bc1b67c1d1a283ac77a3ed8a7335c48fa0b65)
      #02 pc 0000000000024010  /data/local/tmp/padding_tester (void* std::__ndk1::__thread_proxy<std::__ndk1::tuple<std::__ndk1::unique_ptr<std::__ndk1::__thread_struct, std::__ndk1::default_delete<std::__ndk1::__thread_struct> >, void (*)()> >(void*)+44) (BuildId: c37bc1b67c1d1a283ac77a3ed8a7335c48fa0b65)
      #03 pc 00000000000eae70  /apex/com.android.runtime/lib64/bionic/libc.so (__pthread_start(void*)+204) (BuildId: 4cbc2a7636b3e0b018386ca8427ddf9b)
      #04 pc 000000000008a82c  /apex/com.android.runtime/lib64/bionic/libc.so (__start_thread+64) (BuildId: 4cbc2a7636b3e0b018386ca8427ddf9b)

整个程序有且仅有一处使用 gLog 这个变量,即 mEntries.push_back("test_msg");。这个 tombstone 的解析这里不做过多展开,这一次 Native Crash 的直接原因是 mEntries 的内存被破坏,导致 push_back 执行过程中发生段错误。接下来打开宏定义 PADDING_CHECK,重新编译程序并测试。

pid: 19902, tid: 19905, name: padding_tester  >>> ./padding_tester <<<
uid: 0
tagged_addr_ctrl: 0000000000000001
signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0x5fbde5f000
    x0  0000005fbde5e471  x1  0000000000000000  x2  0000000000000420  x3  0000005fbde5f000
    x4  0000005fbde5f4a0  x5  0000000000000004  x6  6f6c2f617461642f  x7  2f706d742f6c6163
    x8  0000000000000073  x9  2e90e08f834e33cb  x10 6f4c6c61626f6c47  x11 0000000000000067
    x12 7362696c2f706d74  x13 6f732e646e6f6365  x14 000000000281a7a7  x15 0000000000000000
    x16 0000007495ded008  x17 00000075195611a0  x18 0000007496df2000  x19 0000005fbde5e458
    x20 00000074989c1000  x21 0000007495dec888  x22 0000000000000000  x23 000000751dcf4104
    x24 000000751dcef359  x25 000000751de0d000  x26 000000751dcf4c7e  x27 0000000000000002
    x28 0000007495d81da8  x29 00000074989bf950
    lr  0000007495d81e10  sp  00000074989bf930  pc  00000075195612a0  pst 0000000020001000

backtrace:
      #00 pc 00000000000852a0  /apex/com.android.runtime/lib64/bionic/libc.so (memset+256) (BuildId: 4cbc2a7636b3e0b018386ca8427ddf9b)
      #01 pc 000000000005ae0c  /data/local/tmp/libsecond.so (_GLOBAL__sub_I_GlobalLog.cpp+100) (BuildId: ba6fec2c86fc767de42d0292e21713e734e2ea2e)
      #02 pc 0000000000051294  /apex/com.android.runtime/bin/linker64 (__dl__ZN6soinfo17call_constructorsEv+628) (BuildId: 402a7650c18b31ec79a6bce8358d1074)
      #03 pc 000000000003bd30  /apex/com.android.runtime/bin/linker64 (__dl__Z9do_dlopenPKciPK17android_dlextinfoPKv+2052) (BuildId: 402a7650c18b31ec79a6bce8358d1074)
      #04 pc 00000000000371d8  /apex/com.android.runtime/bin/linker64 (__loader_dlopen+76) (BuildId: 402a7650c18b31ec79a6bce8358d1074)
      #05 pc 0000000000001024  /apex/com.android.runtime/lib64/bionic/libdl.so (dlopen+16) (BuildId: 050162f4b068d2ea4e05bf86f04ca900)
      #06 pc 0000000000023e94  /data/local/tmp/padding_tester (load()+64) (BuildId: 56085108fcd1b328f00f00ccb7ba7fc2f7705f28)
      #07 pc 0000000000024050  /data/local/tmp/padding_tester (void* std::__ndk1::__thread_proxy<std::__ndk1::tuple<std::__ndk1::unique_ptr<std::__ndk1::__thread_struct, std::__ndk1::default_delete<std::__ndk1::__thread_struct> >, void (*)()> >(void*)+44) (BuildId: 56085108fcd1b328f00f00ccb7ba7fc2f7705f28)
      #08 pc 00000000000eae70  /apex/com.android.runtime/lib64/bionic/libc.so (__pthread_start(void*)+204) (BuildId: 4cbc2a7636b3e0b018386ca8427ddf9b)
      #09 pc 000000000008a82c  /apex/com.android.runtime/lib64/bionic/libc.so (__start_thread+64) (BuildId: 4cbc2a7636b3e0b018386ca8427ddf9b)

从调用栈可以看到,dlopen 加载 libsecond.so 时执行了 _GLOBAL__sub_I_GlobalLog.cpp,也就是说 gLog 全局变量被重复初始化,导致 mEntries 的内存被重置;而此时另一个线程可能正在执行 push_back,于是出现了内存踩踏的现象。

解决方案

该问题的根本原因是可执行文件和 libsecond.so 都链接了 global-lib.a:两者的 gLog 变量使用同一个地址,而初始化函数各有一份,每次 dlopen 都会重复执行初始化,导致内存被重置。具体的 Native Crash 分析这里就不一一展开,可以参考上一篇《如何理解Native Crash问题》。

  1. 追加 -Wl,-Bsymbolic [优缺点明显:剥离全局变量能修复此问题,但也会引入其它问题]
  2. 在使用变量前完成所有的初始化;缺点是库文件加载后不应再被卸载,也就是不能进行热插拔