为什么需要动态链接
- 节省内存空间
- 方便更新、部署和发布
例子
*lib.c*
#include <stdio.h>
/* Write one line to standard output that carries the lib.so tag and the value of i. */
void foobar(int i)
{
    fprintf(stdout, "Printing form lib.so %d\n", i);
}
*program.c*
#include "lib.h"
/* Entry point of the demo program: make a single call into the shared object. */
int main()
{
    const int demo_value = 1; /* argument handed to foobar() */

    foobar(demo_value);
    return 0;
}
将 lib.c 编译成共享对象(.so 文件)
gcc -fPIC -shared -o lib.so lib.c
-shared 表示产出共享对象,-fPIC 表示地址无关代码(PIC, Position-independent Code)
编译 program
gcc -o program program.c ./lib.so
当程序被装载的时候,系统的动态链接器会将程序所需要的所有动态链接库装载到进程的地址空间,并且将程序中所有未决议的符号绑定到相应的动态链接库中,并进行重定位工作。
readelf -S program
There are 31 section headers, starting at offset 0x3688:
Section Headers:
[Nr] Name Type Address Off Size ES Flg Lk Inf Al
[ 0] NULL 0000000000000000 000000 000000 00 0 0 0
[ 1] .interp PROGBITS 0000000000000318 000318 00001c 00 A 0 0 1
[ 2] .note.gnu.property NOTE 0000000000000338 000338 000030 00 A 0 0 8
[ 3] .note.gnu.build-id NOTE 0000000000000368 000368 000024 00 A 0 0 4
[ 4] .note.ABI-tag NOTE 000000000000038c 00038c 000020 00 A 0 0 4
[ 5] .gnu.hash GNU_HASH 00000000000003b0 0003b0 000024 00 A 6 0 8
[ 6] .dynsym DYNSYM 00000000000003d8 0003d8 0000a8 18 A 7 1 8
[ 7] .dynstr STRTAB 0000000000000480 000480 000098 00 A 0 0 1
[ 8] .gnu.version VERSYM 0000000000000518 000518 00000e 02 A 6 0 2
[ 9] .gnu.version_r VERNEED 0000000000000528 000528 000030 00 A 7 1 8
[10] .rela.dyn RELA 0000000000000558 000558 0000c0 18 A 6 0 8
[11] .rela.plt RELA 0000000000000618 000618 000018 18 AI 6 24 8
[12] .init PROGBITS 0000000000001000 001000 00001b 00 AX 0 0 4
[13] .plt PROGBITS 0000000000001020 001020 000020 10 AX 0 0 16
[14] .plt.got PROGBITS 0000000000001040 001040 000010 10 AX 0 0 16
[15] .plt.sec PROGBITS 0000000000001050 001050 000010 10 AX 0 0 16
[16] .text PROGBITS 0000000000001060 001060 000102 00 AX 0 0 16
[17] .fini PROGBITS 0000000000001164 001164 00000d 00 AX 0 0 4
[18] .rodata PROGBITS 0000000000002000 002000 000004 04 AM 0 0 4
[19] .eh_frame_hdr PROGBITS 0000000000002004 002004 000034 00 A 0 0 4
[20] .eh_frame PROGBITS 0000000000002038 002038 0000ac 00 A 0 0 8
[21] .init_array INIT_ARRAY 0000000000003da8 002da8 000008 08 WA 0 0 8
[22] .fini_array FINI_ARRAY 0000000000003db0 002db0 000008 08 WA 0 0 8
[23] .dynamic DYNAMIC 0000000000003db8 002db8 000200 10 WA 7 0 8
[24] .got PROGBITS 0000000000003fb8 002fb8 000048 08 WA 0 0 8
[25] .data PROGBITS 0000000000004000 003000 000010 00 WA 0 0 8
[26] .bss NOBITS 0000000000004010 003010 000008 00 WA 0 0 1
[27] .comment PROGBITS 0000000000000000 003010 000026 01 MS 0 0 1
[28] .symtab SYMTAB 0000000000000000 003038 000360 18 29 18 8
[29] .strtab STRTAB 0000000000000000 003398 0001d4 00 0 0 1
[30] .shstrtab STRTAB 0000000000000000 00356c 00011a 00 0 0 1
延迟绑定(PLT)
聊聊Linux动态链接中的PLT和GOT(2)--延迟重定位_海枫的博客-CSDN博客
动态链接相关结构
.interp 段
interpret 解释器,动态链接器的路径
objdump -s program
Contents of section .interp:
0318 2f6c6962 36342f6c 642d6c69 6e75782d /lib64/ld-linux-
0328 7838362d 36342e73 6f2e3200 x86-64.so.2.
.dynamic 段
该段保存了动态链接器所需要的基本信息:依赖于哪些共享对象、动态链接符号表的位置、动态链接重定位表的位置、共享对象初始化代码的地址等。
/*
 * One entry of the .dynamic section in a 64-bit ELF file: a tag followed by
 * an 8-byte payload whose interpretation depends on the tag.
 */
typedef struct {
Elf64_Sxword d_tag; /* entry tag value, e.g. 0x1 (NEEDED) in the readelf -d listing */
union {
Elf64_Xword d_val; /* payload viewed as an integer (presumably sizes, counts, string-table offsets) */
Elf64_Addr d_ptr; /* payload viewed as an address (presumably table locations such as STRTAB/SYMTAB) */
} d_un;
} Elf64_Dyn;
readelf -d program1
Dynamic section at offset 0x2db8 contains 28 entries:
Tag Type Name/Value
0x0000000000000001 (NEEDED) Shared library: [./lib.so]
0x0000000000000001 (NEEDED) Shared library: [libc.so.6]
0x000000000000000c (INIT) 0x1000
0x000000000000000d (FINI) 0x1164
0x0000000000000019 (INIT_ARRAY) 0x3da8
0x000000000000001b (INIT_ARRAYSZ) 8 (bytes)
0x000000000000001a (FINI_ARRAY) 0x3db0
0x000000000000001c (FINI_ARRAYSZ) 8 (bytes)
0x000000006ffffef5 (GNU_HASH) 0x3b0
0x0000000000000005 (STRTAB) 0x480
0x0000000000000006 (SYMTAB) 0x3d8
0x000000000000000a (STRSZ) 152 (bytes)
0x000000000000000b (SYMENT) 24 (bytes)
0x0000000000000015 (DEBUG) 0x0
0x0000000000000003 (PLTGOT) 0x3fb8
0x0000000000000002 (PLTRELSZ) 24 (bytes)
0x0000000000000014 (PLTREL) RELA
0x0000000000000017 (JMPREL) 0x618
0x0000000000000007 (RELA) 0x558
0x0000000000000008 (RELASZ) 192 (bytes)
0x0000000000000009 (RELAENT) 24 (bytes)
0x000000000000001e (FLAGS) BIND_NOW
0x000000006ffffffb (FLAGS_1) Flags: NOW PIE
0x000000006ffffffe (VERNEED) 0x528
0x000000006fffffff (VERNEEDNUM) 1
0x000000006ffffff0 (VERSYM) 0x518
0x000000006ffffff9 (RELACOUNT) 3
0x0000000000000000 (NULL) 0x0
以第一个 NEEDED 为例:
| 字段 | 占位(字节) | HEX |
|---|---|---|
| d_tag | 8 | 0000 0000 0000 0001(NEEDED) |
| d_val | 8 | 0000 0000 0000 006e(该字符串在 .dynstr 中的偏移) |
str = .dynstr + 0x6e = 0x480 + 0x6e = 0x4ee
= {2e2f 6c69 622e 736f}
= ./lib.so
动态符号表 .dynsym
动态链接模块之间的符号导入导出关系,它只保存了与动态链接相关的符号
动态符号字符串表 .dynstr
保存符号名的字符串表
readelf -sD program1
Symbol table for image contains 7 entries:
Num: Value Size Type Bind Vis Ndx Name
0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND
1: 0000000000000000 0 FUNC GLOBAL DEFAULT UND _[...]@GLIBC_2.34 (2)
2: 0000000000000000 0 NOTYPE WEAK DEFAULT UND _ITM_deregisterT[...]
3: 0000000000000000 0 FUNC GLOBAL DEFAULT UND foobar
4: 0000000000000000 0 NOTYPE WEAK DEFAULT UND __gmon_start__
5: 0000000000000000 0 NOTYPE WEAK DEFAULT UND _ITM_registerTMC[...]
6: 0000000000000000 0 FUNC WEAK DEFAULT UND [...]@GLIBC_2.2.5 (3)
一个 ELF 文件往往有大量导出的动态符号,线性搜索往往不是一个好的办法,使用哈希表可用于优化数千字符串的搜索。ELF 文件在静态链接时会创建一个哈希表,并以某种序列化格式保存在二进制文件中。
.hash
现已不用,暂时不分析
.gnu.hash
hash 函数:
/*
 * GNU-style symbol hash: start from 5381 and, for every byte of the
 * NUL-terminated name, multiply the running value by 33 and add the byte.
 * Arithmetic wraps modulo 2^32.
 */
static uint32_t elf_gnu_hash(const uint8_t *name) {
  uint32_t hash = 5381;
  for (const uint8_t *cursor = name; *cursor != 0; cursor++) {
    /* hash * 33 is the same as hash + (hash << 5) */
    hash = hash * 33u + *cursor;
  }
  return hash;
}
gnu_hash_table 结构大致如下:
/*
 * Rough layout of the .gnu.hash section. This is an illustration rather than
 * compilable C: the bloom and buckets array lengths are taken from the header
 * fields that precede them.
 */
struct gnu_hash_table {
uint32_t nbuckets; // number of hash buckets
uint32_t symoffset; // number of dynamic-symbol entries that are not accessible from outside, although they still occupy slots in the dynamic symbol table
uint32_t bloom_size; // number of bloom words
uint32_t bloom_shift; // right-shift applied to the hash so that one hash function can drive a k=2 Bloom filter
uint64_t bloom[bloom_size]; // start of the bloom words /* uint32_t for 32-bit binaries */
uint32_t buckets[nbuckets]; // start of the hash buckets
uint32_t chain[]; // start of the per-symbol hash values
};
bloom filter
布隆过滤器用于在符号不存在时尽早终止查找。
在进行符号查找之前,取 bloom[(hash / ELFCLASS_BITS) % bloom_size],如果设置了 hash % ELFCLASS_BITS 和 (hash >> bloom_shift) % ELFCLASS_BITS ,则符号可能在哈希表中,通过 bucket 和 chain 进行常规查找。但是,如果至少有一位未设置,则哈希表中肯定不存在符号。
buckets 和 chains
GNU 哈希表允许跳过符号表开头的前 symoffset 个符号(它们不参与哈希查找)。
桶数组保存的是该桶对应链中第一个符号在动态符号表中的索引。 请注意,这些不是链数组的索引。 对应的链数组索引是 buckets[hash % nbuckets] - symoffset。
这里以Android 11 /apex/com.android.art/lib/libart.so ELFCLASS_BITS = 32;查找 _ZN3art16ScopedSuspendAllC1EPKcb 为例:
objdump -s libart.so| grep "gnu.hash" -A 5
Contents of section .gnu.hash:
19a1c 17050000 41020000 00080000 1a000000 ....A...........
nbuckets = 0x517 = 1303
symoffset = 0x241 = 577
bloom_size = 0x800 = 2048
bloom_shift = 0x1a = 26
19a2c 00000000 09414305 00000000 20282270 .....AC..... ("p
*bloom = 0x19a2c
19a3c 564a0080 00040008 08818000 04254400 VJ...........%D.
19a4c 600090a1 70010c10 83000c06 0048820a `...p........H..
19a5c 00000000 40000430 18030000 068221a4 ....@..0......!.
elf_gnu_hash("_ZN3art16ScopedSuspendAllC1EPKcb") = 3980701119
// bloom filter word
word = bloom[(hash / ELFCLASS_BITS) % bloom_size]
= bloom[( 3980701119 / 32) % 2048]
= bloom[1389]
= 0x19a2c + 1389 * 4
= 0x1afe0
// bloom filter 两个 mask
mask1 = hash % ELFCLASS_BITS
= 3980701119 % 32
= 31
mask2 = (hash >> bloom_shift) % ELFCLASS_BITS
= (3980701119 >> 26) % 32
= 27
“_ZN3art16ScopedSuspendAllC1EPKcb” 对应的 hash 在第 1389 个 bloom word 处,它的起始地址为 0x1afe0
1afdc 0802ac00 40002088 c0052620 00009a00 ....@. ...& ....
1afec 20020000 60800400 09080010 040008a0 ...`...........
bloom[1389] = 0x88200040 其第31位和第27位均为1,所以布隆过滤器不能拒绝这个哈希值。
这时继续在对应的哈希桶上查找:
bucket = buckets[ hash % nbuckets ]
= buckets[ 3980701119 % 1303 ]
= buckets[ 938 ]
哈希桶的起始地址计算
*buckets = *bloom + bloom_size * 4
= 0x19a2c + 2048 * 4
= 0x1ba2c
// 第 938 个桶的位置
bucket = buckets [938]
= 0x1ba2c + 938 * 4
= 0x1c8d4
1c8cc d8100000 df100000 e0100000 e5100000 ................
1c8dc e8100000 ed100000 f2100000 f7100000 ................
buckets [938] = 0x10e0,接下来就可以到 chains[bucket - symoffset] 找该符号的 chain_hash 值,判断是否与 hash 值相等。
完整的查找代码
/*
 * Find sym_name in the dynamic symbol table by way of the GNU hash table.
 *
 * Steps: hash the name, test the two Bloom-filter bits, pick the bucket, then
 * walk the hash chain comparing hashes (lowest bit ignored) and finally names.
 *
 * Returns the matching dynamic symbol entry, or NULL when the Bloom filter
 * rejects the name, the bucket is empty, or the chain ends without a match.
 */
static ElfW(Sym) *elf_dynsym_find_symbol_use_gnu_hash(xdl_t *self, const char *sym_name) {
  static uint32_t elfclass_bits = sizeof(ElfW(Addr)) * 8;
  uint32_t hash = elf_gnu_hash((const uint8_t *)sym_name);

  // Bloom filter: both bits must be set in the selected word, otherwise the
  // symbol is certainly absent
  size_t first_bit = (size_t)1 << (hash % elfclass_bits);
  size_t second_bit = (size_t)1 << ((hash >> self->gnu_hash.bloom_shift) % elfclass_bits);
  size_t required = 0 | first_bit | second_bit;
  size_t bloom_word = self->gnu_hash.bloom[(hash / elfclass_bits) % self->gnu_hash.bloom_cnt];
  if ((bloom_word & required) != required) {
    return NULL;
  }

  // a bucket value below symoffset (this includes STN_UNDEF) means "no chain"
  uint32_t idx = self->gnu_hash.buckets[hash % self->gnu_hash.buckets_cnt];
  if (idx < self->gnu_hash.symoffset) {
    return NULL;
  }

  // walk the chain; idx indexes dynsym, idx - symoffset indexes chains
  for (;; idx++) {
    uint32_t chain_hash = self->gnu_hash.chains[idx - self->gnu_hash.symoffset];

    // the lowest bit is the end-of-chain marker, so compare the other 31 bits
    if (((hash ^ chain_hash) >> 1) == 0) {
      ElfW(Sym) *candidate = self->dynsym + idx;
      if (strcmp(self->dynstr + candidate->st_name, sym_name) == 0) {
        return candidate;
      }
    }

    // the last element of a chain has its lowest bit set to 1
    if ((chain_hash & (uint32_t)1) != 0) {
      break;
    }
  }

  return NULL;
}
调试代码
#define ELFCLASS_BITS (32)
/*
 * GNU-style symbol hash used by the debug program: seed 5381, then
 * hash = hash * 33 + byte for each byte of the NUL-terminated name,
 * wrapping modulo 2^32.
 */
static uint32_t elf_gnu_hash(const uint8_t *name)
{
    uint32_t acc = 5381;
    const uint8_t *pos = name;

    for (; *pos != 0; ++pos) {
        /* acc * 33 equals acc + (acc << 5) */
        acc = acc * 33u + *pos;
    }
    return acc;
}
/*
 * Debug driver: prints the GNU hash of one fixed symbol name together with
 * the Bloom-filter word index and the two bit positions derived from it.
 * bloom_size and bloom_shift are hard-coded to the values read from the
 * libart.so .gnu.hash header in the walkthrough.
 */
int main(int argc, char *argv[])
{
    const int bloom_size = 2048;
    const int bloom_shift = 26;
    const char *symbol = "_ZN3art16ScopedSuspendAllC1EPKcb";

    uint32_t hash = elf_gnu_hash((const uint8_t *)symbol);
    printf("%s hash: %u\n", symbol, hash);

    /* index of the bloom word that holds both filter bits */
    int word_index = (hash / ELFCLASS_BITS) % bloom_size;
    printf("bloom[%d]\n", word_index);

    /* first bit position inside that word */
    int bit1 = hash % ELFCLASS_BITS;
    printf("mask1: %d\n", bit1);

    /* second bit position, taken from the shifted hash */
    int bit2 = (hash >> bloom_shift) % ELFCLASS_BITS;
    printf("mask2: %d\n", bit2);

    return 0;
}
动态链接重定位表
- rel.dyn 对数据引用的修正,它所修正的位置位于.got以及数据段
- rel.plt 对函数引用的修正,它所修正的位置位于.got
参考
ELF: symbol lookup via DT_HASH
ELF: better symbol lookup via DT_GNU_HASH