C++学习---__libc_open函数的原理

335 阅读6分钟

“我报名参加金石计划1期挑战——瓜分10万奖池,这是我的第9篇文章,点击查看活动详情”

引言

__libc_open是glibc中openat接口的封装,针对给定的file路径,打开文件,生成一个文件描述符fd。可以创建文件/创建临时文件/打开已有文件。在很多IO接口函数中都有使用到,我们来一起看看它的实现原理。

源码分析

代码参考:glibc/sysdeps/unix/sysv/linux/open.c

 28 /* Open FILE with access OFLAG.  If O_CREAT or O_TMPFILE is in OFLAG,
 29    a third argument is the file protection.  */
 30 int
 31 __libc_open (const char *file, int oflag, ...)
 32 {
 33   int mode = 0;
 34 
 35   if (__OPEN_NEEDS_MODE (oflag))
 36     {
 37       va_list arg;
 38       va_start (arg, oflag);
 39       mode = va_arg (arg, int);
 40       va_end (arg);
 41     }
 42 
 43   return SYSCALL_CANCEL (openat, AT_FDCWD, file, oflag, mode);
 44 }

首先检查oflag中是否包含O_CREAT或O_TMPFILE,以此判断是否需要从可变参数中取出第三个参数mode(即创建文件时使用的文件权限位)。

__OPEN_NEEDS_MODE宏判定

具体实现里面就是通过位运算检测oflag中是否包含__O_TMPFILE或O_CREAT

 //glibc/io/fcntl.h
 37 /* Detect if open needs mode as a third argument (or for openat as a fourth
 38    argument).  */
 39 #ifdef __O_TMPFILE
 40 # define __OPEN_NEEDS_MODE(oflag) \
 41   (((oflag) & O_CREAT) != 0 || ((oflag) & __O_TMPFILE) == __O_TMPFILE)
 42 #else
 43 # define __OPEN_NEEDS_MODE(oflag) (((oflag) & O_CREAT) != 0)
 44 #endif

SYSCALL_CANCEL

在这个宏中,最终还是通过INLINE_SYSCALL_CALL完成实际调用;其余部分是线程取消(cancellation)相关的处理:在需要检查取消的情况下,调用前用LIBC_CANCEL_ASYNC切换取消类型,调用后用LIBC_CANCEL_RESET恢复,这里我们就不展开了。

//glibc/sysdeps/unix/sysdep.h
111 #if IS_IN (rtld)
112 /* All cancellation points are compiled out in the dynamic loader.  */
113 # define NO_SYSCALL_CANCEL_CHECKING 1
114 #else
115 # define NO_SYSCALL_CANCEL_CHECKING SINGLE_THREAD_P
116 #endif
117 
118 #define SYSCALL_CANCEL(...) \                                                                                                                        
119   ({                                         \
120     long int sc_ret;                                 \
121     if (NO_SYSCALL_CANCEL_CHECKING)                      \
122       sc_ret = INLINE_SYSCALL_CALL (__VA_ARGS__);                \
123     else                                     \
124       {                                      \
125     int sc_cancel_oldtype = LIBC_CANCEL_ASYNC ();                \
126     sc_ret = INLINE_SYSCALL_CALL (__VA_ARGS__);              \
127         LIBC_CANCEL_RESET (sc_cancel_oldtype);                   \
128       }                                      \
129     sc_ret;                                  \
130   })

INLINE_SYSCALL_CALL

这里就是对syscall的相关封装了,实际上就是要从我们之前传入的 SYSCALL_CANCEL (openat, AT_FDCWD, file, oflag, mode);解析出具体需要执行的汇编指令

103 /* Issue a syscall defined by syscall number plus any other argument
104    required.  Any error will be handled using arch defined macros and errno
105    will be set accordingly.
106    It is similar to INLINE_SYSCALL macro, but without the need to pass the
107    expected argument number as second parameter.  */
108 #define INLINE_SYSCALL_CALL(...) \
109   __INLINE_SYSCALL_DISP (__INLINE_SYSCALL, __VA_ARGS__)

宏展开之后 __INLINE_SYSCALL_DISP (__INLINE_SYSCALL, openat, AT_FDCWD, file, oflag, mode)

100 #define __INLINE_SYSCALL_DISP(b,...) \
101   __SYSCALL_CONCAT (b,__INLINE_SYSCALL_NARGS(__VA_ARGS__))(__VA_ARGS__)

展开之后: __SYSCALL_CONCAT (__INLINE_SYSCALL, __INLINE_SYSCALL_NARGS(openat, AT_FDCWD, file, oflag, mode)) (openat, AT_FDCWD, file, oflag, mode)

__INLINE_SYSCALL_NARGS

先具体分析__INLINE_SYSCALL_NARGS的宏展开方式:

 98 #define __INLINE_SYSCALL_NARGS(...) \                      
 99   __INLINE_SYSCALL_NARGS_X (__VA_ARGS__,7,6,5,4,3,2,1,0,)

展开如下,在原有参数后面增加了8个数字

__INLINE_SYSCALL_NARGS_X (openat, AT_FDCWD, file, oflag, mode,7,6,5,4,3,2,1,0,)

继续展开,可以看到这里是取第九个参数,对应上面这个表达式,也就是4。由此可以推出:如果输入有1个参数,那么返回0;输入有2个参数,返回1;依次类推,输入有8个参数,返回7。

 97 #define __INLINE_SYSCALL_NARGS_X(a,b,c,d,e,f,g,h,n,...) n

__SYSCALL_CONCAT

实际上就是把两个记号(token)拼接成一个新的记号,“##”是预处理器的记号连接运算符。这里多包了一层__SYSCALL_CONCAT_X,是为了让参数(例如__INLINE_SYSCALL_NARGS(...))先被完全展开,然后再进行拼接。

 27 #define __SYSCALL_CONCAT_X(a,b)     a##b
 28 #define __SYSCALL_CONCAT(a,b)       __SYSCALL_CONCAT_X (a, b)

至此,我们的宏展开就应该是这样的

__INLINE_SYSCALL4(openat, AT_FDCWD, file, oflag, mode)

__INLINE_SYSCALL4

对应的定义如下:就应该被展开为INLINE_SYSCALL (openat, 4, AT_FDCWD, file, oflag, mode)

 80 #define __INLINE_SYSCALL0(name) \
 81   INLINE_SYSCALL (name, 0)
 82 #define __INLINE_SYSCALL1(name, a1) \
 83   INLINE_SYSCALL (name, 1, a1)
 84 #define __INLINE_SYSCALL2(name, a1, a2) \
 85   INLINE_SYSCALL (name, 2, a1, a2)
 86 #define __INLINE_SYSCALL3(name, a1, a2, a3) \
 87   INLINE_SYSCALL (name, 3, a1, a2, a3)
 88 #define __INLINE_SYSCALL4(name, a1, a2, a3, a4) \
 89   INLINE_SYSCALL (name, 4, a1, a2, a3, a4)
 90 #define __INLINE_SYSCALL5(name, a1, a2, a3, a4, a5) \
 91   INLINE_SYSCALL (name, 5, a1, a2, a3, a4, a5)
 92 #define __INLINE_SYSCALL6(name, a1, a2, a3, a4, a5, a6) \
 93   INLINE_SYSCALL (name, 6, a1, a2, a3, a4, a5, a6)
 94 #define __INLINE_SYSCALL7(name, a1, a2, a3, a4, a5, a6, a7) \
 95   INLINE_SYSCALL (name, 7, a1, a2, a3, a4, a5, a6, a7)

INLINE_SYSCALL

封装调用了INTERNAL_SYSCALL宏

INTERNAL_SYSCALL (openat, 4, AT_FDCWD, file, oflag, mode)

 38 /* Define a macro which expands into the inline wrapper code for a system
 39    call.  It sets the errno and returns -1 on a failure, or the syscall
 40    return value otherwise.  */
 41 #undef INLINE_SYSCALL
 42 #define INLINE_SYSCALL(name, nr, args...)               \
 43   ({                                    \
 44     long int sc_ret = INTERNAL_SYSCALL (name, nr, args);        \
 45     __glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (sc_ret))        \
 46     ? SYSCALL_ERROR_LABEL (INTERNAL_SYSCALL_ERRNO (sc_ret))     \
 47     : sc_ret;                               \                                                                                                        
 48   })

INTERNAL_SYSCALL

这一步的实现因平台而异,这里以x86_64平台为例,上面的调用会被依次展开为:

internal_syscall4 (SYS_ify (openat), AT_FDCWD, file, oflag, mode)

internal_syscall4 (__NR_openat, AT_FDCWD, file, oflag, mode)

//glibc/sysdeps/unix/sysv/linux/x86_64/sysdep.h

 29 /* For Linux we can use the system call table in the header file
 30     /usr/include/asm/unistd.h
 31    of the kernel.  But these symbols do not follow the SYS_* syntax
 32    so we have to redefine the `SYS_ify' macro here.  */                                                                                              
 33 #undef SYS_ify
 34 #define SYS_ify(syscall_name)   __NR_##syscall_name
 
233 #undef INTERNAL_SYSCALL
234 #define INTERNAL_SYSCALL(name, nr, args...)             \
235     internal_syscall##nr (SYS_ify (name), args)

最终我们的调用为internal_syscall4 (__NR_openat, AT_FDCWD, file, oflag, mode)

依次将4个参数装入寄存器中,然后执行syscall指令完成系统调用,这里用到了GCC扩展的内联汇编(asm)语法。

301 #undef internal_syscall4
302 #define internal_syscall4(number, arg1, arg2, arg3, arg4)       \
303 ({                                  \
304     unsigned long int resultvar;                    \
305     TYPEFY (arg4, __arg4) = ARGIFY (arg4);              \
306     TYPEFY (arg3, __arg3) = ARGIFY (arg3);              \
307     TYPEFY (arg2, __arg2) = ARGIFY (arg2);              \
308     TYPEFY (arg1, __arg1) = ARGIFY (arg1);              \
309     register TYPEFY (arg4, _a4) asm ("r10") = __arg4;           \
310     register TYPEFY (arg3, _a3) asm ("rdx") = __arg3;           \
311     register TYPEFY (arg2, _a2) asm ("rsi") = __arg2;           \
312     register TYPEFY (arg1, _a1) asm ("rdi") = __arg1;           \
313     asm volatile (                          \
314     "syscall\n\t"                           \
315     : "=a" (resultvar)                          \
316     : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4)      \
317     : "memory", REGISTERS_CLOBBERED_BY_SYSCALL);            \
318     (long int) resultvar;                       \
319 })

我们再看一下该架构下参数最多的宏(x86_64架构最多只支持6个参数):

344 #undef internal_syscall6
345 #define internal_syscall6(number, arg1, arg2, arg3, arg4, arg5, arg6) \
346 ({                                  \
347     unsigned long int resultvar;                    \
348     TYPEFY (arg6, __arg6) = ARGIFY (arg6);              \
349     TYPEFY (arg5, __arg5) = ARGIFY (arg5);              \
350     TYPEFY (arg4, __arg4) = ARGIFY (arg4);              \
351     TYPEFY (arg3, __arg3) = ARGIFY (arg3);              \
352     TYPEFY (arg2, __arg2) = ARGIFY (arg2);              \
353     TYPEFY (arg1, __arg1) = ARGIFY (arg1);              \
354     register TYPEFY (arg6, _a6) asm ("r9") = __arg6;            \
355     register TYPEFY (arg5, _a5) asm ("r8") = __arg5;            \
356     register TYPEFY (arg4, _a4) asm ("r10") = __arg4;           \
357     register TYPEFY (arg3, _a3) asm ("rdx") = __arg3;           \
358     register TYPEFY (arg2, _a2) asm ("rsi") = __arg2;           \
359     register TYPEFY (arg1, _a1) asm ("rdi") = __arg1;           \
360     asm volatile (                          \
361     "syscall\n\t"                           \
362     : "=a" (resultvar)                          \
363     : "0" (number), "r" (_a1), "r" (_a2), "r" (_a3), "r" (_a4),     \
364       "r" (_a5), "r" (_a6)                      \
365     : "memory", REGISTERS_CLOBBERED_BY_SYSCALL);            \
366     (long int) resultvar;                       \
367 })

寄存器的使用上,系统调用号通过rax传入,参数依次装入rdi,rsi,rdx,r10,r8,r9,返回值同样从rax中取回。

支持最多7个参数的只有如下的架构:

riscv架构:glibc/sysdeps/unix/sysv/linux/riscv/sysdep.h 分别使用a0到a6寄存器装参数

323 # define internal_syscall7(number, arg0, arg1, arg2, arg3, arg4, arg5, arg6) \                                                                       
324 ({                                  \
325     long int _sys_result;                       \
326     long int _arg0 = (long int) (arg0);             \
327     long int _arg1 = (long int) (arg1);             \
328     long int _arg2 = (long int) (arg2);             \
329     long int _arg3 = (long int) (arg3);             \
330     long int _arg4 = (long int) (arg4);             \
331     long int _arg5 = (long int) (arg5);             \
332     long int _arg6 = (long int) (arg6);             \
333                                     \
334     {                               \
335     register long int __a7 asm ("a7") = number;         \
336     register long int __a0 asm ("a0") = _arg0;          \
337     register long int __a1 asm ("a1") = _arg1;          \
338     register long int __a2 asm ("a2") = _arg2;          \
339     register long int __a3 asm ("a3") = _arg3;          \
340     register long int __a4 asm ("a4") = _arg4;          \
341     register long int __a5 asm ("a5") = _arg5;          \
342     register long int __a6 asm ("a6") = _arg6;          \
343     __asm__ volatile (                      \
344     "scall\n\t"                             \
345     : "+r" (__a0)                           \
346     : "r" (__a7), "r" (__a1), "r" (__a2), "r" (__a3),       \
347       "r" (__a4), "r" (__a5), "r" (__a6)                \
348     : __SYSCALL_CLOBBERS);                      \
349     _sys_result = __a0;                     \
350     }                               \
351     _sys_result;                            \
352 })

mips架构:glibc/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h

315 #define internal_syscall7(v0_init, input, number, err,          \
316               arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
317 ({                                  \
318     union __mips_syscall_return _sc_ret;                \
319     _sc_ret.val = __mips_syscall7 ((long int) (arg1),       \
320                        (long int) (arg2),       \
321                        (long int) (arg3),       \
322                        (long int) (arg4),       \
323                        (long int) (arg5),       \                                                                                                    
324                        (long int) (arg6),       \
325                        (long int) (arg7),       \
326                        (long int) (number));        \
327     _sc_ret.reg.v1 != 0 ? -_sc_ret.reg.v0 : _sc_ret.reg.v0;     \
328 })

总结

从最开始的SYSCALL_CANCEL (openat, AT_FDCWD, file, oflag, mode)宏,依次转换为

internal_syscall4 (__NR_openat, AT_FDCWD, file, oflag, mode),然后调用汇编实现底层syscall操作。