C++学习---_IO_new_fdopen函数原理分析学习

173 阅读8分钟

“我报名参加金石计划1期挑战——瓜分10万奖池,这是我的第11篇文章,点击查看活动详情”

引言

_IO_new_fdopen是Glibc中fdopen函数的内部实现,接受fd和打开mode,返回文件流FILE指针。是stdio.h中比较重要的函数,我们来一起看看它的源码实现。

入参说明

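接受两个参数,fd是对应文件的fd号,mode是对应的模式。下面列出的是这些mode在fopen中的语义;对fdopen而言,fd已经处于打开状态,因此不会创建文件,"w"也不会截断已有文件,并且mode必须与fd自身的访问模式兼容(见后文第3节的检查)。mode包括如下的组合: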

  • "r":read,文件必须存在;
  • "w":write,文件存在则从头开始覆盖写,不存在则创建新文件写入;
  • "a":append,文件存在则从末尾开始追加写入,不存在则创建新文件写入;
  • "r+":read/update,文件必须存在,可读可写;
  • "w+":write/update,文件存在则从头开始读写,不存在则创建新文件读写;
  • "a+":append/update,文件存在则从末尾开始追加读写,不存在则创建新文件读写。
//glibc/include/stdio.h
185 extern FILE *_IO_new_fdopen (int, const char*);
186 #   define fdopen(fd, mode) _IO_new_fdopen (fd, mode)

对于二进制文件读写则使用"b",组合之后有"rb"、"wb"、"ab"、"r+b"、"w+b"、"a+b"、"rb+"、"wb+"、"ab+"。C11标准中增加了一个新的"x",与"w"组合使用:如果当前文件已存在,则fopen直接失败,避免覆盖已有文件,可用的组合有"wx"、"wbx"、"w+x"以及"w+bx"/"wb+x"。需要注意的是,在下文_IO_new_fdopen的mode解析中,"x"和"b"都会被直接忽略。

1.常用变量定义

定义了locked_FILE指针new_f,里面包含了_IO_FILE_plus,_IO_lock_t,_IO_wide_data,其中_IO_FILE_plus保存了主要的记录内容。do_seek默认为false,表示默认不移动文件读写指针;只有当mode指定了append模式、并且fd自身的flags中尚未设置O_APPEND时,才会置为true。

//glibc/libio/iofdopen.c
 33 FILE *
 34 _IO_new_fdopen (int fd, const char *mode)
 35 {
 36   int read_write;
 37   struct locked_FILE
 38   {
 39     struct _IO_FILE_plus fp;
 40 #ifdef _IO_MTSAFE_IO
 41     _IO_lock_t lock;
 42 #endif
 43     struct _IO_wide_data wd;
 44   } *new_f;
 45   int i;
 46   int use_mmap = 0;
 47 
 48   /* Decide whether we modify the offset of the file we attach to and seek to
 49      the end of file.  We only do this if the mode is 'a' and if the file
 50      descriptor did not have O_APPEND in its flags already.  */
 51   bool do_seek = false;

_IO_FILE_plus

其中实际上保存了我们最后要返回的FILE file,再加上针对C++ streambuf做的兼容虚函数表。

293 struct _IO_jump_t
294 {
295     JUMP_FIELD(size_t, __dummy);
296     JUMP_FIELD(size_t, __dummy2);
297     JUMP_FIELD(_IO_finish_t, __finish);                                                                                                              
298     JUMP_FIELD(_IO_overflow_t, __overflow);
299     JUMP_FIELD(_IO_underflow_t, __underflow);
300     JUMP_FIELD(_IO_underflow_t, __uflow);
301     JUMP_FIELD(_IO_pbackfail_t, __pbackfail);
302     /* showmany */
303     JUMP_FIELD(_IO_xsputn_t, __xsputn);
304     JUMP_FIELD(_IO_xsgetn_t, __xsgetn);
305     JUMP_FIELD(_IO_seekoff_t, __seekoff);
306     JUMP_FIELD(_IO_seekpos_t, __seekpos);
307     JUMP_FIELD(_IO_setbuf_t, __setbuf);
308     JUMP_FIELD(_IO_sync_t, __sync);
309     JUMP_FIELD(_IO_doallocate_t, __doallocate);
310     JUMP_FIELD(_IO_read_t, __read);
311     JUMP_FIELD(_IO_write_t, __write);
312     JUMP_FIELD(_IO_seek_t, __seek);
313     JUMP_FIELD(_IO_close_t, __close);
314     JUMP_FIELD(_IO_stat_t, __stat);
315     JUMP_FIELD(_IO_showmanyc_t, __showmanyc);
316     JUMP_FIELD(_IO_imbue_t, __imbue);
317 };
318 
319 /* We always allocate an extra word following an _IO_FILE.
320    This contains a pointer to the function jump table used.
321    This is for compatibility with C++ streambuf; the word can
322    be used to smash to a pointer to a virtual function table. */
323 
324 struct _IO_FILE_plus
325 {
326   FILE file;
327   const struct _IO_jump_t *vtable;
328 };

FILE结构体里面包含的内容参考FILE类型---文件流对象的说明

_IO_lock_t

参考C++学习---_IO_lock_t的源码学习

_IO_wide_data

宽字节数据的读写控制信息

120 /* Extra data for wide character streams.  */
121 struct _IO_wide_data
122 {
123   wchar_t *_IO_read_ptr;    /* Current read pointer */
124   wchar_t *_IO_read_end;    /* End of get area. */
125   wchar_t *_IO_read_base;   /* Start of putback+get area. */
126   wchar_t *_IO_write_base;  /* Start of put area. */
127   wchar_t *_IO_write_ptr;   /* Current put pointer. */
128   wchar_t *_IO_write_end;   /* End of put area. */
129   wchar_t *_IO_buf_base;    /* Start of reserve area. */
130   wchar_t *_IO_buf_end;     /* End of reserve area. */
131   /* The following fields are used to support backing up and undo. */
132   wchar_t *_IO_save_base;   /* Pointer to start of non-current get area. */
133   wchar_t *_IO_backup_base; /* Pointer to first valid character of
134                    backup area */
135   wchar_t *_IO_save_end;    /* Pointer to end of non-current get area. */
136 
137   __mbstate_t _IO_state;
138   __mbstate_t _IO_last_state;
139   struct _IO_codecvt _codecvt;
140 
141   wchar_t _shortbuf[1];
142 
143   const struct _IO_jump_t *_wide_vtable; 
144 };

2.mode信息解析

通过解析传入的mode字符串,对int read_write进行赋值,基本逻辑与上面提到的字符组合一致。首先查看第一个字符,只能是"r"/"w"/"a"中的一个(否则置errno为EINVAL并返回NULL),据此进行组合赋值,用到的标志位定义如下:

 70 #define _IO_NO_READS 0x0004 /* Reading not allowed. */
 71 #define _IO_NO_WRITES 0x0008 /* Writing not allowed. */
 80 #define _IO_IS_APPENDING 0x1000

然后检查第二个到第五个字符(最长的情况为"wb+x",加上最后的\0),遇到'\0'或"+"就结束循环。注意这里只有"+"和"m"有实际作用:"+"执行read_write &= _IO_IS_APPENDING(注意这里是按位与,会清掉_IO_NO_READS和_IO_NO_WRITES,表示可读可写;只有"a+"会保留0x1000,"r+"和"w+"的结果为0);"m"将use_mmap标记为1;"x"、"b"以及其他字符都被忽略。

 53   switch (*mode)
 54     {
 55     case 'r':
 56       read_write = _IO_NO_WRITES;
 57       break;
 58     case 'w':
 59       read_write = _IO_NO_READS;
 60       break;
 61     case 'a':
 62       read_write = _IO_NO_READS|_IO_IS_APPENDING;
 63       break;
 64     default:
 65       __set_errno (EINVAL);
 66       return NULL;
 67   }
 68   for (i = 1; i < 5; ++i)
 69     {
 70       switch (*++mode)
 71     {
 72     case '\0':
 73       break;
 74     case '+':
 75       read_write &= _IO_IS_APPENDING;
 76       break;
 77     case 'm':
 78       use_mmap = 1;
 79       continue;
 80     case 'x':
 81     case 'b':
 82     default:                                                                                                                                         
 83       /* Ignore */
 84       continue;
 85     }
 86       break;
 87     }

3.调用__fcntl获取FD的状态信息

这里获取FD的状态信息(获取信息失败也返回NULL)后进行检查,以下两种情况都视为EINVAL,参数异常,返回NULL。

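  • 如果当前fd的访问权限为只读(O_RDONLY),而前文解析出的read_write中没有_IO_NO_WRITES(即mode不是纯"r",例如"w"、"a"或带"+"的模式),说明想要写或追加一个只读的fd;
  • 如果当前fd的访问权限为只写(O_WRONLY),而前文解析出的read_write中没有_IO_NO_READS(即mode为"r"或带"+"的模式),说明想要读一个只写的fd;"w"和"a"在只写的fd上是允许的。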
 88   int fd_flags = __fcntl (fd, F_GETFL);
 89   if (fd_flags == -1)
 90     return NULL;
 91 
 92   if (((fd_flags & O_ACCMODE) == O_RDONLY && !(read_write & _IO_NO_WRITES))
 93       || ((fd_flags & O_ACCMODE) == O_WRONLY && !(read_write & _IO_NO_READS)))
 94     {
 95       __set_errno (EINVAL);
 96       return NULL;
 97     }
 
 86 #define F_GETFL     3   /* Get file status flags.  */
 
 24 /* File access modes for `open' and `fcntl'.  */
 25 #define O_RDONLY    0   /* Open read-only.  */
 26 #define O_WRONLY    1   /* Open write-only.  */
 27 #define O_RDWR      2   /* Open read/write.  */
 
 78 /* Mask for file access modes.  This is system-dependent in case
 79    some system ever wants to define some other flavor of access.  */
 80 #define O_ACCMODE   (O_RDONLY|O_WRONLY|O_RDWR)

4.解析append信息并更新fd的status flags

这里判断read_write是否有设定"a",即追加模式,而且当前查询的文件status flags没有设置append,那么将do_seek(进行文件读写指针移动)置为true,

同时调用__fcntl,F_SETFL模式将文件的status flags 设置为原有flags与O_APPEND的或。

115   if ((read_write & _IO_IS_APPENDING) && !(fd_flags & O_APPEND))
116     {
117       do_seek = true;
118       if (__fcntl (fd, F_SETFL, fd_flags | O_APPEND) == -1)
119     return NULL;
120     }

 87 #define F_SETFL     4   /* Set file status flags.  */

5.为new_f分配堆内存空间

分配大小为sizeof (struct locked_FILE),如果分配失败,则返回NULL。

122   new_f = (struct locked_FILE *) malloc (sizeof (struct locked_FILE));
123   if (new_f == NULL)
124     return NULL;

6._IO_no_init初始化

使用&new_f->lock初始化new_f->fp.file._lock,即_IO_FILE中的_lock;

125 #ifdef _IO_MTSAFE_IO
126   new_f->fp.file._lock = &new_f->lock;
127 #endif
128   _IO_no_init (&new_f->fp.file, 0, 0, &new_f->wd,
129 #if _G_HAVE_MMAP
130            (use_mmap && (read_write & _IO_NO_WRITES))
131            ? &_IO_wfile_jumps_maybe_mmap :
132 #endif
133            &_IO_wfile_jumps);

调用_IO_no_init初始化&new_f->fp.file指针,其中flags和orientation置0,其余参数基本都是置空,注意,这里还调用了_IO_old_init对一些基础参数做了置空操作,同时,值得关注的一点:_IO_lock_init初始化了*fp->_lock,便于后续的使用。

最后一个参数是宽字符流使用的_IO_jump_t跳转表:当_G_HAVE_MMAP宏为真、mode中有"m"、并且read_write带有_IO_NO_WRITES(即只读的"r"模式)时,使用_IO_wfile_jumps_maybe_mmap,否则使用_IO_wfile_jumps,两者区别就在于mmap和munmap在读写文件时的使用,这里就不细展开了。

//glibc/libio/genops.c
 560 void
 561 _IO_no_init (FILE *fp, int flags, int orientation,                         
 562          struct _IO_wide_data *wd, const struct _IO_jump_t *jmp)
 563 {
 564   _IO_old_init (fp, flags);
 565   fp->_mode = orientation;
 566   if (orientation >= 0)
 567     {
 568       fp->_wide_data = wd;
 569       fp->_wide_data->_IO_buf_base = NULL;
 570       fp->_wide_data->_IO_buf_end = NULL;
 571       fp->_wide_data->_IO_read_base = NULL;
 572       fp->_wide_data->_IO_read_ptr = NULL;
 573       fp->_wide_data->_IO_read_end = NULL;
 574       fp->_wide_data->_IO_write_base = NULL;
 575       fp->_wide_data->_IO_write_ptr = NULL;
 576       fp->_wide_data->_IO_write_end = NULL;
 577       fp->_wide_data->_IO_save_base = NULL;
 578       fp->_wide_data->_IO_backup_base = NULL;
 579       fp->_wide_data->_IO_save_end = NULL;
 580 
 581       fp->_wide_data->_wide_vtable = jmp;
 582     }
 583   else
 584     /* Cause predictable crash when a wide function is called on a byte
 585        stream.  */
 586     fp->_wide_data = (struct _IO_wide_data *) -1L;
 587   fp->_freeres_list = NULL;
 588 }
 
 529 void
 530 _IO_old_init (FILE *fp, int flags)
 531 {
 532   fp->_flags = _IO_MAGIC|flags;
 533   fp->_flags2 = 0;
 534   if (stdio_needs_locking)
 535     fp->_flags2 |= _IO_FLAGS2_NEED_LOCK;
 536   fp->_IO_buf_base = NULL;
 537   fp->_IO_buf_end = NULL;
 538   fp->_IO_read_base = NULL;
 539   fp->_IO_read_ptr = NULL;
 540   fp->_IO_read_end = NULL;
 541   fp->_IO_write_base = NULL;
 542   fp->_IO_write_ptr = NULL;
 543   fp->_IO_write_end = NULL;
 544   fp->_chain = NULL; /* Not necessary. */
 545 
 546   fp->_IO_save_base = NULL;
 547   fp->_IO_backup_base = NULL;
 548   fp->_IO_save_end = NULL;
 549   fp->_markers = NULL;
 550   fp->_cur_column = 0;
 551 #if _IO_JUMPS_OFFSET
 552   fp->_vtable_offset = 0;
 553 #endif
 554 #ifdef _IO_MTSAFE_IO
 555   if (fp->_lock != NULL)                                         
 556     _IO_lock_init (*fp->_lock);
 557 #endif
 558 }

7._IO_JUMPS初始化&new_f->fp

宏展开后,可以看到,实际上是初始化(&new_f->fp)->vtable,也就是new_f->fp.vtable,即前文提到的兼容C++ streambuf的虚函数表:当_G_HAVE_MMAP为真、mode中有"m"且为只读的"r"模式(read_write带有_IO_NO_WRITES)时置为_IO_file_jumps_maybe_mmap,否则为_IO_file_jumps。

 98 #define _IO_JUMPS(THIS) (THIS)->vtable
 
134   _IO_JUMPS (&new_f->fp) =
135 #if _G_HAVE_MMAP
136     (use_mmap && (read_write & _IO_NO_WRITES)) ? &_IO_file_jumps_maybe_mmap :
137 #endif
138       &_IO_file_jumps;

8._IO_new_file_init_internal初始化

将_offset初始化为-1(_IO_pos_BAD,表示当前位置未知); _flags或上(_IO_IS_FILEBUF+_IO_NO_READS+_IO_NO_WRITES+_IO_TIED_PUT_GET),即:文件buffer+禁止读+禁止写(两者同时置位,表示此时还是一个"已关闭"的文件缓冲区)+put/get指针一致移动、指向同一个位置; 调用_IO_link_in链接fp; _fileno初始化为-1;

139   _IO_new_file_init_internal (&new_f->fp);

//glibc/libio/fileops.c
 104 void
 105 _IO_new_file_init_internal (struct _IO_FILE_plus *fp)
 106 {
 107   /* POSIX.1 allows another file handle to be used to change the position
 108      of our file descriptor.  Hence we actually don't know the actual
 109      position before we do the first fseek (and until a following fflush). */
 110   fp->file._offset = _IO_pos_BAD;
 111   fp->file._flags |= CLOSED_FILEBUF_FLAGS;
 112 
 113   _IO_link_in (fp);
 114   fp->file._fileno = -1;
 115 }
 
 93 /* _IO_pos_BAD is an off64_t value indicating error, unknown, or EOF.  */
 94 #define _IO_pos_BAD ((off64_t) -1)
 
  100 #define CLOSED_FILEBUF_FLAGS \
 101   (_IO_IS_FILEBUF+_IO_NO_READS+_IO_NO_WRITES+_IO_TIED_PUT_GET)
 
  78 #define _IO_TIED_PUT_GET      0x0400 /* Put and get pointer move in unison.  */

_IO_link_in (fp)

这里实际上就是链接文件的过程,_IO_lock_lock加锁解锁都是配对的,这里主要是要将_chain赋值为_IO_list_all,然后将_IO_list_all赋值为当前的fp,代表当前要打开的文件链接到_IO_list_all上了。

  35 #ifdef _IO_MTSAFE_IO
  36 static _IO_lock_t list_all_lock = _IO_lock_initializer;
  37 #endif
  85 void
  86 _IO_link_in (struct _IO_FILE_plus *fp)
  87 {
  88   if ((fp->file._flags & _IO_LINKED) == 0)
  89     {
  90       fp->file._flags |= _IO_LINKED;
  91 #ifdef _IO_MTSAFE_IO
  92       _IO_cleanup_region_start_noarg (flush_cleanup);
  93       _IO_lock_lock (list_all_lock);
  94       run_fp = (FILE *) fp;
  95       _IO_flockfile ((FILE *) fp);
  96 #endif
  97       fp->file._chain = (FILE *) _IO_list_all;
  98       _IO_list_all = fp;
  99 #ifdef _IO_MTSAFE_IO
 100       _IO_funlockfile ((FILE *) fp);
 101       run_fp = NULL;
 102       _IO_lock_unlock (list_all_lock);
 103       _IO_cleanup_region_end (0);
 104 #endif
 105     }
 106 }

9._IO_mask_flags设置mask

将在_IO_new_file_init_internal中置为-1的_fileno置为fd自身;

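_flags先与上~_IO_DELETE_DONT_CLOSE,清除该标志位,表示后续关闭流时可以对_fileno调用close;再通过_IO_mask_flags只替换_flags中_IO_NO_READS、_IO_NO_WRITES、_IO_IS_APPENDING这三个标志位,使其与read_write中对应的位一致,_flags的其余位保持不变。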

140   /* We only need to record the fd because _IO_file_init_internal will
141      have unset the offset.  It is important to unset the cached
142      offset because the real offset in the file could change between
143      now and when the handle is activated and we would then mislead
144      ftell into believing that we have a valid offset.  */
145   new_f->fp.file._fileno = fd;
146   new_f->fp.file._flags &= ~_IO_DELETE_DONT_CLOSE;
147 
148   _IO_mask_flags (&new_f->fp.file, read_write,
149           _IO_NO_READS+_IO_NO_WRITES+_IO_IS_APPENDING);

 74 #define _IO_DELETE_DONT_CLOSE 0x0040 /* Don't call close(_fileno) on close.  */
 
518 #define _IO_mask_flags(fp, f, mask) \                        
519        ((fp)->_flags = ((fp)->_flags & ~(mask)) | ((f) & (mask)))

10.针对append模式特殊处理

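如果do_seek为true(即前面刚给fd加上了O_APPEND),并且read_write同时带有_IO_IS_APPENDING和_IO_NO_READS(即纯"a"模式,不含"a+"),那么我们要将对应的读写位置移动到文件末尾;如果seek失败且errno不是ESPIPE(例如管道等不支持seek的fd会返回ESPIPE),则返回NULL。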

151   /* For append mode, set the file offset to the end of the file if we added
152      O_APPEND to the file descriptor flags.  Don't update the offset cache
153      though, since the file handle is not active.  */
154   if (do_seek && ((read_write & (_IO_IS_APPENDING | _IO_NO_READS))
155           == (_IO_IS_APPENDING | _IO_NO_READS)))
156     {
157       off64_t new_pos = _IO_SYSSEEK (&new_f->fp.file, 0, _IO_seek_end);
158       if (new_pos == _IO_pos_BAD && errno != ESPIPE)
159     return NULL;
160     }

_IO_SYSSEEK

_IO_SYSSEEK (&new_f->fp.file, 0, _IO_seek_end)即相对当前文件末尾,跳动0,即移动到末尾。

257 #define _IO_SYSSEEK(FP, OFFSET, MODE) JUMP2 (__seek, FP, OFFSET, MODE)
125 #define JUMP2(FUNC, THIS, X1, X2) (_IO_JUMPS_FUNC(THIS)->FUNC) (THIS, X1, X2)

252 /* The 'sysseek' hook is used to re-position an external file.
253    It generalizes the Unix lseek(2) function.
254    It matches the streambuf::sys_seek virtual function, which is
255    specific to this implementation. */
256 typedef off64_t (*_IO_seek_t) (FILE *, off64_t, int);
 57 #define _IO_seek_end 2

11.最后返回文件对象

161   return &new_f->fp.file;