C++学习---cstdio的源码学习分析04-创建临时文件函数tmpfile

144 阅读3分钟

“我报名参加金石计划1期挑战——瓜分10万奖池,这是我的第8篇文章,点击查看活动详情”

续接上文:C++学习---cstdio的源码学习分析03-文件重命名函数rename

cstdio中的文件操作函数

stdio.h中定义了文件删除函数remove,文件重命名函数rename,创建临时文件函数tmpfile,生成临时文件名函数tmpnam。接下来我们一起来分析一下tmpfile对应的源码实现。

创建临时文件函数tmpfile

FILE * tmpfile ( void );

Glibc中还定义了tmpfile64,用于大文件支持(64位文件偏移,对应下面声明中的__USE_FILE_OFFSET64分支),本文中以tmpfile函数进行分析。

183 /* Create a temporary file and open it read/write.
184       
185    This function is a possible cancellation point and therefore not
186    marked with __THROW.  */
187 #ifndef __USE_FILE_OFFSET64
188 extern FILE *tmpfile (void)
189   __attribute_malloc__ __attr_dealloc_fclose __wur;
190 #else
191 # ifdef __REDIRECT
192 extern FILE *__REDIRECT (tmpfile, (void), tmpfile64)
193   __attribute_malloc__ __attr_dealloc_fclose __wur;
194 # else
195 #  define tmpfile tmpfile64                                                   
196 # endif
197 #endif

实现方式

源码如下。按照源码注释的描述,基本流程是生成一个临时文件名(注释中称由tmpnam生成),然后以"w+b"(二进制读写)打开并返回;实际代码会先尝试系统特定的__gen_tempfd,失败后才回退到__path_search + __gen_tempname生成文件名并创建文件的方式。下面进行具体流程分析:

//glibc/stdio-common/tmpfile.c
 30 /* This returns a new stream opened on a temporary file (generated
 31    by tmpnam).  The file is opened with mode "w+b" (binary read/write).
 32    If we couldn't generate a unique filename or the file couldn't
 33    be opened, NULL is returned.  */
 34 FILE *
 35 tmpfile (void)
 36 {
 37   int fd;
 38   FILE *f;
 39   int flags = 0;
 40 #ifdef FLAGS
 41   flags = FLAGS;
 42 #endif
 43 
 44   /* First try a system specific method.  */
 45   fd = __gen_tempfd (flags);
 46 
 47   if (fd < 0)
 48     {
 49       char buf[FILENAME_MAX];
 50 
 51       if (__path_search (buf, sizeof buf, NULL, "tmpf", 0))
 52     return NULL;
 53 
 54       fd = __gen_tempname (buf, 0, flags, __GT_FILE);
 55       if (fd < 0)
 56     return NULL;
 57 
 58       /* Note that this relies on the Unix semantics that
 59      a file is not really removed until it is closed.  */
 60       (void) __unlink (buf);
 61     }
 62 
 63   if ((f = __fdopen (fd, "w+b")) == NULL)
 64     __close (fd);
 65 
 66   return f;
 67 }

__gen_tempfd获取临时fd

首先通过系统特定方法__gen_tempfd获取临时fd。入参flags来自FLAGS宏定义,即以大文件方式打开的标志。这里tmpfile和tmpfile64实际上是一种处理方式,所以__gen_tempfd的入参是0,除非是特定的架构中对O_LARGEFILE宏定义不同。这也体现了Glibc代码的兼容性:没有直接使用数字0,给后面的平台留足了兼容空间;同时,代码也会因此变得比较复杂难懂。

//glibc/bits/fcntl.h
 52 /* All opens support large file sizes, so there is no flag bit for this.  */
 53 #ifdef __USE_LARGEFILE64
 54 # define O_LARGEFILE    0
 55 #endif
 
 //glibc/stdio-common/tmpfile64.c
 21 /* If there is no O_LARGEFILE, then the plain tmpfile definition
 22    does the job and it gets tmpfile64 as an alias.  */
 23 
 24 #if defined O_LARGEFILE && O_LARGEFILE != 0                      
 25 # define FLAGS      O_LARGEFILE
 26 # define tmpfile    tmpfile64
 27 # include <tmpfile.c>
 28 #endif

__gen_tempfd的逻辑

这里我们就不细节展开__open的流程了,后续有专门的文章进行分析。关注打开的文件节点和flags信息。

先尝试打开P_tmpdir(默认是/tmp路径),如果失败返回fd小于0,且错误码为ENOENT(/* No such file or directory */),而且之前打开的P_tmpdir不是"/tmp",那就尝试打开"/tmp"路径,这也是为了兼容其它平台可以提前定义P_tmpdir的情况。

flags信息为:读写(O_RDWR)+ 匿名临时文件(O_TMPFILE)+ O_EXCL。注意O_EXCL与O_TMPFILE搭配使用时,含义不是通常的"文件已存在则返回失败",而是表示这个临时文件之后不能再被链接(linkat)到文件系统中。

文件权限信息为:S_IRUSR | S_IWUSR,即只允许文件所有者读和写(0600)。

//glibc/libio/stdio.h
118 #if defined __USE_MISC || defined __USE_XOPEN
119 /* Default path prefix for `tempnam' and `tmpnam'.  */
120 # define P_tmpdir   "/tmp"
121 #endif

//glibc/sysdeps/unix/sysv/linux/gentempfd.c
 24 int   
 25 __gen_tempfd (int flags)
 26 {
 27   int fd = __open (P_tmpdir, O_RDWR | O_TMPFILE | O_EXCL | flags,
 28            S_IRUSR | S_IWUSR);
 29   if (fd < 0 && errno == ENOENT && strcmp (P_tmpdir, "/tmp") != 0)
 30     fd = __open ("/tmp", O_RDWR | O_TMPFILE | O_EXCL | flags,
 31          S_IRUSR | S_IWUSR);
 32 
 33   return fd;
 34 }

__open的函数原型:第一个参数是要打开的文件名,第二个参数是open flag;当O_CREAT或O_TMPFILE在OFLAG中时,增加第三个参数表示文件的保护(权限),即由谁进行读写。

//glibc/sysdeps/unix/sysv/linux/open.c
 28 /* Open FILE with access OFLAG.  If O_CREAT or O_TMPFILE is in OFLAG,
 29    a third argument is the file protection.  */
 30 int   
 31 __libc_open (const char *file, int oflag, ...)
 32 { 
 33   int mode = 0;
 34 
 35   if (__OPEN_NEEDS_MODE (oflag))
 36     {
 37       va_list arg;
 38       va_start (arg, oflag);
 39       mode = va_arg (arg, int);
 40       va_end (arg);
 41     }
 42   
 43   return SYSCALL_CANCEL (openat, AT_FDCWD, file, oflag, mode);
 44 }

__path_search生成临时文件路径

__path_search (buf, sizeof buf, NULL, "tmpf", 0):根据函数注释的解释,它在候选目录中查找一个存在的目录dir(默认可以选择/tmp),然后生成文件路径模板dir/pfxXXXXXX,写入到buf中。中间的逻辑其实就是为了生成字符串:sprintf (tmpl, "%.*s/%.*sXXXXXX", (int) dlen, dir, (int) plen, pfx); 其中dir可以从几个路径获取,有优先级和控制考虑;pfx优先使用传入的(最多取前5个字符),为空时使用"file"。

//glibc/sysdeps/posix/tempname.c
105 /* Path search algorithm, for tmpnam, tmpfile, etc.  If DIR is
106    non-null and exists, uses it; otherwise uses the first of $TMPDIR,
107    P_tmpdir, /tmp that exists.  Copies into TMPL a template suitable
108    for use with mk[s]temp.  Will fail (-1) if DIR is non-null and
109    doesn't exist, none of the searched dirs exists, or there's not
110    enough space in TMPL. */
111 int
112 __path_search (char *tmpl, size_t tmpl_len, const char *dir, const char *pfx,
113                int try_tmpdir)
114 {
115   const char *d;
116   size_t dlen, plen;
117 
118   if (!pfx || !pfx[0])                                           
119     {
120       pfx = "file";
121       plen = 4;
122     }
123   else
124     {
125       plen = strlen (pfx);
126       if (plen > 5)
127         plen = 5;
128     }
129 
130   if (try_tmpdir)
131     {
132       d = __secure_getenv ("TMPDIR");
133       if (d != NULL && direxists (d))
134         dir = d;
135       else if (dir != NULL && direxists (dir))
136         /* nothing */ ;
137       else
138         dir = NULL;
139     }
140   if (dir == NULL)
141     {
142       if (direxists (P_tmpdir))
143         dir = P_tmpdir;
144       else if (strcmp (P_tmpdir, "/tmp") != 0 && direxists ("/tmp"))
145         dir = "/tmp";
146       else
147         {
148           __set_errno (ENOENT);
149           return -1;
150         }
151     }
152 
153   dlen = strlen (dir);
154   while (dlen > 1 && dir[dlen - 1] == '/')
155     dlen--;                     /* remove trailing slashes */
156 
157   /* check we have room for "${dir}/${pfx}XXXXXX\0" */
158   if (tmpl_len < dlen + 1 + plen + 6 + 1)
159     {
160       __set_errno (EINVAL);
161       return -1;
162     }
163 
164   sprintf (tmpl, "%.*s/%.*sXXXXXX", (int) dlen, dir, (int) plen, pfx);
165   return 0;                                                           
166 }

__gen_tempname生成临时文件fd

fd = __gen_tempname (buf, 0, flags, __GT_FILE); __GT_FILE表示新建文件,注意上面我们获取到的临时文件路径为"/tmp/tmpfXXXXXX"(关键就在于后面的6个X)

__gen_tempname转发给gen_tempname_len函数时,增加了一个参数6,这个参数在其函数定义的注释中有解释,表示临时路径模板中至少有X_SUFFIX_LEN个"X",这是为了方便后面生成临时后缀,因为临时文件可能不止一个,它们的名字形如/tmp/tmpfxxxxxx,/tmp/tmpfxxxxxx...,其中x会被替换为不同的字符组合,从而互相区分。

后面调用try_tempname_len获取,并根据kind传入对应的创建函数指针(try_file,try_dir,try_nocreate之一)。这里我们就不深入分析是如何生成文件名的了。

看到最后的try_file函数,实际上它与我们前面看到的__gen_tempfd一样,也是调用__open打开文件的。

141 /* The __kind argument to __gen_tempname may be one of: */
142 #  define __GT_FILE 0   /* create a file */
143 #  define __GT_DIR  1   /* create a directory */
144 #  define __GT_NOCREATE 2   /* just find a name not currently in use */

//glibc/sysdeps/posix/tempname.c
332 int
333 __gen_tempname (char *tmpl, int suffixlen, int flags, int kind)
334 {
335   return gen_tempname_len (tmpl, suffixlen, flags, kind, 6); 
336 }

203 /* Generate a temporary file name based on TMPL.  TMPL must match the
204    rules for mk[s]temp (i.e., end in at least X_SUFFIX_LEN "X"s,
205    possibly with a suffix).
206    The name constructed does not exist at the time of the call to   
207    this function.  TMPL is overwritten with the result.
208 
209    KIND may be one of:
210    __GT_NOCREATE:       simply verify that the name does not exist
211                         at the time of the call.
212    __GT_FILE:           create the file using open(O_CREAT|O_EXCL)
213                         and return a read-write fd.  The file is mode 0600.
214    __GT_DIR:            create a directory, which will be mode 0700.
215 
216    We use a clever algorithm to get hard-to-predict names. */
217 #ifdef _LIBC
218 static
219 #endif
220 int
221 gen_tempname_len (char *tmpl, int suffixlen, int flags, int kind,
222                   size_t x_suffix_len)
223 {
224   static int (*const tryfunc[]) (char *, void *) =
225     {
226       [__GT_FILE] = try_file,
227       [__GT_DIR] = try_dir,
228       [__GT_NOCREATE] = try_nocreate
229     };
230   return try_tempname_len (tmpl, suffixlen, &flags, tryfunc[kind],
231                            x_suffix_len);
232 }

174 static int
175 try_file (char *tmpl, void *flags)
176 {
177   int *openflags = flags;
178   return __open (tmpl,
179                  (*openflags & ~O_ACCMODE)
180                  | O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
181 }

__unlink删除上面生成的临时文件

注意这里是比较重要的点:为什么刚生成fd之后又要删除这个临时文件呢?注释有说明,这依赖于unix的语义——一个文件只有在被关闭的时候才会真正被删除。这也正是临时文件的含义:unlink之后文件名已经从目录中移除,但通过已打开的fd仍然可以正常读写;只要调用者关闭tmpfile返回的文件流,它就会被直接删除,而不是像普通文件那样保存下来。

__fdopen打开上面的临时文件fd,返回file对象

这里的细节我们就不展开分析了:__fdopen通过内部的_IO_new_fdopen函数为对应的fd创建文件流,并返回文件流对象。

_IO_FILE_plus是对FILE file和const struct _IO_jump_t *vtable的封装,后者是为了C++streambuf做的兼容封装,保存了一些函数跳转表

#define __fdopen _IO_fdopen

//glibc/libio/iofdopen.c
163 libc_hidden_ver (_IO_new_fdopen, _IO_fdopen)

 33 FILE *
 34 _IO_new_fdopen (int fd, const char *mode) 
 35 {
 36   int read_write;
 37   struct locked_FILE
 38   {
 39     struct _IO_FILE_plus fp;
 40 #ifdef _IO_MTSAFE_IO
 41     _IO_lock_t lock;
 42 #endif
 43     struct _IO_wide_data wd;
 44   } *new_f;
 ...
151   /* For append mode, set the file offset to the end of the file if we added
152      O_APPEND to the file descriptor flags.  Don't update the offset cache
153      though, since the file handle is not active.  */
154   if (do_seek && ((read_write & (_IO_IS_APPENDING | _IO_NO_READS))
155           == (_IO_IS_APPENDING | _IO_NO_READS)))
156     {
157       off64_t new_pos = _IO_SYSSEEK (&new_f->fp.file, 0, _IO_seek_end);
158       if (new_pos == _IO_pos_BAD && errno != ESPIPE)
159     return NULL;
160     }
161   return &new_f->fp.file;
162 }

//glibc/libio/libioP.h
319 /* We always allocate an extra word following an _IO_FILE.
320    This contains a pointer to the function jump table used.
321    This is for compatibility with C++ streambuf; the word can
322    be used to smash to a pointer to a virtual function table. */
323     
324 struct _IO_FILE_plus
325 {
326   FILE file;
327   const struct _IO_jump_t *vtable;
328 };

293 struct _IO_jump_t
294 {
295     JUMP_FIELD(size_t, __dummy);
296     JUMP_FIELD(size_t, __dummy2);
297     JUMP_FIELD(_IO_finish_t, __finish);
298     JUMP_FIELD(_IO_overflow_t, __overflow);
299     JUMP_FIELD(_IO_underflow_t, __underflow);
300     JUMP_FIELD(_IO_underflow_t, __uflow);
301     JUMP_FIELD(_IO_pbackfail_t, __pbackfail);
302     /* showmany */
303     JUMP_FIELD(_IO_xsputn_t, __xsputn);
304     JUMP_FIELD(_IO_xsgetn_t, __xsgetn);
305     JUMP_FIELD(_IO_seekoff_t, __seekoff);
306     JUMP_FIELD(_IO_seekpos_t, __seekpos);
307     JUMP_FIELD(_IO_setbuf_t, __setbuf);
308     JUMP_FIELD(_IO_sync_t, __sync);
309     JUMP_FIELD(_IO_doallocate_t, __doallocate);
310     JUMP_FIELD(_IO_read_t, __read);
311     JUMP_FIELD(_IO_write_t, __write);
312     JUMP_FIELD(_IO_seek_t, __seek);
313     JUMP_FIELD(_IO_close_t, __close);
314     JUMP_FIELD(_IO_stat_t, __stat);
315     JUMP_FIELD(_IO_showmanyc_t, __showmanyc);
316     JUMP_FIELD(_IO_imbue_t, __imbue);
317 };