X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Ffts.c;h=42c8067708beeee4b5685215c6aae12367584e92;hb=f68069b334f95cfd6d18db4bb059b792beb48158;hp=374c58ad5648fb36287fbc77c8a238db165e7c33;hpb=c0221df484c4a8a32e42a595b9f6caa446cc2226;p=gnulib.git diff --git a/lib/fts.c b/lib/fts.c index 374c58ad5..42c806770 100644 --- a/lib/fts.c +++ b/lib/fts.c @@ -1,6 +1,6 @@ /* Traverse a file hierarchy. - Copyright (C) 2004-2010 Free Software Foundation, Inc. + Copyright (C) 2004-2012 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,7 +31,7 @@ * may be used to endorse or promote products derived from this software * without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE @@ -134,12 +134,21 @@ enum # define D_INO(dp) NOT_AN_INODE_NUMBER #endif +/* If possible (see max_entries, below), read no more than this many directory + entries at a time. Without this limit (i.e., when using non-NULL + fts_compar), processing a directory with 4,000,000 entries requires ~1GiB + of memory, and handling 64M entries would require 16GiB of memory. */ +#ifndef FTS_MAX_READDIR_ENTRIES +# define FTS_MAX_READDIR_ENTRIES 100000 +#endif + /* If there are more than this many entries in a directory, and the conditions mentioned below are satisfied, then sort the entries on inode number before any further processing. */ #ifndef FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD # define FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD 10000 #endif + enum { _FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD = FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD @@ -160,8 +169,6 @@ enum Fts_stat # define fchdir __fchdir # undef open # define open __open -# undef opendir -# define opendir __opendir # undef readdir # define readdir __readdir #else @@ -205,14 +212,7 @@ static unsigned short int fts_stat (FTS *, FTSENT *, bool) internal_function; static int fts_safe_changedir (FTS *, FTSENT *, int, const char *) internal_function; -#if GNULIB_FTS -# include "fts-cycle.c" -#else -static bool enter_dir (FTS *fts, FTSENT *ent) { return true; } -static void leave_dir (FTS *fts, FTSENT *ent) {} -static bool setup_dir (FTS *fts) { return true; } -static void free_dir (FTS *fts) {} -#endif +#include "fts-cycle.c" #ifndef MAX # define MAX(a,b) ((a) > (b) ? (a) : (b)) @@ -229,11 +229,6 @@ static void free_dir (FTS *fts) {} #define ISSET(opt) (sp->fts_options & (opt)) #define SET(opt) (sp->fts_options |= (opt)) -/* FIXME: make this a function */ -#define RESTORE_INITIAL_CWD(sp) \ - (fd_ring_clear (&((sp)->fts_fd_ring)), \ - FCHDIR ((sp), (ISSET (FTS_CWDFD) ? AT_FDCWD : (sp)->fts_rfd))) - /* FIXME: FTS_NOCHDIR is now misnamed. Call it FTS_USE_FULL_RELATIVE_FILE_NAMES instead. */ #define FCHDIR(sp, fd) \ @@ -297,17 +292,20 @@ fts_set_stat_required (FTSENT *p, bool required) /* FIXME: if others need this function, move it into lib/openat.c */ static inline DIR * internal_function -opendirat (int fd, char const *dir) +opendirat (int fd, char const *dir, int extra_flags, int *pdir_fd) { int new_fd = openat (fd, dir, - O_RDONLY | O_DIRECTORY | O_NOCTTY | O_NONBLOCK); + (O_RDONLY | O_DIRECTORY | O_NOCTTY | O_NONBLOCK + | extra_flags)); DIR *dirp; if (new_fd < 0) return NULL; set_cloexec_flag (new_fd, true); dirp = fdopendir (new_fd); - if (dirp == NULL) + if (dirp) + *pdir_fd = new_fd; + else { int saved_errno = errno; close (new_fd); @@ -346,6 +344,18 @@ cwd_advance_fd (FTS *sp, int fd, bool chdir_down_one) sp->fts_cwd_fd = fd; } +/* Restore the initial, pre-traversal, "working directory". + In FTS_CWDFD mode, we merely call cwd_advance_fd, otherwise, + we may actually change the working directory. + Return 0 upon success. Upon failure, set errno and return nonzero. */ +static int +restore_initial_cwd (FTS *sp) +{ + int fail = FCHDIR (sp, ISSET (FTS_CWDFD) ? AT_FDCWD : sp->fts_rfd); + fd_ring_clear (&(sp->fts_fd_ring)); + return fail; +} + /* Open the directory DIR if possible, and return a file descriptor. Return -1 and set errno on failure. It doesn't matter whether the file descriptor has read or write access. */ @@ -354,8 +364,9 @@ static inline int internal_function diropen (FTS const *sp, char const *dir) { - int open_flags = (O_RDONLY | O_DIRECTORY | O_NOCTTY | O_NONBLOCK - | (ISSET (FTS_PHYSICAL) ? O_NOFOLLOW : 0)); + int open_flags = (O_SEARCH | O_DIRECTORY | O_NOCTTY | O_NONBLOCK + | (ISSET (FTS_PHYSICAL) ? O_NOFOLLOW : 0) + | (ISSET (FTS_NOATIME) ? O_NOATIME : 0)); int fd = (ISSET (FTS_CWDFD) ? openat (sp->fts_cwd_fd, dir, open_flags) @@ -412,10 +423,11 @@ fts_open (char * const *argv, early, doing it here saves us the trouble of ensuring later (where it'd be messier) that "." can in fact be opened. If not, revert to FTS_NOCHDIR mode. */ - int fd = open (".", O_RDONLY); + int fd = open (".", + O_SEARCH | (ISSET (FTS_NOATIME) ? O_NOATIME : 0)); if (fd < 0) { - /* Even if `.' is unreadable, don't revert to FTS_NOCHDIR mode + /* Even if "." is unreadable, don't revert to FTS_NOCHDIR mode on systems like Linux+PROC_FS, where our openat emulation is good enough. Note: on a system that emulates openat via /proc, this technique can still fail, but @@ -614,10 +626,8 @@ fts_close (FTS *sp) fd_ring_clear (&sp->fts_fd_ring); -#if GNULIB_FTS if (sp->fts_leaf_optimization_works_ht) hash_free (sp->fts_leaf_optimization_works_ht); -#endif free_dir (sp); @@ -636,7 +646,7 @@ fts_close (FTS *sp) #if defined __linux__ \ && HAVE_SYS_VFS_H && HAVE_FSTATFS && HAVE_STRUCT_STATFS_F_TYPE -#include +# include /* Linux-specific constants from coreutils' src/fs.h */ # define S_MAGIC_TMPFS 0x1021994 @@ -694,7 +704,7 @@ leaf_optimization_applies (int dir_fd) switch (fs_buf.f_type) { - /* List here the file system types that lack useable dirent.d_type + /* List here the file system types that lack usable dirent.d_type info, yet for which the optimization does apply. */ case S_MAGIC_REISERFS: return true; @@ -717,9 +727,8 @@ static bool leaf_optimization_applies (int dir_fd _GL_UNUSED) { return false; } #endif -#if GNULIB_FTS /* link-count-optimization entry: - map an stat.st_dev number to a boolean: leaf_optimization_works */ + map a stat.st_dev number to a boolean: leaf_optimization_works */ struct LCO_ent { dev_t st_dev; @@ -799,7 +808,6 @@ link_count_optimize_ok (FTSENT const *p) return opt_ok; } -#endif /* * Special case of "/" at the end of the file name so that slashes aren't @@ -916,6 +924,27 @@ fts_read (register FTS *sp) /* Move to the next node on this level. */ next: tmp = p; + + /* If we have so many directory entries that we're reading them + in batches, and we've reached the end of the current batch, + read in a new batch. */ + if (p->fts_link == NULL && p->fts_parent->fts_dirp) + { + p = tmp->fts_parent; + sp->fts_cur = p; + sp->fts_path[p->fts_pathlen] = '\0'; + + if ((p = fts_build (sp, BREAD)) == NULL) + { + if (ISSET(FTS_STOP)) + return NULL; + goto cd_dot_dot; + } + + free(tmp); + goto name; + } + if ((p = p->fts_link) != NULL) { sp->fts_cur = p; free(tmp); @@ -926,7 +955,7 @@ next: tmp = p; * root. */ if (p->fts_level == FTS_ROOTLEVEL) { - if (RESTORE_INITIAL_CWD(sp)) { + if (restore_initial_cwd(sp)) { SET(FTS_STOP); return (NULL); } @@ -1004,6 +1033,7 @@ check_for_dir: } return p; } +cd_dot_dot: /* Move up to the parent node. */ p = tmp->fts_parent; @@ -1032,7 +1062,7 @@ check_for_dir: * one level, via "..". */ if (p->fts_level == FTS_ROOTLEVEL) { - if (RESTORE_INITIAL_CWD(sp)) { + if (restore_initial_cwd(sp)) { p->fts_errno = errno; SET(FTS_STOP); } @@ -1200,6 +1230,25 @@ set_stat_type (struct stat *st, unsigned int dtype) st->st_mode = type; } +#define closedir_and_clear(dirp) \ + do \ + { \ + closedir (dirp); \ + dirp = NULL; \ + } \ + while (0) + +#define fts_opendir(file, Pdir_fd) \ + opendirat((! ISSET(FTS_NOCHDIR) && ISSET(FTS_CWDFD) \ + ? sp->fts_cwd_fd : AT_FDCWD), \ + file, \ + (((ISSET(FTS_PHYSICAL) \ + && ! (ISSET(FTS_COMFOLLOW) \ + && cur->fts_level == FTS_ROOTLEVEL)) \ + ? O_NOFOLLOW : 0) \ + | (ISSET (FTS_NOATIME) ? O_NOATIME : 0)), \ + Pdir_fd) + /* * This is the tricky part -- do not casually change *anything* in here. The * idea is to build the linked list of entries that are used by fts_children @@ -1218,11 +1267,9 @@ static FTSENT * internal_function fts_build (register FTS *sp, int type) { - register struct dirent *dp; register FTSENT *p, *head; register size_t nitems; - FTSENT *cur, *tail; - DIR *dirp; + FTSENT *tail; void *oldaddr; int saved_errno; bool descend; @@ -1232,51 +1279,72 @@ fts_build (register FTS *sp, int type) bool nostat; size_t len, maxlen, new_len; char *cp; - - /* Set current node pointer. */ - cur = sp->fts_cur; - - /* - * Open the directory for reading. If this fails, we're done. - * If being called from fts_read, set the fts_info field. - */ -#if defined FTS_WHITEOUT && 0 - if (ISSET(FTS_WHITEOUT)) - oflag = DTF_NODUP|DTF_REWIND; + int dir_fd; + FTSENT *cur = sp->fts_cur; + bool continue_readdir = !!cur->fts_dirp; + + /* When cur->fts_dirp is non-NULL, that means we should + continue calling readdir on that existing DIR* pointer + rather than opening a new one. */ + if (continue_readdir) + { + DIR *dp = cur->fts_dirp; + dir_fd = dirfd (dp); + if (dir_fd < 0) + { + closedir_and_clear (cur->fts_dirp); + if (type == BREAD) + { + cur->fts_info = FTS_DNR; + cur->fts_errno = errno; + } + return NULL; + } + } else - oflag = DTF_HIDEW|DTF_NODUP|DTF_REWIND; -#else -# define __opendir2(file, flag) \ - ( ! ISSET(FTS_NOCHDIR) && ISSET(FTS_CWDFD) \ - ? opendirat(sp->fts_cwd_fd, file) \ - : opendir(file)) -#endif - if ((dirp = __opendir2(cur->fts_accpath, oflag)) == NULL) { - if (type == BREAD) { - cur->fts_info = FTS_DNR; - cur->fts_errno = errno; - } - return (NULL); - } - /* Rather than calling fts_stat for each and every entry encountered - in the readdir loop (below), stat each directory only right after - opening it. */ - if (cur->fts_info == FTS_NSOK) - cur->fts_info = fts_stat(sp, cur, false); - else if (sp->fts_options & FTS_TIGHT_CYCLE_CHECK) { + { + /* Open the directory for reading. If this fails, we're done. + If being called from fts_read, set the fts_info field. */ + if ((cur->fts_dirp = fts_opendir(cur->fts_accpath, &dir_fd)) == NULL) + { + if (type == BREAD) + { + cur->fts_info = FTS_DNR; + cur->fts_errno = errno; + } + return NULL; + } + /* Rather than calling fts_stat for each and every entry encountered + in the readdir loop (below), stat each directory only right after + opening it. */ + if (cur->fts_info == FTS_NSOK) + cur->fts_info = fts_stat(sp, cur, false); + else if (sp->fts_options & FTS_TIGHT_CYCLE_CHECK) + { /* Now read the stat info again after opening a directory to - * reveal eventual changes caused by a submount triggered by - * the traversal. But do it only for utilities which use - * FTS_TIGHT_CYCLE_CHECK. Therefore, only find and du - * benefit/suffer from this feature for now. - */ + reveal eventual changes caused by a submount triggered by + the traversal. But do it only for utilities which use + FTS_TIGHT_CYCLE_CHECK. Therefore, only find and du + benefit/suffer from this feature for now. */ LEAVE_DIR (sp, cur, "4"); fts_stat (sp, cur, false); - if (! enter_dir (sp, cur)) { - __set_errno (ENOMEM); - return NULL; - } - } + if (! enter_dir (sp, cur)) + { + __set_errno (ENOMEM); + return NULL; + } + } + } + + /* Maximum number of readdir entries to read at one time. This + limitation is to avoid reading millions of entries into memory + at once. When an fts_compar function is specified, we have no + choice: we must read all entries into memory before calling that + function. But when no such function is specified, we can read + entries in batches that are large enough to help us with inode- + sorting, yet not so large that we risk exhausting memory. */ + size_t max_entries = (sp->fts_compar == NULL + ? FTS_MAX_READDIR_ENTRIES : SIZE_MAX); /* * Nlinks is the number of possible entries of type directory in the @@ -1311,22 +1379,28 @@ fts_build (register FTS *sp, int type) * needed sorted entries or stat information, they had better be * checking FTS_NS on the returned nodes. */ - if (nlinks || type == BREAD) { - int dir_fd = dirfd(dirp); - if (ISSET(FTS_CWDFD) && 0 <= dir_fd) + if (continue_readdir) + { + /* When resuming a short readdir run, we already have + the required dirp and dir_fd. */ + descend = true; + } + else if (nlinks || type == BREAD) { + if (ISSET(FTS_CWDFD)) { dir_fd = dup (dir_fd); - set_cloexec_flag (dir_fd, true); + if (0 <= dir_fd) + set_cloexec_flag (dir_fd, true); } if (dir_fd < 0 || fts_safe_changedir(sp, cur, dir_fd, NULL)) { if (nlinks && type == BREAD) cur->fts_errno = errno; cur->fts_flags |= FTS_DONTCHDIR; descend = false; - closedir(dirp); + closedir_and_clear(cur->fts_dirp); if (ISSET(FTS_CWDFD) && 0 <= dir_fd) close (dir_fd); - dirp = NULL; + cur->fts_dirp = NULL; } else descend = true; } else @@ -1355,11 +1429,16 @@ fts_build (register FTS *sp, int type) level = cur->fts_level + 1; - /* Read the directory, attaching each entry to the `link' pointer. */ + /* Read the directory, attaching each entry to the "link" pointer. */ doadjust = false; - for (head = tail = NULL, nitems = 0; dirp && (dp = readdir(dirp));) { + head = NULL; + tail = NULL; + nitems = 0; + while (cur->fts_dirp) { bool is_dir; - + struct dirent *dp = readdir(cur->fts_dirp); + if (dp == NULL) + break; if (!ISSET(FTS_SEEDOT) && ISDOT(dp->d_name)) continue; @@ -1378,7 +1457,7 @@ fts_build (register FTS *sp, int type) mem1: saved_errno = errno; free(p); fts_lfree(head); - closedir(dirp); + closedir_and_clear(cur->fts_dirp); cur->fts_info = FTS_ERR; SET(FTS_STOP); __set_errno (saved_errno); @@ -1403,7 +1482,7 @@ mem1: saved_errno = errno; */ free(p); fts_lfree(head); - closedir(dirp); + closedir_and_clear(cur->fts_dirp); cur->fts_info = FTS_ERR; SET(FTS_STOP); __set_errno (ENAMETOOLONG); @@ -1413,10 +1492,6 @@ mem1: saved_errno = errno; p->fts_parent = sp->fts_cur; p->fts_pathlen = new_len; -#if defined FTS_WHITEOUT && 0 - if (dp->d_type == DT_WHT) - p->fts_flags |= FTS_ISW; -#endif /* Store dirent.d_ino, in case we need to sort entries before processing them. */ p->fts_statp->st_ino = D_INO (dp); @@ -1472,9 +1547,18 @@ mem1: saved_errno = errno; tail = p; } ++nitems; + if (max_entries <= nitems) { + /* When there are too many dir entries, leave + fts_dirp open, so that a subsequent fts_read + can take up where we leave off. */ + goto break_without_closedir; + } } - if (dirp) - closedir(dirp); + + if (cur->fts_dirp) + closedir_and_clear(cur->fts_dirp); + + break_without_closedir: /* * If realloc() changed the address of the file name, adjust the @@ -1500,9 +1584,9 @@ mem1: saved_errno = errno; * to an empty directory, we wind up here with no other way back. If * can't get back, we're done. */ - if (descend && (type == BCHILD || !nitems) && + if (!continue_readdir && descend && (type == BCHILD || !nitems) && (cur->fts_level == FTS_ROOTLEVEL - ? RESTORE_INITIAL_CWD(sp) + ? restore_initial_cwd(sp) : fts_safe_changedir(sp, cur->fts_parent, -1, ".."))) { cur->fts_info = FTS_ERR; SET(FTS_STOP); @@ -1657,7 +1741,7 @@ fd_ring_check (FTS const *sp) int fd = i_ring_pop (&fd_w); if (0 <= fd) { - int parent_fd = openat (cwd_fd, "..", O_RDONLY); + int parent_fd = openat (cwd_fd, "..", O_SEARCH | O_NOATIME); if (parent_fd < 0) { // Warn? @@ -1691,15 +1775,6 @@ fts_stat(FTS *sp, register FTSENT *p, bool follow) if (p->fts_level == FTS_ROOTLEVEL && ISSET(FTS_COMFOLLOW)) follow = true; -#if defined FTS_WHITEOUT && 0 - /* check for whiteout */ - if (p->fts_flags & FTS_ISW) { - memset(sbp, '\0', sizeof (*sbp)); - sbp->st_mode = S_IFWHT; - return (FTS_W); - } -#endif - /* * If doing a logical walk, or application requested FTS_FOLLOW, do * a stat(2). If that fails, check for a non-existent symlink. If @@ -1731,27 +1806,6 @@ err: memset(sbp, 0, sizeof(struct stat)); return (p->fts_level == FTS_ROOTLEVEL ? FTS_D : FTS_DOT); } -#if !GNULIB_FTS - { - /* - * Cycle detection is done by brute force when the directory - * is first encountered. If the tree gets deep enough or the - * number of symbolic links to directories is high enough, - * something faster might be worthwhile. - */ - FTSENT *t; - - for (t = p->fts_parent; - t->fts_level >= FTS_ROOTLEVEL; t = t->fts_parent) - if (sbp->st_ino == t->fts_statp->st_ino - && sbp->st_dev == t->fts_statp->st_dev) - { - p->fts_cycle = t; - return (FTS_DC); - } - } -#endif - return (FTS_D); } if (S_ISLNK(sbp->st_mode)) @@ -1847,6 +1901,7 @@ fts_alloc (FTS *sp, const char *name, register size_t namelen) p->fts_fts = sp; p->fts_path = sp->fts_path; p->fts_errno = 0; + p->fts_dirp = NULL; p->fts_flags = 0; p->fts_instr = FTS_NOINSTR; p->fts_number = 0; @@ -1863,6 +1918,8 @@ fts_lfree (register FTSENT *head) /* Free a linked list of structures. */ while ((p = head)) { head = head->fts_link; + if (p->fts_dirp) + closedir (p->fts_dirp); free(p); } } @@ -1931,7 +1988,7 @@ fts_padjust (FTS *sp, FTSENT *head) } static size_t -internal_function +internal_function _GL_ATTRIBUTE_PURE fts_maxarglen (char * const *argv) { size_t len, max; @@ -1947,7 +2004,7 @@ fts_maxarglen (char * const *argv) * tricked by someone changing the world out from underneath us. * Assumes p->fts_statp->st_dev and p->fts_statp->st_ino are filled in. * If FD is non-negative, expect it to be used after this function returns, - * and to be closed eventually. So don't pass e.g., `dirfd(dirp)' and then + * and to be closed eventually. So don't pass e.g., 'dirfd(dirp)' and then * do closedir(dirp), because that would invalidate the saved FD. * Upon failure, close FD immediately and return nonzero. */ @@ -1996,7 +2053,7 @@ fts_safe_changedir (FTS *sp, FTSENT *p, int fd, char const *dir) return -1; /* The following dev/inode check is necessary if we're doing a - `logical' traversal (through symlinks, a la chown -L), if the + "logical" traversal (through symlinks, a la chown -L), if the system lacks O_NOFOLLOW support, or if we're changing to ".." (but not via a popped file descriptor). When changing to the name "..", O_NOFOLLOW can't help. In general, when the target is