X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Ffts.c;h=21c66583ac48c9b2e0a2b5863fe67bfb3fde5378;hb=80074d103cc72ea8f289b82c56bd3734aac82cd7;hp=c15f04c8f9a540bda237dc9056e09d6afeb9ec45;hpb=7c17dff3facb761377c5138c55faa2460d77e299;p=gnulib.git diff --git a/lib/fts.c b/lib/fts.c index c15f04c8f..21c66583a 100644 --- a/lib/fts.c +++ b/lib/fts.c @@ -1,11 +1,11 @@ /* Traverse a file hierarchy. - Copyright (C) 2004, 2005, 2006 Free Software Foundation, Inc. + Copyright (C) 2004-2009 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or modify + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -13,8 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + along with this program. If not, see . */ /*- * Copyright (c) 1990, 1993, 1994 @@ -63,7 +62,6 @@ static char sccsid[] = "@(#)fts.c 8.6 (Berkeley) 8/14/94"; #endif #include #include -#include "dirfd.h" #include #include #include @@ -71,9 +69,13 @@ static char sccsid[] = "@(#)fts.c 8.6 (Berkeley) 8/14/94"; #if ! _LIBC # include "fcntl--.h" -# include "lstat.h" -# include "openat.h" +# include "dirent--.h" # include "unistd--.h" +/* FIXME - use fcntl(F_DUPFD_CLOEXEC)/openat(O_CLOEXEC) once they are + supported. */ +# include "cloexec.h" +# include "openat.h" +# include "same-inode.h" #endif #include @@ -86,13 +88,65 @@ static char sccsid[] = "@(#)fts.c 8.6 (Berkeley) 8/14/94"; # define DT_IS_KNOWN(d) ((d)->d_type != DT_UNKNOWN) /* True if the type of the directory entry D must be T. */ # define DT_MUST_BE(d, t) ((d)->d_type == (t)) +# define D_TYPE(d) ((d)->d_type) #else # define DT_IS_KNOWN(d) false # define DT_MUST_BE(d, t) false +# define D_TYPE(d) DT_UNKNOWN + +# undef DT_UNKNOWN +# define DT_UNKNOWN 0 + +/* Any nonzero values will do here, so long as they're distinct. + Undef any existing macros out of the way. */ +# undef DT_BLK +# undef DT_CHR +# undef DT_DIR +# undef DT_FIFO +# undef DT_LNK +# undef DT_REG +# undef DT_SOCK +# define DT_BLK 1 +# define DT_CHR 2 +# define DT_DIR 3 +# define DT_FIFO 4 +# define DT_LNK 5 +# define DT_REG 6 +# define DT_SOCK 7 +#endif + +#ifndef S_IFLNK +# define S_IFLNK 0 +#endif +#ifndef S_IFSOCK +# define S_IFSOCK 0 +#endif + +enum +{ + NOT_AN_INODE_NUMBER = 0 +}; + +#ifdef D_INO_IN_DIRENT +# define D_INO(dp) (dp)->d_ino +#else +/* Some systems don't have inodes, so fake them to avoid lots of ifdefs. */ +# define D_INO(dp) NOT_AN_INODE_NUMBER #endif +/* If there are more than this many entries in a directory, + and the conditions mentioned below are satisfied, then sort + the entries on inode number before any further processing. */ +#ifndef FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD +# define FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD 10000 +#endif enum { + _FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD = FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD +}; + +enum Fts_stat +{ FTS_NO_STAT_REQUIRED = 1, FTS_STAT_REQUIRED = 2 }; @@ -119,16 +173,6 @@ enum # define __set_errno(Val) errno = (Val) #endif -#ifndef __attribute__ -# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 8) || __STRICT_ANSI__ -# define __attribute__(x) /* empty */ -# endif -#endif - -#ifndef ATTRIBUTE_UNUSED -# define ATTRIBUTE_UNUSED __attribute__ ((__unused__)) -#endif - /* If this host provides the openat function, then we can avoid attempting to open "." in some initialization code below. */ #ifdef HAVE_OPENAT @@ -137,6 +181,18 @@ enum # define HAVE_OPENAT_SUPPORT 0 #endif +#ifdef NDEBUG +# define fts_assert(expr) ((void) 0) +#else +# define fts_assert(expr) \ + do \ + { \ + if (!(expr)) \ + abort (); \ + } \ + while (false) +#endif + static FTSENT *fts_alloc (FTS *, const char *, size_t) internal_function; static FTSENT *fts_build (FTS *, int) internal_function; static void fts_lfree (FTSENT *) internal_function; @@ -149,14 +205,13 @@ static unsigned short int fts_stat (FTS *, FTSENT *, bool) internal_function; static int fts_safe_changedir (FTS *, FTSENT *, int, const char *) internal_function; -#if _LGPL_PACKAGE +#if GNULIB_FTS +# include "fts-cycle.c" +#else static bool enter_dir (FTS *fts, FTSENT *ent) { return true; } static void leave_dir (FTS *fts, FTSENT *ent) {} static bool setup_dir (FTS *fts) { return true; } static void free_dir (FTS *fts) {} -#else -# include "fcntl--.h" -# include "fts-cycle.c" #endif #ifndef MAX @@ -167,10 +222,6 @@ static void free_dir (FTS *fts) {} # define SIZE_MAX ((size_t) -1) #endif -#ifndef O_DIRECTORY -# define O_DIRECTORY 0 -#endif - #define ISDOT(a) (a[0] == '.' && (!a[1] || (a[1] == '.' && !a[2]))) #define STREQ(a, b) (strcmp ((a), (b)) == 0) @@ -178,17 +229,21 @@ static void free_dir (FTS *fts) {} #define ISSET(opt) (sp->fts_options & (opt)) #define SET(opt) (sp->fts_options |= (opt)) -#define RESTORE_INITIAL_CWD(sp) FCHDIR (sp, (ISSET (FTS_CWDFD) \ - ? AT_FDCWD \ - : sp->fts_rfd)) +/* FIXME: make this a function */ +#define RESTORE_INITIAL_CWD(sp) \ + (fd_ring_clear (&((sp)->fts_fd_ring)), \ + FCHDIR ((sp), (ISSET (FTS_CWDFD) ? AT_FDCWD : (sp)->fts_rfd))) -#define FCHDIR(sp, fd) (!ISSET(FTS_NOCHDIR) \ - && (ISSET(FTS_CWDFD) \ - ? (cwd_advance_fd (sp, fd), 0) \ - : fchdir(fd))) +/* FIXME: FTS_NOCHDIR is now misnamed. + Call it FTS_USE_FULL_RELATIVE_FILE_NAMES instead. */ +#define FCHDIR(sp, fd) \ + (!ISSET(FTS_NOCHDIR) && (ISSET(FTS_CWDFD) \ + ? (cwd_advance_fd ((sp), (fd), true), 0) \ + : fchdir (fd))) /* fts_build flags */ +/* FIXME: make this an enum */ #define BCHILD 1 /* fts_children */ #define BNAMES 2 /* fts_children, names only */ #define BREAD 3 /* fts_read */ @@ -197,10 +252,13 @@ static void free_dir (FTS *fts) {} # include # include # include +# include "getcwdat.h" bool fts_debug = false; -# define Dprintf(x) do { if (fts_debug) printf x; } while (0) +# define Dprintf(x) do { if (fts_debug) printf x; } while (false) #else # define Dprintf(x) +# define fd_ring_check(x) +# define fd_ring_print(a, b, c) #endif #define LEAVE_DIR(Fts, Ent, Tag) \ @@ -208,17 +266,28 @@ bool fts_debug = false; { \ Dprintf ((" %s-leaving: %s\n", Tag, (Ent)->fts_path)); \ leave_dir (Fts, Ent); \ + fd_ring_check (Fts); \ } \ while (false) +static void +fd_ring_clear (I_ring *fd_ring) +{ + while ( ! i_ring_empty (fd_ring)) + { + int fd = i_ring_pop (fd_ring); + if (0 <= fd) + close (fd); + } +} + /* Overload the fts_statp->st_size member (otherwise unused, when fts_info is FTS_NSOK) to indicate whether fts_read should stat this entry or not. */ static void fts_set_stat_required (FTSENT *p, bool required) { - if (p->fts_info != FTS_NSOK) - abort (); + fts_assert (p->fts_info == FTS_NSOK); p->fts_statp->st_size = (required ? FTS_STAT_REQUIRED : FTS_NO_STAT_REQUIRED); @@ -230,11 +299,13 @@ static inline DIR * internal_function opendirat (int fd, char const *dir) { - int new_fd = openat (fd, dir, O_RDONLY); + int new_fd = openat (fd, dir, + O_RDONLY | O_DIRECTORY | O_NOCTTY | O_NONBLOCK); DIR *dirp; if (new_fd < 0) return NULL; + set_cloexec_flag (new_fd, true); dirp = fdopendir (new_fd); if (dirp == NULL) { @@ -245,19 +316,34 @@ opendirat (int fd, char const *dir) return dirp; } -/* Virtual fchdir. Advance SP's working directory - file descriptor, SP->fts_cwd_fd, to FD, and close - the previous one, ignoring any error. */ +/* Virtual fchdir. Advance SP's working directory file descriptor, + SP->fts_cwd_fd, to FD, and push the previous value onto the fd_ring. + CHDIR_DOWN_ONE is true if FD corresponds to an entry in the directory + open on sp->fts_cwd_fd; i.e., to move the working directory one level + down. */ static void internal_function -cwd_advance_fd (FTS *sp, int fd) +cwd_advance_fd (FTS *sp, int fd, bool chdir_down_one) { int old = sp->fts_cwd_fd; - if (old == fd && old != AT_FDCWD) - abort (); + fts_assert (old != fd || old == AT_FDCWD); + + if (chdir_down_one) + { + /* Push "old" onto the ring. + If the displaced file descriptor is non-negative, close it. */ + int prev_fd_in_slot = i_ring_push (&sp->fts_fd_ring, old); + fd_ring_print (sp, stderr, "post-push"); + if (0 <= prev_fd_in_slot) + close (prev_fd_in_slot); /* ignore any close failure */ + } + else if ( ! ISSET (FTS_NOCHDIR)) + { + if (0 <= old) + close (old); /* ignore any close failure */ + } + sp->fts_cwd_fd = fd; - if (0 <= old) - close (old); /* ignore any close failure */ } /* Open the directory DIR if possible, and return a file @@ -271,9 +357,12 @@ diropen (FTS const *sp, char const *dir) int open_flags = (O_RDONLY | O_DIRECTORY | O_NOCTTY | O_NONBLOCK | (ISSET (FTS_PHYSICAL) ? O_NOFOLLOW : 0)); - return (ISSET (FTS_CWDFD) - ? openat (sp->fts_cwd_fd, dir, open_flags) - : open (dir, open_flags)); + int fd = (ISSET (FTS_CWDFD) + ? openat (sp->fts_cwd_fd, dir, open_flags) + : open (dir, open_flags)); + if (0 <= fd) + set_cloexec_flag (fd, true); + return fd; } FTS * @@ -449,6 +538,7 @@ fts_open (char * const *argv, && (sp->fts_rfd = diropen (sp, ".")) < 0) SET(FTS_NOCHDIR); + i_ring_init (&sp->fts_fd_ring, -1); return (sp); mem3: fts_lfree(root); @@ -480,7 +570,6 @@ fts_load (FTS *sp, register FTSENT *p) p->fts_namelen = len; } p->fts_accpath = p->fts_path = sp->fts_path; - sp->fts_dev = p->fts_statp->st_dev; } int @@ -512,16 +601,29 @@ fts_close (FTS *sp) if (ISSET(FTS_CWDFD)) { if (0 <= sp->fts_cwd_fd) - close (sp->fts_cwd_fd); + if (close (sp->fts_cwd_fd)) + saved_errno = errno; } else if (!ISSET(FTS_NOCHDIR)) { /* Return to original directory, save errno if necessary. */ if (fchdir(sp->fts_rfd)) saved_errno = errno; - close(sp->fts_rfd); + + /* If close fails, record errno only if saved_errno is zero, + so that we report the probably-more-meaningful fchdir errno. */ + if (close (sp->fts_rfd)) + if (saved_errno == 0) + saved_errno = errno; } + fd_ring_clear (&sp->fts_fd_ring); + +#if GNULIB_FTS + if (sp->fts_leaf_optimization_works_ht) + hash_free (sp->fts_leaf_optimization_works_ht); +#endif + free_dir (sp); /* Free up the stream pointer. */ @@ -536,6 +638,174 @@ fts_close (FTS *sp) return (0); } +#if defined __linux__ \ + && HAVE_SYS_VFS_H && HAVE_FSTATFS && HAVE_STRUCT_STATFS_F_TYPE + +#include + +/* Linux-specific constants from coreutils' src/fs.h */ +# define S_MAGIC_TMPFS 0x1021994 +# define S_MAGIC_NFS 0x6969 +# define S_MAGIC_REISERFS 0x52654973 +# define S_MAGIC_PROC 0x9FA0 + +/* Return false if it is easy to determine the file system type of + the directory on which DIR_FD is open, and sorting dirents on + inode numbers is known not to improve traversal performance with + that type of file system. Otherwise, return true. */ +static bool +dirent_inode_sort_may_be_useful (int dir_fd) +{ + /* Skip the sort only if we can determine efficiently + that skipping it is the right thing to do. + The cost of performing an unnecessary sort is negligible, + while the cost of *not* performing it can be O(N^2) with + a very large constant. */ + struct statfs fs_buf; + + /* If fstatfs fails, assume sorting would be useful. */ + if (fstatfs (dir_fd, &fs_buf) != 0) + return true; + + /* FIXME: what about when f_type is not an integral type? + deal with that if/when it's encountered. */ + switch (fs_buf.f_type) + { + case S_MAGIC_TMPFS: + case S_MAGIC_NFS: + /* On a file system of any of these types, sorting + is unnecessary, and hence wasteful. */ + return false; + + default: + return true; + } +} + +/* Given a file descriptor DIR_FD open on a directory D, + return true if it is valid to apply the leaf-optimization + technique of counting directories in D via stat.st_nlink. */ +static bool +leaf_optimization_applies (int dir_fd) +{ + struct statfs fs_buf; + + /* If fstatfs fails, assume we can't use the optimization. */ + if (fstatfs (dir_fd, &fs_buf) != 0) + return false; + + /* FIXME: do we need to detect AFS mount points? I doubt it, + unless fstatfs can report S_MAGIC_REISERFS for such a directory. */ + + switch (fs_buf.f_type) + { + /* List here the file system types that lack useable dirent.d_type + info, yet for which the optimization does apply. */ + case S_MAGIC_REISERFS: + return true; + + case S_MAGIC_PROC: + /* Explicitly listing this or any other file system type for which + the optimization is not applicable is not necessary, but we leave + it here to document the risk. Per http://bugs.debian.org/143111, + /proc may have bogus stat.st_nlink values. */ + /* fall through */ + default: + return false; + } +} + +#else +static bool +dirent_inode_sort_may_be_useful (int dir_fd _UNUSED_PARAMETER_) { return true; } +static bool +leaf_optimization_applies (int dir_fd _UNUSED_PARAMETER_) { return false; } +#endif + +#if GNULIB_FTS +/* link-count-optimization entry: + map an stat.st_dev number to a boolean: leaf_optimization_works */ +struct LCO_ent +{ + dev_t st_dev; + bool opt_ok; +}; + +/* Use a tiny initial size. If a traversal encounters more than + a few devices, the cost of growing/rehashing this table will be + rendered negligible by the number of inodes processed. */ +enum { LCO_HT_INITIAL_SIZE = 13 }; + +static size_t +LCO_hash (void const *x, size_t table_size) +{ + struct LCO_ent const *ax = x; + return (uintmax_t) ax->st_dev % table_size; +} + +static bool +LCO_compare (void const *x, void const *y) +{ + struct LCO_ent const *ax = x; + struct LCO_ent const *ay = y; + return ax->st_dev == ay->st_dev; +} + +/* Ask the same question as leaf_optimization_applies, but query + the cache first (FTS.fts_leaf_optimization_works_ht), and if necessary, + update that cache. */ +static bool +link_count_optimize_ok (FTSENT const *p) +{ + FTS *sp = p->fts_fts; + Hash_table *h = sp->fts_leaf_optimization_works_ht; + struct LCO_ent tmp; + struct LCO_ent *ent; + bool opt_ok; + struct LCO_ent *t2; + + /* If we're not in CWDFD mode, don't bother with this optimization, + since the caller is not serious about performance. */ + if (!ISSET(FTS_CWDFD)) + return false; + + /* map st_dev to the boolean, leaf_optimization_works */ + if (h == NULL) + { + h = sp->fts_leaf_optimization_works_ht + = hash_initialize (LCO_HT_INITIAL_SIZE, NULL, LCO_hash, + LCO_compare, free); + if (h == NULL) + return false; + } + tmp.st_dev = p->fts_statp->st_dev; + ent = hash_lookup (h, &tmp); + if (ent) + return ent->opt_ok; + + /* Look-up failed. Query directly and cache the result. */ + t2 = malloc (sizeof *t2); + if (t2 == NULL) + return false; + + /* Is it ok to perform the optimization in the dir, FTS_CWD_FD? */ + opt_ok = leaf_optimization_applies (sp->fts_cwd_fd); + t2->opt_ok = opt_ok; + t2->st_dev = p->fts_statp->st_dev; + + ent = hash_insert (h, t2); + if (ent == NULL) + { + /* insertion failed */ + free (t2); + return false; + } + fts_assert (ent == t2); + + return opt_ok; +} +#endif + /* * Special case of "/" at the end of the file name so that slashes aren't * appended which would cause file names to be written as "....//foo". @@ -639,7 +909,7 @@ fts_read (register FTS *sp) /* If fts_build's call to fts_safe_changedir failed because it was not able to fchdir into a subdirectory, tell the caller. */ - if (p->fts_errno) + if (p->fts_errno && p->fts_info != FTS_DNR) p->fts_info = FTS_ERR; LEAVE_DIR (sp, p, "2"); return (p); @@ -652,6 +922,7 @@ fts_read (register FTS *sp) /* Move to the next node on this level. */ next: tmp = p; if ((p = p->fts_link) != NULL) { + sp->fts_cur = p; free(tmp); /* @@ -662,10 +933,11 @@ next: tmp = p; if (p->fts_level == FTS_ROOTLEVEL) { if (RESTORE_INITIAL_CWD(sp)) { SET(FTS_STOP); - sp->fts_cur = p; return (NULL); } + free_dir(sp); fts_load(sp, p); + setup_dir(sp); goto check_for_dir; } @@ -692,23 +964,43 @@ name: t = sp->fts_path + NAPPEND(p->fts_parent); *t++ = '/'; memmove(t, p->fts_name, p->fts_namelen + 1); check_for_dir: + sp->fts_cur = p; if (p->fts_info == FTS_NSOK) { - switch (p->fts_statp->st_size) + if (p->fts_statp->st_size == FTS_STAT_REQUIRED) { - case FTS_STAT_REQUIRED: - p->fts_info = fts_stat(sp, p, false); - break; - case FTS_NO_STAT_REQUIRED: - break; - default: - abort (); + FTSENT *parent = p->fts_parent; + if (FTS_ROOTLEVEL < p->fts_level + /* ->fts_n_dirs_remaining is not valid + for command-line-specified names. */ + && parent->fts_n_dirs_remaining == 0 + && ISSET(FTS_NOSTAT) + && ISSET(FTS_PHYSICAL) + && link_count_optimize_ok (parent)) + { + /* nothing more needed */ + } + else + { + p->fts_info = fts_stat(sp, p, false); + if (S_ISDIR(p->fts_statp->st_mode) + && p->fts_level != FTS_ROOTLEVEL + && parent->fts_n_dirs_remaining) + parent->fts_n_dirs_remaining--; + } } + else + fts_assert (p->fts_statp->st_size == FTS_NO_STAT_REQUIRED); } - sp->fts_cur = p; + if (p->fts_info == FTS_D) { - Dprintf ((" %s-entering: %s\n", sp, p->fts_path)); + /* Now that P->fts_statp is guaranteed to be valid, + if this is a command-line directory, record its + device number, to be used for FTS_XDEV. */ + if (p->fts_level == FTS_ROOTLEVEL) + sp->fts_dev = p->fts_statp->st_dev; + Dprintf ((" entering: %s\n", p->fts_path)); if (! enter_dir (sp, p)) { __set_errno (ENOMEM); @@ -720,6 +1012,7 @@ check_for_dir: /* Move up to the parent node. */ p = tmp->fts_parent; + sp->fts_cur = p; free(tmp); if (p->fts_level == FTS_ROOTPARENTLEVEL) { @@ -732,8 +1025,7 @@ check_for_dir: return (sp->fts_cur = NULL); } - if (p->fts_info == FTS_NSOK) - abort (); + fts_assert (p->fts_info != FTS_NSOK); /* NUL terminate the file name. */ sp->fts_path[p->fts_pathlen] = '\0'; @@ -766,7 +1058,6 @@ check_for_dir: p->fts_info = p->fts_errno ? FTS_ERR : FTS_DP; if (p->fts_errno == 0) LEAVE_DIR (sp, p, "3"); - sp->fts_cur = p; return ISSET(FTS_STOP) ? NULL : p; } @@ -778,7 +1069,7 @@ check_for_dir: */ /* ARGSUSED */ int -fts_set(FTS *sp ATTRIBUTE_UNUSED, FTSENT *p, int instr) +fts_set(FTS *sp _UNUSED_PARAMETER_, FTSENT *p, int instr) { if (instr != 0 && instr != FTS_AGAIN && instr != FTS_FOLLOW && instr != FTS_NOINSTR && instr != FTS_SKIP) { @@ -851,7 +1142,7 @@ fts_children (register FTS *sp, int instr) sp->fts_child = fts_build(sp, instr); if (ISSET(FTS_CWDFD)) { - cwd_advance_fd (sp, fd); + cwd_advance_fd (sp, fd, true); } else { @@ -867,6 +1158,53 @@ fts_children (register FTS *sp, int instr) return (sp->fts_child); } +/* A comparison function to sort on increasing inode number. + For some file system types, sorting either way makes a huge + performance difference for a directory with very many entries, + but sorting on increasing values is slightly better than sorting + on decreasing values. The difference is in the 5% range. */ +static int +fts_compare_ino (struct _ftsent const **a, struct _ftsent const **b) +{ + return (a[0]->fts_statp->st_ino < b[0]->fts_statp->st_ino ? -1 + : b[0]->fts_statp->st_ino < a[0]->fts_statp->st_ino ? 1 : 0); +} + +/* Map the dirent.d_type value, DTYPE, to the corresponding stat.st_mode + S_IF* bit and set ST.st_mode, thus clearing all other bits in that field. */ +static void +set_stat_type (struct stat *st, unsigned int dtype) +{ + mode_t type; + switch (dtype) + { + case DT_BLK: + type = S_IFBLK; + break; + case DT_CHR: + type = S_IFCHR; + break; + case DT_DIR: + type = S_IFDIR; + break; + case DT_FIFO: + type = S_IFIFO; + break; + case DT_LNK: + type = S_IFLNK; + break; + case DT_REG: + type = S_IFREG; + break; + case DT_SOCK: + type = S_IFSOCK; + break; + default: + type = 0; + } + st->st_mode = type; +} + /* * This is the tricky part -- do not casually change *anything* in here. The * idea is to build the linked list of entries that are used by fts_children @@ -930,6 +1268,20 @@ fts_build (register FTS *sp, int type) opening it. */ if (cur->fts_info == FTS_NSOK) cur->fts_info = fts_stat(sp, cur, false); + else if (sp->fts_options & FTS_TIGHT_CYCLE_CHECK) { + /* Now read the stat info again after opening a directory to + * reveal eventual changes caused by a submount triggered by + * the traversal. But do it only for utilities which use + * FTS_TIGHT_CYCLE_CHECK. Therefore, only find and du + * benefit/suffer from this feature for now. + */ + LEAVE_DIR (sp, cur, "4"); + fts_stat (sp, cur, false); + if (! enter_dir (sp, cur)) { + __set_errno (ENOMEM); + return NULL; + } + } /* * Nlinks is the number of possible entries of type directory in the @@ -967,7 +1319,10 @@ fts_build (register FTS *sp, int type) if (nlinks || type == BREAD) { int dir_fd = dirfd(dirp); if (ISSET(FTS_CWDFD) && 0 <= dir_fd) - dir_fd = dup (dir_fd); + { + dir_fd = dup (dir_fd); + set_cloexec_flag (dir_fd, true); + } if (dir_fd < 0 || fts_safe_changedir(sp, cur, dir_fd, NULL)) { if (nlinks && type == BREAD) cur->fts_errno = errno; @@ -1046,7 +1401,7 @@ mem1: saved_errno = errno; new_len = len + _D_EXACT_NAMLEN (dp); if (new_len < len) { /* - * In the unlikely even that we would end up + * In the unlikely event that we would end up * with a file name longer than SIZE_MAX, free up * the current structure and the structures already * allocated, then error out with ENAMETOOLONG. @@ -1067,6 +1422,9 @@ mem1: saved_errno = errno; if (dp->d_type == DT_WHT) p->fts_flags |= FTS_ISW; #endif + /* Store dirent.d_ino, in case we need to sort + entries before processing them. */ + p->fts_statp->st_ino = D_INO (dp); /* Build a file name for fts_stat to stat. */ if (ISSET(FTS_NOCHDIR)) { @@ -1093,8 +1451,11 @@ mem1: saved_errno = errno; && DT_IS_KNOWN(dp) && ! DT_MUST_BE(dp, DT_DIR)); p->fts_info = FTS_NSOK; + /* Propagate dirent.d_type information back + to caller, when possible. */ + set_stat_type (p->fts_statp, D_TYPE (dp)); fts_set_stat_required(p, !skip_stat); - is_dir = (ISSET(FTS_PHYSICAL) && ISSET(FTS_NOSTAT) + is_dir = (ISSET(FTS_PHYSICAL) && DT_MUST_BE(dp, DT_DIR)); } else { p->fts_info = fts_stat(sp, p, false); @@ -1162,6 +1523,19 @@ mem1: saved_errno = errno; return (NULL); } + /* If there are many entries, no sorting function has been specified, + and this file system is of a type that may be slow with a large + number of entries, then sort the directory entries on increasing + inode numbers. */ + if (nitems > _FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD + && !sp->fts_compar + && ISSET (FTS_CWDFD) + && dirent_inode_sort_may_be_useful (sp->fts_cwd_fd)) { + sp->fts_compar = fts_compare_ino; + head = fts_sort (sp, head, nitems); + sp->fts_compar = NULL; + } + /* Sort the entries. */ if (sp->fts_compar && nitems > 1) head = fts_sort(sp, head, nitems); @@ -1229,6 +1603,87 @@ fts_cross_check (FTS const *sp) } } } + +static bool +same_fd (int fd1, int fd2) +{ + struct stat sb1, sb2; + return (fstat (fd1, &sb1) == 0 + && fstat (fd2, &sb2) == 0 + && SAME_INODE (sb1, sb2)); +} + +static void +fd_ring_print (FTS const *sp, FILE *stream, char const *msg) +{ + I_ring const *fd_ring = &sp->fts_fd_ring; + unsigned int i = fd_ring->fts_front; + char *cwd = getcwdat (sp->fts_cwd_fd, NULL, 0); + fprintf (stream, "=== %s ========== %s\n", msg, cwd); + free (cwd); + if (i_ring_empty (fd_ring)) + return; + + while (true) + { + int fd = fd_ring->fts_fd_ring[i]; + if (fd < 0) + fprintf (stream, "%d: %d:\n", i, fd); + else + { + char *wd = getcwdat (fd, NULL, 0); + fprintf (stream, "%d: %d: %s\n", i, fd, wd); + free (wd); + } + if (i == fd_ring->fts_back) + break; + i = (i + I_RING_SIZE - 1) % I_RING_SIZE; + } +} + +/* Ensure that each file descriptor on the fd_ring matches a + parent, grandparent, etc. of the current working directory. */ +static void +fd_ring_check (FTS const *sp) +{ + if (!fts_debug) + return; + + /* Make a writable copy. */ + I_ring fd_w = sp->fts_fd_ring; + + int cwd_fd = sp->fts_cwd_fd; + cwd_fd = dup (cwd_fd); + char *dot = getcwdat (cwd_fd, NULL, 0); + error (0, 0, "===== check ===== cwd: %s", dot); + free (dot); + while ( ! i_ring_empty (&fd_w)) + { + int fd = i_ring_pop (&fd_w); + if (0 <= fd) + { + int parent_fd = openat (cwd_fd, "..", O_RDONLY); + if (parent_fd < 0) + { + // Warn? + break; + } + if (!same_fd (fd, parent_fd)) + { + char *cwd = getcwdat (fd, NULL, 0); + error (0, errno, "ring : %s", cwd); + char *c2 = getcwdat (parent_fd, NULL, 0); + error (0, errno, "parent: %s", c2); + free (cwd); + free (c2); + fts_assert (0); + } + close (cwd_fd); + cwd_fd = parent_fd; + } + } + close (cwd_fd); +} #endif static unsigned short int @@ -1274,12 +1729,14 @@ err: memset(sbp, 0, sizeof(struct stat)); } if (S_ISDIR(sbp->st_mode)) { + p->fts_n_dirs_remaining = (sbp->st_nlink + - (ISSET(FTS_SEEDOT) ? 0 : 2)); if (ISDOT(p->fts_name)) { /* Command-line "." and ".." are real directories. */ return (p->fts_level == FTS_ROOTLEVEL ? FTS_D : FTS_DOT); } -#if _LGPL_PACKAGE +#if !GNULIB_FTS { /* * Cycle detection is done by brute force when the directory @@ -1350,7 +1807,7 @@ fts_sort (FTS *sp, FTSENT *head, register size_t nitems) * 40 so don't realloc one entry at a time. */ if (nitems > sp->fts_nitems) { - struct _ftsent **a; + FTSENT **a; sp->fts_nitems = nitems + 40; if (SIZE_MAX / sizeof *a < sp->fts_nitems @@ -1504,23 +1961,52 @@ internal_function fts_safe_changedir (FTS *sp, FTSENT *p, int fd, char const *dir) { int ret; + bool is_dotdot = dir && STREQ (dir, ".."); + int newfd; + + /* This clause handles the unusual case in which FTS_NOCHDIR + is specified, along with FTS_CWDFD. In that case, there is + no need to change even the virtual cwd file descriptor. + However, if FD is non-negative, we do close it here. */ + if (ISSET (FTS_NOCHDIR)) + { + if (ISSET (FTS_CWDFD) && 0 <= fd) + close (fd); + return 0; + } - int newfd = fd; - if (ISSET(FTS_NOCHDIR)) { - if (ISSET(FTS_CWDFD) && 0 <= fd) - close (fd); - return (0); - } - if (fd < 0 && (newfd = diropen (sp, dir)) < 0) - return (-1); + if (fd < 0 && is_dotdot && ISSET (FTS_CWDFD)) + { + /* When possible, skip the diropen and subsequent fstat+dev/ino + comparison. I.e., when changing to parent directory + (chdir ("..")), use a file descriptor from the ring and + save the overhead of diropen+fstat, as well as avoiding + failure when we lack "x" access to the virtual cwd. */ + if ( ! i_ring_empty (&sp->fts_fd_ring)) + { + int parent_fd; + fd_ring_print (sp, stderr, "pre-pop"); + parent_fd = i_ring_pop (&sp->fts_fd_ring); + is_dotdot = true; + if (0 <= parent_fd) + { + fd = parent_fd; + dir = NULL; + } + } + } - /* The following dev/inode check is necessary if we're doing - a `logical' traversal (through symlinks, a la chown -L), - if the system lacks O_NOFOLLOW support, or if we're changing - to "..". In the latter case, O_NOFOLLOW can't help. In - general (when the target is not ".."), diropen's use of - O_NOFOLLOW ensures we don't mistakenly follow a symlink, - so we can avoid the expense of this fstat. */ + newfd = fd; + if (fd < 0 && (newfd = diropen (sp, dir)) < 0) + return -1; + + /* The following dev/inode check is necessary if we're doing a + `logical' traversal (through symlinks, a la chown -L), if the + system lacks O_NOFOLLOW support, or if we're changing to ".." + (but not via a popped file descriptor). When changing to the + name "..", O_NOFOLLOW can't help. In general, when the target is + not "..", diropen's use of O_NOFOLLOW ensures we don't mistakenly + follow a symlink, so we can avoid the expense of this fstat. */ if (ISSET(FTS_LOGICAL) || ! HAVE_WORKING_O_NOFOLLOW || (dir && STREQ (dir, ".."))) { @@ -1541,7 +2027,7 @@ fts_safe_changedir (FTS *sp, FTSENT *p, int fd, char const *dir) if (ISSET(FTS_CWDFD)) { - cwd_advance_fd (sp, newfd); + cwd_advance_fd (sp, newfd, ! is_dotdot); return 0; } @@ -1553,5 +2039,5 @@ bail: (void)close(newfd); __set_errno (oerrno); } - return (ret); + return ret; }