X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Ffts.c;h=d4f7db21cb7a051582869e1c638bf52dad98d4ae;hb=671c9011cb6944a63f0d15f60252c31bf0ee8e9b;hp=36ccf683d53b73c8cf928befbe6a2a376e12c7db;hpb=15834b98efed40c82ee182c367b636e1769cb62a;p=gnulib.git

Summary of this patch to lib/fts.c:

  * Record each directory entry's inode number (D_INO, or
    NOT_AN_INODE_NUMBER when D_INO_IN_DIRENT is not defined) in
    p->fts_statp->st_ino while the entry list is being built.
  * When a directory has more than FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD
    entries (10000 unless overridden), no comparison function is set,
    the FTS_CWDFD flag is set, and dirent_inode_sort_may_be_useful()
    returns true, sort the entries on increasing inode number.
  * On Linux builds with fstatfs available, that helper returns false
    for tmpfs and NFS; everywhere else it returns true.
  * Fix the "even" -> "event" typo in a comment.

Note: line breaks and leading whitespace in this copy were reconstructed
after being lost in extraction; if the context lines do not match the
target file exactly, apply with whitespace-insensitive matching
(for example "patch -l").

diff --git a/lib/fts.c b/lib/fts.c
index 36ccf683d..d4f7db21c 100644
--- a/lib/fts.c
+++ b/lib/fts.c
@@ -91,6 +91,29 @@ static char sccsid[] = "@(#)fts.c 8.6 (Berkeley) 8/14/94";
 # define DT_MUST_BE(d, t) false
 #endif
 
+enum
+{
+  NOT_AN_INODE_NUMBER = 0
+};
+
+#ifdef D_INO_IN_DIRENT
+# define D_INO(dp) (dp)->d_ino
+#else
+/* Some systems don't have inodes, so fake them to avoid lots of ifdefs. */
+# define D_INO(dp) NOT_AN_INODE_NUMBER
+#endif
+
+/* If there are more than this many entries in a directory,
+   and the conditions mentioned below are satisfied, then sort
+   the entries on inode number before any further processing. */
+#ifndef FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD
+# define FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD 10000
+#endif
+enum
+{
+  _FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD = FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD
+};
+
 enum Fts_stat
 {
   FTS_NO_STAT_REQUIRED = 1,
@@ -911,6 +934,63 @@ fts_children (register FTS *sp, int instr)
 	return (sp->fts_child);
 }
 
+#if defined __linux__ \
+  && HAVE_SYS_VFS_H && HAVE_FSTATFS && HAVE_STRUCT_STATFS_F_TYPE
+
+#include <sys/vfs.h>
+
+/* Linux-specific constants from coreutils' src/fs.h */
+# define S_MAGIC_TMPFS 0x1021994
+# define S_MAGIC_NFS 0x6969
+
+/* Return false if it is easy to determine the file system type of
+   the directory on which DIR_FD is open, and sorting dirents on
+   inode numbers is known not to improve traversal performance with
+   that type of file system. Otherwise, return true. */
+static bool
+dirent_inode_sort_may_be_useful (int dir_fd)
+{
+  /* Skip the sort only if we can determine efficiently
+     that skipping it is the right thing to do.
+     The cost of performing an unnecessary sort is negligible,
+     while the cost of *not* performing it can be O(N^2) with
+     a very large constant. */
+  struct statfs fs_buf;
+
+  /* If fstatfs fails, assume sorting would be useful. */
+  if (fstatfs (dir_fd, &fs_buf) != 0)
+    return true;
+
+  /* FIXME: what about when f_type is not an integral type?
+     deal with that if/when it's encountered. */
+  switch (fs_buf.f_type)
+    {
+    case S_MAGIC_TMPFS:
+    case S_MAGIC_NFS:
+      /* On a file system of any of these types, sorting
+         is unnecessary, and hence wasteful. */
+      return false;
+
+    default:
+      return true;
+    }
+}
+#else
+static bool dirent_inode_sort_may_be_useful (int dir_fd) { return true; }
+#endif
+
+/* A comparison function to sort on increasing inode number.
+   For some file system types, sorting either way makes a huge
+   performance difference for a directory with very many entries,
+   but sorting on increasing values is slightly better than sorting
+   on decreasing values. The difference is in the 5% range. */
+static int
+fts_compare_ino (struct _ftsent const **a, struct _ftsent const **b)
+{
+  return (a[0]->fts_statp->st_ino < b[0]->fts_statp->st_ino ? -1
+          : b[0]->fts_statp->st_ino < a[0]->fts_statp->st_ino ? 1 : 0);
+}
+
 /*
  * This is the tricky part -- do not casually change *anything* in here. The
  * idea is to build the linked list of entries that are used by fts_children
@@ -1090,7 +1170,7 @@ mem1: saved_errno = errno;
 			new_len = len + _D_EXACT_NAMLEN (dp);
 			if (new_len < len) {
 				/*
-				 * In the unlikely even that we would end up
+				 * In the unlikely event that we would end up
 				 * with a file name longer than SIZE_MAX, free up
 				 * the current structure and the structures already
 				 * allocated, then error out with ENAMETOOLONG.
@@ -1111,6 +1191,9 @@ mem1: saved_errno = errno;
 		if (dp->d_type == DT_WHT)
 			p->fts_flags |= FTS_ISW;
 #endif
+		/* Store dirent.d_ino, in case we need to sort
+		   entries before processing them. */
+		p->fts_statp->st_ino = D_INO (dp);
 
 		/* Build a file name for fts_stat to stat. */
 		if (ISSET(FTS_NOCHDIR)) {
@@ -1206,6 +1289,19 @@ mem1: saved_errno = errno;
 		return (NULL);
 	}
 
+	/* If there are many entries, no sorting function has been specified,
+	   and this file system is of a type that may be slow with a large
+	   number of entries, then sort the directory entries on increasing
+	   inode numbers. */
+	if (nitems > _FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD
+	    && !sp->fts_compar
+	    && ISSET (FTS_CWDFD)
+	    && dirent_inode_sort_may_be_useful (sp->fts_cwd_fd)) {
+		sp->fts_compar = fts_compare_ino;
+		head = fts_sort (sp, head, nitems);
+		sp->fts_compar = NULL;
+	}
+
 	/* Sort the entries. */
 	if (sp->fts_compar && nitems > 1)
 		head = fts_sort(sp, head, nitems);