/* Traverse a file hierarchy.
- Copyright (C) 2004-2011 Free Software Foundation, Inc.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
# define D_INO(dp) NOT_AN_INODE_NUMBER
#endif
+/* If possible (see max_entries, below), read no more than this many directory
+ entries at a time. Without this limit (i.e., when using non-NULL
+ fts_compar), processing a directory with 4,000,000 entries requires ~1GiB
+ of memory, and handling 64M entries would require 16GiB of memory. */
+#ifndef FTS_MAX_READDIR_ENTRIES
+# define FTS_MAX_READDIR_ENTRIES 100000
+#endif
+
/* If there are more than this many entries in a directory,
and the conditions mentioned below are satisfied, then sort
the entries on inode number before any further processing. */
#ifndef FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD
# define FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD 10000
#endif
+
enum
{
_FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD = FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD
#define ISSET(opt) (sp->fts_options & (opt))
#define SET(opt) (sp->fts_options |= (opt))
-/* FIXME: make this a function */
-#define RESTORE_INITIAL_CWD(sp) \
- (fd_ring_clear (&((sp)->fts_fd_ring)), \
- FCHDIR ((sp), (ISSET (FTS_CWDFD) ? AT_FDCWD : (sp)->fts_rfd)))
-
/* FIXME: FTS_NOCHDIR is now misnamed.
Call it FTS_USE_FULL_RELATIVE_FILE_NAMES instead. */
#define FCHDIR(sp, fd) \
} \
while (false)
-/* The attribute __pure__ was added in gcc 2.96. */
-#undef _GL_ATTRIBUTE_PURE
-#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
-# define _GL_ATTRIBUTE_PURE __attribute__ ((__pure__))
-#else
-# define _GL_ATTRIBUTE_PURE /* empty */
-#endif
-
static void
fd_ring_clear (I_ring *fd_ring)
{
/* file-descriptor-relative opendir. */
/* FIXME: if others need this function, move it into lib/openat.c */
-static inline DIR *
+static DIR *
internal_function
opendirat (int fd, char const *dir, int extra_flags, int *pdir_fd)
{
sp->fts_cwd_fd = fd;
}
+/* Restore the initial, pre-traversal, "working directory".
+ In FTS_CWDFD mode, we merely call cwd_advance_fd, otherwise,
+ we may actually change the working directory.
+ Return 0 upon success. Upon failure, set errno and return nonzero. */
+static int
+restore_initial_cwd (FTS *sp)
+{
+ int fail = FCHDIR (sp, ISSET (FTS_CWDFD) ? AT_FDCWD : sp->fts_rfd);
+ fd_ring_clear (&(sp->fts_fd_ring));
+ return fail;
+}
+
/* Open the directory DIR if possible, and return a file
descriptor. Return -1 and set errno on failure. It doesn't matter
whether the file descriptor has read or write access. */
-static inline int
+static int
internal_function
diropen (FTS const *sp, char const *dir)
{
int open_flags = (O_SEARCH | O_DIRECTORY | O_NOCTTY | O_NONBLOCK
- | (ISSET (FTS_PHYSICAL) ? O_NOFOLLOW : 0));
+ | (ISSET (FTS_PHYSICAL) ? O_NOFOLLOW : 0)
+ | (ISSET (FTS_NOATIME) ? O_NOATIME : 0));
int fd = (ISSET (FTS_CWDFD)
? openat (sp->fts_cwd_fd, dir, open_flags)
early, doing it here saves us the trouble of ensuring
later (where it'd be messier) that "." can in fact
be opened. If not, revert to FTS_NOCHDIR mode. */
- int fd = open (".", O_SEARCH);
+ int fd = open (".",
+ O_SEARCH | (ISSET (FTS_NOATIME) ? O_NOATIME : 0));
if (fd < 0)
{
- /* Even if `.' is unreadable, don't revert to FTS_NOCHDIR mode
+ /* Even if "." is unreadable, don't revert to FTS_NOCHDIR mode
on systems like Linux+PROC_FS, where our openat emulation
is good enough. Note: on a system that emulates
openat via /proc, this technique can still fail, but
for (root = NULL, nitems = 0; *argv != NULL; ++argv, ++nitems) {
/* *Do* allow zero-length file names. */
size_t len = strlen(*argv);
+
+ if ( ! (options & FTS_VERBATIM))
+ {
+ /* If there are two or more trailing slashes, trim all but one,
+ but don't change "//" to "/", and do map "///" to "/". */
+ char const *v = *argv;
+ if (2 < len && v[len - 1] == '/')
+ while (1 < len && v[len - 2] == '/')
+ --len;
+ }
+
if ((p = fts_alloc(sp, *argv, len)) == NULL)
goto mem3;
p->fts_level = FTS_ROOTLEVEL;
switch (fs_buf.f_type)
{
- /* List here the file system types that lack useable dirent.d_type
+ /* List here the file system types that lack usable dirent.d_type
info, yet for which the optimization does apply. */
case S_MAGIC_REISERFS:
return true;
#endif
/* link-count-optimization entry:
- map an stat.st_dev number to a boolean: leaf_optimization_works */
+ map a stat.st_dev number to a boolean: leaf_optimization_works */
struct LCO_ent
{
dev_t st_dev;
/* Move to the next node on this level. */
next: tmp = p;
+
+ /* If we have so many directory entries that we're reading them
+ in batches, and we've reached the end of the current batch,
+ read in a new batch. */
+ if (p->fts_link == NULL && p->fts_parent->fts_dirp)
+ {
+ p = tmp->fts_parent;
+ sp->fts_cur = p;
+ sp->fts_path[p->fts_pathlen] = '\0';
+
+ if ((p = fts_build (sp, BREAD)) == NULL)
+ {
+ if (ISSET(FTS_STOP))
+ return NULL;
+ goto cd_dot_dot;
+ }
+
+ free(tmp);
+ goto name;
+ }
+
if ((p = p->fts_link) != NULL) {
sp->fts_cur = p;
free(tmp);
* root.
*/
if (p->fts_level == FTS_ROOTLEVEL) {
- if (RESTORE_INITIAL_CWD(sp)) {
+ if (restore_initial_cwd(sp)) {
SET(FTS_STOP);
return (NULL);
}
}
return p;
}
+cd_dot_dot:
/* Move up to the parent node. */
p = tmp->fts_parent;
* one level, via "..".
*/
if (p->fts_level == FTS_ROOTLEVEL) {
- if (RESTORE_INITIAL_CWD(sp)) {
+ if (restore_initial_cwd(sp)) {
p->fts_errno = errno;
SET(FTS_STOP);
}
st->st_mode = type;
}
+#define closedir_and_clear(dirp) \
+ do \
+ { \
+ closedir (dirp); \
+ dirp = NULL; \
+ } \
+ while (0)
+
+#define fts_opendir(file, Pdir_fd) \
+ opendirat((! ISSET(FTS_NOCHDIR) && ISSET(FTS_CWDFD) \
+ ? sp->fts_cwd_fd : AT_FDCWD), \
+ file, \
+ (((ISSET(FTS_PHYSICAL) \
+ && ! (ISSET(FTS_COMFOLLOW) \
+ && cur->fts_level == FTS_ROOTLEVEL)) \
+ ? O_NOFOLLOW : 0) \
+ | (ISSET (FTS_NOATIME) ? O_NOATIME : 0)), \
+ Pdir_fd)
+
/*
* This is the tricky part -- do not casually change *anything* in here. The
* idea is to build the linked list of entries that are used by fts_children
internal_function
fts_build (register FTS *sp, int type)
{
- register struct dirent *dp;
register FTSENT *p, *head;
register size_t nitems;
- FTSENT *cur, *tail;
- DIR *dirp;
+ FTSENT *tail;
void *oldaddr;
int saved_errno;
bool descend;
size_t len, maxlen, new_len;
char *cp;
int dir_fd;
+ FTSENT *cur = sp->fts_cur;
+ bool continue_readdir = !!cur->fts_dirp;
- /* Set current node pointer. */
- cur = sp->fts_cur;
-
- /*
- * Open the directory for reading. If this fails, we're done.
- * If being called from fts_read, set the fts_info field.
- */
-#if defined FTS_WHITEOUT && 0
- if (ISSET(FTS_WHITEOUT))
- oflag = DTF_NODUP|DTF_REWIND;
+ /* When cur->fts_dirp is non-NULL, that means we should
+ continue calling readdir on that existing DIR* pointer
+ rather than opening a new one. */
+ if (continue_readdir)
+ {
+ DIR *dp = cur->fts_dirp;
+ dir_fd = dirfd (dp);
+ if (dir_fd < 0)
+ {
+ closedir_and_clear (cur->fts_dirp);
+ if (type == BREAD)
+ {
+ cur->fts_info = FTS_DNR;
+ cur->fts_errno = errno;
+ }
+ return NULL;
+ }
+ }
else
- oflag = DTF_HIDEW|DTF_NODUP|DTF_REWIND;
-#else
-# define __opendir2(file, flag) \
- opendirat((! ISSET(FTS_NOCHDIR) && ISSET(FTS_CWDFD) \
- ? sp->fts_cwd_fd : AT_FDCWD), \
- file, \
- ((ISSET(FTS_PHYSICAL) \
- && ! (ISSET(FTS_COMFOLLOW) \
- && cur->fts_level == FTS_ROOTLEVEL)) \
- ? O_NOFOLLOW : 0), \
- &dir_fd)
-#endif
- if ((dirp = __opendir2(cur->fts_accpath, oflag)) == NULL) {
- if (type == BREAD) {
- cur->fts_info = FTS_DNR;
- cur->fts_errno = errno;
- }
- return (NULL);
- }
- /* Rather than calling fts_stat for each and every entry encountered
- in the readdir loop (below), stat each directory only right after
- opening it. */
- if (cur->fts_info == FTS_NSOK)
- cur->fts_info = fts_stat(sp, cur, false);
- else if (sp->fts_options & FTS_TIGHT_CYCLE_CHECK) {
+ {
+ /* Open the directory for reading. If this fails, we're done.
+ If being called from fts_read, set the fts_info field. */
+ if ((cur->fts_dirp = fts_opendir(cur->fts_accpath, &dir_fd)) == NULL)
+ {
+ if (type == BREAD)
+ {
+ cur->fts_info = FTS_DNR;
+ cur->fts_errno = errno;
+ }
+ return NULL;
+ }
+ /* Rather than calling fts_stat for each and every entry encountered
+ in the readdir loop (below), stat each directory only right after
+ opening it. */
+ if (cur->fts_info == FTS_NSOK)
+ cur->fts_info = fts_stat(sp, cur, false);
+ else if (sp->fts_options & FTS_TIGHT_CYCLE_CHECK)
+ {
/* Now read the stat info again after opening a directory to
- * reveal eventual changes caused by a submount triggered by
- * the traversal. But do it only for utilities which use
- * FTS_TIGHT_CYCLE_CHECK. Therefore, only find and du
- * benefit/suffer from this feature for now.
- */
+ reveal eventual changes caused by a submount triggered by
+ the traversal. But do it only for utilities which use
+ FTS_TIGHT_CYCLE_CHECK. Therefore, only find and du
+ benefit/suffer from this feature for now. */
LEAVE_DIR (sp, cur, "4");
fts_stat (sp, cur, false);
- if (! enter_dir (sp, cur)) {
- __set_errno (ENOMEM);
- return NULL;
- }
- }
+ if (! enter_dir (sp, cur))
+ {
+ __set_errno (ENOMEM);
+ return NULL;
+ }
+ }
+ }
+
+ /* Maximum number of readdir entries to read at one time. This
+ limitation is to avoid reading millions of entries into memory
+ at once. When an fts_compar function is specified, we have no
+ choice: we must read all entries into memory before calling that
+ function. But when no such function is specified, we can read
+ entries in batches that are large enough to help us with inode-
+ sorting, yet not so large that we risk exhausting memory. */
+ size_t max_entries = (sp->fts_compar == NULL
+ ? FTS_MAX_READDIR_ENTRIES : SIZE_MAX);
/*
* Nlinks is the number of possible entries of type directory in the
* needed sorted entries or stat information, they had better be
* checking FTS_NS on the returned nodes.
*/
- if (nlinks || type == BREAD) {
+ if (continue_readdir)
+ {
+ /* When resuming a short readdir run, we already have
+ the required dirp and dir_fd. */
+ descend = true;
+ }
+ else if (nlinks || type == BREAD) {
if (ISSET(FTS_CWDFD))
{
dir_fd = dup (dir_fd);
cur->fts_errno = errno;
cur->fts_flags |= FTS_DONTCHDIR;
descend = false;
- closedir(dirp);
+ closedir_and_clear(cur->fts_dirp);
if (ISSET(FTS_CWDFD) && 0 <= dir_fd)
close (dir_fd);
- dirp = NULL;
+ cur->fts_dirp = NULL;
} else
descend = true;
} else
level = cur->fts_level + 1;
- /* Read the directory, attaching each entry to the `link' pointer. */
+ /* Read the directory, attaching each entry to the "link" pointer. */
doadjust = false;
- for (head = tail = NULL, nitems = 0; dirp && (dp = readdir(dirp));) {
+ head = NULL;
+ tail = NULL;
+ nitems = 0;
+ while (cur->fts_dirp) {
bool is_dir;
-
+ struct dirent *dp = readdir(cur->fts_dirp);
+ if (dp == NULL)
+ break;
if (!ISSET(FTS_SEEDOT) && ISDOT(dp->d_name))
continue;
mem1: saved_errno = errno;
free(p);
fts_lfree(head);
- closedir(dirp);
+ closedir_and_clear(cur->fts_dirp);
cur->fts_info = FTS_ERR;
SET(FTS_STOP);
__set_errno (saved_errno);
*/
free(p);
fts_lfree(head);
- closedir(dirp);
+ closedir_and_clear(cur->fts_dirp);
cur->fts_info = FTS_ERR;
SET(FTS_STOP);
__set_errno (ENAMETOOLONG);
p->fts_parent = sp->fts_cur;
p->fts_pathlen = new_len;
-#if defined FTS_WHITEOUT && 0
- if (dp->d_type == DT_WHT)
- p->fts_flags |= FTS_ISW;
-#endif
/* Store dirent.d_ino, in case we need to sort
entries before processing them. */
p->fts_statp->st_ino = D_INO (dp);
tail = p;
}
++nitems;
+ if (max_entries <= nitems) {
+ /* When there are too many dir entries, leave
+ fts_dirp open, so that a subsequent fts_read
+ can take up where we leave off. */
+ goto break_without_closedir;
+ }
}
- if (dirp)
- closedir(dirp);
+
+ if (cur->fts_dirp)
+ closedir_and_clear(cur->fts_dirp);
+
+ break_without_closedir:
/*
* If realloc() changed the address of the file name, adjust the
* to an empty directory, we wind up here with no other way back. If
* can't get back, we're done.
*/
- if (descend && (type == BCHILD || !nitems) &&
+ if (!continue_readdir && descend && (type == BCHILD || !nitems) &&
(cur->fts_level == FTS_ROOTLEVEL
- ? RESTORE_INITIAL_CWD(sp)
+ ? restore_initial_cwd(sp)
: fts_safe_changedir(sp, cur->fts_parent, -1, ".."))) {
cur->fts_info = FTS_ERR;
SET(FTS_STOP);
int fd = i_ring_pop (&fd_w);
if (0 <= fd)
{
- int parent_fd = openat (cwd_fd, "..", O_SEARCH);
+ int parent_fd = openat (cwd_fd, "..", O_SEARCH | O_NOATIME);
if (parent_fd < 0)
{
// Warn?
if (p->fts_level == FTS_ROOTLEVEL && ISSET(FTS_COMFOLLOW))
follow = true;
-#if defined FTS_WHITEOUT && 0
- /* check for whiteout */
- if (p->fts_flags & FTS_ISW) {
- memset(sbp, '\0', sizeof (*sbp));
- sbp->st_mode = S_IFWHT;
- return (FTS_W);
- }
-#endif
-
/*
* If doing a logical walk, or application requested FTS_FOLLOW, do
* a stat(2). If that fails, check for a non-existent symlink. If
return (NULL);
/* Copy the name and guarantee NUL termination. */
- memmove(p->fts_name, name, namelen);
+ memcpy(p->fts_name, name, namelen);
p->fts_name[namelen] = '\0';
p->fts_namelen = namelen;
p->fts_fts = sp;
p->fts_path = sp->fts_path;
p->fts_errno = 0;
+ p->fts_dirp = NULL;
p->fts_flags = 0;
p->fts_instr = FTS_NOINSTR;
p->fts_number = 0;
/* Free a linked list of structures. */
while ((p = head)) {
head = head->fts_link;
+ if (p->fts_dirp)
+ closedir (p->fts_dirp);
free(p);
}
}
* tricked by someone changing the world out from underneath us.
* Assumes p->fts_statp->st_dev and p->fts_statp->st_ino are filled in.
* If FD is non-negative, expect it to be used after this function returns,
- * and to be closed eventually. So don't pass e.g., `dirfd(dirp)' and then
+ * and to be closed eventually. So don't pass e.g., 'dirfd(dirp)' and then
* do closedir(dirp), because that would invalidate the saved FD.
* Upon failure, close FD immediately and return nonzero.
*/
return -1;
/* The following dev/inode check is necessary if we're doing a
- `logical' traversal (through symlinks, a la chown -L), if the
+ "logical" traversal (through symlinks, a la chown -L), if the
system lacks O_NOFOLLOW support, or if we're changing to ".."
(but not via a popped file descriptor). When changing to the
name "..", O_NOFOLLOW can't help. In general, when the target is