/* Traverse a file hierarchy.
- Copyright (C) 2004-2011 Free Software Foundation, Inc.
+ Copyright (C) 2004-2012 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
# define D_INO(dp) NOT_AN_INODE_NUMBER
#endif
+/* If possible (see max_entries, below), read no more than this many directory
+ entries at a time. Without this limit (i.e., when using non-NULL
+ fts_compar), processing a directory with 4,000,000 entries requires ~1GiB
+ of memory, and handling 64M entries would require 16GiB of memory. */
+#ifndef FTS_MAX_READDIR_ENTRIES
+# define FTS_MAX_READDIR_ENTRIES 100000
+#endif
+
/* If there are more than this many entries in a directory,
and the conditions mentioned below are satisfied, then sort
the entries on inode number before any further processing. */
#ifndef FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD
# define FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD 10000
#endif
+
enum
{
_FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD = FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD
#define ISSET(opt) (sp->fts_options & (opt))
#define SET(opt) (sp->fts_options |= (opt))
-/* FIXME: make this a function */
-#define RESTORE_INITIAL_CWD(sp) \
- (fd_ring_clear (&((sp)->fts_fd_ring)), \
- FCHDIR ((sp), (ISSET (FTS_CWDFD) ? AT_FDCWD : (sp)->fts_rfd)))
-
/* FIXME: FTS_NOCHDIR is now misnamed.
Call it FTS_USE_FULL_RELATIVE_FILE_NAMES instead. */
#define FCHDIR(sp, fd) \
sp->fts_cwd_fd = fd;
}
+/* Restore the initial, pre-traversal, "working directory".
+ In FTS_CWDFD mode, we merely call cwd_advance_fd, otherwise,
+ we may actually change the working directory.
+ Return 0 upon success. Upon failure, set errno and return nonzero. */
+static int
+restore_initial_cwd (FTS *sp)
+{
+ int fail = FCHDIR (sp, ISSET (FTS_CWDFD) ? AT_FDCWD : sp->fts_rfd);
+ fd_ring_clear (&(sp->fts_fd_ring));
+ return fail;
+}
+
/* Open the directory DIR if possible, and return a file
descriptor. Return -1 and set errno on failure. It doesn't matter
whether the file descriptor has read or write access. */
O_SEARCH | (ISSET (FTS_NOATIME) ? O_NOATIME : 0));
if (fd < 0)
{
- /* Even if `.' is unreadable, don't revert to FTS_NOCHDIR mode
+ /* Even if "." is unreadable, don't revert to FTS_NOCHDIR mode
on systems like Linux+PROC_FS, where our openat emulation
is good enough. Note: on a system that emulates
openat via /proc, this technique can still fail, but
switch (fs_buf.f_type)
{
- /* List here the file system types that lack useable dirent.d_type
+ /* List here the file system types that lack usable dirent.d_type
info, yet for which the optimization does apply. */
case S_MAGIC_REISERFS:
return true;
/* Move to the next node on this level. */
next: tmp = p;
+
+ /* If we have so many directory entries that we're reading them
+ in batches, and we've reached the end of the current batch,
+ read in a new batch. */
+ if (p->fts_link == NULL && p->fts_parent->fts_dirp)
+ {
+ p = tmp->fts_parent;
+ sp->fts_cur = p;
+ sp->fts_path[p->fts_pathlen] = '\0';
+
+ if ((p = fts_build (sp, BREAD)) == NULL)
+ {
+ if (ISSET(FTS_STOP))
+ return NULL;
+ goto cd_dot_dot;
+ }
+
+ free(tmp);
+ goto name;
+ }
+
if ((p = p->fts_link) != NULL) {
sp->fts_cur = p;
free(tmp);
* root.
*/
if (p->fts_level == FTS_ROOTLEVEL) {
- if (RESTORE_INITIAL_CWD(sp)) {
+ if (restore_initial_cwd(sp)) {
SET(FTS_STOP);
return (NULL);
}
}
return p;
}
+cd_dot_dot:
/* Move up to the parent node. */
p = tmp->fts_parent;
* one level, via "..".
*/
if (p->fts_level == FTS_ROOTLEVEL) {
- if (RESTORE_INITIAL_CWD(sp)) {
+ if (restore_initial_cwd(sp)) {
p->fts_errno = errno;
SET(FTS_STOP);
}
char *cp;
int dir_fd;
FTSENT *cur = sp->fts_cur;
+ bool continue_readdir = !!cur->fts_dirp;
- /* Open the directory for reading. If this fails, we're done.
- If being called from fts_read, set the fts_info field. */
- if ((cur->fts_dirp = fts_opendir(cur->fts_accpath, &dir_fd)) == NULL) {
- if (type == BREAD) {
- cur->fts_info = FTS_DNR;
- cur->fts_errno = errno;
- }
- return (NULL);
- }
- /* Rather than calling fts_stat for each and every entry encountered
- in the readdir loop (below), stat each directory only right after
- opening it. */
- if (cur->fts_info == FTS_NSOK)
- cur->fts_info = fts_stat(sp, cur, false);
- else if (sp->fts_options & FTS_TIGHT_CYCLE_CHECK)
+ /* When cur->fts_dirp is non-NULL, that means we should
+ continue calling readdir on that existing DIR* pointer
+ rather than opening a new one. */
+ if (continue_readdir)
+ {
+ DIR *dp = cur->fts_dirp;
+ dir_fd = dirfd (dp);
+ if (dir_fd < 0)
+ {
+ closedir_and_clear (cur->fts_dirp);
+ if (type == BREAD)
+ {
+ cur->fts_info = FTS_DNR;
+ cur->fts_errno = errno;
+ }
+ return NULL;
+ }
+ }
+ else
{
- /* Now read the stat info again after opening a directory to
- reveal eventual changes caused by a submount triggered by
- the traversal. But do it only for utilities which use
- FTS_TIGHT_CYCLE_CHECK. Therefore, only find and du
- benefit/suffer from this feature for now. */
- LEAVE_DIR (sp, cur, "4");
- fts_stat (sp, cur, false);
- if (! enter_dir (sp, cur))
+ /* Open the directory for reading. If this fails, we're done.
+ If being called from fts_read, set the fts_info field. */
+ if ((cur->fts_dirp = fts_opendir(cur->fts_accpath, &dir_fd)) == NULL)
{
- __set_errno (ENOMEM);
+ if (type == BREAD)
+ {
+ cur->fts_info = FTS_DNR;
+ cur->fts_errno = errno;
+ }
return NULL;
}
+ /* Rather than calling fts_stat for each and every entry encountered
+ in the readdir loop (below), stat each directory only right after
+ opening it. */
+ if (cur->fts_info == FTS_NSOK)
+ cur->fts_info = fts_stat(sp, cur, false);
+ else if (sp->fts_options & FTS_TIGHT_CYCLE_CHECK)
+ {
+ /* Now read the stat info again after opening a directory to
+ reveal eventual changes caused by a submount triggered by
+ the traversal. But do it only for utilities which use
+ FTS_TIGHT_CYCLE_CHECK. Therefore, only find and du
+ benefit/suffer from this feature for now. */
+ LEAVE_DIR (sp, cur, "4");
+ fts_stat (sp, cur, false);
+ if (! enter_dir (sp, cur))
+ {
+ __set_errno (ENOMEM);
+ return NULL;
+ }
+ }
}
- /* Nlinks is the number of possible entries of type directory in the
- directory if we're cheating on stat calls, 0 if we're not doing
- any stat calls at all, (nlink_t) -1 if we're statting everything. */
+ /* Maximum number of readdir entries to read at one time. This
+ limitation is to avoid reading millions of entries into memory
+ at once. When an fts_compar function is specified, we have no
+ choice: we must read all entries into memory before calling that
+ function. But when no such function is specified, we can read
+ entries in batches that are large enough to help us with inode-
+ sorting, yet not so large that we risk exhausting memory. */
+ size_t max_entries = (sp->fts_compar == NULL
+ ? FTS_MAX_READDIR_ENTRIES : SIZE_MAX);
+
+ /*
+ * Nlinks is the number of possible entries of type directory in the
+ * directory if we're cheating on stat calls, 0 if we're not doing
+ * any stat calls at all, (nlink_t) -1 if we're statting everything.
+ */
if (type == BNAMES) {
nlinks = 0;
/* Be quiet about nostat, GCC. */
* needed sorted entries or stat information, they had better be
* checking FTS_NS on the returned nodes.
*/
- if (nlinks || type == BREAD) {
+ if (continue_readdir)
+ {
+ /* When resuming a short readdir run, we already have
+ the required dirp and dir_fd. */
+ descend = true;
+ }
+ else if (nlinks || type == BREAD) {
if (ISSET(FTS_CWDFD))
{
dir_fd = dup (dir_fd);
level = cur->fts_level + 1;
- /* Read the directory, attaching each entry to the `link' pointer. */
+ /* Read the directory, attaching each entry to the "link" pointer. */
doadjust = false;
head = NULL;
tail = NULL;
tail = p;
}
++nitems;
+ if (max_entries <= nitems) {
+ /* When there are too many dir entries, leave
+ fts_dirp open, so that a subsequent fts_read
+ can take up where we leave off. */
+ goto break_without_closedir;
+ }
}
+
if (cur->fts_dirp)
closedir_and_clear(cur->fts_dirp);
+ break_without_closedir:
+
/*
* If realloc() changed the address of the file name, adjust the
* addresses for the rest of the tree and the dir list.
* to an empty directory, we wind up here with no other way back. If
* can't get back, we're done.
*/
- if (descend && (type == BCHILD || !nitems) &&
+ if (!continue_readdir && descend && (type == BCHILD || !nitems) &&
(cur->fts_level == FTS_ROOTLEVEL
- ? RESTORE_INITIAL_CWD(sp)
+ ? restore_initial_cwd(sp)
: fts_safe_changedir(sp, cur->fts_parent, -1, ".."))) {
cur->fts_info = FTS_ERR;
SET(FTS_STOP);
}
static size_t
-internal_function
+internal_function _GL_ATTRIBUTE_PURE
fts_maxarglen (char * const *argv)
{
size_t len, max;
* tricked by someone changing the world out from underneath us.
* Assumes p->fts_statp->st_dev and p->fts_statp->st_ino are filled in.
* If FD is non-negative, expect it to be used after this function returns,
- * and to be closed eventually. So don't pass e.g., `dirfd(dirp)' and then
+ * and to be closed eventually. So don't pass e.g., 'dirfd(dirp)' and then
* do closedir(dirp), because that would invalidate the saved FD.
* Upon failure, close FD immediately and return nonzero.
*/
return -1;
/* The following dev/inode check is necessary if we're doing a
- `logical' traversal (through symlinks, a la chown -L), if the
+ "logical" traversal (through symlinks, a la chown -L), if the
system lacks O_NOFOLLOW support, or if we're changing to ".."
(but not via a popped file descriptor). When changing to the
name "..", O_NOFOLLOW can't help. In general, when the target is