X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Ffts.c;h=e3829f324845b39c3494db27db8d0fbd12f5aacf;hb=47cb657eca1abf2c26c32c8ce03def994a3ee37c;hp=c96dd9d1cde798219ceb647a03c7c7a07490a133;hpb=9f5e100923da93c9e470227db4aa4ac1a2e2c019;p=gnulib.git diff --git a/lib/fts.c b/lib/fts.c index c96dd9d1c..e3829f324 100644 --- a/lib/fts.c +++ b/lib/fts.c @@ -134,12 +134,21 @@ enum # define D_INO(dp) NOT_AN_INODE_NUMBER #endif +/* If possible (see max_entries, below), read no more than this many directory + entries at a time. Without this limit (i.e., when using non-NULL + fts_compar), processing a directory with 4,000,000 entries requires ~1GiB + of memory, and handling 64M entries would require 16GiB of memory. */ +#ifndef FTS_MAX_READDIR_ENTRIES +# define FTS_MAX_READDIR_ENTRIES 100000 +#endif + /* If there are more than this many entries in a directory, and the conditions mentioned below are satisfied, then sort the entries on inode number before any further processing. */ #ifndef FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD # define FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD 10000 #endif + enum { _FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD = FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD @@ -908,6 +917,27 @@ fts_read (register FTS *sp) /* Move to the next node on this level. */ next: tmp = p; + + /* If we have so many directory entries that we're reading them + in batches, and we've reached the end of the current batch, + read in a new batch. */ + if (p->fts_link == NULL && p->fts_parent->fts_dirp) + { + p = tmp->fts_parent; + sp->fts_cur = p; + sp->fts_path[p->fts_pathlen] = '\0'; + + if ((p = fts_build (sp, BREAD)) == NULL) + { + if (ISSET(FTS_STOP)) + return NULL; + goto cd_dot_dot; + } + + free(tmp); + goto name; + } + if ((p = p->fts_link) != NULL) { sp->fts_cur = p; free(tmp); @@ -996,6 +1026,7 @@ check_for_dir: } return p; } +cd_dot_dot: /* Move up to the parent node. */ p = tmp->fts_parent; @@ -1192,6 +1223,25 @@ set_stat_type (struct stat *st, unsigned int dtype) st->st_mode = type; } +#define closedir_and_clear(dirp) \ + do \ + { \ + closedir (dirp); \ + dirp = NULL; \ + } \ + while (0) + +#define fts_opendir(file, Pdir_fd) \ + opendirat((! ISSET(FTS_NOCHDIR) && ISSET(FTS_CWDFD) \ + ? sp->fts_cwd_fd : AT_FDCWD), \ + file, \ + (((ISSET(FTS_PHYSICAL) \ + && ! (ISSET(FTS_COMFOLLOW) \ + && cur->fts_level == FTS_ROOTLEVEL)) \ + ? O_NOFOLLOW : 0) \ + | (ISSET (FTS_NOATIME) ? O_NOATIME : 0)), \ + Pdir_fd) + /* * This is the tricky part -- do not casually change *anything* in here. The * idea is to build the linked list of entries that are used by fts_children @@ -1210,11 +1260,9 @@ static FTSENT * internal_function fts_build (register FTS *sp, int type) { - register struct dirent *dp; register FTSENT *p, *head; register size_t nitems; - FTSENT *cur, *tail; - DIR *dirp; + FTSENT *tail; void *oldaddr; int saved_errno; bool descend; @@ -1225,51 +1273,71 @@ fts_build (register FTS *sp, int type) size_t len, maxlen, new_len; char *cp; int dir_fd; + FTSENT *cur = sp->fts_cur; + bool continue_readdir = !!cur->fts_dirp; - /* Set current node pointer. */ - cur = sp->fts_cur; - - /* - * Open the directory for reading. If this fails, we're done. - * If being called from fts_read, set the fts_info field. - */ -# define __opendir2(file, flag) \ - opendirat((! ISSET(FTS_NOCHDIR) && ISSET(FTS_CWDFD) \ - ? sp->fts_cwd_fd : AT_FDCWD), \ - file, \ - (((ISSET(FTS_PHYSICAL) \ - && ! (ISSET(FTS_COMFOLLOW) \ - && cur->fts_level == FTS_ROOTLEVEL)) \ - ? O_NOFOLLOW : 0) \ - | (ISSET (FTS_NOATIME) ? O_NOATIME : 0)), \ - &dir_fd) - - if ((dirp = __opendir2(cur->fts_accpath, oflag)) == NULL) { - if (type == BREAD) { - cur->fts_info = FTS_DNR; - cur->fts_errno = errno; - } - return (NULL); - } - /* Rather than calling fts_stat for each and every entry encountered - in the readdir loop (below), stat each directory only right after - opening it. */ - if (cur->fts_info == FTS_NSOK) - cur->fts_info = fts_stat(sp, cur, false); - else if (sp->fts_options & FTS_TIGHT_CYCLE_CHECK) { + /* When cur->fts_dirp is non-NULL, that means we should + continue calling readdir on that existing DIR* pointer + rather than opening a new one. */ + if (continue_readdir) + { + DIR *dp = cur->fts_dirp; + dir_fd = dirfd (dp); + if (dir_fd < 0) + { + closedir_and_clear (cur->fts_dirp); + if (type == BREAD) + { + cur->fts_info = FTS_DNR; + cur->fts_errno = errno; + } + return NULL; + } + } + else + { + /* Open the directory for reading. If this fails, we're done. + If being called from fts_read, set the fts_info field. */ + if ((cur->fts_dirp = fts_opendir(cur->fts_accpath, &dir_fd)) == NULL) + { + if (type == BREAD) + { + cur->fts_info = FTS_DNR; + cur->fts_errno = errno; + } + return NULL; + } + /* Rather than calling fts_stat for each and every entry encountered + in the readdir loop (below), stat each directory only right after + opening it. */ + if (cur->fts_info == FTS_NSOK) + cur->fts_info = fts_stat(sp, cur, false); + else if (sp->fts_options & FTS_TIGHT_CYCLE_CHECK) + { /* Now read the stat info again after opening a directory to - * reveal eventual changes caused by a submount triggered by - * the traversal. But do it only for utilities which use - * FTS_TIGHT_CYCLE_CHECK. Therefore, only find and du - * benefit/suffer from this feature for now. - */ + reveal eventual changes caused by a submount triggered by + the traversal. But do it only for utilities which use + FTS_TIGHT_CYCLE_CHECK. Therefore, only find and du + benefit/suffer from this feature for now. */ LEAVE_DIR (sp, cur, "4"); fts_stat (sp, cur, false); - if (! enter_dir (sp, cur)) { - __set_errno (ENOMEM); - return NULL; - } - } + if (! enter_dir (sp, cur)) + { + __set_errno (ENOMEM); + return NULL; + } + } + } + + /* Maximum number of readdir entries to read at one time. This + limitation is to avoid reading millions of entries into memory + at once. When an fts_compar function is specified, we have no + choice: we must read all entries into memory before calling that + function. But when no such function is specified, we can read + entries in batches that are large enough to help us with inode- + sorting, yet not so large that we risk exhausting memory. */ + size_t max_entries = (sp->fts_compar == NULL + ? FTS_MAX_READDIR_ENTRIES : SIZE_MAX); /* * Nlinks is the number of possible entries of type directory in the @@ -1304,7 +1372,13 @@ fts_build (register FTS *sp, int type) * needed sorted entries or stat information, they had better be * checking FTS_NS on the returned nodes. */ - if (nlinks || type == BREAD) { + if (continue_readdir) + { + /* When resuming a short readdir run, we already have + the required dirp and dir_fd. */ + descend = true; + } + else if (nlinks || type == BREAD) { if (ISSET(FTS_CWDFD)) { dir_fd = dup (dir_fd); @@ -1316,10 +1390,10 @@ fts_build (register FTS *sp, int type) cur->fts_errno = errno; cur->fts_flags |= FTS_DONTCHDIR; descend = false; - closedir(dirp); + closedir_and_clear(cur->fts_dirp); if (ISSET(FTS_CWDFD) && 0 <= dir_fd) close (dir_fd); - dirp = NULL; + cur->fts_dirp = NULL; } else descend = true; } else @@ -1350,9 +1424,14 @@ fts_build (register FTS *sp, int type) /* Read the directory, attaching each entry to the `link' pointer. */ doadjust = false; - for (head = tail = NULL, nitems = 0; dirp && (dp = readdir(dirp));) { + head = NULL; + tail = NULL; + nitems = 0; + while (cur->fts_dirp) { bool is_dir; - + struct dirent *dp = readdir(cur->fts_dirp); + if (dp == NULL) + break; if (!ISSET(FTS_SEEDOT) && ISDOT(dp->d_name)) continue; @@ -1371,7 +1450,7 @@ fts_build (register FTS *sp, int type) mem1: saved_errno = errno; free(p); fts_lfree(head); - closedir(dirp); + closedir_and_clear(cur->fts_dirp); cur->fts_info = FTS_ERR; SET(FTS_STOP); __set_errno (saved_errno); @@ -1396,7 +1475,7 @@ mem1: saved_errno = errno; */ free(p); fts_lfree(head); - closedir(dirp); + closedir_and_clear(cur->fts_dirp); cur->fts_info = FTS_ERR; SET(FTS_STOP); __set_errno (ENAMETOOLONG); @@ -1461,9 +1540,18 @@ mem1: saved_errno = errno; tail = p; } ++nitems; + if (max_entries <= nitems) { + /* When there are too many dir entries, leave + fts_dirp open, so that a subsequent fts_read + can take up where we leave off. */ + goto break_without_closedir; + } } - if (dirp) - closedir(dirp); + + if (cur->fts_dirp) + closedir_and_clear(cur->fts_dirp); + + break_without_closedir: /* * If realloc() changed the address of the file name, adjust the @@ -1489,7 +1577,7 @@ mem1: saved_errno = errno; * to an empty directory, we wind up here with no other way back. If * can't get back, we're done. */ - if (descend && (type == BCHILD || !nitems) && + if (!continue_readdir && descend && (type == BCHILD || !nitems) && (cur->fts_level == FTS_ROOTLEVEL ? RESTORE_INITIAL_CWD(sp) : fts_safe_changedir(sp, cur->fts_parent, -1, ".."))) { @@ -1806,6 +1894,7 @@ fts_alloc (FTS *sp, const char *name, register size_t namelen) p->fts_fts = sp; p->fts_path = sp->fts_path; p->fts_errno = 0; + p->fts_dirp = NULL; p->fts_flags = 0; p->fts_instr = FTS_NOINSTR; p->fts_number = 0; @@ -1822,6 +1911,8 @@ fts_lfree (register FTSENT *head) /* Free a linked list of structures. */ while ((p = head)) { head = head->fts_link; + if (p->fts_dirp) + closedir (p->fts_dirp); free(p); } }