X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fexclude.c;h=0865ceed4a0673eadb793d3eec599604c86e9684;hb=1276a2c5f24c0c932426aca9c899fa524d2443f2;hp=7df879dd17dfaa3f605a0b2249b09ce8dc0ab520;hpb=02e3bf2a532498c860f248a9bd81869dbf514404;p=gnulib.git diff --git a/lib/exclude.c b/lib/exclude.c index 7df879dd1..0865ceed4 100644 --- a/lib/exclude.c +++ b/lib/exclude.c @@ -1,12 +1,12 @@ /* exclude.c -- exclude file names - Copyright 1992, 1993, 1994, 1997, 1999, 2000, 2001 Free Software + Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2014 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or modify + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -14,63 +14,59 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; see the file COPYING. - If not, write to the Free Software Foundation, - 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + along with this program. If not, see . */ -/* Written by Paul Eggert */ +/* Written by Paul Eggert + and Sergey Poznyakoff . + Thanks to Phil Proudman + for improvement suggestions. */ -#if HAVE_CONFIG_H -# include -#endif +#include -#if HAVE_STDBOOL_H -# include -#else -typedef enum {false = 0, true = 1} bool; -#endif +#include +#include #include -#ifndef errno -extern int errno; -#endif +#include #include -#if HAVE_SYS_TYPES_H -# include -#endif -#if HAVE_STDLIB_H -# include -#endif -#if HAVE_STRING_H -# include -#endif -#if HAVE_STRINGS_H -# include -#endif -#if HAVE_INTTYPES_H -# include -#else -# if HAVE_STDINT_H -# include -# endif -#endif +#include +#include +#include #include "exclude.h" +#include "hash.h" +#include "mbuiter.h" #include "fnmatch.h" #include "xalloc.h" +#include "verify.h" + +#if USE_UNLOCKED_IO +# include "unlocked-io.h" +#endif -#ifndef SIZE_MAX -# define SIZE_MAX ((size_t) -1) +/* Non-GNU systems lack these options, so we don't need to check them. */ +#ifndef FNM_CASEFOLD +# define FNM_CASEFOLD 0 #endif +#ifndef FNM_EXTMATCH +# define FNM_EXTMATCH 0 +#endif +#ifndef FNM_LEADING_DIR +# define FNM_LEADING_DIR 0 +#endif + +verify (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS) + & (FNM_PATHNAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR + | FNM_CASEFOLD | FNM_EXTMATCH)) + == 0); -/* Verify a requirement at compile-time (unlike assert, which is runtime). */ -#define verify(name, assertion) struct name { char a[(assertion) ? 1 : -1]; } -verify (EXCLUDE_macros_do_not_collide_with_FNM_macros, - (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS) - & (FNM_FILE_NAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR - | FNM_CASEFOLD)) - == 0)); +/* Exclusion patterns are grouped into a singly-linked list of + "exclusion segments". Each segment represents a set of patterns + that can be matches using the same algorithm. Non-wildcard + patterns are kept in hash tables, to speed up searches. Wildcard + patterns are stored as arrays of patterns. */ + /* An exclude pattern-options pair. The options are fnmatch options ORed with EXCLUDE_* options. */ @@ -81,34 +77,196 @@ struct patopts int options; }; -/* An exclude list, of pattern-options pairs. */ +/* An array of pattern-options pairs. */ -struct exclude +struct exclude_pattern { struct patopts *exclude; size_t exclude_alloc; size_t exclude_count; }; +enum exclude_type + { + exclude_hash, /* a hash table of excluded names */ + exclude_pattern /* an array of exclude patterns */ + }; + +struct exclude_segment + { + struct exclude_segment *next; /* next segment in list */ + enum exclude_type type; /* type of this segment */ + int options; /* common options for this segment */ + union + { + Hash_table *table; /* for type == exclude_hash */ + struct exclude_pattern pat; /* for type == exclude_pattern */ + } v; + }; + +/* The exclude structure keeps a singly-linked list of exclude segments, + maintained in reverse order. */ +struct exclude + { + struct exclude_segment *head; + }; + +/* Return true if STR has or may have wildcards, when matched with OPTIONS. + Return false if STR definitely does not have wildcards. */ +bool +fnmatch_pattern_has_wildcards (const char *str, int options) +{ + while (1) + { + switch (*str++) + { + case '\\': + str += ! (options & FNM_NOESCAPE) && *str; + break; + + case '+': case '@': case '!': + if (options & FNM_EXTMATCH && *str == '(') + return true; + break; + + case '?': case '*': case '[': + return true; + + case '\0': + return false; + } + } +} + +static void +unescape_pattern (char *str) +{ + char const *q = str; + do + q += *q == '\\' && q[1]; + while ((*str++ = *q++)); +} + /* Return a newly allocated and empty exclude list. */ struct exclude * new_exclude (void) { - struct exclude *ex = (struct exclude *) xmalloc (sizeof *ex); - ex->exclude_count = 0; - ex->exclude_alloc = (1 << 6); /* This must be a power of 2. */ - ex->exclude = (struct patopts *) xmalloc (ex->exclude_alloc - * sizeof ex->exclude[0]); - return ex; + return xzalloc (sizeof *new_exclude ()); } -/* Free the storage associated with an exclude list. */ +/* Calculate the hash of string. */ +static size_t +string_hasher (void const *data, size_t n_buckets) +{ + char const *p = data; + return hash_string (p, n_buckets); +} + +/* Ditto, for case-insensitive hashes */ +static size_t +string_hasher_ci (void const *data, size_t n_buckets) +{ + char const *p = data; + mbui_iterator_t iter; + size_t value = 0; + + for (mbui_init (iter, p); mbui_avail (iter); mbui_advance (iter)) + { + mbchar_t m = mbui_cur (iter); + wchar_t wc; + + if (m.wc_valid) + wc = towlower (m.wc); + else + wc = *m.ptr; + value = (value * 31 + wc) % n_buckets; + } + + return value; +} + +/* compare two strings for equality */ +static bool +string_compare (void const *data1, void const *data2) +{ + char const *p1 = data1; + char const *p2 = data2; + return strcmp (p1, p2) == 0; +} + +/* compare two strings for equality, case-insensitive */ +static bool +string_compare_ci (void const *data1, void const *data2) +{ + char const *p1 = data1; + char const *p2 = data2; + return mbscasecmp (p1, p2) == 0; +} + +static void +string_free (void *data) +{ + free (data); +} + +/* Create new exclude segment of given TYPE and OPTIONS, and attach it + to the head of EX. */ +static void +new_exclude_segment (struct exclude *ex, enum exclude_type type, int options) +{ + struct exclude_segment *sp = xzalloc (sizeof (struct exclude_segment)); + sp->type = type; + sp->options = options; + switch (type) + { + case exclude_pattern: + break; + + case exclude_hash: + sp->v.table = hash_initialize (0, NULL, + (options & FNM_CASEFOLD) ? + string_hasher_ci + : string_hasher, + (options & FNM_CASEFOLD) ? + string_compare_ci + : string_compare, + string_free); + break; + } + sp->next = ex->head; + ex->head = sp; +} + +/* Free a single exclude segment */ +static void +free_exclude_segment (struct exclude_segment *seg) +{ + switch (seg->type) + { + case exclude_pattern: + free (seg->v.pat.exclude); + break; + + case exclude_hash: + hash_free (seg->v.table); + break; + } + free (seg); +} + +/* Free the storage associated with an exclude list. */ void free_exclude (struct exclude *ex) { - free (ex->exclude); + struct exclude_segment *seg; + for (seg = ex->head; seg; ) + { + struct exclude_segment *next = seg->next; + free_exclude_segment (seg); + seg = next; + } free (ex); } @@ -118,70 +276,176 @@ free_exclude (struct exclude *ex) static int fnmatch_no_wildcards (char const *pattern, char const *f, int options) { - if (! (options & FNM_CASEFOLD)) - return ((options & FNM_LEADING_DIR) - ? strcasecmp (pattern, f) - : strcmp (pattern, f)); - else + if (! (options & FNM_LEADING_DIR)) + return ((options & FNM_CASEFOLD) + ? mbscasecmp (pattern, f) + : strcmp (pattern, f)); + else if (! (options & FNM_CASEFOLD)) { size_t patlen = strlen (pattern); - int r = ((options & FNM_LEADING_DIR) - ? strncasecmp (pattern, f, patlen) - : strncmp (pattern, f, patlen)); + int r = strncmp (pattern, f, patlen); if (! r) - { - r = f[patlen]; - if (r == '/' && (options & FNM_LEADING_DIR)) - r = 0; - } + { + r = f[patlen]; + if (r == '/') + r = 0; + } + return r; + } + else + { + /* Walk through a copy of F, seeing whether P matches any prefix + of F. + + FIXME: This is an O(N**2) algorithm; it should be O(N). + Also, the copy should not be necessary. However, fixing this + will probably involve a change to the mbs* API. */ + + char *fcopy = xstrdup (f); + char *p; + int r; + for (p = fcopy; ; *p++ = '/') + { + p = strchr (p, '/'); + if (p) + *p = '\0'; + r = mbscasecmp (pattern, fcopy); + if (!p || r <= 0) + break; + } + free (fcopy); return r; } } +bool +exclude_fnmatch (char const *pattern, char const *f, int options) +{ + int (*matcher) (char const *, char const *, int) = + (options & EXCLUDE_WILDCARDS + ? fnmatch + : fnmatch_no_wildcards); + bool matched = ((*matcher) (pattern, f, options) == 0); + char const *p; + + if (! (options & EXCLUDE_ANCHORED)) + for (p = f; *p && ! matched; p++) + if (*p == '/' && p[1] != '/') + matched = ((*matcher) (pattern, p + 1, options) == 0); + + return matched; +} + +/* Return true if the exclude_pattern segment SEG matches F. */ + +static bool +file_pattern_matches (struct exclude_segment const *seg, char const *f) +{ + size_t exclude_count = seg->v.pat.exclude_count; + struct patopts const *exclude = seg->v.pat.exclude; + size_t i; + + for (i = 0; i < exclude_count; i++) + { + char const *pattern = exclude[i].pattern; + int options = exclude[i].options; + if (exclude_fnmatch (pattern, f, options)) + return true; + } + return false; +} + +/* Return true if the exclude_hash segment SEG matches F. + BUFFER is an auxiliary storage of the same length as F (with nul + terminator included) */ +static bool +file_name_matches (struct exclude_segment const *seg, char const *f, + char *buffer) +{ + int options = seg->options; + Hash_table *table = seg->v.table; + + do + { + /* initialize the pattern */ + strcpy (buffer, f); + + while (1) + { + if (hash_lookup (table, buffer)) + return true; + if (options & FNM_LEADING_DIR) + { + char *p = strrchr (buffer, '/'); + if (p) + { + *p = 0; + continue; + } + } + break; + } + + if (!(options & EXCLUDE_ANCHORED)) + { + f = strchr (f, '/'); + if (f) + f++; + } + else + break; + } + while (f); + + return false; +} + /* Return true if EX excludes F. */ bool -excluded_filename (struct exclude const *ex, char const *f) +excluded_file_name (struct exclude const *ex, char const *f) { - size_t exclude_count = ex->exclude_count; - - /* If no options are given, the default is to include. */ - if (exclude_count == 0) - return 0; - else + struct exclude_segment *seg; + bool invert = false; + char *filename = NULL; + + /* If no patterns are given, the default is to include. */ + if (!ex->head) + return false; + + /* Scan through the segments, reporting the status of the first match. + The segments are in reverse order, so this reports the status of + the last match in the original option list. */ + for (seg = ex->head; ; seg = seg->next) { - struct patopts const *exclude = ex->exclude; - size_t i; - - /* Otherwise, the default is the opposite of the first option. */ - bool excluded = !! (exclude[0].options & EXCLUDE_INCLUDE); - - /* Scan through the options, seeing whether they change F from - excluded to included or vice versa. */ - for (i = 0; i < exclude_count; i++) - { - char const *pattern = exclude[i].pattern; - int options = exclude[i].options; - if (excluded == !! (options & EXCLUDE_INCLUDE)) - { - int (*matcher) PARAMS ((char const *, char const *, int)) = - (options & EXCLUDE_WILDCARDS - ? fnmatch - : fnmatch_no_wildcards); - bool matched = ((*matcher) (pattern, f, options) == 0); - char const *p; - - if (! (options & EXCLUDE_ANCHORED)) - for (p = f; *p && ! matched; p++) - if (*p == '/' && p[1] != '/') - matched = ((*matcher) (pattern, p + 1, options) == 0); - - excluded ^= matched; - } - } - - return excluded; + if (seg->type == exclude_hash) + { + if (!filename) + filename = xmalloc (strlen (f) + 1); + if (file_name_matches (seg, f, filename)) + break; + } + else + { + if (file_pattern_matches (seg, f)) + break; + } + + if (! seg->next) + { + /* If patterns are given but none match, the default is the + opposite of the last segment (i.e., the first in the + original option list). For example, in the command + 'grep -r --exclude="a*" --include="*b" pat dir', the + first option is --exclude so any file name matching + neither a* nor *b is included. */ + invert = true; + break; + } } + + free (filename); + return invert ^ ! (seg->options & EXCLUDE_INCLUDE); } /* Append to EX the exclusion PATTERN with OPTIONS. */ @@ -189,61 +453,79 @@ excluded_filename (struct exclude const *ex, char const *f) void add_exclude (struct exclude *ex, char const *pattern, int options) { - struct patopts *patopts; + struct exclude_segment *seg; - if (ex->exclude_alloc <= ex->exclude_count) + if ((options & EXCLUDE_WILDCARDS) + && fnmatch_pattern_has_wildcards (pattern, options)) { - size_t s = 2 * ex->exclude_alloc; - if (! (0 < s && s <= SIZE_MAX / sizeof ex->exclude[0])) - xalloc_die (); - ex->exclude_alloc = s; - ex->exclude = (struct patopts *) xrealloc (ex->exclude, - s * sizeof ex->exclude[0]); + struct exclude_pattern *pat; + struct patopts *patopts; + + if (! (ex->head && ex->head->type == exclude_pattern + && ((ex->head->options & EXCLUDE_INCLUDE) + == (options & EXCLUDE_INCLUDE)))) + new_exclude_segment (ex, exclude_pattern, options); + seg = ex->head; + + pat = &seg->v.pat; + if (pat->exclude_count == pat->exclude_alloc) + pat->exclude = x2nrealloc (pat->exclude, &pat->exclude_alloc, + sizeof *pat->exclude); + patopts = &pat->exclude[pat->exclude_count++]; + patopts->pattern = pattern; + patopts->options = options; + } + else + { + char *str, *p; + int exclude_hash_flags = (EXCLUDE_INCLUDE | EXCLUDE_ANCHORED + | FNM_LEADING_DIR | FNM_CASEFOLD); + if (! (ex->head && ex->head->type == exclude_hash + && ((ex->head->options & exclude_hash_flags) + == (options & exclude_hash_flags)))) + new_exclude_segment (ex, exclude_hash, options); + seg = ex->head; + + str = xstrdup (pattern); + if ((options & (EXCLUDE_WILDCARDS | FNM_NOESCAPE)) == EXCLUDE_WILDCARDS) + unescape_pattern (str); + p = hash_insert (seg->v.table, str); + if (p != str) + free (str); } - - patopts = &ex->exclude[ex->exclude_count++]; - patopts->pattern = pattern; - patopts->options = options; } -/* Use ADD_FUNC to append to EX the patterns in FILENAME, each with - OPTIONS. LINE_END terminates each pattern in the file. Return -1 - on failure, 0 on success. */ +/* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with + OPTIONS. LINE_END terminates each pattern in the file. If + LINE_END is a space character, ignore trailing spaces and empty + lines in FILE. Return -1 on failure, 0 on success. */ int -add_exclude_file (void (*add_func) PARAMS ((struct exclude *, - char const *, int)), - struct exclude *ex, char const *filename, int options, - char line_end) +add_exclude_file (void (*add_func) (struct exclude *, char const *, int), + struct exclude *ex, char const *file_name, int options, + char line_end) { - bool use_stdin = filename[0] == '-' && !filename[1]; + bool use_stdin = file_name[0] == '-' && !file_name[1]; FILE *in; - char *buf; + char *buf = NULL; char *p; char const *pattern; char const *lim; - size_t buf_alloc = (1 << 10); /* This must be a power of two. */ + size_t buf_alloc = 0; size_t buf_count = 0; int c; int e = 0; if (use_stdin) in = stdin; - else if (! (in = fopen (filename, "r"))) + else if (! (in = fopen (file_name, "r"))) return -1; - buf = xmalloc (buf_alloc); - while ((c = getc (in)) != EOF) { - buf[buf_count++] = c; if (buf_count == buf_alloc) - { - buf_alloc *= 2; - if (! buf_alloc) - xalloc_die (); - buf = xrealloc (buf, buf_alloc); - } + buf = x2realloc (buf, &buf_alloc); + buf[buf_count++] = c; } if (ferror (in)) @@ -253,13 +535,29 @@ add_exclude_file (void (*add_func) PARAMS ((struct exclude *, e = errno; buf = xrealloc (buf, buf_count + 1); + buf[buf_count] = line_end; + lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end); + pattern = buf; - for (pattern = p = buf, lim = buf + buf_count; p <= lim; p++) - if (p < lim ? *p == line_end : buf < p && p[-1]) + for (p = buf; p < lim; p++) + if (*p == line_end) { - *p = '\0'; - (*add_func) (ex, pattern, options); - pattern = p + 1; + char *pattern_end = p; + + if (isspace ((unsigned char) line_end)) + { + for (; ; pattern_end--) + if (pattern_end == pattern) + goto next_pattern; + else if (! isspace ((unsigned char) pattern_end[-1])) + break; + } + + *pattern_end = '\0'; + (*add_func) (ex, pattern, options); + + next_pattern: + pattern = p + 1; } errno = e;