1 /* exclude.c -- exclude file names
3 Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2012 Free Software
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 /* Written by Paul Eggert <eggert@twinsun.com>
20 and Sergey Poznyakoff <gray@gnu.org>.
21 Thanks to Phil Proudman <phil@proudman51.freeserve.co.uk>
22 for improvement suggestions. */
44 # include "unlocked-io.h"
47 /* Non-GNU systems lack these options, so we don't need to check them. */
49 # define FNM_CASEFOLD 0
52 # define FNM_EXTMATCH 0
54 #ifndef FNM_LEADING_DIR
55 # define FNM_LEADING_DIR 0
58 verify (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS)
59 & (FNM_PATHNAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR
60 | FNM_CASEFOLD | FNM_EXTMATCH))
64 /* Exclusion patterns are grouped into a singly-linked list of
65 "exclusion segments". Each segment represents a set of patterns
66 that can be matches using the same algorithm. Non-wildcard
67 patterns are kept in hash tables, to speed up searches. Wildcard
68 patterns are stored as arrays of patterns. */
71 /* An exclude pattern-options pair. The options are fnmatch options
72 ORed with EXCLUDE_* options. */
80 /* An array of pattern-options pairs. */
82 struct exclude_pattern
84 struct patopts *exclude;
91 exclude_hash, /* a hash table of excluded names */
92 exclude_pattern /* an array of exclude patterns */
95 struct exclude_segment
97 struct exclude_segment *next; /* next segment in list */
98 enum exclude_type type; /* type of this segment */
99 int options; /* common options for this segment */
102 Hash_table *table; /* for type == exclude_hash */
103 struct exclude_pattern pat; /* for type == exclude_pattern */
107 /* The exclude structure keeps a singly-linked list of exclude segments,
108 maintained in reverse order. */
111 struct exclude_segment *head;
114 /* Return true if STR has or may have wildcards, when matched with OPTIONS.
115 Return false if STR definitely does not have wildcards. */
117 fnmatch_pattern_has_wildcards (const char *str, int options)
124 str += ! (options & FNM_NOESCAPE) && *str;
127 case '+': case '@': case '!':
128 if (options & FNM_EXTMATCH && *str == '(')
132 case '?': case '*': case '[':
142 unescape_pattern (char *str)
146 q += *q == '\\' && q[1];
147 while ((*str++ = *q++));
150 /* Return a newly allocated and empty exclude list. */
155 return xzalloc (sizeof *new_exclude ());
158 /* Calculate the hash of string. */
160 string_hasher (void const *data, size_t n_buckets)
162 char const *p = data;
163 return hash_string (p, n_buckets);
166 /* Ditto, for case-insensitive hashes */
168 string_hasher_ci (void const *data, size_t n_buckets)
170 char const *p = data;
171 mbui_iterator_t iter;
174 for (mbui_init (iter, p); mbui_avail (iter); mbui_advance (iter))
176 mbchar_t m = mbui_cur (iter);
180 wc = towlower (m.wc);
184 value = (value * 31 + wc) % n_buckets;
190 /* compare two strings for equality */
192 string_compare (void const *data1, void const *data2)
194 char const *p1 = data1;
195 char const *p2 = data2;
196 return strcmp (p1, p2) == 0;
199 /* compare two strings for equality, case-insensitive */
201 string_compare_ci (void const *data1, void const *data2)
203 char const *p1 = data1;
204 char const *p2 = data2;
205 return mbscasecmp (p1, p2) == 0;
209 string_free (void *data)
214 /* Create new exclude segment of given TYPE and OPTIONS, and attach it
215 to the head of EX. */
217 new_exclude_segment (struct exclude *ex, enum exclude_type type, int options)
219 struct exclude_segment *sp = xzalloc (sizeof (struct exclude_segment));
221 sp->options = options;
224 case exclude_pattern:
228 sp->v.table = hash_initialize (0, NULL,
229 (options & FNM_CASEFOLD) ?
232 (options & FNM_CASEFOLD) ?
242 /* Free a single exclude segment */
244 free_exclude_segment (struct exclude_segment *seg)
248 case exclude_pattern:
249 free (seg->v.pat.exclude);
253 hash_free (seg->v.table);
259 /* Free the storage associated with an exclude list. */
261 free_exclude (struct exclude *ex)
263 struct exclude_segment *seg;
264 for (seg = ex->head; seg; )
266 struct exclude_segment *next = seg->next;
267 free_exclude_segment (seg);
273 /* Return zero if PATTERN matches F, obeying OPTIONS, except that
274 (unlike fnmatch) wildcards are disabled in PATTERN. */
277 fnmatch_no_wildcards (char const *pattern, char const *f, int options)
279 if (! (options & FNM_LEADING_DIR))
280 return ((options & FNM_CASEFOLD)
281 ? mbscasecmp (pattern, f)
282 : strcmp (pattern, f));
283 else if (! (options & FNM_CASEFOLD))
285 size_t patlen = strlen (pattern);
286 int r = strncmp (pattern, f, patlen);
297 /* Walk through a copy of F, seeing whether P matches any prefix
300 FIXME: This is an O(N**2) algorithm; it should be O(N).
301 Also, the copy should not be necessary. However, fixing this
302 will probably involve a change to the mbs* API. */
304 char *fcopy = xstrdup (f);
307 for (p = fcopy; ; *p++ = '/')
312 r = mbscasecmp (pattern, fcopy);
322 exclude_fnmatch (char const *pattern, char const *f, int options)
324 int (*matcher) (char const *, char const *, int) =
325 (options & EXCLUDE_WILDCARDS
327 : fnmatch_no_wildcards);
328 bool matched = ((*matcher) (pattern, f, options) == 0);
331 if (! (options & EXCLUDE_ANCHORED))
332 for (p = f; *p && ! matched; p++)
333 if (*p == '/' && p[1] != '/')
334 matched = ((*matcher) (pattern, p + 1, options) == 0);
339 /* Return true if the exclude_pattern segment SEG matches F. */
342 file_pattern_matches (struct exclude_segment const *seg, char const *f)
344 size_t exclude_count = seg->v.pat.exclude_count;
345 struct patopts const *exclude = seg->v.pat.exclude;
348 for (i = 0; i < exclude_count; i++)
350 char const *pattern = exclude[i].pattern;
351 int options = exclude[i].options;
352 if (exclude_fnmatch (pattern, f, options))
358 /* Return true if the exclude_hash segment SEG matches F.
359 BUFFER is an auxiliary storage of the same length as F (with nul
360 terminator included) */
362 file_name_matches (struct exclude_segment const *seg, char const *f,
365 int options = seg->options;
366 Hash_table *table = seg->v.table;
370 /* initialize the pattern */
375 if (hash_lookup (table, buffer))
377 if (options & FNM_LEADING_DIR)
379 char *p = strrchr (buffer, '/');
389 if (!(options & EXCLUDE_ANCHORED))
403 /* Return true if EX excludes F. */
406 excluded_file_name (struct exclude const *ex, char const *f)
408 struct exclude_segment *seg;
410 char *filename = NULL;
412 /* If no patterns are given, the default is to include. */
416 /* Scan through the segments, reporting the status of the first match.
417 The segments are in reverse order, so this reports the status of
418 the last match in the original option list. */
419 for (seg = ex->head; ; seg = seg->next)
421 if (seg->type == exclude_hash)
424 filename = xmalloc (strlen (f) + 1);
425 if (file_name_matches (seg, f, filename))
430 if (file_pattern_matches (seg, f))
436 /* If patterns are given but none match, the default is the
437 opposite of the last segment (i.e., the first in the
438 original option list). For example, in the command
439 'grep -r --exclude="a*" --include="*b" pat dir', the
440 first option is --exclude so any file name matching
441 neither a* nor *b is included. */
448 return invert ^ ! (seg->options & EXCLUDE_INCLUDE);
451 /* Append to EX the exclusion PATTERN with OPTIONS. */
454 add_exclude (struct exclude *ex, char const *pattern, int options)
456 struct exclude_segment *seg;
458 if ((options & EXCLUDE_WILDCARDS)
459 && fnmatch_pattern_has_wildcards (pattern, options))
461 struct exclude_pattern *pat;
462 struct patopts *patopts;
464 if (! (ex->head && ex->head->type == exclude_pattern
465 && ((ex->head->options & EXCLUDE_INCLUDE)
466 == (options & EXCLUDE_INCLUDE))))
467 new_exclude_segment (ex, exclude_pattern, options);
471 if (pat->exclude_count == pat->exclude_alloc)
472 pat->exclude = x2nrealloc (pat->exclude, &pat->exclude_alloc,
473 sizeof *pat->exclude);
474 patopts = &pat->exclude[pat->exclude_count++];
475 patopts->pattern = pattern;
476 patopts->options = options;
481 int exclude_hash_flags = (EXCLUDE_INCLUDE | EXCLUDE_ANCHORED
482 | FNM_LEADING_DIR | FNM_CASEFOLD);
483 if (! (ex->head && ex->head->type == exclude_hash
484 && ((ex->head->options & exclude_hash_flags)
485 == (options & exclude_hash_flags))))
486 new_exclude_segment (ex, exclude_hash, options);
489 str = xstrdup (pattern);
490 if ((options & (EXCLUDE_WILDCARDS | FNM_NOESCAPE)) == EXCLUDE_WILDCARDS)
491 unescape_pattern (str);
492 p = hash_insert (seg->v.table, str);
498 /* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with
499 OPTIONS. LINE_END terminates each pattern in the file. If
500 LINE_END is a space character, ignore trailing spaces and empty
501 lines in FILE. Return -1 on failure, 0 on success. */
504 add_exclude_file (void (*add_func) (struct exclude *, char const *, int),
505 struct exclude *ex, char const *file_name, int options,
508 bool use_stdin = file_name[0] == '-' && !file_name[1];
514 size_t buf_alloc = 0;
515 size_t buf_count = 0;
521 else if (! (in = fopen (file_name, "r")))
524 while ((c = getc (in)) != EOF)
526 if (buf_count == buf_alloc)
527 buf = x2realloc (buf, &buf_alloc);
528 buf[buf_count++] = c;
534 if (!use_stdin && fclose (in) != 0)
537 buf = xrealloc (buf, buf_count + 1);
538 buf[buf_count] = line_end;
539 lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end);
542 for (p = buf; p < lim; p++)
545 char *pattern_end = p;
547 if (isspace ((unsigned char) line_end))
549 for (; ; pattern_end--)
550 if (pattern_end == pattern)
552 else if (! isspace ((unsigned char) pattern_end[-1]))
557 (*add_func) (ex, pattern, options);