1 /* exclude.c -- exclude file names
3 Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2012 Free Software
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 /* Written by Paul Eggert <eggert@twinsun.com>
20 and Sergey Poznyakoff <gray@gnu.org>.
21 Thanks to Phil Proudman <phil@proudman51.freeserve.co.uk>
22 for improvement suggestions. */
44 # include "unlocked-io.h"
47 /* Non-GNU systems lack these options, so we don't need to check them. */
/* Fallback definitions: each flag below is given the value 0, so OR-ing
   it into an option mask or testing it with & has no effect.
   NOTE(review): the guards around the first two defines are not visible
   in this excerpt -- presumably #ifndef, as for FNM_LEADING_DIR; confirm
   against the full file.  */
49 # define FNM_CASEFOLD 0
52 # define FNM_EXTMATCH 0
54 #ifndef FNM_LEADING_DIR
55 # define FNM_LEADING_DIR 0
/* Compile-time check involving the overlap between the EXCLUDE_* option
   bits and the fnmatch flag bits; both kinds of flag are ORed together
   into a single options word in this file.
   NOTE(review): the end of the expression is cut off here; presumably it
   requires the intersection to be 0 -- confirm.  */
58 verify (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS)
59 & (FNM_PATHNAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR
60 | FNM_CASEFOLD | FNM_EXTMATCH))
64 /* Exclusion patterns are grouped into a singly-linked list of
65 "exclusion segments". Each segment represents a set of patterns
66 that can be matched using the same algorithm. Non-wildcard
67 patterns are kept in hash tables, to speed up searches. Wildcard
68 patterns are stored as arrays of patterns. */
71 /* An exclude pattern-options pair. The options are fnmatch options
72 ORed with EXCLUDE_* options. */
80 /* An array of pattern-options pairs. */
/* Growable array of pattern-options pairs.  Besides EXCLUDE, code in this
   file also uses exclude_count and exclude_alloc members of this struct
   (see the x2nrealloc call in add_exclude); their declarations are not
   visible in this excerpt.  */
82 struct exclude_pattern
/* The array itself; released with free () in free_exclude_segment.  */
84 struct patopts *exclude;
/* Kinds of exclusion segment.  The value stored in a segment's TYPE
   member tells which of its v.table / v.pat members is meaningful.
   NOTE(review): the enum header line is missing from this excerpt; the
   tag exclude_type is taken from its use in struct exclude_segment.  */
91 exclude_hash, /* a hash table of excluded names */
92 exclude_pattern /* an array of exclude patterns */
/* One node of the exclusion list: a group of patterns that are matched
   the same way.
   NOTE(review): TABLE and PAT are accessed elsewhere in this file as
   v.table and v.pat, so they sit inside an enclosing member V whose
   declaration is not visible here -- presumably a union selected by
   TYPE; confirm against the full file.  */
95 struct exclude_segment
97 struct exclude_segment *next; /* next segment in list */
98 enum exclude_type type; /* type of this segment */
/* OPTIONS is compared against a new pattern's options in add_exclude to
   decide whether the pattern may join this segment.  */
99 int options; /* common options for this segment */
102 Hash_table *table; /* for type == exclude_hash */
103 struct exclude_pattern pat; /* for type == exclude_pattern */
107 /* The exclude structure keeps a singly-linked list of exclude segments */
/* HEAD is where excluded_file_name starts its walk along the next links.
   TAIL is the segment add_exclude inspects to see whether a new pattern
   can be added to it instead of starting a new segment.  */
110 struct exclude_segment *head, *tail;
113 /* Return true if STR has or may have wildcards, when matched with OPTIONS.
114 Return false if STR definitely does not have wildcards. */
/* NOTE(review): this excerpt omits several lines of the function (return
   type, the scanning loop and switch header, and the return statements),
   so the comments below describe only the statements that are visible.  */
116 fnmatch_pattern_has_wildcards (const char *str, int options)
/* Step over one additional character, but only when backslash escapes
   are honoured (FNM_NOESCAPE clear) and the string has not already ended.
   Presumably this runs for a backslash, so that the escaped character is
   not examined as a wildcard -- the case label is not visible; confirm.  */
123 str += ! (options & FNM_NOESCAPE) && *str;
/* '+', '@' and '!' are tested only as the introducer of a parenthesised
   group, and only when FNM_EXTMATCH is requested.  Where FNM_EXTMATCH is
   the 0 fallback defined earlier in this file, the test below can never
   succeed.  */
126 case '+': case '@': case '!':
127 if (options & FNM_EXTMATCH && *str == '(')
/* '?', '*' and '[' share a single case.  */
131 case '?': case '*': case '[':
/* Remove backslash escapes from STR, rewriting it in place.
   NOTE(review): the return type, the declaration of Q and the loop
   keyword(s) are missing from this excerpt; Q is presumably a read
   cursor that starts at STR -- confirm against the full file.  */
141 unescape_pattern (char *str)
/* If Q is at a backslash that is followed by another character, skip the
   backslash so that the following character is the one copied.  A lone
   backslash at the very end of the string (q[1] == 0) is not skipped.  */
145 q += *q == '\\' && q[1];
/* Copy one character from Q to STR and advance both; the condition
   becomes false once the terminating nul has been copied.  */
146 while ((*str++ = *q++));
149 /* Return a newly allocated and empty exclude list. */
/* sizeof does not evaluate its operand, so new_exclude is not called
   recursively here: the expression only yields the size of the object
   type that new_exclude's return value points to.
   NOTE(review): xzalloc is presumably the zero-filling allocator, which
   would give a list with null head and tail -- confirm.  */
154 return xzalloc (sizeof *new_exclude ());
157 /* Calculate the hash of a string. */
/* DATA is an untyped pointer that is treated as a character string;
   the hash is computed by hash_string for a table of N_BUCKETS buckets.
   NOTE(review): the return type line is missing from this excerpt.  */
159 string_hasher (void const *data, size_t n_buckets)
/* Give the untyped pointer its string type before hashing.  */
161 char const *p = data;
162 return hash_string (p, n_buckets);
165 /* Calculate the hash of a string, ignoring case. */
/* NOTE(review): the return type, the declarations of VALUE and WC, any
   code around the towlower call, and the return statement are missing
   from this excerpt; the comments cover the visible statements only.  */
167 string_hasher_ci (void const *data, size_t n_buckets)
169 char const *p = data;
170 mbui_iterator_t iter;
/* Visit the string one multibyte character at a time rather than one
   byte at a time.  */
173 for (mbui_init (iter, p); mbui_avail (iter); mbui_advance (iter))
175 mbchar_t m = mbui_cur (iter);
/* Fold the wide character to lower case so that strings differing only
   in case contribute the same values to the hash.  */
179 wc = towlower (m.wc);
/* Multiply-by-31 accumulation, reduced modulo the bucket count at each
   step so VALUE always stays below N_BUCKETS.  */
183 value = (value * 31 + wc) % n_buckets;
189 /* Compare two strings for equality. */
/* Evaluates to true when the two strings are byte-for-byte equal.
   NOTE(review): the return type line is missing from this excerpt.  */
191 string_compare (void const *data1, void const *data2)
193 char const *p1 = data1;
194 char const *p2 = data2;
/* strcmp yields 0 for equal strings; "== 0" turns that into a truth
   value (equal -> true).  */
195 return strcmp (p1, p2) == 0;
198 /* Compare two strings for equality, ignoring case. */
/* Evaluates to true when mbscasecmp reports the two strings as equal.
   NOTE(review): the return type line is missing from this excerpt.  */
200 string_compare_ci (void const *data1, void const *data2)
202 char const *p1 = data1;
203 char const *p2 = data2;
/* As with strcmp, a zero result is taken to mean "equal".  */
204 return mbscasecmp (p1, p2) == 0;
/* NOTE(review): only the parameter list of this function is visible in
   this excerpt; its return type and body are missing.  Presumably it
   releases DATA (the hashed names are xstrdup copies made in
   add_exclude) -- confirm against the full file.  */
208 string_free (void *data)
213 /* Create a new exclude segment of the given TYPE and OPTIONS, and
214 attach it to the tail of the list in EX. */
215 static struct exclude_segment *
216 new_exclude_segment (struct exclude *ex, enum exclude_type type, int options)
/* NOTE(review): xzalloc is presumably the zero-filling allocator, which
   would leave next null and the pattern array empty -- confirm.  */
218 struct exclude_segment *sp = xzalloc (sizeof (struct exclude_segment));
220 sp->options = options;
/* NOTE(review): the switch header, the body of this case, the linking of
   SP into EX and the return statement are not visible in this excerpt.  */
223 case exclude_pattern:
/* Create the segment's hash table.  Each FNM_CASEFOLD test chooses
   between two arguments that are cut off in this excerpt; presumably the
   case-insensitive or case-sensitive hasher and comparator defined
   earlier in this file -- confirm.  */
227 sp->v.table = hash_initialize (0, NULL,
228 (options & FNM_CASEFOLD) ?
231 (options & FNM_CASEFOLD) ?
245 /* Free a single exclude segment */
/* NOTE(review): the return type, the switch header, the exclude_hash
   case label and whatever releases SEG itself are not visible in this
   excerpt.  */
247 free_exclude_segment (struct exclude_segment *seg)
251 case exclude_pattern:
/* Only the array is released here; the pattern strings it points to were
   stored by pointer in add_exclude, not copied.  */
252 free (seg->v.pat.exclude);
/* Release the hash table of a hash-type segment.  */
256 hash_free (seg->v.table);
262 /* Free the storage associated with an exclude list. */
/* NOTE(review): the return type and the end of the function (advancing
   SEG, and whatever releases EX itself) are not visible in this
   excerpt.  */
264 free_exclude (struct exclude *ex)
266 struct exclude_segment *seg;
/* No increment expression: the next pointer has to be fetched before the
   current segment is released.  */
267 for (seg = ex->head; seg; )
/* Remember the successor first, so the list can still be followed after
   SEG has been passed to free_exclude_segment.  */
269 struct exclude_segment *next = seg->next;
270 free_exclude_segment (seg);
276 /* Return zero if PATTERN matches F, obeying OPTIONS, except that
277 (unlike fnmatch) wildcards are disabled in PATTERN. */
/* NOTE(review): the return type and parts of the second and third
   branches are missing from this excerpt; comments cover the visible
   statements only.  */
280 fnmatch_no_wildcards (char const *pattern, char const *f, int options)
/* Without FNM_LEADING_DIR the whole of F must equal PATTERN: a plain
   string comparison, case-folded when FNM_CASEFOLD is set.  */
282 if (! (options & FNM_LEADING_DIR))
283 return ((options & FNM_CASEFOLD)
284 ? mbscasecmp (pattern, f)
285 : strcmp (pattern, f));
/* FNM_LEADING_DIR, case-sensitive: compare only the first strlen
   (PATTERN) bytes of F.  NOTE(review): what is checked at f[patlen]
   when this prefix matches is not visible here.  */
286 else if (! (options & FNM_CASEFOLD))
288 size_t patlen = strlen (pattern);
289 int r = strncmp (pattern, f, patlen);
300 /* Walk through a copy of F, seeing whether P matches any prefix
303 FIXME: This is an O(N**2) algorithm; it should be O(N).
304 Also, the copy should not be necessary. However, fixing this
305 will probably involve a change to the mbs* API. */
/* A private, writable copy of F.  */
307 char *fcopy = xstrdup (f);
/* The step expression stores '/' at P and moves past it.  Presumably the
   missing loop body replaced a '/' with a nul so FCOPY is a prefix of F,
   and this puts the slash back -- confirm.  */
310 for (p = fcopy; ; *p++ = '/')
/* Case-insensitive comparison of PATTERN against FCOPY as it currently
   stands.  */
315 r = mbscasecmp (pattern, fcopy);
/* Match PATTERN against F according to OPTIONS.
   NOTE(review): the return type, the declaration of P and the return
   statement are missing from this excerpt; presumably MATCHED is
   returned -- confirm.  */
325 exclude_fnmatch (char const *pattern, char const *f, int options)
/* Choose the matching routine once.  The alternative used when
   EXCLUDE_WILDCARDS is set is cut off in this excerpt (presumably
   fnmatch); otherwise fnmatch_no_wildcards is used.  Either way a result
   of 0 is treated as "matched".  */
327 int (*matcher) (char const *, char const *, int) =
328 (options & EXCLUDE_WILDCARDS
330 : fnmatch_no_wildcards);
/* First attempt: against F as a whole.  */
331 bool matched = ((*matcher) (pattern, f, options) == 0);
/* Unless the pattern is anchored, also try it against each suffix of F
   that starts just after a '/'.  The p[1] != '/' test skips positions
   where the slash is followed by another slash.  The loop stops at the
   first match.  */
334 if (! (options & EXCLUDE_ANCHORED))
335 for (p = f; *p && ! matched; p++)
336 if (*p == '/' && p[1] != '/')
337 matched = ((*matcher) (pattern, p + 1, options) == 0);
342 /* Return true if the exclude_pattern segment SEG excludes F. */
/* NOTE(review): the return type, the declaration of I, the statement
   controlled by the final if, and the final return are missing from this
   excerpt.  */
345 excluded_file_pattern_p (struct exclude_segment const *seg, char const *f)
347 size_t exclude_count = seg->v.pat.exclude_count;
348 struct patopts const *exclude = seg->v.pat.exclude;
/* The starting value comes from the first entry's EXCLUDE_INCLUDE bit;
   "!!" reduces the masked value to exactly 0 or 1.  Entry 0 is read
   unconditionally, so the segment is assumed to hold at least one
   pattern (presumably guaranteed by add_exclude -- confirm).  */
350 bool excluded = !! (exclude[0].options & EXCLUDE_INCLUDE);
352 /* Scan through the options, until they change excluded */
353 for (i = 0; i < exclude_count; i++)
/* Each entry is matched with its own options, not with the segment-wide
   seg->options.  */
355 char const *pattern = exclude[i].pattern;
356 int options = exclude[i].options;
357 if (exclude_fnmatch (pattern, f, options))
363 /* Return true if the exclude_hash segment SEG excludes F.
364 BUFFER is auxiliary storage of the same length as F (nul
365 terminator included). */
/* NOTE(review): this excerpt omits the return type, the remainder of the
   parameter list (the BUFFER parameter used below), the loop structure
   and the return statements.  Comments cover the visible lines only.  */
367 excluded_file_name_p (struct exclude_segment const *seg, char const *f,
370 int options = seg->options;
/* Starting value from the segment's EXCLUDE_INCLUDE bit; "!!" reduces
   the masked value to exactly 0 or 1.  */
371 bool excluded = !! (options & EXCLUDE_INCLUDE);
372 Hash_table *table = seg->v.table;
376 /* initialize the pattern */
/* Look up the current contents of BUFFER in the segment's table; a
   non-null result means the name is present.  */
381 if (hash_lookup (table, buffer))
/* With FNM_LEADING_DIR, locate the last '/' in BUFFER.  Presumably
   BUFFER is then cut there so that successively shorter leading
   directories are looked up -- the following lines are not visible;
   confirm.  */
383 if (options & FNM_LEADING_DIR)
385 char *p = strrchr (buffer, '/');
/* Extra handling for unanchored segments; its body is not visible in
   this excerpt.  */
395 if (!(options & EXCLUDE_ANCHORED))
408 /* Return true if EX excludes F. */
/* NOTE(review): this excerpt omits the return type, the declarations of
   EXCLUDED and RC, the empty-list test, the switch header and the
   exclude_hash label, how RC updates EXCLUDED, and the cleanup and
   return.  */
411 excluded_file_name (struct exclude const *ex, char const *f)
413 struct exclude_segment *seg;
/* Scratch buffer for the hash-segment lookup; starts out unallocated.  */
415 char *filename = NULL;
417 /* If no patterns are given, the default is to include. */
421 /* Otherwise, the default is the opposite of the first option. */
422 excluded = !! (ex->head->options & EXCLUDE_INCLUDE);
423 /* Scan through the segments, seeing whether they change status from
424 excluded to included or vice versa. */
425 for (seg = ex->head; seg; seg = seg->next)
/* Pattern segments are matched directly against F.  */
431 case exclude_pattern:
432 rc = excluded_file_pattern_p (seg, f);
/* Hash segments need a writable buffer as large as F including its
   terminating nul, which is passed as the third argument.
   NOTE(review): whether this allocation is guarded so it happens only
   once, and where FILENAME is freed, is not visible here -- confirm.  */
437 filename = xmalloc (strlen (f) + 1);
438 rc = excluded_file_name_p (seg, f, filename);
454 /* Append to EX the exclusion PATTERN with OPTIONS. */
/* NOTE(review): this excerpt omits the return type, the braces and else
   keywords, the statements that reuse ex->tail and set PAT, the
   declarations of STR and P, and the handling of hash_insert's result.  */
457 add_exclude (struct exclude *ex, char const *pattern, int options)
459 struct exclude_segment *seg;
/* Wildcard matching must be both requested and actually needed by this
   pattern for it to go into a pattern-array segment.  */
461 if ((options & EXCLUDE_WILDCARDS)
462 && fnmatch_pattern_has_wildcards (pattern, options))
464 struct exclude_pattern *pat;
465 struct patopts *patopts;
/* Test whether the last segment is a pattern segment with the same
   include/exclude sense.  Only EXCLUDE_INCLUDE is compared, because each
   array entry carries its own full options.  */
467 if (ex->tail && ex->tail->type == exclude_pattern
468 && ((ex->tail->options & EXCLUDE_INCLUDE) ==
469 (options & EXCLUDE_INCLUDE)))
472 seg = new_exclude_segment (ex, exclude_pattern, options);
/* Grow the array when it is full; x2nrealloc updates exclude_alloc
   through the pointer it is given.  */
475 if (pat->exclude_count == pat->exclude_alloc)
476 pat->exclude = x2nrealloc (pat->exclude, &pat->exclude_alloc,
477 sizeof *pat->exclude);
/* Claim the next free slot.  PATTERN is stored by pointer, not copied,
   so the caller's string must stay valid for as long as EX is used.  */
478 patopts = &pat->exclude[pat->exclude_count++];
479 patopts->pattern = pattern;
480 patopts->options = options;
/* A hash segment has one options word for every name in it, so all of
   these flags must agree before the last segment is considered a
   match.  */
485 #define EXCLUDE_HASH_FLAGS (EXCLUDE_INCLUDE|EXCLUDE_ANCHORED|\
486 FNM_LEADING_DIR|FNM_CASEFOLD)
487 if (ex->tail && ex->tail->type == exclude_hash
488 && ((ex->tail->options & EXCLUDE_HASH_FLAGS) ==
489 (options & EXCLUDE_HASH_FLAGS)))
492 seg = new_exclude_segment (ex, exclude_hash, options);
/* Unlike the wildcard branch, the table is given a private copy of the
   pattern.  */
494 str = xstrdup (pattern);
/* Strip backslash escapes only when the pattern was given as a wildcard
   pattern (EXCLUDE_WILDCARDS set) with escapes enabled (FNM_NOESCAPE
   clear).  */
495 if ((options & (EXCLUDE_WILDCARDS | FNM_NOESCAPE)) == EXCLUDE_WILDCARDS)
496 unescape_pattern (str);
497 p = hash_insert (seg->v.table, str);
503 /* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with
504 OPTIONS. LINE_END terminates each pattern in the file. If
505 LINE_END is a space character, ignore trailing spaces and empty
506 lines in the file. Return -1 on failure, 0 on success. */
/* NOTE(review): this excerpt omits the return type, the final parameter
   (LINE_END, used below), several declarations (IN, BUF, P, PATTERN,
   LIM, C), the error-handling paths and the return statements.  */
509 add_exclude_file (void (*add_func) (struct exclude *, char const *, int),
510 struct exclude *ex, char const *file_name, int options,
/* A FILE_NAME of exactly "-" is treated specially; the use_stdin test
   further down shows that the stream is then not closed here.  */
513 bool use_stdin = file_name[0] == '-' && !file_name[1];
519 size_t buf_alloc = 0;
520 size_t buf_count = 0;
526 else if (! (in = fopen (file_name, "r")))
/* Read the whole input into BUF one character at a time, growing the
   buffer with x2realloc whenever it is full.  */
529 while ((c = getc (in)) != EOF)
531 if (buf_count == buf_alloc)
532 buf = x2realloc (buf, &buf_alloc);
533 buf[buf_count++] = c;
539 if (!use_stdin && fclose (in) != 0)
/* Resize to the exact length plus one byte, and store LINE_END in that
   extra byte so the last pattern is always followed by a terminator.  */
542 buf = xrealloc (buf, buf_count + 1);
543 buf[buf_count] = line_end;
/* LIM includes the extra terminator only when the data is non-empty and
   does not already end with LINE_END; otherwise it stops at the end of
   the data.  */
544 lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end);
547 for (p = buf; p < lim; p++)
550 char *pattern_end = p;
/* The cast to unsigned char keeps the isspace argument within the range
   the ctype functions accept.  */
552 if (isspace ((unsigned char) line_end))
/* Move PATTERN_END backwards past trailing whitespace, stopping if it
   reaches the start of the pattern.  */
554 for (; ; pattern_end--)
555 if (pattern_end == pattern)
557 else if (! isspace ((unsigned char) pattern_end[-1]))
/* Hand the pattern to the caller-supplied adder.  */
562 (*add_func) (ex, pattern, options);