1 /* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006
2 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19 /* Match STRING against the file name pattern PATTERN, returning zero if
20 it matches, nonzero if not. */
21 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
22 const CHAR *string_end, bool no_leading_period, int flags)
24 static const CHAR *END (const CHAR *patternp) internal_function;
28 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
29 bool no_leading_period, int flags)
31 register const CHAR *p = pattern, *n = string;
34 # if WIDE_CHAR_VERSION
35 const char *collseq = (const char *)
36 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
38 const UCHAR *collseq = (const UCHAR *)
39 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
43 while ((c = *p++) != L_('\0'))
45 bool new_no_leading_period = false;
51 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
55 res = EXT (c, p, n, string_end, no_leading_period,
63 else if (*n == L_('/') && (flags & FNM_FILE_NAME))
65 else if (*n == L_('.') && no_leading_period)
70 if (!(flags & FNM_NOESCAPE))
74 /* Trailing \ loses. */
78 if (n == string_end || FOLD ((UCHAR) *n) != c)
83 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
87 res = EXT (c, p, n, string_end, no_leading_period,
93 if (n != string_end && *n == L_('.') && no_leading_period)
96 for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
98 if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
100 const CHAR *endp = END (p);
103 /* This is a pattern. Skip over it. */
111 /* A ? needs to match one character. */
113 /* There isn't another character; no match. */
115 else if (*n == L_('/')
116 && __builtin_expect (flags & FNM_FILE_NAME, 0))
117 /* A slash does not match a wildcard under
121 /* One character of the string is consumed in matching
122 this ? wildcard, so *??? won't match if there are
123 less than three characters. */
129 /* The wildcard(s) is/are the last element of the pattern.
130 If the name is a file name and contains another slash
131 this means it cannot match, unless the FNM_LEADING_DIR
134 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
136 if (flags & FNM_FILE_NAME)
138 if (flags & FNM_LEADING_DIR)
142 if (MEMCHR (n, L_('/'), string_end - n) == NULL)
153 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
159 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
160 && (c == L_('@') || c == L_('+') || c == L_('!'))
163 int flags2 = ((flags & FNM_FILE_NAME)
164 ? flags : (flags & ~FNM_PERIOD));
165 bool no_leading_period2 = no_leading_period;
167 for (--p; n < endp; ++n, no_leading_period2 = false)
168 if (FCT (p, n, string_end, no_leading_period2, flags2)
172 else if (c == L_('/') && (flags & FNM_FILE_NAME))
174 while (n < string_end && *n != L_('/'))
176 if (n < string_end && *n == L_('/')
177 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
183 int flags2 = ((flags & FNM_FILE_NAME)
184 ? flags : (flags & ~FNM_PERIOD));
185 int no_leading_period2 = no_leading_period;
187 if (c == L_('\\') && !(flags & FNM_NOESCAPE))
190 for (--p; n < endp; ++n, no_leading_period2 = false)
191 if (FOLD ((UCHAR) *n) == c
192 && (FCT (p, n, string_end, no_leading_period2, flags2)
198 /* If we come here no match is possible with the wildcard. */
203 /* Nonzero if the sense of the character class is inverted. */
208 if (posixly_correct == 0)
209 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
214 if (*n == L_('.') && no_leading_period)
217 if (*n == L_('/') && (flags & FNM_FILE_NAME))
218 /* `/' cannot be matched. */
221 not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
225 fn = FOLD ((UCHAR) *n);
230 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
234 c = FOLD ((UCHAR) *p);
239 else if (c == L_('[') && *p == L_(':'))
241 /* Leave room for the null. */
242 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
244 #if defined _LIBC || WIDE_CHAR_SUPPORT
247 const CHAR *startp = p;
251 if (c1 == CHAR_CLASS_MAX_LENGTH)
252 /* The name is too long and therefore the pattern
257 if (c == L_(':') && p[1] == L_(']'))
262 if (c < L_('a') || c >= L_('z'))
264 /* This cannot possibly be a character class name.
265 Match it as a normal range. */
274 #if defined _LIBC || WIDE_CHAR_SUPPORT
275 wt = IS_CHAR_CLASS (str);
277 /* Invalid character class name. */
280 # if defined _LIBC && ! WIDE_CHAR_VERSION
281 /* The following code is glibc specific but does
282 there a good job in speeding up the code since
283 we can avoid the btowc() call. */
284 if (_ISCTYPE ((UCHAR) *n, wt))
287 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
291 if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
292 || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
293 || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
294 || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
295 || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
296 || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
297 || (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
298 || (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
299 || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
300 || (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
301 || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
302 || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
308 else if (c == L_('[') && *p == L_('='))
312 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
313 const CHAR *startp = p;
325 if (c != L_('=') || p[1] != L_(']'))
335 if ((UCHAR) *n == str[0])
340 const int32_t *table;
341 # if WIDE_CHAR_VERSION
342 const int32_t *weights;
343 const int32_t *extra;
345 const unsigned char *weights;
346 const unsigned char *extra;
348 const int32_t *indirect;
350 const UCHAR *cp = (const UCHAR *) str;
352 /* This #include defines a local function! */
353 # if WIDE_CHAR_VERSION
354 # include <locale/weightwc.h>
356 # include <locale/weight.h>
359 # if WIDE_CHAR_VERSION
360 table = (const int32_t *)
361 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
362 weights = (const int32_t *)
363 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
364 extra = (const int32_t *)
365 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
366 indirect = (const int32_t *)
367 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
369 table = (const int32_t *)
370 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
371 weights = (const unsigned char *)
372 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
373 extra = (const unsigned char *)
374 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
375 indirect = (const int32_t *)
376 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
382 /* We found a table entry. Now see whether the
383 character we are currently at has the same
384 equivalance class value. */
385 int len = weights[idx];
387 const UCHAR *np = (const UCHAR *) n;
389 idx2 = findidx (&np);
390 if (idx2 != 0 && len == weights[idx2])
395 && (weights[idx + 1 + cnt]
396 == weights[idx2 + 1 + cnt]))
408 else if (c == L_('\0'))
409 /* [ (unterminated) loses. */
413 bool is_range = false;
416 bool is_seqval = false;
418 if (c == L_('[') && *p == L_('.'))
421 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
422 const CHAR *startp = p;
428 if (c == L_('.') && p[1] == L_(']'))
438 /* We have to handling the symbols differently in
439 ranges since then the collation sequence is
441 is_range = *p == L_('-') && p[1] != L_('\0');
445 /* There are no names defined in the collation
446 data. Therefore we only accept the trivial
447 names consisting of the character itself. */
451 if (!is_range && *n == startp[1])
460 const int32_t *symb_table;
461 # ifdef WIDE_CHAR_VERSION
465 # define str (startp + 1)
467 const unsigned char *extra;
473 # ifdef WIDE_CHAR_VERSION
474 /* We have to convert the name to a single-byte
475 string. This is possible since the names
476 consist of ASCII characters and the internal
477 representation is UCS4. */
478 for (strcnt = 0; strcnt < c1; ++strcnt)
479 str[strcnt] = startp[1 + strcnt];
483 _NL_CURRENT_WORD (LC_COLLATE,
484 _NL_COLLATE_SYMB_HASH_SIZEMB);
485 symb_table = (const int32_t *)
486 _NL_CURRENT (LC_COLLATE,
487 _NL_COLLATE_SYMB_TABLEMB);
488 extra = (const unsigned char *)
489 _NL_CURRENT (LC_COLLATE,
490 _NL_COLLATE_SYMB_EXTRAMB);
492 /* Locate the character in the hashing table. */
493 hash = elem_hash (str, c1);
496 elem = hash % table_size;
497 if (symb_table[2 * elem] != 0)
499 second = hash % (table_size - 2) + 1;
503 /* First compare the hashing value. */
504 if (symb_table[2 * elem] == hash
506 == extra[symb_table[2 * elem + 1]])
508 &extra[symb_table[2 * elem
512 /* Yep, this is the entry. */
513 idx = symb_table[2 * elem + 1];
514 idx += 1 + extra[idx];
521 while (symb_table[2 * elem] != 0);
524 if (symb_table[2 * elem] != 0)
526 /* Compare the byte sequence but only if
527 this is not part of a range. */
528 # ifdef WIDE_CHAR_VERSION
531 idx += 1 + extra[idx];
532 /* Adjust for the alignment. */
533 idx = (idx + 3) & ~3;
535 wextra = (int32_t *) &extra[idx + 4];
540 # ifdef WIDE_CHAR_VERSION
542 (int32_t) c1 < wextra[idx];
544 if (n[c1] != wextra[1 + c1])
547 if ((int32_t) c1 == wextra[idx])
550 for (c1 = 0; c1 < extra[idx]; ++c1)
551 if (n[c1] != extra[1 + c1])
554 if (c1 == extra[idx])
559 /* Get the collation sequence value. */
561 # ifdef WIDE_CHAR_VERSION
562 cold = wextra[1 + wextra[idx]];
564 /* Adjust for the alignment. */
565 idx += 1 + extra[idx];
566 idx = (idx + 3) & ~4;
567 cold = *((int32_t *) &extra[idx]);
574 /* No valid character. Match it as a
576 if (!is_range && *n == str[0])
593 /* We have to handling the symbols differently in
594 ranges since then the collation sequence is
596 is_range = (*p == L_('-') && p[1] != L_('\0')
599 if (!is_range && c == fn)
603 /* This is needed if we goto normal_bracket; from
604 outside of is_seqval's scope. */
612 if (c == L_('-') && *p != L_(']'))
615 /* We have to find the collation sequence
616 value for C. Collation sequence is nothing
617 we can regularly access. The sequence
618 value is defined by the order in which the
619 definitions of the collation values for the
620 various characters appear in the source
621 file. A strange concept, nowhere
627 # ifdef WIDE_CHAR_VERSION
628 /* Search in the `names' array for the characters. */
629 fcollseq = __collseq_table_lookup (collseq, fn);
630 if (fcollseq == ~((uint32_t) 0))
631 /* XXX We don't know anything about the character
632 we are supposed to match. This means we are
634 goto range_not_matched;
639 lcollseq = __collseq_table_lookup (collseq, cold);
641 fcollseq = collseq[fn];
642 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
646 if (cend == L_('[') && *p == L_('.'))
649 _NL_CURRENT_WORD (LC_COLLATE,
651 const CHAR *startp = p;
657 if (c == L_('.') && p[1] == L_(']'))
669 /* There are no names defined in the
670 collation data. Therefore we only
671 accept the trivial names consisting
672 of the character itself. */
681 const int32_t *symb_table;
682 # ifdef WIDE_CHAR_VERSION
686 # define str (startp + 1)
688 const unsigned char *extra;
694 # ifdef WIDE_CHAR_VERSION
695 /* We have to convert the name to a single-byte
696 string. This is possible since the names
697 consist of ASCII characters and the internal
698 representation is UCS4. */
699 for (strcnt = 0; strcnt < c1; ++strcnt)
700 str[strcnt] = startp[1 + strcnt];
704 _NL_CURRENT_WORD (LC_COLLATE,
705 _NL_COLLATE_SYMB_HASH_SIZEMB);
706 symb_table = (const int32_t *)
707 _NL_CURRENT (LC_COLLATE,
708 _NL_COLLATE_SYMB_TABLEMB);
709 extra = (const unsigned char *)
710 _NL_CURRENT (LC_COLLATE,
711 _NL_COLLATE_SYMB_EXTRAMB);
713 /* Locate the character in the hashing
715 hash = elem_hash (str, c1);
718 elem = hash % table_size;
719 if (symb_table[2 * elem] != 0)
721 second = hash % (table_size - 2) + 1;
725 /* First compare the hashing value. */
726 if (symb_table[2 * elem] == hash
728 == extra[symb_table[2 * elem + 1]])
730 &extra[symb_table[2 * elem + 1]
733 /* Yep, this is the entry. */
734 idx = symb_table[2 * elem + 1];
735 idx += 1 + extra[idx];
742 while (symb_table[2 * elem] != 0);
745 if (symb_table[2 * elem] != 0)
747 /* Compare the byte sequence but only if
748 this is not part of a range. */
749 # ifdef WIDE_CHAR_VERSION
752 idx += 1 + extra[idx];
753 /* Adjust for the alignment. */
754 idx = (idx + 3) & ~4;
756 wextra = (int32_t *) &extra[idx + 4];
758 /* Get the collation sequence value. */
760 # ifdef WIDE_CHAR_VERSION
761 cend = wextra[1 + wextra[idx]];
763 /* Adjust for the alignment. */
764 idx += 1 + extra[idx];
765 idx = (idx + 3) & ~4;
766 cend = *((int32_t *) &extra[idx]);
769 else if (symb_table[2 * elem] != 0 && c1 == 1)
781 if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
783 if (cend == L_('\0'))
788 /* XXX It is not entirely clear to me how to handle
789 characters which are not mentioned in the
790 collation specification. */
792 # ifdef WIDE_CHAR_VERSION
793 lcollseq == 0xffffffff ||
795 lcollseq <= fcollseq)
797 /* We have to look at the upper bound. */
804 # ifdef WIDE_CHAR_VERSION
806 __collseq_table_lookup (collseq, cend);
807 if (hcollseq == ~((uint32_t) 0))
809 /* Hum, no information about the upper
810 bound. The matching succeeds if the
811 lower bound is matched exactly. */
812 if (lcollseq != fcollseq)
813 goto range_not_matched;
818 hcollseq = collseq[cend];
822 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
825 # ifdef WIDE_CHAR_VERSION
829 /* We use a boring value comparison of the character
830 values. This is better than comparing using
831 `strcoll' since the latter would have surprising
832 and sometimes fatal consequences. */
835 if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
837 if (cend == L_('\0'))
841 if (cold <= fn && fn <= cend)
858 /* Skip the rest of the [...] that already matched. */
865 /* [... (unterminated) loses. */
868 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
872 /* XXX 1003.2d11 is unclear if this is right. */
875 else if (c == L_('[') && *p == L_(':'))
878 const CHAR *startp = p;
883 if (++c1 == CHAR_CLASS_MAX_LENGTH)
886 if (*p == L_(':') && p[1] == L_(']'))
889 if (c < L_('a') || c >= L_('z'))
898 else if (c == L_('[') && *p == L_('='))
904 if (c != L_('=') || p[1] != L_(']'))
909 else if (c == L_('[') && *p == L_('.'))
918 if (*p == L_('.') && p[1] == L_(']'))
925 while (c != L_(']'));
934 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
938 res = EXT (c, p, n, string_end, no_leading_period, flags);
945 if (NO_LEADING_PERIOD (flags))
947 if (n == string_end || c != (UCHAR) *n)
950 new_no_leading_period = true;
956 if (n == string_end || c != FOLD ((UCHAR) *n))
960 no_leading_period = new_no_leading_period;
967 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
968 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
977 END (const CHAR *pattern)
979 const CHAR *p = pattern;
982 if (*++p == L_('\0'))
983 /* This is an invalid pattern. */
985 else if (*p == L_('['))
987 /* Handle brackets special. */
988 if (posixly_correct == 0)
989 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
991 /* Skip the not sign. We have to recognize it because of a possibly
993 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
995 /* A leading ']' is recognized as such. */
998 /* Skip over all characters of the list. */
999 while (*p != L_(']'))
1000 if (*p++ == L_('\0'))
1001 /* This is no valid pattern. */
1004 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1005 || *p == L_('!')) && p[1] == L_('('))
1007 else if (*p == L_(')'))
1016 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1017 bool no_leading_period, int flags)
1023 struct patternlist *next;
1026 struct patternlist **lastp = &list;
1027 size_t pattern_len = STRLEN (pattern);
1030 enum { ALLOCA_LIMIT = 8000 };
1032 /* Parse the pattern. Store the individual parts in the list. */
1034 for (startp = p = pattern + 1; ; ++p)
1036 /* This is an invalid pattern. */
1038 else if (*p == L_('['))
1040 /* Handle brackets special. */
1041 if (posixly_correct == 0)
1042 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1044 /* Skip the not sign. We have to recognize it because of a possibly
1046 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1048 /* A leading ']' is recognized as such. */
1051 /* Skip over all characters of the list. */
1052 while (*p != L_(']'))
1053 if (*p++ == L_('\0'))
1054 /* This is no valid pattern. */
1057 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1058 || *p == L_('!')) && p[1] == L_('('))
1059 /* Remember the nesting level. */
1061 else if (*p == L_(')'))
1065 /* This means we found the end of the pattern. */
1066 #define NEW_PATTERN \
1067 struct patternlist *newp; \
1072 plen = (opt == L_('?') || opt == L_('@') \
1074 : p - startp + 1); \
1075 plensize = plen * sizeof (CHAR); \
1076 newpsize = offsetof (struct patternlist, str) + plensize; \
1077 if ((size_t) -1 / sizeof (CHAR) < plen \
1078 || newpsize < offsetof (struct patternlist, str) \
1079 || ALLOCA_LIMIT <= newpsize) \
1081 newp = (struct patternlist *) alloca (newpsize); \
1082 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0'); \
1083 newp->next = NULL; \
1090 else if (*p == L_('|'))
1098 assert (list != NULL);
1099 assert (p[-1] == L_(')'));
1105 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1112 for (rs = string; rs <= string_end; ++rs)
1113 /* First match the prefix with the current pattern with the
1115 if (FCT (list->str, string, rs, no_leading_period,
1116 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1117 /* This was successful. Now match the rest with the rest
1119 && (FCT (p, rs, string_end,
1122 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1123 flags & FNM_FILE_NAME
1124 ? flags : flags & ~FNM_PERIOD) == 0
1125 /* This didn't work. Try the whole pattern. */
1127 && FCT (pattern - 1, rs, string_end,
1130 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1131 flags & FNM_FILE_NAME
1132 ? flags : flags & ~FNM_PERIOD) == 0)))
1133 /* It worked. Signal success. */
1136 while ((list = list->next) != NULL);
1138 /* None of the patterns lead to a match. */
1142 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1148 /* I cannot believe it but `strcat' is actually acceptable
1149 here. Match the entire string with the prefix from the
1150 pattern list and the rest of the pattern following the
1152 if (FCT (STRCAT (list->str, p), string, string_end,
1154 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1155 /* It worked. Signal success. */
1157 while ((list = list->next) != NULL);
1159 /* None of the patterns lead to a match. */
1163 for (rs = string; rs <= string_end; ++rs)
1165 struct patternlist *runp;
1167 for (runp = list; runp != NULL; runp = runp->next)
1168 if (FCT (runp->str, string, rs, no_leading_period,
1169 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1172 /* If none of the patterns matched see whether the rest does. */
1174 && (FCT (p, rs, string_end,
1177 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1178 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1180 /* This is successful. */
1184 /* None of the patterns together with the rest of the pattern
1189 assert (! "Invalid extended matching operator");