1 /* Copyright (C) 1991, 1992, 1993, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
2 2003, 2004, 2005, 2006, 2009, 2010 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19 /* Match STRING against the file name pattern PATTERN, returning zero if
20 it matches, nonzero if not. */
21 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
22 const CHAR *string_end, bool no_leading_period, int flags)
24 static const CHAR *END (const CHAR *patternp) internal_function;
28 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
29 bool no_leading_period, int flags)
31 register const CHAR *p = pattern, *n = string;
34 # if WIDE_CHAR_VERSION
35 const char *collseq = (const char *)
36 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
38 const UCHAR *collseq = (const UCHAR *)
39 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
43 while ((c = *p++) != L_('\0'))
45 bool new_no_leading_period = false;
51 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
55 res = EXT (c, p, n, string_end, no_leading_period,
63 else if (*n == L_('/') && (flags & FNM_FILE_NAME))
65 else if (*n == L_('.') && no_leading_period)
70 if (!(flags & FNM_NOESCAPE))
74 /* Trailing \ loses. */
78 if (n == string_end || FOLD ((UCHAR) *n) != c)
83 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
87 res = EXT (c, p, n, string_end, no_leading_period,
93 if (n != string_end && *n == L_('.') && no_leading_period)
96 for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
98 if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
100 const CHAR *endp = END (p);
103 /* This is a pattern. Skip over it. */
111 /* A ? needs to match one character. */
113 /* There isn't another character; no match. */
115 else if (*n == L_('/')
116 && __builtin_expect (flags & FNM_FILE_NAME, 0))
117 /* A slash does not match a wildcard under
121 /* One character of the string is consumed in matching
122 this ? wildcard, so *??? won't match if there are
123 less than three characters. */
129 /* The wildcard(s) is/are the last element of the pattern.
130 If the name is a file name and contains another slash
131 this means it cannot match, unless the FNM_LEADING_DIR
134 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
136 if (flags & FNM_FILE_NAME)
138 if (flags & FNM_LEADING_DIR)
142 if (MEMCHR (n, L_('/'), string_end - n) == NULL)
153 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
159 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
160 && (c == L_('@') || c == L_('+') || c == L_('!'))
163 int flags2 = ((flags & FNM_FILE_NAME)
164 ? flags : (flags & ~FNM_PERIOD));
165 bool no_leading_period2 = no_leading_period;
167 for (--p; n < endp; ++n, no_leading_period2 = false)
168 if (FCT (p, n, string_end, no_leading_period2, flags2)
172 else if (c == L_('/') && (flags & FNM_FILE_NAME))
174 while (n < string_end && *n != L_('/'))
176 if (n < string_end && *n == L_('/')
177 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
183 int flags2 = ((flags & FNM_FILE_NAME)
184 ? flags : (flags & ~FNM_PERIOD));
185 int no_leading_period2 = no_leading_period;
187 if (c == L_('\\') && !(flags & FNM_NOESCAPE))
190 for (--p; n < endp; ++n, no_leading_period2 = false)
191 if (FOLD ((UCHAR) *n) == c
192 && (FCT (p, n, string_end, no_leading_period2, flags2)
198 /* If we come here no match is possible with the wildcard. */
203 /* Nonzero if the sense of the character class is inverted. */
208 if (posixly_correct == 0)
209 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
214 if (*n == L_('.') && no_leading_period)
217 if (*n == L_('/') && (flags & FNM_FILE_NAME))
218 /* `/' cannot be matched. */
221 not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
225 fn = FOLD ((UCHAR) *n);
230 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
234 c = FOLD ((UCHAR) *p);
239 else if (c == L_('[') && *p == L_(':'))
241 /* Leave room for the null. */
242 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
244 #if defined _LIBC || WIDE_CHAR_SUPPORT
247 const CHAR *startp = p;
251 if (c1 == CHAR_CLASS_MAX_LENGTH)
252 /* The name is too long and therefore the pattern
257 if (c == L_(':') && p[1] == L_(']'))
262 if (c < L_('a') || c >= L_('z'))
264 /* This cannot possibly be a character class name.
265 Match it as a normal range. */
274 #if defined _LIBC || WIDE_CHAR_SUPPORT
275 wt = IS_CHAR_CLASS (str);
277 /* Invalid character class name. */
280 # if defined _LIBC && ! WIDE_CHAR_VERSION
281 /* The following code is glibc specific but does
282 there a good job in speeding up the code since
283 we can avoid the btowc() call. */
284 if (_ISCTYPE ((UCHAR) *n, wt))
287 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
291 if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
292 || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
293 || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
294 || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
295 || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
296 || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
297 || (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
298 || (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
299 || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
300 || (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
301 || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
302 || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
308 else if (c == L_('[') && *p == L_('='))
312 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
313 const CHAR *startp = p;
325 if (c != L_('=') || p[1] != L_(']'))
335 if ((UCHAR) *n == str[0])
340 const int32_t *table;
341 # if WIDE_CHAR_VERSION
342 const int32_t *weights;
343 const int32_t *extra;
345 const unsigned char *weights;
346 const unsigned char *extra;
348 const int32_t *indirect;
350 const UCHAR *cp = (const UCHAR *) str;
352 /* This #include defines a local function! */
353 # if WIDE_CHAR_VERSION
354 # include <locale/weightwc.h>
356 # include <locale/weight.h>
359 # if WIDE_CHAR_VERSION
360 table = (const int32_t *)
361 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
362 weights = (const int32_t *)
363 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
364 extra = (const int32_t *)
365 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
366 indirect = (const int32_t *)
367 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
369 table = (const int32_t *)
370 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
371 weights = (const unsigned char *)
372 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
373 extra = (const unsigned char *)
374 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
375 indirect = (const int32_t *)
376 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
382 /* We found a table entry. Now see whether the
383 character we are currently at has the same
384 equivalance class value. */
385 int len = weights[idx & 0xffffff];
387 const UCHAR *np = (const UCHAR *) n;
389 idx2 = findidx (&np);
391 && (idx >> 24) == (idx2 >> 24)
392 && len == weights[idx2 & 0xffffff])
400 && (weights[idx + 1 + cnt]
401 == weights[idx2 + 1 + cnt]))
413 else if (c == L_('\0'))
414 /* [ (unterminated) loses. */
418 bool is_range = false;
421 bool is_seqval = false;
423 if (c == L_('[') && *p == L_('.'))
426 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
427 const CHAR *startp = p;
433 if (c == L_('.') && p[1] == L_(']'))
443 /* We have to handling the symbols differently in
444 ranges since then the collation sequence is
446 is_range = *p == L_('-') && p[1] != L_('\0');
450 /* There are no names defined in the collation
451 data. Therefore we only accept the trivial
452 names consisting of the character itself. */
456 if (!is_range && *n == startp[1])
465 const int32_t *symb_table;
466 # ifdef WIDE_CHAR_VERSION
470 # define str (startp + 1)
472 const unsigned char *extra;
478 # ifdef WIDE_CHAR_VERSION
479 /* We have to convert the name to a single-byte
480 string. This is possible since the names
481 consist of ASCII characters and the internal
482 representation is UCS4. */
483 for (strcnt = 0; strcnt < c1; ++strcnt)
484 str[strcnt] = startp[1 + strcnt];
488 _NL_CURRENT_WORD (LC_COLLATE,
489 _NL_COLLATE_SYMB_HASH_SIZEMB);
490 symb_table = (const int32_t *)
491 _NL_CURRENT (LC_COLLATE,
492 _NL_COLLATE_SYMB_TABLEMB);
493 extra = (const unsigned char *)
494 _NL_CURRENT (LC_COLLATE,
495 _NL_COLLATE_SYMB_EXTRAMB);
497 /* Locate the character in the hashing table. */
498 hash = elem_hash (str, c1);
501 elem = hash % table_size;
502 if (symb_table[2 * elem] != 0)
504 second = hash % (table_size - 2) + 1;
508 /* First compare the hashing value. */
509 if (symb_table[2 * elem] == hash
511 == extra[symb_table[2 * elem + 1]])
513 &extra[symb_table[2 * elem
517 /* Yep, this is the entry. */
518 idx = symb_table[2 * elem + 1];
519 idx += 1 + extra[idx];
526 while (symb_table[2 * elem] != 0);
529 if (symb_table[2 * elem] != 0)
531 /* Compare the byte sequence but only if
532 this is not part of a range. */
533 # ifdef WIDE_CHAR_VERSION
536 idx += 1 + extra[idx];
537 /* Adjust for the alignment. */
538 idx = (idx + 3) & ~3;
540 wextra = (int32_t *) &extra[idx + 4];
545 # ifdef WIDE_CHAR_VERSION
547 (int32_t) c1 < wextra[idx];
549 if (n[c1] != wextra[1 + c1])
552 if ((int32_t) c1 == wextra[idx])
555 for (c1 = 0; c1 < extra[idx]; ++c1)
556 if (n[c1] != extra[1 + c1])
559 if (c1 == extra[idx])
564 /* Get the collation sequence value. */
566 # ifdef WIDE_CHAR_VERSION
567 cold = wextra[1 + wextra[idx]];
569 /* Adjust for the alignment. */
570 idx += 1 + extra[idx];
571 idx = (idx + 3) & ~4;
572 cold = *((int32_t *) &extra[idx]);
579 /* No valid character. Match it as a
581 if (!is_range && *n == str[0])
598 /* We have to handling the symbols differently in
599 ranges since then the collation sequence is
601 is_range = (*p == L_('-') && p[1] != L_('\0')
604 if (!is_range && c == fn)
608 /* This is needed if we goto normal_bracket; from
609 outside of is_seqval's scope. */
617 if (c == L_('-') && *p != L_(']'))
620 /* We have to find the collation sequence
621 value for C. Collation sequence is nothing
622 we can regularly access. The sequence
623 value is defined by the order in which the
624 definitions of the collation values for the
625 various characters appear in the source
626 file. A strange concept, nowhere
632 # ifdef WIDE_CHAR_VERSION
633 /* Search in the `names' array for the characters. */
634 fcollseq = __collseq_table_lookup (collseq, fn);
635 if (fcollseq == ~((uint32_t) 0))
636 /* XXX We don't know anything about the character
637 we are supposed to match. This means we are
639 goto range_not_matched;
644 lcollseq = __collseq_table_lookup (collseq, cold);
646 fcollseq = collseq[fn];
647 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
651 if (cend == L_('[') && *p == L_('.'))
654 _NL_CURRENT_WORD (LC_COLLATE,
656 const CHAR *startp = p;
662 if (c == L_('.') && p[1] == L_(']'))
674 /* There are no names defined in the
675 collation data. Therefore we only
676 accept the trivial names consisting
677 of the character itself. */
686 const int32_t *symb_table;
687 # ifdef WIDE_CHAR_VERSION
691 # define str (startp + 1)
693 const unsigned char *extra;
699 # ifdef WIDE_CHAR_VERSION
700 /* We have to convert the name to a single-byte
701 string. This is possible since the names
702 consist of ASCII characters and the internal
703 representation is UCS4. */
704 for (strcnt = 0; strcnt < c1; ++strcnt)
705 str[strcnt] = startp[1 + strcnt];
709 _NL_CURRENT_WORD (LC_COLLATE,
710 _NL_COLLATE_SYMB_HASH_SIZEMB);
711 symb_table = (const int32_t *)
712 _NL_CURRENT (LC_COLLATE,
713 _NL_COLLATE_SYMB_TABLEMB);
714 extra = (const unsigned char *)
715 _NL_CURRENT (LC_COLLATE,
716 _NL_COLLATE_SYMB_EXTRAMB);
718 /* Locate the character in the hashing
720 hash = elem_hash (str, c1);
723 elem = hash % table_size;
724 if (symb_table[2 * elem] != 0)
726 second = hash % (table_size - 2) + 1;
730 /* First compare the hashing value. */
731 if (symb_table[2 * elem] == hash
733 == extra[symb_table[2 * elem + 1]])
735 &extra[symb_table[2 * elem + 1]
738 /* Yep, this is the entry. */
739 idx = symb_table[2 * elem + 1];
740 idx += 1 + extra[idx];
747 while (symb_table[2 * elem] != 0);
750 if (symb_table[2 * elem] != 0)
752 /* Compare the byte sequence but only if
753 this is not part of a range. */
754 # ifdef WIDE_CHAR_VERSION
757 idx += 1 + extra[idx];
758 /* Adjust for the alignment. */
759 idx = (idx + 3) & ~4;
761 wextra = (int32_t *) &extra[idx + 4];
763 /* Get the collation sequence value. */
765 # ifdef WIDE_CHAR_VERSION
766 cend = wextra[1 + wextra[idx]];
768 /* Adjust for the alignment. */
769 idx += 1 + extra[idx];
770 idx = (idx + 3) & ~4;
771 cend = *((int32_t *) &extra[idx]);
774 else if (symb_table[2 * elem] != 0 && c1 == 1)
786 if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
788 if (cend == L_('\0'))
793 /* XXX It is not entirely clear to me how to handle
794 characters which are not mentioned in the
795 collation specification. */
797 # ifdef WIDE_CHAR_VERSION
798 lcollseq == 0xffffffff ||
800 lcollseq <= fcollseq)
802 /* We have to look at the upper bound. */
809 # ifdef WIDE_CHAR_VERSION
811 __collseq_table_lookup (collseq, cend);
812 if (hcollseq == ~((uint32_t) 0))
814 /* Hum, no information about the upper
815 bound. The matching succeeds if the
816 lower bound is matched exactly. */
817 if (lcollseq != fcollseq)
818 goto range_not_matched;
823 hcollseq = collseq[cend];
827 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
830 # ifdef WIDE_CHAR_VERSION
834 /* We use a boring value comparison of the character
835 values. This is better than comparing using
836 `strcoll' since the latter would have surprising
837 and sometimes fatal consequences. */
840 if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
842 if (cend == L_('\0'))
846 if (cold <= fn && fn <= cend)
863 /* Skip the rest of the [...] that already matched. */
870 /* [... (unterminated) loses. */
873 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
877 /* XXX 1003.2d11 is unclear if this is right. */
880 else if (c == L_('[') && *p == L_(':'))
883 const CHAR *startp = p;
888 if (++c1 == CHAR_CLASS_MAX_LENGTH)
891 if (*p == L_(':') && p[1] == L_(']'))
894 if (c < L_('a') || c >= L_('z'))
903 else if (c == L_('[') && *p == L_('='))
909 if (c != L_('=') || p[1] != L_(']'))
914 else if (c == L_('[') && *p == L_('.'))
923 if (*p == L_('.') && p[1] == L_(']'))
930 while (c != L_(']'));
939 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
943 res = EXT (c, p, n, string_end, no_leading_period, flags);
950 if (NO_LEADING_PERIOD (flags))
952 if (n == string_end || c != (UCHAR) *n)
955 new_no_leading_period = true;
961 if (n == string_end || c != FOLD ((UCHAR) *n))
965 no_leading_period = new_no_leading_period;
972 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
973 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
982 END (const CHAR *pattern)
984 const CHAR *p = pattern;
987 if (*++p == L_('\0'))
988 /* This is an invalid pattern. */
990 else if (*p == L_('['))
992 /* Handle brackets special. */
993 if (posixly_correct == 0)
994 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
996 /* Skip the not sign. We have to recognize it because of a possibly
998 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1000 /* A leading ']' is recognized as such. */
1003 /* Skip over all characters of the list. */
1004 while (*p != L_(']'))
1005 if (*p++ == L_('\0'))
1006 /* This is no valid pattern. */
1009 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1010 || *p == L_('!')) && p[1] == L_('('))
1012 else if (*p == L_(')'))
1021 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1022 bool no_leading_period, int flags)
1028 struct patternlist *next;
1031 struct patternlist **lastp = &list;
1032 size_t pattern_len = STRLEN (pattern);
1035 enum { ALLOCA_LIMIT = 8000 };
1037 /* Parse the pattern. Store the individual parts in the list. */
1039 for (startp = p = pattern + 1; ; ++p)
1041 /* This is an invalid pattern. */
1043 else if (*p == L_('['))
1045 /* Handle brackets special. */
1046 if (posixly_correct == 0)
1047 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1049 /* Skip the not sign. We have to recognize it because of a possibly
1051 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1053 /* A leading ']' is recognized as such. */
1056 /* Skip over all characters of the list. */
1057 while (*p != L_(']'))
1058 if (*p++ == L_('\0'))
1059 /* This is no valid pattern. */
1062 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1063 || *p == L_('!')) && p[1] == L_('('))
1064 /* Remember the nesting level. */
1066 else if (*p == L_(')'))
1070 /* This means we found the end of the pattern. */
1071 #define NEW_PATTERN \
1072 struct patternlist *newp; \
1077 plen = (opt == L_('?') || opt == L_('@') \
1079 : p - startp + 1UL); \
1080 plensize = plen * sizeof (CHAR); \
1081 newpsize = offsetof (struct patternlist, str) + plensize; \
1082 if ((size_t) -1 / sizeof (CHAR) < plen \
1083 || newpsize < offsetof (struct patternlist, str) \
1084 || ALLOCA_LIMIT <= newpsize) \
1086 newp = (struct patternlist *) alloca (newpsize); \
1087 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0'); \
1088 newp->next = NULL; \
1095 else if (*p == L_('|'))
1103 assert (list != NULL);
1104 assert (p[-1] == L_(')'));
1110 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1117 for (rs = string; rs <= string_end; ++rs)
1118 /* First match the prefix with the current pattern with the
1120 if (FCT (list->str, string, rs, no_leading_period,
1121 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1122 /* This was successful. Now match the rest with the rest
1124 && (FCT (p, rs, string_end,
1127 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1128 flags & FNM_FILE_NAME
1129 ? flags : flags & ~FNM_PERIOD) == 0
1130 /* This didn't work. Try the whole pattern. */
1132 && FCT (pattern - 1, rs, string_end,
1135 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1136 flags & FNM_FILE_NAME
1137 ? flags : flags & ~FNM_PERIOD) == 0)))
1138 /* It worked. Signal success. */
1141 while ((list = list->next) != NULL);
1143 /* None of the patterns lead to a match. */
1147 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1153 /* I cannot believe it but `strcat' is actually acceptable
1154 here. Match the entire string with the prefix from the
1155 pattern list and the rest of the pattern following the
1157 if (FCT (STRCAT (list->str, p), string, string_end,
1159 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1160 /* It worked. Signal success. */
1162 while ((list = list->next) != NULL);
1164 /* None of the patterns lead to a match. */
1168 for (rs = string; rs <= string_end; ++rs)
1170 struct patternlist *runp;
1172 for (runp = list; runp != NULL; runp = runp->next)
1173 if (FCT (runp->str, string, rs, no_leading_period,
1174 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1177 /* If none of the patterns matched see whether the rest does. */
1179 && (FCT (p, rs, string_end,
1182 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1183 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1185 /* This is successful. */
1189 /* None of the patterns together with the rest of the pattern
1194 assert (! "Invalid extended matching operator");