1 /* Copyright (C) 1991-1993, 1996-2006, 2009-2011 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 /* Match STRING against the file name pattern PATTERN, returning zero if
19 it matches, nonzero if not. */
20 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
21 const CHAR *string_end, bool no_leading_period, int flags)
23 static const CHAR *END (const CHAR *patternp) internal_function;
27 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
28 bool no_leading_period, int flags)
30 register const CHAR *p = pattern, *n = string;
33 # if WIDE_CHAR_VERSION
34 const char *collseq = (const char *)
35 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
37 const UCHAR *collseq = (const UCHAR *)
38 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
42 while ((c = *p++) != L_('\0'))
44 bool new_no_leading_period = false;
50 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
54 res = EXT (c, p, n, string_end, no_leading_period,
62 else if (*n == L_('/') && (flags & FNM_FILE_NAME))
64 else if (*n == L_('.') && no_leading_period)
69 if (!(flags & FNM_NOESCAPE))
73 /* Trailing \ loses. */
77 if (n == string_end || FOLD ((UCHAR) *n) != c)
82 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
86 res = EXT (c, p, n, string_end, no_leading_period,
92 if (n != string_end && *n == L_('.') && no_leading_period)
95 for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
97 if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
99 const CHAR *endp = END (p);
102 /* This is a pattern. Skip over it. */
110 /* A ? needs to match one character. */
112 /* There isn't another character; no match. */
114 else if (*n == L_('/')
115 && __builtin_expect (flags & FNM_FILE_NAME, 0))
116 /* A slash does not match a wildcard under
120 /* One character of the string is consumed in matching
121 this ? wildcard, so *??? won't match if there are
122 less than three characters. */
128 /* The wildcard(s) is/are the last element of the pattern.
129 If the name is a file name and contains another slash
130 this means it cannot match, unless the FNM_LEADING_DIR
133 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
135 if (flags & FNM_FILE_NAME)
137 if (flags & FNM_LEADING_DIR)
141 if (MEMCHR (n, L_('/'), string_end - n) == NULL)
152 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
158 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
159 && (c == L_('@') || c == L_('+') || c == L_('!'))
162 int flags2 = ((flags & FNM_FILE_NAME)
163 ? flags : (flags & ~FNM_PERIOD));
164 bool no_leading_period2 = no_leading_period;
166 for (--p; n < endp; ++n, no_leading_period2 = false)
167 if (FCT (p, n, string_end, no_leading_period2, flags2)
171 else if (c == L_('/') && (flags & FNM_FILE_NAME))
173 while (n < string_end && *n != L_('/'))
175 if (n < string_end && *n == L_('/')
176 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
182 int flags2 = ((flags & FNM_FILE_NAME)
183 ? flags : (flags & ~FNM_PERIOD));
184 int no_leading_period2 = no_leading_period;
186 if (c == L_('\\') && !(flags & FNM_NOESCAPE))
189 for (--p; n < endp; ++n, no_leading_period2 = false)
190 if (FOLD ((UCHAR) *n) == c
191 && (FCT (p, n, string_end, no_leading_period2, flags2)
197 /* If we come here no match is possible with the wildcard. */
202 /* Nonzero if the sense of the character class is inverted. */
207 if (posixly_correct == 0)
208 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
213 if (*n == L_('.') && no_leading_period)
216 if (*n == L_('/') && (flags & FNM_FILE_NAME))
217 /* `/' cannot be matched. */
220 not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
224 fn = FOLD ((UCHAR) *n);
229 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
233 c = FOLD ((UCHAR) *p);
238 else if (c == L_('[') && *p == L_(':'))
240 /* Leave room for the null. */
241 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
243 #if defined _LIBC || WIDE_CHAR_SUPPORT
246 const CHAR *startp = p;
250 if (c1 == CHAR_CLASS_MAX_LENGTH)
251 /* The name is too long and therefore the pattern
256 if (c == L_(':') && p[1] == L_(']'))
261 if (c < L_('a') || c >= L_('z'))
263 /* This cannot possibly be a character class name.
264 Match it as a normal range. */
273 #if defined _LIBC || WIDE_CHAR_SUPPORT
274 wt = IS_CHAR_CLASS (str);
276 /* Invalid character class name. */
279 # if defined _LIBC && ! WIDE_CHAR_VERSION
280 /* The following code is glibc specific but does
281 there a good job in speeding up the code since
282 we can avoid the btowc() call. */
283 if (_ISCTYPE ((UCHAR) *n, wt))
286 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
290 if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
291 || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
292 || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
293 || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
294 || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
295 || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
296 || (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
297 || (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
298 || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
299 || (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
300 || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
301 || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
307 else if (c == L_('[') && *p == L_('='))
311 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
312 const CHAR *startp = p;
324 if (c != L_('=') || p[1] != L_(']'))
334 if ((UCHAR) *n == str[0])
339 const int32_t *table;
340 # if WIDE_CHAR_VERSION
341 const int32_t *weights;
342 const int32_t *extra;
344 const unsigned char *weights;
345 const unsigned char *extra;
347 const int32_t *indirect;
349 const UCHAR *cp = (const UCHAR *) str;
351 /* This #include defines a local function! */
352 # if WIDE_CHAR_VERSION
353 # include <locale/weightwc.h>
355 # include <locale/weight.h>
358 # if WIDE_CHAR_VERSION
359 table = (const int32_t *)
360 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
361 weights = (const int32_t *)
362 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
363 extra = (const int32_t *)
364 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
365 indirect = (const int32_t *)
366 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
368 table = (const int32_t *)
369 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
370 weights = (const unsigned char *)
371 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
372 extra = (const unsigned char *)
373 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
374 indirect = (const int32_t *)
375 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
381 /* We found a table entry. Now see whether the
382 character we are currently at has the same
383 equivalance class value. */
384 int len = weights[idx & 0xffffff];
386 const UCHAR *np = (const UCHAR *) n;
388 idx2 = findidx (&np);
390 && (idx >> 24) == (idx2 >> 24)
391 && len == weights[idx2 & 0xffffff])
399 && (weights[idx + 1 + cnt]
400 == weights[idx2 + 1 + cnt]))
412 else if (c == L_('\0'))
413 /* [ (unterminated) loses. */
417 bool is_range = false;
420 bool is_seqval = false;
422 if (c == L_('[') && *p == L_('.'))
425 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
426 const CHAR *startp = p;
432 if (c == L_('.') && p[1] == L_(']'))
442 /* We have to handling the symbols differently in
443 ranges since then the collation sequence is
445 is_range = *p == L_('-') && p[1] != L_('\0');
449 /* There are no names defined in the collation
450 data. Therefore we only accept the trivial
451 names consisting of the character itself. */
455 if (!is_range && *n == startp[1])
464 const int32_t *symb_table;
465 # ifdef WIDE_CHAR_VERSION
469 # define str (startp + 1)
471 const unsigned char *extra;
477 # ifdef WIDE_CHAR_VERSION
478 /* We have to convert the name to a single-byte
479 string. This is possible since the names
480 consist of ASCII characters and the internal
481 representation is UCS4. */
482 for (strcnt = 0; strcnt < c1; ++strcnt)
483 str[strcnt] = startp[1 + strcnt];
487 _NL_CURRENT_WORD (LC_COLLATE,
488 _NL_COLLATE_SYMB_HASH_SIZEMB);
489 symb_table = (const int32_t *)
490 _NL_CURRENT (LC_COLLATE,
491 _NL_COLLATE_SYMB_TABLEMB);
492 extra = (const unsigned char *)
493 _NL_CURRENT (LC_COLLATE,
494 _NL_COLLATE_SYMB_EXTRAMB);
496 /* Locate the character in the hashing table. */
497 hash = elem_hash (str, c1);
500 elem = hash % table_size;
501 if (symb_table[2 * elem] != 0)
503 second = hash % (table_size - 2) + 1;
507 /* First compare the hashing value. */
508 if (symb_table[2 * elem] == hash
510 == extra[symb_table[2 * elem + 1]])
512 &extra[symb_table[2 * elem
516 /* Yep, this is the entry. */
517 idx = symb_table[2 * elem + 1];
518 idx += 1 + extra[idx];
525 while (symb_table[2 * elem] != 0);
528 if (symb_table[2 * elem] != 0)
530 /* Compare the byte sequence but only if
531 this is not part of a range. */
532 # ifdef WIDE_CHAR_VERSION
535 idx += 1 + extra[idx];
536 /* Adjust for the alignment. */
537 idx = (idx + 3) & ~3;
539 wextra = (int32_t *) &extra[idx + 4];
544 # ifdef WIDE_CHAR_VERSION
546 (int32_t) c1 < wextra[idx];
548 if (n[c1] != wextra[1 + c1])
551 if ((int32_t) c1 == wextra[idx])
554 for (c1 = 0; c1 < extra[idx]; ++c1)
555 if (n[c1] != extra[1 + c1])
558 if (c1 == extra[idx])
563 /* Get the collation sequence value. */
565 # ifdef WIDE_CHAR_VERSION
566 cold = wextra[1 + wextra[idx]];
568 /* Adjust for the alignment. */
569 idx += 1 + extra[idx];
570 idx = (idx + 3) & ~4;
571 cold = *((int32_t *) &extra[idx]);
578 /* No valid character. Match it as a
580 if (!is_range && *n == str[0])
597 /* We have to handling the symbols differently in
598 ranges since then the collation sequence is
600 is_range = (*p == L_('-') && p[1] != L_('\0')
603 if (!is_range && c == fn)
607 /* This is needed if we goto normal_bracket; from
608 outside of is_seqval's scope. */
616 if (c == L_('-') && *p != L_(']'))
619 /* We have to find the collation sequence
620 value for C. Collation sequence is nothing
621 we can regularly access. The sequence
622 value is defined by the order in which the
623 definitions of the collation values for the
624 various characters appear in the source
625 file. A strange concept, nowhere
631 # ifdef WIDE_CHAR_VERSION
632 /* Search in the `names' array for the characters. */
633 fcollseq = __collseq_table_lookup (collseq, fn);
634 if (fcollseq == ~((uint32_t) 0))
635 /* XXX We don't know anything about the character
636 we are supposed to match. This means we are
638 goto range_not_matched;
643 lcollseq = __collseq_table_lookup (collseq, cold);
645 fcollseq = collseq[fn];
646 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
650 if (cend == L_('[') && *p == L_('.'))
653 _NL_CURRENT_WORD (LC_COLLATE,
655 const CHAR *startp = p;
661 if (c == L_('.') && p[1] == L_(']'))
673 /* There are no names defined in the
674 collation data. Therefore we only
675 accept the trivial names consisting
676 of the character itself. */
685 const int32_t *symb_table;
686 # ifdef WIDE_CHAR_VERSION
690 # define str (startp + 1)
692 const unsigned char *extra;
698 # ifdef WIDE_CHAR_VERSION
699 /* We have to convert the name to a single-byte
700 string. This is possible since the names
701 consist of ASCII characters and the internal
702 representation is UCS4. */
703 for (strcnt = 0; strcnt < c1; ++strcnt)
704 str[strcnt] = startp[1 + strcnt];
708 _NL_CURRENT_WORD (LC_COLLATE,
709 _NL_COLLATE_SYMB_HASH_SIZEMB);
710 symb_table = (const int32_t *)
711 _NL_CURRENT (LC_COLLATE,
712 _NL_COLLATE_SYMB_TABLEMB);
713 extra = (const unsigned char *)
714 _NL_CURRENT (LC_COLLATE,
715 _NL_COLLATE_SYMB_EXTRAMB);
717 /* Locate the character in the hashing
719 hash = elem_hash (str, c1);
722 elem = hash % table_size;
723 if (symb_table[2 * elem] != 0)
725 second = hash % (table_size - 2) + 1;
729 /* First compare the hashing value. */
730 if (symb_table[2 * elem] == hash
732 == extra[symb_table[2 * elem + 1]])
734 &extra[symb_table[2 * elem + 1]
737 /* Yep, this is the entry. */
738 idx = symb_table[2 * elem + 1];
739 idx += 1 + extra[idx];
746 while (symb_table[2 * elem] != 0);
749 if (symb_table[2 * elem] != 0)
751 /* Compare the byte sequence but only if
752 this is not part of a range. */
753 # ifdef WIDE_CHAR_VERSION
756 idx += 1 + extra[idx];
757 /* Adjust for the alignment. */
758 idx = (idx + 3) & ~4;
760 wextra = (int32_t *) &extra[idx + 4];
762 /* Get the collation sequence value. */
764 # ifdef WIDE_CHAR_VERSION
765 cend = wextra[1 + wextra[idx]];
767 /* Adjust for the alignment. */
768 idx += 1 + extra[idx];
769 idx = (idx + 3) & ~4;
770 cend = *((int32_t *) &extra[idx]);
773 else if (symb_table[2 * elem] != 0 && c1 == 1)
785 if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
787 if (cend == L_('\0'))
792 /* XXX It is not entirely clear to me how to handle
793 characters which are not mentioned in the
794 collation specification. */
796 # ifdef WIDE_CHAR_VERSION
797 lcollseq == 0xffffffff ||
799 lcollseq <= fcollseq)
801 /* We have to look at the upper bound. */
808 # ifdef WIDE_CHAR_VERSION
810 __collseq_table_lookup (collseq, cend);
811 if (hcollseq == ~((uint32_t) 0))
813 /* Hum, no information about the upper
814 bound. The matching succeeds if the
815 lower bound is matched exactly. */
816 if (lcollseq != fcollseq)
817 goto range_not_matched;
822 hcollseq = collseq[cend];
826 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
829 # ifdef WIDE_CHAR_VERSION
833 /* We use a boring value comparison of the character
834 values. This is better than comparing using
835 `strcoll' since the latter would have surprising
836 and sometimes fatal consequences. */
839 if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
841 if (cend == L_('\0'))
845 if (cold <= fn && fn <= cend)
862 /* Skip the rest of the [...] that already matched. */
869 /* [... (unterminated) loses. */
872 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
876 /* XXX 1003.2d11 is unclear if this is right. */
879 else if (c == L_('[') && *p == L_(':'))
882 const CHAR *startp = p;
887 if (++c1 == CHAR_CLASS_MAX_LENGTH)
890 if (*p == L_(':') && p[1] == L_(']'))
893 if (c < L_('a') || c >= L_('z'))
902 else if (c == L_('[') && *p == L_('='))
908 if (c != L_('=') || p[1] != L_(']'))
913 else if (c == L_('[') && *p == L_('.'))
922 if (*p == L_('.') && p[1] == L_(']'))
929 while (c != L_(']'));
938 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
942 res = EXT (c, p, n, string_end, no_leading_period, flags);
949 if (NO_LEADING_PERIOD (flags))
951 if (n == string_end || c != (UCHAR) *n)
954 new_no_leading_period = true;
960 if (n == string_end || c != FOLD ((UCHAR) *n))
964 no_leading_period = new_no_leading_period;
971 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
972 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
981 END (const CHAR *pattern)
983 const CHAR *p = pattern;
986 if (*++p == L_('\0'))
987 /* This is an invalid pattern. */
989 else if (*p == L_('['))
991 /* Handle brackets special. */
992 if (posixly_correct == 0)
993 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
995 /* Skip the not sign. We have to recognize it because of a possibly
997 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
999 /* A leading ']' is recognized as such. */
1002 /* Skip over all characters of the list. */
1003 while (*p != L_(']'))
1004 if (*p++ == L_('\0'))
1005 /* This is no valid pattern. */
1008 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1009 || *p == L_('!')) && p[1] == L_('('))
1011 else if (*p == L_(')'))
1020 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1021 bool no_leading_period, int flags)
1027 struct patternlist *next;
1030 struct patternlist **lastp = &list;
1031 size_t pattern_len = STRLEN (pattern);
1034 enum { ALLOCA_LIMIT = 8000 };
1036 /* Parse the pattern. Store the individual parts in the list. */
1038 for (startp = p = pattern + 1; ; ++p)
1040 /* This is an invalid pattern. */
1042 else if (*p == L_('['))
1044 /* Handle brackets special. */
1045 if (posixly_correct == 0)
1046 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1048 /* Skip the not sign. We have to recognize it because of a possibly
1050 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1052 /* A leading ']' is recognized as such. */
1055 /* Skip over all characters of the list. */
1056 while (*p != L_(']'))
1057 if (*p++ == L_('\0'))
1058 /* This is no valid pattern. */
1061 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1062 || *p == L_('!')) && p[1] == L_('('))
1063 /* Remember the nesting level. */
1065 else if (*p == L_(')'))
1069 /* This means we found the end of the pattern. */
1070 #define NEW_PATTERN \
1071 struct patternlist *newp; \
1076 plen = (opt == L_('?') || opt == L_('@') \
1078 : p - startp + 1UL); \
1079 plensize = plen * sizeof (CHAR); \
1080 newpsize = offsetof (struct patternlist, str) + plensize; \
1081 if ((size_t) -1 / sizeof (CHAR) < plen \
1082 || newpsize < offsetof (struct patternlist, str) \
1083 || ALLOCA_LIMIT <= newpsize) \
1085 newp = (struct patternlist *) alloca (newpsize); \
1086 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0'); \
1087 newp->next = NULL; \
1094 else if (*p == L_('|'))
1102 assert (list != NULL);
1103 assert (p[-1] == L_(')'));
1109 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1116 for (rs = string; rs <= string_end; ++rs)
1117 /* First match the prefix with the current pattern with the
1119 if (FCT (list->str, string, rs, no_leading_period,
1120 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1121 /* This was successful. Now match the rest with the rest
1123 && (FCT (p, rs, string_end,
1126 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1127 flags & FNM_FILE_NAME
1128 ? flags : flags & ~FNM_PERIOD) == 0
1129 /* This didn't work. Try the whole pattern. */
1131 && FCT (pattern - 1, rs, string_end,
1134 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1135 flags & FNM_FILE_NAME
1136 ? flags : flags & ~FNM_PERIOD) == 0)))
1137 /* It worked. Signal success. */
1140 while ((list = list->next) != NULL);
1142 /* None of the patterns lead to a match. */
1146 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1152 /* I cannot believe it but `strcat' is actually acceptable
1153 here. Match the entire string with the prefix from the
1154 pattern list and the rest of the pattern following the
1156 if (FCT (STRCAT (list->str, p), string, string_end,
1158 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1159 /* It worked. Signal success. */
1161 while ((list = list->next) != NULL);
1163 /* None of the patterns lead to a match. */
1167 for (rs = string; rs <= string_end; ++rs)
1169 struct patternlist *runp;
1171 for (runp = list; runp != NULL; runp = runp->next)
1172 if (FCT (runp->str, string, rs, no_leading_period,
1173 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1176 /* If none of the patterns matched see whether the rest does. */
1178 && (FCT (p, rs, string_end,
1181 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1182 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1184 /* This is successful. */
1188 /* None of the patterns together with the rest of the pattern
1193 assert (! "Invalid extended matching operator");