(NEW_PATTERN): Cast alloca return value to proper type.
[gnulib.git] / lib / fnmatch_loop.c
1 /* Copyright (C) 1991, 1992, 1993, 1996, 1997, 1998, 1999, 2000, 2001,
2    2002, 2003 Free Software Foundation, Inc.
3
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 2, or (at your option)
7    any later version.
8
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software Foundation,
16    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
17
18 /* Match STRING against the filename pattern PATTERN, returning zero if
19    it matches, nonzero if not.  */
20 static int FCT (const CHAR *pattern, const CHAR *string,
21                 const CHAR *string_end, int no_leading_period, int flags)
22      internal_function;
23 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
24                 const CHAR *string_end, int no_leading_period, int flags)
25      internal_function;
26 static const CHAR *END (const CHAR *patternp) internal_function;
27
28 static int
29 internal_function
30 FCT (pattern, string, string_end, no_leading_period, flags)
31      const CHAR *pattern;
32      const CHAR *string;
33      const CHAR *string_end;
34      int no_leading_period;
35      int flags;
36 {
37   register const CHAR *p = pattern, *n = string;
38   register UCHAR c;
39 #ifdef _LIBC
40 # if WIDE_CHAR_VERSION
41   const char *collseq = (const char *)
42     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
43 # else
44   const UCHAR *collseq = (const UCHAR *)
45     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
46 # endif
47 #endif
48
49   while ((c = *p++) != L('\0'))
50     {
51       int new_no_leading_period = 0;
52       c = FOLD (c);
53
54       switch (c)
55         {
56         case L('?'):
57           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
58             {
59               int res;
60
61               res = EXT (c, p, n, string_end, no_leading_period,
62                          flags);
63               if (res != -1)
64                 return res;
65             }
66
67           if (n == string_end)
68             return FNM_NOMATCH;
69           else if (*n == L('/') && (flags & FNM_FILE_NAME))
70             return FNM_NOMATCH;
71           else if (*n == L('.') && no_leading_period)
72             return FNM_NOMATCH;
73           break;
74
75         case L('\\'):
76           if (!(flags & FNM_NOESCAPE))
77             {
78               c = *p++;
79               if (c == L('\0'))
80                 /* Trailing \ loses.  */
81                 return FNM_NOMATCH;
82               c = FOLD (c);
83             }
84           if (n == string_end || FOLD ((UCHAR) *n) != c)
85             return FNM_NOMATCH;
86           break;
87
88         case L('*'):
89           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
90             {
91               int res;
92
93               res = EXT (c, p, n, string_end, no_leading_period,
94                          flags);
95               if (res != -1)
96                 return res;
97             }
98
99           if (n != string_end && *n == L('.') && no_leading_period)
100             return FNM_NOMATCH;
101
102           for (c = *p++; c == L('?') || c == L('*'); c = *p++)
103             {
104               if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
105                 {
106                   const CHAR *endp = END (p);
107                   if (endp != p)
108                     {
109                       /* This is a pattern.  Skip over it.  */
110                       p = endp;
111                       continue;
112                     }
113                 }
114
115               if (c == L('?'))
116                 {
117                   /* A ? needs to match one character.  */
118                   if (n == string_end)
119                     /* There isn't another character; no match.  */
120                     return FNM_NOMATCH;
121                   else if (*n == L('/')
122                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
123                     /* A slash does not match a wildcard under
124                        FNM_FILE_NAME.  */
125                     return FNM_NOMATCH;
126                   else
127                     /* One character of the string is consumed in matching
128                        this ? wildcard, so *??? won't match if there are
129                        less than three characters.  */
130                     ++n;
131                 }
132             }
133
134           if (c == L('\0'))
135             /* The wildcard(s) is/are the last element of the pattern.
136                If the name is a file name and contains another slash
137                this means it cannot match, unless the FNM_LEADING_DIR
138                flag is set.  */
139             {
140               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
141
142               if (flags & FNM_FILE_NAME)
143                 {
144                   if (flags & FNM_LEADING_DIR)
145                     result = 0;
146                   else
147                     {
148                       if (MEMCHR (n, L('/'), string_end - n) == NULL)
149                         result = 0;
150                     }
151                 }
152
153               return result;
154             }
155           else
156             {
157               const CHAR *endp;
158
159               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
160                              string_end - n);
161               if (endp == NULL)
162                 endp = string_end;
163
164               if (c == L('[')
165                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
166                       && (c == L('@') || c == L('+') || c == L('!'))
167                       && *p == L('(')))
168                 {
169                   int flags2 = ((flags & FNM_FILE_NAME)
170                                 ? flags : (flags & ~FNM_PERIOD));
171                   int no_leading_period2 = no_leading_period;
172
173                   for (--p; n < endp; ++n, no_leading_period2 = 0)
174                     if (FCT (p, n, string_end, no_leading_period2, flags2)
175                         == 0)
176                       return 0;
177                 }
178               else if (c == L('/') && (flags & FNM_FILE_NAME))
179                 {
180                   while (n < string_end && *n != L('/'))
181                     ++n;
182                   if (n < string_end && *n == L('/')
183                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
184                           == 0))
185                     return 0;
186                 }
187               else
188                 {
189                   int flags2 = ((flags & FNM_FILE_NAME)
190                                 ? flags : (flags & ~FNM_PERIOD));
191                   int no_leading_period2 = no_leading_period;
192
193                   if (c == L('\\') && !(flags & FNM_NOESCAPE))
194                     c = *p;
195                   c = FOLD (c);
196                   for (--p; n < endp; ++n, no_leading_period2 = 0)
197                     if (FOLD ((UCHAR) *n) == c
198                         && (FCT (p, n, string_end, no_leading_period2, flags2)
199                             == 0))
200                       return 0;
201                 }
202             }
203
204           /* If we come here no match is possible with the wildcard.  */
205           return FNM_NOMATCH;
206
207         case L('['):
208           {
209             /* Nonzero if the sense of the character class is inverted.  */
210             register int not;
211             CHAR cold;
212             UCHAR fn;
213
214             if (posixly_correct == 0)
215               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
216
217             if (n == string_end)
218               return FNM_NOMATCH;
219
220             if (*n == L('.') && no_leading_period)
221               return FNM_NOMATCH;
222
223             if (*n == L('/') && (flags & FNM_FILE_NAME))
224               /* `/' cannot be matched.  */
225               return FNM_NOMATCH;
226
227             not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
228             if (not)
229               ++p;
230
231             fn = FOLD ((UCHAR) *n);
232
233             c = *p++;
234             for (;;)
235               {
236                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
237                   {
238                     if (*p == L('\0'))
239                       return FNM_NOMATCH;
240                     c = FOLD ((UCHAR) *p);
241                     ++p;
242
243                     if (c == fn)
244                       goto matched;
245                   }
246                 else if (c == L('[') && *p == L(':'))
247                   {
248                     /* Leave room for the null.  */
249                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
250                     size_t c1 = 0;
251 #if defined _LIBC || WIDE_CHAR_SUPPORT
252                     wctype_t wt;
253 #endif
254                     const CHAR *startp = p;
255
256                     for (;;)
257                       {
258                         if (c1 == CHAR_CLASS_MAX_LENGTH)
259                           /* The name is too long and therefore the pattern
260                              is ill-formed.  */
261                           return FNM_NOMATCH;
262
263                         c = *++p;
264                         if (c == L(':') && p[1] == L(']'))
265                           {
266                             p += 2;
267                             break;
268                           }
269                         if (c < L('a') || c >= L('z'))
270                           {
271                             /* This cannot possibly be a character class name.
272                                Match it as a normal range.  */
273                             p = startp;
274                             c = L('[');
275                             goto normal_bracket;
276                           }
277                         str[c1++] = c;
278                       }
279                     str[c1] = L('\0');
280
281 #if defined _LIBC || WIDE_CHAR_SUPPORT
282                     wt = IS_CHAR_CLASS (str);
283                     if (wt == 0)
284                       /* Invalid character class name.  */
285                       return FNM_NOMATCH;
286
287 # if defined _LIBC && ! WIDE_CHAR_VERSION
288                     /* The following code is glibc specific but does
289                        there a good job in speeding up the code since
290                        we can avoid the btowc() call.  */
291                     if (_ISCTYPE ((UCHAR) *n, wt))
292                       goto matched;
293 # else
294                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
295                       goto matched;
296 # endif
297 #else
298                     if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
299                         || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
300                         || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
301                         || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
302                         || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
303                         || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
304                         || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
305                         || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
306                         || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
307                         || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
308                         || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
309                         || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
310                       goto matched;
311 #endif
312                     c = *p++;
313                   }
314 #ifdef _LIBC
315                 else if (c == L('[') && *p == L('='))
316                   {
317                     UCHAR str[1];
318                     uint32_t nrules =
319                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
320                     const CHAR *startp = p;
321
322                     c = *++p;
323                     if (c == L('\0'))
324                       {
325                         p = startp;
326                         c = L('[');
327                         goto normal_bracket;
328                       }
329                     str[0] = c;
330
331                     c = *++p;
332                     if (c != L('=') || p[1] != L(']'))
333                       {
334                         p = startp;
335                         c = L('[');
336                         goto normal_bracket;
337                       }
338                     p += 2;
339
340                     if (nrules == 0)
341                       {
342                         if ((UCHAR) *n == str[0])
343                           goto matched;
344                       }
345                     else
346                       {
347                         const int32_t *table;
348 # if WIDE_CHAR_VERSION
349                         const int32_t *weights;
350                         const int32_t *extra;
351 # else
352                         const unsigned char *weights;
353                         const unsigned char *extra;
354 # endif
355                         const int32_t *indirect;
356                         int32_t idx;
357                         const UCHAR *cp = (const UCHAR *) str;
358
359                         /* This #include defines a local function!  */
360 # if WIDE_CHAR_VERSION
361 #  include <locale/weightwc.h>
362 # else
363 #  include <locale/weight.h>
364 # endif
365
366 # if WIDE_CHAR_VERSION
367                         table = (const int32_t *)
368                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
369                         weights = (const int32_t *)
370                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
371                         extra = (const int32_t *)
372                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
373                         indirect = (const int32_t *)
374                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
375 # else
376                         table = (const int32_t *)
377                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
378                         weights = (const unsigned char *)
379                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
380                         extra = (const unsigned char *)
381                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
382                         indirect = (const int32_t *)
383                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
384 # endif
385
386                         idx = findidx (&cp);
387                         if (idx != 0)
388                           {
389                             /* We found a table entry.  Now see whether the
390                                character we are currently at has the same
391                                equivalance class value.  */
392                             int len = weights[idx];
393                             int32_t idx2;
394                             const UCHAR *np = (const UCHAR *) n;
395
396                             idx2 = findidx (&np);
397                             if (idx2 != 0 && len == weights[idx2])
398                               {
399                                 int cnt = 0;
400
401                                 while (cnt < len
402                                        && (weights[idx + 1 + cnt]
403                                            == weights[idx2 + 1 + cnt]))
404                                   ++cnt;
405
406                                 if (cnt == len)
407                                   goto matched;
408                               }
409                           }
410                       }
411
412                     c = *p++;
413                   }
414 #endif
415                 else if (c == L('\0'))
416                   /* [ (unterminated) loses.  */
417                   return FNM_NOMATCH;
418                 else
419                   {
420                     int is_range = 0;
421
422 #ifdef _LIBC
423                     int is_seqval = 0;
424
425                     if (c == L('[') && *p == L('.'))
426                       {
427                         uint32_t nrules =
428                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
429                         const CHAR *startp = p;
430                         size_t c1 = 0;
431
432                         while (1)
433                           {
434                             c = *++p;
435                             if (c == L('.') && p[1] == L(']'))
436                               {
437                                 p += 2;
438                                 break;
439                               }
440                             if (c == '\0')
441                               return FNM_NOMATCH;
442                             ++c1;
443                           }
444
445                         /* We have to handling the symbols differently in
446                            ranges since then the collation sequence is
447                            important.  */
448                         is_range = *p == L('-') && p[1] != L('\0');
449
450                         if (nrules == 0)
451                           {
452                             /* There are no names defined in the collation
453                                data.  Therefore we only accept the trivial
454                                names consisting of the character itself.  */
455                             if (c1 != 1)
456                               return FNM_NOMATCH;
457
458                             if (!is_range && *n == startp[1])
459                               goto matched;
460
461                             cold = startp[1];
462                             c = *p++;
463                           }
464                         else
465                           {
466                             int32_t table_size;
467                             const int32_t *symb_table;
468 # ifdef WIDE_CHAR_VERSION
469                             char str[c1];
470                             unsigned int strcnt;
471 # else
472 #  define str (startp + 1)
473 # endif
474                             const unsigned char *extra;
475                             int32_t idx;
476                             int32_t elem;
477                             int32_t second;
478                             int32_t hash;
479
480 # ifdef WIDE_CHAR_VERSION
481                             /* We have to convert the name to a single-byte
482                                string.  This is possible since the names
483                                consist of ASCII characters and the internal
484                                representation is UCS4.  */
485                             for (strcnt = 0; strcnt < c1; ++strcnt)
486                               str[strcnt] = startp[1 + strcnt];
487 # endif
488
489                             table_size =
490                               _NL_CURRENT_WORD (LC_COLLATE,
491                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
492                             symb_table = (const int32_t *)
493                               _NL_CURRENT (LC_COLLATE,
494                                            _NL_COLLATE_SYMB_TABLEMB);
495                             extra = (const unsigned char *)
496                               _NL_CURRENT (LC_COLLATE,
497                                            _NL_COLLATE_SYMB_EXTRAMB);
498
499                             /* Locate the character in the hashing table.  */
500                             hash = elem_hash (str, c1);
501
502                             idx = 0;
503                             elem = hash % table_size;
504                             second = hash % (table_size - 2);
505                             while (symb_table[2 * elem] != 0)
506                               {
507                                 /* First compare the hashing value.  */
508                                 if (symb_table[2 * elem] == hash
509                                     && c1 == extra[symb_table[2 * elem + 1]]
510                                     && memcmp (str,
511                                                &extra[symb_table[2 * elem + 1]
512                                                      + 1], c1) == 0)
513                                   {
514                                     /* Yep, this is the entry.  */
515                                     idx = symb_table[2 * elem + 1];
516                                     idx += 1 + extra[idx];
517                                     break;
518                                   }
519
520                                 /* Next entry.  */
521                                 elem += second;
522                               }
523
524                             if (symb_table[2 * elem] != 0)
525                               {
526                                 /* Compare the byte sequence but only if
527                                    this is not part of a range.  */
528 # ifdef WIDE_CHAR_VERSION
529                                 int32_t *wextra;
530
531                                 idx += 1 + extra[idx];
532                                 /* Adjust for the alignment.  */
533                                 idx = (idx + 3) & ~3;
534
535                                 wextra = (int32_t *) &extra[idx + 4];
536 # endif
537
538                                 if (! is_range)
539                                   {
540 # ifdef WIDE_CHAR_VERSION
541                                     for (c1 = 0; c1 < wextra[idx]; ++c1)
542                                       if (n[c1] != wextra[1 + c1])
543                                         break;
544
545                                     if (c1 == wextra[idx])
546                                       goto matched;
547 # else
548                                     for (c1 = 0; c1 < extra[idx]; ++c1)
549                                       if (n[c1] != extra[1 + c1])
550                                         break;
551
552                                     if (c1 == extra[idx])
553                                       goto matched;
554 # endif
555                                   }
556
557                                 /* Get the collation sequence value.  */
558                                 is_seqval = 1;
559 # ifdef WIDE_CHAR_VERSION
560                                 cold = wextra[1 + wextra[idx]];
561 # else
562                                 /* Adjust for the alignment.  */
563                                 idx += 1 + extra[idx];
564                                 idx = (idx + 3) & ~4;
565                                 cold = *((int32_t *) &extra[idx]);
566 # endif
567
568                                 c = *p++;
569                               }
570                             else if (c1 == 1)
571                               {
572                                 /* No valid character.  Match it as a
573                                    single byte.  */
574                                 if (!is_range && *n == str[0])
575                                   goto matched;
576
577                                 cold = str[0];
578                                 c = *p++;
579                               }
580                             else
581                               return FNM_NOMATCH;
582                           }
583                       }
584                     else
585 # undef str
586 #endif
587                       {
588                         c = FOLD (c);
589                       normal_bracket:
590
591                         /* We have to handling the symbols differently in
592                            ranges since then the collation sequence is
593                            important.  */
594                         is_range = (*p == L('-') && p[1] != L('\0')
595                                     && p[1] != L(']'));
596
597                         if (!is_range && c == fn)
598                           goto matched;
599
600                         cold = c;
601                         c = *p++;
602                       }
603
604                     if (c == L('-') && *p != L(']'))
605                       {
606 #if _LIBC
607                         /* We have to find the collation sequence
608                            value for C.  Collation sequence is nothing
609                            we can regularly access.  The sequence
610                            value is defined by the order in which the
611                            definitions of the collation values for the
612                            various characters appear in the source
613                            file.  A strange concept, nowhere
614                            documented.  */
615                         uint32_t fcollseq;
616                         uint32_t lcollseq;
617                         UCHAR cend = *p++;
618
619 # ifdef WIDE_CHAR_VERSION
620                         /* Search in the `names' array for the characters.  */
621                         fcollseq = collseq_table_lookup (collseq, fn);
622                         if (fcollseq == ~((uint32_t) 0))
623                           /* XXX We don't know anything about the character
624                              we are supposed to match.  This means we are
625                              failing.  */
626                           goto range_not_matched;
627
628                         if (is_seqval)
629                           lcollseq = cold;
630                         else
631                           lcollseq = collseq_table_lookup (collseq, cold);
632 # else
633                         fcollseq = collseq[fn];
634                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
635 # endif
636
637                         is_seqval = 0;
638                         if (cend == L('[') && *p == L('.'))
639                           {
640                             uint32_t nrules =
641                               _NL_CURRENT_WORD (LC_COLLATE,
642                                                 _NL_COLLATE_NRULES);
643                             const CHAR *startp = p;
644                             size_t c1 = 0;
645
646                             while (1)
647                               {
648                                 c = *++p;
649                                 if (c == L('.') && p[1] == L(']'))
650                                   {
651                                     p += 2;
652                                     break;
653                                   }
654                                 if (c == '\0')
655                                   return FNM_NOMATCH;
656                                 ++c1;
657                               }
658
659                             if (nrules == 0)
660                               {
661                                 /* There are no names defined in the
662                                    collation data.  Therefore we only
663                                    accept the trivial names consisting
664                                    of the character itself.  */
665                                 if (c1 != 1)
666                                   return FNM_NOMATCH;
667
668                                 cend = startp[1];
669                               }
670                             else
671                               {
672                                 int32_t table_size;
673                                 const int32_t *symb_table;
674 # ifdef WIDE_CHAR_VERSION
675                                 char str[c1];
676                                 unsigned int strcnt;
677 # else
678 #  define str (startp + 1)
679 # endif
680                                 const unsigned char *extra;
681                                 int32_t idx;
682                                 int32_t elem;
683                                 int32_t second;
684                                 int32_t hash;
685
686 # ifdef WIDE_CHAR_VERSION
687                                 /* We have to convert the name to a single-byte
688                                    string.  This is possible since the names
689                                    consist of ASCII characters and the internal
690                                    representation is UCS4.  */
691                                 for (strcnt = 0; strcnt < c1; ++strcnt)
692                                   str[strcnt] = startp[1 + strcnt];
693 # endif
694
695                                 table_size =
696                                   _NL_CURRENT_WORD (LC_COLLATE,
697                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
698                                 symb_table = (const int32_t *)
699                                   _NL_CURRENT (LC_COLLATE,
700                                                _NL_COLLATE_SYMB_TABLEMB);
701                                 extra = (const unsigned char *)
702                                   _NL_CURRENT (LC_COLLATE,
703                                                _NL_COLLATE_SYMB_EXTRAMB);
704
705                                 /* Locate the character in the hashing
706                                    table.  */
707                                 hash = elem_hash (str, c1);
708
709                                 idx = 0;
710                                 elem = hash % table_size;
711                                 second = hash % (table_size - 2);
712                                 while (symb_table[2 * elem] != 0)
713                                   {
714                                 /* First compare the hashing value.  */
715                                     if (symb_table[2 * elem] == hash
716                                         && (c1
717                                             == extra[symb_table[2 * elem + 1]])
718                                         && memcmp (str,
719                                                    &extra[symb_table[2 * elem + 1]
720                                                          + 1], c1) == 0)
721                                       {
722                                         /* Yep, this is the entry.  */
723                                         idx = symb_table[2 * elem + 1];
724                                         idx += 1 + extra[idx];
725                                         break;
726                                       }
727
728                                     /* Next entry.  */
729                                     elem += second;
730                                   }
731
732                                 if (symb_table[2 * elem] != 0)
733                                   {
734                                     /* Compare the byte sequence but only if
735                                        this is not part of a range.  */
736 # ifdef WIDE_CHAR_VERSION
737                                     int32_t *wextra;
738
739                                     idx += 1 + extra[idx];
740                                     /* Adjust for the alignment.  */
741                                     idx = (idx + 3) & ~4;
742
743                                     wextra = (int32_t *) &extra[idx + 4];
744 # endif
745                                     /* Get the collation sequence value.  */
746                                     is_seqval = 1;
747 # ifdef WIDE_CHAR_VERSION
748                                     cend = wextra[1 + wextra[idx]];
749 # else
750                                     /* Adjust for the alignment.  */
751                                     idx += 1 + extra[idx];
752                                     idx = (idx + 3) & ~4;
753                                     cend = *((int32_t *) &extra[idx]);
754 # endif
755                                   }
756                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
757                                   {
758                                     cend = str[0];
759                                     c = *p++;
760                                   }
761                                 else
762                                   return FNM_NOMATCH;
763                               }
764 # undef str
765                           }
766                         else
767                           {
768                             if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
769                               cend = *p++;
770                             if (cend == L('\0'))
771                               return FNM_NOMATCH;
772                             cend = FOLD (cend);
773                           }
774
775                         /* XXX It is not entirely clear to me how to handle
776                            characters which are not mentioned in the
777                            collation specification.  */
778                         if (
779 # ifdef WIDE_CHAR_VERSION
780                             lcollseq == 0xffffffff ||
781 # endif
782                             lcollseq <= fcollseq)
783                           {
784                             /* We have to look at the upper bound.  */
785                             uint32_t hcollseq;
786
787                             if (is_seqval)
788                               hcollseq = cend;
789                             else
790                               {
791 # ifdef WIDE_CHAR_VERSION
792                                 hcollseq =
793                                   collseq_table_lookup (collseq, cend);
794                                 if (hcollseq == ~((uint32_t) 0))
795                                   {
796                                     /* Hum, no information about the upper
797                                        bound.  The matching succeeds if the
798                                        lower bound is matched exactly.  */
799                                     if (lcollseq != fcollseq)
800                                       goto range_not_matched;
801
802                                     goto matched;
803                                   }
804 # else
805                                 hcollseq = collseq[cend];
806 # endif
807                               }
808
809                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
810                               goto matched;
811                           }
812 # ifdef WIDE_CHAR_VERSION
813                       range_not_matched:
814 # endif
815 #else
816                         /* We use a boring value comparison of the character
817                            values.  This is better than comparing using
818                            `strcoll' since the latter would have surprising
819                            and sometimes fatal consequences.  */
820                         UCHAR cend = *p++;
821
822                         if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
823                           cend = *p++;
824                         if (cend == L('\0'))
825                           return FNM_NOMATCH;
826
827                         /* It is a range.  */
828                         if (cold <= fn && fn <= cend)
829                           goto matched;
830 #endif
831
832                         c = *p++;
833                       }
834                   }
835
836                 if (c == L(']'))
837                   break;
838               }
839
840             if (!not)
841               return FNM_NOMATCH;
842             break;
843
844           matched:
845             /* Skip the rest of the [...] that already matched.  */
846             do
847               {
848               ignore_next:
849                 c = *p++;
850
851                 if (c == L('\0'))
852                   /* [... (unterminated) loses.  */
853                   return FNM_NOMATCH;
854
855                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
856                   {
857                     if (*p == L('\0'))
858                       return FNM_NOMATCH;
859                     /* XXX 1003.2d11 is unclear if this is right.  */
860                     ++p;
861                   }
862                 else if (c == L('[') && *p == L(':'))
863                   {
864                     int c1 = 0;
865                     const CHAR *startp = p;
866
867                     while (1)
868                       {
869                         c = *++p;
870                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
871                           return FNM_NOMATCH;
872
873                         if (*p == L(':') && p[1] == L(']'))
874                           break;
875
876                         if (c < L('a') || c >= L('z'))
877                           {
878                             p = startp;
879                             goto ignore_next;
880                           }
881                       }
882                     p += 2;
883                     c = *p++;
884                   }
885                 else if (c == L('[') && *p == L('='))
886                   {
887                     c = *++p;
888                     if (c == L('\0'))
889                       return FNM_NOMATCH;
890                     c = *++p;
891                     if (c != L('=') || p[1] != L(']'))
892                       return FNM_NOMATCH;
893                     p += 2;
894                     c = *p++;
895                   }
896                 else if (c == L('[') && *p == L('.'))
897                   {
898                     ++p;
899                     while (1)
900                       {
901                         c = *++p;
902                         if (c == '\0')
903                           return FNM_NOMATCH;
904
905                         if (*p == L('.') && p[1] == L(']'))
906                           break;
907                       }
908                     p += 2;
909                     c = *p++;
910                   }
911               }
912             while (c != L(']'));
913             if (not)
914               return FNM_NOMATCH;
915           }
916           break;
917
918         case L('+'):
919         case L('@'):
920         case L('!'):
921           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
922             {
923               int res;
924
925               res = EXT (c, p, n, string_end, no_leading_period, flags);
926               if (res != -1)
927                 return res;
928             }
929           goto normal_match;
930
931         case L('/'):
932           if (NO_LEADING_PERIOD (flags))
933             {
934               if (n == string_end || c != *n)
935                 return FNM_NOMATCH;
936
937               new_no_leading_period = 1;
938               break;
939             }
940           /* FALLTHROUGH */
941         default:
942         normal_match:
943           if (n == string_end || c != FOLD ((UCHAR) *n))
944             return FNM_NOMATCH;
945         }
946
947       no_leading_period = new_no_leading_period;
948       ++n;
949     }
950
951   if (n == string_end)
952     return 0;
953
954   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
955     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
956     return 0;
957
958   return FNM_NOMATCH;
959 }
960
961
962 static const CHAR *
963 internal_function
964 END (const CHAR *pattern)
965 {
966   const CHAR *p = pattern;
967
968   while (1)
969     if (*++p == L('\0'))
970       /* This is an invalid pattern.  */
971       return pattern;
972     else if (*p == L('['))
973       {
974         /* Handle brackets special.  */
975         if (posixly_correct == 0)
976           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
977
978         /* Skip the not sign.  We have to recognize it because of a possibly
979            following ']'.  */
980         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
981           ++p;
982         /* A leading ']' is recognized as such.  */
983         if (*p == L(']'))
984           ++p;
985         /* Skip over all characters of the list.  */
986         while (*p != L(']'))
987           if (*p++ == L('\0'))
988             /* This is no valid pattern.  */
989             return pattern;
990       }
991     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
992               || *p == L('!')) && p[1] == L('('))
993       p = END (p + 1);
994     else if (*p == L(')'))
995       break;
996
997   return p + 1;
998 }
999
1000
1001 static int
1002 internal_function
1003 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1004      int no_leading_period, int flags)
1005 {
1006   const CHAR *startp;
1007   int level;
1008   struct patternlist
1009   {
1010     struct patternlist *next;
1011     CHAR str[1];
1012   } *list = NULL;
1013   struct patternlist **lastp = &list;
1014   size_t pattern_len = STRLEN (pattern);
1015   const CHAR *p;
1016   const CHAR *rs;
1017
1018   /* Parse the pattern.  Store the individual parts in the list.  */
1019   level = 0;
1020   for (startp = p = pattern + 1; level >= 0; ++p)
1021     if (*p == L('\0'))
1022       /* This is an invalid pattern.  */
1023       return -1;
1024     else if (*p == L('['))
1025       {
1026         /* Handle brackets special.  */
1027         if (posixly_correct == 0)
1028           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1029
1030         /* Skip the not sign.  We have to recognize it because of a possibly
1031            following ']'.  */
1032         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1033           ++p;
1034         /* A leading ']' is recognized as such.  */
1035         if (*p == L(']'))
1036           ++p;
1037         /* Skip over all characters of the list.  */
1038         while (*p != L(']'))
1039           if (*p++ == L('\0'))
1040             /* This is no valid pattern.  */
1041             return -1;
1042       }
1043     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1044               || *p == L('!')) && p[1] == L('('))
1045       /* Remember the nesting level.  */
1046       ++level;
1047     else if (*p == L(')'))
1048       {
1049         if (level-- == 0)
1050           {
1051             /* This means we found the end of the pattern.  */
1052 #define NEW_PATTERN \
1053             struct patternlist *newp;                                         \
1054             size_t plen;                                                      \
1055                                                                               \
1056             plen = (opt == L('?') || opt == L('@')                            \
1057                     ? pattern_len                                             \
1058                     : p - startp + 1);                                        \
1059             newp = (struct patternlist *)                                     \
1060               alloca (offsetof (struct patternlist, str)                      \
1061                       + (plen * sizeof (CHAR)));                              \
1062             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
1063             newp->next = NULL;                                                \
1064             *lastp = newp;                                                    \
1065             lastp = &newp->next
1066             NEW_PATTERN;
1067           }
1068       }
1069     else if (*p == L('|'))
1070       {
1071         if (level == 0)
1072           {
1073             NEW_PATTERN;
1074             startp = p + 1;
1075           }
1076       }
1077   assert (list != NULL);
1078   assert (p[-1] == L(')'));
1079 #undef NEW_PATTERN
1080
1081   switch (opt)
1082     {
1083     case L('*'):
1084       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1085         return 0;
1086       /* FALLTHROUGH */
1087
1088     case L('+'):
1089       do
1090         {
1091           for (rs = string; rs <= string_end; ++rs)
1092             /* First match the prefix with the current pattern with the
1093                current pattern.  */
1094             if (FCT (list->str, string, rs, no_leading_period,
1095                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1096                 /* This was successful.  Now match the rest with the rest
1097                    of the pattern.  */
1098                 && (FCT (p, rs, string_end,
1099                          rs == string
1100                          ? no_leading_period
1101                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1102                          flags & FNM_FILE_NAME
1103                          ? flags : flags & ~FNM_PERIOD) == 0
1104                     /* This didn't work.  Try the whole pattern.  */
1105                     || (rs != string
1106                         && FCT (pattern - 1, rs, string_end,
1107                                 rs == string
1108                                 ? no_leading_period
1109                                 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1110                                    ? 1 : 0),
1111                                 flags & FNM_FILE_NAME
1112                                 ? flags : flags & ~FNM_PERIOD) == 0)))
1113               /* It worked.  Signal success.  */
1114               return 0;
1115         }
1116       while ((list = list->next) != NULL);
1117
1118       /* None of the patterns lead to a match.  */
1119       return FNM_NOMATCH;
1120
1121     case L('?'):
1122       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1123         return 0;
1124       /* FALLTHROUGH */
1125
1126     case L('@'):
1127       do
1128         /* I cannot believe it but `strcat' is actually acceptable
1129            here.  Match the entire string with the prefix from the
1130            pattern list and the rest of the pattern following the
1131            pattern list.  */
1132         if (FCT (STRCAT (list->str, p), string, string_end,
1133                  no_leading_period,
1134                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1135           /* It worked.  Signal success.  */
1136           return 0;
1137       while ((list = list->next) != NULL);
1138
1139       /* None of the patterns lead to a match.  */
1140       return FNM_NOMATCH;
1141
1142     case L('!'):
1143       for (rs = string; rs <= string_end; ++rs)
1144         {
1145           struct patternlist *runp;
1146
1147           for (runp = list; runp != NULL; runp = runp->next)
1148             if (FCT (runp->str, string, rs,  no_leading_period,
1149                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1150               break;
1151
1152           /* If none of the patterns matched see whether the rest does.  */
1153           if (runp == NULL
1154               && (FCT (p, rs, string_end,
1155                        rs == string
1156                        ? no_leading_period
1157                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1158                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1159                   == 0))
1160             /* This is successful.  */
1161             return 0;
1162         }
1163
1164       /* None of the patterns together with the rest of the pattern
1165          lead to a match.  */
1166       return FNM_NOMATCH;
1167
1168     default:
1169       assert (! "Invalid extended matching operator");
1170       break;
1171     }
1172
1173   return -1;
1174 }
1175
1176
1177 #undef FOLD
1178 #undef CHAR
1179 #undef UCHAR
1180 #undef INT
1181 #undef FCT
1182 #undef EXT
1183 #undef END
1184 #undef MEMPCPY
1185 #undef MEMCHR
1186 #undef STRCOLL
1187 #undef STRLEN
1188 #undef STRCAT
1189 #undef L
1190 #undef BTOWC