regex: port to Stratus OpenVOS
[gnulib.git] / lib / fnmatch_loop.c
1 /* Copyright (C) 1991-1993, 1996-2006, 2009-2011 Free Software Foundation, Inc.
2    This file is part of the GNU C Library.
3
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 2, or (at your option)
7    any later version.
8
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software Foundation,
16    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
17
18 /* Match STRING against the file name pattern PATTERN, returning zero if
19    it matches, nonzero if not.  */
20 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
21                 const CHAR *string_end, bool no_leading_period, int flags)
22      internal_function;
23 static const CHAR *END (const CHAR *patternp) internal_function;
24
25 static int
26 internal_function
27 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
28      bool no_leading_period, int flags)
29 {
30   register const CHAR *p = pattern, *n = string;
31   register UCHAR c;
32 #ifdef _LIBC
33 # if WIDE_CHAR_VERSION
34   const char *collseq = (const char *)
35     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
36 # else
37   const UCHAR *collseq = (const UCHAR *)
38     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
39 # endif
40 #endif
41
42   while ((c = *p++) != L_('\0'))
43     {
44       bool new_no_leading_period = false;
45       c = FOLD (c);
46
47       switch (c)
48         {
49         case L_('?'):
50           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
51             {
52               int res;
53
54               res = EXT (c, p, n, string_end, no_leading_period,
55                          flags);
56               if (res != -1)
57                 return res;
58             }
59
60           if (n == string_end)
61             return FNM_NOMATCH;
62           else if (*n == L_('/') && (flags & FNM_FILE_NAME))
63             return FNM_NOMATCH;
64           else if (*n == L_('.') && no_leading_period)
65             return FNM_NOMATCH;
66           break;
67
68         case L_('\\'):
69           if (!(flags & FNM_NOESCAPE))
70             {
71               c = *p++;
72               if (c == L_('\0'))
73                 /* Trailing \ loses.  */
74                 return FNM_NOMATCH;
75               c = FOLD (c);
76             }
77           if (n == string_end || FOLD ((UCHAR) *n) != c)
78             return FNM_NOMATCH;
79           break;
80
81         case L_('*'):
82           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
83             {
84               int res;
85
86               res = EXT (c, p, n, string_end, no_leading_period,
87                          flags);
88               if (res != -1)
89                 return res;
90             }
91
92           if (n != string_end && *n == L_('.') && no_leading_period)
93             return FNM_NOMATCH;
94
95           for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
96             {
97               if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
98                 {
99                   const CHAR *endp = END (p);
100                   if (endp != p)
101                     {
102                       /* This is a pattern.  Skip over it.  */
103                       p = endp;
104                       continue;
105                     }
106                 }
107
108               if (c == L_('?'))
109                 {
110                   /* A ? needs to match one character.  */
111                   if (n == string_end)
112                     /* There isn't another character; no match.  */
113                     return FNM_NOMATCH;
114                   else if (*n == L_('/')
115                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
116                     /* A slash does not match a wildcard under
117                        FNM_FILE_NAME.  */
118                     return FNM_NOMATCH;
119                   else
120                     /* One character of the string is consumed in matching
121                        this ? wildcard, so *??? won't match if there are
122                        less than three characters.  */
123                     ++n;
124                 }
125             }
126
127           if (c == L_('\0'))
128             /* The wildcard(s) is/are the last element of the pattern.
129                If the name is a file name and contains another slash
130                this means it cannot match, unless the FNM_LEADING_DIR
131                flag is set.  */
132             {
133               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
134
135               if (flags & FNM_FILE_NAME)
136                 {
137                   if (flags & FNM_LEADING_DIR)
138                     result = 0;
139                   else
140                     {
141                       if (MEMCHR (n, L_('/'), string_end - n) == NULL)
142                         result = 0;
143                     }
144                 }
145
146               return result;
147             }
148           else
149             {
150               const CHAR *endp;
151
152               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
153                              string_end - n);
154               if (endp == NULL)
155                 endp = string_end;
156
157               if (c == L_('[')
158                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
159                       && (c == L_('@') || c == L_('+') || c == L_('!'))
160                       && *p == L_('(')))
161                 {
162                   int flags2 = ((flags & FNM_FILE_NAME)
163                                 ? flags : (flags & ~FNM_PERIOD));
164                   bool no_leading_period2 = no_leading_period;
165
166                   for (--p; n < endp; ++n, no_leading_period2 = false)
167                     if (FCT (p, n, string_end, no_leading_period2, flags2)
168                         == 0)
169                       return 0;
170                 }
171               else if (c == L_('/') && (flags & FNM_FILE_NAME))
172                 {
173                   while (n < string_end && *n != L_('/'))
174                     ++n;
175                   if (n < string_end && *n == L_('/')
176                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
177                           == 0))
178                     return 0;
179                 }
180               else
181                 {
182                   int flags2 = ((flags & FNM_FILE_NAME)
183                                 ? flags : (flags & ~FNM_PERIOD));
184                   int no_leading_period2 = no_leading_period;
185
186                   if (c == L_('\\') && !(flags & FNM_NOESCAPE))
187                     c = *p;
188                   c = FOLD (c);
189                   for (--p; n < endp; ++n, no_leading_period2 = false)
190                     if (FOLD ((UCHAR) *n) == c
191                         && (FCT (p, n, string_end, no_leading_period2, flags2)
192                             == 0))
193                       return 0;
194                 }
195             }
196
197           /* If we come here no match is possible with the wildcard.  */
198           return FNM_NOMATCH;
199
200         case L_('['):
201           {
202             /* Nonzero if the sense of the character class is inverted.  */
203             const CHAR *p_init = p;
204             const CHAR *n_init = n;
205             register bool not;
206             CHAR cold;
207             UCHAR fn;
208
209             if (posixly_correct == 0)
210               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
211
212             if (n == string_end)
213               return FNM_NOMATCH;
214
215             if (*n == L_('.') && no_leading_period)
216               return FNM_NOMATCH;
217
218             if (*n == L_('/') && (flags & FNM_FILE_NAME))
219               /* `/' cannot be matched.  */
220               return FNM_NOMATCH;
221
222             not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
223             if (not)
224               ++p;
225
226             fn = FOLD ((UCHAR) *n);
227
228             c = *p++;
229             for (;;)
230               {
231                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
232                   {
233                     if (*p == L_('\0'))
234                       return FNM_NOMATCH;
235                     c = FOLD ((UCHAR) *p);
236                     ++p;
237
238                     goto normal_bracket;
239                   }
240                 else if (c == L_('[') && *p == L_(':'))
241                   {
242                     /* Leave room for the null.  */
243                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
244                     size_t c1 = 0;
245 #if defined _LIBC || WIDE_CHAR_SUPPORT
246                     wctype_t wt;
247 #endif
248                     const CHAR *startp = p;
249
250                     for (;;)
251                       {
252                         if (c1 == CHAR_CLASS_MAX_LENGTH)
253                           /* The name is too long and therefore the pattern
254                              is ill-formed.  */
255                           return FNM_NOMATCH;
256
257                         c = *++p;
258                         if (c == L_(':') && p[1] == L_(']'))
259                           {
260                             p += 2;
261                             break;
262                           }
263                         if (c < L_('a') || c >= L_('z'))
264                           {
265                             /* This cannot possibly be a character class name.
266                                Match it as a normal range.  */
267                             p = startp;
268                             c = L_('[');
269                             goto normal_bracket;
270                           }
271                         str[c1++] = c;
272                       }
273                     str[c1] = L_('\0');
274
275 #if defined _LIBC || WIDE_CHAR_SUPPORT
276                     wt = IS_CHAR_CLASS (str);
277                     if (wt == 0)
278                       /* Invalid character class name.  */
279                       return FNM_NOMATCH;
280
281 # if defined _LIBC && ! WIDE_CHAR_VERSION
282                     /* The following code is glibc specific but does
283                        there a good job in speeding up the code since
284                        we can avoid the btowc() call.  */
285                     if (_ISCTYPE ((UCHAR) *n, wt))
286                       goto matched;
287 # else
288                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
289                       goto matched;
290 # endif
291 #else
292                     if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
293                         || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
294                         || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
295                         || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
296                         || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
297                         || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
298                         || (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
299                         || (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
300                         || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
301                         || (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
302                         || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
303                         || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
304                       goto matched;
305 #endif
306                     c = *p++;
307                   }
308 #ifdef _LIBC
309                 else if (c == L_('[') && *p == L_('='))
310                   {
311                     UCHAR str[1];
312                     uint32_t nrules =
313                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
314                     const CHAR *startp = p;
315
316                     c = *++p;
317                     if (c == L_('\0'))
318                       {
319                         p = startp;
320                         c = L_('[');
321                         goto normal_bracket;
322                       }
323                     str[0] = c;
324
325                     c = *++p;
326                     if (c != L_('=') || p[1] != L_(']'))
327                       {
328                         p = startp;
329                         c = L_('[');
330                         goto normal_bracket;
331                       }
332                     p += 2;
333
334                     if (nrules == 0)
335                       {
336                         if ((UCHAR) *n == str[0])
337                           goto matched;
338                       }
339                     else
340                       {
341                         const int32_t *table;
342 # if WIDE_CHAR_VERSION
343                         const int32_t *weights;
344                         const int32_t *extra;
345 # else
346                         const unsigned char *weights;
347                         const unsigned char *extra;
348 # endif
349                         const int32_t *indirect;
350                         int32_t idx;
351                         const UCHAR *cp = (const UCHAR *) str;
352
353                         /* This #include defines a local function!  */
354 # if WIDE_CHAR_VERSION
355 #  include <locale/weightwc.h>
356 # else
357 #  include <locale/weight.h>
358 # endif
359
360 # if WIDE_CHAR_VERSION
361                         table = (const int32_t *)
362                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
363                         weights = (const int32_t *)
364                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
365                         extra = (const int32_t *)
366                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
367                         indirect = (const int32_t *)
368                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
369 # else
370                         table = (const int32_t *)
371                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
372                         weights = (const unsigned char *)
373                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
374                         extra = (const unsigned char *)
375                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
376                         indirect = (const int32_t *)
377                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
378 # endif
379
380                         idx = findidx (&cp);
381                         if (idx != 0)
382                           {
383                             /* We found a table entry.  Now see whether the
384                                character we are currently at has the same
385                                equivalance class value.  */
386                             int len = weights[idx & 0xffffff];
387                             int32_t idx2;
388                             const UCHAR *np = (const UCHAR *) n;
389
390                             idx2 = findidx (&np);
391                             if (idx2 != 0
392                                 && (idx >> 24) == (idx2 >> 24)
393                                 && len == weights[idx2 & 0xffffff])
394                               {
395                                 int cnt = 0;
396
397                                 idx &= 0xffffff;
398                                 idx2 &= 0xffffff;
399
400                                 while (cnt < len
401                                        && (weights[idx + 1 + cnt]
402                                            == weights[idx2 + 1 + cnt]))
403                                   ++cnt;
404
405                                 if (cnt == len)
406                                   goto matched;
407                               }
408                           }
409                       }
410
411                     c = *p++;
412                   }
413 #endif
414                 else if (c == L_('\0'))
415                   {
416                     /* [ unterminated, treat as normal character.  */
417                     p = p_init;
418                     n = n_init;
419                     c = L_('[');
420                     goto normal_match;
421                   }
422                 else
423                   {
424                     bool is_range = false;
425
426 #ifdef _LIBC
427                     bool is_seqval = false;
428
429                     if (c == L_('[') && *p == L_('.'))
430                       {
431                         uint32_t nrules =
432                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
433                         const CHAR *startp = p;
434                         size_t c1 = 0;
435
436                         while (1)
437                           {
438                             c = *++p;
439                             if (c == L_('.') && p[1] == L_(']'))
440                               {
441                                 p += 2;
442                                 break;
443                               }
444                             if (c == '\0')
445                               return FNM_NOMATCH;
446                             ++c1;
447                           }
448
449                         /* We have to handling the symbols differently in
450                            ranges since then the collation sequence is
451                            important.  */
452                         is_range = *p == L_('-') && p[1] != L_('\0');
453
454                         if (nrules == 0)
455                           {
456                             /* There are no names defined in the collation
457                                data.  Therefore we only accept the trivial
458                                names consisting of the character itself.  */
459                             if (c1 != 1)
460                               return FNM_NOMATCH;
461
462                             if (!is_range && *n == startp[1])
463                               goto matched;
464
465                             cold = startp[1];
466                             c = *p++;
467                           }
468                         else
469                           {
470                             int32_t table_size;
471                             const int32_t *symb_table;
472 # ifdef WIDE_CHAR_VERSION
473                             char str[c1];
474                             size_t strcnt;
475 # else
476 #  define str (startp + 1)
477 # endif
478                             const unsigned char *extra;
479                             int32_t idx;
480                             int32_t elem;
481                             int32_t second;
482                             int32_t hash;
483
484 # ifdef WIDE_CHAR_VERSION
485                             /* We have to convert the name to a single-byte
486                                string.  This is possible since the names
487                                consist of ASCII characters and the internal
488                                representation is UCS4.  */
489                             for (strcnt = 0; strcnt < c1; ++strcnt)
490                               str[strcnt] = startp[1 + strcnt];
491 # endif
492
493                             table_size =
494                               _NL_CURRENT_WORD (LC_COLLATE,
495                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
496                             symb_table = (const int32_t *)
497                               _NL_CURRENT (LC_COLLATE,
498                                            _NL_COLLATE_SYMB_TABLEMB);
499                             extra = (const unsigned char *)
500                               _NL_CURRENT (LC_COLLATE,
501                                            _NL_COLLATE_SYMB_EXTRAMB);
502
503                             /* Locate the character in the hashing table.  */
504                             hash = elem_hash (str, c1);
505
506                             idx = 0;
507                             elem = hash % table_size;
508                             if (symb_table[2 * elem] != 0)
509                               {
510                                 second = hash % (table_size - 2) + 1;
511
512                                 do
513                                   {
514                                     /* First compare the hashing value.  */
515                                     if (symb_table[2 * elem] == hash
516                                         && (c1
517                                             == extra[symb_table[2 * elem + 1]])
518                                         && memcmp (str,
519                                                    &extra[symb_table[2 * elem
520                                                                      + 1]
521                                                           + 1], c1) == 0)
522                                       {
523                                         /* Yep, this is the entry.  */
524                                         idx = symb_table[2 * elem + 1];
525                                         idx += 1 + extra[idx];
526                                         break;
527                                       }
528
529                                     /* Next entry.  */
530                                     elem += second;
531                                   }
532                                 while (symb_table[2 * elem] != 0);
533                               }
534
535                             if (symb_table[2 * elem] != 0)
536                               {
537                                 /* Compare the byte sequence but only if
538                                    this is not part of a range.  */
539 # ifdef WIDE_CHAR_VERSION
540                                 int32_t *wextra;
541
542                                 idx += 1 + extra[idx];
543                                 /* Adjust for the alignment.  */
544                                 idx = (idx + 3) & ~3;
545
546                                 wextra = (int32_t *) &extra[idx + 4];
547 # endif
548
549                                 if (! is_range)
550                                   {
551 # ifdef WIDE_CHAR_VERSION
552                                     for (c1 = 0;
553                                          (int32_t) c1 < wextra[idx];
554                                          ++c1)
555                                       if (n[c1] != wextra[1 + c1])
556                                         break;
557
558                                     if ((int32_t) c1 == wextra[idx])
559                                       goto matched;
560 # else
561                                     for (c1 = 0; c1 < extra[idx]; ++c1)
562                                       if (n[c1] != extra[1 + c1])
563                                         break;
564
565                                     if (c1 == extra[idx])
566                                       goto matched;
567 # endif
568                                   }
569
570                                 /* Get the collation sequence value.  */
571                                 is_seqval = true;
572 # ifdef WIDE_CHAR_VERSION
573                                 cold = wextra[1 + wextra[idx]];
574 # else
575                                 /* Adjust for the alignment.  */
576                                 idx += 1 + extra[idx];
577                                 idx = (idx + 3) & ~4;
578                                 cold = *((int32_t *) &extra[idx]);
579 # endif
580
581                                 c = *p++;
582                               }
583                             else if (c1 == 1)
584                               {
585                                 /* No valid character.  Match it as a
586                                    single byte.  */
587                                 if (!is_range && *n == str[0])
588                                   goto matched;
589
590                                 cold = str[0];
591                                 c = *p++;
592                               }
593                             else
594                               return FNM_NOMATCH;
595                           }
596                       }
597                     else
598 # undef str
599 #endif
600                       {
601                         c = FOLD (c);
602                       normal_bracket:
603
604                         /* We have to handling the symbols differently in
605                            ranges since then the collation sequence is
606                            important.  */
607                         is_range = (*p == L_('-') && p[1] != L_('\0')
608                                     && p[1] != L_(']'));
609
610                         if (!is_range && c == fn)
611                           goto matched;
612
613 #if _LIBC
614                         /* This is needed if we goto normal_bracket; from
615                            outside of is_seqval's scope.  */
616                         is_seqval = false;
617 #endif
618
619                         cold = c;
620                         c = *p++;
621                       }
622
623                     if (c == L_('-') && *p != L_(']'))
624                       {
625 #if _LIBC
626                         /* We have to find the collation sequence
627                            value for C.  Collation sequence is nothing
628                            we can regularly access.  The sequence
629                            value is defined by the order in which the
630                            definitions of the collation values for the
631                            various characters appear in the source
632                            file.  A strange concept, nowhere
633                            documented.  */
634                         uint32_t fcollseq;
635                         uint32_t lcollseq;
636                         UCHAR cend = *p++;
637
638 # ifdef WIDE_CHAR_VERSION
639                         /* Search in the `names' array for the characters.  */
640                         fcollseq = __collseq_table_lookup (collseq, fn);
641                         if (fcollseq == ~((uint32_t) 0))
642                           /* XXX We don't know anything about the character
643                              we are supposed to match.  This means we are
644                              failing.  */
645                           goto range_not_matched;
646
647                         if (is_seqval)
648                           lcollseq = cold;
649                         else
650                           lcollseq = __collseq_table_lookup (collseq, cold);
651 # else
652                         fcollseq = collseq[fn];
653                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
654 # endif
655
656                         is_seqval = false;
657                         if (cend == L_('[') && *p == L_('.'))
658                           {
659                             uint32_t nrules =
660                               _NL_CURRENT_WORD (LC_COLLATE,
661                                                 _NL_COLLATE_NRULES);
662                             const CHAR *startp = p;
663                             size_t c1 = 0;
664
665                             while (1)
666                               {
667                                 c = *++p;
668                                 if (c == L_('.') && p[1] == L_(']'))
669                                   {
670                                     p += 2;
671                                     break;
672                                   }
673                                 if (c == '\0')
674                                   return FNM_NOMATCH;
675                                 ++c1;
676                               }
677
678                             if (nrules == 0)
679                               {
680                                 /* There are no names defined in the
681                                    collation data.  Therefore we only
682                                    accept the trivial names consisting
683                                    of the character itself.  */
684                                 if (c1 != 1)
685                                   return FNM_NOMATCH;
686
687                                 cend = startp[1];
688                               }
689                             else
690                               {
691                                 int32_t table_size;
692                                 const int32_t *symb_table;
693 # ifdef WIDE_CHAR_VERSION
694                                 char str[c1];
695                                 size_t strcnt;
696 # else
697 #  define str (startp + 1)
698 # endif
699                                 const unsigned char *extra;
700                                 int32_t idx;
701                                 int32_t elem;
702                                 int32_t second;
703                                 int32_t hash;
704
705 # ifdef WIDE_CHAR_VERSION
706                                 /* We have to convert the name to a single-byte
707                                    string.  This is possible since the names
708                                    consist of ASCII characters and the internal
709                                    representation is UCS4.  */
710                                 for (strcnt = 0; strcnt < c1; ++strcnt)
711                                   str[strcnt] = startp[1 + strcnt];
712 # endif
713
714                                 table_size =
715                                   _NL_CURRENT_WORD (LC_COLLATE,
716                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
717                                 symb_table = (const int32_t *)
718                                   _NL_CURRENT (LC_COLLATE,
719                                                _NL_COLLATE_SYMB_TABLEMB);
720                                 extra = (const unsigned char *)
721                                   _NL_CURRENT (LC_COLLATE,
722                                                _NL_COLLATE_SYMB_EXTRAMB);
723
724                                 /* Locate the character in the hashing
725                                    table.  */
726                                 hash = elem_hash (str, c1);
727
728                                 idx = 0;
729                                 elem = hash % table_size;
730                                 if (symb_table[2 * elem] != 0)
731                                   {
732                                     second = hash % (table_size - 2) + 1;
733
734                                     do
735                                       {
736                                         /* First compare the hashing value.  */
737                                         if (symb_table[2 * elem] == hash
738                                             && (c1
739                                                 == extra[symb_table[2 * elem + 1]])
740                                             && memcmp (str,
741                                                        &extra[symb_table[2 * elem + 1]
742                                                               + 1], c1) == 0)
743                                           {
744                                             /* Yep, this is the entry.  */
745                                             idx = symb_table[2 * elem + 1];
746                                             idx += 1 + extra[idx];
747                                             break;
748                                           }
749
750                                         /* Next entry.  */
751                                         elem += second;
752                                       }
753                                     while (symb_table[2 * elem] != 0);
754                                   }
755
756                                 if (symb_table[2 * elem] != 0)
757                                   {
758                                     /* Compare the byte sequence but only if
759                                        this is not part of a range.  */
760 # ifdef WIDE_CHAR_VERSION
761                                     int32_t *wextra;
762
763                                     idx += 1 + extra[idx];
764                                     /* Adjust for the alignment.  */
765                                     idx = (idx + 3) & ~4;
766
767                                     wextra = (int32_t *) &extra[idx + 4];
768 # endif
769                                     /* Get the collation sequence value.  */
770                                     is_seqval = true;
771 # ifdef WIDE_CHAR_VERSION
772                                     cend = wextra[1 + wextra[idx]];
773 # else
774                                     /* Adjust for the alignment.  */
775                                     idx += 1 + extra[idx];
776                                     idx = (idx + 3) & ~4;
777                                     cend = *((int32_t *) &extra[idx]);
778 # endif
779                                   }
780                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
781                                   {
782                                     cend = str[0];
783                                     c = *p++;
784                                   }
785                                 else
786                                   return FNM_NOMATCH;
787                               }
788 # undef str
789                           }
790                         else
791                           {
792                             if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
793                               cend = *p++;
794                             if (cend == L_('\0'))
795                               return FNM_NOMATCH;
796                             cend = FOLD (cend);
797                           }
798
799                         /* XXX It is not entirely clear to me how to handle
800                            characters which are not mentioned in the
801                            collation specification.  */
802                         if (
803 # ifdef WIDE_CHAR_VERSION
804                             lcollseq == 0xffffffff ||
805 # endif
806                             lcollseq <= fcollseq)
807                           {
808                             /* We have to look at the upper bound.  */
809                             uint32_t hcollseq;
810
811                             if (is_seqval)
812                               hcollseq = cend;
813                             else
814                               {
815 # ifdef WIDE_CHAR_VERSION
816                                 hcollseq =
817                                   __collseq_table_lookup (collseq, cend);
818                                 if (hcollseq == ~((uint32_t) 0))
819                                   {
820                                     /* Hum, no information about the upper
821                                        bound.  The matching succeeds if the
822                                        lower bound is matched exactly.  */
823                                     if (lcollseq != fcollseq)
824                                       goto range_not_matched;
825
826                                     goto matched;
827                                   }
828 # else
829                                 hcollseq = collseq[cend];
830 # endif
831                               }
832
833                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
834                               goto matched;
835                           }
836 # ifdef WIDE_CHAR_VERSION
837                       range_not_matched:
838 # endif
839 #else
840                         /* We use a boring value comparison of the character
841                            values.  This is better than comparing using
842                            `strcoll' since the latter would have surprising
843                            and sometimes fatal consequences.  */
844                         UCHAR cend = *p++;
845
846                         if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
847                           cend = *p++;
848                         if (cend == L_('\0'))
849                           return FNM_NOMATCH;
850
851                         /* It is a range.  */
852                         if (cold <= fn && fn <= cend)
853                           goto matched;
854 #endif
855
856                         c = *p++;
857                       }
858                   }
859
860                 if (c == L_(']'))
861                   break;
862               }
863
864             if (!not)
865               return FNM_NOMATCH;
866             break;
867
868           matched:
869             /* Skip the rest of the [...] that already matched.  */
870             do
871               {
872               ignore_next:
873                 c = *p++;
874
875                 if (c == L_('\0'))
876                   /* [... (unterminated) loses.  */
877                   return FNM_NOMATCH;
878
879                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
880                   {
881                     if (*p == L_('\0'))
882                       return FNM_NOMATCH;
883                     /* XXX 1003.2d11 is unclear if this is right.  */
884                     ++p;
885                   }
886                 else if (c == L_('[') && *p == L_(':'))
887                   {
888                     int c1 = 0;
889                     const CHAR *startp = p;
890
891                     while (1)
892                       {
893                         c = *++p;
894                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
895                           return FNM_NOMATCH;
896
897                         if (*p == L_(':') && p[1] == L_(']'))
898                           break;
899
900                         if (c < L_('a') || c >= L_('z'))
901                           {
902                             p = startp;
903                             goto ignore_next;
904                           }
905                       }
906                     p += 2;
907                     c = *p++;
908                   }
909                 else if (c == L_('[') && *p == L_('='))
910                   {
911                     c = *++p;
912                     if (c == L_('\0'))
913                       return FNM_NOMATCH;
914                     c = *++p;
915                     if (c != L_('=') || p[1] != L_(']'))
916                       return FNM_NOMATCH;
917                     p += 2;
918                     c = *p++;
919                   }
920                 else if (c == L_('[') && *p == L_('.'))
921                   {
922                     ++p;
923                     while (1)
924                       {
925                         c = *++p;
926                         if (c == '\0')
927                           return FNM_NOMATCH;
928
929                         if (*p == L_('.') && p[1] == L_(']'))
930                           break;
931                       }
932                     p += 2;
933                     c = *p++;
934                   }
935               }
936             while (c != L_(']'));
937             if (not)
938               return FNM_NOMATCH;
939           }
940           break;
941
942         case L_('+'):
943         case L_('@'):
944         case L_('!'):
945           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
946             {
947               int res;
948
949               res = EXT (c, p, n, string_end, no_leading_period, flags);
950               if (res != -1)
951                 return res;
952             }
953           goto normal_match;
954
955         case L_('/'):
956           if (NO_LEADING_PERIOD (flags))
957             {
958               if (n == string_end || c != (UCHAR) *n)
959                 return FNM_NOMATCH;
960
961               new_no_leading_period = true;
962               break;
963             }
964           /* FALLTHROUGH */
965         default:
966         normal_match:
967           if (n == string_end || c != FOLD ((UCHAR) *n))
968             return FNM_NOMATCH;
969         }
970
971       no_leading_period = new_no_leading_period;
972       ++n;
973     }
974
975   if (n == string_end)
976     return 0;
977
978   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
979     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
980     return 0;
981
982   return FNM_NOMATCH;
983 }
984
985
986 static const CHAR *
987 internal_function
988 END (const CHAR *pattern)
989 {
990   const CHAR *p = pattern;
991
992   while (1)
993     if (*++p == L_('\0'))
994       /* This is an invalid pattern.  */
995       return pattern;
996     else if (*p == L_('['))
997       {
998         /* Handle brackets special.  */
999         if (posixly_correct == 0)
1000           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1001
1002         /* Skip the not sign.  We have to recognize it because of a possibly
1003            following ']'.  */
1004         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1005           ++p;
1006         /* A leading ']' is recognized as such.  */
1007         if (*p == L_(']'))
1008           ++p;
1009         /* Skip over all characters of the list.  */
1010         while (*p != L_(']'))
1011           if (*p++ == L_('\0'))
1012             /* This is no valid pattern.  */
1013             return pattern;
1014       }
1015     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1016               || *p == L_('!')) && p[1] == L_('('))
1017       p = END (p + 1);
1018     else if (*p == L_(')'))
1019       break;
1020
1021   return p + 1;
1022 }
1023
1024
1025 static int
1026 internal_function
1027 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1028      bool no_leading_period, int flags)
1029 {
1030   const CHAR *startp;
1031   size_t level;
1032   struct patternlist
1033   {
1034     struct patternlist *next;
1035     CHAR str[1];
1036   } *list = NULL;
1037   struct patternlist **lastp = &list;
1038   size_t pattern_len = STRLEN (pattern);
1039   const CHAR *p;
1040   const CHAR *rs;
1041   enum { ALLOCA_LIMIT = 8000 };
1042
1043   /* Parse the pattern.  Store the individual parts in the list.  */
1044   level = 0;
1045   for (startp = p = pattern + 1; ; ++p)
1046     if (*p == L_('\0'))
1047       /* This is an invalid pattern.  */
1048       return -1;
1049     else if (*p == L_('['))
1050       {
1051         /* Handle brackets special.  */
1052         if (posixly_correct == 0)
1053           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1054
1055         /* Skip the not sign.  We have to recognize it because of a possibly
1056            following ']'.  */
1057         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1058           ++p;
1059         /* A leading ']' is recognized as such.  */
1060         if (*p == L_(']'))
1061           ++p;
1062         /* Skip over all characters of the list.  */
1063         while (*p != L_(']'))
1064           if (*p++ == L_('\0'))
1065             /* This is no valid pattern.  */
1066             return -1;
1067       }
1068     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1069               || *p == L_('!')) && p[1] == L_('('))
1070       /* Remember the nesting level.  */
1071       ++level;
1072     else if (*p == L_(')'))
1073       {
1074         if (level-- == 0)
1075           {
1076             /* This means we found the end of the pattern.  */
1077 #define NEW_PATTERN \
1078             struct patternlist *newp;                                         \
1079             size_t plen;                                                      \
1080             size_t plensize;                                                  \
1081             size_t newpsize;                                                  \
1082                                                                               \
1083             plen = (opt == L_('?') || opt == L_('@')                          \
1084                     ? pattern_len                                             \
1085                     : p - startp + 1UL);                                      \
1086             plensize = plen * sizeof (CHAR);                                  \
1087             newpsize = offsetof (struct patternlist, str) + plensize;         \
1088             if ((size_t) -1 / sizeof (CHAR) < plen                            \
1089                 || newpsize < offsetof (struct patternlist, str)              \
1090                 || ALLOCA_LIMIT <= newpsize)                                  \
1091               return -1;                                                      \
1092             newp = (struct patternlist *) alloca (newpsize);                  \
1093             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0');    \
1094             newp->next = NULL;                                                \
1095             *lastp = newp;                                                    \
1096             lastp = &newp->next
1097             NEW_PATTERN;
1098             break;
1099           }
1100       }
1101     else if (*p == L_('|'))
1102       {
1103         if (level == 0)
1104           {
1105             NEW_PATTERN;
1106             startp = p + 1;
1107           }
1108       }
1109   assert (list != NULL);
1110   assert (p[-1] == L_(')'));
1111 #undef NEW_PATTERN
1112
1113   switch (opt)
1114     {
1115     case L_('*'):
1116       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1117         return 0;
1118       /* FALLTHROUGH */
1119
1120     case L_('+'):
1121       do
1122         {
1123           for (rs = string; rs <= string_end; ++rs)
1124             /* First match the prefix with the current pattern with the
1125                current pattern.  */
1126             if (FCT (list->str, string, rs, no_leading_period,
1127                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1128                 /* This was successful.  Now match the rest with the rest
1129                    of the pattern.  */
1130                 && (FCT (p, rs, string_end,
1131                          rs == string
1132                          ? no_leading_period
1133                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1134                          flags & FNM_FILE_NAME
1135                          ? flags : flags & ~FNM_PERIOD) == 0
1136                     /* This didn't work.  Try the whole pattern.  */
1137                     || (rs != string
1138                         && FCT (pattern - 1, rs, string_end,
1139                                 rs == string
1140                                 ? no_leading_period
1141                                 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1142                                 flags & FNM_FILE_NAME
1143                                 ? flags : flags & ~FNM_PERIOD) == 0)))
1144               /* It worked.  Signal success.  */
1145               return 0;
1146         }
1147       while ((list = list->next) != NULL);
1148
1149       /* None of the patterns lead to a match.  */
1150       return FNM_NOMATCH;
1151
1152     case L_('?'):
1153       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1154         return 0;
1155       /* FALLTHROUGH */
1156
1157     case L_('@'):
1158       do
1159         /* I cannot believe it but `strcat' is actually acceptable
1160            here.  Match the entire string with the prefix from the
1161            pattern list and the rest of the pattern following the
1162            pattern list.  */
1163         if (FCT (STRCAT (list->str, p), string, string_end,
1164                  no_leading_period,
1165                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1166           /* It worked.  Signal success.  */
1167           return 0;
1168       while ((list = list->next) != NULL);
1169
1170       /* None of the patterns lead to a match.  */
1171       return FNM_NOMATCH;
1172
1173     case L_('!'):
1174       for (rs = string; rs <= string_end; ++rs)
1175         {
1176           struct patternlist *runp;
1177
1178           for (runp = list; runp != NULL; runp = runp->next)
1179             if (FCT (runp->str, string, rs,  no_leading_period,
1180                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1181               break;
1182
1183           /* If none of the patterns matched see whether the rest does.  */
1184           if (runp == NULL
1185               && (FCT (p, rs, string_end,
1186                        rs == string
1187                        ? no_leading_period
1188                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1189                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1190                   == 0))
1191             /* This is successful.  */
1192             return 0;
1193         }
1194
1195       /* None of the patterns together with the rest of the pattern
1196          lead to a match.  */
1197       return FNM_NOMATCH;
1198
1199     default:
1200       assert (! "Invalid extended matching operator");
1201       break;
1202     }
1203
1204   return -1;
1205 }
1206
1207
1208 #undef FOLD
1209 #undef CHAR
1210 #undef UCHAR
1211 #undef INT
1212 #undef FCT
1213 #undef EXT
1214 #undef END
1215 #undef MEMPCPY
1216 #undef MEMCHR
1217 #undef STRCOLL
1218 #undef STRLEN
1219 #undef STRCAT
1220 #undef L_
1221 #undef BTOWC