lib/fnmatch_loop.c

   1 /* Copyright (C) 1991-1993, 1996-2006, 2009-2011 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3
   4    This program is free software; you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 2, or (at your option)
   7    any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software Foundation,
  16    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
  17
  18 /* Match STRING against the file name pattern PATTERN, returning zero if
  19    it matches, nonzero if not.  */
  20 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
  21                 const CHAR *string_end, bool no_leading_period, int flags)
  22      internal_function;
  23 static const CHAR *END (const CHAR *patternp) internal_function;
  24
  25 static int
  26 internal_function
  27 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
  28      bool no_leading_period, int flags)
  29 {
  30   register const CHAR *p = pattern, *n = string;
  31   register UCHAR c;
  32 #ifdef _LIBC
  33 # if WIDE_CHAR_VERSION
  34   const char *collseq = (const char *)
  35     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  36 # else
  37   const UCHAR *collseq = (const UCHAR *)
  38     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  39 # endif
  40 #endif
  41
  42   while ((c = *p++) != L_('\0'))
  43     {
  44       bool new_no_leading_period = false;
  45       c = FOLD (c);
  46
  47       switch (c)
  48         {
  49         case L_('?'):
  50           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  51             {
  52               int res;
  53
  54               res = EXT (c, p, n, string_end, no_leading_period,
  55                          flags);
  56               if (res != -1)
  57                 return res;
  58             }
  59
  60           if (n == string_end)
  61             return FNM_NOMATCH;
  62           else if (*n == L_('/') && (flags & FNM_FILE_NAME))
  63             return FNM_NOMATCH;
  64           else if (*n == L_('.') && no_leading_period)
  65             return FNM_NOMATCH;
  66           break;
  67
  68         case L_('\\'):
  69           if (!(flags & FNM_NOESCAPE))
  70             {
  71               c = *p++;
  72               if (c == L_('\0'))
  73                 /* Trailing \ loses.  */
  74                 return FNM_NOMATCH;
  75               c = FOLD (c);
  76             }
  77           if (n == string_end || FOLD ((UCHAR) *n) != c)
  78             return FNM_NOMATCH;
  79           break;
  80
  81         case L_('*'):
  82           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  83             {
  84               int res;
  85
  86               res = EXT (c, p, n, string_end, no_leading_period,
  87                          flags);
  88               if (res != -1)
  89                 return res;
  90             }
  91
  92           if (n != string_end && *n == L_('.') && no_leading_period)
  93             return FNM_NOMATCH;
  94
  95           for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
  96             {
  97               if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
  98                 {
  99                   const CHAR *endp = END (p);
 100                   if (endp != p)
 101                     {
 102                       /* This is a pattern.  Skip over it.  */
 103                       p = endp;
 104                       continue;
 105                     }
 106                 }
 107
 108               if (c == L_('?'))
 109                 {
 110                   /* A ? needs to match one character.  */
 111                   if (n == string_end)
 112                     /* There isn't another character; no match.  */
 113                     return FNM_NOMATCH;
 114                   else if (*n == L_('/')
 115                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
 116                     /* A slash does not match a wildcard under
 117                        FNM_FILE_NAME.  */
 118                     return FNM_NOMATCH;
 119                   else
 120                     /* One character of the string is consumed in matching
 121                        this ? wildcard, so *??? won't match if there are
 122                        less than three characters.  */
 123                     ++n;
 124                 }
 125             }
 126
 127           if (c == L_('\0'))
 128             /* The wildcard(s) is/are the last element of the pattern.
 129                If the name is a file name and contains another slash
 130                this means it cannot match, unless the FNM_LEADING_DIR
 131                flag is set.  */
 132             {
 133               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
 134
 135               if (flags & FNM_FILE_NAME)
 136                 {
 137                   if (flags & FNM_LEADING_DIR)
 138                     result = 0;
 139                   else
 140                     {
 141                       if (MEMCHR (n, L_('/'), string_end - n) == NULL)
 142                         result = 0;
 143                     }
 144                 }
 145
 146               return result;
 147             }
 148           else
 149             {
 150               const CHAR *endp;
 151
 152               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
 153                              string_end - n);
 154               if (endp == NULL)
 155                 endp = string_end;
 156
 157               if (c == L_('[')
 158                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
 159                       && (c == L_('@') || c == L_('+') || c == L_('!'))
 160                       && *p == L_('(')))
 161                 {
 162                   int flags2 = ((flags & FNM_FILE_NAME)
 163                                 ? flags : (flags & ~FNM_PERIOD));
 164                   bool no_leading_period2 = no_leading_period;
 165
 166                   for (--p; n < endp; ++n, no_leading_period2 = false)
 167                     if (FCT (p, n, string_end, no_leading_period2, flags2)
 168                         == 0)
 169                       return 0;
 170                 }
 171               else if (c == L_('/') && (flags & FNM_FILE_NAME))
 172                 {
 173                   while (n < string_end && *n != L_('/'))
 174                     ++n;
 175                   if (n < string_end && *n == L_('/')
 176                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
 177                           == 0))
 178                     return 0;
 179                 }
 180               else
 181                 {
 182                   int flags2 = ((flags & FNM_FILE_NAME)
 183                                 ? flags : (flags & ~FNM_PERIOD));
 184                   int no_leading_period2 = no_leading_period;
 185
 186                   if (c == L_('\\') && !(flags & FNM_NOESCAPE))
 187                     c = *p;
 188                   c = FOLD (c);
 189                   for (--p; n < endp; ++n, no_leading_period2 = false)
 190                     if (FOLD ((UCHAR) *n) == c
 191                         && (FCT (p, n, string_end, no_leading_period2, flags2)
 192                             == 0))
 193                       return 0;
 194                 }
 195             }
 196
 197           /* If we come here no match is possible with the wildcard.  */
 198           return FNM_NOMATCH;
 199
 200         case L_('['):
 201           {
 202             /* Nonzero if the sense of the character class is inverted.  */
 203             register bool not;
 204             CHAR cold;
 205             UCHAR fn;
 206
 207             if (posixly_correct == 0)
 208               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 209
 210             if (n == string_end)
 211               return FNM_NOMATCH;
 212
 213             if (*n == L_('.') && no_leading_period)
 214               return FNM_NOMATCH;
 215
 216             if (*n == L_('/') && (flags & FNM_FILE_NAME))
 217               /* `/' cannot be matched.  */
 218               return FNM_NOMATCH;
 219
 220             not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
 221             if (not)
 222               ++p;
 223
 224             fn = FOLD ((UCHAR) *n);
 225
 226             c = *p++;
 227             for (;;)
 228               {
 229                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
 230                   {
 231                     if (*p == L_('\0'))
 232                       return FNM_NOMATCH;
 233                     c = FOLD ((UCHAR) *p);
 234                     ++p;
 235
 236                     goto normal_bracket;
 237                   }
 238                 else if (c == L_('[') && *p == L_(':'))
 239                   {
 240                     /* Leave room for the null.  */
 241                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
 242                     size_t c1 = 0;
 243 #if defined _LIBC || WIDE_CHAR_SUPPORT
 244                     wctype_t wt;
 245 #endif
 246                     const CHAR *startp = p;
 247
 248                     for (;;)
 249                       {
 250                         if (c1 == CHAR_CLASS_MAX_LENGTH)
 251                           /* The name is too long and therefore the pattern
 252                              is ill-formed.  */
 253                           return FNM_NOMATCH;
 254
 255                         c = *++p;
 256                         if (c == L_(':') && p[1] == L_(']'))
 257                           {
 258                             p += 2;
 259                             break;
 260                           }
 261                         if (c < L_('a') || c >= L_('z'))
 262                           {
 263                             /* This cannot possibly be a character class name.
 264                                Match it as a normal range.  */
 265                             p = startp;
 266                             c = L_('[');
 267                             goto normal_bracket;
 268                           }
 269                         str[c1++] = c;
 270                       }
 271                     str[c1] = L_('\0');
 272
 273 #if defined _LIBC || WIDE_CHAR_SUPPORT
 274                     wt = IS_CHAR_CLASS (str);
 275                     if (wt == 0)
 276                       /* Invalid character class name.  */
 277                       return FNM_NOMATCH;
 278
 279 # if defined _LIBC && ! WIDE_CHAR_VERSION
  280                     /* The following code is glibc specific, but there it
  281                        does a good job of speeding up the code since
  282                        we can avoid the btowc() call.  */
 283                     if (_ISCTYPE ((UCHAR) *n, wt))
 284                       goto matched;
 285 # else
 286                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
 287                       goto matched;
 288 # endif
 289 #else
 290                     if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
 291                         || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
 292                         || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
 293                         || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
 294                         || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
 295                         || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
 296                         || (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
 297                         || (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
 298                         || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
 299                         || (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
 300                         || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
 301                         || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
 302                       goto matched;
 303 #endif
 304                     c = *p++;
 305                   }
 306 #ifdef _LIBC
 307                 else if (c == L_('[') && *p == L_('='))
 308                   {
 309                     UCHAR str[1];
 310                     uint32_t nrules =
 311                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 312                     const CHAR *startp = p;
 313
 314                     c = *++p;
 315                     if (c == L_('\0'))
 316                       {
 317                         p = startp;
 318                         c = L_('[');
 319                         goto normal_bracket;
 320                       }
 321                     str[0] = c;
 322
 323                     c = *++p;
 324                     if (c != L_('=') || p[1] != L_(']'))
 325                       {
 326                         p = startp;
 327                         c = L_('[');
 328                         goto normal_bracket;
 329                       }
 330                     p += 2;
 331
 332                     if (nrules == 0)
 333                       {
 334                         if ((UCHAR) *n == str[0])
 335                           goto matched;
 336                       }
 337                     else
 338                       {
 339                         const int32_t *table;
 340 # if WIDE_CHAR_VERSION
 341                         const int32_t *weights;
 342                         const int32_t *extra;
 343 # else
 344                         const unsigned char *weights;
 345                         const unsigned char *extra;
 346 # endif
 347                         const int32_t *indirect;
 348                         int32_t idx;
 349                         const UCHAR *cp = (const UCHAR *) str;
 350
 351                         /* This #include defines a local function!  */
 352 # if WIDE_CHAR_VERSION
 353 #  include <locale/weightwc.h>
 354 # else
 355 #  include <locale/weight.h>
 356 # endif
 357
 358 # if WIDE_CHAR_VERSION
 359                         table = (const int32_t *)
 360                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
 361                         weights = (const int32_t *)
 362                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
 363                         extra = (const int32_t *)
 364                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
 365                         indirect = (const int32_t *)
 366                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
 367 # else
 368                         table = (const int32_t *)
 369                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 370                         weights = (const unsigned char *)
 371                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 372                         extra = (const unsigned char *)
 373                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 374                         indirect = (const int32_t *)
 375                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 376 # endif
 377
 378                         idx = findidx (&cp);
 379                         if (idx != 0)
 380                           {
 381                             /* We found a table entry.  Now see whether the
 382                                character we are currently at has the same
  383                                equivalence class value.  */
 384                             int len = weights[idx & 0xffffff];
 385                             int32_t idx2;
 386                             const UCHAR *np = (const UCHAR *) n;
 387
 388                             idx2 = findidx (&np);
 389                             if (idx2 != 0
 390                                 && (idx >> 24) == (idx2 >> 24)
 391                                 && len == weights[idx2 & 0xffffff])
 392                               {
 393                                 int cnt = 0;
 394
 395                                 idx &= 0xffffff;
 396                                 idx2 &= 0xffffff;
 397
 398                                 while (cnt < len
 399                                        && (weights[idx + 1 + cnt]
 400                                            == weights[idx2 + 1 + cnt]))
 401                                   ++cnt;
 402
 403                                 if (cnt == len)
 404                                   goto matched;
 405                               }
 406                           }
 407                       }
 408
 409                     c = *p++;
 410                   }
 411 #endif
 412                 else if (c == L_('\0'))
 413                   /* [ (unterminated) loses.  */
 414                   return FNM_NOMATCH;
 415                 else
 416                   {
 417                     bool is_range = false;
 418
 419 #ifdef _LIBC
 420                     bool is_seqval = false;
 421
 422                     if (c == L_('[') && *p == L_('.'))
 423                       {
 424                         uint32_t nrules =
 425                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 426                         const CHAR *startp = p;
 427                         size_t c1 = 0;
 428
 429                         while (1)
 430                           {
 431                             c = *++p;
 432                             if (c == L_('.') && p[1] == L_(']'))
 433                               {
 434                                 p += 2;
 435                                 break;
 436                               }
 437                             if (c == '\0')
 438                               return FNM_NOMATCH;
 439                             ++c1;
 440                           }
 441
  442                         /* We have to handle the symbols differently in
 443                            ranges since then the collation sequence is
 444                            important.  */
 445                         is_range = *p == L_('-') && p[1] != L_('\0');
 446
 447                         if (nrules == 0)
 448                           {
 449                             /* There are no names defined in the collation
 450                                data.  Therefore we only accept the trivial
 451                                names consisting of the character itself.  */
 452                             if (c1 != 1)
 453                               return FNM_NOMATCH;
 454
 455                             if (!is_range && *n == startp[1])
 456                               goto matched;
 457
 458                             cold = startp[1];
 459                             c = *p++;
 460                           }
 461                         else
 462                           {
 463                             int32_t table_size;
 464                             const int32_t *symb_table;
 465 # ifdef WIDE_CHAR_VERSION
 466                             char str[c1];
 467                             size_t strcnt;
 468 # else
 469 #  define str (startp + 1)
 470 # endif
 471                             const unsigned char *extra;
 472                             int32_t idx;
 473                             int32_t elem;
 474                             int32_t second;
 475                             int32_t hash;
 476
 477 # ifdef WIDE_CHAR_VERSION
 478                             /* We have to convert the name to a single-byte
 479                                string.  This is possible since the names
 480                                consist of ASCII characters and the internal
 481                                representation is UCS4.  */
 482                             for (strcnt = 0; strcnt < c1; ++strcnt)
 483                               str[strcnt] = startp[1 + strcnt];
 484 # endif
 485
 486                             table_size =
 487                               _NL_CURRENT_WORD (LC_COLLATE,
 488                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
 489                             symb_table = (const int32_t *)
 490                               _NL_CURRENT (LC_COLLATE,
 491                                            _NL_COLLATE_SYMB_TABLEMB);
 492                             extra = (const unsigned char *)
 493                               _NL_CURRENT (LC_COLLATE,
 494                                            _NL_COLLATE_SYMB_EXTRAMB);
 495
 496                             /* Locate the character in the hashing table.  */
 497                             hash = elem_hash (str, c1);
 498
 499                             idx = 0;
 500                             elem = hash % table_size;
 501                             if (symb_table[2 * elem] != 0)
 502                               {
 503                                 second = hash % (table_size - 2) + 1;
 504
 505                                 do
 506                                   {
 507                                     /* First compare the hashing value.  */
 508                                     if (symb_table[2 * elem] == hash
 509                                         && (c1
 510                                             == extra[symb_table[2 * elem + 1]])
 511                                         && memcmp (str,
 512                                                    &extra[symb_table[2 * elem
 513                                                                      + 1]
 514                                                           + 1], c1) == 0)
 515                                       {
 516                                         /* Yep, this is the entry.  */
 517                                         idx = symb_table[2 * elem + 1];
 518                                         idx += 1 + extra[idx];
 519                                         break;
 520                                       }
 521
 522                                     /* Next entry.  */
 523                                     elem += second;
 524                                   }
 525                                 while (symb_table[2 * elem] != 0);
 526                               }
 527
 528                             if (symb_table[2 * elem] != 0)
 529                               {
 530                                 /* Compare the byte sequence but only if
 531                                    this is not part of a range.  */
 532 # ifdef WIDE_CHAR_VERSION
 533                                 int32_t *wextra;
 534
 535                                 idx += 1 + extra[idx];
 536                                 /* Adjust for the alignment.  */
 537                                 idx = (idx + 3) & ~3;
 538
 539                                 wextra = (int32_t *) &extra[idx + 4];
 540 # endif
 541
 542                                 if (! is_range)
 543                                   {
 544 # ifdef WIDE_CHAR_VERSION
 545                                     for (c1 = 0;
 546                                          (int32_t) c1 < wextra[idx];
 547                                          ++c1)
 548                                       if (n[c1] != wextra[1 + c1])
 549                                         break;
 550
 551                                     if ((int32_t) c1 == wextra[idx])
 552                                       goto matched;
 553 # else
 554                                     for (c1 = 0; c1 < extra[idx]; ++c1)
 555                                       if (n[c1] != extra[1 + c1])
 556                                         break;
 557
 558                                     if (c1 == extra[idx])
 559                                       goto matched;
 560 # endif
 561                                   }
 562
 563                                 /* Get the collation sequence value.  */
 564                                 is_seqval = true;
 565 # ifdef WIDE_CHAR_VERSION
 566                                 cold = wextra[1 + wextra[idx]];
 567 # else
 568                                 /* Adjust for the alignment.  */
 569                                 idx += 1 + extra[idx];
 570                                 idx = (idx + 3) & ~4;
 571                                 cold = *((int32_t *) &extra[idx]);
 572 # endif
 573
 574                                 c = *p++;
 575                               }
 576                             else if (c1 == 1)
 577                               {
 578                                 /* No valid character.  Match it as a
 579                                    single byte.  */
 580                                 if (!is_range && *n == str[0])
 581                                   goto matched;
 582
 583                                 cold = str[0];
 584                                 c = *p++;
 585                               }
 586                             else
 587                               return FNM_NOMATCH;
 588                           }
 589                       }
 590                     else
 591 # undef str
 592 #endif
 593                       {
 594                         c = FOLD (c);
 595                       normal_bracket:
 596
  597                         /* We have to handle the symbols differently in
 598                            ranges since then the collation sequence is
 599                            important.  */
 600                         is_range = (*p == L_('-') && p[1] != L_('\0')
 601                                     && p[1] != L_(']'));
 602
 603                         if (!is_range && c == fn)
 604                           goto matched;
 605
 606 #if _LIBC
 607                         /* This is needed if we goto normal_bracket; from
 608                            outside of is_seqval's scope.  */
 609                         is_seqval = false;
 610 #endif
 611
 612                         cold = c;
 613                         c = *p++;
 614                       }
 615
 616                     if (c == L_('-') && *p != L_(']'))
 617                       {
 618 #if _LIBC
 619                         /* We have to find the collation sequence
 620                            value for C.  Collation sequence is nothing
 621                            we can regularly access.  The sequence
 622                            value is defined by the order in which the
 623                            definitions of the collation values for the
 624                            various characters appear in the source
 625                            file.  A strange concept, nowhere
 626                            documented.  */
 627                         uint32_t fcollseq;
 628                         uint32_t lcollseq;
 629                         UCHAR cend = *p++;
 630
 631 # ifdef WIDE_CHAR_VERSION
 632                         /* Search in the `names' array for the characters.  */
 633                         fcollseq = __collseq_table_lookup (collseq, fn);
 634                         if (fcollseq == ~((uint32_t) 0))
 635                           /* XXX We don't know anything about the character
 636                              we are supposed to match.  This means we are
 637                              failing.  */
 638                           goto range_not_matched;
 639
 640                         if (is_seqval)
 641                           lcollseq = cold;
 642                         else
 643                           lcollseq = __collseq_table_lookup (collseq, cold);
 644 # else
 645                         fcollseq = collseq[fn];
 646                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
 647 # endif
 648
 649                         is_seqval = false;
 650                         if (cend == L_('[') && *p == L_('.'))
 651                           {
 652                             uint32_t nrules =
 653                               _NL_CURRENT_WORD (LC_COLLATE,
 654                                                 _NL_COLLATE_NRULES);
 655                             const CHAR *startp = p;
 656                             size_t c1 = 0;
 657
 658                             while (1)
 659                               {
 660                                 c = *++p;
 661                                 if (c == L_('.') && p[1] == L_(']'))
 662                                   {
 663                                     p += 2;
 664                                     break;
 665                                   }
 666                                 if (c == '\0')
 667                                   return FNM_NOMATCH;
 668                                 ++c1;
 669                               }
 670
 671                             if (nrules == 0)
 672                               {
 673                                 /* There are no names defined in the
 674                                    collation data.  Therefore we only
 675                                    accept the trivial names consisting
 676                                    of the character itself.  */
 677                                 if (c1 != 1)
 678                                   return FNM_NOMATCH;
 679
 680                                 cend = startp[1];
 681                               }
 682                             else
 683                               {
 684                                 int32_t table_size;
 685                                 const int32_t *symb_table;
 686 # ifdef WIDE_CHAR_VERSION
 687                                 char str[c1];
 688                                 size_t strcnt;
 689 # else
 690 #  define str (startp + 1)
 691 # endif
 692                                 const unsigned char *extra;
 693                                 int32_t idx;
 694                                 int32_t elem;
 695                                 int32_t second;
 696                                 int32_t hash;
 697
 698 # ifdef WIDE_CHAR_VERSION
 699                                 /* We have to convert the name to a single-byte
 700                                    string.  This is possible since the names
 701                                    consist of ASCII characters and the internal
 702                                    representation is UCS4.  */
 703                                 for (strcnt = 0; strcnt < c1; ++strcnt)
 704                                   str[strcnt] = startp[1 + strcnt];
 705 # endif
 706
 707                                 table_size =
 708                                   _NL_CURRENT_WORD (LC_COLLATE,
 709                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
 710                                 symb_table = (const int32_t *)
 711                                   _NL_CURRENT (LC_COLLATE,
 712                                                _NL_COLLATE_SYMB_TABLEMB);
 713                                 extra = (const unsigned char *)
 714                                   _NL_CURRENT (LC_COLLATE,
 715                                                _NL_COLLATE_SYMB_EXTRAMB);
 716
 717                                 /* Locate the character in the hashing
 718                                    table.  */
 719                                 hash = elem_hash (str, c1);
 720
 721                                 idx = 0;
 722                                 elem = hash % table_size;
 723                                 if (symb_table[2 * elem] != 0)
 724                                   {
 725                                     second = hash % (table_size - 2) + 1;
 726
 727                                     do
 728                                       {
 729                                         /* First compare the hashing value.  */
 730                                         if (symb_table[2 * elem] == hash
 731                                             && (c1
 732                                                 == extra[symb_table[2 * elem + 1]])
 733                                             && memcmp (str,
 734                                                        &extra[symb_table[2 * elem + 1]
 735                                                               + 1], c1) == 0)
 736                                           {
 737                                             /* Yep, this is the entry.  */
 738                                             idx = symb_table[2 * elem + 1];
 739                                             idx += 1 + extra[idx];
 740                                             break;
 741                                           }
 742
 743                                         /* Next entry.  */
 744                                         elem += second;
 745                                       }
 746                                     while (symb_table[2 * elem] != 0);
 747                                   }
 748
 749                                 if (symb_table[2 * elem] != 0)
 750                                   {
 751                                     /* Compare the byte sequence but only if
 752                                        this is not part of a range.  */
 753 # ifdef WIDE_CHAR_VERSION
 754                                     int32_t *wextra;
 755
 756                                     idx += 1 + extra[idx];
 757                                     /* Adjust for the alignment.  */
 758                                     idx = (idx + 3) & ~4;
 759
 760                                     wextra = (int32_t *) &extra[idx + 4];
 761 # endif
 762                                     /* Get the collation sequence value.  */
 763                                     is_seqval = true;
 764 # ifdef WIDE_CHAR_VERSION
 765                                     cend = wextra[1 + wextra[idx]];
 766 # else
 767                                     /* Adjust for the alignment.  */
 768                                     idx += 1 + extra[idx];
 769                                     idx = (idx + 3) & ~4;
 770                                     cend = *((int32_t *) &extra[idx]);
 771 # endif
 772                                   }
 773                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
 774                                   {
 775                                     cend = str[0];
 776                                     c = *p++;
 777                                   }
 778                                 else
 779                                   return FNM_NOMATCH;
 780                               }
 781 # undef str
 782                           }
 783                         else
 784                           {
 785                             if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
 786                               cend = *p++;
 787                             if (cend == L_('\0'))
 788                               return FNM_NOMATCH;
 789                             cend = FOLD (cend);
 790                           }
 791
 792                         /* XXX It is not entirely clear to me how to handle
 793                            characters which are not mentioned in the
 794                            collation specification.  */
 795                         if (
 796 # ifdef WIDE_CHAR_VERSION
 797                             lcollseq == 0xffffffff ||
 798 # endif
 799                             lcollseq <= fcollseq)
 800                           {
 801                             /* We have to look at the upper bound.  */
 802                             uint32_t hcollseq;
 803
 804                             if (is_seqval)
 805                               hcollseq = cend;
 806                             else
 807                               {
 808 # ifdef WIDE_CHAR_VERSION
 809                                 hcollseq =
 810                                   __collseq_table_lookup (collseq, cend);
 811                                 if (hcollseq == ~((uint32_t) 0))
 812                                   {
 813                                     /* Hum, no information about the upper
 814                                        bound.  The matching succeeds if the
 815                                        lower bound is matched exactly.  */
 816                                     if (lcollseq != fcollseq)
 817                                       goto range_not_matched;
 818
 819                                     goto matched;
 820                                   }
 821 # else
 822                                 hcollseq = collseq[cend];
 823 # endif
 824                               }
 825
 826                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
 827                               goto matched;
 828                           }
 829 # ifdef WIDE_CHAR_VERSION
 830                       range_not_matched:
 831 # endif
 832 #else
 833                         /* We use a boring value comparison of the character
 834                            values.  This is better than comparing using
 835                            `strcoll' since the latter would have surprising
 836                            and sometimes fatal consequences.  */
 837                         UCHAR cend = *p++;
 838
 839                         if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
 840                           cend = *p++;
 841                         if (cend == L_('\0'))
 842                           return FNM_NOMATCH;
 843
 844                         /* It is a range.  */
 845                         if (cold <= fn && fn <= cend)
 846                           goto matched;
 847 #endif
 848
 849                         c = *p++;
 850                       }
 851                   }
 852
 853                 if (c == L_(']'))
 854                   break;
 855               }
 856
 857             if (!not)
 858               return FNM_NOMATCH;
 859             break;
 860
 861           matched:
 862             /* Skip the rest of the [...] that already matched.  */
 863             do
 864               {
 865               ignore_next:
 866                 c = *p++;
 867
 868                 if (c == L_('\0'))
 869                   /* [... (unterminated) loses.  */
 870                   return FNM_NOMATCH;
 871
 872                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
 873                   {
 874                     if (*p == L_('\0'))
 875                       return FNM_NOMATCH;
 876                     /* XXX 1003.2d11 is unclear if this is right.  */
 877                     ++p;
 878                   }
 879                 else if (c == L_('[') && *p == L_(':'))
 880                   {
 881                     int c1 = 0;
 882                     const CHAR *startp = p;
 883
 884                     while (1)
 885                       {
 886                         c = *++p;
 887                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
 888                           return FNM_NOMATCH;
 889
 890                         if (*p == L_(':') && p[1] == L_(']'))
 891                           break;
 892
 893                         if (c < L_('a') || c >= L_('z'))
 894                           {
 895                             p = startp;
 896                             goto ignore_next;
 897                           }
 898                       }
 899                     p += 2;
 900                     c = *p++;
 901                   }
 902                 else if (c == L_('[') && *p == L_('='))
 903                   {
 904                     c = *++p;
 905                     if (c == L_('\0'))
 906                       return FNM_NOMATCH;
 907                     c = *++p;
 908                     if (c != L_('=') || p[1] != L_(']'))
 909                       return FNM_NOMATCH;
 910                     p += 2;
 911                     c = *p++;
 912                   }
 913                 else if (c == L_('[') && *p == L_('.'))
 914                   {
 915                     ++p;
 916                     while (1)
 917                       {
 918                         c = *++p;
 919                         if (c == '\0')
 920                           return FNM_NOMATCH;
 921
 922                         if (*p == L_('.') && p[1] == L_(']'))
 923                           break;
 924                       }
 925                     p += 2;
 926                     c = *p++;
 927                   }
 928               }
 929             while (c != L_(']'));
 930             if (not)
 931               return FNM_NOMATCH;
 932           }
 933           break;
 934
 935         case L_('+'):
 936         case L_('@'):
 937         case L_('!'):
 938           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 939             {
 940               int res;
 941
 942               res = EXT (c, p, n, string_end, no_leading_period, flags);
 943               if (res != -1)
 944                 return res;
 945             }
 946           goto normal_match;
 947
 948         case L_('/'):
 949           if (NO_LEADING_PERIOD (flags))
 950             {
 951               if (n == string_end || c != (UCHAR) *n)
 952                 return FNM_NOMATCH;
 953
 954               new_no_leading_period = true;
 955               break;
 956             }
 957           /* FALLTHROUGH */
 958         default:
 959         normal_match:
 960           if (n == string_end || c != FOLD ((UCHAR) *n))
 961             return FNM_NOMATCH;
 962         }
 963
 964       no_leading_period = new_no_leading_period;
 965       ++n;
 966     }
 967
 968   if (n == string_end)
 969     return 0;
 970
 971   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
 972     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
 973     return 0;
 974
 975   return FNM_NOMATCH;
 976 }
 977
 978
 979 static const CHAR *
 980 internal_function
 981 END (const CHAR *pattern)
 982 {
 983   const CHAR *p = pattern;
 984
 985   while (1)
 986     if (*++p == L_('\0'))
 987       /* This is an invalid pattern.  */
 988       return pattern;
 989     else if (*p == L_('['))
 990       {
 991         /* Handle brackets special.  */
 992         if (posixly_correct == 0)
 993           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 994
 995         /* Skip the not sign.  We have to recognize it because of a possibly
 996            following ']'.  */
 997         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
 998           ++p;
 999         /* A leading ']' is recognized as such.  */
1000         if (*p == L_(']'))
1001           ++p;
1002         /* Skip over all characters of the list.  */
1003         while (*p != L_(']'))
1004           if (*p++ == L_('\0'))
1005             /* This is no valid pattern.  */
1006             return pattern;
1007       }
1008     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1009               || *p == L_('!')) && p[1] == L_('('))
1010       p = END (p + 1);
1011     else if (*p == L_(')'))
1012       break;
1013
1014   return p + 1;
1015 }
1016
1017
1018 static int
1019 internal_function
1020 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1021      bool no_leading_period, int flags)
1022 {
1023   const CHAR *startp;
1024   size_t level;
1025   struct patternlist
1026   {
1027     struct patternlist *next;
1028     CHAR str[1];
1029   } *list = NULL;
1030   struct patternlist **lastp = &list;
1031   size_t pattern_len = STRLEN (pattern);
1032   const CHAR *p;
1033   const CHAR *rs;
1034   enum { ALLOCA_LIMIT = 8000 };
1035
1036   /* Parse the pattern.  Store the individual parts in the list.  */
1037   level = 0;
1038   for (startp = p = pattern + 1; ; ++p)
1039     if (*p == L_('\0'))
1040       /* This is an invalid pattern.  */
1041       return -1;
1042     else if (*p == L_('['))
1043       {
1044         /* Handle brackets special.  */
1045         if (posixly_correct == 0)
1046           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1047
1048         /* Skip the not sign.  We have to recognize it because of a possibly
1049            following ']'.  */
1050         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1051           ++p;
1052         /* A leading ']' is recognized as such.  */
1053         if (*p == L_(']'))
1054           ++p;
1055         /* Skip over all characters of the list.  */
1056         while (*p != L_(']'))
1057           if (*p++ == L_('\0'))
1058             /* This is no valid pattern.  */
1059             return -1;
1060       }
1061     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1062               || *p == L_('!')) && p[1] == L_('('))
1063       /* Remember the nesting level.  */
1064       ++level;
1065     else if (*p == L_(')'))
1066       {
1067         if (level-- == 0)
1068           {
1069             /* This means we found the end of the pattern.  */
1070 #define NEW_PATTERN \
1071             struct patternlist *newp;                                         \
1072             size_t plen;                                                      \
1073             size_t plensize;                                                  \
1074             size_t newpsize;                                                  \
1075                                                                               \
1076             plen = (opt == L_('?') || opt == L_('@')                          \
1077                     ? pattern_len                                             \
1078                     : p - startp + 1UL);                                      \
1079             plensize = plen * sizeof (CHAR);                                  \
1080             newpsize = offsetof (struct patternlist, str) + plensize;         \
1081             if ((size_t) -1 / sizeof (CHAR) < plen                            \
1082                 || newpsize < offsetof (struct patternlist, str)              \
1083                 || ALLOCA_LIMIT <= newpsize)                                  \
1084               return -1;                                                      \
1085             newp = (struct patternlist *) alloca (newpsize);                  \
1086             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0');    \
1087             newp->next = NULL;                                                \
1088             *lastp = newp;                                                    \
1089             lastp = &newp->next
1090             NEW_PATTERN;
1091             break;
1092           }
1093       }
1094     else if (*p == L_('|'))
1095       {
1096         if (level == 0)
1097           {
1098             NEW_PATTERN;
1099             startp = p + 1;
1100           }
1101       }
1102   assert (list != NULL);
1103   assert (p[-1] == L_(')'));
1104 #undef NEW_PATTERN
1105
1106   switch (opt)
1107     {
1108     case L_('*'):
1109       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1110         return 0;
1111       /* FALLTHROUGH */
1112
1113     case L_('+'):
1114       do
1115         {
1116           for (rs = string; rs <= string_end; ++rs)
1117             /* First match the prefix with the current pattern with the
1118                current pattern.  */
1119             if (FCT (list->str, string, rs, no_leading_period,
1120                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1121                 /* This was successful.  Now match the rest with the rest
1122                    of the pattern.  */
1123                 && (FCT (p, rs, string_end,
1124                          rs == string
1125                          ? no_leading_period
1126                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1127                          flags & FNM_FILE_NAME
1128                          ? flags : flags & ~FNM_PERIOD) == 0
1129                     /* This didn't work.  Try the whole pattern.  */
1130                     || (rs != string
1131                         && FCT (pattern - 1, rs, string_end,
1132                                 rs == string
1133                                 ? no_leading_period
1134                                 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1135                                 flags & FNM_FILE_NAME
1136                                 ? flags : flags & ~FNM_PERIOD) == 0)))
1137               /* It worked.  Signal success.  */
1138               return 0;
1139         }
1140       while ((list = list->next) != NULL);
1141
1142       /* None of the patterns lead to a match.  */
1143       return FNM_NOMATCH;
1144
1145     case L_('?'):
1146       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1147         return 0;
1148       /* FALLTHROUGH */
1149
1150     case L_('@'):
1151       do
1152         /* I cannot believe it but `strcat' is actually acceptable
1153            here.  Match the entire string with the prefix from the
1154            pattern list and the rest of the pattern following the
1155            pattern list.  */
1156         if (FCT (STRCAT (list->str, p), string, string_end,
1157                  no_leading_period,
1158                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1159           /* It worked.  Signal success.  */
1160           return 0;
1161       while ((list = list->next) != NULL);
1162
1163       /* None of the patterns lead to a match.  */
1164       return FNM_NOMATCH;
1165
1166     case L_('!'):
1167       for (rs = string; rs <= string_end; ++rs)
1168         {
1169           struct patternlist *runp;
1170
1171           for (runp = list; runp != NULL; runp = runp->next)
1172             if (FCT (runp->str, string, rs,  no_leading_period,
1173                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1174               break;
1175
1176           /* If none of the patterns matched see whether the rest does.  */
1177           if (runp == NULL
1178               && (FCT (p, rs, string_end,
1179                        rs == string
1180                        ? no_leading_period
1181                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1182                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1183                   == 0))
1184             /* This is successful.  */
1185             return 0;
1186         }
1187
1188       /* None of the patterns together with the rest of the pattern
1189          lead to a match.  */
1190       return FNM_NOMATCH;
1191
1192     default:
1193       assert (! "Invalid extended matching operator");
1194       break;
1195     }
1196
1197   return -1;
1198 }
1199
1200
1201 #undef FOLD
1202 #undef CHAR
1203 #undef UCHAR
1204 #undef INT
1205 #undef FCT
1206 #undef EXT
1207 #undef END
1208 #undef MEMPCPY
1209 #undef MEMCHR
1210 #undef STRCOLL
1211 #undef STRLEN
1212 #undef STRCAT
1213 #undef L_
1214 #undef BTOWC