lib/fnmatch_loop.c

   1 /* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006
   2    Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published by
   7    the Free Software Foundation; either version 2, or (at your option)
   8    any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software Foundation,
  17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
  18
  19 /* Match STRING against the file name pattern PATTERN, returning zero if
  20    it matches, nonzero if not.  */
  21 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
  22                 const CHAR *string_end, bool no_leading_period, int flags)
  23      internal_function;
  24 static const CHAR *END (const CHAR *patternp) internal_function;
  25
  26 static int
  27 internal_function
  28 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
  29      bool no_leading_period, int flags)
  30 {
  31   register const CHAR *p = pattern, *n = string;
  32   register UCHAR c;
  33 #ifdef _LIBC
  34 # if WIDE_CHAR_VERSION
  35   const char *collseq = (const char *)
  36     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  37 # else
  38   const UCHAR *collseq = (const UCHAR *)
  39     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  40 # endif
  41 #endif
  42
  43   while ((c = *p++) != L_('\0'))
  44     {
  45       bool new_no_leading_period = false;
  46       c = FOLD (c);
  47
  48       switch (c)
  49         {
  50         case L_('?'):
  51           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  52             {
  53               int res;
  54
  55               res = EXT (c, p, n, string_end, no_leading_period,
  56                          flags);
  57               if (res != -1)
  58                 return res;
  59             }
  60
  61           if (n == string_end)
  62             return FNM_NOMATCH;
  63           else if (*n == L_('/') && (flags & FNM_FILE_NAME))
  64             return FNM_NOMATCH;
  65           else if (*n == L_('.') && no_leading_period)
  66             return FNM_NOMATCH;
  67           break;
  68
  69         case L_('\\'):
  70           if (!(flags & FNM_NOESCAPE))
  71             {
  72               c = *p++;
  73               if (c == L_('\0'))
  74                 /* Trailing \ loses.  */
  75                 return FNM_NOMATCH;
  76               c = FOLD (c);
  77             }
  78           if (n == string_end || FOLD ((UCHAR) *n) != c)
  79             return FNM_NOMATCH;
  80           break;
  81
  82         case L_('*'):
  83           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  84             {
  85               int res;
  86
  87               res = EXT (c, p, n, string_end, no_leading_period,
  88                          flags);
  89               if (res != -1)
  90                 return res;
  91             }
  92
  93           if (n != string_end && *n == L_('.') && no_leading_period)
  94             return FNM_NOMATCH;
  95
  96           for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
  97             {
  98               if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
  99                 {
 100                   const CHAR *endp = END (p);
 101                   if (endp != p)
 102                     {
 103                       /* This is a pattern.  Skip over it.  */
 104                       p = endp;
 105                       continue;
 106                     }
 107                 }
 108
 109               if (c == L_('?'))
 110                 {
 111                   /* A ? needs to match one character.  */
 112                   if (n == string_end)
 113                     /* There isn't another character; no match.  */
 114                     return FNM_NOMATCH;
 115                   else if (*n == L_('/')
 116                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
 117                     /* A slash does not match a wildcard under
 118                        FNM_FILE_NAME.  */
 119                     return FNM_NOMATCH;
 120                   else
 121                     /* One character of the string is consumed in matching
 122                        this ? wildcard, so *??? won't match if there are
 123                        less than three characters.  */
 124                     ++n;
 125                 }
 126             }
 127
 128           if (c == L_('\0'))
 129             /* The wildcard(s) is/are the last element of the pattern.
 130                If the name is a file name and contains another slash
 131                this means it cannot match, unless the FNM_LEADING_DIR
 132                flag is set.  */
 133             {
 134               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
 135
 136               if (flags & FNM_FILE_NAME)
 137                 {
 138                   if (flags & FNM_LEADING_DIR)
 139                     result = 0;
 140                   else
 141                     {
 142                       if (MEMCHR (n, L_('/'), string_end - n) == NULL)
 143                         result = 0;
 144                     }
 145                 }
 146
 147               return result;
 148             }
 149           else
 150             {
 151               const CHAR *endp;
 152
 153               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
 154                              string_end - n);
 155               if (endp == NULL)
 156                 endp = string_end;
 157
 158               if (c == L_('[')
 159                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
 160                       && (c == L_('@') || c == L_('+') || c == L_('!'))
 161                       && *p == L_('(')))
 162                 {
 163                   int flags2 = ((flags & FNM_FILE_NAME)
 164                                 ? flags : (flags & ~FNM_PERIOD));
 165                   bool no_leading_period2 = no_leading_period;
 166
 167                   for (--p; n < endp; ++n, no_leading_period2 = false)
 168                     if (FCT (p, n, string_end, no_leading_period2, flags2)
 169                         == 0)
 170                       return 0;
 171                 }
 172               else if (c == L_('/') && (flags & FNM_FILE_NAME))
 173                 {
 174                   while (n < string_end && *n != L_('/'))
 175                     ++n;
 176                   if (n < string_end && *n == L_('/')
 177                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
 178                           == 0))
 179                     return 0;
 180                 }
 181               else
 182                 {
 183                   int flags2 = ((flags & FNM_FILE_NAME)
 184                                 ? flags : (flags & ~FNM_PERIOD));
 185                   int no_leading_period2 = no_leading_period;
 186
 187                   if (c == L_('\\') && !(flags & FNM_NOESCAPE))
 188                     c = *p;
 189                   c = FOLD (c);
 190                   for (--p; n < endp; ++n, no_leading_period2 = false)
 191                     if (FOLD ((UCHAR) *n) == c
 192                         && (FCT (p, n, string_end, no_leading_period2, flags2)
 193                             == 0))
 194                       return 0;
 195                 }
 196             }
 197
 198           /* If we come here no match is possible with the wildcard.  */
 199           return FNM_NOMATCH;
 200
 201         case L_('['):
 202           {
 203             /* Nonzero if the sense of the character class is inverted.  */
 204             register bool not;
 205             CHAR cold;
 206             UCHAR fn;
 207
 208             if (posixly_correct == 0)
 209               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 210
 211             if (n == string_end)
 212               return FNM_NOMATCH;
 213
 214             if (*n == L_('.') && no_leading_period)
 215               return FNM_NOMATCH;
 216
 217             if (*n == L_('/') && (flags & FNM_FILE_NAME))
 218               /* `/' cannot be matched.  */
 219               return FNM_NOMATCH;
 220
 221             not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
 222             if (not)
 223               ++p;
 224
 225             fn = FOLD ((UCHAR) *n);
 226
 227             c = *p++;
 228             for (;;)
 229               {
 230                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
 231                   {
 232                     if (*p == L_('\0'))
 233                       return FNM_NOMATCH;
 234                     c = FOLD ((UCHAR) *p);
 235                     ++p;
 236
 237                     goto normal_bracket;
 238                   }
 239                 else if (c == L_('[') && *p == L_(':'))
 240                   {
 241                     /* Leave room for the null.  */
 242                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
 243                     size_t c1 = 0;
 244 #if defined _LIBC || WIDE_CHAR_SUPPORT
 245                     wctype_t wt;
 246 #endif
 247                     const CHAR *startp = p;
 248
 249                     for (;;)
 250                       {
 251                         if (c1 == CHAR_CLASS_MAX_LENGTH)
 252                           /* The name is too long and therefore the pattern
 253                              is ill-formed.  */
 254                           return FNM_NOMATCH;
 255
 256                         c = *++p;
 257                         if (c == L_(':') && p[1] == L_(']'))
 258                           {
 259                             p += 2;
 260                             break;
 261                           }
 262                         if (c < L_('a') || c >= L_('z'))
 263                           {
 264                             /* This cannot possibly be a character class name.
 265                                Match it as a normal range.  */
 266                             p = startp;
 267                             c = L_('[');
 268                             goto normal_bracket;
 269                           }
 270                         str[c1++] = c;
 271                       }
 272                     str[c1] = L_('\0');
 273
 274 #if defined _LIBC || WIDE_CHAR_SUPPORT
 275                     wt = IS_CHAR_CLASS (str);
 276                     if (wt == 0)
 277                       /* Invalid character class name.  */
 278                       return FNM_NOMATCH;
 279
 280 # if defined _LIBC && ! WIDE_CHAR_VERSION
 281                     /* The following code is glibc specific but does
 282                        there a good job in speeding up the code since
 283                        we can avoid the btowc() call.  */
 284                     if (_ISCTYPE ((UCHAR) *n, wt))
 285                       goto matched;
 286 # else
 287                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
 288                       goto matched;
 289 # endif
 290 #else
 291                     if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
 292                         || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
 293                         || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
 294                         || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
 295                         || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
 296                         || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
 297                         || (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
 298                         || (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
 299                         || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
 300                         || (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
 301                         || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
 302                         || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
 303                       goto matched;
 304 #endif
 305                     c = *p++;
 306                   }
 307 #ifdef _LIBC
 308                 else if (c == L_('[') && *p == L_('='))
 309                   {
 310                     UCHAR str[1];
 311                     uint32_t nrules =
 312                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 313                     const CHAR *startp = p;
 314
 315                     c = *++p;
 316                     if (c == L_('\0'))
 317                       {
 318                         p = startp;
 319                         c = L_('[');
 320                         goto normal_bracket;
 321                       }
 322                     str[0] = c;
 323
 324                     c = *++p;
 325                     if (c != L_('=') || p[1] != L_(']'))
 326                       {
 327                         p = startp;
 328                         c = L_('[');
 329                         goto normal_bracket;
 330                       }
 331                     p += 2;
 332
 333                     if (nrules == 0)
 334                       {
 335                         if ((UCHAR) *n == str[0])
 336                           goto matched;
 337                       }
 338                     else
 339                       {
 340                         const int32_t *table;
 341 # if WIDE_CHAR_VERSION
 342                         const int32_t *weights;
 343                         const int32_t *extra;
 344 # else
 345                         const unsigned char *weights;
 346                         const unsigned char *extra;
 347 # endif
 348                         const int32_t *indirect;
 349                         int32_t idx;
 350                         const UCHAR *cp = (const UCHAR *) str;
 351
 352                         /* This #include defines a local function!  */
 353 # if WIDE_CHAR_VERSION
 354 #  include <locale/weightwc.h>
 355 # else
 356 #  include <locale/weight.h>
 357 # endif
 358
 359 # if WIDE_CHAR_VERSION
 360                         table = (const int32_t *)
 361                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
 362                         weights = (const int32_t *)
 363                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
 364                         extra = (const int32_t *)
 365                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
 366                         indirect = (const int32_t *)
 367                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
 368 # else
 369                         table = (const int32_t *)
 370                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 371                         weights = (const unsigned char *)
 372                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 373                         extra = (const unsigned char *)
 374                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 375                         indirect = (const int32_t *)
 376                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 377 # endif
 378
 379                         idx = findidx (&cp);
 380                         if (idx != 0)
 381                           {
 382                             /* We found a table entry.  Now see whether the
 383                                character we are currently at has the same
 384                                equivalance class value.  */
 385                             int len = weights[idx];
 386                             int32_t idx2;
 387                             const UCHAR *np = (const UCHAR *) n;
 388
 389                             idx2 = findidx (&np);
 390                             if (idx2 != 0 && len == weights[idx2])
 391                               {
 392                                 int cnt = 0;
 393
 394                                 while (cnt < len
 395                                        && (weights[idx + 1 + cnt]
 396                                            == weights[idx2 + 1 + cnt]))
 397                                   ++cnt;
 398
 399                                 if (cnt == len)
 400                                   goto matched;
 401                               }
 402                           }
 403                       }
 404
 405                     c = *p++;
 406                   }
 407 #endif
 408                 else if (c == L_('\0'))
 409                   /* [ (unterminated) loses.  */
 410                   return FNM_NOMATCH;
 411                 else
 412                   {
 413                     bool is_range = false;
 414
 415 #ifdef _LIBC
 416                     bool is_seqval = false;
 417
 418                     if (c == L_('[') && *p == L_('.'))
 419                       {
 420                         uint32_t nrules =
 421                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 422                         const CHAR *startp = p;
 423                         size_t c1 = 0;
 424
 425                         while (1)
 426                           {
 427                             c = *++p;
 428                             if (c == L_('.') && p[1] == L_(']'))
 429                               {
 430                                 p += 2;
 431                                 break;
 432                               }
 433                             if (c == '\0')
 434                               return FNM_NOMATCH;
 435                             ++c1;
 436                           }
 437
 438                         /* We have to handling the symbols differently in
 439                            ranges since then the collation sequence is
 440                            important.  */
 441                         is_range = *p == L_('-') && p[1] != L_('\0');
 442
 443                         if (nrules == 0)
 444                           {
 445                             /* There are no names defined in the collation
 446                                data.  Therefore we only accept the trivial
 447                                names consisting of the character itself.  */
 448                             if (c1 != 1)
 449                               return FNM_NOMATCH;
 450
 451                             if (!is_range && *n == startp[1])
 452                               goto matched;
 453
 454                             cold = startp[1];
 455                             c = *p++;
 456                           }
 457                         else
 458                           {
 459                             int32_t table_size;
 460                             const int32_t *symb_table;
 461 # ifdef WIDE_CHAR_VERSION
 462                             char str[c1];
 463                             size_t strcnt;
 464 # else
 465 #  define str (startp + 1)
 466 # endif
 467                             const unsigned char *extra;
 468                             int32_t idx;
 469                             int32_t elem;
 470                             int32_t second;
 471                             int32_t hash;
 472
 473 # ifdef WIDE_CHAR_VERSION
 474                             /* We have to convert the name to a single-byte
 475                                string.  This is possible since the names
 476                                consist of ASCII characters and the internal
 477                                representation is UCS4.  */
 478                             for (strcnt = 0; strcnt < c1; ++strcnt)
 479                               str[strcnt] = startp[1 + strcnt];
 480 # endif
 481
 482                             table_size =
 483                               _NL_CURRENT_WORD (LC_COLLATE,
 484                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
 485                             symb_table = (const int32_t *)
 486                               _NL_CURRENT (LC_COLLATE,
 487                                            _NL_COLLATE_SYMB_TABLEMB);
 488                             extra = (const unsigned char *)
 489                               _NL_CURRENT (LC_COLLATE,
 490                                            _NL_COLLATE_SYMB_EXTRAMB);
 491
 492                             /* Locate the character in the hashing table.  */
 493                             hash = elem_hash (str, c1);
 494
 495                             idx = 0;
 496                             elem = hash % table_size;
 497                             if (symb_table[2 * elem] != 0)
 498                               {
 499                                 second = hash % (table_size - 2) + 1;
 500
 501                                 do
 502                                   {
 503                                     /* First compare the hashing value.  */
 504                                     if (symb_table[2 * elem] == hash
 505                                         && (c1
 506                                             == extra[symb_table[2 * elem + 1]])
 507                                         && memcmp (str,
 508                                                    &extra[symb_table[2 * elem
 509                                                                      + 1]
 510                                                           + 1], c1) == 0)
 511                                       {
 512                                         /* Yep, this is the entry.  */
 513                                         idx = symb_table[2 * elem + 1];
 514                                         idx += 1 + extra[idx];
 515                                         break;
 516                                       }
 517
 518                                     /* Next entry.  */
 519                                     elem += second;
 520                                   }
 521                                 while (symb_table[2 * elem] != 0);
 522                               }
 523
 524                             if (symb_table[2 * elem] != 0)
 525                               {
 526                                 /* Compare the byte sequence but only if
 527                                    this is not part of a range.  */
 528 # ifdef WIDE_CHAR_VERSION
 529                                 int32_t *wextra;
 530
 531                                 idx += 1 + extra[idx];
 532                                 /* Adjust for the alignment.  */
 533                                 idx = (idx + 3) & ~3;
 534
 535                                 wextra = (int32_t *) &extra[idx + 4];
 536 # endif
 537
 538                                 if (! is_range)
 539                                   {
 540 # ifdef WIDE_CHAR_VERSION
 541                                     for (c1 = 0;
 542                                          (int32_t) c1 < wextra[idx];
 543                                          ++c1)
 544                                       if (n[c1] != wextra[1 + c1])
 545                                         break;
 546
 547                                     if ((int32_t) c1 == wextra[idx])
 548                                       goto matched;
 549 # else
 550                                     for (c1 = 0; c1 < extra[idx]; ++c1)
 551                                       if (n[c1] != extra[1 + c1])
 552                                         break;
 553
 554                                     if (c1 == extra[idx])
 555                                       goto matched;
 556 # endif
 557                                   }
 558
 559                                 /* Get the collation sequence value.  */
 560                                 is_seqval = true;
 561 # ifdef WIDE_CHAR_VERSION
 562                                 cold = wextra[1 + wextra[idx]];
 563 # else
 564                                 /* Adjust for the alignment.  */
 565                                 idx += 1 + extra[idx];
 566                                 idx = (idx + 3) & ~4;
 567                                 cold = *((int32_t *) &extra[idx]);
 568 # endif
 569
 570                                 c = *p++;
 571                               }
 572                             else if (c1 == 1)
 573                               {
 574                                 /* No valid character.  Match it as a
 575                                    single byte.  */
 576                                 if (!is_range && *n == str[0])
 577                                   goto matched;
 578
 579                                 cold = str[0];
 580                                 c = *p++;
 581                               }
 582                             else
 583                               return FNM_NOMATCH;
 584                           }
 585                       }
 586                     else
 587 # undef str
 588 #endif
 589                       {
 590                         c = FOLD (c);
 591                       normal_bracket:
 592
 593                         /* We have to handling the symbols differently in
 594                            ranges since then the collation sequence is
 595                            important.  */
 596                         is_range = (*p == L_('-') && p[1] != L_('\0')
 597                                     && p[1] != L_(']'));
 598
 599                         if (!is_range && c == fn)
 600                           goto matched;
 601
 602 #if _LIBC
 603                         /* This is needed if we goto normal_bracket; from
 604                            outside of is_seqval's scope.  */
 605                         is_seqval = false;
 606 #endif
 607
 608                         cold = c;
 609                         c = *p++;
 610                       }
 611
 612                     if (c == L_('-') && *p != L_(']'))
 613                       {
 614 #if _LIBC
 615                         /* We have to find the collation sequence
 616                            value for C.  Collation sequence is nothing
 617                            we can regularly access.  The sequence
 618                            value is defined by the order in which the
 619                            definitions of the collation values for the
 620                            various characters appear in the source
 621                            file.  A strange concept, nowhere
 622                            documented.  */
 623                         uint32_t fcollseq;
 624                         uint32_t lcollseq;
 625                         UCHAR cend = *p++;
 626
 627 # ifdef WIDE_CHAR_VERSION
 628                         /* Search in the `names' array for the characters.  */
 629                         fcollseq = __collseq_table_lookup (collseq, fn);
 630                         if (fcollseq == ~((uint32_t) 0))
 631                           /* XXX We don't know anything about the character
 632                              we are supposed to match.  This means we are
 633                              failing.  */
 634                           goto range_not_matched;
 635
 636                         if (is_seqval)
 637                           lcollseq = cold;
 638                         else
 639                           lcollseq = __collseq_table_lookup (collseq, cold);
 640 # else
 641                         fcollseq = collseq[fn];
 642                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
 643 # endif
 644
 645                         is_seqval = false;
 646                         if (cend == L_('[') && *p == L_('.'))
 647                           {
 648                             uint32_t nrules =
 649                               _NL_CURRENT_WORD (LC_COLLATE,
 650                                                 _NL_COLLATE_NRULES);
 651                             const CHAR *startp = p;
 652                             size_t c1 = 0;
 653
 654                             while (1)
 655                               {
 656                                 c = *++p;
 657                                 if (c == L_('.') && p[1] == L_(']'))
 658                                   {
 659                                     p += 2;
 660                                     break;
 661                                   }
 662                                 if (c == '\0')
 663                                   return FNM_NOMATCH;
 664                                 ++c1;
 665                               }
 666
 667                             if (nrules == 0)
 668                               {
 669                                 /* There are no names defined in the
 670                                    collation data.  Therefore we only
 671                                    accept the trivial names consisting
 672                                    of the character itself.  */
 673                                 if (c1 != 1)
 674                                   return FNM_NOMATCH;
 675
 676                                 cend = startp[1];
 677                               }
 678                             else
 679                               {
 680                                 int32_t table_size;
 681                                 const int32_t *symb_table;
 682 # ifdef WIDE_CHAR_VERSION
 683                                 char str[c1];
 684                                 size_t strcnt;
 685 # else
 686 #  define str (startp + 1)
 687 # endif
 688                                 const unsigned char *extra;
 689                                 int32_t idx;
 690                                 int32_t elem;
 691                                 int32_t second;
 692                                 int32_t hash;
 693
 694 # ifdef WIDE_CHAR_VERSION
 695                                 /* We have to convert the name to a single-byte
 696                                    string.  This is possible since the names
 697                                    consist of ASCII characters and the internal
 698                                    representation is UCS4.  */
 699                                 for (strcnt = 0; strcnt < c1; ++strcnt)
 700                                   str[strcnt] = startp[1 + strcnt];
 701 # endif
 702
 703                                 table_size =
 704                                   _NL_CURRENT_WORD (LC_COLLATE,
 705                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
 706                                 symb_table = (const int32_t *)
 707                                   _NL_CURRENT (LC_COLLATE,
 708                                                _NL_COLLATE_SYMB_TABLEMB);
 709                                 extra = (const unsigned char *)
 710                                   _NL_CURRENT (LC_COLLATE,
 711                                                _NL_COLLATE_SYMB_EXTRAMB);
 712
 713                                 /* Locate the character in the hashing
 714                                    table.  */
 715                                 hash = elem_hash (str, c1);
 716
 717                                 idx = 0;
 718                                 elem = hash % table_size;
 719                                 if (symb_table[2 * elem] != 0)
 720                                   {
 721                                     second = hash % (table_size - 2) + 1;
 722
 723                                     do
 724                                       {
 725                                         /* First compare the hashing value.  */
 726                                         if (symb_table[2 * elem] == hash
 727                                             && (c1
 728                                                 == extra[symb_table[2 * elem + 1]])
 729                                             && memcmp (str,
 730                                                        &extra[symb_table[2 * elem + 1]
 731                                                               + 1], c1) == 0)
 732                                           {
 733                                             /* Yep, this is the entry.  */
 734                                             idx = symb_table[2 * elem + 1];
 735                                             idx += 1 + extra[idx];
 736                                             break;
 737                                           }
 738
 739                                         /* Next entry.  */
 740                                         elem += second;
 741                                       }
 742                                     while (symb_table[2 * elem] != 0);
 743                                   }
 744
 745                                 if (symb_table[2 * elem] != 0)
 746                                   {
 747                                     /* Compare the byte sequence but only if
 748                                        this is not part of a range.  */
 749 # ifdef WIDE_CHAR_VERSION
 750                                     int32_t *wextra;
 751
 752                                     idx += 1 + extra[idx];
 753                                     /* Adjust for the alignment.  */
 754                                     idx = (idx + 3) & ~4;
 755
 756                                     wextra = (int32_t *) &extra[idx + 4];
 757 # endif
 758                                     /* Get the collation sequence value.  */
 759                                     is_seqval = true;
 760 # ifdef WIDE_CHAR_VERSION
 761                                     cend = wextra[1 + wextra[idx]];
 762 # else
 763                                     /* Adjust for the alignment.  */
 764                                     idx += 1 + extra[idx];
 765                                     idx = (idx + 3) & ~4;
 766                                     cend = *((int32_t *) &extra[idx]);
 767 # endif
 768                                   }
 769                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
 770                                   {
 771                                     cend = str[0];
 772                                     c = *p++;
 773                                   }
 774                                 else
 775                                   return FNM_NOMATCH;
 776                               }
 777 # undef str
 778                           }
 779                         else
 780                           {
 781                             if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
 782                               cend = *p++;
 783                             if (cend == L_('\0'))
 784                               return FNM_NOMATCH;
 785                             cend = FOLD (cend);
 786                           }
 787
 788                         /* XXX It is not entirely clear to me how to handle
 789                            characters which are not mentioned in the
 790                            collation specification.  */
 791                         if (
 792 # ifdef WIDE_CHAR_VERSION
 793                             lcollseq == 0xffffffff ||
 794 # endif
 795                             lcollseq <= fcollseq)
 796                           {
 797                             /* We have to look at the upper bound.  */
 798                             uint32_t hcollseq;
 799
 800                             if (is_seqval)
 801                               hcollseq = cend;
 802                             else
 803                               {
 804 # ifdef WIDE_CHAR_VERSION
 805                                 hcollseq =
 806                                   __collseq_table_lookup (collseq, cend);
 807                                 if (hcollseq == ~((uint32_t) 0))
 808                                   {
 809                                     /* Hum, no information about the upper
 810                                        bound.  The matching succeeds if the
 811                                        lower bound is matched exactly.  */
 812                                     if (lcollseq != fcollseq)
 813                                       goto range_not_matched;
 814
 815                                     goto matched;
 816                                   }
 817 # else
 818                                 hcollseq = collseq[cend];
 819 # endif
 820                               }
 821
 822                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
 823                               goto matched;
 824                           }
 825 # ifdef WIDE_CHAR_VERSION
 826                       range_not_matched:
 827 # endif
 828 #else
 829                         /* We use a boring value comparison of the character
 830                            values.  This is better than comparing using
 831                            `strcoll' since the latter would have surprising
 832                            and sometimes fatal consequences.  */
 833                         UCHAR cend = *p++;
 834
 835                         if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
 836                           cend = *p++;
 837                         if (cend == L_('\0'))
 838                           return FNM_NOMATCH;
 839
 840                         /* It is a range.  */
 841                         if (cold <= fn && fn <= cend)
 842                           goto matched;
 843 #endif
 844
 845                         c = *p++;
 846                       }
 847                   }
 848
 849                 if (c == L_(']'))
 850                   break;
 851               }
 852
 853             if (!not)
 854               return FNM_NOMATCH;
 855             break;
 856
 857           matched:
 858             /* Skip the rest of the [...] that already matched.  */
 859             do
 860               {
 861               ignore_next:
 862                 c = *p++;
 863
 864                 if (c == L_('\0'))
 865                   /* [... (unterminated) loses.  */
 866                   return FNM_NOMATCH;
 867
 868                 if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
 869                   {
 870                     if (*p == L_('\0'))
 871                       return FNM_NOMATCH;
 872                     /* XXX 1003.2d11 is unclear if this is right.  */
 873                     ++p;
 874                   }
 875                 else if (c == L_('[') && *p == L_(':'))
 876                   {
 877                     int c1 = 0;
 878                     const CHAR *startp = p;
 879
 880                     while (1)
 881                       {
 882                         c = *++p;
 883                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
 884                           return FNM_NOMATCH;
 885
 886                         if (*p == L_(':') && p[1] == L_(']'))
 887                           break;
 888
 889                         if (c < L_('a') || c >= L_('z'))
 890                           {
 891                             p = startp;
 892                             goto ignore_next;
 893                           }
 894                       }
 895                     p += 2;
 896                     c = *p++;
 897                   }
 898                 else if (c == L_('[') && *p == L_('='))
 899                   {
 900                     c = *++p;
 901                     if (c == L_('\0'))
 902                       return FNM_NOMATCH;
 903                     c = *++p;
 904                     if (c != L_('=') || p[1] != L_(']'))
 905                       return FNM_NOMATCH;
 906                     p += 2;
 907                     c = *p++;
 908                   }
 909                 else if (c == L_('[') && *p == L_('.'))
 910                   {
 911                     ++p;
 912                     while (1)
 913                       {
 914                         c = *++p;
 915                         if (c == '\0')
 916                           return FNM_NOMATCH;
 917
 918                         if (*p == L_('.') && p[1] == L_(']'))
 919                           break;
 920                       }
 921                     p += 2;
 922                     c = *p++;
 923                   }
 924               }
 925             while (c != L_(']'));
 926             if (not)
 927               return FNM_NOMATCH;
 928           }
 929           break;
 930
 931         case L_('+'):
 932         case L_('@'):
 933         case L_('!'):
 934           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 935             {
 936               int res;
 937
 938               res = EXT (c, p, n, string_end, no_leading_period, flags);
 939               if (res != -1)
 940                 return res;
 941             }
 942           goto normal_match;
 943
 944         case L_('/'):
 945           if (NO_LEADING_PERIOD (flags))
 946             {
 947               if (n == string_end || c != (UCHAR) *n)
 948                 return FNM_NOMATCH;
 949
 950               new_no_leading_period = true;
 951               break;
 952             }
 953           /* FALLTHROUGH */
 954         default:
 955         normal_match:
 956           if (n == string_end || c != FOLD ((UCHAR) *n))
 957             return FNM_NOMATCH;
 958         }
 959
 960       no_leading_period = new_no_leading_period;
 961       ++n;
 962     }
 963
 964   if (n == string_end)
 965     return 0;
 966
 967   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
 968     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
 969     return 0;
 970
 971   return FNM_NOMATCH;
 972 }
 973
 974
 975 static const CHAR *
 976 internal_function
 977 END (const CHAR *pattern)
 978 {
 979   const CHAR *p = pattern;
 980
 981   while (1)
 982     if (*++p == L_('\0'))
 983       /* This is an invalid pattern.  */
 984       return pattern;
 985     else if (*p == L_('['))
 986       {
 987         /* Handle brackets special.  */
 988         if (posixly_correct == 0)
 989           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 990
 991         /* Skip the not sign.  We have to recognize it because of a possibly
 992            following ']'.  */
 993         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
 994           ++p;
 995         /* A leading ']' is recognized as such.  */
 996         if (*p == L_(']'))
 997           ++p;
 998         /* Skip over all characters of the list.  */
 999         while (*p != L_(']'))
1000           if (*p++ == L_('\0'))
1001             /* This is no valid pattern.  */
1002             return pattern;
1003       }
1004     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1005               || *p == L_('!')) && p[1] == L_('('))
1006       p = END (p + 1);
1007     else if (*p == L_(')'))
1008       break;
1009
1010   return p + 1;
1011 }
1012
1013
1014 static int
1015 internal_function
1016 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1017      bool no_leading_period, int flags)
1018 {
1019   const CHAR *startp;
1020   size_t level;
1021   struct patternlist
1022   {
1023     struct patternlist *next;
1024     CHAR str[1];
1025   } *list = NULL;
1026   struct patternlist **lastp = &list;
1027   size_t pattern_len = STRLEN (pattern);
1028   const CHAR *p;
1029   const CHAR *rs;
1030   enum { ALLOCA_LIMIT = 8000 };
1031
1032   /* Parse the pattern.  Store the individual parts in the list.  */
1033   level = 0;
1034   for (startp = p = pattern + 1; ; ++p)
1035     if (*p == L_('\0'))
1036       /* This is an invalid pattern.  */
1037       return -1;
1038     else if (*p == L_('['))
1039       {
1040         /* Handle brackets special.  */
1041         if (posixly_correct == 0)
1042           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1043
1044         /* Skip the not sign.  We have to recognize it because of a possibly
1045            following ']'.  */
1046         if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1047           ++p;
1048         /* A leading ']' is recognized as such.  */
1049         if (*p == L_(']'))
1050           ++p;
1051         /* Skip over all characters of the list.  */
1052         while (*p != L_(']'))
1053           if (*p++ == L_('\0'))
1054             /* This is no valid pattern.  */
1055             return -1;
1056       }
1057     else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1058               || *p == L_('!')) && p[1] == L_('('))
1059       /* Remember the nesting level.  */
1060       ++level;
1061     else if (*p == L_(')'))
1062       {
1063         if (level-- == 0)
1064           {
1065             /* This means we found the end of the pattern.  */
1066 #define NEW_PATTERN \
1067             struct patternlist *newp;                                         \
1068             size_t plen;                                                      \
1069             size_t plensize;                                                  \
1070             size_t newpsize;                                                  \
1071                                                                               \
1072             plen = (opt == L_('?') || opt == L_('@')                          \
1073                     ? pattern_len                                             \
1074                     : p - startp + 1);                                        \
1075             plensize = plen * sizeof (CHAR);                                  \
1076             newpsize = offsetof (struct patternlist, str) + plensize;         \
1077             if ((size_t) -1 / sizeof (CHAR) < plen                            \
1078                 || newpsize < offsetof (struct patternlist, str)              \
1079                 || ALLOCA_LIMIT <= newpsize)                                  \
1080               return -1;                                                      \
1081             newp = (struct patternlist *) alloca (newpsize);                  \
1082             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0');    \
1083             newp->next = NULL;                                                \
1084             *lastp = newp;                                                    \
1085             lastp = &newp->next
1086             NEW_PATTERN;
1087             break;
1088           }
1089       }
1090     else if (*p == L_('|'))
1091       {
1092         if (level == 0)
1093           {
1094             NEW_PATTERN;
1095             startp = p + 1;
1096           }
1097       }
1098   assert (list != NULL);
1099   assert (p[-1] == L_(')'));
1100 #undef NEW_PATTERN
1101
1102   switch (opt)
1103     {
1104     case L_('*'):
1105       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1106         return 0;
1107       /* FALLTHROUGH */
1108
1109     case L_('+'):
1110       do
1111         {
1112           for (rs = string; rs <= string_end; ++rs)
1113             /* First match the prefix with the current pattern with the
1114                current pattern.  */
1115             if (FCT (list->str, string, rs, no_leading_period,
1116                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1117                 /* This was successful.  Now match the rest with the rest
1118                    of the pattern.  */
1119                 && (FCT (p, rs, string_end,
1120                          rs == string
1121                          ? no_leading_period
1122                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1123                          flags & FNM_FILE_NAME
1124                          ? flags : flags & ~FNM_PERIOD) == 0
1125                     /* This didn't work.  Try the whole pattern.  */
1126                     || (rs != string
1127                         && FCT (pattern - 1, rs, string_end,
1128                                 rs == string
1129                                 ? no_leading_period
1130                                 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1131                                 flags & FNM_FILE_NAME
1132                                 ? flags : flags & ~FNM_PERIOD) == 0)))
1133               /* It worked.  Signal success.  */
1134               return 0;
1135         }
1136       while ((list = list->next) != NULL);
1137
1138       /* None of the patterns lead to a match.  */
1139       return FNM_NOMATCH;
1140
1141     case L_('?'):
1142       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1143         return 0;
1144       /* FALLTHROUGH */
1145
1146     case L_('@'):
1147       do
1148         /* I cannot believe it but `strcat' is actually acceptable
1149            here.  Match the entire string with the prefix from the
1150            pattern list and the rest of the pattern following the
1151            pattern list.  */
1152         if (FCT (STRCAT (list->str, p), string, string_end,
1153                  no_leading_period,
1154                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1155           /* It worked.  Signal success.  */
1156           return 0;
1157       while ((list = list->next) != NULL);
1158
1159       /* None of the patterns lead to a match.  */
1160       return FNM_NOMATCH;
1161
1162     case L_('!'):
1163       for (rs = string; rs <= string_end; ++rs)
1164         {
1165           struct patternlist *runp;
1166
1167           for (runp = list; runp != NULL; runp = runp->next)
1168             if (FCT (runp->str, string, rs,  no_leading_period,
1169                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1170               break;
1171
1172           /* If none of the patterns matched see whether the rest does.  */
1173           if (runp == NULL
1174               && (FCT (p, rs, string_end,
1175                        rs == string
1176                        ? no_leading_period
1177                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1178                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1179                   == 0))
1180             /* This is successful.  */
1181             return 0;
1182         }
1183
1184       /* None of the patterns together with the rest of the pattern
1185          lead to a match.  */
1186       return FNM_NOMATCH;
1187
1188     default:
1189       assert (! "Invalid extended matching operator");
1190       break;
1191     }
1192
1193   return -1;
1194 }
1195
1196
1197 #undef FOLD
1198 #undef CHAR
1199 #undef UCHAR
1200 #undef INT
1201 #undef FCT
1202 #undef EXT
1203 #undef END
1204 #undef MEMPCPY
1205 #undef MEMCHR
1206 #undef STRCOLL
1207 #undef STRLEN
1208 #undef STRCAT
1209 #undef L_
1210 #undef BTOWC