lib/regexec.c

   1 /* Extended regular expression matching and search library.
   2    Copyright (C) 2002-2013 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Lesser General Public
   8    License as published by the Free Software Foundation; either
   9    version 2.1 of the License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Lesser General Public License for more details.
  15
  16    You should have received a copy of the GNU Lesser General Public
  17    License along with the GNU C Library; if not, see
  18    <http://www.gnu.org/licenses/>.  */
  19
  20 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
  21                                      Idx n) internal_function;
  22 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
  23 static void match_ctx_free (re_match_context_t *cache) internal_function;
  24 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
  25                                           Idx str_idx, Idx from, Idx to)
  26      internal_function;
  27 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
  28      internal_function;
  29 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
  30                                            Idx str_idx) internal_function;
  31 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
  32                                                     Idx node, Idx str_idx)
  33      internal_function;
  34 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
  35                            re_dfastate_t **limited_sts, Idx last_node,
  36                            Idx last_str_idx)
  37      internal_function;
  38 static reg_errcode_t re_search_internal (const regex_t *preg,
  39                                          const char *string, Idx length,
  40                                          Idx start, Idx last_start, Idx stop,
  41                                          size_t nmatch, regmatch_t pmatch[],
  42                                          int eflags) internal_function;
  43 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
  44                                   const char *string1, Idx length1,
  45                                   const char *string2, Idx length2,
  46                                   Idx start, regoff_t range,
  47                                   struct re_registers *regs,
  48                                   Idx stop, bool ret_len) internal_function;
  49 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
  50                                 const char *string, Idx length, Idx start,
  51                                 regoff_t range, Idx stop,
  52                                 struct re_registers *regs,
  53                                 bool ret_len) internal_function;
  54 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
  55                               Idx nregs, int regs_allocated) internal_function;
  56 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
  57      internal_function;
  58 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
  59                            Idx *p_match_first) internal_function;
  60 static Idx check_halt_state_context (const re_match_context_t *mctx,
  61                                      const re_dfastate_t *state, Idx idx)
  62      internal_function;
  63 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
  64                          regmatch_t *prev_idx_match, Idx cur_node,
  65                          Idx cur_idx, Idx nmatch) internal_function;
  66 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
  67                                       Idx str_idx, Idx dest_node, Idx nregs,
  68                                       regmatch_t *regs,
  69                                       re_node_set *eps_via_nodes)
  70      internal_function;
  71 static reg_errcode_t set_regs (const regex_t *preg,
  72                                const re_match_context_t *mctx,
  73                                size_t nmatch, regmatch_t *pmatch,
  74                                bool fl_backtrack) internal_function;
  75 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
  76      internal_function;
  77
  78 #ifdef RE_ENABLE_I18N
  79 static int sift_states_iter_mb (const re_match_context_t *mctx,
  80                                 re_sift_context_t *sctx,
  81                                 Idx node_idx, Idx str_idx, Idx max_str_idx)
  82      internal_function;
  83 #endif /* RE_ENABLE_I18N */
  84 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
  85                                            re_sift_context_t *sctx)
  86      internal_function;
  87 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
  88                                           re_sift_context_t *sctx, Idx str_idx,
  89                                           re_node_set *cur_dest)
  90      internal_function;
  91 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
  92                                               re_sift_context_t *sctx,
  93                                               Idx str_idx,
  94                                               re_node_set *dest_nodes)
  95      internal_function;
  96 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
  97                                             re_node_set *dest_nodes,
  98                                             const re_node_set *candidates)
  99      internal_function;
 100 static bool check_dst_limits (const re_match_context_t *mctx,
 101                               const re_node_set *limits,
 102                               Idx dst_node, Idx dst_idx, Idx src_node,
 103                               Idx src_idx) internal_function;
 104 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
 105                                         int boundaries, Idx subexp_idx,
 106                                         Idx from_node, Idx bkref_idx)
 107      internal_function;
 108 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
 109                                       Idx limit, Idx subexp_idx,
 110                                       Idx node, Idx str_idx,
 111                                       Idx bkref_idx) internal_function;
 112 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
 113                                           re_node_set *dest_nodes,
 114                                           const re_node_set *candidates,
 115                                           re_node_set *limits,
 116                                           struct re_backref_cache_entry *bkref_ents,
 117                                           Idx str_idx) internal_function;
 118 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
 119                                         re_sift_context_t *sctx,
 120                                         Idx str_idx, const re_node_set *candidates)
 121      internal_function;
 122 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
 123                                         re_dfastate_t **dst,
 124                                         re_dfastate_t **src, Idx num)
 125      internal_function;
 126 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
 127                                          re_match_context_t *mctx) internal_function;
 128 static re_dfastate_t *transit_state (reg_errcode_t *err,
 129                                      re_match_context_t *mctx,
 130                                      re_dfastate_t *state) internal_function;
 131 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
 132                                             re_match_context_t *mctx,
 133                                             re_dfastate_t *next_state)
 134      internal_function;
 135 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
 136                                                 re_node_set *cur_nodes,
 137                                                 Idx str_idx) internal_function;
 138 #if 0
 139 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
 140                                         re_match_context_t *mctx,
 141                                         re_dfastate_t *pstate)
 142      internal_function;
 143 #endif
 144 #ifdef RE_ENABLE_I18N
 145 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
 146                                        re_dfastate_t *pstate)
 147      internal_function;
 148 #endif /* RE_ENABLE_I18N */
 149 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
 150                                           const re_node_set *nodes)
 151      internal_function;
 152 static reg_errcode_t get_subexp (re_match_context_t *mctx,
 153                                  Idx bkref_node, Idx bkref_str_idx)
 154      internal_function;
 155 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
 156                                      const re_sub_match_top_t *sub_top,
 157                                      re_sub_match_last_t *sub_last,
 158                                      Idx bkref_node, Idx bkref_str)
 159      internal_function;
 160 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
 161                              Idx subexp_idx, int type) internal_function;
 162 static reg_errcode_t check_arrival (re_match_context_t *mctx,
 163                                     state_array_t *path, Idx top_node,
 164                                     Idx top_str, Idx last_node, Idx last_str,
 165                                     int type) internal_function;
 166 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
 167                                                    Idx str_idx,
 168                                                    re_node_set *cur_nodes,
 169                                                    re_node_set *next_nodes)
 170      internal_function;
 171 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
 172                                                re_node_set *cur_nodes,
 173                                                Idx ex_subexp, int type)
 174      internal_function;
 175 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
 176                                                    re_node_set *dst_nodes,
 177                                                    Idx target, Idx ex_subexp,
 178                                                    int type) internal_function;
 179 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
 180                                          re_node_set *cur_nodes, Idx cur_str,
 181                                          Idx subexp_num, int type)
 182      internal_function;
 183 static bool build_trtable (const re_dfa_t *dfa,
 184                            re_dfastate_t *state) internal_function;
 185 #ifdef RE_ENABLE_I18N
 186 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
 187                                     const re_string_t *input, Idx idx)
 188      internal_function;
 189 # ifdef _LIBC
 190 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
 191                                                    size_t name_len)
 192      internal_function;
 193 # endif /* _LIBC */
 194 #endif /* RE_ENABLE_I18N */
 195 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
 196                                        const re_dfastate_t *state,
 197                                        re_node_set *states_node,
 198                                        bitset_t *states_ch) internal_function;
 199 static bool check_node_accept (const re_match_context_t *mctx,
 200                                const re_token_t *node, Idx idx)
 201      internal_function;
 202 static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len)
 203      internal_function;
 204 \f
 205 /* Entry point for POSIX code.  */
 206
 207 /* regexec searches for a given pattern, specified by PREG, in the
 208    string STRING.
 209
 210    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
 211    'regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
 212    least NMATCH elements, and we set them to the offsets of the
 213    corresponding matched substrings.
 214
 215    EFLAGS specifies "execution flags" which affect matching: if
 216    REG_NOTBOL is set, then ^ does not match at the beginning of the
 217    string; if REG_NOTEOL is set, then $ does not match at the end.
 218
 219    We return 0 if we find a match and REG_NOMATCH if not.  */
 220
 221 int
 222 regexec (preg, string, nmatch, pmatch, eflags)
 223     const regex_t *_Restrict_ preg;
 224     const char *_Restrict_ string;
 225     size_t nmatch;
 226     regmatch_t pmatch[_Restrict_arr_];
 227     int eflags;
 228 {
 229   reg_errcode_t err;
 230   Idx start, length;
 231 #ifdef _LIBC
 232   re_dfa_t *dfa = preg->buffer;
 233 #endif
 234
 235   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
 236     return REG_BADPAT;
 237
 238   if (eflags & REG_STARTEND)
 239     {
 240       start = pmatch[0].rm_so;
 241       length = pmatch[0].rm_eo;
 242     }
 243   else
 244     {
 245       start = 0;
 246       length = strlen (string);
 247     }
 248
 249   __libc_lock_lock (dfa->lock);
 250   if (preg->no_sub)
 251     err = re_search_internal (preg, string, length, start, length,
 252                               length, 0, NULL, eflags);
 253   else
 254     err = re_search_internal (preg, string, length, start, length,
 255                               length, nmatch, pmatch, eflags);
 256   __libc_lock_unlock (dfa->lock);
 257   return err != REG_NOERROR;
 258 }
 259
 260 #ifdef _LIBC
 261 # include <shlib-compat.h>
 262 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
 263
 264 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
 265 __typeof__ (__regexec) __compat_regexec;
 266
 267 int
 268 attribute_compat_text_section
 269 __compat_regexec (const regex_t *_Restrict_ preg,
 270                   const char *_Restrict_ string, size_t nmatch,
 271                   regmatch_t pmatch[], int eflags)
 272 {
 273   return regexec (preg, string, nmatch, pmatch,
 274                   eflags & (REG_NOTBOL | REG_NOTEOL));
 275 }
 276 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
 277 # endif
 278 #endif
 279
 280 /* Entry points for GNU code.  */
 281
 282 /* re_match, re_search, re_match_2, re_search_2
 283
 284    The former two functions operate on STRING with length LENGTH,
 285    while the later two operate on concatenation of STRING1 and STRING2
 286    with lengths LENGTH1 and LENGTH2, respectively.
 287
 288    re_match() matches the compiled pattern in BUFP against the string,
 289    starting at index START.
 290
 291    re_search() first tries matching at index START, then it tries to match
 292    starting from index START + 1, and so on.  The last start position tried
 293    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
 294    way as re_match().)
 295
 296    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
 297    the first STOP characters of the concatenation of the strings should be
 298    concerned.
 299
 300    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
 301    and all groups is stored in REGS.  (For the "_2" variants, the offsets are
 302    computed relative to the concatenation, not relative to the individual
 303    strings.)
 304
 305    On success, re_match* functions return the length of the match, re_search*
 306    return the position of the start of the match.  Return value -1 means no
 307    match was found and -2 indicates an internal error.  */
 308
 309 regoff_t
 310 re_match (bufp, string, length, start, regs)
 311     struct re_pattern_buffer *bufp;
 312     const char *string;
 313     Idx length, start;
 314     struct re_registers *regs;
 315 {
 316   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
 317 }
 318 #ifdef _LIBC
 319 weak_alias (__re_match, re_match)
 320 #endif
 321
 322 regoff_t
 323 re_search (bufp, string, length, start, range, regs)
 324     struct re_pattern_buffer *bufp;
 325     const char *string;
 326     Idx length, start;
 327     regoff_t range;
 328     struct re_registers *regs;
 329 {
 330   return re_search_stub (bufp, string, length, start, range, length, regs,
 331                          false);
 332 }
 333 #ifdef _LIBC
 334 weak_alias (__re_search, re_search)
 335 #endif
 336
 337 regoff_t
 338 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
 339     struct re_pattern_buffer *bufp;
 340     const char *string1, *string2;
 341     Idx length1, length2, start, stop;
 342     struct re_registers *regs;
 343 {
 344   return re_search_2_stub (bufp, string1, length1, string2, length2,
 345                            start, 0, regs, stop, true);
 346 }
 347 #ifdef _LIBC
 348 weak_alias (__re_match_2, re_match_2)
 349 #endif
 350
 351 regoff_t
 352 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
 353     struct re_pattern_buffer *bufp;
 354     const char *string1, *string2;
 355     Idx length1, length2, start, stop;
 356     regoff_t range;
 357     struct re_registers *regs;
 358 {
 359   return re_search_2_stub (bufp, string1, length1, string2, length2,
 360                            start, range, regs, stop, false);
 361 }
 362 #ifdef _LIBC
 363 weak_alias (__re_search_2, re_search_2)
 364 #endif
 365
 366 static regoff_t
 367 re_search_2_stub (struct re_pattern_buffer *bufp,
 368                   const char *string1, Idx length1,
 369                   const char *string2, Idx length2,
 370                   Idx start, regoff_t range, struct re_registers *regs,
 371                   Idx stop, bool ret_len)
 372 {
 373   const char *str;
 374   regoff_t rval;
 375   Idx len = length1 + length2;
 376   char *s = NULL;
 377
 378   if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
 379     return -2;
 380
 381   /* Concatenate the strings.  */
 382   if (length2 > 0)
 383     if (length1 > 0)
 384       {
 385         s = re_malloc (char, len);
 386
 387         if (BE (s == NULL, 0))
 388           return -2;
 389 #ifdef _LIBC
 390         memcpy (__mempcpy (s, string1, length1), string2, length2);
 391 #else
 392         memcpy (s, string1, length1);
 393         memcpy (s + length1, string2, length2);
 394 #endif
 395         str = s;
 396       }
 397     else
 398       str = string2;
 399   else
 400     str = string1;
 401
 402   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
 403                          ret_len);
 404   re_free (s);
 405   return rval;
 406 }
 407
 408 /* The parameters have the same meaning as those of re_search.
 409    Additional parameters:
 410    If RET_LEN is true the length of the match is returned (re_match style);
 411    otherwise the position of the match is returned.  */
 412
 413 static regoff_t
 414 re_search_stub (struct re_pattern_buffer *bufp,
 415                 const char *string, Idx length,
 416                 Idx start, regoff_t range, Idx stop, struct re_registers *regs,
 417                 bool ret_len)
 418 {
 419   reg_errcode_t result;
 420   regmatch_t *pmatch;
 421   Idx nregs;
 422   regoff_t rval;
 423   int eflags = 0;
 424 #ifdef _LIBC
 425   re_dfa_t *dfa = bufp->buffer;
 426 #endif
 427   Idx last_start = start + range;
 428
 429   /* Check for out-of-range.  */
 430   if (BE (start < 0 || start > length, 0))
 431     return -1;
 432   if (BE (length < last_start || (0 <= range && last_start < start), 0))
 433     last_start = length;
 434   else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0))
 435     last_start = 0;
 436
 437   __libc_lock_lock (dfa->lock);
 438
 439   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
 440   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
 441
 442   /* Compile fastmap if we haven't yet.  */
 443   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
 444     re_compile_fastmap (bufp);
 445
 446   if (BE (bufp->no_sub, 0))
 447     regs = NULL;
 448
 449   /* We need at least 1 register.  */
 450   if (regs == NULL)
 451     nregs = 1;
 452   else if (BE (bufp->regs_allocated == REGS_FIXED
 453                && regs->num_regs <= bufp->re_nsub, 0))
 454     {
 455       nregs = regs->num_regs;
 456       if (BE (nregs < 1, 0))
 457         {
 458           /* Nothing can be copied to regs.  */
 459           regs = NULL;
 460           nregs = 1;
 461         }
 462     }
 463   else
 464     nregs = bufp->re_nsub + 1;
 465   pmatch = re_malloc (regmatch_t, nregs);
 466   if (BE (pmatch == NULL, 0))
 467     {
 468       rval = -2;
 469       goto out;
 470     }
 471
 472   result = re_search_internal (bufp, string, length, start, last_start, stop,
 473                                nregs, pmatch, eflags);
 474
 475   rval = 0;
 476
 477   /* I hope we needn't fill their regs with -1's when no match was found.  */
 478   if (result != REG_NOERROR)
 479     rval = result == REG_NOMATCH ? -1 : -2;
 480   else if (regs != NULL)
 481     {
 482       /* If caller wants register contents data back, copy them.  */
 483       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
 484                                            bufp->regs_allocated);
 485       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
 486         rval = -2;
 487     }
 488
 489   if (BE (rval == 0, 1))
 490     {
 491       if (ret_len)
 492         {
 493           assert (pmatch[0].rm_so == start);
 494           rval = pmatch[0].rm_eo - start;
 495         }
 496       else
 497         rval = pmatch[0].rm_so;
 498     }
 499   re_free (pmatch);
 500  out:
 501   __libc_lock_unlock (dfa->lock);
 502   return rval;
 503 }
 504
 505 static unsigned
 506 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
 507               int regs_allocated)
 508 {
 509   int rval = REGS_REALLOCATE;
 510   Idx i;
 511   Idx need_regs = nregs + 1;
 512   /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
 513      uses.  */
 514
 515   /* Have the register data arrays been allocated?  */
 516   if (regs_allocated == REGS_UNALLOCATED)
 517     { /* No.  So allocate them with malloc.  */
 518       regs->start = re_malloc (regoff_t, need_regs);
 519       if (BE (regs->start == NULL, 0))
 520         return REGS_UNALLOCATED;
 521       regs->end = re_malloc (regoff_t, need_regs);
 522       if (BE (regs->end == NULL, 0))
 523         {
 524           re_free (regs->start);
 525           return REGS_UNALLOCATED;
 526         }
 527       regs->num_regs = need_regs;
 528     }
 529   else if (regs_allocated == REGS_REALLOCATE)
 530     { /* Yes.  If we need more elements than were already
 531          allocated, reallocate them.  If we need fewer, just
 532          leave it alone.  */
 533       if (BE (need_regs > regs->num_regs, 0))
 534         {
 535           regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
 536           regoff_t *new_end;
 537           if (BE (new_start == NULL, 0))
 538             return REGS_UNALLOCATED;
 539           new_end = re_realloc (regs->end, regoff_t, need_regs);
 540           if (BE (new_end == NULL, 0))
 541             {
 542               re_free (new_start);
 543               return REGS_UNALLOCATED;
 544             }
 545           regs->start = new_start;
 546           regs->end = new_end;
 547           regs->num_regs = need_regs;
 548         }
 549     }
 550   else
 551     {
 552       assert (regs_allocated == REGS_FIXED);
 553       /* This function may not be called with REGS_FIXED and nregs too big.  */
 554       assert (regs->num_regs >= nregs);
 555       rval = REGS_FIXED;
 556     }
 557
 558   /* Copy the regs.  */
 559   for (i = 0; i < nregs; ++i)
 560     {
 561       regs->start[i] = pmatch[i].rm_so;
 562       regs->end[i] = pmatch[i].rm_eo;
 563     }
 564   for ( ; i < regs->num_regs; ++i)
 565     regs->start[i] = regs->end[i] = -1;
 566
 567   return rval;
 568 }
 569
 570 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
 571    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
 572    this memory for recording register information.  STARTS and ENDS
 573    must be allocated using the malloc library routine, and must each
 574    be at least NUM_REGS * sizeof (regoff_t) bytes long.
 575
 576    If NUM_REGS == 0, then subsequent matches should allocate their own
 577    register data.
 578
 579    Unless this function is called, the first search or match using
 580    PATTERN_BUFFER will allocate its own register data, without
 581    freeing the old data.  */
 582
 583 void
 584 re_set_registers (bufp, regs, num_regs, starts, ends)
 585     struct re_pattern_buffer *bufp;
 586     struct re_registers *regs;
 587     __re_size_t num_regs;
 588     regoff_t *starts, *ends;
 589 {
 590   if (num_regs)
 591     {
 592       bufp->regs_allocated = REGS_REALLOCATE;
 593       regs->num_regs = num_regs;
 594       regs->start = starts;
 595       regs->end = ends;
 596     }
 597   else
 598     {
 599       bufp->regs_allocated = REGS_UNALLOCATED;
 600       regs->num_regs = 0;
 601       regs->start = regs->end = NULL;
 602     }
 603 }
 604 #ifdef _LIBC
 605 weak_alias (__re_set_registers, re_set_registers)
 606 #endif
 607 \f
 608 /* Entry points compatible with 4.2 BSD regex library.  We don't define
 609    them unless specifically requested.  */
 610
 611 #if defined _REGEX_RE_COMP || defined _LIBC
 612 int
 613 # ifdef _LIBC
 614 weak_function
 615 # endif
 616 re_exec (s)
 617      const char *s;
 618 {
 619   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
 620 }
 621 #endif /* _REGEX_RE_COMP */
 622 \f
 623 /* Internal entry point.  */
 624
 625 /* Searches for a compiled pattern PREG in the string STRING, whose
 626    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
 627    meaning as with regexec.  LAST_START is START + RANGE, where
 628    START and RANGE have the same meaning as with re_search.
 629    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
 630    otherwise return the error code.
 631    Note: We assume front end functions already check ranges.
 632    (0 <= LAST_START && LAST_START <= LENGTH)  */
 633
 634 static reg_errcode_t
 635 __attribute_warn_unused_result__
 636 re_search_internal (const regex_t *preg,
 637                     const char *string, Idx length,
 638                     Idx start, Idx last_start, Idx stop,
 639                     size_t nmatch, regmatch_t pmatch[],
 640                     int eflags)
 641 {
 642   reg_errcode_t err;
 643   const re_dfa_t *dfa = preg->buffer;
 644   Idx left_lim, right_lim;
 645   int incr;
 646   bool fl_longest_match;
 647   int match_kind;
 648   Idx match_first;
 649   Idx match_last = REG_MISSING;
 650   Idx extra_nmatch;
 651   bool sb;
 652   int ch;
 653 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
 654   re_match_context_t mctx = { .dfa = dfa };
 655 #else
 656   re_match_context_t mctx;
 657 #endif
 658   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
 659                     && start != last_start && !preg->can_be_null)
 660                    ? preg->fastmap : NULL);
 661   RE_TRANSLATE_TYPE t = preg->translate;
 662
 663 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
 664   memset (&mctx, '\0', sizeof (re_match_context_t));
 665   mctx.dfa = dfa;
 666 #endif
 667
 668   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
 669   nmatch -= extra_nmatch;
 670
 671   /* Check if the DFA haven't been compiled.  */
 672   if (BE (preg->used == 0 || dfa->init_state == NULL
 673           || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
 674           || dfa->init_state_begbuf == NULL, 0))
 675     return REG_NOMATCH;
 676
 677 #ifdef DEBUG
 678   /* We assume front-end functions already check them.  */
 679   assert (0 <= last_start && last_start <= length);
 680 #endif
 681
 682   /* If initial states with non-begbuf contexts have no elements,
 683      the regex must be anchored.  If preg->newline_anchor is set,
 684      we'll never use init_state_nl, so do not check it.  */
 685   if (dfa->init_state->nodes.nelem == 0
 686       && dfa->init_state_word->nodes.nelem == 0
 687       && (dfa->init_state_nl->nodes.nelem == 0
 688           || !preg->newline_anchor))
 689     {
 690       if (start != 0 && last_start != 0)
 691         return REG_NOMATCH;
 692       start = last_start = 0;
 693     }
 694
 695   /* We must check the longest matching, if nmatch > 0.  */
 696   fl_longest_match = (nmatch != 0 || dfa->nbackref);
 697
 698   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
 699                             preg->translate, (preg->syntax & RE_ICASE) != 0,
 700                             dfa);
 701   if (BE (err != REG_NOERROR, 0))
 702     goto free_return;
 703   mctx.input.stop = stop;
 704   mctx.input.raw_stop = stop;
 705   mctx.input.newline_anchor = preg->newline_anchor;
 706
 707   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
 708   if (BE (err != REG_NOERROR, 0))
 709     goto free_return;
 710
 711   /* We will log all the DFA states through which the dfa pass,
 712      if nmatch > 1, or this dfa has "multibyte node", which is a
 713      back-reference or a node which can accept multibyte character or
 714      multi character collating element.  */
 715   if (nmatch > 1 || dfa->has_mb_node)
 716     {
 717       /* Avoid overflow.  */
 718       if (BE ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
 719                <= mctx.input.bufs_len), 0))
 720         {
 721           err = REG_ESPACE;
 722           goto free_return;
 723         }
 724
 725       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
 726       if (BE (mctx.state_log == NULL, 0))
 727         {
 728           err = REG_ESPACE;
 729           goto free_return;
 730         }
 731     }
 732   else
 733     mctx.state_log = NULL;
 734
 735   match_first = start;
 736   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
 737                            : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
 738
 739   /* Check incrementally whether the input string matches.  */
 740   incr = (last_start < start) ? -1 : 1;
 741   left_lim = (last_start < start) ? last_start : start;
 742   right_lim = (last_start < start) ? start : last_start;
 743   sb = dfa->mb_cur_max == 1;
 744   match_kind =
 745     (fastmap
 746      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
 747         | (start <= last_start ? 2 : 0)
 748         | (t != NULL ? 1 : 0))
 749      : 8);
 750
 751   for (;; match_first += incr)
 752     {
 753       err = REG_NOMATCH;
 754       if (match_first < left_lim || right_lim < match_first)
 755         goto free_return;
 756
 757       /* Advance as rapidly as possible through the string, until we
 758          find a plausible place to start matching.  This may be done
 759          with varying efficiency, so there are various possibilities:
 760          only the most common of them are specialized, in order to
 761          save on code size.  We use a switch statement for speed.  */
 762       switch (match_kind)
 763         {
 764         case 8:
 765           /* No fastmap.  */
 766           break;
 767
 768         case 7:
 769           /* Fastmap with single-byte translation, match forward.  */
 770           while (BE (match_first < right_lim, 1)
 771                  && !fastmap[t[(unsigned char) string[match_first]]])
 772             ++match_first;
 773           goto forward_match_found_start_or_reached_end;
 774
 775         case 6:
 776           /* Fastmap without translation, match forward.  */
 777           while (BE (match_first < right_lim, 1)
 778                  && !fastmap[(unsigned char) string[match_first]])
 779             ++match_first;
 780
 781         forward_match_found_start_or_reached_end:
 782           if (BE (match_first == right_lim, 0))
 783             {
 784               ch = match_first >= length
 785                        ? 0 : (unsigned char) string[match_first];
 786               if (!fastmap[t ? t[ch] : ch])
 787                 goto free_return;
 788             }
 789           break;
 790
 791         case 4:
 792         case 5:
 793           /* Fastmap without multi-byte translation, match backwards.  */
 794           while (match_first >= left_lim)
 795             {
 796               ch = match_first >= length
 797                        ? 0 : (unsigned char) string[match_first];
 798               if (fastmap[t ? t[ch] : ch])
 799                 break;
 800               --match_first;
 801             }
 802           if (match_first < left_lim)
 803             goto free_return;
 804           break;
 805
 806         default:
 807           /* In this case, we can't determine easily the current byte,
 808              since it might be a component byte of a multibyte
 809              character.  Then we use the constructed buffer instead.  */
 810           for (;;)
 811             {
 812               /* If MATCH_FIRST is out of the valid range, reconstruct the
 813                  buffers.  */
 814               __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
 815               if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
 816                 {
 817                   err = re_string_reconstruct (&mctx.input, match_first,
 818                                                eflags);
 819                   if (BE (err != REG_NOERROR, 0))
 820                     goto free_return;
 821
 822                   offset = match_first - mctx.input.raw_mbs_idx;
 823                 }
 824               /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
 825                  Note that MATCH_FIRST must not be smaller than 0.  */
 826               ch = (match_first >= length
 827                     ? 0 : re_string_byte_at (&mctx.input, offset));
 828               if (fastmap[ch])
 829                 break;
 830               match_first += incr;
 831               if (match_first < left_lim || match_first > right_lim)
 832                 {
 833                   err = REG_NOMATCH;
 834                   goto free_return;
 835                 }
 836             }
 837           break;
 838         }
 839
 840       /* Reconstruct the buffers so that the matcher can assume that
 841          the matching starts from the beginning of the buffer.  */
 842       err = re_string_reconstruct (&mctx.input, match_first, eflags);
 843       if (BE (err != REG_NOERROR, 0))
 844         goto free_return;
 845
 846 #ifdef RE_ENABLE_I18N
 847      /* Don't consider this char as a possible match start if it part,
 848         yet isn't the head, of a multibyte character.  */
 849       if (!sb && !re_string_first_byte (&mctx.input, 0))
 850         continue;
 851 #endif
 852
 853       /* It seems to be appropriate one, then use the matcher.  */
 854       /* We assume that the matching starts from 0.  */
 855       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
 856       match_last = check_matching (&mctx, fl_longest_match,
 857                                    start <= last_start ? &match_first : NULL);
 858       if (match_last != REG_MISSING)
 859         {
 860           if (BE (match_last == REG_ERROR, 0))
 861             {
 862               err = REG_ESPACE;
 863               goto free_return;
 864             }
 865           else
 866             {
 867               mctx.match_last = match_last;
 868               if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
 869                 {
 870                   re_dfastate_t *pstate = mctx.state_log[match_last];
 871                   mctx.last_node = check_halt_state_context (&mctx, pstate,
 872                                                              match_last);
 873                 }
 874               if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
 875                   || dfa->nbackref)
 876                 {
 877                   err = prune_impossible_nodes (&mctx);
 878                   if (err == REG_NOERROR)
 879                     break;
 880                   if (BE (err != REG_NOMATCH, 0))
 881                     goto free_return;
 882                   match_last = REG_MISSING;
 883                 }
 884               else
 885                 break; /* We found a match.  */
 886             }
 887         }
 888
 889       match_ctx_clean (&mctx);
 890     }
 891
 892 #ifdef DEBUG
 893   assert (match_last != REG_MISSING);
 894   assert (err == REG_NOERROR);
 895 #endif
 896
 897   /* Set pmatch[] if we need.  */
 898   if (nmatch > 0)
 899     {
 900       Idx reg_idx;
 901
 902       /* Initialize registers.  */
 903       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
 904         pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
 905
 906       /* Set the points where matching start/end.  */
 907       pmatch[0].rm_so = 0;
 908       pmatch[0].rm_eo = mctx.match_last;
 909       /* FIXME: This function should fail if mctx.match_last exceeds
 910          the maximum possible regoff_t value.  We need a new error
 911          code REG_OVERFLOW.  */
 912
 913       if (!preg->no_sub && nmatch > 1)
 914         {
 915           err = set_regs (preg, &mctx, nmatch, pmatch,
 916                           dfa->has_plural_match && dfa->nbackref > 0);
 917           if (BE (err != REG_NOERROR, 0))
 918             goto free_return;
 919         }
 920
 921       /* At last, add the offset to each register, since we slid
 922          the buffers so that we could assume that the matching starts
 923          from 0.  */
 924       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
 925         if (pmatch[reg_idx].rm_so != -1)
 926           {
 927 #ifdef RE_ENABLE_I18N
 928             if (BE (mctx.input.offsets_needed != 0, 0))
 929               {
 930                 pmatch[reg_idx].rm_so =
 931                   (pmatch[reg_idx].rm_so == mctx.input.valid_len
 932                    ? mctx.input.valid_raw_len
 933                    : mctx.input.offsets[pmatch[reg_idx].rm_so]);
 934                 pmatch[reg_idx].rm_eo =
 935                   (pmatch[reg_idx].rm_eo == mctx.input.valid_len
 936                    ? mctx.input.valid_raw_len
 937                    : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
 938               }
 939 #else
 940             assert (mctx.input.offsets_needed == 0);
 941 #endif
 942             pmatch[reg_idx].rm_so += match_first;
 943             pmatch[reg_idx].rm_eo += match_first;
 944           }
 945       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
 946         {
 947           pmatch[nmatch + reg_idx].rm_so = -1;
 948           pmatch[nmatch + reg_idx].rm_eo = -1;
 949         }
 950
 951       if (dfa->subexp_map)
 952         for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
 953           if (dfa->subexp_map[reg_idx] != reg_idx)
 954             {
 955               pmatch[reg_idx + 1].rm_so
 956                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
 957               pmatch[reg_idx + 1].rm_eo
 958                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
 959             }
 960     }
 961
 962  free_return:
 963   re_free (mctx.state_log);
 964   if (dfa->nbackref)
 965     match_ctx_free (&mctx);
 966   re_string_destruct (&mctx.input);
 967   return err;
 968 }
 969
 970 static reg_errcode_t
 971 __attribute_warn_unused_result__
 972 prune_impossible_nodes (re_match_context_t *mctx)
 973 {
 974   const re_dfa_t *const dfa = mctx->dfa;
 975   Idx halt_node, match_last;
 976   reg_errcode_t ret;
 977   re_dfastate_t **sifted_states;
 978   re_dfastate_t **lim_states = NULL;
 979   re_sift_context_t sctx;
 980 #ifdef DEBUG
 981   assert (mctx->state_log != NULL);
 982 #endif
 983   match_last = mctx->match_last;
 984   halt_node = mctx->last_node;
 985
 986   /* Avoid overflow.  */
 987   if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) <= match_last, 0))
 988     return REG_ESPACE;
 989
 990   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
 991   if (BE (sifted_states == NULL, 0))
 992     {
 993       ret = REG_ESPACE;
 994       goto free_return;
 995     }
 996   if (dfa->nbackref)
 997     {
 998       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
 999       if (BE (lim_states == NULL, 0))
1000         {
1001           ret = REG_ESPACE;
1002           goto free_return;
1003         }
1004       while (1)
1005         {
1006           memset (lim_states, '\0',
1007                   sizeof (re_dfastate_t *) * (match_last + 1));
1008           sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
1009                          match_last);
1010           ret = sift_states_backward (mctx, &sctx);
1011           re_node_set_free (&sctx.limits);
1012           if (BE (ret != REG_NOERROR, 0))
1013               goto free_return;
1014           if (sifted_states[0] != NULL || lim_states[0] != NULL)
1015             break;
1016           do
1017             {
1018               --match_last;
1019               if (! REG_VALID_INDEX (match_last))
1020                 {
1021                   ret = REG_NOMATCH;
1022                   goto free_return;
1023                 }
1024             } while (mctx->state_log[match_last] == NULL
1025                      || !mctx->state_log[match_last]->halt);
1026           halt_node = check_halt_state_context (mctx,
1027                                                 mctx->state_log[match_last],
1028                                                 match_last);
1029         }
1030       ret = merge_state_array (dfa, sifted_states, lim_states,
1031                                match_last + 1);
1032       re_free (lim_states);
1033       lim_states = NULL;
1034       if (BE (ret != REG_NOERROR, 0))
1035         goto free_return;
1036     }
1037   else
1038     {
1039       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
1040       ret = sift_states_backward (mctx, &sctx);
1041       re_node_set_free (&sctx.limits);
1042       if (BE (ret != REG_NOERROR, 0))
1043         goto free_return;
1044       if (sifted_states[0] == NULL)
1045         {
1046           ret = REG_NOMATCH;
1047           goto free_return;
1048         }
1049     }
1050   re_free (mctx->state_log);
1051   mctx->state_log = sifted_states;
1052   sifted_states = NULL;
1053   mctx->last_node = halt_node;
1054   mctx->match_last = match_last;
1055   ret = REG_NOERROR;
1056  free_return:
1057   re_free (sifted_states);
1058   re_free (lim_states);
1059   return ret;
1060 }
1061
1062 /* Acquire an initial state and return it.
1063    We must select appropriate initial state depending on the context,
1064    since initial states may have constraints like "\<", "^", etc..  */
1065
1066 static inline re_dfastate_t *
1067 __attribute ((always_inline)) internal_function
1068 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1069                             Idx idx)
1070 {
1071   const re_dfa_t *const dfa = mctx->dfa;
1072   if (dfa->init_state->has_constraint)
1073     {
1074       unsigned int context;
1075       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1076       if (IS_WORD_CONTEXT (context))
1077         return dfa->init_state_word;
1078       else if (IS_ORDINARY_CONTEXT (context))
1079         return dfa->init_state;
1080       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1081         return dfa->init_state_begbuf;
1082       else if (IS_NEWLINE_CONTEXT (context))
1083         return dfa->init_state_nl;
1084       else if (IS_BEGBUF_CONTEXT (context))
1085         {
1086           /* It is relatively rare case, then calculate on demand.  */
1087           return re_acquire_state_context (err, dfa,
1088                                            dfa->init_state->entrance_nodes,
1089                                            context);
1090         }
1091       else
1092         /* Must not happen?  */
1093         return dfa->init_state;
1094     }
1095   else
1096     return dfa->init_state;
1097 }
1098
1099 /* Check whether the regular expression match input string INPUT or not,
1100    and return the index where the matching end.  Return REG_MISSING if
1101    there is no match, and return REG_ERROR in case of an error.
1102    FL_LONGEST_MATCH means we want the POSIX longest matching.
1103    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1104    next place where we may want to try matching.
1105    Note that the matcher assumes that the matching starts from the current
1106    index of the buffer.  */
1107
1108 static Idx
1109 internal_function __attribute_warn_unused_result__
1110 check_matching (re_match_context_t *mctx, bool fl_longest_match,
1111                 Idx *p_match_first)
1112 {
1113   const re_dfa_t *const dfa = mctx->dfa;
1114   reg_errcode_t err;
1115   Idx match = 0;
1116   Idx match_last = REG_MISSING;
1117   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1118   re_dfastate_t *cur_state;
1119   bool at_init_state = p_match_first != NULL;
1120   Idx next_start_idx = cur_str_idx;
1121
1122   err = REG_NOERROR;
1123   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1124   /* An initial state must not be NULL (invalid).  */
1125   if (BE (cur_state == NULL, 0))
1126     {
1127       assert (err == REG_ESPACE);
1128       return REG_ERROR;
1129     }
1130
1131   if (mctx->state_log != NULL)
1132     {
1133       mctx->state_log[cur_str_idx] = cur_state;
1134
1135       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1136          later.  E.g. Processing back references.  */
1137       if (BE (dfa->nbackref, 0))
1138         {
1139           at_init_state = false;
1140           err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1141           if (BE (err != REG_NOERROR, 0))
1142             return err;
1143
1144           if (cur_state->has_backref)
1145             {
1146               err = transit_state_bkref (mctx, &cur_state->nodes);
1147               if (BE (err != REG_NOERROR, 0))
1148                 return err;
1149             }
1150         }
1151     }
1152
1153   /* If the RE accepts NULL string.  */
1154   if (BE (cur_state->halt, 0))
1155     {
1156       if (!cur_state->has_constraint
1157           || check_halt_state_context (mctx, cur_state, cur_str_idx))
1158         {
1159           if (!fl_longest_match)
1160             return cur_str_idx;
1161           else
1162             {
1163               match_last = cur_str_idx;
1164               match = 1;
1165             }
1166         }
1167     }
1168
1169   while (!re_string_eoi (&mctx->input))
1170     {
1171       re_dfastate_t *old_state = cur_state;
1172       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1173
1174       if ((BE (next_char_idx >= mctx->input.bufs_len, 0)
1175            && mctx->input.bufs_len < mctx->input.len)
1176           || (BE (next_char_idx >= mctx->input.valid_len, 0)
1177               && mctx->input.valid_len < mctx->input.len))
1178         {
1179           err = extend_buffers (mctx, next_char_idx + 1);
1180           if (BE (err != REG_NOERROR, 0))
1181             {
1182               assert (err == REG_ESPACE);
1183               return REG_ERROR;
1184             }
1185         }
1186
1187       cur_state = transit_state (&err, mctx, cur_state);
1188       if (mctx->state_log != NULL)
1189         cur_state = merge_state_with_log (&err, mctx, cur_state);
1190
1191       if (cur_state == NULL)
1192         {
1193           /* Reached the invalid state or an error.  Try to recover a valid
1194              state using the state log, if available and if we have not
1195              already found a valid (even if not the longest) match.  */
1196           if (BE (err != REG_NOERROR, 0))
1197             return REG_ERROR;
1198
1199           if (mctx->state_log == NULL
1200               || (match && !fl_longest_match)
1201               || (cur_state = find_recover_state (&err, mctx)) == NULL)
1202             break;
1203         }
1204
1205       if (BE (at_init_state, 0))
1206         {
1207           if (old_state == cur_state)
1208             next_start_idx = next_char_idx;
1209           else
1210             at_init_state = false;
1211         }
1212
1213       if (cur_state->halt)
1214         {
1215           /* Reached a halt state.
1216              Check the halt state can satisfy the current context.  */
1217           if (!cur_state->has_constraint
1218               || check_halt_state_context (mctx, cur_state,
1219                                            re_string_cur_idx (&mctx->input)))
1220             {
1221               /* We found an appropriate halt state.  */
1222               match_last = re_string_cur_idx (&mctx->input);
1223               match = 1;
1224
1225               /* We found a match, do not modify match_first below.  */
1226               p_match_first = NULL;
1227               if (!fl_longest_match)
1228                 break;
1229             }
1230         }
1231     }
1232
1233   if (p_match_first)
1234     *p_match_first += next_start_idx;
1235
1236   return match_last;
1237 }
1238
1239 /* Check NODE match the current context.  */
1240
1241 static bool
1242 internal_function
1243 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1244 {
1245   re_token_type_t type = dfa->nodes[node].type;
1246   unsigned int constraint = dfa->nodes[node].constraint;
1247   if (type != END_OF_RE)
1248     return false;
1249   if (!constraint)
1250     return true;
1251   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1252     return false;
1253   return true;
1254 }
1255
1256 /* Check the halt state STATE match the current context.
1257    Return 0 if not match, if the node, STATE has, is a halt node and
1258    match the context, return the node.  */
1259
1260 static Idx
1261 internal_function
1262 check_halt_state_context (const re_match_context_t *mctx,
1263                           const re_dfastate_t *state, Idx idx)
1264 {
1265   Idx i;
1266   unsigned int context;
1267 #ifdef DEBUG
1268   assert (state->halt);
1269 #endif
1270   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1271   for (i = 0; i < state->nodes.nelem; ++i)
1272     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1273       return state->nodes.elems[i];
1274   return 0;
1275 }
1276
1277 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1278    corresponding to the DFA).
1279    Return the destination node, and update EPS_VIA_NODES;
1280    return REG_MISSING in case of errors.  */
1281
1282 static Idx
1283 internal_function
1284 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1285                    Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1286                    struct re_fail_stack_t *fs)
1287 {
1288   const re_dfa_t *const dfa = mctx->dfa;
1289   Idx i;
1290   bool ok;
1291   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1292     {
1293       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1294       re_node_set *edests = &dfa->edests[node];
1295       Idx dest_node;
1296       ok = re_node_set_insert (eps_via_nodes, node);
1297       if (BE (! ok, 0))
1298         return REG_ERROR;
1299       /* Pick up a valid destination, or return REG_MISSING if none
1300          is found.  */
1301       for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
1302         {
1303           Idx candidate = edests->elems[i];
1304           if (!re_node_set_contains (cur_nodes, candidate))
1305             continue;
1306           if (dest_node == REG_MISSING)
1307             dest_node = candidate;
1308
1309           else
1310             {
1311               /* In order to avoid infinite loop like "(a*)*", return the second
1312                  epsilon-transition if the first was already considered.  */
1313               if (re_node_set_contains (eps_via_nodes, dest_node))
1314                 return candidate;
1315
1316               /* Otherwise, push the second epsilon-transition on the fail stack.  */
1317               else if (fs != NULL
1318                        && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1319                                            eps_via_nodes))
1320                 return REG_ERROR;
1321
1322               /* We know we are going to exit.  */
1323               break;
1324             }
1325         }
1326       return dest_node;
1327     }
1328   else
1329     {
1330       Idx naccepted = 0;
1331       re_token_type_t type = dfa->nodes[node].type;
1332
1333 #ifdef RE_ENABLE_I18N
1334       if (dfa->nodes[node].accept_mb)
1335         naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1336       else
1337 #endif /* RE_ENABLE_I18N */
1338       if (type == OP_BACK_REF)
1339         {
1340           Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1341           naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1342           if (fs != NULL)
1343             {
1344               if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1345                 return REG_MISSING;
1346               else if (naccepted)
1347                 {
1348                   char *buf = (char *) re_string_get_buffer (&mctx->input);
1349                   if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1350                               naccepted) != 0)
1351                     return REG_MISSING;
1352                 }
1353             }
1354
1355           if (naccepted == 0)
1356             {
1357               Idx dest_node;
1358               ok = re_node_set_insert (eps_via_nodes, node);
1359               if (BE (! ok, 0))
1360                 return REG_ERROR;
1361               dest_node = dfa->edests[node].elems[0];
1362               if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1363                                         dest_node))
1364                 return dest_node;
1365             }
1366         }
1367
1368       if (naccepted != 0
1369           || check_node_accept (mctx, dfa->nodes + node, *pidx))
1370         {
1371           Idx dest_node = dfa->nexts[node];
1372           *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1373           if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1374                      || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1375                                                dest_node)))
1376             return REG_MISSING;
1377           re_node_set_empty (eps_via_nodes);
1378           return dest_node;
1379         }
1380     }
1381   return REG_MISSING;
1382 }
1383
1384 static reg_errcode_t
1385 internal_function __attribute_warn_unused_result__
1386 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1387                  Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1388 {
1389   reg_errcode_t err;
1390   Idx num = fs->num++;
1391   if (fs->num == fs->alloc)
1392     {
1393       struct re_fail_stack_ent_t *new_array;
1394       new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1395                                        * fs->alloc * 2));
1396       if (new_array == NULL)
1397         return REG_ESPACE;
1398       fs->alloc *= 2;
1399       fs->stack = new_array;
1400     }
1401   fs->stack[num].idx = str_idx;
1402   fs->stack[num].node = dest_node;
1403   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1404   if (fs->stack[num].regs == NULL)
1405     return REG_ESPACE;
1406   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1407   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1408   return err;
1409 }
1410
1411 static Idx
1412 internal_function
1413 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
1414                 regmatch_t *regs, re_node_set *eps_via_nodes)
1415 {
1416   Idx num = --fs->num;
1417   assert (REG_VALID_INDEX (num));
1418   *pidx = fs->stack[num].idx;
1419   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1420   re_node_set_free (eps_via_nodes);
1421   re_free (fs->stack[num].regs);
1422   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1423   return fs->stack[num].node;
1424 }
1425
1426 /* Set the positions where the subexpressions are starts/ends to registers
1427    PMATCH.
1428    Note: We assume that pmatch[0] is already set, and
1429    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1430
1431 static reg_errcode_t
1432 internal_function __attribute_warn_unused_result__
1433 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1434           regmatch_t *pmatch, bool fl_backtrack)
1435 {
1436   const re_dfa_t *dfa = preg->buffer;
1437   Idx idx, cur_node;
1438   re_node_set eps_via_nodes;
1439   struct re_fail_stack_t *fs;
1440   struct re_fail_stack_t fs_body = { 0, 2, NULL };
1441   regmatch_t *prev_idx_match;
1442   bool prev_idx_match_malloced = false;
1443
1444 #ifdef DEBUG
1445   assert (nmatch > 1);
1446   assert (mctx->state_log != NULL);
1447 #endif
1448   if (fl_backtrack)
1449     {
1450       fs = &fs_body;
1451       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1452       if (fs->stack == NULL)
1453         return REG_ESPACE;
1454     }
1455   else
1456     fs = NULL;
1457
1458   cur_node = dfa->init_node;
1459   re_node_set_init_empty (&eps_via_nodes);
1460
1461   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1462     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1463   else
1464     {
1465       prev_idx_match = re_malloc (regmatch_t, nmatch);
1466       if (prev_idx_match == NULL)
1467         {
1468           free_fail_stack_return (fs);
1469           return REG_ESPACE;
1470         }
1471       prev_idx_match_malloced = true;
1472     }
1473   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1474
1475   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1476     {
1477       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1478
1479       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1480         {
1481           Idx reg_idx;
1482           if (fs)
1483             {
1484               for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1485                 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1486                   break;
1487               if (reg_idx == nmatch)
1488                 {
1489                   re_node_set_free (&eps_via_nodes);
1490                   if (prev_idx_match_malloced)
1491                     re_free (prev_idx_match);
1492                   return free_fail_stack_return (fs);
1493                 }
1494               cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1495                                          &eps_via_nodes);
1496             }
1497           else
1498             {
1499               re_node_set_free (&eps_via_nodes);
1500               if (prev_idx_match_malloced)
1501                 re_free (prev_idx_match);
1502               return REG_NOERROR;
1503             }
1504         }
1505
1506       /* Proceed to next node.  */
1507       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1508                                     &eps_via_nodes, fs);
1509
1510       if (BE (! REG_VALID_INDEX (cur_node), 0))
1511         {
1512           if (BE (cur_node == REG_ERROR, 0))
1513             {
1514               re_node_set_free (&eps_via_nodes);
1515               if (prev_idx_match_malloced)
1516                 re_free (prev_idx_match);
1517               free_fail_stack_return (fs);
1518               return REG_ESPACE;
1519             }
1520           if (fs)
1521             cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1522                                        &eps_via_nodes);
1523           else
1524             {
1525               re_node_set_free (&eps_via_nodes);
1526               if (prev_idx_match_malloced)
1527                 re_free (prev_idx_match);
1528               return REG_NOMATCH;
1529             }
1530         }
1531     }
1532   re_node_set_free (&eps_via_nodes);
1533   if (prev_idx_match_malloced)
1534     re_free (prev_idx_match);
1535   return free_fail_stack_return (fs);
1536 }
1537
1538 static reg_errcode_t
1539 internal_function
1540 free_fail_stack_return (struct re_fail_stack_t *fs)
1541 {
1542   if (fs)
1543     {
1544       Idx fs_idx;
1545       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1546         {
1547           re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1548           re_free (fs->stack[fs_idx].regs);
1549         }
1550       re_free (fs->stack);
1551     }
1552   return REG_NOERROR;
1553 }
1554
1555 static void
1556 internal_function
1557 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1558              regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1559 {
1560   int type = dfa->nodes[cur_node].type;
1561   if (type == OP_OPEN_SUBEXP)
1562     {
1563       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1564
1565       /* We are at the first node of this sub expression.  */
1566       if (reg_num < nmatch)
1567         {
1568           pmatch[reg_num].rm_so = cur_idx;
1569           pmatch[reg_num].rm_eo = -1;
1570         }
1571     }
1572   else if (type == OP_CLOSE_SUBEXP)
1573     {
1574       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1575       if (reg_num < nmatch)
1576         {
1577           /* We are at the last node of this sub expression.  */
1578           if (pmatch[reg_num].rm_so < cur_idx)
1579             {
1580               pmatch[reg_num].rm_eo = cur_idx;
1581               /* This is a non-empty match or we are not inside an optional
1582                  subexpression.  Accept this right away.  */
1583               memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1584             }
1585           else
1586             {
1587               if (dfa->nodes[cur_node].opt_subexp
1588                   && prev_idx_match[reg_num].rm_so != -1)
1589                 /* We transited through an empty match for an optional
1590                    subexpression, like (a?)*, and this is not the subexp's
1591                    first match.  Copy back the old content of the registers
1592                    so that matches of an inner subexpression are undone as
1593                    well, like in ((a?))*.  */
1594                 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1595               else
1596                 /* We completed a subexpression, but it may be part of
1597                    an optional one, so do not update PREV_IDX_MATCH.  */
1598                 pmatch[reg_num].rm_eo = cur_idx;
1599             }
1600         }
1601     }
1602 }
1603
1604 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1605    and sift the nodes in each states according to the following rules.
1606    Updated state_log will be wrote to STATE_LOG.
1607
1608    Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
1609      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1610         If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
1611         the LAST_NODE, we throw away the node 'a'.
1612      2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
1613         string 's' and transit to 'b':
1614         i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1615            away the node 'a'.
1616         ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1617             thrown away, we throw away the node 'a'.
1618      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1619         i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1620            node 'a'.
1621         ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1622             we throw away the node 'a'.  */
1623
1624 #define STATE_NODE_CONTAINS(state,node) \
1625   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1626
1627 static reg_errcode_t
1628 internal_function
1629 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1630 {
1631   reg_errcode_t err;
1632   int null_cnt = 0;
1633   Idx str_idx = sctx->last_str_idx;
1634   re_node_set cur_dest;
1635
1636 #ifdef DEBUG
1637   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1638 #endif
1639
1640   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1641      transit to the last_node and the last_node itself.  */
1642   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1643   if (BE (err != REG_NOERROR, 0))
1644     return err;
1645   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1646   if (BE (err != REG_NOERROR, 0))
1647     goto free_return;
1648
1649   /* Then check each states in the state_log.  */
1650   while (str_idx > 0)
1651     {
1652       /* Update counters.  */
1653       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1654       if (null_cnt > mctx->max_mb_elem_len)
1655         {
1656           memset (sctx->sifted_states, '\0',
1657                   sizeof (re_dfastate_t *) * str_idx);
1658           re_node_set_free (&cur_dest);
1659           return REG_NOERROR;
1660         }
1661       re_node_set_empty (&cur_dest);
1662       --str_idx;
1663
1664       if (mctx->state_log[str_idx])
1665         {
1666           err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1667           if (BE (err != REG_NOERROR, 0))
1668             goto free_return;
1669         }
1670
1671       /* Add all the nodes which satisfy the following conditions:
1672          - It can epsilon transit to a node in CUR_DEST.
1673          - It is in CUR_SRC.
1674          And update state_log.  */
1675       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1676       if (BE (err != REG_NOERROR, 0))
1677         goto free_return;
1678     }
1679   err = REG_NOERROR;
1680  free_return:
1681   re_node_set_free (&cur_dest);
1682   return err;
1683 }
1684
1685 static reg_errcode_t
1686 internal_function __attribute_warn_unused_result__
1687 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1688                      Idx str_idx, re_node_set *cur_dest)
1689 {
1690   const re_dfa_t *const dfa = mctx->dfa;
1691   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1692   Idx i;
1693
1694   /* Then build the next sifted state.
1695      We build the next sifted state on 'cur_dest', and update
1696      'sifted_states[str_idx]' with 'cur_dest'.
1697      Note:
1698      'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
1699      'cur_src' points the node_set of the old 'state_log[str_idx]'
1700      (with the epsilon nodes pre-filtered out).  */
1701   for (i = 0; i < cur_src->nelem; i++)
1702     {
1703       Idx prev_node = cur_src->elems[i];
1704       int naccepted = 0;
1705       bool ok;
1706
1707 #ifdef DEBUG
1708       re_token_type_t type = dfa->nodes[prev_node].type;
1709       assert (!IS_EPSILON_NODE (type));
1710 #endif
1711 #ifdef RE_ENABLE_I18N
1712       /* If the node may accept "multi byte".  */
1713       if (dfa->nodes[prev_node].accept_mb)
1714         naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1715                                          str_idx, sctx->last_str_idx);
1716 #endif /* RE_ENABLE_I18N */
1717
1718       /* We don't check backreferences here.
1719          See update_cur_sifted_state().  */
1720       if (!naccepted
1721           && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1722           && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1723                                   dfa->nexts[prev_node]))
1724         naccepted = 1;
1725
1726       if (naccepted == 0)
1727         continue;
1728
1729       if (sctx->limits.nelem)
1730         {
1731           Idx to_idx = str_idx + naccepted;
1732           if (check_dst_limits (mctx, &sctx->limits,
1733                                 dfa->nexts[prev_node], to_idx,
1734                                 prev_node, str_idx))
1735             continue;
1736         }
1737       ok = re_node_set_insert (cur_dest, prev_node);
1738       if (BE (! ok, 0))
1739         return REG_ESPACE;
1740     }
1741
1742   return REG_NOERROR;
1743 }
1744
1745 /* Helper functions.  */
1746
1747 static reg_errcode_t
1748 internal_function
1749 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1750 {
1751   Idx top = mctx->state_log_top;
1752
1753   if ((next_state_log_idx >= mctx->input.bufs_len
1754        && mctx->input.bufs_len < mctx->input.len)
1755       || (next_state_log_idx >= mctx->input.valid_len
1756           && mctx->input.valid_len < mctx->input.len))
1757     {
1758       reg_errcode_t err;
1759       err = extend_buffers (mctx, next_state_log_idx + 1);
1760       if (BE (err != REG_NOERROR, 0))
1761         return err;
1762     }
1763
1764   if (top < next_state_log_idx)
1765     {
1766       memset (mctx->state_log + top + 1, '\0',
1767               sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1768       mctx->state_log_top = next_state_log_idx;
1769     }
1770   return REG_NOERROR;
1771 }
1772
1773 static reg_errcode_t
1774 internal_function
1775 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1776                    re_dfastate_t **src, Idx num)
1777 {
1778   Idx st_idx;
1779   reg_errcode_t err;
1780   for (st_idx = 0; st_idx < num; ++st_idx)
1781     {
1782       if (dst[st_idx] == NULL)
1783         dst[st_idx] = src[st_idx];
1784       else if (src[st_idx] != NULL)
1785         {
1786           re_node_set merged_set;
1787           err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1788                                         &src[st_idx]->nodes);
1789           if (BE (err != REG_NOERROR, 0))
1790             return err;
1791           dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1792           re_node_set_free (&merged_set);
1793           if (BE (err != REG_NOERROR, 0))
1794             return err;
1795         }
1796     }
1797   return REG_NOERROR;
1798 }
1799
1800 static reg_errcode_t
1801 internal_function
1802 update_cur_sifted_state (const re_match_context_t *mctx,
1803                          re_sift_context_t *sctx, Idx str_idx,
1804                          re_node_set *dest_nodes)
1805 {
1806   const re_dfa_t *const dfa = mctx->dfa;
1807   reg_errcode_t err = REG_NOERROR;
1808   const re_node_set *candidates;
1809   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1810                 : &mctx->state_log[str_idx]->nodes);
1811
1812   if (dest_nodes->nelem == 0)
1813     sctx->sifted_states[str_idx] = NULL;
1814   else
1815     {
1816       if (candidates)
1817         {
1818           /* At first, add the nodes which can epsilon transit to a node in
1819              DEST_NODE.  */
1820           err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1821           if (BE (err != REG_NOERROR, 0))
1822             return err;
1823
1824           /* Then, check the limitations in the current sift_context.  */
1825           if (sctx->limits.nelem)
1826             {
1827               err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1828                                          mctx->bkref_ents, str_idx);
1829               if (BE (err != REG_NOERROR, 0))
1830                 return err;
1831             }
1832         }
1833
1834       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1835       if (BE (err != REG_NOERROR, 0))
1836         return err;
1837     }
1838
1839   if (candidates && mctx->state_log[str_idx]->has_backref)
1840     {
1841       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1842       if (BE (err != REG_NOERROR, 0))
1843         return err;
1844     }
1845   return REG_NOERROR;
1846 }
1847
1848 static reg_errcode_t
1849 internal_function __attribute_warn_unused_result__
1850 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1851                        const re_node_set *candidates)
1852 {
1853   reg_errcode_t err = REG_NOERROR;
1854   Idx i;
1855
1856   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1857   if (BE (err != REG_NOERROR, 0))
1858     return err;
1859
1860   if (!state->inveclosure.alloc)
1861     {
1862       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1863       if (BE (err != REG_NOERROR, 0))
1864         return REG_ESPACE;
1865       for (i = 0; i < dest_nodes->nelem; i++)
1866         {
1867           err = re_node_set_merge (&state->inveclosure,
1868                                    dfa->inveclosures + dest_nodes->elems[i]);
1869           if (BE (err != REG_NOERROR, 0))
1870             return REG_ESPACE;
1871         }
1872     }
1873   return re_node_set_add_intersect (dest_nodes, candidates,
1874                                     &state->inveclosure);
1875 }
1876
1877 static reg_errcode_t
1878 internal_function
1879 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1880                        const re_node_set *candidates)
1881 {
1882     Idx ecl_idx;
1883     reg_errcode_t err;
1884     re_node_set *inv_eclosure = dfa->inveclosures + node;
1885     re_node_set except_nodes;
1886     re_node_set_init_empty (&except_nodes);
1887     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1888       {
1889         Idx cur_node = inv_eclosure->elems[ecl_idx];
1890         if (cur_node == node)
1891           continue;
1892         if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1893           {
1894             Idx edst1 = dfa->edests[cur_node].elems[0];
1895             Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1896                          ? dfa->edests[cur_node].elems[1] : REG_MISSING);
1897             if ((!re_node_set_contains (inv_eclosure, edst1)
1898                  && re_node_set_contains (dest_nodes, edst1))
1899                 || (REG_VALID_NONZERO_INDEX (edst2)
1900                     && !re_node_set_contains (inv_eclosure, edst2)
1901                     && re_node_set_contains (dest_nodes, edst2)))
1902               {
1903                 err = re_node_set_add_intersect (&except_nodes, candidates,
1904                                                  dfa->inveclosures + cur_node);
1905                 if (BE (err != REG_NOERROR, 0))
1906                   {
1907                     re_node_set_free (&except_nodes);
1908                     return err;
1909                   }
1910               }
1911           }
1912       }
1913     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1914       {
1915         Idx cur_node = inv_eclosure->elems[ecl_idx];
1916         if (!re_node_set_contains (&except_nodes, cur_node))
1917           {
1918             Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1919             re_node_set_remove_at (dest_nodes, idx);
1920           }
1921       }
1922     re_node_set_free (&except_nodes);
1923     return REG_NOERROR;
1924 }
1925
1926 static bool
1927 internal_function
1928 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1929                   Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1930 {
1931   const re_dfa_t *const dfa = mctx->dfa;
1932   Idx lim_idx, src_pos, dst_pos;
1933
1934   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1935   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1936   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1937     {
1938       Idx subexp_idx;
1939       struct re_backref_cache_entry *ent;
1940       ent = mctx->bkref_ents + limits->elems[lim_idx];
1941       subexp_idx = dfa->nodes[ent->node].opr.idx;
1942
1943       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1944                                            subexp_idx, dst_node, dst_idx,
1945                                            dst_bkref_idx);
1946       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1947                                            subexp_idx, src_node, src_idx,
1948                                            src_bkref_idx);
1949
1950       /* In case of:
1951          <src> <dst> ( <subexp> )
1952          ( <subexp> ) <src> <dst>
1953          ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1954       if (src_pos == dst_pos)
1955         continue; /* This is unrelated limitation.  */
1956       else
1957         return true;
1958     }
1959   return false;
1960 }
1961
1962 static int
1963 internal_function
1964 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1965                              Idx subexp_idx, Idx from_node, Idx bkref_idx)
1966 {
1967   const re_dfa_t *const dfa = mctx->dfa;
1968   const re_node_set *eclosures = dfa->eclosures + from_node;
1969   Idx node_idx;
1970
1971   /* Else, we are on the boundary: examine the nodes on the epsilon
1972      closure.  */
1973   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1974     {
1975       Idx node = eclosures->elems[node_idx];
1976       switch (dfa->nodes[node].type)
1977         {
1978         case OP_BACK_REF:
1979           if (bkref_idx != REG_MISSING)
1980             {
1981               struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1982               do
1983                 {
1984                   Idx dst;
1985                   int cpos;
1986
1987                   if (ent->node != node)
1988                     continue;
1989
1990                   if (subexp_idx < BITSET_WORD_BITS
1991                       && !(ent->eps_reachable_subexps_map
1992                            & ((bitset_word_t) 1 << subexp_idx)))
1993                     continue;
1994
1995                   /* Recurse trying to reach the OP_OPEN_SUBEXP and
1996                      OP_CLOSE_SUBEXP cases below.  But, if the
1997                      destination node is the same node as the source
1998                      node, don't recurse because it would cause an
1999                      infinite loop: a regex that exhibits this behavior
2000                      is ()\1*\1*  */
2001                   dst = dfa->edests[node].elems[0];
2002                   if (dst == from_node)
2003                     {
2004                       if (boundaries & 1)
2005                         return -1;
2006                       else /* if (boundaries & 2) */
2007                         return 0;
2008                     }
2009
2010                   cpos =
2011                     check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2012                                                  dst, bkref_idx);
2013                   if (cpos == -1 /* && (boundaries & 1) */)
2014                     return -1;
2015                   if (cpos == 0 && (boundaries & 2))
2016                     return 0;
2017
2018                   if (subexp_idx < BITSET_WORD_BITS)
2019                     ent->eps_reachable_subexps_map
2020                       &= ~((bitset_word_t) 1 << subexp_idx);
2021                 }
2022               while (ent++->more);
2023             }
2024           break;
2025
2026         case OP_OPEN_SUBEXP:
2027           if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
2028             return -1;
2029           break;
2030
2031         case OP_CLOSE_SUBEXP:
2032           if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
2033             return 0;
2034           break;
2035
2036         default:
2037             break;
2038         }
2039     }
2040
2041   return (boundaries & 2) ? 1 : 0;
2042 }
2043
2044 static int
2045 internal_function
2046 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
2047                            Idx subexp_idx, Idx from_node, Idx str_idx,
2048                            Idx bkref_idx)
2049 {
2050   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2051   int boundaries;
2052
2053   /* If we are outside the range of the subexpression, return -1 or 1.  */
2054   if (str_idx < lim->subexp_from)
2055     return -1;
2056
2057   if (lim->subexp_to < str_idx)
2058     return 1;
2059
2060   /* If we are within the subexpression, return 0.  */
2061   boundaries = (str_idx == lim->subexp_from);
2062   boundaries |= (str_idx == lim->subexp_to) << 1;
2063   if (boundaries == 0)
2064     return 0;
2065
2066   /* Else, examine epsilon closure.  */
2067   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2068                                       from_node, bkref_idx);
2069 }
2070
2071 /* Check the limitations of sub expressions LIMITS, and remove the nodes
2072    which are against limitations from DEST_NODES. */
2073
2074 static reg_errcode_t
2075 internal_function
2076 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2077                      const re_node_set *candidates, re_node_set *limits,
2078                      struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2079 {
2080   reg_errcode_t err;
2081   Idx node_idx, lim_idx;
2082
2083   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2084     {
2085       Idx subexp_idx;
2086       struct re_backref_cache_entry *ent;
2087       ent = bkref_ents + limits->elems[lim_idx];
2088
2089       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2090         continue; /* This is unrelated limitation.  */
2091
2092       subexp_idx = dfa->nodes[ent->node].opr.idx;
2093       if (ent->subexp_to == str_idx)
2094         {
2095           Idx ops_node = REG_MISSING;
2096           Idx cls_node = REG_MISSING;
2097           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2098             {
2099               Idx node = dest_nodes->elems[node_idx];
2100               re_token_type_t type = dfa->nodes[node].type;
2101               if (type == OP_OPEN_SUBEXP
2102                   && subexp_idx == dfa->nodes[node].opr.idx)
2103                 ops_node = node;
2104               else if (type == OP_CLOSE_SUBEXP
2105                        && subexp_idx == dfa->nodes[node].opr.idx)
2106                 cls_node = node;
2107             }
2108
2109           /* Check the limitation of the open subexpression.  */
2110           /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2111           if (REG_VALID_INDEX (ops_node))
2112             {
2113               err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2114                                            candidates);
2115               if (BE (err != REG_NOERROR, 0))
2116                 return err;
2117             }
2118
2119           /* Check the limitation of the close subexpression.  */
2120           if (REG_VALID_INDEX (cls_node))
2121             for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2122               {
2123                 Idx node = dest_nodes->elems[node_idx];
2124                 if (!re_node_set_contains (dfa->inveclosures + node,
2125                                            cls_node)
2126                     && !re_node_set_contains (dfa->eclosures + node,
2127                                               cls_node))
2128                   {
2129                     /* It is against this limitation.
2130                        Remove it form the current sifted state.  */
2131                     err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2132                                                  candidates);
2133                     if (BE (err != REG_NOERROR, 0))
2134                       return err;
2135                     --node_idx;
2136                   }
2137               }
2138         }
2139       else /* (ent->subexp_to != str_idx)  */
2140         {
2141           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2142             {
2143               Idx node = dest_nodes->elems[node_idx];
2144               re_token_type_t type = dfa->nodes[node].type;
2145               if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2146                 {
2147                   if (subexp_idx != dfa->nodes[node].opr.idx)
2148                     continue;
2149                   /* It is against this limitation.
2150                      Remove it form the current sifted state.  */
2151                   err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2152                                                candidates);
2153                   if (BE (err != REG_NOERROR, 0))
2154                     return err;
2155                 }
2156             }
2157         }
2158     }
2159   return REG_NOERROR;
2160 }
2161
2162 static reg_errcode_t
2163 internal_function __attribute_warn_unused_result__
2164 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2165                    Idx str_idx, const re_node_set *candidates)
2166 {
2167   const re_dfa_t *const dfa = mctx->dfa;
2168   reg_errcode_t err;
2169   Idx node_idx, node;
2170   re_sift_context_t local_sctx;
2171   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2172
2173   if (first_idx == REG_MISSING)
2174     return REG_NOERROR;
2175
2176   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2177
2178   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2179     {
2180       Idx enabled_idx;
2181       re_token_type_t type;
2182       struct re_backref_cache_entry *entry;
2183       node = candidates->elems[node_idx];
2184       type = dfa->nodes[node].type;
2185       /* Avoid infinite loop for the REs like "()\1+".  */
2186       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2187         continue;
2188       if (type != OP_BACK_REF)
2189         continue;
2190
2191       entry = mctx->bkref_ents + first_idx;
2192       enabled_idx = first_idx;
2193       do
2194         {
2195           Idx subexp_len;
2196           Idx to_idx;
2197           Idx dst_node;
2198           bool ok;
2199           re_dfastate_t *cur_state;
2200
2201           if (entry->node != node)
2202             continue;
2203           subexp_len = entry->subexp_to - entry->subexp_from;
2204           to_idx = str_idx + subexp_len;
2205           dst_node = (subexp_len ? dfa->nexts[node]
2206                       : dfa->edests[node].elems[0]);
2207
2208           if (to_idx > sctx->last_str_idx
2209               || sctx->sifted_states[to_idx] == NULL
2210               || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2211               || check_dst_limits (mctx, &sctx->limits, node,
2212                                    str_idx, dst_node, to_idx))
2213             continue;
2214
2215           if (local_sctx.sifted_states == NULL)
2216             {
2217               local_sctx = *sctx;
2218               err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2219               if (BE (err != REG_NOERROR, 0))
2220                 goto free_return;
2221             }
2222           local_sctx.last_node = node;
2223           local_sctx.last_str_idx = str_idx;
2224           ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2225           if (BE (! ok, 0))
2226             {
2227               err = REG_ESPACE;
2228               goto free_return;
2229             }
2230           cur_state = local_sctx.sifted_states[str_idx];
2231           err = sift_states_backward (mctx, &local_sctx);
2232           if (BE (err != REG_NOERROR, 0))
2233             goto free_return;
2234           if (sctx->limited_states != NULL)
2235             {
2236               err = merge_state_array (dfa, sctx->limited_states,
2237                                        local_sctx.sifted_states,
2238                                        str_idx + 1);
2239               if (BE (err != REG_NOERROR, 0))
2240                 goto free_return;
2241             }
2242           local_sctx.sifted_states[str_idx] = cur_state;
2243           re_node_set_remove (&local_sctx.limits, enabled_idx);
2244
2245           /* mctx->bkref_ents may have changed, reload the pointer.  */
2246           entry = mctx->bkref_ents + enabled_idx;
2247         }
2248       while (enabled_idx++, entry++->more);
2249     }
2250   err = REG_NOERROR;
2251  free_return:
2252   if (local_sctx.sifted_states != NULL)
2253     {
2254       re_node_set_free (&local_sctx.limits);
2255     }
2256
2257   return err;
2258 }
2259
2260
2261 #ifdef RE_ENABLE_I18N
2262 static int
2263 internal_function
2264 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2265                      Idx node_idx, Idx str_idx, Idx max_str_idx)
2266 {
2267   const re_dfa_t *const dfa = mctx->dfa;
2268   int naccepted;
2269   /* Check the node can accept "multi byte".  */
2270   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2271   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2272       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2273                             dfa->nexts[node_idx]))
2274     /* The node can't accept the "multi byte", or the
2275        destination was already thrown away, then the node
2276        could't accept the current input "multi byte".   */
2277     naccepted = 0;
2278   /* Otherwise, it is sure that the node could accept
2279      'naccepted' bytes input.  */
2280   return naccepted;
2281 }
2282 #endif /* RE_ENABLE_I18N */
2283
2284 \f
2285 /* Functions for state transition.  */
2286
2287 /* Return the next state to which the current state STATE will transit by
2288    accepting the current input byte, and update STATE_LOG if necessary.
2289    If STATE can accept a multibyte char/collating element/back reference
2290    update the destination of STATE_LOG.  */
2291
2292 static re_dfastate_t *
2293 internal_function __attribute_warn_unused_result__
2294 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2295                re_dfastate_t *state)
2296 {
2297   re_dfastate_t **trtable;
2298   unsigned char ch;
2299
2300 #ifdef RE_ENABLE_I18N
2301   /* If the current state can accept multibyte.  */
2302   if (BE (state->accept_mb, 0))
2303     {
2304       *err = transit_state_mb (mctx, state);
2305       if (BE (*err != REG_NOERROR, 0))
2306         return NULL;
2307     }
2308 #endif /* RE_ENABLE_I18N */
2309
2310   /* Then decide the next state with the single byte.  */
2311 #if 0
2312   if (0)
2313     /* don't use transition table  */
2314     return transit_state_sb (err, mctx, state);
2315 #endif
2316
2317   /* Use transition table  */
2318   ch = re_string_fetch_byte (&mctx->input);
2319   for (;;)
2320     {
2321       trtable = state->trtable;
2322       if (BE (trtable != NULL, 1))
2323         return trtable[ch];
2324
2325       trtable = state->word_trtable;
2326       if (BE (trtable != NULL, 1))
2327         {
2328           unsigned int context;
2329           context
2330             = re_string_context_at (&mctx->input,
2331                                     re_string_cur_idx (&mctx->input) - 1,
2332                                     mctx->eflags);
2333           if (IS_WORD_CONTEXT (context))
2334             return trtable[ch + SBC_MAX];
2335           else
2336             return trtable[ch];
2337         }
2338
2339       if (!build_trtable (mctx->dfa, state))
2340         {
2341           *err = REG_ESPACE;
2342           return NULL;
2343         }
2344
2345       /* Retry, we now have a transition table.  */
2346     }
2347 }
2348
2349 /* Update the state_log if we need */
2350 static re_dfastate_t *
2351 internal_function
2352 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2353                       re_dfastate_t *next_state)
2354 {
2355   const re_dfa_t *const dfa = mctx->dfa;
2356   Idx cur_idx = re_string_cur_idx (&mctx->input);
2357
2358   if (cur_idx > mctx->state_log_top)
2359     {
2360       mctx->state_log[cur_idx] = next_state;
2361       mctx->state_log_top = cur_idx;
2362     }
2363   else if (mctx->state_log[cur_idx] == 0)
2364     {
2365       mctx->state_log[cur_idx] = next_state;
2366     }
2367   else
2368     {
2369       re_dfastate_t *pstate;
2370       unsigned int context;
2371       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2372       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2373          the destination of a multibyte char/collating element/
2374          back reference.  Then the next state is the union set of
2375          these destinations and the results of the transition table.  */
2376       pstate = mctx->state_log[cur_idx];
2377       log_nodes = pstate->entrance_nodes;
2378       if (next_state != NULL)
2379         {
2380           table_nodes = next_state->entrance_nodes;
2381           *err = re_node_set_init_union (&next_nodes, table_nodes,
2382                                              log_nodes);
2383           if (BE (*err != REG_NOERROR, 0))
2384             return NULL;
2385         }
2386       else
2387         next_nodes = *log_nodes;
2388       /* Note: We already add the nodes of the initial state,
2389          then we don't need to add them here.  */
2390
2391       context = re_string_context_at (&mctx->input,
2392                                       re_string_cur_idx (&mctx->input) - 1,
2393                                       mctx->eflags);
2394       next_state = mctx->state_log[cur_idx]
2395         = re_acquire_state_context (err, dfa, &next_nodes, context);
2396       /* We don't need to check errors here, since the return value of
2397          this function is next_state and ERR is already set.  */
2398
2399       if (table_nodes != NULL)
2400         re_node_set_free (&next_nodes);
2401     }
2402
2403   if (BE (dfa->nbackref, 0) && next_state != NULL)
2404     {
2405       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2406          later.  We must check them here, since the back references in the
2407          next state might use them.  */
2408       *err = check_subexp_matching_top (mctx, &next_state->nodes,
2409                                         cur_idx);
2410       if (BE (*err != REG_NOERROR, 0))
2411         return NULL;
2412
2413       /* If the next state has back references.  */
2414       if (next_state->has_backref)
2415         {
2416           *err = transit_state_bkref (mctx, &next_state->nodes);
2417           if (BE (*err != REG_NOERROR, 0))
2418             return NULL;
2419           next_state = mctx->state_log[cur_idx];
2420         }
2421     }
2422
2423   return next_state;
2424 }
2425
2426 /* Skip bytes in the input that correspond to part of a
2427    multi-byte match, then look in the log for a state
2428    from which to restart matching.  */
2429 static re_dfastate_t *
2430 internal_function
2431 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2432 {
2433   re_dfastate_t *cur_state;
2434   do
2435     {
2436       Idx max = mctx->state_log_top;
2437       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2438
2439       do
2440         {
2441           if (++cur_str_idx > max)
2442             return NULL;
2443           re_string_skip_bytes (&mctx->input, 1);
2444         }
2445       while (mctx->state_log[cur_str_idx] == NULL);
2446
2447       cur_state = merge_state_with_log (err, mctx, NULL);
2448     }
2449   while (*err == REG_NOERROR && cur_state == NULL);
2450   return cur_state;
2451 }
2452
2453 /* Helper functions for transit_state.  */
2454
2455 /* From the node set CUR_NODES, pick up the nodes whose types are
2456    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2457    expression. And register them to use them later for evaluating the
2458    corresponding back references.  */
2459
2460 static reg_errcode_t
2461 internal_function
2462 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2463                            Idx str_idx)
2464 {
2465   const re_dfa_t *const dfa = mctx->dfa;
2466   Idx node_idx;
2467   reg_errcode_t err;
2468
2469   /* TODO: This isn't efficient.
2470            Because there might be more than one nodes whose types are
2471            OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2472            nodes.
2473            E.g. RE: (a){2}  */
2474   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2475     {
2476       Idx node = cur_nodes->elems[node_idx];
2477       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2478           && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2479           && (dfa->used_bkref_map
2480               & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2481         {
2482           err = match_ctx_add_subtop (mctx, node, str_idx);
2483           if (BE (err != REG_NOERROR, 0))
2484             return err;
2485         }
2486     }
2487   return REG_NOERROR;
2488 }
2489
2490 #if 0
2491 /* Return the next state to which the current state STATE will transit by
2492    accepting the current input byte.  */
2493
2494 static re_dfastate_t *
2495 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2496                   re_dfastate_t *state)
2497 {
2498   const re_dfa_t *const dfa = mctx->dfa;
2499   re_node_set next_nodes;
2500   re_dfastate_t *next_state;
2501   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2502   unsigned int context;
2503
2504   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2505   if (BE (*err != REG_NOERROR, 0))
2506     return NULL;
2507   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2508     {
2509       Idx cur_node = state->nodes.elems[node_cnt];
2510       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2511         {
2512           *err = re_node_set_merge (&next_nodes,
2513                                     dfa->eclosures + dfa->nexts[cur_node]);
2514           if (BE (*err != REG_NOERROR, 0))
2515             {
2516               re_node_set_free (&next_nodes);
2517               return NULL;
2518             }
2519         }
2520     }
2521   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2522   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2523   /* We don't need to check errors here, since the return value of
2524      this function is next_state and ERR is already set.  */
2525
2526   re_node_set_free (&next_nodes);
2527   re_string_skip_bytes (&mctx->input, 1);
2528   return next_state;
2529 }
2530 #endif
2531
2532 #ifdef RE_ENABLE_I18N
2533 static reg_errcode_t
2534 internal_function
2535 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2536 {
2537   const re_dfa_t *const dfa = mctx->dfa;
2538   reg_errcode_t err;
2539   Idx i;
2540
2541   for (i = 0; i < pstate->nodes.nelem; ++i)
2542     {
2543       re_node_set dest_nodes, *new_nodes;
2544       Idx cur_node_idx = pstate->nodes.elems[i];
2545       int naccepted;
2546       Idx dest_idx;
2547       unsigned int context;
2548       re_dfastate_t *dest_state;
2549
2550       if (!dfa->nodes[cur_node_idx].accept_mb)
2551         continue;
2552
2553       if (dfa->nodes[cur_node_idx].constraint)
2554         {
2555           context = re_string_context_at (&mctx->input,
2556                                           re_string_cur_idx (&mctx->input),
2557                                           mctx->eflags);
2558           if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2559                                            context))
2560             continue;
2561         }
2562
2563       /* How many bytes the node can accept?  */
2564       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2565                                            re_string_cur_idx (&mctx->input));
2566       if (naccepted == 0)
2567         continue;
2568
2569       /* The node can accepts 'naccepted' bytes.  */
2570       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2571       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2572                                : mctx->max_mb_elem_len);
2573       err = clean_state_log_if_needed (mctx, dest_idx);
2574       if (BE (err != REG_NOERROR, 0))
2575         return err;
2576 #ifdef DEBUG
2577       assert (dfa->nexts[cur_node_idx] != REG_MISSING);
2578 #endif
2579       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2580
2581       dest_state = mctx->state_log[dest_idx];
2582       if (dest_state == NULL)
2583         dest_nodes = *new_nodes;
2584       else
2585         {
2586           err = re_node_set_init_union (&dest_nodes,
2587                                         dest_state->entrance_nodes, new_nodes);
2588           if (BE (err != REG_NOERROR, 0))
2589             return err;
2590         }
2591       context = re_string_context_at (&mctx->input, dest_idx - 1,
2592                                       mctx->eflags);
2593       mctx->state_log[dest_idx]
2594         = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2595       if (dest_state != NULL)
2596         re_node_set_free (&dest_nodes);
2597       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2598         return err;
2599     }
2600   return REG_NOERROR;
2601 }
2602 #endif /* RE_ENABLE_I18N */
2603
2604 static reg_errcode_t
2605 internal_function
2606 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2607 {
2608   const re_dfa_t *const dfa = mctx->dfa;
2609   reg_errcode_t err;
2610   Idx i;
2611   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2612
2613   for (i = 0; i < nodes->nelem; ++i)
2614     {
2615       Idx dest_str_idx, prev_nelem, bkc_idx;
2616       Idx node_idx = nodes->elems[i];
2617       unsigned int context;
2618       const re_token_t *node = dfa->nodes + node_idx;
2619       re_node_set *new_dest_nodes;
2620
2621       /* Check whether 'node' is a backreference or not.  */
2622       if (node->type != OP_BACK_REF)
2623         continue;
2624
2625       if (node->constraint)
2626         {
2627           context = re_string_context_at (&mctx->input, cur_str_idx,
2628                                           mctx->eflags);
2629           if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2630             continue;
2631         }
2632
2633       /* 'node' is a backreference.
2634          Check the substring which the substring matched.  */
2635       bkc_idx = mctx->nbkref_ents;
2636       err = get_subexp (mctx, node_idx, cur_str_idx);
2637       if (BE (err != REG_NOERROR, 0))
2638         goto free_return;
2639
2640       /* And add the epsilon closures (which is 'new_dest_nodes') of
2641          the backreference to appropriate state_log.  */
2642 #ifdef DEBUG
2643       assert (dfa->nexts[node_idx] != REG_MISSING);
2644 #endif
2645       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2646         {
2647           Idx subexp_len;
2648           re_dfastate_t *dest_state;
2649           struct re_backref_cache_entry *bkref_ent;
2650           bkref_ent = mctx->bkref_ents + bkc_idx;
2651           if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2652             continue;
2653           subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2654           new_dest_nodes = (subexp_len == 0
2655                             ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2656                             : dfa->eclosures + dfa->nexts[node_idx]);
2657           dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2658                           - bkref_ent->subexp_from);
2659           context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2660                                           mctx->eflags);
2661           dest_state = mctx->state_log[dest_str_idx];
2662           prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2663                         : mctx->state_log[cur_str_idx]->nodes.nelem);
2664           /* Add 'new_dest_node' to state_log.  */
2665           if (dest_state == NULL)
2666             {
2667               mctx->state_log[dest_str_idx]
2668                 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2669                                             context);
2670               if (BE (mctx->state_log[dest_str_idx] == NULL
2671                       && err != REG_NOERROR, 0))
2672                 goto free_return;
2673             }
2674           else
2675             {
2676               re_node_set dest_nodes;
2677               err = re_node_set_init_union (&dest_nodes,
2678                                             dest_state->entrance_nodes,
2679                                             new_dest_nodes);
2680               if (BE (err != REG_NOERROR, 0))
2681                 {
2682                   re_node_set_free (&dest_nodes);
2683                   goto free_return;
2684                 }
2685               mctx->state_log[dest_str_idx]
2686                 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2687               re_node_set_free (&dest_nodes);
2688               if (BE (mctx->state_log[dest_str_idx] == NULL
2689                       && err != REG_NOERROR, 0))
2690                 goto free_return;
2691             }
2692           /* We need to check recursively if the backreference can epsilon
2693              transit.  */
2694           if (subexp_len == 0
2695               && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2696             {
2697               err = check_subexp_matching_top (mctx, new_dest_nodes,
2698                                                cur_str_idx);
2699               if (BE (err != REG_NOERROR, 0))
2700                 goto free_return;
2701               err = transit_state_bkref (mctx, new_dest_nodes);
2702               if (BE (err != REG_NOERROR, 0))
2703                 goto free_return;
2704             }
2705         }
2706     }
2707   err = REG_NOERROR;
2708  free_return:
2709   return err;
2710 }
2711
2712 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2713    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2714    Note that we might collect inappropriate candidates here.
2715    However, the cost of checking them strictly here is too high, then we
2716    delay these checking for prune_impossible_nodes().  */
2717
2718 static reg_errcode_t
2719 internal_function __attribute_warn_unused_result__
2720 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2721 {
2722   const re_dfa_t *const dfa = mctx->dfa;
2723   Idx subexp_num, sub_top_idx;
2724   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2725   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2726   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2727   if (cache_idx != REG_MISSING)
2728     {
2729       const struct re_backref_cache_entry *entry
2730         = mctx->bkref_ents + cache_idx;
2731       do
2732         if (entry->node == bkref_node)
2733           return REG_NOERROR; /* We already checked it.  */
2734       while (entry++->more);
2735     }
2736
2737   subexp_num = dfa->nodes[bkref_node].opr.idx;
2738
2739   /* For each sub expression  */
2740   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2741     {
2742       reg_errcode_t err;
2743       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2744       re_sub_match_last_t *sub_last;
2745       Idx sub_last_idx, sl_str, bkref_str_off;
2746
2747       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2748         continue; /* It isn't related.  */
2749
2750       sl_str = sub_top->str_idx;
2751       bkref_str_off = bkref_str_idx;
2752       /* At first, check the last node of sub expressions we already
2753          evaluated.  */
2754       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2755         {
2756           regoff_t sl_str_diff;
2757           sub_last = sub_top->lasts[sub_last_idx];
2758           sl_str_diff = sub_last->str_idx - sl_str;
2759           /* The matched string by the sub expression match with the substring
2760              at the back reference?  */
2761           if (sl_str_diff > 0)
2762             {
2763               if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2764                 {
2765                   /* Not enough chars for a successful match.  */
2766                   if (bkref_str_off + sl_str_diff > mctx->input.len)
2767                     break;
2768
2769                   err = clean_state_log_if_needed (mctx,
2770                                                    bkref_str_off
2771                                                    + sl_str_diff);
2772                   if (BE (err != REG_NOERROR, 0))
2773                     return err;
2774                   buf = (const char *) re_string_get_buffer (&mctx->input);
2775                 }
2776               if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2777                 /* We don't need to search this sub expression any more.  */
2778                 break;
2779             }
2780           bkref_str_off += sl_str_diff;
2781           sl_str += sl_str_diff;
2782           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2783                                 bkref_str_idx);
2784
2785           /* Reload buf, since the preceding call might have reallocated
2786              the buffer.  */
2787           buf = (const char *) re_string_get_buffer (&mctx->input);
2788
2789           if (err == REG_NOMATCH)
2790             continue;
2791           if (BE (err != REG_NOERROR, 0))
2792             return err;
2793         }
2794
2795       if (sub_last_idx < sub_top->nlasts)
2796         continue;
2797       if (sub_last_idx > 0)
2798         ++sl_str;
2799       /* Then, search for the other last nodes of the sub expression.  */
2800       for (; sl_str <= bkref_str_idx; ++sl_str)
2801         {
2802           Idx cls_node;
2803           regoff_t sl_str_off;
2804           const re_node_set *nodes;
2805           sl_str_off = sl_str - sub_top->str_idx;
2806           /* The matched string by the sub expression match with the substring
2807              at the back reference?  */
2808           if (sl_str_off > 0)
2809             {
2810               if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2811                 {
2812                   /* If we are at the end of the input, we cannot match.  */
2813                   if (bkref_str_off >= mctx->input.len)
2814                     break;
2815
2816                   err = extend_buffers (mctx, bkref_str_off + 1);
2817                   if (BE (err != REG_NOERROR, 0))
2818                     return err;
2819
2820                   buf = (const char *) re_string_get_buffer (&mctx->input);
2821                 }
2822               if (buf [bkref_str_off++] != buf[sl_str - 1])
2823                 break; /* We don't need to search this sub expression
2824                           any more.  */
2825             }
2826           if (mctx->state_log[sl_str] == NULL)
2827             continue;
2828           /* Does this state have a ')' of the sub expression?  */
2829           nodes = &mctx->state_log[sl_str]->nodes;
2830           cls_node = find_subexp_node (dfa, nodes, subexp_num,
2831                                        OP_CLOSE_SUBEXP);
2832           if (cls_node == REG_MISSING)
2833             continue; /* No.  */
2834           if (sub_top->path == NULL)
2835             {
2836               sub_top->path = calloc (sizeof (state_array_t),
2837                                       sl_str - sub_top->str_idx + 1);
2838               if (sub_top->path == NULL)
2839                 return REG_ESPACE;
2840             }
2841           /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2842              in the current context?  */
2843           err = check_arrival (mctx, sub_top->path, sub_top->node,
2844                                sub_top->str_idx, cls_node, sl_str,
2845                                OP_CLOSE_SUBEXP);
2846           if (err == REG_NOMATCH)
2847               continue;
2848           if (BE (err != REG_NOERROR, 0))
2849               return err;
2850           sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2851           if (BE (sub_last == NULL, 0))
2852             return REG_ESPACE;
2853           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2854                                 bkref_str_idx);
2855           if (err == REG_NOMATCH)
2856             continue;
2857         }
2858     }
2859   return REG_NOERROR;
2860 }
2861
2862 /* Helper functions for get_subexp().  */
2863
2864 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2865    If it can arrive, register the sub expression expressed with SUB_TOP
2866    and SUB_LAST.  */
2867
2868 static reg_errcode_t
2869 internal_function
2870 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2871                 re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2872 {
2873   reg_errcode_t err;
2874   Idx to_idx;
2875   /* Can the subexpression arrive the back reference?  */
2876   err = check_arrival (mctx, &sub_last->path, sub_last->node,
2877                        sub_last->str_idx, bkref_node, bkref_str,
2878                        OP_OPEN_SUBEXP);
2879   if (err != REG_NOERROR)
2880     return err;
2881   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2882                              sub_last->str_idx);
2883   if (BE (err != REG_NOERROR, 0))
2884     return err;
2885   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2886   return clean_state_log_if_needed (mctx, to_idx);
2887 }
2888
2889 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2890    Search '(' if FL_OPEN, or search ')' otherwise.
2891    TODO: This function isn't efficient...
2892          Because there might be more than one nodes whose types are
2893          OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2894          nodes.
2895          E.g. RE: (a){2}  */
2896
2897 static Idx
2898 internal_function
2899 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2900                   Idx subexp_idx, int type)
2901 {
2902   Idx cls_idx;
2903   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2904     {
2905       Idx cls_node = nodes->elems[cls_idx];
2906       const re_token_t *node = dfa->nodes + cls_node;
2907       if (node->type == type
2908           && node->opr.idx == subexp_idx)
2909         return cls_node;
2910     }
2911   return REG_MISSING;
2912 }
2913
2914 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2915    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2916    heavily reused.
2917    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2918
2919 static reg_errcode_t
2920 internal_function __attribute_warn_unused_result__
2921 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2922                Idx top_str, Idx last_node, Idx last_str, int type)
2923 {
2924   const re_dfa_t *const dfa = mctx->dfa;
2925   reg_errcode_t err = REG_NOERROR;
2926   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2927   re_dfastate_t *cur_state = NULL;
2928   re_node_set *cur_nodes, next_nodes;
2929   re_dfastate_t **backup_state_log;
2930   unsigned int context;
2931
2932   subexp_num = dfa->nodes[top_node].opr.idx;
2933   /* Extend the buffer if we need.  */
2934   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2935     {
2936       re_dfastate_t **new_array;
2937       Idx old_alloc = path->alloc;
2938       Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
2939       Idx new_alloc;
2940       if (BE (IDX_MAX - old_alloc < incr_alloc, 0))
2941         return REG_ESPACE;
2942       new_alloc = old_alloc + incr_alloc;
2943       if (BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
2944         return REG_ESPACE;
2945       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2946       if (BE (new_array == NULL, 0))
2947         return REG_ESPACE;
2948       path->array = new_array;
2949       path->alloc = new_alloc;
2950       memset (new_array + old_alloc, '\0',
2951               sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2952     }
2953
2954   str_idx = path->next_idx ? path->next_idx : top_str;
2955
2956   /* Temporary modify MCTX.  */
2957   backup_state_log = mctx->state_log;
2958   backup_cur_idx = mctx->input.cur_idx;
2959   mctx->state_log = path->array;
2960   mctx->input.cur_idx = str_idx;
2961
2962   /* Setup initial node set.  */
2963   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2964   if (str_idx == top_str)
2965     {
2966       err = re_node_set_init_1 (&next_nodes, top_node);
2967       if (BE (err != REG_NOERROR, 0))
2968         return err;
2969       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2970       if (BE (err != REG_NOERROR, 0))
2971         {
2972           re_node_set_free (&next_nodes);
2973           return err;
2974         }
2975     }
2976   else
2977     {
2978       cur_state = mctx->state_log[str_idx];
2979       if (cur_state && cur_state->has_backref)
2980         {
2981           err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2982           if (BE (err != REG_NOERROR, 0))
2983             return err;
2984         }
2985       else
2986         re_node_set_init_empty (&next_nodes);
2987     }
2988   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2989     {
2990       if (next_nodes.nelem)
2991         {
2992           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2993                                     subexp_num, type);
2994           if (BE (err != REG_NOERROR, 0))
2995             {
2996               re_node_set_free (&next_nodes);
2997               return err;
2998             }
2999         }
3000       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3001       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3002         {
3003           re_node_set_free (&next_nodes);
3004           return err;
3005         }
3006       mctx->state_log[str_idx] = cur_state;
3007     }
3008
3009   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
3010     {
3011       re_node_set_empty (&next_nodes);
3012       if (mctx->state_log[str_idx + 1])
3013         {
3014           err = re_node_set_merge (&next_nodes,
3015                                    &mctx->state_log[str_idx + 1]->nodes);
3016           if (BE (err != REG_NOERROR, 0))
3017             {
3018               re_node_set_free (&next_nodes);
3019               return err;
3020             }
3021         }
3022       if (cur_state)
3023         {
3024           err = check_arrival_add_next_nodes (mctx, str_idx,
3025                                               &cur_state->non_eps_nodes,
3026                                               &next_nodes);
3027           if (BE (err != REG_NOERROR, 0))
3028             {
3029               re_node_set_free (&next_nodes);
3030               return err;
3031             }
3032         }
3033       ++str_idx;
3034       if (next_nodes.nelem)
3035         {
3036           err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
3037           if (BE (err != REG_NOERROR, 0))
3038             {
3039               re_node_set_free (&next_nodes);
3040               return err;
3041             }
3042           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
3043                                     subexp_num, type);
3044           if (BE (err != REG_NOERROR, 0))
3045             {
3046               re_node_set_free (&next_nodes);
3047               return err;
3048             }
3049         }
3050       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
3051       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3052       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3053         {
3054           re_node_set_free (&next_nodes);
3055           return err;
3056         }
3057       mctx->state_log[str_idx] = cur_state;
3058       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3059     }
3060   re_node_set_free (&next_nodes);
3061   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3062                : &mctx->state_log[last_str]->nodes);
3063   path->next_idx = str_idx;
3064
3065   /* Fix MCTX.  */
3066   mctx->state_log = backup_state_log;
3067   mctx->input.cur_idx = backup_cur_idx;
3068
3069   /* Then check the current node set has the node LAST_NODE.  */
3070   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3071     return REG_NOERROR;
3072
3073   return REG_NOMATCH;
3074 }
3075
3076 /* Helper functions for check_arrival.  */
3077
3078 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3079    to NEXT_NODES.
3080    TODO: This function is similar to the functions transit_state*(),
3081          however this function has many additional works.
3082          Can't we unify them?  */
3083
3084 static reg_errcode_t
3085 internal_function __attribute_warn_unused_result__
3086 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3087                               re_node_set *cur_nodes, re_node_set *next_nodes)
3088 {
3089   const re_dfa_t *const dfa = mctx->dfa;
3090   bool ok;
3091   Idx cur_idx;
3092 #ifdef RE_ENABLE_I18N
3093   reg_errcode_t err = REG_NOERROR;
3094 #endif
3095   re_node_set union_set;
3096   re_node_set_init_empty (&union_set);
3097   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3098     {
3099       int naccepted = 0;
3100       Idx cur_node = cur_nodes->elems[cur_idx];
3101 #ifdef DEBUG
3102       re_token_type_t type = dfa->nodes[cur_node].type;
3103       assert (!IS_EPSILON_NODE (type));
3104 #endif
3105 #ifdef RE_ENABLE_I18N
3106       /* If the node may accept "multi byte".  */
3107       if (dfa->nodes[cur_node].accept_mb)
3108         {
3109           naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3110                                                str_idx);
3111           if (naccepted > 1)
3112             {
3113               re_dfastate_t *dest_state;
3114               Idx next_node = dfa->nexts[cur_node];
3115               Idx next_idx = str_idx + naccepted;
3116               dest_state = mctx->state_log[next_idx];
3117               re_node_set_empty (&union_set);
3118               if (dest_state)
3119                 {
3120                   err = re_node_set_merge (&union_set, &dest_state->nodes);
3121                   if (BE (err != REG_NOERROR, 0))
3122                     {
3123                       re_node_set_free (&union_set);
3124                       return err;
3125                     }
3126                 }
3127               ok = re_node_set_insert (&union_set, next_node);
3128               if (BE (! ok, 0))
3129                 {
3130                   re_node_set_free (&union_set);
3131                   return REG_ESPACE;
3132                 }
3133               mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3134                                                             &union_set);
3135               if (BE (mctx->state_log[next_idx] == NULL
3136                       && err != REG_NOERROR, 0))
3137                 {
3138                   re_node_set_free (&union_set);
3139                   return err;
3140                 }
3141             }
3142         }
3143 #endif /* RE_ENABLE_I18N */
3144       if (naccepted
3145           || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3146         {
3147           ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3148           if (BE (! ok, 0))
3149             {
3150               re_node_set_free (&union_set);
3151               return REG_ESPACE;
3152             }
3153         }
3154     }
3155   re_node_set_free (&union_set);
3156   return REG_NOERROR;
3157 }
3158
3159 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3160    CUR_NODES, however exclude the nodes which are:
3161     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3162     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3163 */
3164
3165 static reg_errcode_t
3166 internal_function
3167 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3168                           Idx ex_subexp, int type)
3169 {
3170   reg_errcode_t err;
3171   Idx idx, outside_node;
3172   re_node_set new_nodes;
3173 #ifdef DEBUG
3174   assert (cur_nodes->nelem);
3175 #endif
3176   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3177   if (BE (err != REG_NOERROR, 0))
3178     return err;
3179   /* Create a new node set NEW_NODES with the nodes which are epsilon
3180      closures of the node in CUR_NODES.  */
3181
3182   for (idx = 0; idx < cur_nodes->nelem; ++idx)
3183     {
3184       Idx cur_node = cur_nodes->elems[idx];
3185       const re_node_set *eclosure = dfa->eclosures + cur_node;
3186       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3187       if (outside_node == REG_MISSING)
3188         {
3189           /* There are no problematic nodes, just merge them.  */
3190           err = re_node_set_merge (&new_nodes, eclosure);
3191           if (BE (err != REG_NOERROR, 0))
3192             {
3193               re_node_set_free (&new_nodes);
3194               return err;
3195             }
3196         }
3197       else
3198         {
3199           /* There are problematic nodes, re-calculate incrementally.  */
3200           err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3201                                               ex_subexp, type);
3202           if (BE (err != REG_NOERROR, 0))
3203             {
3204               re_node_set_free (&new_nodes);
3205               return err;
3206             }
3207         }
3208     }
3209   re_node_set_free (cur_nodes);
3210   *cur_nodes = new_nodes;
3211   return REG_NOERROR;
3212 }
3213
3214 /* Helper function for check_arrival_expand_ecl.
3215    Check incrementally the epsilon closure of TARGET, and if it isn't
3216    problematic append it to DST_NODES.  */
3217
3218 static reg_errcode_t
3219 internal_function __attribute_warn_unused_result__
3220 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3221                               Idx target, Idx ex_subexp, int type)
3222 {
3223   Idx cur_node;
3224   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3225     {
3226       bool ok;
3227
3228       if (dfa->nodes[cur_node].type == type
3229           && dfa->nodes[cur_node].opr.idx == ex_subexp)
3230         {
3231           if (type == OP_CLOSE_SUBEXP)
3232             {
3233               ok = re_node_set_insert (dst_nodes, cur_node);
3234               if (BE (! ok, 0))
3235                 return REG_ESPACE;
3236             }
3237           break;
3238         }
3239       ok = re_node_set_insert (dst_nodes, cur_node);
3240       if (BE (! ok, 0))
3241         return REG_ESPACE;
3242       if (dfa->edests[cur_node].nelem == 0)
3243         break;
3244       if (dfa->edests[cur_node].nelem == 2)
3245         {
3246           reg_errcode_t err;
3247           err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3248                                               dfa->edests[cur_node].elems[1],
3249                                               ex_subexp, type);
3250           if (BE (err != REG_NOERROR, 0))
3251             return err;
3252         }
3253       cur_node = dfa->edests[cur_node].elems[0];
3254     }
3255   return REG_NOERROR;
3256 }
3257
3258
3259 /* For all the back references in the current state, calculate the
3260    destination of the back references by the appropriate entry
3261    in MCTX->BKREF_ENTS.  */
3262
3263 static reg_errcode_t
3264 internal_function __attribute_warn_unused_result__
3265 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3266                     Idx cur_str, Idx subexp_num, int type)
3267 {
3268   const re_dfa_t *const dfa = mctx->dfa;
3269   reg_errcode_t err;
3270   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3271   struct re_backref_cache_entry *ent;
3272
3273   if (cache_idx_start == REG_MISSING)
3274     return REG_NOERROR;
3275
3276  restart:
3277   ent = mctx->bkref_ents + cache_idx_start;
3278   do
3279     {
3280       Idx to_idx, next_node;
3281
3282       /* Is this entry ENT is appropriate?  */
3283       if (!re_node_set_contains (cur_nodes, ent->node))
3284         continue; /* No.  */
3285
3286       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3287       /* Calculate the destination of the back reference, and append it
3288          to MCTX->STATE_LOG.  */
3289       if (to_idx == cur_str)
3290         {
3291           /* The backreference did epsilon transit, we must re-check all the
3292              node in the current state.  */
3293           re_node_set new_dests;
3294           reg_errcode_t err2, err3;
3295           next_node = dfa->edests[ent->node].elems[0];
3296           if (re_node_set_contains (cur_nodes, next_node))
3297             continue;
3298           err = re_node_set_init_1 (&new_dests, next_node);
3299           err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3300           err3 = re_node_set_merge (cur_nodes, &new_dests);
3301           re_node_set_free (&new_dests);
3302           if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3303                   || err3 != REG_NOERROR, 0))
3304             {
3305               err = (err != REG_NOERROR ? err
3306                      : (err2 != REG_NOERROR ? err2 : err3));
3307               return err;
3308             }
3309           /* TODO: It is still inefficient...  */
3310           goto restart;
3311         }
3312       else
3313         {
3314           re_node_set union_set;
3315           next_node = dfa->nexts[ent->node];
3316           if (mctx->state_log[to_idx])
3317             {
3318               bool ok;
3319               if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3320                                         next_node))
3321                 continue;
3322               err = re_node_set_init_copy (&union_set,
3323                                            &mctx->state_log[to_idx]->nodes);
3324               ok = re_node_set_insert (&union_set, next_node);
3325               if (BE (err != REG_NOERROR || ! ok, 0))
3326                 {
3327                   re_node_set_free (&union_set);
3328                   err = err != REG_NOERROR ? err : REG_ESPACE;
3329                   return err;
3330                 }
3331             }
3332           else
3333             {
3334               err = re_node_set_init_1 (&union_set, next_node);
3335               if (BE (err != REG_NOERROR, 0))
3336                 return err;
3337             }
3338           mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3339           re_node_set_free (&union_set);
3340           if (BE (mctx->state_log[to_idx] == NULL
3341                   && err != REG_NOERROR, 0))
3342             return err;
3343         }
3344     }
3345   while (ent++->more);
3346   return REG_NOERROR;
3347 }
3348
3349 /* Build transition table for the state.
3350    Return true if successful.  */
3351
3352 static bool
3353 internal_function
3354 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3355 {
3356   reg_errcode_t err;
3357   Idx i, j;
3358   int ch;
3359   bool need_word_trtable = false;
3360   bitset_word_t elem, mask;
3361   bool dests_node_malloced = false;
3362   bool dest_states_malloced = false;
3363   Idx ndests; /* Number of the destination states from 'state'.  */
3364   re_dfastate_t **trtable;
3365   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3366   re_node_set follows, *dests_node;
3367   bitset_t *dests_ch;
3368   bitset_t acceptable;
3369
3370   struct dests_alloc
3371   {
3372     re_node_set dests_node[SBC_MAX];
3373     bitset_t dests_ch[SBC_MAX];
3374   } *dests_alloc;
3375
3376   /* We build DFA states which corresponds to the destination nodes
3377      from 'state'.  'dests_node[i]' represents the nodes which i-th
3378      destination state contains, and 'dests_ch[i]' represents the
3379      characters which i-th destination state accepts.  */
3380   if (__libc_use_alloca (sizeof (struct dests_alloc)))
3381     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3382   else
3383     {
3384       dests_alloc = re_malloc (struct dests_alloc, 1);
3385       if (BE (dests_alloc == NULL, 0))
3386         return false;
3387       dests_node_malloced = true;
3388     }
3389   dests_node = dests_alloc->dests_node;
3390   dests_ch = dests_alloc->dests_ch;
3391
3392   /* Initialize transition table.  */
3393   state->word_trtable = state->trtable = NULL;
3394
3395   /* At first, group all nodes belonging to 'state' into several
3396      destinations.  */
3397   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3398   if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
3399     {
3400       if (dests_node_malloced)
3401         free (dests_alloc);
3402       /* Return false in case of an error, true otherwise.  */
3403       if (ndests == 0)
3404         {
3405           state->trtable = (re_dfastate_t **)
3406             calloc (sizeof (re_dfastate_t *), SBC_MAX);
3407           if (BE (state->trtable == NULL, 0))
3408             return false;
3409           return true;
3410         }
3411       return false;
3412     }
3413
3414   err = re_node_set_alloc (&follows, ndests + 1);
3415   if (BE (err != REG_NOERROR, 0))
3416     goto out_free;
3417
3418   /* Avoid arithmetic overflow in size calculation.  */
3419   if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3420             / (3 * sizeof (re_dfastate_t *)))
3421            < ndests),
3422           0))
3423     goto out_free;
3424
3425   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3426                          + ndests * 3 * sizeof (re_dfastate_t *)))
3427     dest_states = (re_dfastate_t **)
3428       alloca (ndests * 3 * sizeof (re_dfastate_t *));
3429   else
3430     {
3431       dest_states = (re_dfastate_t **)
3432         malloc (ndests * 3 * sizeof (re_dfastate_t *));
3433       if (BE (dest_states == NULL, 0))
3434         {
3435 out_free:
3436           if (dest_states_malloced)
3437             free (dest_states);
3438           re_node_set_free (&follows);
3439           for (i = 0; i < ndests; ++i)
3440             re_node_set_free (dests_node + i);
3441           if (dests_node_malloced)
3442             free (dests_alloc);
3443           return false;
3444         }
3445       dest_states_malloced = true;
3446     }
3447   dest_states_word = dest_states + ndests;
3448   dest_states_nl = dest_states_word + ndests;
3449   bitset_empty (acceptable);
3450
3451   /* Then build the states for all destinations.  */
3452   for (i = 0; i < ndests; ++i)
3453     {
3454       Idx next_node;
3455       re_node_set_empty (&follows);
3456       /* Merge the follows of this destination states.  */
3457       for (j = 0; j < dests_node[i].nelem; ++j)
3458         {
3459           next_node = dfa->nexts[dests_node[i].elems[j]];
3460           if (next_node != REG_MISSING)
3461             {
3462               err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3463               if (BE (err != REG_NOERROR, 0))
3464                 goto out_free;
3465             }
3466         }
3467       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3468       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3469         goto out_free;
3470       /* If the new state has context constraint,
3471          build appropriate states for these contexts.  */
3472       if (dest_states[i]->has_constraint)
3473         {
3474           dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3475                                                           CONTEXT_WORD);
3476           if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3477             goto out_free;
3478
3479           if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3480             need_word_trtable = true;
3481
3482           dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3483                                                         CONTEXT_NEWLINE);
3484           if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3485             goto out_free;
3486         }
3487       else
3488         {
3489           dest_states_word[i] = dest_states[i];
3490           dest_states_nl[i] = dest_states[i];
3491         }
3492       bitset_merge (acceptable, dests_ch[i]);
3493     }
3494
3495   if (!BE (need_word_trtable, 0))
3496     {
3497       /* We don't care about whether the following character is a word
3498          character, or we are in a single-byte character set so we can
3499          discern by looking at the character code: allocate a
3500          256-entry transition table.  */
3501       trtable = state->trtable =
3502         (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3503       if (BE (trtable == NULL, 0))
3504         goto out_free;
3505
3506       /* For all characters ch...:  */
3507       for (i = 0; i < BITSET_WORDS; ++i)
3508         for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3509              elem;
3510              mask <<= 1, elem >>= 1, ++ch)
3511           if (BE (elem & 1, 0))
3512             {
3513               /* There must be exactly one destination which accepts
3514                  character ch.  See group_nodes_into_DFAstates.  */
3515               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3516                 ;
3517
3518               /* j-th destination accepts the word character ch.  */
3519               if (dfa->word_char[i] & mask)
3520                 trtable[ch] = dest_states_word[j];
3521               else
3522                 trtable[ch] = dest_states[j];
3523             }
3524     }
3525   else
3526     {
3527       /* We care about whether the following character is a word
3528          character, and we are in a multi-byte character set: discern
3529          by looking at the character code: build two 256-entry
3530          transition tables, one starting at trtable[0] and one
3531          starting at trtable[SBC_MAX].  */
3532       trtable = state->word_trtable =
3533         (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3534       if (BE (trtable == NULL, 0))
3535         goto out_free;
3536
3537       /* For all characters ch...:  */
3538       for (i = 0; i < BITSET_WORDS; ++i)
3539         for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3540              elem;
3541              mask <<= 1, elem >>= 1, ++ch)
3542           if (BE (elem & 1, 0))
3543             {
3544               /* There must be exactly one destination which accepts
3545                  character ch.  See group_nodes_into_DFAstates.  */
3546               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3547                 ;
3548
3549               /* j-th destination accepts the word character ch.  */
3550               trtable[ch] = dest_states[j];
3551               trtable[ch + SBC_MAX] = dest_states_word[j];
3552             }
3553     }
3554
3555   /* new line */
3556   if (bitset_contain (acceptable, NEWLINE_CHAR))
3557     {
3558       /* The current state accepts newline character.  */
3559       for (j = 0; j < ndests; ++j)
3560         if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3561           {
3562             /* k-th destination accepts newline character.  */
3563             trtable[NEWLINE_CHAR] = dest_states_nl[j];
3564             if (need_word_trtable)
3565               trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3566             /* There must be only one destination which accepts
3567                newline.  See group_nodes_into_DFAstates.  */
3568             break;
3569           }
3570     }
3571
3572   if (dest_states_malloced)
3573     free (dest_states);
3574
3575   re_node_set_free (&follows);
3576   for (i = 0; i < ndests; ++i)
3577     re_node_set_free (dests_node + i);
3578
3579   if (dests_node_malloced)
3580     free (dests_alloc);
3581
3582   return true;
3583 }
3584
3585 /* Group all nodes belonging to STATE into several destinations.
3586    Then for all destinations, set the nodes belonging to the destination
3587    to DESTS_NODE[i] and set the characters accepted by the destination
3588    to DEST_CH[i].  This function return the number of destinations.  */
3589
3590 static Idx
3591 internal_function
3592 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3593                             re_node_set *dests_node, bitset_t *dests_ch)
3594 {
3595   reg_errcode_t err;
3596   bool ok;
3597   Idx i, j, k;
3598   Idx ndests; /* Number of the destinations from 'state'.  */
3599   bitset_t accepts; /* Characters a node can accept.  */
3600   const re_node_set *cur_nodes = &state->nodes;
3601   bitset_empty (accepts);
3602   ndests = 0;
3603
3604   /* For all the nodes belonging to 'state',  */
3605   for (i = 0; i < cur_nodes->nelem; ++i)
3606     {
3607       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3608       re_token_type_t type = node->type;
3609       unsigned int constraint = node->constraint;
3610
3611       /* Enumerate all single byte character this node can accept.  */
3612       if (type == CHARACTER)
3613         bitset_set (accepts, node->opr.c);
3614       else if (type == SIMPLE_BRACKET)
3615         {
3616           bitset_merge (accepts, node->opr.sbcset);
3617         }
3618       else if (type == OP_PERIOD)
3619         {
3620 #ifdef RE_ENABLE_I18N
3621           if (dfa->mb_cur_max > 1)
3622             bitset_merge (accepts, dfa->sb_char);
3623           else
3624 #endif
3625             bitset_set_all (accepts);
3626           if (!(dfa->syntax & RE_DOT_NEWLINE))
3627             bitset_clear (accepts, '\n');
3628           if (dfa->syntax & RE_DOT_NOT_NULL)
3629             bitset_clear (accepts, '\0');
3630         }
3631 #ifdef RE_ENABLE_I18N
3632       else if (type == OP_UTF8_PERIOD)
3633         {
3634           if (ASCII_CHARS % BITSET_WORD_BITS == 0)
3635             memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
3636           else
3637             bitset_merge (accepts, utf8_sb_map);
3638           if (!(dfa->syntax & RE_DOT_NEWLINE))
3639             bitset_clear (accepts, '\n');
3640           if (dfa->syntax & RE_DOT_NOT_NULL)
3641             bitset_clear (accepts, '\0');
3642         }
3643 #endif
3644       else
3645         continue;
3646
3647       /* Check the 'accepts' and sift the characters which are not
3648          match it the context.  */
3649       if (constraint)
3650         {
3651           if (constraint & NEXT_NEWLINE_CONSTRAINT)
3652             {
3653               bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3654               bitset_empty (accepts);
3655               if (accepts_newline)
3656                 bitset_set (accepts, NEWLINE_CHAR);
3657               else
3658                 continue;
3659             }
3660           if (constraint & NEXT_ENDBUF_CONSTRAINT)
3661             {
3662               bitset_empty (accepts);
3663               continue;
3664             }
3665
3666           if (constraint & NEXT_WORD_CONSTRAINT)
3667             {
3668               bitset_word_t any_set = 0;
3669               if (type == CHARACTER && !node->word_char)
3670                 {
3671                   bitset_empty (accepts);
3672                   continue;
3673                 }
3674 #ifdef RE_ENABLE_I18N
3675               if (dfa->mb_cur_max > 1)
3676                 for (j = 0; j < BITSET_WORDS; ++j)
3677                   any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3678               else
3679 #endif
3680                 for (j = 0; j < BITSET_WORDS; ++j)
3681                   any_set |= (accepts[j] &= dfa->word_char[j]);
3682               if (!any_set)
3683                 continue;
3684             }
3685           if (constraint & NEXT_NOTWORD_CONSTRAINT)
3686             {
3687               bitset_word_t any_set = 0;
3688               if (type == CHARACTER && node->word_char)
3689                 {
3690                   bitset_empty (accepts);
3691                   continue;
3692                 }
3693 #ifdef RE_ENABLE_I18N
3694               if (dfa->mb_cur_max > 1)
3695                 for (j = 0; j < BITSET_WORDS; ++j)
3696                   any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3697               else
3698 #endif
3699                 for (j = 0; j < BITSET_WORDS; ++j)
3700                   any_set |= (accepts[j] &= ~dfa->word_char[j]);
3701               if (!any_set)
3702                 continue;
3703             }
3704         }
3705
3706       /* Then divide 'accepts' into DFA states, or create a new
3707          state.  Above, we make sure that accepts is not empty.  */
3708       for (j = 0; j < ndests; ++j)
3709         {
3710           bitset_t intersec; /* Intersection sets, see below.  */
3711           bitset_t remains;
3712           /* Flags, see below.  */
3713           bitset_word_t has_intersec, not_subset, not_consumed;
3714
3715           /* Optimization, skip if this state doesn't accept the character.  */
3716           if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3717             continue;
3718
3719           /* Enumerate the intersection set of this state and 'accepts'.  */
3720           has_intersec = 0;
3721           for (k = 0; k < BITSET_WORDS; ++k)
3722             has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3723           /* And skip if the intersection set is empty.  */
3724           if (!has_intersec)
3725             continue;
3726
3727           /* Then check if this state is a subset of 'accepts'.  */
3728           not_subset = not_consumed = 0;
3729           for (k = 0; k < BITSET_WORDS; ++k)
3730             {
3731               not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3732               not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3733             }
3734
3735           /* If this state isn't a subset of 'accepts', create a
3736              new group state, which has the 'remains'. */
3737           if (not_subset)
3738             {
3739               bitset_copy (dests_ch[ndests], remains);
3740               bitset_copy (dests_ch[j], intersec);
3741               err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3742               if (BE (err != REG_NOERROR, 0))
3743                 goto error_return;
3744               ++ndests;
3745             }
3746
3747           /* Put the position in the current group. */
3748           ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3749           if (BE (! ok, 0))
3750             goto error_return;
3751
3752           /* If all characters are consumed, go to next node. */
3753           if (!not_consumed)
3754             break;
3755         }
3756       /* Some characters remain, create a new group. */
3757       if (j == ndests)
3758         {
3759           bitset_copy (dests_ch[ndests], accepts);
3760           err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3761           if (BE (err != REG_NOERROR, 0))
3762             goto error_return;
3763           ++ndests;
3764           bitset_empty (accepts);
3765         }
3766     }
3767   return ndests;
3768  error_return:
3769   for (j = 0; j < ndests; ++j)
3770     re_node_set_free (dests_node + j);
3771   return REG_MISSING;
3772 }
3773
3774 #ifdef RE_ENABLE_I18N
3775 /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
3776    Return the number of the bytes the node accepts.
3777    STR_IDX is the current index of the input string.
3778
3779    This function handles the nodes which can accept one character, or
3780    one collating element like '.', '[a-z]', opposite to the other nodes
3781    can only accept one byte.  */
3782
3783 static int
3784 internal_function
3785 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3786                          const re_string_t *input, Idx str_idx)
3787 {
3788   const re_token_t *node = dfa->nodes + node_idx;
3789   int char_len, elem_len;
3790   Idx i;
3791
3792   if (BE (node->type == OP_UTF8_PERIOD, 0))
3793     {
3794       unsigned char c = re_string_byte_at (input, str_idx), d;
3795       if (BE (c < 0xc2, 1))
3796         return 0;
3797
3798       if (str_idx + 2 > input->len)
3799         return 0;
3800
3801       d = re_string_byte_at (input, str_idx + 1);
3802       if (c < 0xe0)
3803         return (d < 0x80 || d > 0xbf) ? 0 : 2;
3804       else if (c < 0xf0)
3805         {
3806           char_len = 3;
3807           if (c == 0xe0 && d < 0xa0)
3808             return 0;
3809         }
3810       else if (c < 0xf8)
3811         {
3812           char_len = 4;
3813           if (c == 0xf0 && d < 0x90)
3814             return 0;
3815         }
3816       else if (c < 0xfc)
3817         {
3818           char_len = 5;
3819           if (c == 0xf8 && d < 0x88)
3820             return 0;
3821         }
3822       else if (c < 0xfe)
3823         {
3824           char_len = 6;
3825           if (c == 0xfc && d < 0x84)
3826             return 0;
3827         }
3828       else
3829         return 0;
3830
3831       if (str_idx + char_len > input->len)
3832         return 0;
3833
3834       for (i = 1; i < char_len; ++i)
3835         {
3836           d = re_string_byte_at (input, str_idx + i);
3837           if (d < 0x80 || d > 0xbf)
3838             return 0;
3839         }
3840       return char_len;
3841     }
3842
3843   char_len = re_string_char_size_at (input, str_idx);
3844   if (node->type == OP_PERIOD)
3845     {
3846       if (char_len <= 1)
3847         return 0;
3848       /* FIXME: I don't think this if is needed, as both '\n'
3849          and '\0' are char_len == 1.  */
3850       /* '.' accepts any one character except the following two cases.  */
3851       if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3852            re_string_byte_at (input, str_idx) == '\n') ||
3853           ((dfa->syntax & RE_DOT_NOT_NULL) &&
3854            re_string_byte_at (input, str_idx) == '\0'))
3855         return 0;
3856       return char_len;
3857     }
3858
3859   elem_len = re_string_elem_size_at (input, str_idx);
3860   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3861     return 0;
3862
3863   if (node->type == COMPLEX_BRACKET)
3864     {
3865       const re_charset_t *cset = node->opr.mbcset;
3866 # ifdef _LIBC
3867       const unsigned char *pin
3868         = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3869       Idx j;
3870       uint32_t nrules;
3871 # endif /* _LIBC */
3872       int match_len = 0;
3873       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3874                     ? re_string_wchar_at (input, str_idx) : 0);
3875
3876       /* match with multibyte character?  */
3877       for (i = 0; i < cset->nmbchars; ++i)
3878         if (wc == cset->mbchars[i])
3879           {
3880             match_len = char_len;
3881             goto check_node_accept_bytes_match;
3882           }
3883       /* match with character_class?  */
3884       for (i = 0; i < cset->nchar_classes; ++i)
3885         {
3886           wctype_t wt = cset->char_classes[i];
3887           if (__iswctype (wc, wt))
3888             {
3889               match_len = char_len;
3890               goto check_node_accept_bytes_match;
3891             }
3892         }
3893
3894 # ifdef _LIBC
3895       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3896       if (nrules != 0)
3897         {
3898           unsigned int in_collseq = 0;
3899           const int32_t *table, *indirect;
3900           const unsigned char *weights, *extra;
3901           const char *collseqwc;
3902           /* This #include defines a local function!  */
3903 #  include <locale/weight.h>
3904
3905           /* match with collating_symbol?  */
3906           if (cset->ncoll_syms)
3907             extra = (const unsigned char *)
3908               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3909           for (i = 0; i < cset->ncoll_syms; ++i)
3910             {
3911               const unsigned char *coll_sym = extra + cset->coll_syms[i];
3912               /* Compare the length of input collating element and
3913                  the length of current collating element.  */
3914               if (*coll_sym != elem_len)
3915                 continue;
3916               /* Compare each bytes.  */
3917               for (j = 0; j < *coll_sym; j++)
3918                 if (pin[j] != coll_sym[1 + j])
3919                   break;
3920               if (j == *coll_sym)
3921                 {
3922                   /* Match if every bytes is equal.  */
3923                   match_len = j;
3924                   goto check_node_accept_bytes_match;
3925                 }
3926             }
3927
3928           if (cset->nranges)
3929             {
3930               if (elem_len <= char_len)
3931                 {
3932                   collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3933                   in_collseq = __collseq_table_lookup (collseqwc, wc);
3934                 }
3935               else
3936                 in_collseq = find_collation_sequence_value (pin, elem_len);
3937             }
3938           /* match with range expression?  */
3939           /* FIXME: Implement rational ranges here, too.  */
3940           for (i = 0; i < cset->nranges; ++i)
3941             if (cset->range_starts[i] <= in_collseq
3942                 && in_collseq <= cset->range_ends[i])
3943               {
3944                 match_len = elem_len;
3945                 goto check_node_accept_bytes_match;
3946               }
3947
3948           /* match with equivalence_class?  */
3949           if (cset->nequiv_classes)
3950             {
3951               const unsigned char *cp = pin;
3952               table = (const int32_t *)
3953                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3954               weights = (const unsigned char *)
3955                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3956               extra = (const unsigned char *)
3957                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3958               indirect = (const int32_t *)
3959                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3960               int32_t idx = findidx (&cp, elem_len);
3961               if (idx > 0)
3962                 for (i = 0; i < cset->nequiv_classes; ++i)
3963                   {
3964                     int32_t equiv_class_idx = cset->equiv_classes[i];
3965                     size_t weight_len = weights[idx & 0xffffff];
3966                     if (weight_len == weights[equiv_class_idx & 0xffffff]
3967                         && (idx >> 24) == (equiv_class_idx >> 24))
3968                       {
3969                         Idx cnt = 0;
3970
3971                         idx &= 0xffffff;
3972                         equiv_class_idx &= 0xffffff;
3973
3974                         while (cnt <= weight_len
3975                                && (weights[equiv_class_idx + 1 + cnt]
3976                                    == weights[idx + 1 + cnt]))
3977                           ++cnt;
3978                         if (cnt > weight_len)
3979                           {
3980                             match_len = elem_len;
3981                             goto check_node_accept_bytes_match;
3982                           }
3983                       }
3984                   }
3985             }
3986         }
3987       else
3988 # endif /* _LIBC */
3989         {
3990           /* match with range expression?  */
3991           for (i = 0; i < cset->nranges; ++i)
3992             {
3993               if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
3994                 {
3995                   match_len = char_len;
3996                   goto check_node_accept_bytes_match;
3997                 }
3998             }
3999         }
4000     check_node_accept_bytes_match:
4001       if (!cset->non_match)
4002         return match_len;
4003       else
4004         {
4005           if (match_len > 0)
4006             return 0;
4007           else
4008             return (elem_len > char_len) ? elem_len : char_len;
4009         }
4010     }
4011   return 0;
4012 }
4013
4014 # ifdef _LIBC
4015 static unsigned int
4016 internal_function
4017 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
4018 {
4019   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
4020   if (nrules == 0)
4021     {
4022       if (mbs_len == 1)
4023         {
4024           /* No valid character.  Match it as a single byte character.  */
4025           const unsigned char *collseq = (const unsigned char *)
4026             _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
4027           return collseq[mbs[0]];
4028         }
4029       return UINT_MAX;
4030     }
4031   else
4032     {
4033       int32_t idx;
4034       const unsigned char *extra = (const unsigned char *)
4035         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4036       int32_t extrasize = (const unsigned char *)
4037         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
4038
4039       for (idx = 0; idx < extrasize;)
4040         {
4041           int mbs_cnt;
4042           bool found = false;
4043           int32_t elem_mbs_len;
4044           /* Skip the name of collating element name.  */
4045           idx = idx + extra[idx] + 1;
4046           elem_mbs_len = extra[idx++];
4047           if (mbs_len == elem_mbs_len)
4048             {
4049               for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
4050                 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
4051                   break;
4052               if (mbs_cnt == elem_mbs_len)
4053                 /* Found the entry.  */
4054                 found = true;
4055             }
4056           /* Skip the byte sequence of the collating element.  */
4057           idx += elem_mbs_len;
4058           /* Adjust for the alignment.  */
4059           idx = (idx + 3) & ~3;
4060           /* Skip the collation sequence value.  */
4061           idx += sizeof (uint32_t);
4062           /* Skip the wide char sequence of the collating element.  */
4063           idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
4064           /* If we found the entry, return the sequence value.  */
4065           if (found)
4066             return *(uint32_t *) (extra + idx);
4067           /* Skip the collation sequence value.  */
4068           idx += sizeof (uint32_t);
4069         }
4070       return UINT_MAX;
4071     }
4072 }
4073 # endif /* _LIBC */
4074 #endif /* RE_ENABLE_I18N */
4075
4076 /* Check whether the node accepts the byte which is IDX-th
4077    byte of the INPUT.  */
4078
4079 static bool
4080 internal_function
4081 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4082                    Idx idx)
4083 {
4084   unsigned char ch;
4085   ch = re_string_byte_at (&mctx->input, idx);
4086   switch (node->type)
4087     {
4088     case CHARACTER:
4089       if (node->opr.c != ch)
4090         return false;
4091       break;
4092
4093     case SIMPLE_BRACKET:
4094       if (!bitset_contain (node->opr.sbcset, ch))
4095         return false;
4096       break;
4097
4098 #ifdef RE_ENABLE_I18N
4099     case OP_UTF8_PERIOD:
4100       if (ch >= ASCII_CHARS)
4101         return false;
4102       /* FALLTHROUGH */
4103 #endif
4104     case OP_PERIOD:
4105       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4106           || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4107         return false;
4108       break;
4109
4110     default:
4111       return false;
4112     }
4113
4114   if (node->constraint)
4115     {
4116       /* The node has constraints.  Check whether the current context
4117          satisfies the constraints.  */
4118       unsigned int context = re_string_context_at (&mctx->input, idx,
4119                                                    mctx->eflags);
4120       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4121         return false;
4122     }
4123
4124   return true;
4125 }
4126
4127 /* Extend the buffers, if the buffers have run out.  */
4128
4129 static reg_errcode_t
4130 internal_function __attribute_warn_unused_result__
4131 extend_buffers (re_match_context_t *mctx, int min_len)
4132 {
4133   reg_errcode_t ret;
4134   re_string_t *pstr = &mctx->input;
4135
4136   /* Avoid overflow.  */
4137   if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
4138           <= pstr->bufs_len, 0))
4139     return REG_ESPACE;
4140
4141   /* Double the lengths of the buffers, but allocate at least MIN_LEN.  */
4142   ret = re_string_realloc_buffers (pstr,
4143                                    MAX (min_len,
4144                                         MIN (pstr->len, pstr->bufs_len * 2)));
4145   if (BE (ret != REG_NOERROR, 0))
4146     return ret;
4147
4148   if (mctx->state_log != NULL)
4149     {
4150       /* And double the length of state_log.  */
4151       /* XXX We have no indication of the size of this buffer.  If this
4152          allocation fail we have no indication that the state_log array
4153          does not have the right size.  */
4154       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4155                                               pstr->bufs_len + 1);
4156       if (BE (new_array == NULL, 0))
4157         return REG_ESPACE;
4158       mctx->state_log = new_array;
4159     }
4160
4161   /* Then reconstruct the buffers.  */
4162   if (pstr->icase)
4163     {
4164 #ifdef RE_ENABLE_I18N
4165       if (pstr->mb_cur_max > 1)
4166         {
4167           ret = build_wcs_upper_buffer (pstr);
4168           if (BE (ret != REG_NOERROR, 0))
4169             return ret;
4170         }
4171       else
4172 #endif /* RE_ENABLE_I18N  */
4173         build_upper_buffer (pstr);
4174     }
4175   else
4176     {
4177 #ifdef RE_ENABLE_I18N
4178       if (pstr->mb_cur_max > 1)
4179         build_wcs_buffer (pstr);
4180       else
4181 #endif /* RE_ENABLE_I18N  */
4182         {
4183           if (pstr->trans != NULL)
4184             re_string_translate_buffer (pstr);
4185         }
4186     }
4187   return REG_NOERROR;
4188 }
4189
4190 \f
4191 /* Functions for matching context.  */
4192
4193 /* Initialize MCTX.  */
4194
4195 static reg_errcode_t
4196 internal_function __attribute_warn_unused_result__
4197 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4198 {
4199   mctx->eflags = eflags;
4200   mctx->match_last = REG_MISSING;
4201   if (n > 0)
4202     {
4203       /* Avoid overflow.  */
4204       size_t max_object_size =
4205         MAX (sizeof (struct re_backref_cache_entry),
4206              sizeof (re_sub_match_top_t *));
4207       if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n, 0))
4208         return REG_ESPACE;
4209
4210       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4211       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4212       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4213         return REG_ESPACE;
4214     }
4215   /* Already zero-ed by the caller.
4216      else
4217        mctx->bkref_ents = NULL;
4218      mctx->nbkref_ents = 0;
4219      mctx->nsub_tops = 0;  */
4220   mctx->abkref_ents = n;
4221   mctx->max_mb_elem_len = 1;
4222   mctx->asub_tops = n;
4223   return REG_NOERROR;
4224 }
4225
4226 /* Clean the entries which depend on the current input in MCTX.
4227    This function must be invoked when the matcher changes the start index
4228    of the input, or changes the input string.  */
4229
4230 static void
4231 internal_function
4232 match_ctx_clean (re_match_context_t *mctx)
4233 {
4234   Idx st_idx;
4235   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4236     {
4237       Idx sl_idx;
4238       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4239       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4240         {
4241           re_sub_match_last_t *last = top->lasts[sl_idx];
4242           re_free (last->path.array);
4243           re_free (last);
4244         }
4245       re_free (top->lasts);
4246       if (top->path)
4247         {
4248           re_free (top->path->array);
4249           re_free (top->path);
4250         }
4251       free (top);
4252     }
4253
4254   mctx->nsub_tops = 0;
4255   mctx->nbkref_ents = 0;
4256 }
4257
4258 /* Free all the memory associated with MCTX.  */
4259
4260 static void
4261 internal_function
4262 match_ctx_free (re_match_context_t *mctx)
4263 {
4264   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4265   match_ctx_clean (mctx);
4266   re_free (mctx->sub_tops);
4267   re_free (mctx->bkref_ents);
4268 }
4269
4270 /* Add a new backreference entry to MCTX.
4271    Note that we assume that caller never call this function with duplicate
4272    entry, and call with STR_IDX which isn't smaller than any existing entry.
4273 */
4274
4275 static reg_errcode_t
4276 internal_function __attribute_warn_unused_result__
4277 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4278                      Idx to)
4279 {
4280   if (mctx->nbkref_ents >= mctx->abkref_ents)
4281     {
4282       struct re_backref_cache_entry* new_entry;
4283       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4284                               mctx->abkref_ents * 2);
4285       if (BE (new_entry == NULL, 0))
4286         {
4287           re_free (mctx->bkref_ents);
4288           return REG_ESPACE;
4289         }
4290       mctx->bkref_ents = new_entry;
4291       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4292               sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4293       mctx->abkref_ents *= 2;
4294     }
4295   if (mctx->nbkref_ents > 0
4296       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4297     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4298
4299   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4300   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4301   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4302   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4303
4304   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4305      If bit N is clear, means that this entry won't epsilon-transition to
4306      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4307      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4308      such node.
4309
4310      A backreference does not epsilon-transition unless it is empty, so set
4311      to all zeros if FROM != TO.  */
4312   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4313     = (from == to ? -1 : 0);
4314
4315   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4316   if (mctx->max_mb_elem_len < to - from)
4317     mctx->max_mb_elem_len = to - from;
4318   return REG_NOERROR;
4319 }
4320
4321 /* Return the first entry with the same str_idx, or REG_MISSING if none is
4322    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4323
4324 static Idx
4325 internal_function
4326 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4327 {
4328   Idx left, right, mid, last;
4329   last = right = mctx->nbkref_ents;
4330   for (left = 0; left < right;)
4331     {
4332       mid = (left + right) / 2;
4333       if (mctx->bkref_ents[mid].str_idx < str_idx)
4334         left = mid + 1;
4335       else
4336         right = mid;
4337     }
4338   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4339     return left;
4340   else
4341     return REG_MISSING;
4342 }
4343
4344 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4345    at STR_IDX.  */
4346
4347 static reg_errcode_t
4348 internal_function __attribute_warn_unused_result__
4349 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4350 {
4351 #ifdef DEBUG
4352   assert (mctx->sub_tops != NULL);
4353   assert (mctx->asub_tops > 0);
4354 #endif
4355   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4356     {
4357       Idx new_asub_tops = mctx->asub_tops * 2;
4358       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4359                                                    re_sub_match_top_t *,
4360                                                    new_asub_tops);
4361       if (BE (new_array == NULL, 0))
4362         return REG_ESPACE;
4363       mctx->sub_tops = new_array;
4364       mctx->asub_tops = new_asub_tops;
4365     }
4366   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4367   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4368     return REG_ESPACE;
4369   mctx->sub_tops[mctx->nsub_tops]->node = node;
4370   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4371   return REG_NOERROR;
4372 }
4373
4374 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4375    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4376
4377 static re_sub_match_last_t *
4378 internal_function
4379 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4380 {
4381   re_sub_match_last_t *new_entry;
4382   if (BE (subtop->nlasts == subtop->alasts, 0))
4383     {
4384       Idx new_alasts = 2 * subtop->alasts + 1;
4385       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4386                                                     re_sub_match_last_t *,
4387                                                     new_alasts);
4388       if (BE (new_array == NULL, 0))
4389         return NULL;
4390       subtop->lasts = new_array;
4391       subtop->alasts = new_alasts;
4392     }
4393   new_entry = calloc (1, sizeof (re_sub_match_last_t));
4394   if (BE (new_entry != NULL, 1))
4395     {
4396       subtop->lasts[subtop->nlasts] = new_entry;
4397       new_entry->node = node;
4398       new_entry->str_idx = str_idx;
4399       ++subtop->nlasts;
4400     }
4401   return new_entry;
4402 }
4403
4404 static void
4405 internal_function
4406 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4407                re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4408 {
4409   sctx->sifted_states = sifted_sts;
4410   sctx->limited_states = limited_sts;
4411   sctx->last_node = last_node;
4412   sctx->last_str_idx = last_str_idx;
4413   re_node_set_init_empty (&sctx->limits);
4414 }