X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fregexec.c;h=05a8e807e59bab31a1c2057f563332e0d2514a95;hb=1276a2c5f24c0c932426aca9c899fa524d2443f2;hp=598e456772d52a94433b768f5316f41f3063cd96;hpb=0f6dd806d5fd7c45d9366cd35c793958b14dd75b;p=gnulib.git diff --git a/lib/regexec.c b/lib/regexec.c index 598e45677..05a8e807e 100644 --- a/lib/regexec.c +++ b/lib/regexec.c @@ -1,22 +1,21 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free - Software Foundation, Inc. + Copyright (C) 2002-2014 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa . - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, Idx n) internal_function; @@ -52,9 +51,8 @@ static regoff_t re_search_stub (struct re_pattern_buffer *bufp, regoff_t range, Idx stop, struct re_registers *regs, bool ret_len) internal_function; -static unsigned int re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, - Idx nregs, int regs_allocated) - internal_function; +static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, + Idx nregs, int regs_allocated) internal_function; static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx) internal_function; static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match, @@ -201,7 +199,7 @@ static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa, static bool check_node_accept (const re_match_context_t *mctx, const re_token_t *node, Idx idx) internal_function; -static reg_errcode_t extend_buffers (re_match_context_t *mctx) +static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len) internal_function; /* Entry point for POSIX code. */ @@ -210,11 +208,11 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx) string STRING. If NMATCH is zero or REG_NOSUB was set in the cflags argument to - `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at + 'regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at least NMATCH elements, and we set them to the offsets of the corresponding matched substrings. - EFLAGS specifies `execution flags' which affect matching: if + EFLAGS specifies "execution flags" which affect matching: if REG_NOTBOL is set, then ^ does not match at the beginning of the string; if REG_NOTEOL is set, then $ does not match at the end. @@ -230,9 +228,7 @@ regexec (preg, string, nmatch, pmatch, eflags) { reg_errcode_t err; Idx start, length; -#ifdef _LIBC - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; -#endif + re_dfa_t *dfa = preg->buffer; if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) return REG_BADPAT; @@ -248,14 +244,14 @@ regexec (preg, string, nmatch, pmatch, eflags) length = strlen (string); } - __libc_lock_lock (dfa->lock); + lock_lock (dfa->lock); if (preg->no_sub) err = re_search_internal (preg, string, length, start, length, length, 0, NULL, eflags); else err = re_search_internal (preg, string, length, start, length, length, nmatch, pmatch, eflags); - __libc_lock_unlock (dfa->lock); + lock_unlock (dfa->lock); return err != REG_NOERROR; } @@ -366,7 +362,6 @@ weak_alias (__re_search_2, re_search_2) #endif static regoff_t -internal_function re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1, Idx length1, const char *string2, Idx length2, @@ -414,7 +409,6 @@ re_search_2_stub (struct re_pattern_buffer *bufp, otherwise the position of the match is returned. */ static regoff_t -internal_function re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length, Idx start, regoff_t range, Idx stop, struct re_registers *regs, @@ -425,9 +419,7 @@ re_search_stub (struct re_pattern_buffer *bufp, Idx nregs; regoff_t rval; int eflags = 0; -#ifdef _LIBC - re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; -#endif + re_dfa_t *dfa = bufp->buffer; Idx last_start = start + range; /* Check for out-of-range. */ @@ -438,7 +430,7 @@ re_search_stub (struct re_pattern_buffer *bufp, else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0)) last_start = 0; - __libc_lock_lock (dfa->lock); + lock_lock (dfa->lock); eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; eflags |= (bufp->not_eol) ? REG_NOTEOL : 0; @@ -478,9 +470,9 @@ re_search_stub (struct re_pattern_buffer *bufp, rval = 0; - /* I hope we needn't fill ther regs with -1's when no match was found. */ + /* I hope we needn't fill their regs with -1's when no match was found. */ if (result != REG_NOERROR) - rval = -1; + rval = result == REG_NOMATCH ? -1 : -2; else if (regs != NULL) { /* If caller wants register contents data back, copy them. */ @@ -502,19 +494,18 @@ re_search_stub (struct re_pattern_buffer *bufp, } re_free (pmatch); out: - __libc_lock_unlock (dfa->lock); + lock_unlock (dfa->lock); return rval; } -static unsigned int -internal_function +static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs, int regs_allocated) { int rval = REGS_REALLOCATE; Idx i; Idx need_regs = nregs + 1; - /* We need one extra element beyond `num_regs' for the `-1' marker GNU code + /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code uses. */ /* Have the register data arrays been allocated? */ @@ -637,7 +628,7 @@ re_exec (s) (0 <= LAST_START && LAST_START <= LENGTH) */ static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ re_search_internal (const regex_t *preg, const char *string, Idx length, Idx start, Idx last_start, Idx stop, @@ -645,7 +636,7 @@ re_search_internal (const regex_t *preg, int eflags) { reg_errcode_t err; - const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; + const re_dfa_t *dfa = preg->buffer; Idx left_lim, right_lim; int incr; bool fl_longest_match; @@ -720,7 +711,8 @@ re_search_internal (const regex_t *preg, if (nmatch > 1 || dfa->has_mb_node) { /* Avoid overflow. */ - if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0)) + if (BE ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) + <= mctx.input.bufs_len), 0)) { err = REG_ESPACE; goto free_return; @@ -740,7 +732,7 @@ re_search_internal (const regex_t *preg, mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF : CONTEXT_NEWLINE | CONTEXT_BEGBUF; - /* Check incrementally whether of not the input string match. */ + /* Check incrementally whether the input string matches. */ incr = (last_start < start) ? -1 : 1; left_lim = (last_start < start) ? last_start : start; right_lim = (last_start < start) ? start : last_start; @@ -833,10 +825,10 @@ re_search_internal (const regex_t *preg, break; match_first += incr; if (match_first < left_lim || match_first > right_lim) - { - err = REG_NOMATCH; - goto free_return; - } + { + err = REG_NOMATCH; + goto free_return; + } } break; } @@ -922,7 +914,7 @@ re_search_internal (const regex_t *preg, goto free_return; } - /* At last, add the offset to the each registers, since we slided + /* At last, add the offset to each register, since we slid the buffers so that we could assume that the matching starts from 0. */ for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) @@ -953,14 +945,14 @@ re_search_internal (const regex_t *preg, } if (dfa->subexp_map) - for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) - if (dfa->subexp_map[reg_idx] != reg_idx) - { - pmatch[reg_idx + 1].rm_so - = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; - pmatch[reg_idx + 1].rm_eo - = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; - } + for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) + if (dfa->subexp_map[reg_idx] != reg_idx) + { + pmatch[reg_idx + 1].rm_so + = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; + pmatch[reg_idx + 1].rm_eo + = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; + } } free_return: @@ -972,7 +964,7 @@ re_search_internal (const regex_t *preg, } static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ prune_impossible_nodes (re_match_context_t *mctx) { const re_dfa_t *const dfa = mctx->dfa; @@ -988,7 +980,7 @@ prune_impossible_nodes (re_match_context_t *mctx) halt_node = mctx->last_node; /* Avoid overflow. */ - if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0)) + if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) <= match_last, 0)) return REG_ESPACE; sifted_states = re_malloc (re_dfastate_t *, match_last + 1); @@ -1068,7 +1060,7 @@ prune_impossible_nodes (re_match_context_t *mctx) since initial states may have constraints like "\<", "^", etc.. */ static inline re_dfastate_t * -__attribute ((always_inline)) internal_function +__attribute__ ((always_inline)) internal_function acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, Idx idx) { @@ -1106,7 +1098,7 @@ acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, FL_LONGEST_MATCH means we want the POSIX longest matching. If P_MATCH_FIRST is not NULL, and the match fails, it is set to the next place where we may want to try matching. - Note that the matcher assume that the maching starts from the current + Note that the matcher assumes that the matching starts from the current index of the buffer. */ static Idx @@ -1149,7 +1141,7 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match, { err = transit_state_bkref (mctx, &cur_state->nodes); if (BE (err != REG_NOERROR, 0)) - return err; + return err; } } } @@ -1175,17 +1167,18 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match, re_dfastate_t *old_state = cur_state; Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1; - if (BE (next_char_idx >= mctx->input.bufs_len, 0) - || (BE (next_char_idx >= mctx->input.valid_len, 0) - && mctx->input.valid_len < mctx->input.len)) - { - err = extend_buffers (mctx); - if (BE (err != REG_NOERROR, 0)) + if ((BE (next_char_idx >= mctx->input.bufs_len, 0) + && mctx->input.bufs_len < mctx->input.len) + || (BE (next_char_idx >= mctx->input.valid_len, 0) + && mctx->input.valid_len < mctx->input.len)) + { + err = extend_buffers (mctx, next_char_idx + 1); + if (BE (err != REG_NOERROR, 0)) { assert (err == REG_ESPACE); return REG_ERROR; } - } + } cur_state = transit_state (&err, mctx, cur_state); if (mctx->state_log != NULL) @@ -1309,17 +1302,17 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, if (dest_node == REG_MISSING) dest_node = candidate; - else + else { /* In order to avoid infinite loop like "(a*)*", return the second - epsilon-transition if the first was already considered. */ + epsilon-transition if the first was already considered. */ if (re_node_set_contains (eps_via_nodes, dest_node)) - return candidate; + return candidate; /* Otherwise, push the second epsilon-transition on the fail stack. */ else if (fs != NULL && push_fail_stack (fs, *pidx, candidate, nregs, regs, - eps_via_nodes)) + eps_via_nodes)) return REG_ERROR; /* We know we are going to exit. */ @@ -1436,7 +1429,7 @@ internal_function __attribute_warn_unused_result__ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, regmatch_t *pmatch, bool fl_backtrack) { - const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; + const re_dfa_t *dfa = preg->buffer; Idx idx, cur_node; re_node_set eps_via_nodes; struct re_fail_stack_t *fs; @@ -1608,21 +1601,21 @@ update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, and sift the nodes in each states according to the following rules. Updated state_log will be wrote to STATE_LOG. - Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... + Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if... 1. When STR_IDX == MATCH_LAST(the last index in the state_log): - If `a' isn't the LAST_NODE and `a' can't epsilon transit to - the LAST_NODE, we throw away the node `a'. - 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts - string `s' and transit to `b': + If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to + the LAST_NODE, we throw away the node 'a'. + 2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts + string 's' and transit to 'b': i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw - away the node `a'. + away the node 'a'. ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is - thrown away, we throw away the node `a'. + thrown away, we throw away the node 'a'. 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the - node `a'. + node 'a'. ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, - we throw away the node `a'. */ + we throw away the node 'a'. */ #define STATE_NODE_CONTAINS(state,node) \ ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) @@ -1667,7 +1660,7 @@ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) if (mctx->state_log[str_idx]) { err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); - if (BE (err != REG_NOERROR, 0)) + if (BE (err != REG_NOERROR, 0)) goto free_return; } @@ -1695,11 +1688,11 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, Idx i; /* Then build the next sifted state. - We build the next sifted state on `cur_dest', and update - `sifted_states[str_idx]' with `cur_dest'. + We build the next sifted state on 'cur_dest', and update + 'sifted_states[str_idx]' with 'cur_dest'. Note: - `cur_dest' is the sifted state from `state_log[str_idx + 1]'. - `cur_src' points the node_set of the old `state_log[str_idx]' + 'cur_dest' is the sifted state from 'state_log[str_idx + 1]'. + 'cur_src' points the node_set of the old 'state_log[str_idx]' (with the epsilon nodes pre-filtered out). */ for (i = 0; i < cur_src->nelem; i++) { @@ -1712,7 +1705,7 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, assert (!IS_EPSILON_NODE (type)); #endif #ifdef RE_ENABLE_I18N - /* If the node may accept `multi byte'. */ + /* If the node may accept "multi byte". */ if (dfa->nodes[prev_node].accept_mb) naccepted = sift_states_iter_mb (mctx, sctx, prev_node, str_idx, sctx->last_str_idx); @@ -1753,12 +1746,13 @@ clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx) { Idx top = mctx->state_log_top; - if (next_state_log_idx >= mctx->input.bufs_len + if ((next_state_log_idx >= mctx->input.bufs_len + && mctx->input.bufs_len < mctx->input.len) || (next_state_log_idx >= mctx->input.valid_len && mctx->input.valid_len < mctx->input.len)) { reg_errcode_t err; - err = extend_buffers (mctx); + err = extend_buffers (mctx, next_state_log_idx + 1); if (BE (err != REG_NOERROR, 0)) return err; } @@ -1982,7 +1976,7 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, { struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; do - { + { Idx dst; int cpos; @@ -2004,9 +1998,9 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, if (dst == from_node) { if (boundaries & 1) - return -1; + return -1; else /* if (boundaries & 2) */ - return 0; + return 0; } cpos = @@ -2020,7 +2014,7 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, if (subexp_idx < BITSET_WORD_BITS) ent->eps_reachable_subexps_map &= ~((bitset_word_t) 1 << subexp_idx); - } + } while (ent++->more); } break; @@ -2245,7 +2239,7 @@ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, re_node_set_remove (&local_sctx.limits, enabled_idx); /* mctx->bkref_ents may have changed, reload the pointer. */ - entry = mctx->bkref_ents + enabled_idx; + entry = mctx->bkref_ents + enabled_idx; } while (enabled_idx++, entry++->more); } @@ -2268,17 +2262,17 @@ sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, { const re_dfa_t *const dfa = mctx->dfa; int naccepted; - /* Check the node can accept `multi byte'. */ + /* Check the node can accept "multi byte". */ naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); if (naccepted > 0 && str_idx + naccepted <= max_str_idx && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], dfa->nexts[node_idx])) - /* The node can't accept the `multi byte', or the + /* The node can't accept the "multi byte", or the destination was already thrown away, then the node - could't accept the current input `multi byte'. */ + could't accept the current input "multi byte". */ naccepted = 0; /* Otherwise, it is sure that the node could accept - `naccepted' bytes input. */ + 'naccepted' bytes input. */ return naccepted; } #endif /* RE_ENABLE_I18N */ @@ -2326,7 +2320,7 @@ transit_state (reg_errcode_t *err, re_match_context_t *mctx, trtable = state->word_trtable; if (BE (trtable != NULL, 1)) - { + { unsigned int context; context = re_string_context_at (&mctx->input, @@ -2372,21 +2366,21 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, unsigned int context; re_node_set next_nodes, *log_nodes, *table_nodes = NULL; /* If (state_log[cur_idx] != 0), it implies that cur_idx is - the destination of a multibyte char/collating element/ - back reference. Then the next state is the union set of - these destinations and the results of the transition table. */ + the destination of a multibyte char/collating element/ + back reference. Then the next state is the union set of + these destinations and the results of the transition table. */ pstate = mctx->state_log[cur_idx]; log_nodes = pstate->entrance_nodes; if (next_state != NULL) - { - table_nodes = next_state->entrance_nodes; - *err = re_node_set_init_union (&next_nodes, table_nodes, + { + table_nodes = next_state->entrance_nodes; + *err = re_node_set_init_union (&next_nodes, table_nodes, log_nodes); - if (BE (*err != REG_NOERROR, 0)) + if (BE (*err != REG_NOERROR, 0)) return NULL; - } + } else - next_nodes = *log_nodes; + next_nodes = *log_nodes; /* Note: We already add the nodes of the initial state, then we don't need to add them here. */ @@ -2394,12 +2388,12 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, re_string_cur_idx (&mctx->input) - 1, mctx->eflags); next_state = mctx->state_log[cur_idx] - = re_acquire_state_context (err, dfa, &next_nodes, context); + = re_acquire_state_context (err, dfa, &next_nodes, context); /* We don't need to check errors here, since the return value of - this function is next_state and ERR is already set. */ + this function is next_state and ERR is already set. */ if (table_nodes != NULL) - re_node_set_free (&next_nodes); + re_node_set_free (&next_nodes); } if (BE (dfa->nbackref, 0) && next_state != NULL) @@ -2440,9 +2434,9 @@ find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) do { - if (++cur_str_idx > max) - return NULL; - re_string_skip_bytes (&mctx->input, 1); + if (++cur_str_idx > max) + return NULL; + re_string_skip_bytes (&mctx->input, 1); } while (mctx->state_log[cur_str_idx] == NULL); @@ -2457,7 +2451,7 @@ find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) /* From the node set CUR_NODES, pick up the nodes whose types are OP_OPEN_SUBEXP and which have corresponding back references in the regular expression. And register them to use them later for evaluating the - correspoding back references. */ + corresponding back references. */ static reg_errcode_t internal_function @@ -2550,7 +2544,7 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) re_dfastate_t *dest_state; if (!dfa->nodes[cur_node_idx].accept_mb) - continue; + continue; if (dfa->nodes[cur_node_idx].constraint) { @@ -2568,7 +2562,7 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) if (naccepted == 0) continue; - /* The node can accepts `naccepted' bytes. */ + /* The node can accepts 'naccepted' bytes. */ dest_idx = re_string_cur_idx (&mctx->input) + naccepted; mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted : mctx->max_mb_elem_len); @@ -2620,7 +2614,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) const re_token_t *node = dfa->nodes + node_idx; re_node_set *new_dest_nodes; - /* Check whether `node' is a backreference or not. */ + /* Check whether 'node' is a backreference or not. */ if (node->type != OP_BACK_REF) continue; @@ -2632,14 +2626,14 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) continue; } - /* `node' is a backreference. + /* 'node' is a backreference. Check the substring which the substring matched. */ bkc_idx = mctx->nbkref_ents; err = get_subexp (mctx, node_idx, cur_str_idx); if (BE (err != REG_NOERROR, 0)) goto free_return; - /* And add the epsilon closures (which is `new_dest_nodes') of + /* And add the epsilon closures (which is 'new_dest_nodes') of the backreference to appropriate state_log. */ #ifdef DEBUG assert (dfa->nexts[node_idx] != REG_MISSING); @@ -2663,7 +2657,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) dest_state = mctx->state_log[dest_str_idx]; prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 : mctx->state_log[cur_str_idx]->nodes.nelem); - /* Add `new_dest_node' to state_log. */ + /* Add 'new_dest_node' to state_log. */ if (dest_state == NULL) { mctx->state_log[dest_str_idx] @@ -2731,7 +2725,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) const struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx; do - if (entry->node == bkref_node) + if (entry->node == bkref_node) return REG_NOERROR; /* We already checked it. */ while (entry++->more); } @@ -2815,7 +2809,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) if (bkref_str_off >= mctx->input.len) break; - err = extend_buffers (mctx); + err = extend_buffers (mctx, bkref_str_off + 1); if (BE (err != REG_NOERROR, 0)) return err; @@ -2937,9 +2931,12 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, { re_dfastate_t **new_array; Idx old_alloc = path->alloc; - Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1; - if (BE (new_alloc < old_alloc, 0) - || BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0)) + Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1; + Idx new_alloc; + if (BE (IDX_MAX - old_alloc < incr_alloc, 0)) + return REG_ESPACE; + new_alloc = old_alloc + incr_alloc; + if (BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0)) return REG_ESPACE; new_array = re_realloc (path->array, re_dfastate_t *, new_alloc); if (BE (new_array == NULL, 0)) @@ -3102,7 +3099,7 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, assert (!IS_EPSILON_NODE (type)); #endif #ifdef RE_ENABLE_I18N - /* If the node may accept `multi byte'. */ + /* If the node may accept "multi byte". */ if (dfa->nodes[cur_node].accept_mb) { naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input, @@ -3359,7 +3356,7 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) bitset_word_t elem, mask; bool dests_node_malloced = false; bool dest_states_malloced = false; - Idx ndests; /* Number of the destination states from `state'. */ + Idx ndests; /* Number of the destination states from 'state'. */ re_dfastate_t **trtable; re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; re_node_set follows, *dests_node; @@ -3373,8 +3370,8 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) } *dests_alloc; /* We build DFA states which corresponds to the destination nodes - from `state'. `dests_node[i]' represents the nodes which i-th - destination state contains, and `dests_ch[i]' represents the + from 'state'. 'dests_node[i]' represents the nodes which i-th + destination state contains, and 'dests_ch[i]' represents the characters which i-th destination state accepts. */ if (__libc_use_alloca (sizeof (struct dests_alloc))) dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc)); @@ -3388,20 +3385,23 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) dests_node = dests_alloc->dests_node; dests_ch = dests_alloc->dests_ch; - /* Initialize transiton table. */ + /* Initialize transition table. */ state->word_trtable = state->trtable = NULL; - /* At first, group all nodes belonging to `state' into several + /* At first, group all nodes belonging to 'state' into several destinations. */ ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0)) { if (dests_node_malloced) free (dests_alloc); + /* Return false in case of an error, true otherwise. */ if (ndests == 0) { state->trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); + if (BE (state->trtable == NULL, 0)) + return false; return true; } return false; @@ -3591,13 +3591,13 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, reg_errcode_t err; bool ok; Idx i, j, k; - Idx ndests; /* Number of the destinations from `state'. */ + Idx ndests; /* Number of the destinations from 'state'. */ bitset_t accepts; /* Characters a node can accept. */ const re_node_set *cur_nodes = &state->nodes; bitset_empty (accepts); ndests = 0; - /* For all the nodes belonging to `state', */ + /* For all the nodes belonging to 'state', */ for (i = 0; i < cur_nodes->nelem; ++i) { re_token_t *node = &dfa->nodes[cur_nodes->elems[i]]; @@ -3626,7 +3626,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, } #ifdef RE_ENABLE_I18N else if (type == OP_UTF8_PERIOD) - { + { if (ASCII_CHARS % BITSET_WORD_BITS == 0) memset (accepts, -1, ASCII_CHARS / CHAR_BIT); else @@ -3635,12 +3635,12 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, bitset_clear (accepts, '\n'); if (dfa->syntax & RE_DOT_NOT_NULL) bitset_clear (accepts, '\0'); - } + } #endif else continue; - /* Check the `accepts' and sift the characters which are not + /* Check the 'accepts' and sift the characters which are not match it the context. */ if (constraint) { @@ -3699,7 +3699,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, } } - /* Then divide `accepts' into DFA states, or create a new + /* Then divide 'accepts' into DFA states, or create a new state. Above, we make sure that accepts is not empty. */ for (j = 0; j < ndests; ++j) { @@ -3712,7 +3712,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) continue; - /* Enumerate the intersection set of this state and `accepts'. */ + /* Enumerate the intersection set of this state and 'accepts'. */ has_intersec = 0; for (k = 0; k < BITSET_WORDS; ++k) has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; @@ -3720,7 +3720,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, if (!has_intersec) continue; - /* Then check if this state is a subset of `accepts'. */ + /* Then check if this state is a subset of 'accepts'. */ not_subset = not_consumed = 0; for (k = 0; k < BITSET_WORDS; ++k) { @@ -3728,8 +3728,8 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; } - /* If this state isn't a subset of `accepts', create a - new group state, which has the `remains'. */ + /* If this state isn't a subset of 'accepts', create a + new group state, which has the 'remains'. */ if (not_subset) { bitset_copy (dests_ch[ndests], remains); @@ -3768,7 +3768,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, } #ifdef RE_ENABLE_I18N -/* Check how many bytes the node `dfa->nodes[node_idx]' accepts. +/* Check how many bytes the node 'dfa->nodes[node_idx]' accepts. Return the number of the bytes the node accepts. STR_IDX is the current index of the input string. @@ -3840,7 +3840,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, if (node->type == OP_PERIOD) { if (char_len <= 1) - return 0; + return 0; /* FIXME: I don't think this if is needed, as both '\n' and '\0' are char_len == 1. */ /* '.' accepts any one character except the following two cases. */ @@ -3895,7 +3895,6 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, const int32_t *table, *indirect; const unsigned char *weights, *extra; const char *collseqwc; - int32_t idx; /* This #include defines a local function! */ # include @@ -3933,6 +3932,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, in_collseq = find_collation_sequence_value (pin, elem_len); } /* match with range expression? */ + /* FIXME: Implement rational ranges here, too. */ for (i = 0; i < cset->nranges; ++i) if (cset->range_starts[i] <= in_collseq && in_collseq <= cset->range_ends[i]) @@ -3953,7 +3953,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); - int32_t idx = findidx (&cp); + int32_t idx = findidx (&cp, elem_len); if (idx > 0) for (i = 0; i < cset->nequiv_classes; ++i) { @@ -3984,18 +3984,9 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, # endif /* _LIBC */ { /* match with range expression? */ -#if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && __STRICT_ANSI__) - wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; -#else - wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; - cmp_buf[2] = wc; -#endif for (i = 0; i < cset->nranges; ++i) { - cmp_buf[0] = cset->range_starts[i]; - cmp_buf[4] = cset->range_ends[i]; - if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 - && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i]) { match_len = char_len; goto check_node_accept_bytes_match; @@ -4065,7 +4056,7 @@ find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) /* Skip the collation sequence value. */ idx += sizeof (uint32_t); /* Skip the wide char sequence of the collating element. */ - idx = idx + sizeof (uint32_t) * (extra[idx] + 1); + idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1); /* If we found the entry, return the sequence value. */ if (found) return *(uint32_t *) (extra + idx); @@ -4133,17 +4124,20 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node, static reg_errcode_t internal_function __attribute_warn_unused_result__ -extend_buffers (re_match_context_t *mctx) +extend_buffers (re_match_context_t *mctx, int min_len) { reg_errcode_t ret; re_string_t *pstr = &mctx->input; /* Avoid overflow. */ - if (BE (SIZE_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0)) + if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2 + <= pstr->bufs_len, 0)) return REG_ESPACE; - /* Double the lengthes of the buffers. */ - ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); + /* Double the lengths of the buffers, but allocate at least MIN_LEN. */ + ret = re_string_realloc_buffers (pstr, + MAX (min_len, + MIN (pstr->len, pstr->bufs_len * 2))); if (BE (ret != REG_NOERROR, 0)) return ret; @@ -4206,7 +4200,7 @@ match_ctx_init (re_match_context_t *mctx, int eflags, Idx n) size_t max_object_size = MAX (sizeof (struct re_backref_cache_entry), sizeof (re_sub_match_top_t *)); - if (BE (SIZE_MAX / max_object_size < n, 0)) + if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n, 0)) return REG_ESPACE; mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);