X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fregexec.c;h=660d25b5813b3d5ac0664a2886e228cefc99e6ed;hb=ab624c1a8f744419fdb653b77250153a3563203f;hp=230f10d427b3b31fa0fc1e81235e50aa28e784e3;hpb=28492cce5d1bf8def08218e91543f118db2d3ff8;p=gnulib.git diff --git a/lib/regexec.c b/lib/regexec.c index 230f10d42..660d25b58 100644 --- a/lib/regexec.c +++ b/lib/regexec.c @@ -1,5 +1,5 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. + Copyright (C) 2002-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa . @@ -14,8 +14,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + with this program; if not, see . */ static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, Idx n) internal_function; @@ -24,7 +23,7 @@ static void match_ctx_free (re_match_context_t *cache) internal_function; static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node, Idx str_idx, Idx from, Idx to) internal_function; -static Idx search_cur_bkref_entry (re_match_context_t *mctx, Idx str_idx) +static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx) internal_function; static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx) internal_function; @@ -37,7 +36,7 @@ static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, internal_function; static reg_errcode_t re_search_internal (const regex_t *preg, const char *string, Idx length, - Idx start, regoff_t range, Idx stop, + Idx start, Idx last_start, Idx stop, size_t nmatch, regmatch_t pmatch[], int eflags) internal_function; static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp, @@ -45,73 +44,84 @@ static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp, const char *string2, Idx length2, Idx start, regoff_t range, struct re_registers *regs, - Idx stop, int ret_len) internal_function; + Idx stop, bool ret_len) internal_function; static regoff_t re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length, Idx start, regoff_t range, Idx stop, struct re_registers *regs, - int ret_len) internal_function; + bool ret_len) internal_function; static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, - Idx nregs, int regs_allocated) internal_function; + Idx nregs, int regs_allocated) internal_function; static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx) internal_function; -static Idx check_matching (re_match_context_t *mctx, int fl_longest_match, - Idx *p_match_first) - internal_function; +static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match, + Idx *p_match_first) internal_function; static Idx check_halt_state_context (const re_match_context_t *mctx, const re_dfastate_t *state, Idx idx) internal_function; -static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, +static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch) internal_function; static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, Idx nregs, regmatch_t *regs, - re_node_set *eps_via_nodes) internal_function; + re_node_set *eps_via_nodes) + internal_function; static reg_errcode_t set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, regmatch_t *pmatch, - int fl_backtrack) internal_function; -static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) internal_function; + bool fl_backtrack) internal_function; +static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) + internal_function; #ifdef RE_ENABLE_I18N static int sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, - Idx node_idx, Idx str_idx, Idx max_str_idx) internal_function; + Idx node_idx, Idx str_idx, Idx max_str_idx) + internal_function; #endif /* RE_ENABLE_I18N */ -static reg_errcode_t sift_states_backward (re_match_context_t *mctx, - re_sift_context_t *sctx) internal_function; -static reg_errcode_t build_sifted_states (re_match_context_t *mctx, +static reg_errcode_t sift_states_backward (const re_match_context_t *mctx, + re_sift_context_t *sctx) + internal_function; +static reg_errcode_t build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, Idx str_idx, - re_node_set *cur_dest) internal_function; -static reg_errcode_t update_cur_sifted_state (re_match_context_t *mctx, + re_node_set *cur_dest) + internal_function; +static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx, re_sift_context_t *sctx, Idx str_idx, - re_node_set *dest_nodes) internal_function; -static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa, + re_node_set *dest_nodes) + internal_function; +static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, - const re_node_set *candidates) internal_function; -static int check_dst_limits (re_match_context_t *mctx, re_node_set *limits, - Idx dst_node, Idx dst_idx, Idx src_node, - Idx src_idx) internal_function; -static int check_dst_limits_calc_pos_1 (re_match_context_t *mctx, + const re_node_set *candidates) + internal_function; +static bool check_dst_limits (const re_match_context_t *mctx, + const re_node_set *limits, + Idx dst_node, Idx dst_idx, Idx src_node, + Idx src_idx) internal_function; +static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, Idx subexp_idx, - Idx from_node, Idx bkref_idx) internal_function; -static int check_dst_limits_calc_pos (re_match_context_t *mctx, + Idx from_node, Idx bkref_idx) + internal_function; +static int check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit, Idx subexp_idx, Idx node, Idx str_idx, Idx bkref_idx) internal_function; -static reg_errcode_t check_subexp_limits (re_dfa_t *dfa, +static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, const re_node_set *candidates, re_node_set *limits, struct re_backref_cache_entry *bkref_ents, Idx str_idx) internal_function; -static reg_errcode_t sift_states_bkref (re_match_context_t *mctx, +static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, - Idx str_idx, const re_node_set *candidates) internal_function; -static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst, - re_dfastate_t **src, Idx num) internal_function; + Idx str_idx, const re_node_set *candidates) + internal_function; +static reg_errcode_t merge_state_array (const re_dfa_t *dfa, + re_dfastate_t **dst, + re_dfastate_t **src, Idx num) + internal_function; static re_dfastate_t *find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) internal_function; static re_dfastate_t *transit_state (reg_errcode_t *err, @@ -119,27 +129,33 @@ static re_dfastate_t *transit_state (reg_errcode_t *err, re_dfastate_t *state) internal_function; static re_dfastate_t *merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, - re_dfastate_t *next_state) internal_function; + re_dfastate_t *next_state) + internal_function; static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, Idx str_idx) internal_function; #if 0 static re_dfastate_t *transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, - re_dfastate_t *pstate) internal_function; + re_dfastate_t *pstate) + internal_function; #endif #ifdef RE_ENABLE_I18N static reg_errcode_t transit_state_mb (re_match_context_t *mctx, - re_dfastate_t *pstate) internal_function; + re_dfastate_t *pstate) + internal_function; #endif /* RE_ENABLE_I18N */ static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, - const re_node_set *nodes) internal_function; + const re_node_set *nodes) + internal_function; static reg_errcode_t get_subexp (re_match_context_t *mctx, - Idx bkref_node, Idx bkref_str_idx) internal_function; + Idx bkref_node, Idx bkref_str_idx) + internal_function; static reg_errcode_t get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, re_sub_match_last_t *sub_last, - Idx bkref_node, Idx bkref_str) internal_function; + Idx bkref_node, Idx bkref_str) + internal_function; static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, Idx subexp_idx, int type) internal_function; static reg_errcode_t check_arrival (re_match_context_t *mctx, @@ -149,34 +165,41 @@ static reg_errcode_t check_arrival (re_match_context_t *mctx, static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, re_node_set *cur_nodes, - re_node_set *next_nodes) internal_function; -static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa, + re_node_set *next_nodes) + internal_function; +static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, - Idx ex_subexp, int type) internal_function; -static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa, + Idx ex_subexp, int type) + internal_function; +static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, Idx target, Idx ex_subexp, int type) internal_function; static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, Idx cur_str, - Idx subexp_num, int type) internal_function; -static int build_trtable (re_dfa_t *dfa, - re_dfastate_t *state) internal_function; + Idx subexp_num, int type) + internal_function; +static bool build_trtable (const re_dfa_t *dfa, + re_dfastate_t *state) internal_function; #ifdef RE_ENABLE_I18N -static int check_node_accept_bytes (re_dfa_t *dfa, Idx node_idx, - const re_string_t *input, Idx idx) internal_function; +static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, + const re_string_t *input, Idx idx) + internal_function; # ifdef _LIBC static unsigned int find_collation_sequence_value (const unsigned char *mbs, - size_t name_len) internal_function; + size_t name_len) + internal_function; # endif /* _LIBC */ #endif /* RE_ENABLE_I18N */ -static Idx group_nodes_into_DFAstates (re_dfa_t *dfa, +static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, re_node_set *states_node, - bitset *states_ch) internal_function; -static int check_node_accept (const re_match_context_t *mctx, - const re_token_t *node, Idx idx) internal_function; -static reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function; + bitset_t *states_ch) internal_function; +static bool check_node_accept (const re_match_context_t *mctx, + const re_token_t *node, Idx idx) + internal_function; +static reg_errcode_t extend_buffers (re_match_context_t *mctx) + internal_function; /* Entry point for POSIX code. */ @@ -184,24 +207,28 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function string STRING. If NMATCH is zero or REG_NOSUB was set in the cflags argument to - `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at + 'regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at least NMATCH elements, and we set them to the offsets of the corresponding matched substrings. - EFLAGS specifies `execution flags' which affect matching: if + EFLAGS specifies "execution flags" which affect matching: if REG_NOTBOL is set, then ^ does not match at the beginning of the string; if REG_NOTEOL is set, then $ does not match at the end. We return 0 if we find a match and REG_NOMATCH if not. */ int -regexec (const regex_t *__restrict preg, const char *__restrict string, - size_t nmatch, regmatch_t pmatch[], int eflags) +regexec (preg, string, nmatch, pmatch, eflags) + const regex_t *_Restrict_ preg; + const char *_Restrict_ string; + size_t nmatch; + regmatch_t pmatch[_Restrict_arr_]; + int eflags; { reg_errcode_t err; Idx start, length; #ifdef _LIBC - re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; #endif if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) @@ -219,11 +246,11 @@ regexec (const regex_t *__restrict preg, const char *__restrict string, } __libc_lock_lock (dfa->lock); - if (preg->re_no_sub) - err = re_search_internal (preg, string, length, start, length - start, + if (preg->no_sub) + err = re_search_internal (preg, string, length, start, length, length, 0, NULL, eflags); else - err = re_search_internal (preg, string, length, start, length - start, + err = re_search_internal (preg, string, length, start, length, length, nmatch, pmatch, eflags); __libc_lock_unlock (dfa->lock); return err != REG_NOERROR; @@ -238,8 +265,8 @@ __typeof__ (__regexec) __compat_regexec; int attribute_compat_text_section -__compat_regexec (const regex_t *__restrict preg, - const char *__restrict string, size_t nmatch, +__compat_regexec (const regex_t *_Restrict_ preg, + const char *_Restrict_ string, size_t nmatch, regmatch_t pmatch[], int eflags) { return regexec (preg, string, nmatch, pmatch, @@ -269,8 +296,8 @@ compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0); the first STOP characters of the concatenation of the strings should be concerned. - If REGS is not NULL, and BUFP->re_no_sub is not set, the offsets of the match - and all groups is stroed in REGS. (For the "_2" variants, the offsets are + If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match + and all groups is stored in REGS. (For the "_2" variants, the offsets are computed relative to the concatenation, not relative to the individual strings.) @@ -279,79 +306,92 @@ compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0); match was found and -2 indicates an internal error. */ regoff_t -re_match (struct re_pattern_buffer *bufp, const char *string, - Idx length, Idx start, struct re_registers *regs) +re_match (bufp, string, length, start, regs) + struct re_pattern_buffer *bufp; + const char *string; + Idx length, start; + struct re_registers *regs; { - return re_search_stub (bufp, string, length, start, 0, length, regs, 1); + return re_search_stub (bufp, string, length, start, 0, length, regs, true); } #ifdef _LIBC weak_alias (__re_match, re_match) #endif regoff_t -re_search (struct re_pattern_buffer *bufp, const char *string, - Idx length, Idx start, regoff_t range, struct re_registers *regs) +re_search (bufp, string, length, start, range, regs) + struct re_pattern_buffer *bufp; + const char *string; + Idx length, start; + regoff_t range; + struct re_registers *regs; { - return re_search_stub (bufp, string, length, start, range, length, regs, 0); + return re_search_stub (bufp, string, length, start, range, length, regs, + false); } #ifdef _LIBC weak_alias (__re_search, re_search) #endif regoff_t -re_match_2 (struct re_pattern_buffer *bufp, - const char *string1, Idx length1, - const char *string2, Idx length2, - Idx start, struct re_registers *regs, Idx stop) +re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + Idx length1, length2, start, stop; + struct re_registers *regs; { return re_search_2_stub (bufp, string1, length1, string2, length2, - start, 0, regs, stop, 1); + start, 0, regs, stop, true); } #ifdef _LIBC weak_alias (__re_match_2, re_match_2) #endif regoff_t -re_search_2 (struct re_pattern_buffer *bufp, - const char *string1, Idx length1, - const char *string2, Idx length2, - Idx start, regoff_t range, struct re_registers *regs, Idx stop) +re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + Idx length1, length2, start, stop; + regoff_t range; + struct re_registers *regs; { return re_search_2_stub (bufp, string1, length1, string2, length2, - start, range, regs, stop, 0); + start, range, regs, stop, false); } #ifdef _LIBC weak_alias (__re_search_2, re_search_2) #endif static regoff_t -internal_function re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1, Idx length1, const char *string2, Idx length2, Idx start, regoff_t range, struct re_registers *regs, - Idx stop, int ret_len) + Idx stop, bool ret_len) { const char *str; regoff_t rval; Idx len = length1 + length2; - int free_str = 0; + char *s = NULL; - if (BE (length1 < 0 || length2 < 0 || stop < 0, 0)) + if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0)) return -2; /* Concatenate the strings. */ if (length2 > 0) if (length1 > 0) { - char *s = re_malloc (char, len); + s = re_malloc (char, len); if (BE (s == NULL, 0)) return -2; +#ifdef _LIBC + memcpy (__mempcpy (s, string1, length1), string2, length2); +#else memcpy (s, string1, length1); memcpy (s + length1, string2, length2); +#endif str = s; - free_str = 1; } else str = string2; @@ -360,22 +400,20 @@ re_search_2_stub (struct re_pattern_buffer *bufp, rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len); - if (free_str) - re_free ((char *) str); + re_free (s); return rval; } /* The parameters have the same meaning as those of re_search. Additional parameters: - If RET_LEN is nonzero the length of the match is returned (re_match style); + If RET_LEN is true the length of the match is returned (re_match style); otherwise the position of the match is returned. */ static regoff_t -internal_function re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length, Idx start, regoff_t range, Idx stop, struct re_registers *regs, - int ret_len) + bool ret_len) { reg_errcode_t result; regmatch_t *pmatch; @@ -383,48 +421,37 @@ re_search_stub (struct re_pattern_buffer *bufp, regoff_t rval; int eflags = 0; #ifdef _LIBC - re_dfa_t *dfa = (re_dfa_t *) bufp->re_buffer; + re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; #endif + Idx last_start = start + range; /* Check for out-of-range. */ if (BE (start < 0 || start > length, 0)) return -1; - if (sizeof start < sizeof range) - { - regoff_t length_offset = length; - regoff_t start_offset = start; - if (BE (length_offset - start_offset < range, 0)) - range = length_offset - start_offset; - else if (BE (range < - start_offset, 0)) - range = -start_offset; - } - else - { - if (BE (start + range > length, 0)) - range = length - start; - else if (BE (start + range < 0, 0)) - range = -start; - } + if (BE (length < last_start || (0 <= range && last_start < start), 0)) + last_start = length; + else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0)) + last_start = 0; __libc_lock_lock (dfa->lock); - eflags |= (bufp->re_not_bol) ? REG_NOTBOL : 0; - eflags |= (bufp->re_not_eol) ? REG_NOTEOL : 0; + eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; + eflags |= (bufp->not_eol) ? REG_NOTEOL : 0; /* Compile fastmap if we haven't yet. */ - if (range > 0 && bufp->re_fastmap != NULL && !bufp->re_fastmap_accurate) + if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate) re_compile_fastmap (bufp); - if (BE (bufp->re_no_sub, 0)) + if (BE (bufp->no_sub, 0)) regs = NULL; /* We need at least 1 register. */ if (regs == NULL) nregs = 1; - else if (BE (bufp->re_regs_allocated == REG_FIXED - && regs->rm_num_regs < bufp->re_nsub + 1, 0)) + else if (BE (bufp->regs_allocated == REGS_FIXED + && regs->num_regs <= bufp->re_nsub, 0)) { - nregs = regs->rm_num_regs; + nregs = regs->num_regs; if (BE (nregs < 1, 0)) { /* Nothing can be copied to regs. */ @@ -441,7 +468,7 @@ re_search_stub (struct re_pattern_buffer *bufp, goto out; } - result = re_search_internal (bufp, string, length, start, range, stop, + result = re_search_internal (bufp, string, length, start, last_start, stop, nregs, pmatch, eflags); rval = 0; @@ -452,9 +479,9 @@ re_search_stub (struct re_pattern_buffer *bufp, else if (regs != NULL) { /* If caller wants register contents data back, copy them. */ - bufp->re_regs_allocated = re_copy_regs (regs, pmatch, nregs, - bufp->re_regs_allocated); - if (BE (bufp->re_regs_allocated == REG_UNALLOCATED, 0)) + bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs, + bufp->regs_allocated); + if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0)) rval = -2; } @@ -475,57 +502,66 @@ re_search_stub (struct re_pattern_buffer *bufp, } static unsigned -internal_function re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs, int regs_allocated) { - int rval = REG_REALLOCATE; + int rval = REGS_REALLOCATE; Idx i; Idx need_regs = nregs + 1; - /* We need one extra element beyond `rm_num_regs' for the `-1' marker GNU code + /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code uses. */ /* Have the register data arrays been allocated? */ - if (regs_allocated == REG_UNALLOCATED) + if (regs_allocated == REGS_UNALLOCATED) { /* No. So allocate them with malloc. */ - regs->rm_start = re_malloc (regoff_t, need_regs); - regs->rm_end = re_malloc (regoff_t, need_regs); - if (BE (regs->rm_start == NULL, 0) || BE (regs->rm_end == NULL, 0)) - return REG_UNALLOCATED; - regs->rm_num_regs = need_regs; + regs->start = re_malloc (regoff_t, need_regs); + if (BE (regs->start == NULL, 0)) + return REGS_UNALLOCATED; + regs->end = re_malloc (regoff_t, need_regs); + if (BE (regs->end == NULL, 0)) + { + re_free (regs->start); + return REGS_UNALLOCATED; + } + regs->num_regs = need_regs; } - else if (regs_allocated == REG_REALLOCATE) + else if (regs_allocated == REGS_REALLOCATE) { /* Yes. If we need more elements than were already allocated, reallocate them. If we need fewer, just leave it alone. */ - if (BE (need_regs > regs->rm_num_regs, 0)) + if (BE (need_regs > regs->num_regs, 0)) { - regoff_t *new_start = - re_realloc (regs->rm_start, regoff_t, need_regs); - regoff_t *new_end = re_realloc (regs->rm_end, regoff_t, need_regs); - if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0)) - return REG_UNALLOCATED; - regs->rm_start = new_start; - regs->rm_end = new_end; - regs->rm_num_regs = need_regs; + regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs); + regoff_t *new_end; + if (BE (new_start == NULL, 0)) + return REGS_UNALLOCATED; + new_end = re_realloc (regs->end, regoff_t, need_regs); + if (BE (new_end == NULL, 0)) + { + re_free (new_start); + return REGS_UNALLOCATED; + } + regs->start = new_start; + regs->end = new_end; + regs->num_regs = need_regs; } } else { - assert (regs_allocated == REG_FIXED); - /* This function may not be called with REG_FIXED and nregs too big. */ - assert (regs->rm_num_regs >= nregs); - rval = REG_FIXED; + assert (regs_allocated == REGS_FIXED); + /* This function may not be called with REGS_FIXED and nregs too big. */ + assert (regs->num_regs >= nregs); + rval = REGS_FIXED; } /* Copy the regs. */ for (i = 0; i < nregs; ++i) { - regs->rm_start[i] = pmatch[i].rm_so; - regs->rm_end[i] = pmatch[i].rm_eo; + regs->start[i] = pmatch[i].rm_so; + regs->end[i] = pmatch[i].rm_eo; } - for ( ; i < regs->rm_num_regs; ++i) - regs->rm_start[i] = regs->rm_end[i] = -1; + for ( ; i < regs->num_regs; ++i) + regs->start[i] = regs->end[i] = -1; return rval; } @@ -544,21 +580,24 @@ re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs, freeing the old data. */ void -re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, - __re_size_t num_regs, regoff_t *starts, regoff_t *ends) +re_set_registers (bufp, regs, num_regs, starts, ends) + struct re_pattern_buffer *bufp; + struct re_registers *regs; + __re_size_t num_regs; + regoff_t *starts, *ends; { if (num_regs) { - bufp->re_regs_allocated = REG_REALLOCATE; - regs->rm_num_regs = num_regs; - regs->rm_start = starts; - regs->rm_end = ends; + bufp->regs_allocated = REGS_REALLOCATE; + regs->num_regs = num_regs; + regs->start = starts; + regs->end = ends; } else { - bufp->re_regs_allocated = REG_UNALLOCATED; - regs->rm_num_regs = 0; - regs->rm_start = regs->rm_end = NULL; + bufp->regs_allocated = REGS_UNALLOCATED; + regs->num_regs = 0; + regs->start = regs->end = NULL; } } #ifdef _LIBC @@ -573,7 +612,8 @@ int # ifdef _LIBC weak_function # endif -re_exec (const char *s) +re_exec (s) + const char *s; { return 0 == regexec (&re_comp_buf, s, 0, NULL, 0); } @@ -583,38 +623,41 @@ re_exec (const char *s) /* Searches for a compiled pattern PREG in the string STRING, whose length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same - mingings with regexec. START, and RANGE have the same meanings - with re_search. + meaning as with regexec. LAST_START is START + RANGE, where + START and RANGE have the same meaning as with re_search. Return REG_NOERROR if we find a match, and REG_NOMATCH if not, otherwise return the error code. Note: We assume front end functions already check ranges. - (START + RANGE >= 0 && START + RANGE <= LENGTH) */ + (0 <= LAST_START && LAST_START <= LENGTH) */ static reg_errcode_t -internal_function +__attribute_warn_unused_result__ re_search_internal (const regex_t *preg, const char *string, Idx length, - Idx start, regoff_t range, Idx stop, + Idx start, Idx last_start, Idx stop, size_t nmatch, regmatch_t pmatch[], int eflags) { reg_errcode_t err; - re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer; + const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; Idx left_lim, right_lim; int incr; - int fl_longest_match, match_kind; - Idx match_first, match_last = REG_MISSING; + bool fl_longest_match; + int match_kind; + Idx match_first; + Idx match_last = REG_MISSING; Idx extra_nmatch; - int sb, ch; + bool sb; + int ch; #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) re_match_context_t mctx = { .dfa = dfa }; #else re_match_context_t mctx; #endif - char *fastmap = (preg->re_fastmap != NULL && preg->re_fastmap_accurate - && range && !preg->re_can_be_null) ? preg->re_fastmap : NULL; - unsigned REG_TRANSLATE_TYPE t = - (unsigned REG_TRANSLATE_TYPE) preg->re_translate; + char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate + && start != last_start && !preg->can_be_null) + ? preg->fastmap : NULL); + RE_TRANSLATE_TYPE t = preg->translate; #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)) memset (&mctx, '\0', sizeof (re_match_context_t)); @@ -625,40 +668,40 @@ re_search_internal (const regex_t *preg, nmatch -= extra_nmatch; /* Check if the DFA haven't been compiled. */ - if (BE (preg->re_used == 0 || dfa->init_state == NULL + if (BE (preg->used == 0 || dfa->init_state == NULL || dfa->init_state_word == NULL || dfa->init_state_nl == NULL || dfa->init_state_begbuf == NULL, 0)) return REG_NOMATCH; #ifdef DEBUG /* We assume front-end functions already check them. */ - assert (start + range >= 0 && start + range <= length); + assert (0 <= last_start && last_start <= length); #endif /* If initial states with non-begbuf contexts have no elements, - the regex must be anchored. If preg->re_newline_anchor is set, + the regex must be anchored. If preg->newline_anchor is set, we'll never use init_state_nl, so do not check it. */ if (dfa->init_state->nodes.nelem == 0 && dfa->init_state_word->nodes.nelem == 0 && (dfa->init_state_nl->nodes.nelem == 0 - || !preg->re_newline_anchor)) + || !preg->newline_anchor)) { - if (start != 0 && start + range != 0) + if (start != 0 && last_start != 0) return REG_NOMATCH; - start = range = 0; + start = last_start = 0; } /* We must check the longest matching, if nmatch > 0. */ fl_longest_match = (nmatch != 0 || dfa->nbackref); err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1, - preg->re_translate, - preg->re_syntax & REG_IGNORE_CASE, dfa); + preg->translate, (preg->syntax & RE_ICASE) != 0, + dfa); if (BE (err != REG_NOERROR, 0)) goto free_return; mctx.input.stop = stop; mctx.input.raw_stop = stop; - mctx.input.newline_anchor = preg->re_newline_anchor; + mctx.input.newline_anchor = preg->newline_anchor; err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2); if (BE (err != REG_NOERROR, 0)) @@ -670,6 +713,14 @@ re_search_internal (const regex_t *preg, multi character collating element. */ if (nmatch > 1 || dfa->has_mb_node) { + /* Avoid overflow. */ + if (BE ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) + <= mctx.input.bufs_len), 0)) + { + err = REG_ESPACE; + goto free_return; + } + mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1); if (BE (mctx.state_log == NULL, 0)) { @@ -685,14 +736,14 @@ re_search_internal (const regex_t *preg, : CONTEXT_NEWLINE | CONTEXT_BEGBUF; /* Check incrementally whether of not the input string match. */ - incr = (range < 0) ? -1 : 1; - left_lim = (range < 0) ? start + range : start; - right_lim = (range < 0) ? start : start + range; + incr = (last_start < start) ? -1 : 1; + left_lim = (last_start < start) ? last_start : start; + right_lim = (last_start < start) ? start : last_start; sb = dfa->mb_cur_max == 1; match_kind = (fastmap - ? ((sb || !(preg->re_syntax & REG_IGNORE_CASE || t) ? 4 : 0) - | (range >= 0 ? 2 : 0) + ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0) + | (start <= last_start ? 2 : 0) | (t != NULL ? 1 : 0)) : 8); @@ -777,10 +828,10 @@ re_search_internal (const regex_t *preg, break; match_first += incr; if (match_first < left_lim || match_first > right_lim) - { - err = REG_NOMATCH; - goto free_return; - } + { + err = REG_NOMATCH; + goto free_return; + } } break; } @@ -802,7 +853,7 @@ re_search_internal (const regex_t *preg, /* We assume that the matching starts from 0. */ mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; match_last = check_matching (&mctx, fl_longest_match, - range >= 0 ? &match_first : NULL); + start <= last_start ? &match_first : NULL); if (match_last != REG_MISSING) { if (BE (match_last == REG_ERROR, 0)) @@ -813,13 +864,13 @@ re_search_internal (const regex_t *preg, else { mctx.match_last = match_last; - if ((!preg->re_no_sub && nmatch > 1) || dfa->nbackref) + if ((!preg->no_sub && nmatch > 1) || dfa->nbackref) { re_dfastate_t *pstate = mctx.state_log[match_last]; mctx.last_node = check_halt_state_context (&mctx, pstate, match_last); } - if ((!preg->re_no_sub && nmatch > 1 && dfa->has_plural_match) + if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match) || dfa->nbackref) { err = prune_impossible_nodes (&mctx); @@ -858,7 +909,7 @@ re_search_internal (const regex_t *preg, the maximum possible regoff_t value. We need a new error code REG_OVERFLOW. */ - if (!preg->re_no_sub && nmatch > 1) + if (!preg->no_sub && nmatch > 1) { err = set_regs (preg, &mctx, nmatch, pmatch, dfa->has_plural_match && dfa->nbackref > 0); @@ -866,7 +917,7 @@ re_search_internal (const regex_t *preg, goto free_return; } - /* At last, add the offset to the each registers, since we slided + /* At last, add the offset to each register, since we slid the buffers so that we could assume that the matching starts from 0. */ for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) @@ -897,14 +948,14 @@ re_search_internal (const regex_t *preg, } if (dfa->subexp_map) - for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) - if (dfa->subexp_map[reg_idx] != reg_idx) - { - pmatch[reg_idx + 1].rm_so - = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; - pmatch[reg_idx + 1].rm_eo - = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; - } + for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) + if (dfa->subexp_map[reg_idx] != reg_idx) + { + pmatch[reg_idx + 1].rm_so + = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; + pmatch[reg_idx + 1].rm_eo + = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; + } } free_return: @@ -916,10 +967,10 @@ re_search_internal (const regex_t *preg, } static reg_errcode_t -internal_function +__attribute_warn_unused_result__ prune_impossible_nodes (re_match_context_t *mctx) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; Idx halt_node, match_last; reg_errcode_t ret; re_dfastate_t **sifted_states; @@ -930,6 +981,11 @@ prune_impossible_nodes (re_match_context_t *mctx) #endif match_last = mctx->match_last; halt_node = mctx->last_node; + + /* Avoid overflow. */ + if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) <= match_last, 0)) + return REG_ESPACE; + sifted_states = re_malloc (re_dfastate_t *, match_last + 1); if (BE (sifted_states == NULL, 0)) { @@ -984,6 +1040,11 @@ prune_impossible_nodes (re_match_context_t *mctx) re_node_set_free (&sctx.limits); if (BE (ret != REG_NOERROR, 0)) goto free_return; + if (sifted_states[0] == NULL) + { + ret = REG_NOMATCH; + goto free_return; + } } re_free (mctx->state_log); mctx->state_log = sifted_states; @@ -1006,7 +1067,7 @@ __attribute ((always_inline)) internal_function acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, Idx idx) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; if (dfa->init_state->has_constraint) { unsigned int context; @@ -1040,21 +1101,21 @@ acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, FL_LONGEST_MATCH means we want the POSIX longest matching. If P_MATCH_FIRST is not NULL, and the match fails, it is set to the next place where we may want to try matching. - Note that the matcher assume that the maching starts from the current + Note that the matcher assumes that the matching starts from the current index of the buffer. */ static Idx -internal_function -check_matching (re_match_context_t *mctx, int fl_longest_match, +internal_function __attribute_warn_unused_result__ +check_matching (re_match_context_t *mctx, bool fl_longest_match, Idx *p_match_first) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; reg_errcode_t err; Idx match = 0; Idx match_last = REG_MISSING; Idx cur_str_idx = re_string_cur_idx (&mctx->input); re_dfastate_t *cur_state; - int at_init_state = p_match_first != NULL; + bool at_init_state = p_match_first != NULL; Idx next_start_idx = cur_str_idx; err = REG_NOERROR; @@ -1074,7 +1135,7 @@ check_matching (re_match_context_t *mctx, int fl_longest_match, later. E.g. Processing back references. */ if (BE (dfa->nbackref, 0)) { - at_init_state = 0; + at_init_state = false; err = check_subexp_matching_top (mctx, &cur_state->nodes, 0); if (BE (err != REG_NOERROR, 0)) return err; @@ -1083,7 +1144,7 @@ check_matching (re_match_context_t *mctx, int fl_longest_match, { err = transit_state_bkref (mctx, &cur_state->nodes); if (BE (err != REG_NOERROR, 0)) - return err; + return err; } } } @@ -1109,17 +1170,18 @@ check_matching (re_match_context_t *mctx, int fl_longest_match, re_dfastate_t *old_state = cur_state; Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1; - if (BE (next_char_idx >= mctx->input.bufs_len, 0) - || (BE (next_char_idx >= mctx->input.valid_len, 0) - && mctx->input.valid_len < mctx->input.len)) - { - err = extend_buffers (mctx); - if (BE (err != REG_NOERROR, 0)) + if ((BE (next_char_idx >= mctx->input.bufs_len, 0) + && mctx->input.bufs_len < mctx->input.len) + || (BE (next_char_idx >= mctx->input.valid_len, 0) + && mctx->input.valid_len < mctx->input.len)) + { + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) { assert (err == REG_ESPACE); return REG_ERROR; } - } + } cur_state = transit_state (&err, mctx, cur_state); if (mctx->state_log != NULL) @@ -1144,7 +1206,7 @@ check_matching (re_match_context_t *mctx, int fl_longest_match, if (old_state == cur_state) next_start_idx = next_char_idx; else - at_init_state = 0; + at_init_state = false; } if (cur_state->halt) @@ -1175,19 +1237,19 @@ check_matching (re_match_context_t *mctx, int fl_longest_match, /* Check NODE match the current context. */ -static int +static bool internal_function check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context) { re_token_type_t type = dfa->nodes[node].type; unsigned int constraint = dfa->nodes[node].constraint; if (type != END_OF_RE) - return 0; + return false; if (!constraint) - return 1; + return true; if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)) - return 0; - return 1; + return false; + return true; } /* Check the halt state STATE match the current context. @@ -1218,20 +1280,20 @@ check_halt_state_context (const re_match_context_t *mctx, static Idx internal_function -proceed_next_node (const re_match_context_t *mctx, - Idx nregs, regmatch_t *regs, Idx *pidx, Idx node, - re_node_set *eps_via_nodes, struct re_fail_stack_t *fs) +proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, + Idx *pidx, Idx node, re_node_set *eps_via_nodes, + struct re_fail_stack_t *fs) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; Idx i; - int err; + bool ok; if (IS_EPSILON_NODE (dfa->nodes[node].type)) { re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; re_node_set *edests = &dfa->edests[node]; Idx dest_node; - err = re_node_set_insert (eps_via_nodes, node); - if (BE (err < 0, 0)) + ok = re_node_set_insert (eps_via_nodes, node); + if (BE (! ok, 0)) return REG_ERROR; /* Pick up a valid destination, or return REG_MISSING if none is found. */ @@ -1243,17 +1305,17 @@ proceed_next_node (const re_match_context_t *mctx, if (dest_node == REG_MISSING) dest_node = candidate; - else + else { /* In order to avoid infinite loop like "(a*)*", return the second - epsilon-transition if the first was already considered. */ + epsilon-transition if the first was already considered. */ if (re_node_set_contains (eps_via_nodes, dest_node)) - return candidate; + return candidate; /* Otherwise, push the second epsilon-transition on the fail stack. */ else if (fs != NULL && push_fail_stack (fs, *pidx, candidate, nregs, regs, - eps_via_nodes)) + eps_via_nodes)) return REG_ERROR; /* We know we are going to exit. */ @@ -1292,8 +1354,8 @@ proceed_next_node (const re_match_context_t *mctx, if (naccepted == 0) { Idx dest_node; - err = re_node_set_insert (eps_via_nodes, node); - if (BE (err < 0, 0)) + ok = re_node_set_insert (eps_via_nodes, node); + if (BE (! ok, 0)) return REG_ERROR; dest_node = dfa->edests[node].elems[0]; if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, @@ -1319,7 +1381,7 @@ proceed_next_node (const re_match_context_t *mctx, } static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes) { @@ -1327,8 +1389,9 @@ push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, Idx num = fs->num++; if (fs->num == fs->alloc) { - struct re_fail_stack_ent_t *new_array = - re_realloc (fs->stack, struct re_fail_stack_ent_t, fs->alloc * 2); + struct re_fail_stack_ent_t *new_array; + new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) + * fs->alloc * 2)); if (new_array == NULL) return REG_ESPACE; fs->alloc *= 2; @@ -1346,8 +1409,8 @@ push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, static Idx internal_function -pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, - Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes) +pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs, + regmatch_t *regs, re_node_set *eps_via_nodes) { Idx num = --fs->num; assert (REG_VALID_INDEX (num)); @@ -1365,17 +1428,17 @@ pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */ static reg_errcode_t -internal_function -set_regs (const regex_t *preg, const re_match_context_t *mctx, - size_t nmatch, regmatch_t *pmatch, int fl_backtrack) +internal_function __attribute_warn_unused_result__ +set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, + regmatch_t *pmatch, bool fl_backtrack) { - re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer; + const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; Idx idx, cur_node; re_node_set eps_via_nodes; struct re_fail_stack_t *fs; struct re_fail_stack_t fs_body = { 0, 2, NULL }; regmatch_t *prev_idx_match; - int prev_idx_match_malloced = 0; + bool prev_idx_match_malloced = false; #ifdef DEBUG assert (nmatch > 1); @@ -1404,7 +1467,7 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, free_fail_stack_return (fs); return REG_ESPACE; } - prev_idx_match_malloced = 1; + prev_idx_match_malloced = true; } memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); @@ -1490,8 +1553,8 @@ free_fail_stack_return (struct re_fail_stack_t *fs) static void internal_function -update_regs (re_dfa_t *dfa, regmatch_t *pmatch, regmatch_t *prev_idx_match, - Idx cur_node, Idx cur_idx, Idx nmatch) +update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, + regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch) { int type = dfa->nodes[cur_node].type; if (type == OP_OPEN_SUBEXP) @@ -1541,28 +1604,28 @@ update_regs (re_dfa_t *dfa, regmatch_t *pmatch, regmatch_t *prev_idx_match, and sift the nodes in each states according to the following rules. Updated state_log will be wrote to STATE_LOG. - Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... + Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if... 1. When STR_IDX == MATCH_LAST(the last index in the state_log): - If `a' isn't the LAST_NODE and `a' can't epsilon transit to - the LAST_NODE, we throw away the node `a'. - 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts - string `s' and transit to `b': + If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to + the LAST_NODE, we throw away the node 'a'. + 2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts + string 's' and transit to 'b': i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw - away the node `a'. + away the node 'a'. ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is - thrown away, we throw away the node `a'. + thrown away, we throw away the node 'a'. 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the - node `a'. + node 'a'. ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, - we throw away the node `a'. */ + we throw away the node 'a'. */ #define STATE_NODE_CONTAINS(state,node) \ ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) static reg_errcode_t internal_function -sift_states_backward (re_match_context_t *mctx, re_sift_context_t *sctx) +sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) { reg_errcode_t err; int null_cnt = 0; @@ -1600,7 +1663,7 @@ sift_states_backward (re_match_context_t *mctx, re_sift_context_t *sctx) if (mctx->state_log[str_idx]) { err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); - if (BE (err != REG_NOERROR, 0)) + if (BE (err != REG_NOERROR, 0)) goto free_return; } @@ -1619,33 +1682,33 @@ sift_states_backward (re_match_context_t *mctx, re_sift_context_t *sctx) } static reg_errcode_t -internal_function -build_sifted_states (re_match_context_t *mctx, re_sift_context_t *sctx, +internal_function __attribute_warn_unused_result__ +build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, Idx str_idx, re_node_set *cur_dest) { - re_dfa_t *const dfa = mctx->dfa; - re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes; + const re_dfa_t *const dfa = mctx->dfa; + const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes; Idx i; /* Then build the next sifted state. - We build the next sifted state on `cur_dest', and update - `sifted_states[str_idx]' with `cur_dest'. + We build the next sifted state on 'cur_dest', and update + 'sifted_states[str_idx]' with 'cur_dest'. Note: - `cur_dest' is the sifted state from `state_log[str_idx + 1]'. - `cur_src' points the node_set of the old `state_log[str_idx]' + 'cur_dest' is the sifted state from 'state_log[str_idx + 1]'. + 'cur_src' points the node_set of the old 'state_log[str_idx]' (with the epsilon nodes pre-filtered out). */ for (i = 0; i < cur_src->nelem; i++) { Idx prev_node = cur_src->elems[i]; int naccepted = 0; - int ret; + bool ok; #ifdef DEBUG re_token_type_t type = dfa->nodes[prev_node].type; assert (!IS_EPSILON_NODE (type)); #endif #ifdef RE_ENABLE_I18N - /* If the node may accept `multi byte'. */ + /* If the node may accept "multi byte". */ if (dfa->nodes[prev_node].accept_mb) naccepted = sift_states_iter_mb (mctx, sctx, prev_node, str_idx, sctx->last_str_idx); @@ -1670,8 +1733,8 @@ build_sifted_states (re_match_context_t *mctx, re_sift_context_t *sctx, prev_node, str_idx)) continue; } - ret = re_node_set_insert (cur_dest, prev_node); - if (BE (ret == -1, 0)) + ok = re_node_set_insert (cur_dest, prev_node); + if (BE (! ok, 0)) return REG_ESPACE; } @@ -1686,7 +1749,8 @@ clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx) { Idx top = mctx->state_log_top; - if (next_state_log_idx >= mctx->input.bufs_len + if ((next_state_log_idx >= mctx->input.bufs_len + && mctx->input.bufs_len < mctx->input.len) || (next_state_log_idx >= mctx->input.valid_len && mctx->input.valid_len < mctx->input.len)) { @@ -1707,8 +1771,8 @@ clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx) static reg_errcode_t internal_function -merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst, re_dfastate_t **src, - Idx num) +merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, + re_dfastate_t **src, Idx num) { Idx st_idx; reg_errcode_t err; @@ -1734,11 +1798,12 @@ merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst, re_dfastate_t **src, static reg_errcode_t internal_function -update_cur_sifted_state (re_match_context_t *mctx, re_sift_context_t *sctx, - Idx str_idx, re_node_set *dest_nodes) +update_cur_sifted_state (const re_match_context_t *mctx, + re_sift_context_t *sctx, Idx str_idx, + re_node_set *dest_nodes) { - re_dfa_t *const dfa = mctx->dfa; - reg_errcode_t err; + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err = REG_NOERROR; const re_node_set *candidates; candidates = ((mctx->state_log[str_idx] == NULL) ? NULL : &mctx->state_log[str_idx]->nodes); @@ -1780,8 +1845,8 @@ update_cur_sifted_state (re_match_context_t *mctx, re_sift_context_t *sctx, } static reg_errcode_t -internal_function -add_epsilon_src_nodes (re_dfa_t *dfa, re_node_set *dest_nodes, +internal_function __attribute_warn_unused_result__ +add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, const re_node_set *candidates) { reg_errcode_t err = REG_NOERROR; @@ -1795,10 +1860,14 @@ add_epsilon_src_nodes (re_dfa_t *dfa, re_node_set *dest_nodes, { err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); if (BE (err != REG_NOERROR, 0)) - return REG_ESPACE; + return REG_ESPACE; for (i = 0; i < dest_nodes->nelem; i++) - re_node_set_merge (&state->inveclosure, - dfa->inveclosures + dest_nodes->elems[i]); + { + err = re_node_set_merge (&state->inveclosure, + dfa->inveclosures + dest_nodes->elems[i]); + if (BE (err != REG_NOERROR, 0)) + return REG_ESPACE; + } } return re_node_set_add_intersect (dest_nodes, candidates, &state->inveclosure); @@ -1806,7 +1875,7 @@ add_epsilon_src_nodes (re_dfa_t *dfa, re_node_set *dest_nodes, static reg_errcode_t internal_function -sub_epsilon_src_nodes (re_dfa_t *dfa, Idx node, re_node_set *dest_nodes, +sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes, const re_node_set *candidates) { Idx ecl_idx; @@ -1853,12 +1922,12 @@ sub_epsilon_src_nodes (re_dfa_t *dfa, Idx node, re_node_set *dest_nodes, return REG_NOERROR; } -static int +static bool internal_function -check_dst_limits (re_match_context_t *mctx, re_node_set *limits, +check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits, Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; Idx lim_idx, src_pos, dst_pos; Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx); @@ -1884,18 +1953,18 @@ check_dst_limits (re_match_context_t *mctx, re_node_set *limits, if (src_pos == dst_pos) continue; /* This is unrelated limitation. */ else - return 1; + return true; } - return 0; + return false; } static int internal_function -check_dst_limits_calc_pos_1 (re_match_context_t *mctx, int boundaries, +check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, Idx subexp_idx, Idx from_node, Idx bkref_idx) { - re_dfa_t *const dfa = mctx->dfa; - re_node_set *eclosures = dfa->eclosures + from_node; + const re_dfa_t *const dfa = mctx->dfa; + const re_node_set *eclosures = dfa->eclosures + from_node; Idx node_idx; /* Else, we are on the boundary: examine the nodes on the epsilon @@ -1910,16 +1979,16 @@ check_dst_limits_calc_pos_1 (re_match_context_t *mctx, int boundaries, { struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; do - { + { Idx dst; int cpos; if (ent->node != node) continue; - if (subexp_idx - < CHAR_BIT * sizeof ent->eps_reachable_subexps_map - && !(ent->eps_reachable_subexps_map & (1u << subexp_idx))) + if (subexp_idx < BITSET_WORD_BITS + && !(ent->eps_reachable_subexps_map + & ((bitset_word_t) 1 << subexp_idx))) continue; /* Recurse trying to reach the OP_OPEN_SUBEXP and @@ -1932,9 +2001,9 @@ check_dst_limits_calc_pos_1 (re_match_context_t *mctx, int boundaries, if (dst == from_node) { if (boundaries & 1) - return -1; + return -1; else /* if (boundaries & 2) */ - return 0; + return 0; } cpos = @@ -1945,10 +2014,10 @@ check_dst_limits_calc_pos_1 (re_match_context_t *mctx, int boundaries, if (cpos == 0 && (boundaries & 2)) return 0; - if (subexp_idx - < CHAR_BIT * sizeof ent->eps_reachable_subexps_map) - ent->eps_reachable_subexps_map &= ~(1u << subexp_idx); - } + if (subexp_idx < BITSET_WORD_BITS) + ent->eps_reachable_subexps_map + &= ~((bitset_word_t) 1 << subexp_idx); + } while (ent++->more); } break; @@ -1973,8 +2042,9 @@ check_dst_limits_calc_pos_1 (re_match_context_t *mctx, int boundaries, static int internal_function -check_dst_limits_calc_pos (re_match_context_t *mctx, Idx limit, Idx subexp_idx, - Idx from_node, Idx str_idx, Idx bkref_idx) +check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit, + Idx subexp_idx, Idx from_node, Idx str_idx, + Idx bkref_idx) { struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; int boundaries; @@ -2002,7 +2072,7 @@ check_dst_limits_calc_pos (re_match_context_t *mctx, Idx limit, Idx subexp_idx, static reg_errcode_t internal_function -check_subexp_limits (re_dfa_t *dfa, re_node_set *dest_nodes, +check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, const re_node_set *candidates, re_node_set *limits, struct re_backref_cache_entry *bkref_ents, Idx str_idx) { @@ -2089,11 +2159,11 @@ check_subexp_limits (re_dfa_t *dfa, re_node_set *dest_nodes, } static reg_errcode_t -internal_function -sift_states_bkref (re_match_context_t *mctx, re_sift_context_t *sctx, +internal_function __attribute_warn_unused_result__ +sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, Idx str_idx, const re_node_set *candidates) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; reg_errcode_t err; Idx node_idx, node; re_sift_context_t local_sctx; @@ -2121,7 +2191,10 @@ sift_states_bkref (re_match_context_t *mctx, re_sift_context_t *sctx, enabled_idx = first_idx; do { - Idx subexp_len, to_idx, dst_node, ret; + Idx subexp_len; + Idx to_idx; + Idx dst_node; + bool ok; re_dfastate_t *cur_state; if (entry->node != node) @@ -2147,8 +2220,8 @@ sift_states_bkref (re_match_context_t *mctx, re_sift_context_t *sctx, } local_sctx.last_node = node; local_sctx.last_str_idx = str_idx; - ret = re_node_set_insert (&local_sctx.limits, enabled_idx); - if (BE (ret < 0, 0)) + ok = re_node_set_insert (&local_sctx.limits, enabled_idx); + if (BE (! ok, 0)) { err = REG_ESPACE; goto free_return; @@ -2169,7 +2242,7 @@ sift_states_bkref (re_match_context_t *mctx, re_sift_context_t *sctx, re_node_set_remove (&local_sctx.limits, enabled_idx); /* mctx->bkref_ents may have changed, reload the pointer. */ - entry = mctx->bkref_ents + enabled_idx; + entry = mctx->bkref_ents + enabled_idx; } while (enabled_idx++, entry++->more); } @@ -2190,19 +2263,19 @@ internal_function sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, Idx node_idx, Idx str_idx, Idx max_str_idx) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; int naccepted; - /* Check the node can accept `multi byte'. */ + /* Check the node can accept "multi byte". */ naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); if (naccepted > 0 && str_idx + naccepted <= max_str_idx && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], dfa->nexts[node_idx])) - /* The node can't accept the `multi byte', or the + /* The node can't accept the "multi byte", or the destination was already thrown away, then the node - could't accept the current input `multi byte'. */ + could't accept the current input "multi byte". */ naccepted = 0; /* Otherwise, it is sure that the node could accept - `naccepted' bytes input. */ + 'naccepted' bytes input. */ return naccepted; } #endif /* RE_ENABLE_I18N */ @@ -2216,7 +2289,7 @@ sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, update the destination of STATE_LOG. */ static re_dfastate_t * -internal_function +internal_function __attribute_warn_unused_result__ transit_state (reg_errcode_t *err, re_match_context_t *mctx, re_dfastate_t *state) { @@ -2250,7 +2323,7 @@ transit_state (reg_errcode_t *err, re_match_context_t *mctx, trtable = state->word_trtable; if (BE (trtable != NULL, 1)) - { + { unsigned int context; context = re_string_context_at (&mctx->input, @@ -2273,12 +2346,12 @@ transit_state (reg_errcode_t *err, re_match_context_t *mctx, } /* Update the state_log if we need */ -re_dfastate_t * +static re_dfastate_t * internal_function merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, re_dfastate_t *next_state) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; Idx cur_idx = re_string_cur_idx (&mctx->input); if (cur_idx > mctx->state_log_top) @@ -2296,21 +2369,21 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, unsigned int context; re_node_set next_nodes, *log_nodes, *table_nodes = NULL; /* If (state_log[cur_idx] != 0), it implies that cur_idx is - the destination of a multibyte char/collating element/ - back reference. Then the next state is the union set of - these destinations and the results of the transition table. */ + the destination of a multibyte char/collating element/ + back reference. Then the next state is the union set of + these destinations and the results of the transition table. */ pstate = mctx->state_log[cur_idx]; log_nodes = pstate->entrance_nodes; if (next_state != NULL) - { - table_nodes = next_state->entrance_nodes; - *err = re_node_set_init_union (&next_nodes, table_nodes, + { + table_nodes = next_state->entrance_nodes; + *err = re_node_set_init_union (&next_nodes, table_nodes, log_nodes); - if (BE (*err != REG_NOERROR, 0)) + if (BE (*err != REG_NOERROR, 0)) return NULL; - } + } else - next_nodes = *log_nodes; + next_nodes = *log_nodes; /* Note: We already add the nodes of the initial state, then we don't need to add them here. */ @@ -2318,12 +2391,12 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, re_string_cur_idx (&mctx->input) - 1, mctx->eflags); next_state = mctx->state_log[cur_idx] - = re_acquire_state_context (err, dfa, &next_nodes, context); + = re_acquire_state_context (err, dfa, &next_nodes, context); /* We don't need to check errors here, since the return value of - this function is next_state and ERR is already set. */ + this function is next_state and ERR is already set. */ if (table_nodes != NULL) - re_node_set_free (&next_nodes); + re_node_set_free (&next_nodes); } if (BE (dfa->nbackref, 0) && next_state != NULL) @@ -2356,7 +2429,7 @@ static re_dfastate_t * internal_function find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) { - re_dfastate_t *cur_state = NULL; + re_dfastate_t *cur_state; do { Idx max = mctx->state_log_top; @@ -2364,9 +2437,9 @@ find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) do { - if (++cur_str_idx > max) - return NULL; - re_string_skip_bytes (&mctx->input, 1); + if (++cur_str_idx > max) + return NULL; + re_string_skip_bytes (&mctx->input, 1); } while (mctx->state_log[cur_str_idx] == NULL); @@ -2381,14 +2454,14 @@ find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) /* From the node set CUR_NODES, pick up the nodes whose types are OP_OPEN_SUBEXP and which have corresponding back references in the regular expression. And register them to use them later for evaluating the - correspoding back references. */ + corresponding back references. */ static reg_errcode_t internal_function check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, Idx str_idx) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; Idx node_idx; reg_errcode_t err; @@ -2401,8 +2474,9 @@ check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, { Idx node = cur_nodes->elems[node_idx]; if (dfa->nodes[node].type == OP_OPEN_SUBEXP - && dfa->nodes[node].opr.idx < CHAR_BIT * sizeof dfa->used_bkref_map - && dfa->used_bkref_map & (1u << dfa->nodes[node].opr.idx)) + && dfa->nodes[node].opr.idx < BITSET_WORD_BITS + && (dfa->used_bkref_map + & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx))) { err = match_ctx_add_subtop (mctx, node, str_idx); if (BE (err != REG_NOERROR, 0)) @@ -2420,7 +2494,7 @@ static re_dfastate_t * transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, re_dfastate_t *state) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; re_node_set next_nodes; re_dfastate_t *next_state; Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input); @@ -2459,7 +2533,7 @@ static reg_errcode_t internal_function transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; reg_errcode_t err; Idx i; @@ -2473,7 +2547,7 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) re_dfastate_t *dest_state; if (!dfa->nodes[cur_node_idx].accept_mb) - continue; + continue; if (dfa->nodes[cur_node_idx].constraint) { @@ -2491,7 +2565,7 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) if (naccepted == 0) continue; - /* The node can accepts `naccepted' bytes. */ + /* The node can accepts 'naccepted' bytes. */ dest_idx = re_string_cur_idx (&mctx->input) + naccepted; mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted : mctx->max_mb_elem_len); @@ -2513,7 +2587,8 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) if (BE (err != REG_NOERROR, 0)) return err; } - context = re_string_context_at (&mctx->input, dest_idx - 1, mctx->eflags); + context = re_string_context_at (&mctx->input, dest_idx - 1, + mctx->eflags); mctx->state_log[dest_idx] = re_acquire_state_context (&err, dfa, &dest_nodes, context); if (dest_state != NULL) @@ -2529,7 +2604,7 @@ static reg_errcode_t internal_function transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; reg_errcode_t err; Idx i; Idx cur_str_idx = re_string_cur_idx (&mctx->input); @@ -2542,7 +2617,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) const re_token_t *node = dfa->nodes + node_idx; re_node_set *new_dest_nodes; - /* Check whether `node' is a backreference or not. */ + /* Check whether 'node' is a backreference or not. */ if (node->type != OP_BACK_REF) continue; @@ -2554,14 +2629,14 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) continue; } - /* `node' is a backreference. + /* 'node' is a backreference. Check the substring which the substring matched. */ bkc_idx = mctx->nbkref_ents; err = get_subexp (mctx, node_idx, cur_str_idx); if (BE (err != REG_NOERROR, 0)) goto free_return; - /* And add the epsilon closures (which is `new_dest_nodes') of + /* And add the epsilon closures (which is 'new_dest_nodes') of the backreference to appropriate state_log. */ #ifdef DEBUG assert (dfa->nexts[node_idx] != REG_MISSING); @@ -2585,7 +2660,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) dest_state = mctx->state_log[dest_str_idx]; prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 : mctx->state_log[cur_str_idx]->nodes.nelem); - /* Add `new_dest_node' to state_log. */ + /* Add 'new_dest_node' to state_log. */ if (dest_state == NULL) { mctx->state_log[dest_str_idx] @@ -2640,19 +2715,20 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) delay these checking for prune_impossible_nodes(). */ static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; Idx subexp_num, sub_top_idx; const char *buf = (const char *) re_string_get_buffer (&mctx->input); /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */ Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx); if (cache_idx != REG_MISSING) { - const struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx; + const struct re_backref_cache_entry *entry + = mctx->bkref_ents + cache_idx; do - if (entry->node == bkref_node) + if (entry->node == bkref_node) return REG_NOERROR; /* We already checked it. */ while (entry++->more); } @@ -2697,7 +2773,8 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) buf = (const char *) re_string_get_buffer (&mctx->input); } if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0) - break; /* We don't need to search this sub expression any more. */ + /* We don't need to search this sub expression any more. */ + break; } bkref_str_off += sl_str_diff; sl_str += sl_str_diff; @@ -2749,20 +2826,22 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) continue; /* Does this state have a ')' of the sub expression? */ nodes = &mctx->state_log[sl_str]->nodes; - cls_node = find_subexp_node (dfa, nodes, subexp_num, OP_CLOSE_SUBEXP); + cls_node = find_subexp_node (dfa, nodes, subexp_num, + OP_CLOSE_SUBEXP); if (cls_node == REG_MISSING) continue; /* No. */ if (sub_top->path == NULL) { - sub_top->path = re_calloc (state_array_t, - sl_str - sub_top->str_idx + 1); + sub_top->path = calloc (sizeof (state_array_t), + sl_str - sub_top->str_idx + 1); if (sub_top->path == NULL) return REG_ESPACE; } /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node in the current context? */ err = check_arrival (mctx, sub_top->path, sub_top->node, - sub_top->str_idx, cls_node, sl_str, OP_CLOSE_SUBEXP); + sub_top->str_idx, cls_node, sl_str, + OP_CLOSE_SUBEXP); if (err == REG_NOMATCH) continue; if (BE (err != REG_NOERROR, 0)) @@ -2794,7 +2873,8 @@ get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, Idx to_idx; /* Can the subexpression arrive the back reference? */ err = check_arrival (mctx, &sub_last->path, sub_last->node, - sub_last->str_idx, bkref_node, bkref_str, OP_OPEN_SUBEXP); + sub_last->str_idx, bkref_node, bkref_str, + OP_OPEN_SUBEXP); if (err != REG_NOERROR) return err; err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx, @@ -2836,13 +2916,12 @@ find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ static reg_errcode_t -internal_function -check_arrival (re_match_context_t *mctx, state_array_t *path, - Idx top_node, Idx top_str, Idx last_node, Idx last_str, - int type) +internal_function __attribute_warn_unused_result__ +check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, + Idx top_str, Idx last_node, Idx last_str, int type) { - re_dfa_t *const dfa = mctx->dfa; - reg_errcode_t err; + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err = REG_NOERROR; Idx subexp_num, backup_cur_idx, str_idx, null_cnt; re_dfastate_t *cur_state = NULL; re_node_set *cur_nodes, next_nodes; @@ -2855,19 +2934,23 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, { re_dfastate_t **new_array; Idx old_alloc = path->alloc; - path->alloc += last_str + mctx->max_mb_elem_len + 1; - new_array = re_realloc (path->array, re_dfastate_t *, path->alloc); - if (new_array == NULL) - { - path->alloc = old_alloc; - return REG_ESPACE; - } + Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1; + Idx new_alloc; + if (BE (IDX_MAX - old_alloc < incr_alloc, 0)) + return REG_ESPACE; + new_alloc = old_alloc + incr_alloc; + if (BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0)) + return REG_ESPACE; + new_array = re_realloc (path->array, re_dfastate_t *, new_alloc); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; path->array = new_array; + path->alloc = new_alloc; memset (new_array + old_alloc, '\0', sizeof (re_dfastate_t *) * (path->alloc - old_alloc)); } - str_idx = path->next_idx == 0 ? top_str : path->next_idx; + str_idx = path->next_idx ? path->next_idx : top_str; /* Temporary modify MCTX. */ backup_state_log = mctx->state_log; @@ -2895,7 +2978,7 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, if (cur_state && cur_state->has_backref) { err = re_node_set_init_copy (&next_nodes, &cur_state->nodes); - if (BE ( err != REG_NOERROR, 0)) + if (BE (err != REG_NOERROR, 0)) return err; } else @@ -2907,7 +2990,7 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, { err = expand_bkref_cache (mctx, &next_nodes, str_idx, subexp_num, type); - if (BE ( err != REG_NOERROR, 0)) + if (BE (err != REG_NOERROR, 0)) { re_node_set_free (&next_nodes); return err; @@ -2938,7 +3021,8 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, if (cur_state) { err = check_arrival_add_next_nodes (mctx, str_idx, - &cur_state->non_eps_nodes, &next_nodes); + &cur_state->non_eps_nodes, + &next_nodes); if (BE (err != REG_NOERROR, 0)) { re_node_set_free (&next_nodes); @@ -2956,7 +3040,7 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, } err = expand_bkref_cache (mctx, &next_nodes, str_idx, subexp_num, type); - if (BE ( err != REG_NOERROR, 0)) + if (BE (err != REG_NOERROR, 0)) { re_node_set_free (&next_nodes); return err; @@ -2997,15 +3081,16 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Can't we unify them? */ static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, - re_node_set *cur_nodes, - re_node_set *next_nodes) + re_node_set *cur_nodes, re_node_set *next_nodes) { - re_dfa_t *const dfa = mctx->dfa; - int result; + const re_dfa_t *const dfa = mctx->dfa; + bool ok; Idx cur_idx; - reg_errcode_t err; +#ifdef RE_ENABLE_I18N + reg_errcode_t err = REG_NOERROR; +#endif re_node_set union_set; re_node_set_init_empty (&union_set); for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) @@ -3017,7 +3102,7 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, assert (!IS_EPSILON_NODE (type)); #endif #ifdef RE_ENABLE_I18N - /* If the node may accept `multi byte'. */ + /* If the node may accept "multi byte". */ if (dfa->nodes[cur_node].accept_mb) { naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input, @@ -3038,8 +3123,8 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, return err; } } - result = re_node_set_insert (&union_set, next_node); - if (BE (result < 0, 0)) + ok = re_node_set_insert (&union_set, next_node); + if (BE (! ok, 0)) { re_node_set_free (&union_set); return REG_ESPACE; @@ -3058,8 +3143,8 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, if (naccepted || check_node_accept (mctx, dfa->nodes + cur_node, str_idx)) { - result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]); - if (BE (result < 0, 0)) + ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]); + if (BE (! ok, 0)) { re_node_set_free (&union_set); return REG_ESPACE; @@ -3078,7 +3163,7 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, static reg_errcode_t internal_function -check_arrival_expand_ecl (re_dfa_t *dfa, re_node_set *cur_nodes, +check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, Idx ex_subexp, int type) { reg_errcode_t err; @@ -3096,7 +3181,7 @@ check_arrival_expand_ecl (re_dfa_t *dfa, re_node_set *cur_nodes, for (idx = 0; idx < cur_nodes->nelem; ++idx) { Idx cur_node = cur_nodes->elems[idx]; - re_node_set *eclosure = dfa->eclosures + cur_node; + const re_node_set *eclosure = dfa->eclosures + cur_node; outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type); if (outside_node == REG_MISSING) { @@ -3130,39 +3215,39 @@ check_arrival_expand_ecl (re_dfa_t *dfa, re_node_set *cur_nodes, problematic append it to DST_NODES. */ static reg_errcode_t -internal_function -check_arrival_expand_ecl_sub (re_dfa_t *dfa, re_node_set *dst_nodes, +internal_function __attribute_warn_unused_result__ +check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, Idx target, Idx ex_subexp, int type) { Idx cur_node; for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);) { - int err; + bool ok; if (dfa->nodes[cur_node].type == type && dfa->nodes[cur_node].opr.idx == ex_subexp) { if (type == OP_CLOSE_SUBEXP) { - err = re_node_set_insert (dst_nodes, cur_node); - if (BE (err == -1, 0)) + ok = re_node_set_insert (dst_nodes, cur_node); + if (BE (! ok, 0)) return REG_ESPACE; } break; } - err = re_node_set_insert (dst_nodes, cur_node); - if (BE (err == -1, 0)) + ok = re_node_set_insert (dst_nodes, cur_node); + if (BE (! ok, 0)) return REG_ESPACE; if (dfa->edests[cur_node].nelem == 0) break; if (dfa->edests[cur_node].nelem == 2) { - reg_errcode_t ret = - check_arrival_expand_ecl_sub (dfa, dst_nodes, - dfa->edests[cur_node].elems[1], - ex_subexp, type); - if (BE (ret != REG_NOERROR, 0)) - return ret; + reg_errcode_t err; + err = check_arrival_expand_ecl_sub (dfa, dst_nodes, + dfa->edests[cur_node].elems[1], + ex_subexp, type); + if (BE (err != REG_NOERROR, 0)) + return err; } cur_node = dfa->edests[cur_node].elems[0]; } @@ -3175,11 +3260,11 @@ check_arrival_expand_ecl_sub (re_dfa_t *dfa, re_node_set *dst_nodes, in MCTX->BKREF_ENTS. */ static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, Idx cur_str, Idx subexp_num, int type) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; reg_errcode_t err; Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str); struct re_backref_cache_entry *ent; @@ -3229,14 +3314,14 @@ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, next_node = dfa->nexts[ent->node]; if (mctx->state_log[to_idx]) { - int ret; + bool ok; if (re_node_set_contains (&mctx->state_log[to_idx]->nodes, next_node)) continue; err = re_node_set_init_copy (&union_set, &mctx->state_log[to_idx]->nodes); - ret = re_node_set_insert (&union_set, next_node); - if (BE (err != REG_NOERROR || ret < 0, 0)) + ok = re_node_set_insert (&union_set, next_node); + if (BE (err != REG_NOERROR || ! ok, 0)) { re_node_set_free (&union_set); err = err != REG_NOERROR ? err : REG_ESPACE; @@ -3261,65 +3346,82 @@ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, } /* Build transition table for the state. - Return 1 if succeeded, otherwise return NULL. */ + Return true if successful. */ -static int +static bool internal_function -build_trtable (re_dfa_t *dfa, re_dfastate_t *state) +build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) { reg_errcode_t err; Idx i, j; - int ch, need_word_trtable = 0; - unsigned int elem, mask; - int dests_node_malloced = 0, dest_states_malloced = 0; - Idx ndests; /* Number of the destination states from `state'. */ + int ch; + bool need_word_trtable = false; + bitset_word_t elem, mask; + bool dests_node_malloced = false; + bool dest_states_malloced = false; + Idx ndests; /* Number of the destination states from 'state'. */ re_dfastate_t **trtable; re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; re_node_set follows, *dests_node; - bitset *dests_ch; - bitset acceptable; + bitset_t *dests_ch; + bitset_t acceptable; + + struct dests_alloc + { + re_node_set dests_node[SBC_MAX]; + bitset_t dests_ch[SBC_MAX]; + } *dests_alloc; /* We build DFA states which corresponds to the destination nodes - from `state'. `dests_node[i]' represents the nodes which i-th - destination state contains, and `dests_ch[i]' represents the + from 'state'. 'dests_node[i]' represents the nodes which i-th + destination state contains, and 'dests_ch[i]' represents the characters which i-th destination state accepts. */ - if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX)) - dests_node = (re_node_set *) - alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX); + if (__libc_use_alloca (sizeof (struct dests_alloc))) + dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc)); else { - dests_node = (re_node_set *) - malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX); - if (BE (dests_node == NULL, 0)) - return 0; - dests_node_malloced = 1; + dests_alloc = re_malloc (struct dests_alloc, 1); + if (BE (dests_alloc == NULL, 0)) + return false; + dests_node_malloced = true; } - dests_ch = (bitset *) (dests_node + SBC_MAX); + dests_node = dests_alloc->dests_node; + dests_ch = dests_alloc->dests_ch; - /* Initialize transiton table. */ + /* Initialize transition table. */ state->word_trtable = state->trtable = NULL; - /* At first, group all nodes belonging to `state' into several + /* At first, group all nodes belonging to 'state' into several destinations. */ ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0)) { if (dests_node_malloced) - free (dests_node); - /* Return 0 in case of an error, 1 otherwise. */ + free (dests_alloc); + /* Return false in case of an error, true otherwise. */ if (ndests == 0) { - state->trtable = re_calloc (re_dfastate_t *, SBC_MAX); - return 1; + state->trtable = (re_dfastate_t **) + calloc (sizeof (re_dfastate_t *), SBC_MAX); + if (BE (state->trtable == NULL, 0)) + return false; + return true; } - return 0; + return false; } err = re_node_set_alloc (&follows, ndests + 1); if (BE (err != REG_NOERROR, 0)) goto out_free; - if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX + /* Avoid arithmetic overflow in size calculation. */ + if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX) + / (3 * sizeof (re_dfastate_t *))) + < ndests), + 0)) + goto out_free; + + if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX + ndests * 3 * sizeof (re_dfastate_t *))) dest_states = (re_dfastate_t **) alloca (ndests * 3 * sizeof (re_dfastate_t *)); @@ -3336,10 +3438,10 @@ out_free: for (i = 0; i < ndests; ++i) re_node_set_free (dests_node + i); if (dests_node_malloced) - free (dests_node); - return 0; + free (dests_alloc); + return false; } - dest_states_malloced = 1; + dest_states_malloced = true; } dest_states_word = dest_states + ndests; dest_states_nl = dest_states_word + ndests; @@ -3374,13 +3476,13 @@ out_free: goto out_free; if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1) - need_word_trtable = 1; + need_word_trtable = true; dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, CONTEXT_NEWLINE); if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0)) goto out_free; - } + } else { dest_states_word[i] = dest_states[i]; @@ -3395,13 +3497,14 @@ out_free: character, or we are in a single-byte character set so we can discern by looking at the character code: allocate a 256-entry transition table. */ - trtable = state->trtable = re_calloc (re_dfastate_t *, SBC_MAX); + trtable = state->trtable = + (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); if (BE (trtable == NULL, 0)) goto out_free; /* For all characters ch...: */ - for (i = 0; i < BITSET_UINTS; ++i) - for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1; + for (i = 0; i < BITSET_WORDS; ++i) + for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; elem; mask <<= 1, elem >>= 1, ++ch) if (BE (elem & 1, 0)) @@ -3425,13 +3528,14 @@ out_free: by looking at the character code: build two 256-entry transition tables, one starting at trtable[0] and one starting at trtable[SBC_MAX]. */ - trtable = state->word_trtable = re_calloc (re_dfastate_t *, 2 * SBC_MAX); + trtable = state->word_trtable = + (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX); if (BE (trtable == NULL, 0)) goto out_free; /* For all characters ch...: */ - for (i = 0; i < BITSET_UINTS; ++i) - for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1; + for (i = 0; i < BITSET_WORDS; ++i) + for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; elem; mask <<= 1, elem >>= 1, ++ch) if (BE (elem & 1, 0)) @@ -3472,9 +3576,9 @@ out_free: re_node_set_free (dests_node + i); if (dests_node_malloced) - free (dests_node); + free (dests_alloc); - return 1; + return true; } /* Group all nodes belonging to STATE into several destinations. @@ -3484,19 +3588,19 @@ out_free: static Idx internal_function -group_nodes_into_DFAstates (re_dfa_t *dfa, const re_dfastate_t *state, - re_node_set *dests_node, bitset *dests_ch) +group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, + re_node_set *dests_node, bitset_t *dests_ch) { reg_errcode_t err; - int result; + bool ok; Idx i, j, k; - Idx ndests; /* Number of the destinations from `state'. */ - bitset accepts; /* Characters a node can accept. */ + Idx ndests; /* Number of the destinations from 'state'. */ + bitset_t accepts; /* Characters a node can accept. */ const re_node_set *cur_nodes = &state->nodes; bitset_empty (accepts); ndests = 0; - /* For all the nodes belonging to `state', */ + /* For all the nodes belonging to 'state', */ for (i = 0; i < cur_nodes->nelem; ++i) { re_token_t *node = &dfa->nodes[cur_nodes->elems[i]]; @@ -3518,31 +3622,34 @@ group_nodes_into_DFAstates (re_dfa_t *dfa, const re_dfastate_t *state, else #endif bitset_set_all (accepts); - if (!(dfa->syntax & REG_DOT_NEWLINE)) + if (!(dfa->syntax & RE_DOT_NEWLINE)) bitset_clear (accepts, '\n'); - if (dfa->syntax & REG_DOT_NOT_NULL) + if (dfa->syntax & RE_DOT_NOT_NULL) bitset_clear (accepts, '\0'); } #ifdef RE_ENABLE_I18N else if (type == OP_UTF8_PERIOD) - { - memset (accepts, 255, sizeof (unsigned int) * BITSET_UINTS / 2); - if (!(dfa->syntax & REG_DOT_NEWLINE)) + { + if (ASCII_CHARS % BITSET_WORD_BITS == 0) + memset (accepts, -1, ASCII_CHARS / CHAR_BIT); + else + bitset_merge (accepts, utf8_sb_map); + if (!(dfa->syntax & RE_DOT_NEWLINE)) bitset_clear (accepts, '\n'); - if (dfa->syntax & REG_DOT_NOT_NULL) + if (dfa->syntax & RE_DOT_NOT_NULL) bitset_clear (accepts, '\0'); - } + } #endif else continue; - /* Check the `accepts' and sift the characters which are not + /* Check the 'accepts' and sift the characters which are not match it the context. */ if (constraint) { if (constraint & NEXT_NEWLINE_CONSTRAINT) { - int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); + bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); bitset_empty (accepts); if (accepts_newline) bitset_set (accepts, NEWLINE_CHAR); @@ -3557,7 +3664,7 @@ group_nodes_into_DFAstates (re_dfa_t *dfa, const re_dfastate_t *state, if (constraint & NEXT_WORD_CONSTRAINT) { - unsigned int any_set = 0; + bitset_word_t any_set = 0; if (type == CHARACTER && !node->word_char) { bitset_empty (accepts); @@ -3565,18 +3672,18 @@ group_nodes_into_DFAstates (re_dfa_t *dfa, const re_dfastate_t *state, } #ifdef RE_ENABLE_I18N if (dfa->mb_cur_max > 1) - for (j = 0; j < BITSET_UINTS; ++j) + for (j = 0; j < BITSET_WORDS; ++j) any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); else #endif - for (j = 0; j < BITSET_UINTS; ++j) + for (j = 0; j < BITSET_WORDS; ++j) any_set |= (accepts[j] &= dfa->word_char[j]); if (!any_set) continue; } if (constraint & NEXT_NOTWORD_CONSTRAINT) { - unsigned int any_set = 0; + bitset_word_t any_set = 0; if (type == CHARACTER && node->word_char) { bitset_empty (accepts); @@ -3584,48 +3691,48 @@ group_nodes_into_DFAstates (re_dfa_t *dfa, const re_dfastate_t *state, } #ifdef RE_ENABLE_I18N if (dfa->mb_cur_max > 1) - for (j = 0; j < BITSET_UINTS; ++j) + for (j = 0; j < BITSET_WORDS; ++j) any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); else #endif - for (j = 0; j < BITSET_UINTS; ++j) + for (j = 0; j < BITSET_WORDS; ++j) any_set |= (accepts[j] &= ~dfa->word_char[j]); if (!any_set) continue; } } - /* Then divide `accepts' into DFA states, or create a new + /* Then divide 'accepts' into DFA states, or create a new state. Above, we make sure that accepts is not empty. */ for (j = 0; j < ndests; ++j) { - bitset intersec; /* Intersection sets, see below. */ - bitset remains; + bitset_t intersec; /* Intersection sets, see below. */ + bitset_t remains; /* Flags, see below. */ - int has_intersec, not_subset, not_consumed; + bitset_word_t has_intersec, not_subset, not_consumed; /* Optimization, skip if this state doesn't accept the character. */ if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) continue; - /* Enumerate the intersection set of this state and `accepts'. */ + /* Enumerate the intersection set of this state and 'accepts'. */ has_intersec = 0; - for (k = 0; k < BITSET_UINTS; ++k) + for (k = 0; k < BITSET_WORDS; ++k) has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; /* And skip if the intersection set is empty. */ if (!has_intersec) continue; - /* Then check if this state is a subset of `accepts'. */ + /* Then check if this state is a subset of 'accepts'. */ not_subset = not_consumed = 0; - for (k = 0; k < BITSET_UINTS; ++k) + for (k = 0; k < BITSET_WORDS; ++k) { not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k]; not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; } - /* If this state isn't a subset of `accepts', create a - new group state, which has the `remains'. */ + /* If this state isn't a subset of 'accepts', create a + new group state, which has the 'remains'. */ if (not_subset) { bitset_copy (dests_ch[ndests], remains); @@ -3637,8 +3744,8 @@ group_nodes_into_DFAstates (re_dfa_t *dfa, const re_dfastate_t *state, } /* Put the position in the current group. */ - result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); - if (BE (result < 0, 0)) + ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); + if (BE (! ok, 0)) goto error_return; /* If all characters are consumed, go to next node. */ @@ -3664,7 +3771,7 @@ group_nodes_into_DFAstates (re_dfa_t *dfa, const re_dfastate_t *state, } #ifdef RE_ENABLE_I18N -/* Check how many bytes the node `dfa->nodes[node_idx]' accepts. +/* Check how many bytes the node 'dfa->nodes[node_idx]' accepts. Return the number of the bytes the node accepts. STR_IDX is the current index of the input string. @@ -3674,7 +3781,7 @@ group_nodes_into_DFAstates (re_dfa_t *dfa, const re_dfastate_t *state, static int internal_function -check_node_accept_bytes (re_dfa_t *dfa, Idx node_idx, +check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, const re_string_t *input, Idx str_idx) { const re_token_t *node = dfa->nodes + node_idx; @@ -3736,13 +3843,13 @@ check_node_accept_bytes (re_dfa_t *dfa, Idx node_idx, if (node->type == OP_PERIOD) { if (char_len <= 1) - return 0; + return 0; /* FIXME: I don't think this if is needed, as both '\n' and '\0' are char_len == 1. */ /* '.' accepts any one character except the following two cases. */ - if ((!(dfa->syntax & REG_DOT_NEWLINE) && + if ((!(dfa->syntax & RE_DOT_NEWLINE) && re_string_byte_at (input, str_idx) == '\n') || - ((dfa->syntax & REG_DOT_NOT_NULL) && + ((dfa->syntax & RE_DOT_NOT_NULL) && re_string_byte_at (input, str_idx) == '\0')) return 0; return char_len; @@ -3791,7 +3898,6 @@ check_node_accept_bytes (re_dfa_t *dfa, Idx node_idx, const int32_t *table, *indirect; const unsigned char *weights, *extra; const char *collseqwc; - int32_t idx; /* This #include defines a local function! */ # include @@ -3849,15 +3955,20 @@ check_node_accept_bytes (re_dfa_t *dfa, Idx node_idx, _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); - idx = findidx (&cp); + int32_t idx = findidx (&cp, elem_len); if (idx > 0) for (i = 0; i < cset->nequiv_classes; ++i) { int32_t equiv_class_idx = cset->equiv_classes[i]; - size_t weight_len = weights[idx]; - if (weight_len == weights[equiv_class_idx]) + size_t weight_len = weights[idx & 0xffffff]; + if (weight_len == weights[equiv_class_idx & 0xffffff] + && (idx >> 24) == (equiv_class_idx >> 24)) { Idx cnt = 0; + + idx &= 0xffffff; + equiv_class_idx &= 0xffffff; + while (cnt <= weight_len && (weights[equiv_class_idx + 1 + cnt] == weights[idx + 1 + cnt])) @@ -3875,7 +3986,7 @@ check_node_accept_bytes (re_dfa_t *dfa, Idx node_idx, # endif /* _LIBC */ { /* match with range expression? */ -#if __GNUC__ >= 2 +#if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && defined __STRICT_ANSI__) wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; #else wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; @@ -3909,6 +4020,7 @@ check_node_accept_bytes (re_dfa_t *dfa, Idx node_idx, # ifdef _LIBC static unsigned int +internal_function find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) { uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); @@ -3933,7 +4045,8 @@ find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) for (idx = 0; idx < extrasize;) { - int mbs_cnt, found = 0; + int mbs_cnt; + bool found = false; int32_t elem_mbs_len; /* Skip the name of collating element name. */ idx = idx + extra[idx] + 1; @@ -3945,7 +4058,7 @@ find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) break; if (mbs_cnt == elem_mbs_len) /* Found the entry. */ - found = 1; + found = true; } /* Skip the byte sequence of the collating element. */ idx += elem_mbs_len; @@ -3954,7 +4067,7 @@ find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) /* Skip the collation sequence value. */ idx += sizeof (uint32_t); /* Skip the wide char sequence of the collating element. */ - idx = idx + sizeof (uint32_t) * (extra[idx] + 1); + idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1); /* If we found the entry, return the sequence value. */ if (found) return *(uint32_t *) (extra + idx); @@ -3970,7 +4083,7 @@ find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) /* Check whether the node accepts the byte which is IDX-th byte of the INPUT. */ -static int +static bool internal_function check_node_accept (const re_match_context_t *mctx, const re_token_t *node, Idx idx) @@ -3981,28 +4094,28 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node, { case CHARACTER: if (node->opr.c != ch) - return 0; + return false; break; case SIMPLE_BRACKET: if (!bitset_contain (node->opr.sbcset, ch)) - return 0; + return false; break; #ifdef RE_ENABLE_I18N case OP_UTF8_PERIOD: - if (ch >= 0x80) - return 0; + if (ch >= ASCII_CHARS) + return false; /* FALLTHROUGH */ #endif case OP_PERIOD: - if ((ch == '\n' && !(mctx->dfa->syntax & REG_DOT_NEWLINE)) - || (ch == '\0' && (mctx->dfa->syntax & REG_DOT_NOT_NULL))) - return 0; + if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) + || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) + return false; break; default: - return 0; + return false; } if (node->constraint) @@ -4012,23 +4125,28 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node, unsigned int context = re_string_context_at (&mctx->input, idx, mctx->eflags); if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) - return 0; + return false; } - return 1; + return true; } /* Extend the buffers, if the buffers have run out. */ static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ extend_buffers (re_match_context_t *mctx) { reg_errcode_t ret; re_string_t *pstr = &mctx->input; - /* Double the lengthes of the buffers. */ - ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); + /* Avoid overflow. */ + if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2 + <= pstr->bufs_len, 0)) + return REG_ESPACE; + + /* Double the lengths of the buffers. */ + ret = re_string_realloc_buffers (pstr, MIN (pstr->len, pstr->bufs_len * 2)); if (BE (ret != REG_NOERROR, 0)) return ret; @@ -4080,13 +4198,20 @@ extend_buffers (re_match_context_t *mctx) /* Initialize MCTX. */ static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ match_ctx_init (re_match_context_t *mctx, int eflags, Idx n) { mctx->eflags = eflags; mctx->match_last = REG_MISSING; if (n > 0) { + /* Avoid overflow. */ + size_t max_object_size = + MAX (sizeof (struct re_backref_cache_entry), + sizeof (re_sub_match_top_t *)); + if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n, 0)) + return REG_ESPACE; + mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); mctx->sub_tops = re_malloc (re_sub_match_top_t *, n); if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0)) @@ -4153,9 +4278,9 @@ match_ctx_free (re_match_context_t *mctx) */ static reg_errcode_t -internal_function -match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, - Idx from, Idx to) +internal_function __attribute_warn_unused_result__ +match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from, + Idx to) { if (mctx->nbkref_ents >= mctx->abkref_ents) { @@ -4203,7 +4328,7 @@ match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, static Idx internal_function -search_cur_bkref_entry (re_match_context_t *mctx, Idx str_idx) +search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx) { Idx left, right, mid, last; last = right = mctx->nbkref_ents; @@ -4225,7 +4350,7 @@ search_cur_bkref_entry (re_match_context_t *mctx, Idx str_idx) at STR_IDX. */ static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx) { #ifdef DEBUG @@ -4243,7 +4368,7 @@ match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx) mctx->sub_tops = new_array; mctx->asub_tops = new_asub_tops; } - mctx->sub_tops[mctx->nsub_tops] = re_calloc (re_sub_match_top_t, 1); + mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t)); if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0)) return REG_ESPACE; mctx->sub_tops[mctx->nsub_tops]->node = node; @@ -4270,7 +4395,7 @@ match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx) subtop->lasts = new_array; subtop->alasts = new_alasts; } - new_entry = re_calloc (re_sub_match_last_t, 1); + new_entry = calloc (1, sizeof (re_sub_match_last_t)); if (BE (new_entry != NULL, 1)) { subtop->lasts[subtop->nlasts] = new_entry; @@ -4283,10 +4408,8 @@ match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx) static void internal_function -sift_ctx_init (re_sift_context_t *sctx, - re_dfastate_t **sifted_sts, - re_dfastate_t **limited_sts, - Idx last_node, Idx last_str_idx) +sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, + re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx) { sctx->sifted_states = sifted_sts; sctx->limited_states = limited_sts;