X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fregexec.c;h=a85077c932f0852cf366139d4cec7342a212310a;hb=8de557e31178699dd6e839850056f0653cdfba89;hp=257c6478570e070c6c1c8c38e9481c8e98e2e546;hpb=1e5cfc92d3a783d911169d1704ae6e37072c327c;p=gnulib.git diff --git a/lib/regexec.c b/lib/regexec.c index 257c64785..a85077c93 100644 --- a/lib/regexec.c +++ b/lib/regexec.c @@ -434,7 +434,7 @@ re_search_stub (struct re_pattern_buffer *bufp, if (regs == NULL) nregs = 1; else if (BE (bufp->re_regs_allocated == REG_FIXED - && regs->rm_num_regs < bufp->re_nsub + 1, 0)) + && regs->rm_num_regs <= bufp->re_nsub, 0)) { nregs = regs->rm_num_regs; if (BE (nregs < 1, 0)) @@ -446,7 +446,7 @@ re_search_stub (struct re_pattern_buffer *bufp, } else nregs = bufp->re_nsub + 1; - pmatch = re_malloc (regmatch_t, nregs); + pmatch = re_xmalloc (regmatch_t, nregs); if (BE (pmatch == NULL, 0)) { rval = -2; @@ -500,7 +500,7 @@ re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs, /* Have the register data arrays been allocated? */ if (regs_allocated == REG_UNALLOCATED) { /* No. So allocate them with malloc. */ - regs->rm_start = re_malloc (regoff_t, need_regs); + regs->rm_start = re_xmalloc (regoff_t, need_regs); regs->rm_end = re_malloc (regoff_t, need_regs); if (BE (regs->rm_start == NULL, 0) || BE (regs->rm_end == NULL, 0)) return REG_UNALLOCATED; @@ -513,7 +513,7 @@ re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs, if (BE (need_regs > regs->rm_num_regs, 0)) { regoff_t *new_start = - re_realloc (regs->rm_start, regoff_t, need_regs); + re_xrealloc (regs->rm_start, regoff_t, need_regs); regoff_t *new_end = re_realloc (regs->rm_end, regoff_t, need_regs); if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0)) return REG_UNALLOCATED; @@ -685,7 +685,7 @@ re_search_internal (const regex_t *preg, multi character collating element. */ if (nmatch > 1 || dfa->has_mb_node) { - mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1); + mctx.state_log = re_xmalloc (re_dfastate_t *, mctx.input.bufs_len + 1); if (BE (mctx.state_log == NULL, 0)) { err = REG_ESPACE; @@ -945,7 +945,7 @@ prune_impossible_nodes (re_match_context_t *mctx) #endif match_last = mctx->match_last; halt_node = mctx->last_node; - sifted_states = re_malloc (re_dfastate_t *, match_last + 1); + sifted_states = re_xmalloc (re_dfastate_t *, match_last + 1); if (BE (sifted_states == NULL, 0)) { ret = REG_ESPACE; @@ -953,7 +953,7 @@ prune_impossible_nodes (re_match_context_t *mctx) } if (dfa->nbackref) { - lim_states = re_malloc (re_dfastate_t *, match_last + 1); + lim_states = re_xmalloc (re_dfastate_t *, match_last + 1); if (BE (lim_states == NULL, 0)) { ret = REG_ESPACE; @@ -1343,15 +1343,14 @@ push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, if (fs->num == fs->alloc) { struct re_fail_stack_ent_t *new_array = - re_realloc (fs->stack, struct re_fail_stack_ent_t, fs->alloc * 2); + re_x2realloc (fs->stack, struct re_fail_stack_ent_t, &fs->alloc); if (new_array == NULL) return REG_ESPACE; - fs->alloc *= 2; fs->stack = new_array; } fs->stack[num].idx = str_idx; fs->stack[num].node = dest_node; - fs->stack[num].regs = re_malloc (regmatch_t, nregs); + fs->stack[num].regs = re_xmalloc (regmatch_t, nregs); if (fs->stack[num].regs == NULL) return REG_ESPACE; memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); @@ -1399,7 +1398,7 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, if (fl_backtrack) { fs = &fs_body; - fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc); + fs->stack = re_xmalloc (struct re_fail_stack_ent_t, fs->alloc); if (fs->stack == NULL) return REG_ESPACE; } @@ -1409,6 +1408,11 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, cur_node = dfa->init_node; re_node_set_init_empty (&eps_via_nodes); + if (re_alloc_oversized (nmatch, sizeof (regmatch_t))) + { + free_fail_stack_return (fs); + return REG_ESPACE; + } if (__libc_use_alloca (nmatch * sizeof (regmatch_t))) prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t)); else @@ -1932,9 +1936,9 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, if (ent->node != node) continue; - if (subexp_idx - < CHAR_BIT * sizeof ent->eps_reachable_subexps_map - && !(ent->eps_reachable_subexps_map & (1u << subexp_idx))) + if (subexp_idx < BITSET_WORD_BITS + && !(ent->eps_reachable_subexps_map + & ((bitset_word) 1 << subexp_idx))) continue; /* Recurse trying to reach the OP_OPEN_SUBEXP and @@ -1960,9 +1964,9 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, if (cpos == 0 && (boundaries & 2)) return 0; - if (subexp_idx - < CHAR_BIT * sizeof ent->eps_reachable_subexps_map) - ent->eps_reachable_subexps_map &= ~(1u << subexp_idx); + if (subexp_idx < BITSET_WORD_BITS) + ent->eps_reachable_subexps_map &= + ~ ((bitset_word) 1 << subexp_idx); } while (ent++->more); } @@ -2418,8 +2422,9 @@ check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, { Idx node = cur_nodes->elems[node_idx]; if (dfa->nodes[node].type == OP_OPEN_SUBEXP - && dfa->nodes[node].opr.idx < CHAR_BIT * sizeof dfa->used_bkref_map - && dfa->used_bkref_map & (1u << dfa->nodes[node].opr.idx)) + && dfa->nodes[node].opr.idx < BITSET_WORD_BITS + && (dfa->used_bkref_map + & ((bitset_word) 1 << dfa->nodes[node].opr.idx))) { err = match_ctx_add_subtop (mctx, node, str_idx); if (BE (err != REG_NOERROR, 0)) @@ -2872,16 +2877,16 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, { re_dfastate_t **new_array; Idx old_alloc = path->alloc; - path->alloc += last_str + mctx->max_mb_elem_len + 1; - new_array = re_realloc (path->array, re_dfastate_t *, path->alloc); - if (new_array == NULL) - { - path->alloc = old_alloc; - return REG_ESPACE; - } + Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1; + if (BE (new_alloc < old_alloc, 0)) + return REG_ESPACE; + new_array = re_xrealloc (path->array, re_dfastate_t *, new_alloc); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; path->array = new_array; + path->alloc = new_alloc; memset (new_array + old_alloc, '\0', - sizeof (re_dfastate_t *) * (path->alloc - old_alloc)); + sizeof (re_dfastate_t *) * (new_alloc - old_alloc)); } str_idx = path->next_idx == 0 ? top_str : path->next_idx; @@ -3288,7 +3293,7 @@ build_trtable (re_dfa_t *dfa, re_dfastate_t *state) Idx i, j; int ch; bool need_word_trtable = false; - unsigned int elem, mask; + bitset_word elem, mask; bool dests_node_malloced = false, dest_states_malloced = false; Idx ndests; /* Number of the destination states from `state'. */ re_dfastate_t **trtable; @@ -3297,22 +3302,27 @@ build_trtable (re_dfa_t *dfa, re_dfastate_t *state) bitset *dests_ch; bitset acceptable; + struct dests_alloc + { + re_node_set dests_node[SBC_MAX]; + bitset dests_ch[SBC_MAX]; + } *dests_alloc; + /* We build DFA states which corresponds to the destination nodes from `state'. `dests_node[i]' represents the nodes which i-th destination state contains, and `dests_ch[i]' represents the characters which i-th destination state accepts. */ - if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX)) - dests_node = (re_node_set *) - alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX); + if (__libc_use_alloca (sizeof (struct dests_alloc))) + dests_alloc = (struct dests_alloc *) alloca (sizeof dests_alloc[0]); else { - dests_node = (re_node_set *) - malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX); - if (BE (dests_node == NULL, 0)) + dests_alloc = re_malloc (struct dests_alloc, 1); + if (BE (dests_alloc == NULL, 0)) return false; dests_node_malloced = true; } - dests_ch = (bitset *) (dests_node + SBC_MAX); + dests_node = dests_alloc->dests_node; + dests_ch = dests_alloc->dests_ch; /* Initialize transiton table. */ state->word_trtable = state->trtable = NULL; @@ -3323,7 +3333,7 @@ build_trtable (re_dfa_t *dfa, re_dfastate_t *state) if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0)) { if (dests_node_malloced) - free (dests_node); + free (dests_alloc); if (ndests == 0) { state->trtable = re_calloc (re_dfastate_t *, SBC_MAX); @@ -3336,6 +3346,12 @@ build_trtable (re_dfa_t *dfa, re_dfastate_t *state) if (BE (err != REG_NOERROR, 0)) goto out_free; + /* Avoid arithmetic overflow in size calculation. */ + if (BE (((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX) + / (3 * sizeof (re_dfastate_t *))) + < ndests, 0)) + goto out_free; + if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX + ndests * 3 * sizeof (re_dfastate_t *))) dest_states = (re_dfastate_t **) @@ -3353,7 +3369,7 @@ out_free: for (i = 0; i < ndests; ++i) re_node_set_free (dests_node + i); if (dests_node_malloced) - free (dests_node); + free (dests_alloc); return false; } dest_states_malloced = true; @@ -3417,8 +3433,8 @@ out_free: goto out_free; /* For all characters ch...: */ - for (i = 0; i < BITSET_UINTS; ++i) - for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1; + for (i = 0; i < BITSET_WORDS; ++i) + for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; elem; mask <<= 1, elem >>= 1, ++ch) if (BE (elem & 1, 0)) @@ -3447,8 +3463,8 @@ out_free: goto out_free; /* For all characters ch...: */ - for (i = 0; i < BITSET_UINTS; ++i) - for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1; + for (i = 0; i < BITSET_WORDS; ++i) + for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; elem; mask <<= 1, elem >>= 1, ++ch) if (BE (elem & 1, 0)) @@ -3489,7 +3505,7 @@ out_free: re_node_set_free (dests_node + i); if (dests_node_malloced) - free (dests_node); + free (dests_alloc); return true; } @@ -3543,7 +3559,10 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, #ifdef RE_ENABLE_I18N else if (type == OP_UTF8_PERIOD) { - memset (accepts, 255, sizeof (unsigned int) * BITSET_UINTS / 2); + if (SBC_MAX / 2 % BITSET_WORD_BITS == 0) + memset (accepts, -1, sizeof accepts / 2); + else + bitset_merge (accepts, utf8_sb_map); if (!(dfa->syntax & REG_DOT_NEWLINE)) bitset_clear (accepts, '\n'); if (dfa->syntax & REG_DOT_NOT_NULL) @@ -3559,8 +3578,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, { if (constraint & NEXT_NEWLINE_CONSTRAINT) { - unsigned int accepts_newline = - bitset_contain (accepts, NEWLINE_CHAR); + bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); bitset_empty (accepts); if (accepts_newline) bitset_set (accepts, NEWLINE_CHAR); @@ -3575,7 +3593,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, if (constraint & NEXT_WORD_CONSTRAINT) { - unsigned int any_set = 0; + bitset_word any_set = 0; if (type == CHARACTER && !node->word_char) { bitset_empty (accepts); @@ -3583,18 +3601,18 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, } #ifdef RE_ENABLE_I18N if (dfa->mb_cur_max > 1) - for (j = 0; j < BITSET_UINTS; ++j) + for (j = 0; j < BITSET_WORDS; ++j) any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); else #endif - for (j = 0; j < BITSET_UINTS; ++j) + for (j = 0; j < BITSET_WORDS; ++j) any_set |= (accepts[j] &= dfa->word_char[j]); if (!any_set) continue; } if (constraint & NEXT_NOTWORD_CONSTRAINT) { - unsigned int any_set = 0; + bitset_word any_set = 0; if (type == CHARACTER && node->word_char) { bitset_empty (accepts); @@ -3602,11 +3620,11 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, } #ifdef RE_ENABLE_I18N if (dfa->mb_cur_max > 1) - for (j = 0; j < BITSET_UINTS; ++j) + for (j = 0; j < BITSET_WORDS; ++j) any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); else #endif - for (j = 0; j < BITSET_UINTS; ++j) + for (j = 0; j < BITSET_WORDS; ++j) any_set |= (accepts[j] &= ~dfa->word_char[j]); if (!any_set) continue; @@ -3620,7 +3638,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, bitset intersec; /* Intersection sets, see below. */ bitset remains; /* Flags, see below. */ - unsigned int has_intersec, not_subset, not_consumed; + bitset_word has_intersec, not_subset, not_consumed; /* Optimization, skip if this state doesn't accept the character. */ if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) @@ -3628,7 +3646,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, /* Enumerate the intersection set of this state and `accepts'. */ has_intersec = 0; - for (k = 0; k < BITSET_UINTS; ++k) + for (k = 0; k < BITSET_WORDS; ++k) has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; /* And skip if the intersection set is empty. */ if (!has_intersec) @@ -3636,7 +3654,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, /* Then check if this state is a subset of `accepts'. */ not_subset = not_consumed = 0; - for (k = 0; k < BITSET_UINTS; ++k) + for (k = 0; k < BITSET_WORDS; ++k) { not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k]; not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; @@ -4057,8 +4075,8 @@ extend_buffers (re_match_context_t *mctx) /* XXX We have no indication of the size of this buffer. If this allocation fail we have no indication that the state_log array does not have the right size. */ - re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *, - pstr->bufs_len + 1); + re_dfastate_t **new_array = re_xrealloc (mctx->state_log, re_dfastate_t *, + pstr->bufs_len + 1); if (BE (new_array == NULL, 0)) return REG_ESPACE; mctx->state_log = new_array; @@ -4106,8 +4124,8 @@ match_ctx_init (re_match_context_t *mctx, int eflags, Idx n) mctx->match_last = REG_MISSING; if (n > 0) { - mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); - mctx->sub_tops = re_malloc (re_sub_match_top_t *, n); + mctx->bkref_ents = re_xmalloc (struct re_backref_cache_entry, n); + mctx->sub_tops = re_xmalloc (re_sub_match_top_t *, n); if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0)) return REG_ESPACE; } @@ -4179,8 +4197,8 @@ match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, if (mctx->nbkref_ents >= mctx->abkref_ents) { struct re_backref_cache_entry* new_entry; - new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry, - mctx->abkref_ents * 2); + new_entry = re_x2realloc (mctx->bkref_ents, struct re_backref_cache_entry, + &mctx->abkref_ents); if (BE (new_entry == NULL, 0)) { re_free (mctx->bkref_ents); @@ -4188,8 +4206,8 @@ match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, } mctx->bkref_ents = new_entry; memset (mctx->bkref_ents + mctx->nbkref_ents, '\0', - sizeof (struct re_backref_cache_entry) * mctx->abkref_ents); - mctx->abkref_ents *= 2; + (sizeof (struct re_backref_cache_entry) + * (mctx->abkref_ents - mctx->nbkref_ents))); } if (mctx->nbkref_ents > 0 && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx) @@ -4253,10 +4271,10 @@ match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx) #endif if (BE (mctx->nsub_tops == mctx->asub_tops, 0)) { - Idx new_asub_tops = mctx->asub_tops * 2; - re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops, - re_sub_match_top_t *, - new_asub_tops); + Idx new_asub_tops = mctx->asub_tops; + re_sub_match_top_t **new_array = re_x2realloc (mctx->sub_tops, + re_sub_match_top_t *, + &new_asub_tops); if (BE (new_array == NULL, 0)) return REG_ESPACE; mctx->sub_tops = new_array; @@ -4280,10 +4298,10 @@ match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx) re_sub_match_last_t *new_entry; if (BE (subtop->nlasts == subtop->alasts, 0)) { - Idx new_alasts = 2 * subtop->alasts + 1; - re_sub_match_last_t **new_array = re_realloc (subtop->lasts, - re_sub_match_last_t *, - new_alasts); + Idx new_alasts = subtop->alasts; + re_sub_match_last_t **new_array = re_x2realloc (subtop->lasts, + re_sub_match_last_t *, + &new_alasts); if (BE (new_array == NULL, 0)) return NULL; subtop->lasts = new_array;