X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fregexec.c;h=9388ac12bfd779dbf249dd49ff1d5b16887325f9;hb=217ec874c8e4610ce407f0204c4e3fe30b7c76ec;hp=6307d0501aba84a40a0fc1409b43e8b05e699249;hpb=8335a4d6c7b4448cd0bcb6d0bebf1d456bcfdb17;p=gnulib.git diff --git a/lib/regexec.c b/lib/regexec.c index 6307d0501..9388ac12b 100644 --- a/lib/regexec.c +++ b/lib/regexec.c @@ -1,5 +1,6 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. + Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free + Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa . @@ -221,10 +222,10 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx) int regexec (preg, string, nmatch, pmatch, eflags) - const regex_t *__restrict preg; - const char *__restrict string; + const regex_t *_Restrict_ preg; + const char *_Restrict_ string; size_t nmatch; - regmatch_t pmatch[]; + regmatch_t pmatch[_Restrict_arr_]; int eflags; { reg_errcode_t err; @@ -267,8 +268,8 @@ __typeof__ (__regexec) __compat_regexec; int attribute_compat_text_section -__compat_regexec (const regex_t *__restrict preg, - const char *__restrict string, size_t nmatch, +__compat_regexec (const regex_t *_Restrict_ preg, + const char *_Restrict_ string, size_t nmatch, regmatch_t pmatch[], int eflags) { return regexec (preg, string, nmatch, pmatch, @@ -636,7 +637,7 @@ re_exec (s) (0 <= LAST_START && LAST_START <= LENGTH) */ static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ re_search_internal (const regex_t *preg, const char *string, Idx length, Idx start, Idx last_start, Idx stop, @@ -700,7 +701,8 @@ re_search_internal (const regex_t *preg, fl_longest_match = (nmatch != 0 || dfa->nbackref); err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1, - preg->translate, preg->syntax & RE_ICASE, dfa); + preg->translate, (preg->syntax & RE_ICASE) != 0, + dfa); if (BE (err != REG_NOERROR, 0)) goto free_return; mctx.input.stop = stop; @@ -831,10 +833,10 @@ re_search_internal (const regex_t *preg, break; match_first += incr; if (match_first < left_lim || match_first > right_lim) - { - err = REG_NOMATCH; - goto free_return; - } + { + err = REG_NOMATCH; + goto free_return; + } } break; } @@ -951,14 +953,14 @@ re_search_internal (const regex_t *preg, } if (dfa->subexp_map) - for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) - if (dfa->subexp_map[reg_idx] != reg_idx) - { - pmatch[reg_idx + 1].rm_so - = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; - pmatch[reg_idx + 1].rm_eo - = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; - } + for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) + if (dfa->subexp_map[reg_idx] != reg_idx) + { + pmatch[reg_idx + 1].rm_so + = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; + pmatch[reg_idx + 1].rm_eo + = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; + } } free_return: @@ -970,7 +972,7 @@ re_search_internal (const regex_t *preg, } static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ prune_impossible_nodes (re_match_context_t *mctx) { const re_dfa_t *const dfa = mctx->dfa; @@ -1043,6 +1045,11 @@ prune_impossible_nodes (re_match_context_t *mctx) re_node_set_free (&sctx.limits); if (BE (ret != REG_NOERROR, 0)) goto free_return; + if (sifted_states[0] == NULL) + { + ret = REG_NOMATCH; + goto free_return; + } } re_free (mctx->state_log); mctx->state_log = sifted_states; @@ -1103,7 +1110,7 @@ acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, index of the buffer. */ static Idx -internal_function +internal_function __attribute_warn_unused_result__ check_matching (re_match_context_t *mctx, bool fl_longest_match, Idx *p_match_first) { @@ -1142,7 +1149,7 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match, { err = transit_state_bkref (mctx, &cur_state->nodes); if (BE (err != REG_NOERROR, 0)) - return err; + return err; } } } @@ -1169,16 +1176,16 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match, Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1; if (BE (next_char_idx >= mctx->input.bufs_len, 0) - || (BE (next_char_idx >= mctx->input.valid_len, 0) - && mctx->input.valid_len < mctx->input.len)) - { - err = extend_buffers (mctx); - if (BE (err != REG_NOERROR, 0)) + || (BE (next_char_idx >= mctx->input.valid_len, 0) + && mctx->input.valid_len < mctx->input.len)) + { + err = extend_buffers (mctx); + if (BE (err != REG_NOERROR, 0)) { assert (err == REG_ESPACE); return REG_ERROR; } - } + } cur_state = transit_state (&err, mctx, cur_state); if (mctx->state_log != NULL) @@ -1302,17 +1309,17 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, if (dest_node == REG_MISSING) dest_node = candidate; - else + else { /* In order to avoid infinite loop like "(a*)*", return the second - epsilon-transition if the first was already considered. */ + epsilon-transition if the first was already considered. */ if (re_node_set_contains (eps_via_nodes, dest_node)) - return candidate; + return candidate; /* Otherwise, push the second epsilon-transition on the fail stack. */ else if (fs != NULL && push_fail_stack (fs, *pidx, candidate, nregs, regs, - eps_via_nodes)) + eps_via_nodes)) return REG_ERROR; /* We know we are going to exit. */ @@ -1378,7 +1385,7 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, } static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes) { @@ -1425,7 +1432,7 @@ pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs, pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */ static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, regmatch_t *pmatch, bool fl_backtrack) { @@ -1660,7 +1667,7 @@ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) if (mctx->state_log[str_idx]) { err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); - if (BE (err != REG_NOERROR, 0)) + if (BE (err != REG_NOERROR, 0)) goto free_return; } @@ -1679,7 +1686,7 @@ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) } static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, Idx str_idx, re_node_set *cur_dest) { @@ -1841,7 +1848,7 @@ update_cur_sifted_state (const re_match_context_t *mctx, } static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, const re_node_set *candidates) { @@ -1856,10 +1863,14 @@ add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, { err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); if (BE (err != REG_NOERROR, 0)) - return REG_ESPACE; + return REG_ESPACE; for (i = 0; i < dest_nodes->nelem; i++) - re_node_set_merge (&state->inveclosure, - dfa->inveclosures + dest_nodes->elems[i]); + { + err = re_node_set_merge (&state->inveclosure, + dfa->inveclosures + dest_nodes->elems[i]); + if (BE (err != REG_NOERROR, 0)) + return REG_ESPACE; + } } return re_node_set_add_intersect (dest_nodes, candidates, &state->inveclosure); @@ -1971,7 +1982,7 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, { struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; do - { + { Idx dst; int cpos; @@ -1993,9 +2004,9 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, if (dst == from_node) { if (boundaries & 1) - return -1; + return -1; else /* if (boundaries & 2) */ - return 0; + return 0; } cpos = @@ -2009,7 +2020,7 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, if (subexp_idx < BITSET_WORD_BITS) ent->eps_reachable_subexps_map &= ~((bitset_word_t) 1 << subexp_idx); - } + } while (ent++->more); } break; @@ -2151,7 +2162,7 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, } static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, Idx str_idx, const re_node_set *candidates) { @@ -2234,7 +2245,7 @@ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, re_node_set_remove (&local_sctx.limits, enabled_idx); /* mctx->bkref_ents may have changed, reload the pointer. */ - entry = mctx->bkref_ents + enabled_idx; + entry = mctx->bkref_ents + enabled_idx; } while (enabled_idx++, entry++->more); } @@ -2281,7 +2292,7 @@ sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, update the destination of STATE_LOG. */ static re_dfastate_t * -internal_function +internal_function __attribute_warn_unused_result__ transit_state (reg_errcode_t *err, re_match_context_t *mctx, re_dfastate_t *state) { @@ -2315,7 +2326,7 @@ transit_state (reg_errcode_t *err, re_match_context_t *mctx, trtable = state->word_trtable; if (BE (trtable != NULL, 1)) - { + { unsigned int context; context = re_string_context_at (&mctx->input, @@ -2338,7 +2349,7 @@ transit_state (reg_errcode_t *err, re_match_context_t *mctx, } /* Update the state_log if we need */ -re_dfastate_t * +static re_dfastate_t * internal_function merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, re_dfastate_t *next_state) @@ -2361,21 +2372,21 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, unsigned int context; re_node_set next_nodes, *log_nodes, *table_nodes = NULL; /* If (state_log[cur_idx] != 0), it implies that cur_idx is - the destination of a multibyte char/collating element/ - back reference. Then the next state is the union set of - these destinations and the results of the transition table. */ + the destination of a multibyte char/collating element/ + back reference. Then the next state is the union set of + these destinations and the results of the transition table. */ pstate = mctx->state_log[cur_idx]; log_nodes = pstate->entrance_nodes; if (next_state != NULL) - { - table_nodes = next_state->entrance_nodes; - *err = re_node_set_init_union (&next_nodes, table_nodes, + { + table_nodes = next_state->entrance_nodes; + *err = re_node_set_init_union (&next_nodes, table_nodes, log_nodes); - if (BE (*err != REG_NOERROR, 0)) + if (BE (*err != REG_NOERROR, 0)) return NULL; - } + } else - next_nodes = *log_nodes; + next_nodes = *log_nodes; /* Note: We already add the nodes of the initial state, then we don't need to add them here. */ @@ -2383,12 +2394,12 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, re_string_cur_idx (&mctx->input) - 1, mctx->eflags); next_state = mctx->state_log[cur_idx] - = re_acquire_state_context (err, dfa, &next_nodes, context); + = re_acquire_state_context (err, dfa, &next_nodes, context); /* We don't need to check errors here, since the return value of - this function is next_state and ERR is already set. */ + this function is next_state and ERR is already set. */ if (table_nodes != NULL) - re_node_set_free (&next_nodes); + re_node_set_free (&next_nodes); } if (BE (dfa->nbackref, 0) && next_state != NULL) @@ -2429,9 +2440,9 @@ find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) do { - if (++cur_str_idx > max) - return NULL; - re_string_skip_bytes (&mctx->input, 1); + if (++cur_str_idx > max) + return NULL; + re_string_skip_bytes (&mctx->input, 1); } while (mctx->state_log[cur_str_idx] == NULL); @@ -2539,7 +2550,7 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) re_dfastate_t *dest_state; if (!dfa->nodes[cur_node_idx].accept_mb) - continue; + continue; if (dfa->nodes[cur_node_idx].constraint) { @@ -2707,7 +2718,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) delay these checking for prune_impossible_nodes(). */ static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) { const re_dfa_t *const dfa = mctx->dfa; @@ -2720,7 +2731,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) const struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx; do - if (entry->node == bkref_node) + if (entry->node == bkref_node) return REG_NOERROR; /* We already checked it. */ while (entry++->more); } @@ -2908,7 +2919,7 @@ find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, Idx top_str, Idx last_node, Idx last_str, int type) { @@ -3070,14 +3081,16 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, Can't we unify them? */ static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, re_node_set *cur_nodes, re_node_set *next_nodes) { const re_dfa_t *const dfa = mctx->dfa; bool ok; Idx cur_idx; +#ifdef RE_ENABLE_I18N reg_errcode_t err = REG_NOERROR; +#endif re_node_set union_set; re_node_set_init_empty (&union_set); for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) @@ -3202,7 +3215,7 @@ check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, problematic append it to DST_NODES. */ static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, Idx target, Idx ex_subexp, int type) { @@ -3247,7 +3260,7 @@ check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, in MCTX->BKREF_ENTS. */ static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, Idx cur_str, Idx subexp_num, int type) { @@ -3466,7 +3479,7 @@ out_free: CONTEXT_NEWLINE); if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0)) goto out_free; - } + } else { dest_states_word[i] = dest_states[i]; @@ -3613,16 +3626,16 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, } #ifdef RE_ENABLE_I18N else if (type == OP_UTF8_PERIOD) - { + { if (ASCII_CHARS % BITSET_WORD_BITS == 0) - memset (accepts, -1, ASCII_CHARS); + memset (accepts, -1, ASCII_CHARS / CHAR_BIT); else bitset_merge (accepts, utf8_sb_map); if (!(dfa->syntax & RE_DOT_NEWLINE)) bitset_clear (accepts, '\n'); if (dfa->syntax & RE_DOT_NOT_NULL) bitset_clear (accepts, '\0'); - } + } #endif else continue; @@ -3827,7 +3840,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, if (node->type == OP_PERIOD) { if (char_len <= 1) - return 0; + return 0; /* FIXME: I don't think this if is needed, as both '\n' and '\0' are char_len == 1. */ /* '.' accepts any one character except the following two cases. */ @@ -3940,15 +3953,20 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); - idx = findidx (&cp); + int32_t idx = findidx (&cp); if (idx > 0) for (i = 0; i < cset->nequiv_classes; ++i) { int32_t equiv_class_idx = cset->equiv_classes[i]; - size_t weight_len = weights[idx]; - if (weight_len == weights[equiv_class_idx]) + size_t weight_len = weights[idx & 0xffffff]; + if (weight_len == weights[equiv_class_idx & 0xffffff] + && (idx >> 24) == (equiv_class_idx >> 24)) { Idx cnt = 0; + + idx &= 0xffffff; + equiv_class_idx &= 0xffffff; + while (cnt <= weight_len && (weights[equiv_class_idx + 1 + cnt] == weights[idx + 1 + cnt])) @@ -3966,7 +3984,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, # endif /* _LIBC */ { /* match with range expression? */ -#if __GNUC__ >= 2 +#if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && __STRICT_ANSI__) wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; #else wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; @@ -4114,7 +4132,7 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node, /* Extend the buffers, if the buffers have run out. */ static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ extend_buffers (re_match_context_t *mctx) { reg_errcode_t ret; @@ -4177,7 +4195,7 @@ extend_buffers (re_match_context_t *mctx) /* Initialize MCTX. */ static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ match_ctx_init (re_match_context_t *mctx, int eflags, Idx n) { mctx->eflags = eflags; @@ -4257,7 +4275,7 @@ match_ctx_free (re_match_context_t *mctx) */ static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from, Idx to) { @@ -4329,7 +4347,7 @@ search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx) at STR_IDX. */ static reg_errcode_t -internal_function +internal_function __attribute_warn_unused_result__ match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx) { #ifdef DEBUG