X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fregcomp.c;h=279b20c4c89fcc5bf1737c835552e83fee036e10;hb=5fa5d3d767b29cf1d757609afbb56459b09743fa;hp=dc9bbc102084d0d4ad036db2ceb78687f3198775;hpb=1e5cfc92d3a783d911169d1704ae6e37072c327c;p=gnulib.git diff --git a/lib/regcomp.c b/lib/regcomp.c index dc9bbc102..279b20c4c 100644 --- a/lib/regcomp.c +++ b/lib/regcomp.c @@ -86,21 +86,21 @@ static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, re_token_t *token); #ifdef RE_ENABLE_I18N -static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, +static reg_errcode_t build_equiv_class (bitset sbcset, re_charset_t *mbcset, Idx *equiv_class_alloc, const unsigned char *name); static reg_errcode_t build_charclass (unsigned REG_TRANSLATE_TYPE trans, - re_bitset_ptr_t sbcset, + bitset sbcset, re_charset_t *mbcset, Idx *char_class_alloc, const unsigned char *class_name, reg_syntax_t syntax); #else /* not RE_ENABLE_I18N */ -static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, +static reg_errcode_t build_equiv_class (bitset sbcset, const unsigned char *name); static reg_errcode_t build_charclass (unsigned REG_TRANSLATE_TYPE trans, - re_bitset_ptr_t sbcset, + bitset sbcset, const unsigned char *class_name, reg_syntax_t syntax); #endif /* not RE_ENABLE_I18N */ @@ -334,9 +334,9 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, else if (type == SIMPLE_BRACKET) { int i, j, ch; - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) - if (dfa->nodes[node].opr.sbcset[i] & (1u << j)) + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + if (dfa->nodes[node].opr.sbcset[i] & ((bitset_word) 1 << j)) re_set_fastmap (fastmap, icase, ch); } #ifdef RE_ENABLE_I18N @@ -356,13 +356,11 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, is a valid collation element, and don't catch 'b' since 'b' is the only collation element which starts from 'b'. */ - int j, ch; const int32_t *table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) - if (table[ch] < 0) - re_set_fastmap (fastmap, icase, ch); + for (i = 0; i < SBC_MAX; ++i) + if (table[i] < 0) + re_set_fastmap (fastmap, icase, i); } # else if (dfa->mb_cur_max > 1) @@ -546,11 +544,22 @@ weak_alias (__regerror, regerror) static const bitset utf8_sb_map = { /* Set the first 128 bits. */ -# if UINT_MAX == 0xffffffff - 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff -# else -# error "Add case for new unsigned int size" +# if 2 < BITSET_WORDS + BITSET_WORD_MAX, +# endif +# if 4 < BITSET_WORDS + BITSET_WORD_MAX, +# endif +# if 6 < BITSET_WORDS + BITSET_WORD_MAX, # endif +# if 8 < BITSET_WORDS +# error "Invalid BITSET_WORDS" +# endif + (BITSET_WORD_MAX + >> (SBC_MAX % BITSET_WORD_BITS == 0 + ? 0 + : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS)) }; #endif @@ -808,7 +817,7 @@ init_dfa (re_dfa_t *dfa, Idx pat_len) dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; dfa->nodes_alloc = pat_len + 1; - dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc); + dfa->nodes = re_xmalloc (re_token_t, dfa->nodes_alloc); /* table_size = 2 ^ ceil(log pat_len) */ for (table_size = 1; table_size <= pat_len; table_size <<= 1) @@ -858,20 +867,17 @@ init_dfa (re_dfa_t *dfa, Idx pat_len) { int i, j, ch; - dfa->sb_char = re_calloc (unsigned int, BITSET_UINTS); + dfa->sb_char = re_calloc (bitset_word, BITSET_WORDS); if (BE (dfa->sb_char == NULL, 0)) return REG_ESPACE; - /* Clear all bits by, then set those corresponding to single - byte chars. */ - bitset_empty (dfa->sb_char); - - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) + /* Set the bits corresponding to single byte chars. */ + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) { wint_t wch = __btowc (ch); if (wch != WEOF) - dfa->sb_char[i] |= 1u << j; + dfa->sb_char[i] |= (bitset_word) 1 << j; # ifndef _LIBC if (isascii (ch) && wch != ch) dfa->map_notascii = 1; @@ -895,10 +901,10 @@ init_word_char (re_dfa_t *dfa) { int i, j, ch; dfa->word_ops_used = 1; - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) if (isalnum (ch) || ch == '_') - dfa->word_char[i] |= 1u << j; + dfa->word_char[i] |= (bitset_word) 1 << j; } /* Free the work area which are only used while compiling. */ @@ -1046,9 +1052,18 @@ optimize_utf8 (re_dfa_t *dfa) return; case SIMPLE_BRACKET: /* Just double check. */ - for (i = 0x80 / UINT_BITS; i < BITSET_UINTS; ++i) - if (dfa->nodes[node].opr.sbcset[i]) - return; + { + int rshift = + (SBC_MAX / 2 % BITSET_WORD_BITS == 0 + ? 0 + : BITSET_WORD_BITS - SBC_MAX / 2 % BITSET_WORD_BITS); + for (i = SBC_MAX / 2 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i) + { + if (dfa->nodes[node].opr.sbcset[i] >> rshift != 0) + return; + rshift = 0; + } + } break; default: abort (); @@ -1083,13 +1098,13 @@ analyze (regex_t *preg) /* Allocate arrays. */ dfa->nexts = re_malloc (Idx, dfa->nodes_alloc); dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc); - dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc); + dfa->edests = re_xmalloc (re_node_set, dfa->nodes_alloc); dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL || dfa->eclosures == NULL, 0)) return REG_ESPACE; - dfa->subexp_map = re_malloc (Idx, preg->re_nsub); + dfa->subexp_map = re_xmalloc (Idx, preg->re_nsub); if (dfa->subexp_map != NULL) { Idx i; @@ -1125,7 +1140,7 @@ analyze (regex_t *preg) if ((!preg->re_no_sub && preg->re_nsub > 0 && dfa->has_plural_match) || dfa->nbackref) { - dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len); + dfa->inveclosures = re_xmalloc (re_node_set, dfa->nodes_len); if (BE (dfa->inveclosures == NULL, 0)) return REG_ESPACE; ret = calc_inveclosure (dfa); @@ -1224,8 +1239,8 @@ optimize_subexps (void *extra, bin_tree_t *node) node->left->parent = node; dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx]; - if (other_idx < CHAR_BIT * sizeof dfa->used_bkref_map) - dfa->used_bkref_map &= ~(1u << other_idx); + if (other_idx < BITSET_WORD_BITS) + dfa->used_bkref_map &= ~ ((bitset_word) 1 << other_idx); } return REG_NOERROR; @@ -1268,8 +1283,8 @@ lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node) very common, so we do not lose much. An example that triggers this case is the sed "script" /\(\)/x. */ && node->left != NULL - && (node->token.opr.idx >= CHAR_BIT * sizeof dfa->used_bkref_map - || !(dfa->used_bkref_map & (1u << node->token.opr.idx)))) + && ! (node->token.opr.idx < BITSET_WORD_BITS + && dfa->used_bkref_map & ((bitset_word) 1 << node->token.opr.idx))) return node->left; /* Convert the SUBEXP node to the concatenation of an @@ -2550,7 +2565,7 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, update it. */ static reg_errcode_t -build_range_exp (re_bitset_ptr_t sbcset, +build_range_exp (bitset sbcset, # ifdef RE_ENABLE_I18N re_charset_t *mbcset, Idx *range_alloc, # endif @@ -2608,12 +2623,11 @@ build_range_exp (re_bitset_ptr_t sbcset, wchar_t *new_array_start, *new_array_end; Idx new_nranges; - /* +1 in case of mbcset->nranges is 0. */ - new_nranges = 2 * mbcset->nranges + 1; + new_nranges = mbcset->nranges; /* Use realloc since mbcset->range_starts and mbcset->range_ends are NULL if *range_alloc == 0. */ - new_array_start = re_realloc (mbcset->range_starts, wchar_t, - new_nranges); + new_array_start = re_x2realloc (mbcset->range_starts, wchar_t, + &new_nranges); new_array_end = re_realloc (mbcset->range_ends, wchar_t, new_nranges); @@ -2667,7 +2681,7 @@ build_range_exp (re_bitset_ptr_t sbcset, pointer argument since we may update it. */ static reg_errcode_t -build_collating_symbol (re_bitset_ptr_t sbcset, +build_collating_symbol (bitset sbcset, # ifdef RE_ENABLE_I18N re_charset_t *mbcset, Idx *coll_sym_alloc, # endif @@ -2803,7 +2817,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, auto inline reg_errcode_t __attribute ((always_inline)) - build_range_exp (re_bitset_ptr_t sbcset, re_charset_t *mbcset, + build_range_exp (bitset sbcset, re_charset_t *mbcset, Idx *range_alloc, bracket_elem_t *start_elem, bracket_elem_t *end_elem) { @@ -2840,10 +2854,9 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, uint32_t *new_array_end; Idx new_nranges; - /* +1 in case of mbcset->nranges is 0. */ - new_nranges = 2 * mbcset->nranges + 1; - new_array_start = re_realloc (mbcset->range_starts, uint32_t, - new_nranges); + new_nranges = mbcset->nranges; + new_array_start = re_x2realloc (mbcset->range_starts, uint32_t, + &new_nranges); new_array_end = re_realloc (mbcset->range_ends, uint32_t, new_nranges); @@ -2884,7 +2897,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, auto inline reg_errcode_t __attribute ((always_inline)) - build_collating_symbol (re_bitset_ptr_t sbcset, re_charset_t *mbcset, + build_collating_symbol (bitset sbcset, re_charset_t *mbcset, Idx *coll_sym_alloc, const unsigned char *name) { int32_t elem, idx; @@ -2914,12 +2927,11 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0)) { /* Not enough, realloc it. */ - /* +1 in case of mbcset->ncoll_syms is 0. */ - Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; + Idx new_coll_sym_alloc = mbcset->ncoll_syms; /* Use realloc since mbcset->coll_syms is NULL if *alloc == 0. */ - int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t, - new_coll_sym_alloc); + int32_t *new_coll_syms = re_x2realloc (mbcset->coll_syms, int32_t, + &new_coll_sym_alloc); if (BE (new_coll_syms == NULL, 0)) return REG_ESPACE; mbcset->coll_syms = new_coll_syms; @@ -2969,7 +2981,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, _NL_COLLATE_SYMB_EXTRAMB); } #endif - sbcset = re_calloc (unsigned int, BITSET_UINTS); + sbcset = re_calloc (bitset_word, BITSET_WORDS); #ifdef RE_ENABLE_I18N mbcset = re_calloc (re_charset_t, 1); #endif /* RE_ENABLE_I18N */ @@ -3103,11 +3115,10 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, { wchar_t *new_mbchars; /* Not enough, realloc it. */ - /* +1 in case of mbcset->nmbchars is 0. */ - mbchar_alloc = 2 * mbcset->nmbchars + 1; + mbchar_alloc = mbcset->nmbchars; /* Use realloc since array is NULL if *alloc == 0. */ - new_mbchars = re_realloc (mbcset->mbchars, wchar_t, - mbchar_alloc); + new_mbchars = re_x2realloc (mbcset->mbchars, wchar_t, + &mbchar_alloc); if (BE (new_mbchars == NULL, 0)) goto parse_bracket_exp_espace; mbcset->mbchars = new_mbchars; @@ -3180,12 +3191,12 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); if (BE (mbc_tree == NULL, 0)) goto parse_bracket_exp_espace; - for (sbc_idx = 0; sbc_idx < BITSET_UINTS; ++sbc_idx) + for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx) if (sbcset[sbc_idx]) break; /* If there are no bits set in sbcset, there is no point of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */ - if (sbc_idx < BITSET_UINTS) + if (sbc_idx < BITSET_WORDS) { /* Build a tree for simple bracket. */ br_token.type = SIMPLE_BRACKET; @@ -3320,7 +3331,7 @@ parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, is a pointer argument sinse we may update it. */ static reg_errcode_t -build_equiv_class (re_bitset_ptr_t sbcset, +build_equiv_class (bitset sbcset, #ifdef RE_ENABLE_I18N re_charset_t *mbcset, Idx *equiv_class_alloc, #endif @@ -3381,12 +3392,11 @@ build_equiv_class (re_bitset_ptr_t sbcset, if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0)) { /* Not enough, realloc it. */ - /* +1 in case of mbcset->nequiv_classes is 0. */ - Idx new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1; + Idx new_equiv_class_alloc = mbcset->nequiv_classes; /* Use realloc since the array is NULL if *alloc == 0. */ - int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes, - int32_t, - new_equiv_class_alloc); + int32_t *new_equiv_classes = re_x2realloc (mbcset->equiv_classes, + int32_t, + &new_equiv_class_alloc); if (BE (new_equiv_classes == NULL, 0)) return REG_ESPACE; mbcset->equiv_classes = new_equiv_classes; @@ -3411,7 +3421,7 @@ build_equiv_class (re_bitset_ptr_t sbcset, is a pointer argument sinse we may update it. */ static reg_errcode_t -build_charclass (unsigned REG_TRANSLATE_TYPE trans, re_bitset_ptr_t sbcset, +build_charclass (unsigned REG_TRANSLATE_TYPE trans, bitset sbcset, #ifdef RE_ENABLE_I18N re_charset_t *mbcset, Idx *char_class_alloc, #endif @@ -3431,11 +3441,10 @@ build_charclass (unsigned REG_TRANSLATE_TYPE trans, re_bitset_ptr_t sbcset, if (BE (*char_class_alloc == mbcset->nchar_classes, 0)) { /* Not enough, realloc it. */ - /* +1 in case of mbcset->nchar_classes is 0. */ - Idx new_char_class_alloc = 2 * mbcset->nchar_classes + 1; + Idx new_char_class_alloc = mbcset->nchar_classes; /* Use realloc since array is NULL if *alloc == 0. */ - wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t, - new_char_class_alloc); + wctype_t *new_char_classes = re_x2realloc (mbcset->char_classes, wctype_t, + &new_char_class_alloc); if (BE (new_char_classes == NULL, 0)) return REG_ESPACE; mbcset->char_classes = new_char_classes; @@ -3499,7 +3508,7 @@ build_charclass_op (re_dfa_t *dfa, unsigned REG_TRANSLATE_TYPE trans, re_token_t br_token; bin_tree_t *tree; - sbcset = re_calloc (unsigned int, BITSET_UINTS); + sbcset = re_calloc (bitset_word, BITSET_WORDS); #ifdef RE_ENABLE_I18N mbcset = re_calloc (re_charset_t, 1); #endif /* RE_ENABLE_I18N */