re_string_t *regexp,
re_token_t *token);
#ifdef RE_ENABLE_I18N
-static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
+static reg_errcode_t build_equiv_class (bitset sbcset,
re_charset_t *mbcset,
Idx *equiv_class_alloc,
const unsigned char *name);
static reg_errcode_t build_charclass (unsigned REG_TRANSLATE_TYPE trans,
- re_bitset_ptr_t sbcset,
+ bitset sbcset,
re_charset_t *mbcset,
Idx *char_class_alloc,
const unsigned char *class_name,
reg_syntax_t syntax);
#else /* not RE_ENABLE_I18N */
-static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
+static reg_errcode_t build_equiv_class (bitset sbcset,
const unsigned char *name);
static reg_errcode_t build_charclass (unsigned REG_TRANSLATE_TYPE trans,
- re_bitset_ptr_t sbcset,
+ bitset sbcset,
const unsigned char *class_name,
reg_syntax_t syntax);
#endif /* not RE_ENABLE_I18N */
else if (type == SIMPLE_BRACKET)
{
int i, j, ch;
- for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
- for (j = 0; j < UINT_BITS; ++j, ++ch)
- if (dfa->nodes[node].opr.sbcset[i] & (1u << j))
+ for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+ for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+ if (dfa->nodes[node].opr.sbcset[i] & ((bitset_word) 1 << j))
re_set_fastmap (fastmap, icase, ch);
}
#ifdef RE_ENABLE_I18N
is a valid collation element, and don't catch
'b' since 'b' is the only collation element
which starts from 'b'. */
- int j, ch;
const int32_t *table = (const int32_t *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
- for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
- for (j = 0; j < UINT_BITS; ++j, ++ch)
- if (table[ch] < 0)
- re_set_fastmap (fastmap, icase, ch);
+ for (i = 0; i < SBC_MAX; ++i)
+ if (table[i] < 0)
+ re_set_fastmap (fastmap, icase, i);
}
# else
if (dfa->mb_cur_max > 1)
static const bitset utf8_sb_map =
{
/* Set the first 128 bits. */
-# if UINT_MAX == 0xffffffff
- 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
-# else
-# error "Add case for new unsigned int size"
+# if 2 < BITSET_WORDS
+ BITSET_WORD_MAX,
+# endif
+# if 4 < BITSET_WORDS
+ BITSET_WORD_MAX,
+# endif
+# if 6 < BITSET_WORDS
+ BITSET_WORD_MAX,
# endif
+# if 8 < BITSET_WORDS
+# error "Invalid BITSET_WORDS"
+# endif
+ (BITSET_WORD_MAX
+ >> (SBC_MAX % BITSET_WORD_BITS == 0
+ ? 0
+ : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS))
};
#endif
dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
dfa->nodes_alloc = pat_len + 1;
- dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
+ dfa->nodes = re_xmalloc (re_token_t, dfa->nodes_alloc);
/* table_size = 2 ^ ceil(log pat_len) */
for (table_size = 1; table_size <= pat_len; table_size <<= 1)
{
int i, j, ch;
- dfa->sb_char = re_calloc (unsigned int, BITSET_UINTS);
+ dfa->sb_char = re_calloc (bitset_word, BITSET_WORDS);
if (BE (dfa->sb_char == NULL, 0))
return REG_ESPACE;
- /* Clear all bits by, then set those corresponding to single
- byte chars. */
- bitset_empty (dfa->sb_char);
-
- for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
- for (j = 0; j < UINT_BITS; ++j, ++ch)
+ /* Set the bits corresponding to single byte chars. */
+ for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+ for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
{
wint_t wch = __btowc (ch);
if (wch != WEOF)
- dfa->sb_char[i] |= 1u << j;
+ dfa->sb_char[i] |= (bitset_word) 1 << j;
# ifndef _LIBC
if (isascii (ch) && wch != ch)
dfa->map_notascii = 1;
{
int i, j, ch;
dfa->word_ops_used = 1;
- for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
- for (j = 0; j < UINT_BITS; ++j, ++ch)
+ for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+ for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
if (isalnum (ch) || ch == '_')
- dfa->word_char[i] |= 1u << j;
+ dfa->word_char[i] |= (bitset_word) 1 << j;
}
/* Free the work area which are only used while compiling. */
return;
case SIMPLE_BRACKET:
/* Just double check. */
- for (i = 0x80 / UINT_BITS; i < BITSET_UINTS; ++i)
- if (dfa->nodes[node].opr.sbcset[i])
- return;
+ {
+ int rshift =
+ (SBC_MAX / 2 % BITSET_WORD_BITS == 0
+ ? 0
+ : BITSET_WORD_BITS - SBC_MAX / 2 % BITSET_WORD_BITS);
+ for (i = SBC_MAX / 2 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
+ {
+ if (dfa->nodes[node].opr.sbcset[i] >> rshift != 0)
+ return;
+ rshift = 0;
+ }
+ }
break;
default:
abort ();
/* Allocate arrays. */
dfa->nexts = re_malloc (Idx, dfa->nodes_alloc);
dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc);
- dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
+ dfa->edests = re_xmalloc (re_node_set, dfa->nodes_alloc);
dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
|| dfa->eclosures == NULL, 0))
return REG_ESPACE;
- dfa->subexp_map = re_malloc (Idx, preg->re_nsub);
+ dfa->subexp_map = re_xmalloc (Idx, preg->re_nsub);
if (dfa->subexp_map != NULL)
{
Idx i;
if ((!preg->re_no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
|| dfa->nbackref)
{
- dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
+ dfa->inveclosures = re_xmalloc (re_node_set, dfa->nodes_len);
if (BE (dfa->inveclosures == NULL, 0))
return REG_ESPACE;
ret = calc_inveclosure (dfa);
node->left->parent = node;
dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
- if (other_idx < CHAR_BIT * sizeof dfa->used_bkref_map)
- dfa->used_bkref_map &= ~(1u << other_idx);
+ if (other_idx < BITSET_WORD_BITS)
+ dfa->used_bkref_map &= ~ ((bitset_word) 1 << other_idx);
}
return REG_NOERROR;
very common, so we do not lose much. An example that triggers
this case is the sed "script" /\(\)/x. */
&& node->left != NULL
- && (node->token.opr.idx >= CHAR_BIT * sizeof dfa->used_bkref_map
- || !(dfa->used_bkref_map & (1u << node->token.opr.idx))))
+ && ! (node->token.opr.idx < BITSET_WORD_BITS
+ && dfa->used_bkref_map & ((bitset_word) 1 << node->token.opr.idx)))
return node->left;
/* Convert the SUBEXP node to the concatenation of an
update it. */
static reg_errcode_t
-build_range_exp (re_bitset_ptr_t sbcset,
+build_range_exp (bitset sbcset,
# ifdef RE_ENABLE_I18N
re_charset_t *mbcset, Idx *range_alloc,
# endif
wchar_t *new_array_start, *new_array_end;
Idx new_nranges;
- /* +1 in case of mbcset->nranges is 0. */
- new_nranges = 2 * mbcset->nranges + 1;
+ new_nranges = mbcset->nranges;
/* Use realloc since mbcset->range_starts and mbcset->range_ends
are NULL if *range_alloc == 0. */
- new_array_start = re_realloc (mbcset->range_starts, wchar_t,
- new_nranges);
+ new_array_start = re_x2realloc (mbcset->range_starts, wchar_t,
+ &new_nranges);
new_array_end = re_realloc (mbcset->range_ends, wchar_t,
new_nranges);
pointer argument since we may update it. */
static reg_errcode_t
-build_collating_symbol (re_bitset_ptr_t sbcset,
+build_collating_symbol (bitset sbcset,
# ifdef RE_ENABLE_I18N
re_charset_t *mbcset, Idx *coll_sym_alloc,
# endif
auto inline reg_errcode_t
__attribute ((always_inline))
- build_range_exp (re_bitset_ptr_t sbcset, re_charset_t *mbcset,
+ build_range_exp (bitset sbcset, re_charset_t *mbcset,
Idx *range_alloc,
bracket_elem_t *start_elem, bracket_elem_t *end_elem)
{
uint32_t *new_array_end;
Idx new_nranges;
- /* +1 in case of mbcset->nranges is 0. */
- new_nranges = 2 * mbcset->nranges + 1;
- new_array_start = re_realloc (mbcset->range_starts, uint32_t,
- new_nranges);
+ new_nranges = mbcset->nranges;
+ new_array_start = re_x2realloc (mbcset->range_starts, uint32_t,
+ &new_nranges);
new_array_end = re_realloc (mbcset->range_ends, uint32_t,
new_nranges);
auto inline reg_errcode_t
__attribute ((always_inline))
- build_collating_symbol (re_bitset_ptr_t sbcset, re_charset_t *mbcset,
+ build_collating_symbol (bitset sbcset, re_charset_t *mbcset,
Idx *coll_sym_alloc, const unsigned char *name)
{
int32_t elem, idx;
if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
{
/* Not enough, realloc it. */
- /* +1 in case of mbcset->ncoll_syms is 0. */
- Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
+ Idx new_coll_sym_alloc = mbcset->ncoll_syms;
/* Use realloc since mbcset->coll_syms is NULL
if *alloc == 0. */
- int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
- new_coll_sym_alloc);
+ int32_t *new_coll_syms = re_x2realloc (mbcset->coll_syms, int32_t,
+ &new_coll_sym_alloc);
if (BE (new_coll_syms == NULL, 0))
return REG_ESPACE;
mbcset->coll_syms = new_coll_syms;
_NL_COLLATE_SYMB_EXTRAMB);
}
#endif
- sbcset = re_calloc (unsigned int, BITSET_UINTS);
+ sbcset = re_calloc (bitset_word, BITSET_WORDS);
#ifdef RE_ENABLE_I18N
mbcset = re_calloc (re_charset_t, 1);
#endif /* RE_ENABLE_I18N */
{
wchar_t *new_mbchars;
/* Not enough, realloc it. */
- /* +1 in case of mbcset->nmbchars is 0. */
- mbchar_alloc = 2 * mbcset->nmbchars + 1;
+ mbchar_alloc = mbcset->nmbchars;
/* Use realloc since array is NULL if *alloc == 0. */
- new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
- mbchar_alloc);
+ new_mbchars = re_x2realloc (mbcset->mbchars, wchar_t,
+ &mbchar_alloc);
if (BE (new_mbchars == NULL, 0))
goto parse_bracket_exp_espace;
mbcset->mbchars = new_mbchars;
mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
if (BE (mbc_tree == NULL, 0))
goto parse_bracket_exp_espace;
- for (sbc_idx = 0; sbc_idx < BITSET_UINTS; ++sbc_idx)
+ for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
if (sbcset[sbc_idx])
break;
/* If there are no bits set in sbcset, there is no point
of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
- if (sbc_idx < BITSET_UINTS)
+ if (sbc_idx < BITSET_WORDS)
{
/* Build a tree for simple bracket. */
br_token.type = SIMPLE_BRACKET;
is a pointer argument sinse we may update it. */
static reg_errcode_t
-build_equiv_class (re_bitset_ptr_t sbcset,
+build_equiv_class (bitset sbcset,
#ifdef RE_ENABLE_I18N
re_charset_t *mbcset, Idx *equiv_class_alloc,
#endif
if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
{
/* Not enough, realloc it. */
- /* +1 in case of mbcset->nequiv_classes is 0. */
- Idx new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
+ Idx new_equiv_class_alloc = mbcset->nequiv_classes;
/* Use realloc since the array is NULL if *alloc == 0. */
- int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
- int32_t,
- new_equiv_class_alloc);
+ int32_t *new_equiv_classes = re_x2realloc (mbcset->equiv_classes,
+ int32_t,
+ &new_equiv_class_alloc);
if (BE (new_equiv_classes == NULL, 0))
return REG_ESPACE;
mbcset->equiv_classes = new_equiv_classes;
is a pointer argument sinse we may update it. */
static reg_errcode_t
-build_charclass (unsigned REG_TRANSLATE_TYPE trans, re_bitset_ptr_t sbcset,
+build_charclass (unsigned REG_TRANSLATE_TYPE trans, bitset sbcset,
#ifdef RE_ENABLE_I18N
re_charset_t *mbcset, Idx *char_class_alloc,
#endif
if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
{
/* Not enough, realloc it. */
- /* +1 in case of mbcset->nchar_classes is 0. */
- Idx new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
+ Idx new_char_class_alloc = mbcset->nchar_classes;
/* Use realloc since array is NULL if *alloc == 0. */
- wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
- new_char_class_alloc);
+ wctype_t *new_char_classes = re_x2realloc (mbcset->char_classes, wctype_t,
+ &new_char_class_alloc);
if (BE (new_char_classes == NULL, 0))
return REG_ESPACE;
mbcset->char_classes = new_char_classes;
re_token_t br_token;
bin_tree_t *tree;
- sbcset = re_calloc (unsigned int, BITSET_UINTS);
+ sbcset = re_calloc (bitset_word, BITSET_WORDS);
#ifdef RE_ENABLE_I18N
mbcset = re_calloc (re_charset_t, 1);
#endif /* RE_ENABLE_I18N */