/* Extended regular expression matching and search library.
- Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009
- Free Software Foundation, Inc.
+ Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
+ Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
init_dfa (re_dfa_t *dfa, size_t pat_len)
{
__re_size_t table_size;
+#ifndef _LIBC
+ char *codeset_name;
+#endif
#ifdef RE_ENABLE_I18N
size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t));
#else
dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
!= 0);
#else
- if (strcmp (locale_charset (), "UTF-8") == 0)
+ codeset_name = nl_langinfo (CODESET);
+ if (strcasecmp (codeset_name, "UTF-8") == 0
+ || strcasecmp (codeset_name, "UTF8") == 0)
dfa->is_utf8 = 1;
/* We check exhaustively in the loop below if this charset is a
destination. */
org_dest = dfa->edests[org_node].elems[0];
re_node_set_empty (dfa->edests + clone_node);
- clone_dest = search_duplicated_node (dfa, org_dest, constraint);
/* If the node is root_node itself, it means the epsilon closure
has a loop. Then tie it to the destination of the root_node. */
if (org_node == root_node && clone_node != org_node)
}
else
{
- /* There is a duplicated node which satisfy the constraint,
+ /* There is a duplicated node which satisfies the constraint,
use it to avoid infinite loop. */
ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
if (BE (! ok, 0))
return elem;
}
- if (BE (end != REG_MISSING && start > end, 0))
+ if (BE ((end != REG_MISSING && start > end)
+ || token->type != OP_CLOSE_DUP_NUM, 0))
{
/* First number greater than second, or the token following the
   numbers is not OP_CLOSE_DUP_NUM. */
*err = REG_BADBR;
return elem;
}
- /* Local function for parse_bracket_exp used in _LIBC environement.
+ /* Local function for parse_bracket_exp used in _LIBC environment.
Look up the collation sequence value of BR_ELEM.
Return the value if succeeded, UINT_MAX otherwise. */
}
else if (br_elem->type == MB_CHAR)
{
- return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+ if (nrules != 0)
+ return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
}
else if (br_elem->type == COLL_SYM)
{
/* Build single byte matching table for this equivalence class. */
char_buf[1] = (unsigned char) '\0';
- len = weights[idx1];
+ len = weights[idx1 & 0xffffff];
for (ch = 0; ch < SBC_MAX; ++ch)
{
char_buf[0] = ch;
if (idx2 == 0)
/* This isn't a valid character. */
continue;
- if (len == weights[idx2])
+ /* Compare only if the length matches and the collation rule
+ index is the same. */
+ if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24))
{
int cnt = 0;
+
while (cnt <= len &&
- weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
+ weights[(idx1 & 0xffffff) + 1 + cnt]
+ == weights[(idx2 & 0xffffff) + 1 + cnt])
++cnt;
if (cnt > len)