X-Git-Url: https://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fregcomp.c;h=0fdc3bcaef3bb13e2b3ffa89bb46a7e28828a91a;hb=84d1749c4b5ad026522be96537320556ec6f7a57;hp=7996dc0b005cd05f9fa2dd15738dcf4ec4751c48;hpb=252b52457da7887667c036d18cc5169777615bb0;p=gnulib.git diff --git a/lib/regcomp.c b/lib/regcomp.c index 7996dc0b0..0fdc3bcae 100644 --- a/lib/regcomp.c +++ b/lib/regcomp.c @@ -1,20 +1,21 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002-2012 Free Software Foundation, Inc. + Copyright (C) 2002-2013 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, see <http://www.gnu.org/licenses/>. */ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, size_t length, reg_syntax_t syntax); @@ -93,20 +94,20 @@ static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, re_charset_t *mbcset, Idx *char_class_alloc, - const unsigned char *class_name, + const char *class_name, reg_syntax_t syntax); #else /* not RE_ENABLE_I18N */ static reg_errcode_t build_equiv_class (bitset_t sbcset, const unsigned char *name); static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, - const unsigned char *class_name, + const char *class_name, reg_syntax_t syntax); #endif /* not RE_ENABLE_I18N */ static bin_tree_t *build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, - const unsigned char *class_name, - const unsigned char *extra, + const char *class_name, + const char *extra, bool non_match, reg_errcode_t *err); static bin_tree_t *create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, @@ -899,8 +900,10 @@ init_dfa (re_dfa_t *dfa, size_t pat_len) != 0); #else codeset_name = nl_langinfo (CODESET); - if (strcasecmp (codeset_name, "UTF-8") == 0 - || strcasecmp (codeset_name, "UTF8") == 0) + if ((codeset_name[0] == 'U' || codeset_name[0] == 'u') + && (codeset_name[1] == 'T' || codeset_name[1] == 't') + && (codeset_name[2] == 'F' || codeset_name[2] == 'f') + && strcmp (codeset_name + 3 + (codeset_name[3] == '-'), "8") == 0) dfa->is_utf8 = 1; /* We check exhaustively in the loop below if this charset is a @@ -950,10 +953,10 @@ static void internal_function init_word_char (re_dfa_t *dfa) { - dfa->word_ops_used = 1; int i = 0; int j; int ch = 0; + dfa->word_ops_used = 1; if (BE (dfa->map_notascii == 0, 1)) { bitset_word_t bits0 = 0x00000000; @@ -2419,8 +2422,8 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, case OP_WORD: case OP_NOTWORD: tree = build_charclass_op (dfa, regexp->trans, - (const unsigned char *) "alnum", - (const unsigned char *) "_", + "alnum", + "_", 
token->type == OP_NOTWORD, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; @@ -2428,8 +2431,8 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, case OP_SPACE: case OP_NOTSPACE: tree = build_charclass_op (dfa, regexp->trans, - (const unsigned char *) "space", - (const unsigned char *) "", + "space", + "", token->type == OP_NOTSPACE, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; @@ -2621,7 +2624,10 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, old_tree = NULL; if (elem->token.type == SUBEXP) - postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx); + { + uintptr_t subidx = elem->token.opr.idx; + postorder (elem, mark_opt_subexp, (void *) subidx); + } tree = create_tree (dfa, elem, NULL, (end == REG_MISSING ? OP_DUP_ASTERISK : OP_ALT)); @@ -2706,7 +2712,6 @@ build_range_exp (const reg_syntax_t syntax, wchar_t wc; wint_t start_wc; wint_t end_wc; - wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] @@ -2720,11 +2725,7 @@ build_range_exp (const reg_syntax_t syntax, ? __btowc (end_ch) : end_elem->opr.wch); if (start_wc == WEOF || end_wc == WEOF) return REG_ECOLLATE; - cmp_buf[0] = start_wc; - cmp_buf[4] = end_wc; - - if (BE ((syntax & RE_NO_EMPTY_RANGES) - && wcscoll (cmp_buf, cmp_buf + 4) > 0, 0)) + else if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_wc > end_wc, 0)) return REG_ERANGE; /* Got valid collation sequence values, add them as a new entry. @@ -2765,9 +2766,7 @@ build_range_exp (const reg_syntax_t syntax, /* Build the table for single byte characters. 
*/ for (wc = 0; wc < SBC_MAX; ++wc) { - cmp_buf[2] = wc; - if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 - && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + if (start_wc <= wc && wc <= end_wc) bitset_set (sbcset, wc); } } @@ -2964,6 +2963,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, 0)) return REG_ERANGE; + /* FIXME: Implement rational ranges here, too. */ start_collseq = lookup_collation_sequence_value (start_elem); end_collseq = lookup_collation_sequence_value (end_elem); /* Check start/end collation sequence values. */ @@ -3571,14 +3571,14 @@ static reg_errcode_t #ifdef RE_ENABLE_I18N build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, re_charset_t *mbcset, Idx *char_class_alloc, - const unsigned char *class_name, reg_syntax_t syntax) + const char *class_name, reg_syntax_t syntax) #else /* not RE_ENABLE_I18N */ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, - const unsigned char *class_name, reg_syntax_t syntax) + const char *class_name, reg_syntax_t syntax) #endif /* not RE_ENABLE_I18N */ { int i; - const char *name = (const char *) class_name; + const char *name = class_name; /* In case of REG_ICASE "upper" and "lower" match the both of upper and lower cases. */ @@ -3652,8 +3652,8 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, static bin_tree_t * build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, - const unsigned char *class_name, - const unsigned char *extra, bool non_match, + const char *class_name, + const char *extra, bool non_match, reg_errcode_t *err) { re_bitset_ptr_t sbcset; @@ -3856,7 +3856,7 @@ create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node) { - Idx idx = (Idx) (long) extra; + Idx idx = (uintptr_t) extra; if (node->token.type == SUBEXP && node->token.opr.idx == idx) node->token.opt_subexp = 1;