X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;ds=sidebyside;f=regex.c;h=2e06d0edeafe463dc1f766441041cc76e519bcab;hb=825548e25793fe043c31a8c71975eb958b7cc53f;hp=3b4eb502596b4b6a0689620cff984a8da5e0c6d6;hpb=6a4fc8bee9aa4c578c65e5b78609634a9fa1a338;p=gnulib.git diff --git a/regex.c b/regex.c index 3b4eb5025..2e06d0ede 100644 --- a/regex.c +++ b/regex.c @@ -212,6 +212,7 @@ init_syntax_once () #define RE_STRING_CHAR_AND_LENGTH STRING_CHAR_AND_LENGTH #define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1))) +#define MAKE_CHAR(charset, c1, c2) (c1) #endif /* not emacs */ #ifndef RE_TRANSLATE @@ -2443,18 +2444,23 @@ regex_compile (pattern, size, syntax, bufp) /* Fetch the character which ends the range. */ PATFETCH (c1); - if (SINGLE_BYTE_CHAR_P (c) - && ! SINGLE_BYTE_CHAR_P (c1)) + if (SINGLE_BYTE_CHAR_P (c)) { - /* Handle a range such as \177-\377 in multibyte mode. - Split that into two ranges,, - the low one ending at 0237, and the high one - starting at ...040. */ - /* Unless I'm missing something, - this line is useless. -sm - int c1_base = (c1 & ~0177) | 040; */ - SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1); - c1 = 0237; + if (! SINGLE_BYTE_CHAR_P (c1)) + { + /* Handle a range such as \177-\377 in + multibyte mode. Split that into two + ranges, the low one ending at 0237, and + the high one starting at the smallest + character in the charset of C1 and + ending at C1. */ + int charset = CHAR_CHARSET (c1); + int c2 = MAKE_CHAR (charset, 0, 0); + + SET_RANGE_TABLE_WORK_AREA (range_table_work, + c2, c1); + c1 = 0237; + } } else if (!SAME_CHARSET_P (c, c1)) FREE_STACK_RETURN (REG_ERANGE); @@ -3208,7 +3214,12 @@ at_begline_loc_p (pattern, p, syntax) /* After a subexpression? */ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) /* After an alternative? 
*/ - || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); + || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)) + /* After a shy subexpression? */ + || ((syntax & RE_SHY_GROUPS) && prev - 2 >= pattern + && prev[-1] == '?' && prev[-2] == '(' + && (syntax & RE_NO_BK_PARENS + || (prev - 3 >= pattern && prev[-3] == '\\'))); } @@ -3947,6 +3958,16 @@ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2, dend = end_match_2; \ } +/* Call before fetching a char with *d if you already checked other limits. + This is meant for use in lookahead operations like wordend, etc., + where we might need to look at parts of the string that might be + outside of the LIMITs (i.e. past `stop'). */ +#define PREFETCH_NOLIMIT() \ + if (d == end1) \ + { \ + d = string2; \ + dend = end_match_2; \ + } \ /* Test if at very beginning or at very end of the virtual concatenation of `string1' and `string2'. If only one string, it's `string2'. */ @@ -4492,7 +4513,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) } else { - if (stop <= size1) + if (stop < size1) { /* Only match within string1. */ end_match_1 = string1 + stop; @@ -4507,7 +4528,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) end_match_2 = end_match_1; } else - { + { /* It's important to use this code when stop == size1 so that + moving `d' from end1 to string2 will not prevent the d == dend + check from catching the end of string. */ end_match_1 = end1; end_match_2 = string2 + stop - size1; } @@ -5009,12 +5032,11 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) { if (!bufp->not_eol) break; } - - /* We have to ``prefetch'' the next character. */ - else if ((d == end1 ? 
*string2 : *d) == '\n' - && bufp->newline_anchor) + else { - break; + PREFETCH_NOLIMIT (); + if (*d == '\n' && bufp->newline_anchor) + break; } goto fail; @@ -5249,7 +5271,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) #ifdef emacs UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); #endif - PREFETCH (); + PREFETCH_NOLIMIT (); c2 = RE_STRING_CHAR (d, dend - d); s2 = SYNTAX (c2); @@ -5336,7 +5358,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) /* Case 3: D is not at the end of string ... */ if (!AT_STRINGS_END (d)) { - PREFETCH (); + PREFETCH_NOLIMIT (); c2 = RE_STRING_CHAR (d, dend - d); #ifdef emacs UPDATE_SYNTAX_TABLE_FORWARD (charpos);