X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=regex.c;h=df1b0b6d8a92ffe6ed2542e160b7827256221ff8;hb=e25da2873ed708b217c8d0f11c8d85bfb49ae7a8;hp=9a56db728e5b5ea59ee53d05269bbf14c4b64733;hpb=ecd705981eb3f31b6378ac3e636b03dbfc71adec;p=gnulib.git diff --git a/regex.c b/regex.c index 9a56db728..df1b0b6d8 100644 --- a/regex.c +++ b/regex.c @@ -3208,7 +3208,12 @@ at_begline_loc_p (pattern, p, syntax) /* After a subexpression? */ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) /* After an alternative? */ - || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); + || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)) + /* After a shy subexpression? */ + || ((syntax & RE_SHY_GROUPS) && prev - 2 >= pattern + && prev[-1] == '?' && prev[-2] == '(' + && (syntax & RE_NO_BK_PARENS + || (prev - 3 >= pattern && prev[-3] == '\\'))); } @@ -3933,7 +3938,9 @@ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2, : ((regoff_t) ((ptr) - string2 + size1))) /* Call before fetching a character with *d. This switches over to - string2 if necessary. */ + string2 if necessary. + Check re_match_2_internal for a discussion of why end_match_2 might + not be within string2 (but be equal to end_match_1 instead). */ #define PREFETCH() \ while (d == dend) \ { \ @@ -3945,6 +3952,16 @@ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2, dend = end_match_2; \ } +/* Call before fetching a char with *d if you already checked other limits. + This is meant for use in lookahead operations like wordend, etc., + where we might need to look at parts of the string that might be + outside of the LIMITs (i.e. past `stop'). */ +#define PREFETCH_NOLIMIT() \ + if (d == end1) \ + { \ + d = string2; \ + dend = end_match_2; \ + } \ /* Test if at very beginning or at very end of the virtual concatenation of `string1' and `string2'. If only one string, it's `string2'. 
*/ @@ -4463,15 +4480,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) for (mcnt = 1; mcnt < num_regs; mcnt++) regstart[mcnt] = regend[mcnt] = REG_UNSET_VALUE; - /* Shorten strings to `stop'. */ - if (stop <= size1) - { - size1 = stop; - size2 = 0; - } - else if (stop <= size1 + size2) - size2 = stop - size1; - /* We move `string1' into `string2' if the latter's empty -- but not if `string1' is null. */ if (size2 == 0 && string1 != NULL) @@ -4484,25 +4492,44 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) end1 = string1 + size1; end2 = string2 + size2; - /* Compute where to stop matching, within the two strings. */ - end_match_1 = end1; - end_match_2 = end2; - /* `p' scans through the pattern as `d' scans through the data. `dend' is the end of the input string that `d' points within. `d' is advanced into the following input string whenever necessary, but this happens before fetching; therefore, at the beginning of the loop, `d' can be pointing at the end of a string, but it cannot equal `string2'. */ - if (size1 > 0 && pos <= size1) + if (pos >= size1) { - d = string1 + pos; - dend = end_match_1; + /* Only match within string2. */ + d = string2 + pos - size1; + dend = end_match_2 = string2 + stop - size1; + end_match_1 = end1; /* Just to give it a value. */ } else { - d = string2 + pos - size1; - dend = end_match_2; + if (stop < size1) + { + /* Only match within string1. */ + end_match_1 = string1 + stop; + /* BEWARE! + When we reach end_match_1, PREFETCH normally switches to string2. + But in the present case, this means that just doing a PREFETCH + makes us jump from `stop' to `gap' within the string. + What we really want here is for the search to stop as + soon as we hit end_match_1. That's why we set end_match_2 + to end_match_1 (since PREFETCH fails as soon as we hit + end_match_2). 
*/ end_match_2 = end_match_1; + } + else + { /* It's important to use this code when stop == size1 so that + moving `d' from end1 to string2 will not prevent the d == dend + check from catching the end of string. */ + end_match_1 = end1; + end_match_2 = string2 + stop - size1; + } + d = string1 + pos; + dend = end_match_1; } DEBUG_PRINT1 ("The compiled pattern is: "); @@ -4980,9 +5007,12 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) { if (!bufp->not_bol) break; } - else if (d[-1] == '\n' && bufp->newline_anchor) + else { - break; + unsigned char c; + GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2); + if (c == '\n' && bufp->newline_anchor) + break; } /* In all other cases, we fail. */ goto fail; @@ -4996,12 +5026,11 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) { if (!bufp->not_eol) break; } - - /* We have to ``prefetch'' the next character. */ - else if ((d == end1 ? *string2 : *d) == '\n' - && bufp->newline_anchor) + else { - break; + PREFETCH_NOLIMIT (); + if (*d == '\n' && bufp->newline_anchor) + break; } goto fail; @@ -5236,7 +5265,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) #ifdef emacs UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); #endif - PREFETCH (); + PREFETCH_NOLIMIT (); c2 = RE_STRING_CHAR (d, dend - d); s2 = SYNTAX (c2); @@ -5323,7 +5352,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) /* Case 3: D is not at the end of string ... */ if (!AT_STRINGS_END (d)) { - PREFETCH (); + PREFETCH_NOLIMIT (); c2 = RE_STRING_CHAR (d, dend - d); #ifdef emacs UPDATE_SYNTAX_TABLE_FORWARD (charpos);