*** empty log message ***

[gnulib.git] / regex.c
diff --git a/regex.c b/regex.c

index 3b4eb50..2e06d0e 100644 (file)
--- a/regex.c
+++ b/regex.c
@@ -212,6 +212,7 @@ init_syntax_once ()
  #define RE_STRING_CHAR_AND_LENGTH STRING_CHAR_AND_LENGTH
  #define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
    (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
+#define MAKE_CHAR(charset, c1, c2) (c1) /* Non-emacs fallback: CHARSET and C2 are ignored; expands to C1.  */
  #endif /* not emacs */
  
  #ifndef RE_TRANSLATE
@@ -2443,18 +2444,23 @@ regex_compile (pattern, size, syntax, bufp)
                     /* Fetch the character which ends the range. */
                     PATFETCH (c1);
  
-                   if (SINGLE_BYTE_CHAR_P (c)
-                       && ! SINGLE_BYTE_CHAR_P (c1))
+                   if (SINGLE_BYTE_CHAR_P (c))
                       {
-                       /* Handle a range such as \177-\377 in multibyte mode.
-                          Split that into two ranges,,
-                          the low one ending at 0237, and the high one
-                          starting at ...040.  */
-                       /*   Unless I'm missing something,
-                            this line is useless.  -sm
-                          int c1_base = (c1 & ~0177) | 040; */
-                       SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
-                       c1 = 0237;
+                       if (! SINGLE_BYTE_CHAR_P (c1))
+                         {
+                           /* Handle a range such as \177-\377 in
+                              multibyte mode.  Split that into two
+                              ranges, the low one ending at 0237, and
+                              the high one starting at the smallest
+                              character in the charset of C1 and
+                              ending at C1.  */
+                           int charset = CHAR_CHARSET (c1);
+                           int c2 = MAKE_CHAR (charset, 0, 0);
+                           
+                           SET_RANGE_TABLE_WORK_AREA (range_table_work,
+                                                      c2, c1);
+                           c1 = 0237;
+                         }
                       }
                     else if (!SAME_CHARSET_P (c, c1))
                       FREE_STACK_RETURN (REG_ERANGE);
@@ -3208,7 +3214,12 @@ at_begline_loc_p (pattern, p, syntax)
         /* After a subexpression?  */
         (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
         /* After an alternative?         */
-    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
+    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash))
+       /* After a shy subexpression?  */
+    || ((syntax & RE_SHY_GROUPS) && prev - 2 >= pattern
+       && prev[-1] == '?' && prev[-2] == '('
+       && (syntax & RE_NO_BK_PARENS
+           || (prev - 3 >= pattern && prev[-3] == '\\')));
  }
  
  
@@ -3947,6 +3958,16 @@ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
        dend = end_match_2;                                              \
      }
  
+/* If d is at end1, switch it to string2 and set dend to end_match_2.
+   Call before fetching a char with *d if you already checked other limits.
+   Meant for lookahead operations such as wordend, which might need to look
+   at parts of the string outside of the LIMITs (i.e. past `stop').  */
+#define PREFETCH_NOLIMIT()                                             \
+  if (d == end1)                                                       \
+     {                                                                 \
+       d = string2;                                                    \
+       dend = end_match_2;                                             \
+     }                                                                 \
  
  /* Test if at very beginning or at very end of the virtual concatenation
     of `string1' and `string2'. If only one string, it's `string2'.  */
@@ -4492,7 +4513,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
      }
    else
      {
-      if (stop <= size1)
+      if (stop < size1)
         {
           /* Only match within string1.  */
           end_match_1 = string1 + stop;
@@ -4507,7 +4528,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
           end_match_2 = end_match_1;
         }
        else
-       {
+       { /* It's important to use this code when stop == size1 so that
+            moving `d' from end1 to string2 will not prevent the d == dend
+            check from catching the end of string.  */
           end_match_1 = end1;
           end_match_2 = string2 + stop - size1;
         }
@@ -5009,12 +5032,11 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
             {
               if (!bufp->not_eol) break;
             }
-
-         /* We have to ``prefetch'' the next character.  */
-         else if ((d == end1 ? *string2 : *d) == '\n'
-                  && bufp->newline_anchor)
+         else
             {
-             break;
+             PREFETCH_NOLIMIT ();
+             if (*d == '\n' && bufp->newline_anchor)
+               break;
             }
           goto fail;
  
@@ -5249,7 +5271,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
  #ifdef emacs
               UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
  #endif
-             PREFETCH ();
+             PREFETCH_NOLIMIT ();
               c2 = RE_STRING_CHAR (d, dend - d);
               s2 = SYNTAX (c2);
  
@@ -5336,7 +5358,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
               /* Case 3: D is not at the end of string ... */
               if (!AT_STRINGS_END (d))
                 {
-                 PREFETCH ();
+                 PREFETCH_NOLIMIT ();
                   c2 = RE_STRING_CHAR (d, dend - d);
  #ifdef emacs
                   UPDATE_SYNTAX_TABLE_FORWARD (charpos);