(noinst_HEADERS): Add __fpending.h.
[gnulib.git] / regex.c
diff --git a/regex.c b/regex.c
index 3b4eb50..0ba9d3b 100644 (file)
--- a/regex.c
+++ b/regex.c
 
 /* TODO:
    - structure the opcode space into opcode+flag.
-   - merge with glibc's regex.[ch]
+   - merge with glibc's regex.[ch].
+   - replace succeed_n + jump_n with a combined operation so that the counter
+     can simply be decremented when popping the failure_point without having
+     to stack up failure_count entries.
  */
 
 /* AIX requires this to be the first thing in the file. */
@@ -212,6 +215,7 @@ init_syntax_once ()
 #define RE_STRING_CHAR_AND_LENGTH STRING_CHAR_AND_LENGTH
 #define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
   (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
+#define MAKE_CHAR(charset, c1, c2) (c1)
 #endif /* not emacs */
 
 #ifndef RE_TRANSLATE
@@ -1415,14 +1419,37 @@ do {                                                                    \
   PUSH_FAILURE_INT (num);                                              \
 } while (0)
 
+#define PUSH_FAILURE_COUNT(ptr)                                                \
+do { /* Save the counter stored at PTR so a later pop can restore it.  */ \
+  char *destination; /* not referenced here; presumably needed by ENSURE_FAIL_STACK -- TODO confirm */ \
+  int c;             /* the counter value read from PTR */             \
+  ENSURE_FAIL_STACK(3); /* three entries are pushed below */           \
+  EXTRACT_NUMBER (c, ptr);                                             \
+  DEBUG_PRINT3 ("    Push counter %p = %d\n", ptr, c);                 \
+  PUSH_FAILURE_INT (c);       /* old value: pushed first, popped last */ \
+  PUSH_FAILURE_POINTER (ptr); /* where the value has to be stored back */ \
+  PUSH_FAILURE_INT (-1);      /* marker checked by POP_FAILURE_REG_OR_COUNT */ \
+} while (0)
+
 /* Pop a saved register off the stack.  */
-#define POP_FAILURE_REG()                                              \
+#define POP_FAILURE_REG_OR_COUNT()                                     \
 do {                                                                   \
   int reg = POP_FAILURE_INT ();                                                \
-  regend[reg] = POP_FAILURE_POINTER ();                                        \
-  regstart[reg] = POP_FAILURE_POINTER ();                              \
-  DEBUG_PRINT4 ("     Pop reg %d (spanning %p -> %p)\n",               \
-               reg, regstart[reg], regend[reg]);                       \
+  if (reg == -1) /* -1 is the marker PUSH_FAILURE_COUNT pushes last */ \
+    {                                                                  \
+      /* It's a counter: store back the value PUSH_FAILURE_COUNT saved.  */ \
+      unsigned char *ptr = (unsigned char*) POP_FAILURE_POINTER ();    \
+      reg = POP_FAILURE_INT (); /* REG now holds the saved count */    \
+      STORE_NUMBER (ptr, reg);                                         \
+      DEBUG_PRINT3 ("     Pop counter %p = %d\n", ptr, reg);           \
+    }                                                                  \
+  else                                                                 \
+    { /* Otherwise REG is a register number: restore its span.  */    \
+      regend[reg] = POP_FAILURE_POINTER ();                            \
+      regstart[reg] = POP_FAILURE_POINTER ();                          \
+      DEBUG_PRINT4 ("     Pop reg %d (spanning %p -> %p)\n",           \
+                   reg, regstart[reg], regend[reg]);                   \
+    }                                                                  \
 } while (0)
 
 /* Check that we are not stuck in an infinite loop.  */
@@ -1516,7 +1543,7 @@ do {                                                                      \
                                                                        \
   /* Pop the saved registers.  */                                      \
   while (fail_stack.frame < fail_stack.avail)                          \
-    POP_FAILURE_REG ();                                                        \
+    POP_FAILURE_REG_OR_COUNT ();                                       \
                                                                        \
   pat = (unsigned char *) POP_FAILURE_POINTER ();                      \
   DEBUG_PRINT2 ("  Popping pattern %p: ", pat);                                \
@@ -2443,18 +2470,23 @@ regex_compile (pattern, size, syntax, bufp)
                    /* Fetch the character which ends the range. */
                    PATFETCH (c1);
 
-                   if (SINGLE_BYTE_CHAR_P (c)
-                       && ! SINGLE_BYTE_CHAR_P (c1))
+                   if (SINGLE_BYTE_CHAR_P (c))
                      {
-                       /* Handle a range such as \177-\377 in multibyte mode.
-                          Split that into two ranges,,
-                          the low one ending at 0237, and the high one
-                          starting at ...040.  */
-                       /*   Unless I'm missing something,
-                            this line is useless.  -sm
-                          int c1_base = (c1 & ~0177) | 040; */
-                       SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
-                       c1 = 0237;
+                       if (! SINGLE_BYTE_CHAR_P (c1))
+                         {
+                           /* Handle a range such as \177-\377 in
+                              multibyte mode.  Split that into two
+                              ranges, the low one ending at 0237, and
+                              the high one starting at the smallest
+                              character in the charset of C1 and
+                              ending at C1.  */
+                           int charset = CHAR_CHARSET (c1);
+                           int c2 = MAKE_CHAR (charset, 0, 0);
+                           
+                           SET_RANGE_TABLE_WORK_AREA (range_table_work,
+                                                      c2, c1);
+                           c1 = 0237;
+                         }
                      }
                    else if (!SAME_CHARSET_P (c, c1))
                      FREE_STACK_RETURN (REG_ERANGE);
@@ -3058,7 +3090,12 @@ regex_compile (pattern, size, syntax, bufp)
 
          GET_BUFFER_SPACE (MAX_MULTIBYTE_LENGTH);
          {
-           int len = CHAR_STRING (c, b);
+           int len;
+
+           if (multibyte)
+             len = CHAR_STRING (c, b);
+           else
+             *b = c, len = 1;
            b += len;
            (*pending_exact) += len;
          }
@@ -3208,7 +3245,12 @@ at_begline_loc_p (pattern, p, syntax)
        /* After a subexpression?  */
        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
        /* After an alternative?         */
-    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
+    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash))
+       /* After a shy subexpression?  */
+    || ((syntax & RE_SHY_GROUPS) && prev - 2 >= pattern
+       && prev[-1] == '?' && prev[-2] == '('
+       && (syntax & RE_NO_BK_PARENS
+           || (prev - 3 >= pattern && prev[-3] == '\\')));
 }
 
 
@@ -3364,7 +3406,15 @@ analyse_first (p, pend, fastmap, multibyte)
         with `break'.  */
 
        case exactn:
-         if (fastmap) fastmap[p[1]] = 1;
+         if (fastmap)
+           {
+             int c = RE_STRING_CHAR (p + 1, pend - p);
+
+             if (SINGLE_BYTE_CHAR_P (c))
+               fastmap[c] = 1;
+             else
+               fastmap[p[1]] = 1;
+           }
          break;
 
 
@@ -3947,6 +3997,16 @@ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
       dend = end_match_2;                                              \
     }
 
+/* Call before fetching a char with *d if you already checked other limits.
+   This is meant for use in lookahead operations like wordend, etc.,
+   where we might need to look at parts of the string that might be
+   outside of the LIMITs (i.e. past `stop').  */
+#define PREFETCH_NOLIMIT()                                             \
+  if (d == end1) /* D sits at the end of string1 */                    \
+     {                                                                 \
+       d = string2; /* continue reading from the start of string2 */   \
+       dend = end_match_2;                                             \
+     }                                                                 \
 
 /* Test if at very beginning or at very end of the virtual concatenation
    of `string1' and `string2'. If only one string, it's `string2'.  */
@@ -4270,7 +4330,9 @@ re_match (bufp, string, size, pos, regs)
 {
   int result = re_match_2_internal (bufp, NULL, 0, string, size,
                                    pos, regs, size);
+#if defined (C_ALLOCA) && !defined (REGEX_MALLOC)
   alloca (0);
+#endif
   return result;
 }
 #endif /* not emacs */
@@ -4314,7 +4376,9 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
 
   result = re_match_2_internal (bufp, string1, size1, string2, size2,
                                pos, regs, stop);
+#if defined (C_ALLOCA) && !defined (REGEX_MALLOC)
   alloca (0);
+#endif
   return result;
 }
 
@@ -4492,7 +4556,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
     }
   else
     {
-      if (stop <= size1)
+      if (stop < size1)
        {
          /* Only match within string1.  */
          end_match_1 = string1 + stop;
@@ -4507,7 +4571,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
          end_match_2 = end_match_1;
        }
       else
-       {
+       { /* It's important to use this code when stop == size1 so that
+            moving `d' from end1 to string2 will not prevent the d == dend
+            check from catching the end of string.  */
          end_match_1 = end1;
          end_match_2 = string2 + stop - size1;
        }
@@ -5009,12 +5075,11 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
            {
              if (!bufp->not_eol) break;
            }
-
-         /* We have to ``prefetch'' the next character.  */
-         else if ((d == end1 ? *string2 : *d) == '\n'
-                  && bufp->newline_anchor)
+         else
            {
-             break;
+             PREFETCH_NOLIMIT ();
+             if (*d == '\n' && bufp->newline_anchor)
+               break;
            }
          goto fail;
 
@@ -5178,22 +5243,18 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
          EXTRACT_NUMBER (mcnt, p + 2);
          DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
 
-         assert (mcnt >= 0);
-         /* Originally, this is how many times we HAVE to succeed.  */
-         if (mcnt > 0)
-           {
-              mcnt--;
-              p += 2;
-              STORE_NUMBER_AND_INCR (p, mcnt);
-              DEBUG_PRINT3 ("  Setting %p to %d.\n", p, mcnt);
-           }
-         else if (mcnt == 0)
+         /* Originally, mcnt is how many times we HAVE to succeed.  */
+         if (mcnt != 0)
            {
-             DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n", p+2);
-             p[2] = (unsigned char) no_op;
-             p[3] = (unsigned char) no_op;
-             goto on_failure;
+             mcnt--;
+             p += 2;
+             PUSH_FAILURE_COUNT (p);
+             STORE_NUMBER_AND_INCR (p, mcnt);
+             DEBUG_PRINT3 ("   Setting %p to %d.\n", p, mcnt);
            }
+         else
+           /* The two bytes encoding mcnt == 0 are two no_op opcodes.  */
+           goto on_failure;
          break;
 
        case jump_n:
@@ -5201,11 +5262,12 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
          DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
 
          /* Originally, this is how many times we CAN jump.  */
-         if (mcnt)
+         if (mcnt != 0)
            {
-              mcnt--;
-              STORE_NUMBER (p + 2, mcnt);
-              goto unconditional_jump;
+             mcnt--;
+             PUSH_FAILURE_COUNT (p + 2);
+             STORE_NUMBER (p + 2, mcnt);
+             goto unconditional_jump;
            }
          /* If don't have to jump any more, skip over the rest of command.  */
          else
@@ -5220,6 +5282,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
            p1 = p + mcnt;
            EXTRACT_NUMBER_AND_INCR (mcnt, p);
            DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
+           PUSH_FAILURE_COUNT (p1);
            STORE_NUMBER (p1, mcnt);
            break;
          }
@@ -5249,7 +5312,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 #ifdef emacs
              UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
 #endif
-             PREFETCH ();
+             PREFETCH_NOLIMIT ();
              c2 = RE_STRING_CHAR (d, dend - d);
              s2 = SYNTAX (c2);
 
@@ -5336,7 +5399,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
              /* Case 3: D is not at the end of string ... */
              if (!AT_STRINGS_END (d))
                {
-                 PREFETCH ();
+                 PREFETCH_NOLIMIT ();
                  c2 = RE_STRING_CHAR (d, dend - d);
 #ifdef emacs
                  UPDATE_SYNTAX_TABLE_FORWARD (charpos);
@@ -5571,7 +5634,8 @@ re_comp (s)
   if (!s)
     {
       if (!re_comp_buf.buffer)
-       return gettext ("No previous regular expression");
+        /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
+       return (char *) gettext ("No previous regular expression");
       return 0;
     }
 
@@ -5579,12 +5643,14 @@ re_comp (s)
     {
       re_comp_buf.buffer = (unsigned char *) malloc (200);
       if (re_comp_buf.buffer == NULL)
-        return gettext (re_error_msgid[(int) REG_ESPACE]);
+        /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
+        return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
       re_comp_buf.allocated = 200;
 
       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
       if (re_comp_buf.fastmap == NULL)
-       return gettext (re_error_msgid[(int) REG_ESPACE]);
+       /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
+       return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
     }
 
   /* Since `re_exec' always passes NULL for the `regs' argument, we