Update and add copyright.
[gnulib.git] / regex.c
diff --git a/regex.c b/regex.c
index fe527dd..fc370b3 100644 (file)
--- a/regex.c
+++ b/regex.c
@@ -86,6 +86,12 @@ char *realloc ();
 #define Sword 1
 #endif
 
+#ifdef SWITCH_ENUM_BUG
+#define SWITCH_ENUM_CAST(x) ((int)(x))
+#else
+#define SWITCH_ENUM_CAST(x) (x)
+#endif
+
 #ifdef SYNTAX_TABLE
 
 extern char *re_syntax_table;
@@ -503,8 +509,6 @@ static int debug = 0;
   if (debug) print_double_string (w, s1, sz1, s2, sz2)
 
 
-extern void printchar ();
-
 /* Print the fastmap in human-readable form.  */
 
 void
@@ -519,7 +523,7 @@ print_fastmap (fastmap)
       if (fastmap[i++])
        {
          was_a_range = 0;
-          printchar (i - 1);
+          putchar (i - 1);
           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
             {
               was_a_range = 1;
@@ -528,7 +532,7 @@ print_fastmap (fastmap)
          if (was_a_range)
             {
               printf ("-");
-              printchar (i - 1);
+              putchar (i - 1);
             }
         }
     }
@@ -571,7 +575,7 @@ print_partial_compiled_pattern (start, end)
           do
            {
               putchar ('/');
-             printchar (*p++);
+             putchar (*p++);
             }
           while (--mcnt);
           break;
@@ -618,18 +622,18 @@ print_partial_compiled_pattern (start, end)
                  /* Have we broken a range?  */
                  else if (last + 1 != c && in_range)
               {
-                     printchar (last);
+                     putchar (last);
                      in_range = 0;
                    }
                 
                  if (! in_range)
-                   printchar (c);
+                   putchar (c);
 
                  last = c;
               }
 
            if (in_range)
-             printchar (last);
+             putchar (last);
 
            putchar (']');
 
@@ -814,13 +818,13 @@ print_double_string (where, string1, size1, string2, size2)
       if (FIRST_STRING_P (where))
         {
           for (this_char = where - string1; this_char < size1; this_char++)
-            printchar (string1[this_char]);
+            putchar (string1[this_char]);
 
           where = string2;    
         }
 
       for (this_char = where - string2; this_char < size2; this_char++)
-        printchar (string2[this_char]);
+        putchar (string2[this_char]);
     }
 }
 
@@ -1005,19 +1009,28 @@ typedef struct
     : ((fail_stack).stack[(fail_stack).avail++] = pattern_op,          \
        1))
 
-/* This pushes an item onto the failure stack.  Must be a four-byte
-   value.  Assumes the variable `fail_stack'.  Probably should only
+/* Push a pointer value onto the failure stack.
+   Assumes the variable `fail_stack'.  Probably should only
+   be called from within `PUSH_FAILURE_POINT'.  */
+#define PUSH_FAILURE_POINTER(item)                                     \
+  fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) (item)
+
+/* This pushes an integer-valued item onto the failure stack.
+   Assumes the variable `fail_stack'.  Probably should only
    be called from within `PUSH_FAILURE_POINT'.  */
-#define PUSH_FAILURE_ITEM(item)                                                \
-  fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
+#define PUSH_FAILURE_INT(item)                                 \
+  fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) (EMACS_INT) (item)
 
 /* The complement operation.  Assumes `fail_stack' is nonempty.  */
-#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
+#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail]
+
+/* The complement operation.  Assumes `fail_stack' is nonempty.  */
+#define POP_FAILURE_INT() (EMACS_INT) fail_stack.stack[--fail_stack.avail]
 
 /* Used to omit pushing failure point id's when we're not debugging.  */
 #ifdef DEBUG
-#define DEBUG_PUSH PUSH_FAILURE_ITEM
-#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
+#define DEBUG_PUSH PUSH_FAILURE_INT
+#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
 #else
 #define DEBUG_PUSH(item)
 #define DEBUG_POP(item_addr)
@@ -1070,10 +1083,10 @@ typedef struct
         DEBUG_STATEMENT (num_regs_pushed++);                           \
                                                                        \
        DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);         \
-        PUSH_FAILURE_ITEM (regstart[this_reg]);                                \
+        PUSH_FAILURE_POINTER (regstart[this_reg]);                     \
                                                                         \
        DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);             \
-        PUSH_FAILURE_ITEM (regend[this_reg]);                          \
+        PUSH_FAILURE_POINTER (regend[this_reg]);                       \
                                                                        \
        DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);    \
         DEBUG_PRINT2 (" match_null=%d",                                        \
@@ -1084,24 +1097,24 @@ typedef struct
         DEBUG_PRINT2 (" ever_matched=%d",                              \
                       EVER_MATCHED_SOMETHING (reg_info[this_reg]));    \
        DEBUG_PRINT1 ("\n");                                            \
-        PUSH_FAILURE_ITEM (reg_info[this_reg].word);                   \
+        PUSH_FAILURE_POINTER (reg_info[this_reg].word);                        \
       }                                                                        \
                                                                        \
     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
-    PUSH_FAILURE_ITEM (lowest_active_reg);                             \
+    PUSH_FAILURE_INT (lowest_active_reg);                              \
                                                                        \
     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
-    PUSH_FAILURE_ITEM (highest_active_reg);                            \
+    PUSH_FAILURE_INT (highest_active_reg);                             \
                                                                        \
     DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);          \
     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);          \
-    PUSH_FAILURE_ITEM (pattern_place);                                 \
+    PUSH_FAILURE_POINTER (pattern_place);                              \
                                                                        \
     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);           \
     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
                                 size2);                                \
     DEBUG_PRINT1 ("'\n");                                              \
-    PUSH_FAILURE_ITEM (string_place);                                  \
+    PUSH_FAILURE_POINTER (string_place);                               \
                                                                        \
     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);           \
     DEBUG_PUSH (failure_id);                                           \
@@ -1163,7 +1176,7 @@ typedef struct
   /* If the saved string location is NULL, it came from an             \
      on_failure_keep_string_jump opcode, and we want to throw away the \
      saved NULL, thus retaining our current position in the string.  */        \
-  string_temp = POP_FAILURE_ITEM ();                                   \
+  string_temp = POP_FAILURE_POINTER ();                                        \
   if (string_temp != NULL)                                             \
     str = (const char *) string_temp;                                  \
                                                                        \
@@ -1171,31 +1184,32 @@ typedef struct
   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);     \
   DEBUG_PRINT1 ("'\n");                                                        \
                                                                        \
-  pat = (unsigned char *) POP_FAILURE_ITEM ();                         \
+  pat = (unsigned char *) POP_FAILURE_POINTER ();                      \
   DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);                      \
   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                      \
                                                                        \
   /* Restore register info.  */                                                \
-  high_reg = (unsigned) POP_FAILURE_ITEM ();                           \
+  high_reg = (unsigned) POP_FAILURE_INT ();                            \
   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);          \
                                                                        \
-  low_reg = (unsigned) POP_FAILURE_ITEM ();                            \
+  low_reg = (unsigned) POP_FAILURE_INT ();                             \
   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);           \
                                                                        \
   for (this_reg = high_reg; this_reg >= low_reg; this_reg--)           \
     {                                                                  \
       DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);                        \
                                                                        \
-      reg_info[this_reg].word = POP_FAILURE_ITEM ();                   \
+      reg_info[this_reg].word = POP_FAILURE_POINTER ();                        \
       DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);         \
                                                                        \
-      regend[this_reg] = (const char *) POP_FAILURE_ITEM ();           \
+      regend[this_reg] = (const char *) POP_FAILURE_POINTER ();                \
       DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);            \
                                                                        \
-      regstart[this_reg] = (const char *) POP_FAILURE_ITEM ();         \
+      regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();      \
       DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);                \
     }                                                                  \
                                                                        \
+  set_regs_matched_done = 0;                                           \
   DEBUG_STATEMENT (nfailure_points_popped++);                          \
 } /* POP_FAILURE_POINT */
 
@@ -1239,19 +1253,23 @@ typedef union
 #define SET_REGS_MATCHED()                                             \
   do                                                                   \
     {                                                                  \
-      unsigned r;                                                      \
-      for (r = lowest_active_reg; r <= highest_active_reg; r++)                \
-        {                                                              \
-          MATCHED_SOMETHING (reg_info[r])                              \
-            = EVER_MATCHED_SOMETHING (reg_info[r])                     \
-            = 1;                                                       \
-        }                                                              \
+      if (!set_regs_matched_done)                                      \
+       {                                                               \
+         unsigned r;                                                   \
+         set_regs_matched_done = 1;                                    \
+         for (r = lowest_active_reg; r <= highest_active_reg; r++)     \
+           {                                                           \
+             MATCHED_SOMETHING (reg_info[r])                           \
+               = EVER_MATCHED_SOMETHING (reg_info[r])                  \
+               = 1;                                                    \
+           }                                                           \
+       }                                                               \
     }                                                                  \
   while (0)
 
-
 /* Registers are set to a sentinel when they haven't yet matched.  */
-#define REG_UNSET_VALUE ((char *) -1)
+static char reg_unset_dummy;
+#define REG_UNSET_VALUE (&reg_unset_dummy)
 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
 
 
@@ -1559,7 +1577,7 @@ regex_compile (pattern, size, syntax, bufp)
       unsigned debug_count;
       
       for (debug_count = 0; debug_count < size; debug_count++)
-        printchar (pattern[debug_count]);
+        putchar (pattern[debug_count]);
       putchar ('\n');
     }
 #endif /* DEBUG */
@@ -2812,11 +2830,7 @@ re_compile_fastmap (bufp)
       /* We should never be about to go beyond the end of the pattern.  */
       assert (p < pend);
       
-#ifdef SWITCH_ENUM_BUG
-      switch ((int) ((re_opcode_t) *p++))
-#else
-      switch ((re_opcode_t) *p++)
-#endif
+      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
        {
 
         /* I guess the idea here is to simply not bother with a fastmap
@@ -3481,6 +3495,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
      and need to test it, it's not garbage.  */
   const char *match_end = NULL;
 
+  /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
+  int set_regs_matched_done = 0;
+
   /* Used when we pop values we don't care about.  */
 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   const char **reg_dummy;
@@ -3677,7 +3694,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
                 }
             } /* d != end_match_2 */
 
-       succeed:
+       succeed_label:
           DEBUG_PRINT1 ("Accepting match.\n");
 
           /* If caller wants register contents data back, do it.  */
@@ -3766,11 +3783,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
         }
 
       /* Otherwise match next pattern command.  */
-#ifdef SWITCH_ENUM_BUG
-      switch ((int) ((re_opcode_t) *p++))
-#else
-      switch ((re_opcode_t) *p++)
-#endif
+      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
        {
         /* Ignore these.  Used to ignore the n of succeed_n's which
            currently have n == 0.  */
@@ -3780,7 +3793,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 
        case succeed:
           DEBUG_PRINT1 ("EXECUTING succeed.\n");
-         goto succeed;
+         goto succeed_label;
 
         /* Match the next n pattern characters exactly.  The following
            byte in the pattern defines n, and the n bytes after that
@@ -3888,6 +3901,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 
           IS_ACTIVE (reg_info[*p]) = 1;
           MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+         /* Clear this whenever we change the register activity status.  */
+         set_regs_matched_done = 0;
           
           /* This is the new highest active register.  */
           highest_active_reg = *p;
@@ -3900,6 +3916,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
           /* Move past the register number and inner group count.  */
           p += 2;
          just_past_start_mem = p;
+
           break;
 
 
@@ -3925,7 +3942,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 
           /* This register isn't active anymore.  */
           IS_ACTIVE (reg_info[*p]) = 0;
-          
+
+         /* Clear this whenever we change the register activity status.  */
+         set_regs_matched_done = 0;
+
           /* If this was the only register active, nothing is active
              anymore.  */
           if (lowest_active_reg == highest_active_reg)
@@ -4019,7 +4039,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
                           regstart[r] = old_regstart[r];
 
                           /* xx why this test?  */
-                          if ((int) old_regend[r] >= (int) regstart[r])
+                          if (old_regend[r] >= regstart[r])
                             regend[r] = old_regend[r];
                         }     
                     }
@@ -4093,6 +4113,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
                     : bcmp (d, d2, mcnt))
                  goto fail;
                d += mcnt, d2 += mcnt;
+
+               /* Do this because we've matched some characters.  */
+               SET_REGS_MATCHED ();
              }
          }
          break;