.
[gnulib.git] / regex.c
diff --git a/regex.c b/regex.c
index c160950..2d28eda 100644 (file)
--- a/regex.c
+++ b/regex.c
@@ -1,9 +1,9 @@
 /* Extended regular expression matching and search library,
-   version 0.11.
+   version 0.12.
    (Implements POSIX draft P10003.2/D11.2, except for
    internationalization features.)
 
-   Copyright (C) 1985, 89, 90, 91, 92 Free Software Foundation, Inc.
+   Copyright (C) 1993 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
 
 #define _GNU_SOURCE
 
+#ifdef HAVE_CONFIG_H
+#if defined (CONFIG_BROKETS)
+/* We use <config.h> instead of "config.h" so that a compilation
+   using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
+   (which it would do because it found this file in $srcdir).  */
+#include <config.h>
+#else
+#include "config.h"
+#endif
+#endif
+
 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
 #include <sys/types.h>
 
 
 #else  /* not emacs */
 
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#else
+char *malloc ();
+char *realloc ();
+#endif
+
+
 /* We used to test for `BSTRING' here, but only GCC and Emacs define
    `BSTRING', as far as I know, and neither of them use this code.  */
 #if HAVE_STRING_H || STDC_HEADERS
 #include <strings.h>
 #endif
 
-#ifdef STDC_HEADERS
-#include <stdlib.h>
-#else
-char *malloc ();
-char *realloc ();
-#endif
-
-
 /* Define the syntax stuff for \<, \>, etc.  */
 
 /* This must be nonzero for the wordchar and notwordchar pattern
@@ -127,31 +138,44 @@ init_syntax_once ()
 /* isalpha etc. are used for the character classes.  */
 #include <ctype.h>
 
-#ifndef isascii
-#define isascii(c) 1
+/* Jim Meyering writes:
+
+   "... Some ctype macros are valid only for character codes that
+   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
+   using /bin/cc or gcc but without giving an ansi option).  So, all
+   ctype uses should be through macros like ISPRINT...  If
+   STDC_HEADERS is defined, then autoconf has verified that the ctype
+   macros don't need to be guarded with references to isascii. ...
+   Defining isascii to 1 should let any compiler worth its salt
+   eliminate the && through constant folding."  */
+
+#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
+#define ISASCII(c) 1
+#else
+#define ISASCII(c) isascii(c)
 #endif
 
 #ifdef isblank
-#define ISBLANK(c) (isascii (c) && isblank (c))
+#define ISBLANK(c) (ISASCII (c) && isblank (c))
 #else
 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
 #endif
 #ifdef isgraph
-#define ISGRAPH(c) (isascii (c) && isgraph (c))
+#define ISGRAPH(c) (ISASCII (c) && isgraph (c))
 #else
-#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
+#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
 #endif
 
-#define ISPRINT(c) (isascii (c) && isprint (c))
-#define ISDIGIT(c) (isascii (c) && isdigit (c))
-#define ISALNUM(c) (isascii (c) && isalnum (c))
-#define ISALPHA(c) (isascii (c) && isalpha (c))
-#define ISCNTRL(c) (isascii (c) && iscntrl (c))
-#define ISLOWER(c) (isascii (c) && islower (c))
-#define ISPUNCT(c) (isascii (c) && ispunct (c))
-#define ISSPACE(c) (isascii (c) && isspace (c))
-#define ISUPPER(c) (isascii (c) && isupper (c))
-#define ISXDIGIT(c) (isascii (c) && isxdigit (c))
+#define ISPRINT(c) (ISASCII (c) && isprint (c))
+#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
+#define ISALNUM(c) (ISASCII (c) && isalnum (c))
+#define ISALPHA(c) (ISASCII (c) && isalpha (c))
+#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
+#define ISLOWER(c) (ISASCII (c) && islower (c))
+#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
+#define ISSPACE(c) (ISASCII (c) && isspace (c))
+#define ISUPPER(c) (ISASCII (c) && isupper (c))
+#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
 
 #ifndef NULL
 #define NULL 0
@@ -224,6 +248,8 @@ char *alloca ();
 /* (Re)Allocate N items of type T using malloc, or fail.  */
 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
+/* Give ADDR room for N items of type T: realloc it if it is already
+   non-null, otherwise malloc it.  Wrapped in do/while (0) so the
+   expansion is a single statement and its `else' can neither pair
+   with nor be cut off from an `if' at the call site.  */
+#define RETALLOC_IF(addr, n, t)                                        \
+  do                                                                   \
+    {                                                                  \
+      if (addr)                                                        \
+        RETALLOC ((addr), (n), t);                                     \
+      else                                                             \
+        (addr) = TALLOC ((n), t);                                      \
+    }                                                                  \
+  while (0)
 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
 
 #define BYTEWIDTH 8 /* In bits.  */
@@ -532,6 +558,8 @@ print_partial_compiled_pattern (start, end)
   /* Loop over pattern commands.  */
   while (p < pend)
     {
+      printf ("%d:\t", p - start);
+
       switch ((re_opcode_t) *p++)
        {
         case no_op:
@@ -570,27 +598,45 @@ print_partial_compiled_pattern (start, end)
        case charset:
         case charset_not:
           {
-            register int c;
+            register int c, last = -100;
+           register int in_range = 0;
 
-            printf ("/charset%s",
-                   (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
+           printf ("/charset [%s",
+                   (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
             
             assert (p + *p < pend);
 
-            for (c = 0; c < *p; c++)
+            for (c = 0; c < 256; c++)
+             if (c / 8 < *p
+                 && (p[1 + (c/8)] & (1 << (c % 8))))
+               {
+                 /* Are we starting a range?  */
+                 if (last + 1 == c && ! in_range)
+                   {
+                     putchar ('-');
+                     in_range = 1;
+                   }
+                 /* Have we broken a range?  */
+                 else if (last + 1 != c && in_range)
               {
-                unsigned bit;
-                unsigned char map_byte = p[1 + c];
+                     printchar (last);
+                     in_range = 0;
+                   }
                 
-                putchar ('/');
+                 if (! in_range)
+                   printchar (c);
 
-               for (bit = 0; bit < BYTEWIDTH; bit++)
-                  if (map_byte & (1 << bit))
-                    printchar (c * BYTEWIDTH + bit);
+                 last = c;
               }
+
+           if (in_range)
+             printchar (last);
+
+           putchar (']');
+
            p += 1 + *p;
-           break;
          }
+         break;
 
        case begline:
          printf ("/begline");
@@ -602,17 +648,17 @@ print_partial_compiled_pattern (start, end)
 
        case on_failure_jump:
           extract_number_and_incr (&mcnt, &p);
-         printf ("/on_failure_jump/0/%d", mcnt);
+         printf ("/on_failure_jump to %d", p + mcnt - start);
           break;
 
        case on_failure_keep_string_jump:
           extract_number_and_incr (&mcnt, &p);
-         printf ("/on_failure_keep_string_jump/0/%d", mcnt);
+         printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
           break;
 
        case dummy_failure_jump:
           extract_number_and_incr (&mcnt, &p);
-         printf ("/dummy_failure_jump/0/%d", mcnt);
+         printf ("/dummy_failure_jump to %d", p + mcnt - start);
           break;
 
        case push_dummy_failure:
@@ -621,40 +667,40 @@ print_partial_compiled_pattern (start, end)
           
         case maybe_pop_jump:
           extract_number_and_incr (&mcnt, &p);
-         printf ("/maybe_pop_jump/0/%d", mcnt);
+         printf ("/maybe_pop_jump to %d", p + mcnt - start);
          break;
 
         case pop_failure_jump:
          extract_number_and_incr (&mcnt, &p);
-         printf ("/pop_failure_jump/0/%d", mcnt);
+         printf ("/pop_failure_jump to %d", p + mcnt - start);
          break;          
           
         case jump_past_alt:
          extract_number_and_incr (&mcnt, &p);
-         printf ("/jump_past_alt/0/%d", mcnt);
+         printf ("/jump_past_alt to %d", p + mcnt - start);
          break;          
           
         case jump:
          extract_number_and_incr (&mcnt, &p);
-         printf ("/jump/0/%d", mcnt);
+         printf ("/jump to %d", p + mcnt - start);
          break;
 
         case succeed_n: 
           extract_number_and_incr (&mcnt, &p);
           extract_number_and_incr (&mcnt2, &p);
-         printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
+         printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2);
           break;
         
         case jump_n: 
           extract_number_and_incr (&mcnt, &p);
           extract_number_and_incr (&mcnt2, &p);
-         printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
+         printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2);
           break;
         
         case set_number_at: 
           extract_number_and_incr (&mcnt, &p);
           extract_number_and_incr (&mcnt2, &p);
-         printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
+         printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2);
           break;
         
         case wordbound:
@@ -717,8 +763,11 @@ print_partial_compiled_pattern (start, end)
         default:
           printf ("?%d", *(p-1));
        }
+
+      putchar ('\n');
     }
-  printf ("/\n");
+
+  printf ("%d:\tend of pattern.\n", p - start);
 }
 
 
@@ -837,160 +886,541 @@ static const char *re_error_msg[] =
     "Unmatched ) or \\)",                      /* REG_ERPAREN */
   };
 \f
-/* Subroutine declarations and macros for regex_compile.  */
-
-static void store_op1 (), store_op2 ();
-static void insert_op1 (), insert_op2 ();
-static boolean at_begline_loc_p (), at_endline_loc_p ();
-static boolean group_in_compile_stack ();
-static reg_errcode_t compile_range ();
-
-/* Fetch the next character in the uncompiled pattern---translating it 
-   if necessary.  Also cast from a signed character in the constant
-   string passed to us by the user to an unsigned char that we can use
-   as an array index (in, e.g., `translate').  */
-#define PATFETCH(c)                                                    \
-  do {if (p == pend) return REG_EEND;                                  \
-    c = (unsigned char) *p++;                                          \
-    if (translate) c = translate[c];                                   \
-  } while (0)
-
-/* Fetch the next character in the uncompiled pattern, with no
-   translation.  */
-#define PATFETCH_RAW(c)                                                        \
-  do {if (p == pend) return REG_EEND;                                  \
-    c = (unsigned char) *p++;                                          \
-  } while (0)
-
-/* Go backwards one character in the pattern.  */
-#define PATUNFETCH p--
+/* Avoiding alloca during matching, to placate r_alloc.  */
+
+/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
+   searching and matching functions should not call alloca.  On some
+   systems, alloca is implemented in terms of malloc, and if we're
+   using the relocating allocator routines, then malloc could cause a
+   relocation, which might (if the strings being searched are in the
+   ralloc heap) shift the data out from underneath the regexp
+   routines.
+
+   Here's another reason to avoid allocation: Emacs insists on
+   processing input from X in a signal handler; processing X input may
+   call malloc; if input arrives while a matching routine is calling
+   malloc, then we're scrod.  But Emacs can't just block input while
+   calling matching routines; then we don't notice interrupts when
+   they come in.  So, Emacs blocks input around all regexp calls
+   except the matching calls, which it leaves unprotected, in the
+   faith that they will not malloc.  */
+
+/* Normally, this is fine.  */
+#define MATCH_MAY_ALLOCATE
+
+/* But under some circumstances, it's not.  */
+#if defined (emacs) || (defined (REL_ALLOC) && defined (C_ALLOCA))
+#undef MATCH_MAY_ALLOCATE
+#endif
 
+\f
+/* Failure stack declarations and macros; both re_compile_fastmap and
+   re_match_2 use a failure stack.  These have to be macros because of
+   REGEX_ALLOCATE.  */
+   
 
-/* If `translate' is non-null, return translate[D], else just D.  We
-   cast the subscript to translate because some data is declared as
-   `char *', to avoid warnings when a string constant is passed.  But
-   when we use a character as a subscript we must make it unsigned.  */
-#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
+/* Number of failure points for which to initially allocate space
+   when matching.  If this number is exceeded, we allocate more
+   space, so it is not a hard limit.  */
+#ifndef INIT_FAILURE_ALLOC
+#define INIT_FAILURE_ALLOC 5
+#endif
 
+/* Roughly the maximum number of failure points on the stack.  Would be
+   exactly that if we always used MAX_FAILURE_ITEMS items each time we failed.
+   This is a variable only so users of regex can assign to it; we never
+   change it ourselves.  */
+int re_max_failures = 2000;
 
-/* Macros for outputting the compiled pattern into `buffer'.  */
+typedef unsigned char *fail_stack_elt_t;
 
-/* If the buffer isn't allocated when it comes in, use this.  */
-#define INIT_BUF_SIZE  32
+typedef struct
+{
+  fail_stack_elt_t *stack;
+  unsigned size;
+  unsigned avail;                      /* Offset of next open position.  */
+} fail_stack_type;
 
-/* Make sure we have at least N more bytes of space in buffer.  */
-#define GET_BUFFER_SPACE(n)                                            \
-    while (b - bufp->buffer + (n) > bufp->allocated)                   \
-      EXTEND_BUFFER ()
+#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
+#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
+#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
+#define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])
 
-/* Make sure we have one more byte of buffer space and then add C to it.  */
-#define BUF_PUSH(c)                                                    \
-  do {                                                                 \
-    GET_BUFFER_SPACE (1);                                              \
-    *b++ = (unsigned char) (c);                                                \
-  } while (0)
 
+/* Initialize `fail_stack'.  Do `return -2' if the alloc fails.  */
 
-/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
-#define BUF_PUSH_2(c1, c2)                                             \
+#ifdef MATCH_MAY_ALLOCATE
+#define INIT_FAIL_STACK()                                              \
   do {                                                                 \
-    GET_BUFFER_SPACE (2);                                              \
-    *b++ = (unsigned char) (c1);                                       \
-    *b++ = (unsigned char) (c2);                                       \
+    fail_stack.stack = (fail_stack_elt_t *)                            \
+      REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
+                                                                       \
+    if (fail_stack.stack == NULL)                                      \
+      return -2;                                                       \
+                                                                       \
+    fail_stack.size = INIT_FAILURE_ALLOC;                              \
+    fail_stack.avail = 0;                                              \
   } while (0)
-
-
-/* As with BUF_PUSH_2, except for three bytes.  */
-#define BUF_PUSH_3(c1, c2, c3)                                         \
+#else
+#define INIT_FAIL_STACK()                                              \
   do {                                                                 \
-    GET_BUFFER_SPACE (3);                                              \
-    *b++ = (unsigned char) (c1);                                       \
-    *b++ = (unsigned char) (c2);                                       \
-    *b++ = (unsigned char) (c3);                                       \
+    fail_stack.avail = 0;                                              \
   } while (0)
+#endif
 
 
-/* Store a jump with opcode OP at LOC to location TO.  We store a
-   relative address offset by the three bytes the jump itself occupies.  */
-#define STORE_JUMP(op, loc, to) \
-  store_op1 (op, loc, (to) - (loc) - 3)
-
-/* Likewise, for a two-argument jump.  */
-#define STORE_JUMP2(op, loc, to, arg) \
-  store_op2 (op, loc, (to) - (loc) - 3, arg)
-
-/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
-#define INSERT_JUMP(op, loc, to) \
-  insert_op1 (op, loc, (to) - (loc) - 3, b)
-
-/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
-#define INSERT_JUMP2(op, loc, to, arg) \
-  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
-
-
-/* This is not an arbitrary limit: the arguments which represent offsets
-   into the pattern are two bytes long.  So if 2^16 bytes turns out to
-   be too small, many things would have to change.  */
-#define MAX_BUF_SIZE (1L << 16)
+/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
 
+   Return 1 if succeeds, and 0 if either ran out of memory
+   allocating space for it or it was already too large.  
+   
+   REGEX_REALLOCATE requires `destination' be declared.   */
 
-/* Extend the buffer by twice its current size via realloc and
-   reset the pointers that pointed into the old block to point to the
-   correct places in the new one.  If extending the buffer results in it
-   being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
-#define EXTEND_BUFFER()                                                        \
-  do {                                                                         \
-    unsigned char *old_buffer = bufp->buffer;                          \
-    if (bufp->allocated == MAX_BUF_SIZE)                               \
-      return REG_ESIZE;                                                        \
-    bufp->allocated <<= 1;                                             \
-    if (bufp->allocated > MAX_BUF_SIZE)                                        \
-      bufp->allocated = MAX_BUF_SIZE;                                  \
-    bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
-    if (bufp->buffer == NULL)                                          \
-      return REG_ESPACE;                                               \
-    /* If the buffer moved, move all the pointers into it.  */         \
-    if (old_buffer != bufp->buffer)                                    \
-      {                                                                        \
-        b = (b - old_buffer) + bufp->buffer;                           \
-        begalt = (begalt - old_buffer) + bufp->buffer;                 \
-        if (fixup_alt_jump)                                            \
-          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
-        if (laststart)                                                 \
-          laststart = (laststart - old_buffer) + bufp->buffer;         \
-        if (pending_exact)                                             \
-          pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
-      }                                                                        \
-  } while (0)
+/* Evaluates to 1 after doubling FAIL_STACK's capacity, or to 0 without
+   growing it if the size limit was already exceeded or the
+   reallocation returned NULL.  The limit test expands
+   MAX_FAILURE_ITEMS, which reads `num_regs' at the point of use.  */
+#define DOUBLE_FAIL_STACK(fail_stack)                                  \
+  ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS             \
+   ? 0 /* Already over the size limit; refuse to grow.  */             \
+   : ((fail_stack).stack = (fail_stack_elt_t *)                                \
+        REGEX_REALLOCATE ((fail_stack).stack,                          \
+          (fail_stack).size * sizeof (fail_stack_elt_t),               \
+          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),       \
+      /* If this is NULL, the previous `stack' pointer is already      \
+         overwritten.  */                                              \
+      (fail_stack).stack == NULL                                       \
+      ? 0                                                              \
+      : ((fail_stack).size <<= 1,                                      \
+         1)))
 
 
-/* Since we have one byte reserved for the register number argument to
-   {start,stop}_memory, the maximum number of groups we can report
-   things about is what fits in that byte.  */
-#define MAX_REGNUM 255
+/* Push PATTERN_OP on FAIL_STACK. 
 
-/* But patterns can have more than `MAX_REGNUM' registers.  We just
-   ignore the excess.  */
-typedef unsigned regnum_t;
+   Return 1 if was able to do so and 0 if ran out of memory allocating
+   space to do so.  */
+/* The fullness test is spelled out on the FAIL_STACK argument rather
+   than going through FAIL_STACK_FULL (), which always inspects the
+   variable literally named `fail_stack' and so would ignore a stack
+   passed under any other name.  Evaluates to 1 after storing
+   PATTERN_OP, or to 0 if the stack was full and could not be doubled.  */
+#define PUSH_PATTERN_OP(pattern_op, fail_stack)                        \
+  (((fail_stack).avail == (fail_stack).size                            \
+    && !DOUBLE_FAIL_STACK (fail_stack))                                \
+   ? 0                                                                 \
+   : ((fail_stack).stack[(fail_stack).avail++] = (pattern_op),         \
+      1))
 
+/* This pushes an item onto the failure stack.  ITEM is converted to
+   `fail_stack_elt_t', so it must be a value that fits in one stack
+   slot.  Assumes the variable `fail_stack'; no bounds check is done
+   here, so room for the item must already have been ensured.  Probably
+   should only be called from within `PUSH_FAILURE_POINT'.  */
+#define PUSH_FAILURE_ITEM(item)                                        \
+  (fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) (item))
 
-/* Macros for the compile stack.  */
+/* The complement operation.  Assumes `fail_stack' is nonempty.  */
+#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
 
-/* Since offsets can go either forwards or backwards, this type needs to
-   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
-typedef int pattern_offset_t;
+/* Used to omit pushing failure point id's when we're not debugging.  */
+#ifdef DEBUG
+#define DEBUG_PUSH PUSH_FAILURE_ITEM
+#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
+#else
+#define DEBUG_PUSH(item)
+#define DEBUG_POP(item_addr)
+#endif
 
-typedef struct
-{
-  pattern_offset_t begalt_offset;
-  pattern_offset_t fixup_alt_jump;
-  pattern_offset_t inner_group_offset;
-  pattern_offset_t laststart_offset;  
-  regnum_t regnum;
-} compile_stack_elt_t;
 
+/* Push the information about the state we will need
+   if we ever fail back to it.  
+   
+   Requires variables fail_stack, regstart, regend, reg_info, num_regs,
+   lowest_active_reg, and highest_active_reg be declared; this macro
+   itself declares the `destination' that DOUBLE_FAIL_STACK requires.
+   
+   Does `return FAILURE_CODE' if runs out of memory.  */
 
-typedef struct
+#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)  \
+  do {                                                                 \
+    char *destination;                                                 \
+    /* Must be int, so when we don't save any registers, the arithmetic        \
+       of 0 + -1 isn't done as unsigned.  */                           \
+    int this_reg;                                                      \
+                                                                       \
+    DEBUG_STATEMENT (failure_id++);                                    \
+    DEBUG_STATEMENT (nfailure_points_pushed++);                                \
+    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);          \
+    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
+    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
+                                                                       \
+    DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);          \
+    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);      \
+                                                                       \
+    /* Ensure we have enough space allocated for what we will push.  */        \
+    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                  \
+      {                                                                        \
+        if (!DOUBLE_FAIL_STACK (fail_stack))                   \
+          return failure_code;                                         \
+                                                                       \
+        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",             \
+                      (fail_stack).size);                              \
+        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
+      }                                                                        \
+                                                                       \
+    /* Push the info, starting with the registers.  */                 \
+    DEBUG_PRINT1 ("\n");                                               \
+                                                                       \
+    for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
+         this_reg++)                                                   \
+      {                                                                        \
+       DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);                 \
+        DEBUG_STATEMENT (num_regs_pushed++);                           \
+                                                                       \
+       DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);         \
+        PUSH_FAILURE_ITEM (regstart[this_reg]);                                \
+                                                                        \
+       DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);             \
+        PUSH_FAILURE_ITEM (regend[this_reg]);                          \
+                                                                       \
+       DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);    \
+        DEBUG_PRINT2 (" match_null=%d",                                        \
+                      REG_MATCH_NULL_STRING_P (reg_info[this_reg]));   \
+        DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));   \
+        DEBUG_PRINT2 (" matched_something=%d",                         \
+                      MATCHED_SOMETHING (reg_info[this_reg]));         \
+        DEBUG_PRINT2 (" ever_matched=%d",                              \
+                      EVER_MATCHED_SOMETHING (reg_info[this_reg]));    \
+       DEBUG_PRINT1 ("\n");                                            \
+        PUSH_FAILURE_ITEM (reg_info[this_reg].word);                   \
+      }                                                                        \
+                                                                       \
+    DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
+    PUSH_FAILURE_ITEM (lowest_active_reg);                             \
+                                                                       \
+    DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
+    PUSH_FAILURE_ITEM (highest_active_reg);                            \
+                                                                       \
+    DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);          \
+    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);          \
+    PUSH_FAILURE_ITEM (pattern_place);                                 \
+                                                                       \
+    DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);           \
+    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
+                                size2);                                \
+    DEBUG_PRINT1 ("'\n");                                              \
+    PUSH_FAILURE_ITEM (string_place);                                  \
+                                                                       \
+    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);           \
+    DEBUG_PUSH (failure_id);                                           \
+  } while (0)
+
+/* This is the number of items that are pushed and popped on the stack
+   for each register.  */
+#define NUM_REG_ITEMS  3
+
+/* Individual items aside from the registers.  */
+#ifdef DEBUG
+#define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
+#else
+#define NUM_NONREG_ITEMS 4
+#endif
+
+/* We push at most this many items on the stack.  */
+#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
+
+/* We actually push this many items.  */
+#define NUM_FAILURE_ITEMS                                              \
+  ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS        \
+    + NUM_NONREG_ITEMS)
+
+/* How many items can still be added to the stack without overflowing it.  */
+#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
+
+
+/* Pops what PUSH_FAILURE_POINT pushes.
+
+   We restore into the parameters, all of which should be lvalues:
+     STR -- the saved data position.
+     PAT -- the saved pattern position.
+     LOW_REG, HIGH_REG -- the lowest and highest active registers.
+     REGSTART, REGEND -- arrays of string positions.
+     REG_INFO -- array of information about each subexpression.
+   
+   Also assumes the variables `fail_stack' and (if debugging), `bufp',
+   `pend', `string1', `size1', `string2', and `size2'.  */
+
+#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
+{                                                                      \
+  DEBUG_STATEMENT (fail_stack_elt_t failure_id;)                       \
+  int this_reg;                                                                \
+  const unsigned char *string_temp;                                    \
+                                                                       \
+  assert (!FAIL_STACK_EMPTY ());                                       \
+                                                                       \
+  /* Remove failure points and point to how many regs pushed.  */      \
+  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                               \
+  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);   \
+  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);    \
+                                                                       \
+  assert (fail_stack.avail >= NUM_NONREG_ITEMS);                       \
+                                                                       \
+  DEBUG_POP (&failure_id);                                             \
+  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);             \
+                                                                       \
+  /* If the saved string location is NULL, it came from an             \
+     on_failure_keep_string_jump opcode, and we want to throw away the \
+     saved NULL, thus retaining our current position in the string.  */        \
+  string_temp = POP_FAILURE_ITEM ();                                   \
+  if (string_temp != NULL)                                             \
+    str = (const char *) string_temp;                                  \
+                                                                       \
+  DEBUG_PRINT2 ("  Popping string 0x%x: `", str);                      \
+  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);     \
+  DEBUG_PRINT1 ("'\n");                                                        \
+                                                                       \
+  pat = (unsigned char *) POP_FAILURE_ITEM ();                         \
+  DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);                      \
+  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                      \
+                                                                       \
+  /* Restore register info.  */                                                \
+  high_reg = (unsigned) POP_FAILURE_ITEM ();                           \
+  DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);          \
+                                                                       \
+  low_reg = (unsigned) POP_FAILURE_ITEM ();                            \
+  DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);           \
+                                                                       \
+  for (this_reg = high_reg; this_reg >= low_reg; this_reg--)           \
+    {                                                                  \
+      DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);                        \
+                                                                       \
+      reg_info[this_reg].word = POP_FAILURE_ITEM ();                   \
+      DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);         \
+                                                                       \
+      regend[this_reg] = (const char *) POP_FAILURE_ITEM ();           \
+      DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);            \
+                                                                       \
+      regstart[this_reg] = (const char *) POP_FAILURE_ITEM ();         \
+      DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);                \
+    }                                                                  \
+                                                                       \
+  DEBUG_STATEMENT (nfailure_points_popped++);                          \
+} /* POP_FAILURE_POINT */
+
+
+\f
+/* Structure for per-register (a.k.a. per-group) information.
+   This must not be longer than one word, because we push this value
+   onto the failure stack.  Other register information, such as the
+   starting and ending positions (which are addresses), and the list of
+   inner groups (which is a bit list) are maintained in separate
+   variables.
+
+   We are making a (strictly speaking) nonportable assumption here: that
+   the compiler will pack our bit fields into something that fits into
+   the type of `word', i.e., is something that fits into one item on the
+   failure stack.  */
+typedef union
+{
+  /* The whole record viewed as a single failure-stack item; this is
+     the member that is pushed onto and popped from the failure stack.  */
+  fail_stack_elt_t word;
+  struct
+  {
+      /* This field is one if this group can match the empty string,
+         zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
+#define MATCH_NULL_UNSET_VALUE 3
+    unsigned match_null_string_p : 2;
+    unsigned is_active : 1;
+    /* Both of the following are set to one by SET_REGS_MATCHED for
+       every register in the active range.  */
+    unsigned matched_something : 1;
+    unsigned ever_matched_something : 1;
+  } bits;
+} register_info_type;
+
+/* Accessors for the individual bit fields of a register_info_type R.
+   Each expands to the field itself, so it can be assigned to as well
+   as read.  */
+#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
+#define IS_ACTIVE(R)  ((R).bits.is_active)
+#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
+#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
+
+
+/* Call this when we have matched a real character.  For every register
+   from `lowest_active_reg' through `highest_active_reg' it sets both
+   the `matched_something' and the `ever_matched_something' flag in
+   `reg_info', i.e., it marks the subexpressions we are currently
+   inside as having matched.  Those three variables must be in scope.  */
+#define SET_REGS_MATCHED()                                             \
+  do                                                                   \
+    {                                                                  \
+      unsigned reg_idx = lowest_active_reg;                            \
+      while (reg_idx <= highest_active_reg)                            \
+        {                                                              \
+          EVER_MATCHED_SOMETHING (reg_info[reg_idx]) = 1;              \
+          MATCHED_SOMETHING (reg_info[reg_idx]) = 1;                   \
+          reg_idx++;                                                   \
+        }                                                              \
+    }                                                                  \
+  while (0)
+
+
+/* Registers are set to a sentinel when they haven't yet matched.
+   REG_UNSET_VALUE is that sentinel, and REG_UNSET (E) is nonzero
+   exactly when the register value E compares equal to it.  */
+#define REG_UNSET_VALUE ((char *) -1)
+#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
+
+
+\f
+/* How do we implement a missing MATCH_MAY_ALLOCATE?
+   We make the fail stack a global thing, and then grow it to its
+   largest possible size (room for 2 * re_max_failures failure points)
+   when we compile.  The register arrays below are file-scope for the
+   same reason; regex_compile passes each of them to RETALLOC_IF with
+   `bufp->re_nsub + 1' elements.  */
+#ifndef MATCH_MAY_ALLOCATE
+static fail_stack_type fail_stack;
+
+static const char **     regstart, **     regend;
+static const char ** old_regstart, ** old_regend;
+static const char **best_regstart, **best_regend;
+static register_info_type *reg_info; 
+static const char **reg_dummy;
+static register_info_type *reg_info_dummy;
+#endif
+
+\f
+/* Subroutine declarations and macros for regex_compile.  */
+
+/* Old-style (unprototyped) forward declarations; the definitions come
+   after regex_compile itself.  */
+static void store_op1 (), store_op2 ();
+static void insert_op1 (), insert_op2 ();
+static boolean at_begline_loc_p (), at_endline_loc_p ();
+static boolean group_in_compile_stack ();
+static reg_errcode_t compile_range ();
+
+/* Fetch the next character in the uncompiled pattern---translating it 
+   if necessary.  Also cast from a signed character in the constant
+   string passed to us by the user to an unsigned char that we can use
+   as an array index (in, e.g., `translate').
+
+   Uses the variables `p', `pend' and `translate' of the enclosing
+   function, advances `p' by one, and does `return REG_EEND' from that
+   function if the pattern is already exhausted (`p == pend').  */
+#define PATFETCH(c)                                                    \
+  do {if (p == pend) return REG_EEND;                                  \
+    c = (unsigned char) *p++;                                          \
+    if (translate) c = translate[c];                                   \
+  } while (0)
+
+/* Fetch the next character in the uncompiled pattern, with no
+   translation.  Like PATFETCH, this advances `p' and does
+   `return REG_EEND' from the enclosing function when `p == pend'.  */
+#define PATFETCH_RAW(c)                                                        \
+  do {if (p == pend) return REG_EEND;                                  \
+    c = (unsigned char) *p++;                                          \
+  } while (0)
+
+/* Go backwards one character in the pattern.  This simply decrements
+   `p'; it does not check against the start of the pattern.  */
+#define PATUNFETCH p--
+
+
+/* If `translate' is non-null, return translate[D], else just D.  We
+   cast the subscript to translate because some data is declared as
+   `char *', to avoid warnings when a string constant is passed.  But
+   when we use a character as a subscript we must make it unsigned.
+   D appears twice in the expansion but is evaluated only once, since
+   only one arm of the conditional is taken.  */
+#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
+
+
+/* Macros for outputting the compiled pattern into `buffer'.  */
+
+/* If the buffer isn't allocated when it comes in, use this.  */
+#define INIT_BUF_SIZE  32
+
+/* Make sure we have at least N more bytes of space in buffer.
+   Invokes EXTEND_BUFFER repeatedly until the bytes already written
+   (`b - bufp->buffer') plus N fit within `bufp->allocated'; since
+   EXTEND_BUFFER can return from the enclosing function, so can this.
+   Note that this expands to a bare `while' statement, not a
+   `do ... while (0)' block.  */
+#define GET_BUFFER_SPACE(n)                                            \
+    while (b - bufp->buffer + (n) > bufp->allocated)                   \
+      EXTEND_BUFFER ()
+
+/* Make sure we have one more byte of buffer space and then add C to it.
+   C is converted to `unsigned char' and stored at `b', which is then
+   advanced past it.  */
+#define BUF_PUSH(c)                                                    \
+  do {                                                                 \
+    GET_BUFFER_SPACE (1);                                              \
+    *b++ = (unsigned char) (c);                                                \
+  } while (0)
+
+
+/* Ensure we have two more bytes of buffer space and then append C1 and C2,
+   in that order.  The space for both bytes is reserved before either
+   is stored.  */
+#define BUF_PUSH_2(c1, c2)                                             \
+  do {                                                                 \
+    GET_BUFFER_SPACE (2);                                              \
+    *b++ = (unsigned char) (c1);                                       \
+    *b++ = (unsigned char) (c2);                                       \
+  } while (0)
+
+
+/* As with BUF_PUSH_2, except for three bytes.  */
+#define BUF_PUSH_3(c1, c2, c3)                                         \
+  do {                                                                 \
+    GET_BUFFER_SPACE (3);                                              \
+    *b++ = (unsigned char) (c1);                                       \
+    *b++ = (unsigned char) (c2);                                       \
+    *b++ = (unsigned char) (c3);                                       \
+  } while (0)
+
+
+/* Store a jump with opcode OP at LOC to location TO.  We store a
+   relative address offset by the three bytes the jump itself occupies,
+   i.e., the argument handed to store_op1 is (TO) - (LOC) - 3.  */
+#define STORE_JUMP(op, loc, to) \
+  store_op1 (op, loc, (to) - (loc) - 3)
+
+/* Likewise, for a two-argument jump; ARG is passed through unchanged
+   as the second argument.  */
+#define STORE_JUMP2(op, loc, to, arg) \
+  store_op2 (op, loc, (to) - (loc) - 3, arg)
+
+/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
+#define INSERT_JUMP(op, loc, to) \
+  insert_op1 (op, loc, (to) - (loc) - 3, b)
+
+/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
+#define INSERT_JUMP2(op, loc, to, arg) \
+  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
+
+
+/* This is not an arbitrary limit: the arguments which represent offsets
+   into the pattern are two bytes long.  So if 2^16 bytes turns out to
+   be too small, many things would have to change.  */
+#define MAX_BUF_SIZE (1L << 16)
+
+
+/* Double the size of the buffer via realloc (clamping the new size at
+   MAX_BUF_SIZE) and reset the pointers that pointed into the old block
+   to point to the correct places in the new one.
+
+   Does `return REG_ESIZE' from the enclosing function if the buffer is
+   already MAX_BUF_SIZE bytes, and `return REG_ESPACE' if realloc fails.
+   In both cases `bufp->buffer' and `bufp->allocated' are left exactly
+   as they were, so the old block is neither leaked nor misdescribed.
+
+   Requires `bufp', `b', `begalt', `fixup_alt_jump', `laststart' and
+   `pending_exact' to be in scope.  */
+#define EXTEND_BUFFER()                                                        \
+  do {                                                                         \
+    unsigned char *old_buffer = bufp->buffer;                          \
+    unsigned char *new_buffer;                                         \
+    unsigned long new_allocated;                                       \
+    if (bufp->allocated == MAX_BUF_SIZE)                               \
+      return REG_ESIZE;                                                        \
+    new_allocated = bufp->allocated << 1;                              \
+    if (new_allocated > MAX_BUF_SIZE)                                  \
+      new_allocated = MAX_BUF_SIZE;                                    \
+    /* Reallocate into a temporary: when realloc fails the old block   \
+       stays allocated, so storing NULL in bufp->buffer would leak it. */\
+    new_buffer = (unsigned char *) realloc (old_buffer, new_allocated);        \
+    if (new_buffer == NULL)                                            \
+      return REG_ESPACE;                                               \
+    bufp->buffer = new_buffer;                                         \
+    bufp->allocated = new_allocated;                                   \
+    /* If the buffer moved, move all the pointers into it.  */         \
+    if (old_buffer != bufp->buffer)                                    \
+      {                                                                        \
+        b = (b - old_buffer) + bufp->buffer;                           \
+        begalt = (begalt - old_buffer) + bufp->buffer;                 \
+        if (fixup_alt_jump)                                            \
+          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
+        if (laststart)                                                 \
+          laststart = (laststart - old_buffer) + bufp->buffer;         \
+        if (pending_exact)                                             \
+          pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
+      }                                                                        \
+  } while (0)
+
+
+/* Since we have one byte reserved for the register number argument to
+   {start,stop}_memory, the maximum number of groups we can report
+   things about is what fits in that byte.  */
+#define MAX_REGNUM 255
+
+/* But patterns can have more than `MAX_REGNUM' registers.  We just
+   ignore the excess.  The counter type is therefore wider than the
+   one-byte opcode argument.  */
+typedef unsigned regnum_t;
+
+
+/* Macros for the compile stack.  */
+
+/* Since offsets can go either forwards or backwards, this type needs to
+   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.
+   NOTE(review): `int' only covers that range where `int' is wider than
+   16 bits -- confirm for any 16-bit targets.  */
+typedef int pattern_offset_t;
+
+/* One saved entry of regex_compile's per-group state.  When
+   regex_compile reads COMPILE_STACK_TOP it restores `laststart' as
+   `bufp->buffer + laststart_offset' and the group number from
+   `regnum'; the remaining offsets are presumably relative to
+   `bufp->buffer' in the same way -- verify against regex_compile.  */
+typedef struct
+{
+  pattern_offset_t begalt_offset;
+  pattern_offset_t fixup_alt_jump;
+  pattern_offset_t inner_group_offset;
+  pattern_offset_t laststart_offset;  
+  regnum_t regnum;
+} compile_stack_elt_t;
+
+
+typedef struct
 {
   compile_stack_elt_t *stack;
   unsigned size;
@@ -1302,6 +1732,7 @@ regex_compile (pattern, size, syntax, bufp)
                    the `*'.  Do we have to do something analogous here
                    for null bytes, because of RE_DOT_NOT_NULL?  */
                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
+                   && zero_times_ok
                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
                     && !(syntax & RE_DOT_NEWLINE))
                   { /* We have .*\n.  */
@@ -1612,6 +2043,10 @@ regex_compile (pattern, size, syntax, bufp)
               fixup_alt_jump = 0;
               laststart = 0;
               begalt = b;
+             /* If we've reached MAX_REGNUM groups, then this open
+                won't actually generate any code, so we'll have to
+                clear pending_exact explicitly.  */
+             pending_exact = 0;
               break;
 
 
@@ -1661,6 +2096,10 @@ regex_compile (pattern, size, syntax, bufp)
                     : 0;
                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
                 this_group_regnum = COMPILE_STACK_TOP.regnum;
+               /* If we've reached MAX_REGNUM groups, then this open
+                  won't actually generate any code, so we'll have to
+                  clear pending_exact explicitly.  */
+               pending_exact = 0;
 
                 /* We're at the end of the group, so now we know how many
                    groups were inside this one.  */
@@ -2030,479 +2469,241 @@ regex_compile (pattern, size, syntax, bufp)
   if (!COMPILE_STACK_EMPTY) 
     return REG_EPAREN;
 
-  free (compile_stack.stack);
-
-  /* We have succeeded; set the length of the buffer.  */
-  bufp->used = b - bufp->buffer;
-
-#ifdef DEBUG
-  if (debug)
-    {
-      DEBUG_PRINT1 ("\nCompiled pattern: ");
-      print_compiled_pattern (bufp);
-    }
-#endif /* DEBUG */
-
-  return REG_NOERROR;
-} /* regex_compile */
-\f
-/* Subroutines for `regex_compile'.  */
-
-/* Store OP at LOC followed by two-byte integer parameter ARG.  */
-
-static void
-store_op1 (op, loc, arg)
-    re_opcode_t op;
-    unsigned char *loc;
-    int arg;
-{
-  *loc = (unsigned char) op;
-  STORE_NUMBER (loc + 1, arg);
-}
-
-
-/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
-
-static void
-store_op2 (op, loc, arg1, arg2)
-    re_opcode_t op;
-    unsigned char *loc;
-    int arg1, arg2;
-{
-  *loc = (unsigned char) op;
-  STORE_NUMBER (loc + 1, arg1);
-  STORE_NUMBER (loc + 3, arg2);
-}
-
-
-/* Copy the bytes from LOC to END to open up three bytes of space at LOC
-   for OP followed by two-byte integer parameter ARG.  */
-
-static void
-insert_op1 (op, loc, arg, end)
-    re_opcode_t op;
-    unsigned char *loc;
-    int arg;
-    unsigned char *end;    
-{
-  register unsigned char *pfrom = end;
-  register unsigned char *pto = end + 3;
-
-  while (pfrom != loc)
-    *--pto = *--pfrom;
-    
-  store_op1 (op, loc, arg);
-}
-
-
-/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
-
-static void
-insert_op2 (op, loc, arg1, arg2, end)
-    re_opcode_t op;
-    unsigned char *loc;
-    int arg1, arg2;
-    unsigned char *end;    
-{
-  register unsigned char *pfrom = end;
-  register unsigned char *pto = end + 5;
-
-  while (pfrom != loc)
-    *--pto = *--pfrom;
-    
-  store_op2 (op, loc, arg1, arg2);
-}
-
-
-/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
-   after an alternative or a begin-subexpression.  We assume there is at
-   least one character before the ^.  */
-
-static boolean
-at_begline_loc_p (pattern, p, syntax)
-    const char *pattern, *p;
-    reg_syntax_t syntax;
-{
-  const char *prev = p - 2;
-  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
-  
-  return
-       /* After a subexpression?  */
-       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
-       /* After an alternative?  */
-    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
-}
-
-
-/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
-   at least one character after the $, i.e., `P < PEND'.  */
-
-static boolean
-at_endline_loc_p (p, pend, syntax)
-    const char *p, *pend;
-    int syntax;
-{
-  const char *next = p;
-  boolean next_backslash = *next == '\\';
-  const char *next_next = p + 1 < pend ? p + 1 : NULL;
-  
-  return
-       /* Before a subexpression?  */
-       (syntax & RE_NO_BK_PARENS ? *next == ')'
-        : next_backslash && next_next && *next_next == ')')
-       /* Before an alternative?  */
-    || (syntax & RE_NO_BK_VBAR ? *next == '|'
-        : next_backslash && next_next && *next_next == '|');
-}
-
-
-/* Returns true if REGNUM is in one of COMPILE_STACK's elements and 
-   false if it's not.  */
-
-static boolean
-group_in_compile_stack (compile_stack, regnum)
-    compile_stack_type compile_stack;
-    regnum_t regnum;
-{
-  int this_element;
-
-  for (this_element = compile_stack.avail - 1;  
-       this_element >= 0; 
-       this_element--)
-    if (compile_stack.stack[this_element].regnum == regnum)
-      return true;
-
-  return false;
-}
-
-
-/* Read the ending character of a range (in a bracket expression) from the
-   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
-   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
-   Then we set the translation of all bits between the starting and
-   ending characters (inclusive) in the compiled pattern B.
-   
-   Return an error code.
-   
-   We use these short variable names so we can use the same macros as
-   `regex_compile' itself.  */
-
-static reg_errcode_t
-compile_range (p_ptr, pend, translate, syntax, b)
-    const char **p_ptr, *pend;
-    char *translate;
-    reg_syntax_t syntax;
-    unsigned char *b;
-{
-  unsigned this_char;
-
-  const char *p = *p_ptr;
-  
-  /* Even though the pattern is a signed `char *', we need to fetch into
-     `unsigned char's.  Reason: if the high bit of the pattern character
-     is set, the range endpoints will be negative if we fetch into a
-     signed `char *'.  */
-  unsigned char range_end;
-  unsigned char range_start = p[-2];
-
-  if (p == pend)
-    return REG_ERANGE;
-
-  PATFETCH (range_end);
-
-  /* Have to increment the pointer into the pattern string, so the
-     caller isn't still at the ending character.  */
-  (*p_ptr)++;
+  free (compile_stack.stack);
 
-  /* If the start is after the end, the range is empty.  */
-  if (range_start > range_end)
-    return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
+  /* We have succeeded; set the length of the buffer.  */
+  bufp->used = b - bufp->buffer;
 
-  /* Here we see why `this_char' has to be larger than an `unsigned
-     char' -- the range is inclusive, so if `range_end' == 0xff
-     (assuming 8-bit characters), we would otherwise go into an infinite
-     loop, since all characters <= 0xff.  */
-  for (this_char = range_start; this_char <= range_end; this_char++)
+#ifdef DEBUG
+  if (debug)
     {
-      SET_LIST_BIT (TRANSLATE (this_char));
+      DEBUG_PRINT1 ("\nCompiled pattern: \n");
+      print_compiled_pattern (bufp);
     }
-  
-  return REG_NOERROR;
-}
-\f
-/* Failure stack declarations and macros; both re_compile_fastmap and
-   re_match_2 use a failure stack.  These have to be macros because of
-   REGEX_ALLOCATE.  */
-   
+#endif /* DEBUG */
 
-/* Number of failure points for which to initially allocate space
-   when matching.  If this number is exceeded, we allocate more
-   space, so it is not a hard limit.  */
-#ifndef INIT_FAILURE_ALLOC
-#define INIT_FAILURE_ALLOC 5
+#ifndef MATCH_MAY_ALLOCATE
+  /* Initialize the failure stack to the largest possible stack.  This
+     isn't necessary unless we're trying to avoid calling alloca in
+     the search and match routines.  */
+  {
+    int num_regs = bufp->re_nsub + 1;
+
+    /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
+       is strictly greater than re_max_failures, the largest possible stack
+       is 2 * re_max_failures failure points.  */
+    fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
+    if (fail_stack.stack)
+      fail_stack.stack =
+       (fail_stack_elt_t *) realloc (fail_stack.stack,
+                                     (fail_stack.size
+                                      * sizeof (fail_stack_elt_t)));
+    else
+      fail_stack.stack =
+       (fail_stack_elt_t *) malloc (fail_stack.size 
+                                    * sizeof (fail_stack_elt_t));
+
+    /* Initialize some other variables the matcher uses.  */
+    RETALLOC_IF (regstart,      num_regs, const char *);
+    RETALLOC_IF (regend,        num_regs, const char *);
+    RETALLOC_IF (old_regstart,  num_regs, const char *);
+    RETALLOC_IF (old_regend,    num_regs, const char *);
+    RETALLOC_IF (best_regstart,  num_regs, const char *);
+    RETALLOC_IF (best_regend,   num_regs, const char *);
+    RETALLOC_IF (reg_info,      num_regs, register_info_type);
+    RETALLOC_IF (reg_dummy,     num_regs, const char *);
+    RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
+  }
 #endif
 
-/* Roughly the maximum number of failure points on the stack.  Would be
-   exactly that if always used MAX_FAILURE_SPACE each time we failed.
-   This is a variable only so users of regex can assign to it; we never
-   change it ourselves.  */
-int re_max_failures = 2000;
+  return REG_NOERROR;
+} /* regex_compile */
+\f
+/* Subroutines for `regex_compile'.  */
 
-typedef const unsigned char *fail_stack_elt_t;
+/* Store OP at LOC followed by two-byte integer parameter ARG.  */
 
-typedef struct
+static void
+store_op1 (op, loc, arg)
+    re_opcode_t op;
+    unsigned char *loc;
+    int arg;
 {
-  fail_stack_elt_t *stack;
-  unsigned size;
-  unsigned avail;                      /* Offset of next open position.  */
-} fail_stack_type;
-
-#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
-#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
-#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
-#define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])
+  *loc = (unsigned char) op;
+  STORE_NUMBER (loc + 1, arg);
+}
 
 
-/* Initialize `fail_stack'.  Do `return -2' if the alloc fails.  */
+/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
 
-#define INIT_FAIL_STACK()                                              \
-  do {                                                                 \
-    fail_stack.stack = (fail_stack_elt_t *)                            \
-      REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
-                                                                       \
-    if (fail_stack.stack == NULL)                                      \
-      return -2;                                                       \
-                                                                       \
-    fail_stack.size = INIT_FAILURE_ALLOC;                              \
-    fail_stack.avail = 0;                                              \
-  } while (0)
+static void
+store_op2 (op, loc, arg1, arg2)
+    re_opcode_t op;
+    unsigned char *loc;
+    int arg1;
+    int arg2;
+{
+  /* Layout written at LOC: one opcode byte, then ARG1 and ARG2 as
+     consecutive two-byte numbers.  */
+  unsigned char *first_arg = loc + 1;
+  unsigned char *second_arg = loc + 3;
+
+  loc[0] = (unsigned char) op;
+  STORE_NUMBER (first_arg, arg1);
+  STORE_NUMBER (second_arg, arg2);
+}
 
 
-/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
+/* Copy the bytes from LOC to END to open up three bytes of space at LOC
+   for OP followed by two-byte integer parameter ARG.  */
 
-   Return 1 if succeeds, and 0 if either ran out of memory
-   allocating space for it or it was already too large.  
-   
-   REGEX_REALLOCATE requires `destination' be declared.   */
+static void
+insert_op1 (op, loc, arg, end)
+    re_opcode_t op;
+    unsigned char *loc;
+    int arg;
+    unsigned char *end;    
+{
+  register unsigned char *pfrom = end;
+  register unsigned char *pto = end + 3;
 
-#define DOUBLE_FAIL_STACK(fail_stack)                                  \
-  ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS             \
-   ? 0                                                                 \
-   : ((fail_stack).stack = (fail_stack_elt_t *)                                \
-        REGEX_REALLOCATE ((fail_stack).stack,                          \
-          (fail_stack).size * sizeof (fail_stack_elt_t),               \
-          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),       \
-                                                                       \
-      (fail_stack).stack == NULL                                       \
-      ? 0                                                              \
-      : ((fail_stack).size <<= 1,                                      \
-         1)))
+  while (pfrom != loc)
+    *--pto = *--pfrom;
+    
+  store_op1 (op, loc, arg);
+}
 
 
-/* Push PATTERN_OP on FAIL_STACK. 
+/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
 
-   Return 1 if was able to do so and 0 if ran out of memory allocating
-   space to do so.  */
-#define PUSH_PATTERN_OP(pattern_op, fail_stack)                                \
-  ((FAIL_STACK_FULL ()                                                 \
-    && !DOUBLE_FAIL_STACK (fail_stack))                                        \
-    ? 0                                                                        \
-    : ((fail_stack).stack[(fail_stack).avail++] = pattern_op,          \
-       1))
+static void
+insert_op2 (op, loc, arg1, arg2, end)
+    re_opcode_t op;
+    unsigned char *loc;
+    int arg1, arg2;
+    unsigned char *end;    
+{
+  register unsigned char *pfrom = end;
+  register unsigned char *pto = end + 5;
 
-/* This pushes an item onto the failure stack.  Must be a four-byte
-   value.  Assumes the variable `fail_stack'.  Probably should only
-   be called from within `PUSH_FAILURE_POINT'.  */
-#define PUSH_FAILURE_ITEM(item)                                                \
-  fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
+  while (pfrom != loc)
+    *--pto = *--pfrom;
+    
+  store_op2 (op, loc, arg1, arg2);
+}
 
-/* The complement operation.  Assumes `fail_stack' is nonempty.  */
-#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
 
-/* Used to omit pushing failure point id's when we're not debugging.  */
-#ifdef DEBUG
-#define DEBUG_PUSH PUSH_FAILURE_ITEM
-#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
-#else
-#define DEBUG_PUSH(item)
-#define DEBUG_POP(item_addr)
-#endif
+/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
+   after an alternative or a begin-subexpression.  We assume there is at
+   least one character before the ^.  */
 
+static boolean
+at_begline_loc_p (pattern, p, syntax)
+    const char *pattern, *p;
+    reg_syntax_t syntax;
+{
+  /* P is one past the `^', so the character in front of the `^' is at
+     P - 2; a backslash quoting that character would sit just before
+     it, provided we are not already at the start of PATTERN.  */
+  const char *before = p - 2;
+  boolean quoted = before > pattern && before[-1] == '\\';
+  boolean after_open, after_alt;
+
+  /* After a subexpression?  The `(' counts when RE_NO_BK_PARENS is set
+     or when a backslash precedes it.  */
+  after_open = *before == '(' && ((syntax & RE_NO_BK_PARENS) || quoted);
+
+  /* After an alternative?  Same rule, with `|' and RE_NO_BK_VBAR.  */
+  after_alt = *before == '|' && ((syntax & RE_NO_BK_VBAR) || quoted);
+
+  return after_open || after_alt;
+}
 
-/* Push the information about the state we will need
-   if we ever fail back to it.  
-   
-   Requires variables fail_stack, regstart, regend, reg_info, and
-   num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
-   declared.
-   
-   Does `return FAILURE_CODE' if runs out of memory.  */
 
-#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)  \
-  do {                                                                 \
-    char *destination;                                                 \
-    /* Must be int, so when we don't save any registers, the arithmetic        \
-       of 0 + -1 isn't done as unsigned.  */                           \
-    int this_reg;                                                      \
-                                                                       \
-    DEBUG_STATEMENT (failure_id++);                                    \
-    DEBUG_STATEMENT (nfailure_points_pushed++);                                \
-    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);          \
-    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
-    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
-                                                                       \
-    DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);          \
-    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);      \
-                                                                       \
-    /* Ensure we have enough space allocated for what we will push.  */        \
-    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                  \
-      {                                                                        \
-        if (!DOUBLE_FAIL_STACK (fail_stack))                   \
-          return failure_code;                                         \
-                                                                       \
-        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",             \
-                      (fail_stack).size);                              \
-        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
-      }                                                                        \
-                                                                       \
-    /* Push the info, starting with the registers.  */                 \
-    DEBUG_PRINT1 ("\n");                                               \
-                                                                       \
-    for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
-         this_reg++)                                                   \
-      {                                                                        \
-       DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);                 \
-        DEBUG_STATEMENT (num_regs_pushed++);                           \
-                                                                       \
-       DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);         \
-        PUSH_FAILURE_ITEM (regstart[this_reg]);                                \
-                                                                        \
-       DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);             \
-        PUSH_FAILURE_ITEM (regend[this_reg]);                          \
-                                                                       \
-       DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);    \
-        DEBUG_PRINT2 (" match_null=%d",                                        \
-                      REG_MATCH_NULL_STRING_P (reg_info[this_reg]));   \
-        DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));   \
-        DEBUG_PRINT2 (" matched_something=%d",                         \
-                      MATCHED_SOMETHING (reg_info[this_reg]));         \
-        DEBUG_PRINT2 (" ever_matched=%d",                              \
-                      EVER_MATCHED_SOMETHING (reg_info[this_reg]));    \
-       DEBUG_PRINT1 ("\n");                                            \
-        PUSH_FAILURE_ITEM (reg_info[this_reg].word);                   \
-      }                                                                        \
-                                                                       \
-    DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
-    PUSH_FAILURE_ITEM (lowest_active_reg);                             \
-                                                                       \
-    DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
-    PUSH_FAILURE_ITEM (highest_active_reg);                            \
-                                                                       \
-    DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);          \
-    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);          \
-    PUSH_FAILURE_ITEM (pattern_place);                                 \
-                                                                       \
-    DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);           \
-    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
-                                size2);                                \
-    DEBUG_PRINT1 ("'\n");                                              \
-    PUSH_FAILURE_ITEM (string_place);                                  \
-                                                                       \
-    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);           \
-    DEBUG_PUSH (failure_id);                                           \
-  } while (0)
+/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
+   at least one character after the $, i.e., `P < PEND'.  */
 
-/* This is the number of items that are pushed and popped on the stack
-   for each register.  */
-#define NUM_REG_ITEMS  3
+static boolean
+at_endline_loc_p (p, pend, syntax)
+    const char *p, *pend;
+    int syntax;        /* Tested against RE_NO_BK_PARENS and RE_NO_BK_VBAR.  */
+{
+  const char *next = p;        /* The character being examined.  */
+  boolean next_backslash = *next == '\\';      /* Maybe `\)' or `\|'.  */
+  const char *next_next = p + 1 < pend ? p + 1 : NULL;  /* NULL: P is last.  */
+  
+  return
+       /* Before a close-group: `)' if RE_NO_BK_PARENS, else `\)'?  */
+       (syntax & RE_NO_BK_PARENS ? *next == ')'
+        : next_backslash && next_next && *next_next == ')')
+       /* Before an alternation bar: `|' if RE_NO_BK_VBAR, else `\|'?  */
+    || (syntax & RE_NO_BK_VBAR ? *next == '|'
+        : next_backslash && next_next && *next_next == '|');
+}
 
-/* Individual items aside from the registers.  */
-#ifdef DEBUG
-#define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
-#else
-#define NUM_NONREG_ITEMS 4
-#endif
 
-/* We push at most this many items on the stack.  */
-#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
+/* Returns true if REGNUM is in one of COMPILE_STACK's elements and 
+   false if it's not.  */
 
-/* We actually push this many items.  */
-#define NUM_FAILURE_ITEMS                                              \
-  ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS        \
-    + NUM_NONREG_ITEMS)
+static boolean
+group_in_compile_stack (compile_stack, regnum)
+    compile_stack_type compile_stack;  /* Passed by value; only read here.  */
+    regnum_t regnum;                   /* Group number to look for.  */
+{
+  int this_element;    /* Index into compile_stack.stack.  */
 
-/* How many items can still be added to the stack without overflowing it.  */
-#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
+  for (this_element = compile_stack.avail - 1;  /* Scan avail-1 down to 0.  */
+       this_element >= 0; 
+       this_element--)
+    if (compile_stack.stack[this_element].regnum == regnum)
+      return true;     /* Found an entry recorded for REGNUM.  */
 
+  return false;        /* No entry carries REGNUM.  */
+}
 
-/* Pops what PUSH_FAIL_STACK pushes.
 
-   We restore into the parameters, all of which should be lvalues:
-     STR -- the saved data position.
-     PAT -- the saved pattern position.
-     LOW_REG, HIGH_REG -- the highest and lowest active registers.
-     REGSTART, REGEND -- arrays of string positions.
-     REG_INFO -- array of information about each subexpression.
+/* Read the ending character of a range (in a bracket expression) from the
+   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
+   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
+   Then we set the translation of all bits between the starting and
+   ending characters (inclusive) in the compiled pattern B.
    
-   Also assumes the variables `fail_stack' and (if debugging), `bufp',
-   `pend', `string1', `size1', `string2', and `size2'.  */
+   Return an error code.
+   
+   We use these short variable names so we can use the same macros as
+   `regex_compile' itself.  */
 
-#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
-{                                                                      \
-  DEBUG_STATEMENT (fail_stack_elt_t failure_id;)                       \
-  int this_reg;                                                                \
-  const unsigned char *string_temp;                                    \
-                                                                       \
-  assert (!FAIL_STACK_EMPTY ());                                       \
-                                                                       \
-  /* Remove failure points and point to how many regs pushed.  */      \
-  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                               \
-  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);   \
-  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);    \
-                                                                       \
-  assert (fail_stack.avail >= NUM_NONREG_ITEMS);                       \
-                                                                       \
-  DEBUG_POP (&failure_id);                                             \
-  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);             \
-                                                                       \
-  /* If the saved string location is NULL, it came from an             \
-     on_failure_keep_string_jump opcode, and we want to throw away the \
-     saved NULL, thus retaining our current position in the string.  */        \
-  string_temp = POP_FAILURE_ITEM ();                                   \
-  if (string_temp != NULL)                                             \
-    str = (const char *) string_temp;                                  \
-                                                                       \
-  DEBUG_PRINT2 ("  Popping string 0x%x: `", str);                      \
-  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);     \
-  DEBUG_PRINT1 ("'\n");                                                        \
-                                                                       \
-  pat = (unsigned char *) POP_FAILURE_ITEM ();                         \
-  DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);                      \
-  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                      \
-                                                                       \
-  /* Restore register info.  */                                                \
-  high_reg = (unsigned) POP_FAILURE_ITEM ();                           \
-  DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);          \
-                                                                       \
-  low_reg = (unsigned) POP_FAILURE_ITEM ();                            \
-  DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);           \
-                                                                       \
-  for (this_reg = high_reg; this_reg >= low_reg; this_reg--)           \
-    {                                                                  \
-      DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);                        \
-                                                                       \
-      reg_info[this_reg].word = POP_FAILURE_ITEM ();                   \
-      DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);         \
-                                                                       \
-      regend[this_reg] = (const char *) POP_FAILURE_ITEM ();           \
-      DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);            \
-                                                                       \
-      regstart[this_reg] = (const char *) POP_FAILURE_ITEM ();         \
-      DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);                \
-    }                                                                  \
-                                                                       \
-  DEBUG_STATEMENT (nfailure_points_popped++);                          \
-} /* POP_FAILURE_POINT */
+static reg_errcode_t
+compile_range (p_ptr, pend, translate, syntax, b)
+    const char **p_ptr, *pend;
+    char *translate;   /* Not named below; presumably read by TRANSLATE.  */
+    reg_syntax_t syntax;
+    unsigned char *b;  /* Not named below; presumably used by SET_LIST_BIT.  */
+{
+  unsigned this_char;
+
+  const char *p = *p_ptr;
+  int range_start, range_end;
+  
+  if (p == pend)       /* No character left to serve as the range end.  */
+    return REG_ERANGE;
+
+  /* Even though the pattern is a signed `char *', we need to fetch
+     with unsigned char *'s; if the high bit of the pattern character
+     is set, the range endpoints will be negative if we fetch using a
+     signed char *.
+
+     We also want to fetch the endpoints without translating them; the 
+     appropriate translation is done in the bit-setting loop below.  */
+  range_start = ((unsigned char *) p)[-2];     /* Two bytes before P.  */
+  range_end   = ((unsigned char *) p)[0];      /* The byte at P.  */
+
+  /* Have to increment the pointer into the pattern string, so the
+     caller isn't still at the ending character.  */
+  (*p_ptr)++;
+
+  /* Start after end: empty range, an error only under RE_NO_EMPTY_RANGES.  */
+  if (range_start > range_end)
+    return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
+
+  /* Here we see why `this_char' has to be larger than an `unsigned
+     char' -- the range is inclusive, so if `range_end' == 0xff
+     (assuming 8-bit characters), we would otherwise go into an infinite
+     loop, since all characters <= 0xff.  */
+  for (this_char = range_start; this_char <= range_end; this_char++)
+    {
+      SET_LIST_BIT (TRANSLATE (this_char));    /* One bit per character.  */
+    }
+  
+  return REG_NOERROR;  /* Every character in the range was processed.  */
+}
 \f
 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
@@ -2522,7 +2723,9 @@ re_compile_fastmap (bufp)
      struct re_pattern_buffer *bufp;
 {
   int j, k;
+#ifdef MATCH_MAY_ALLOCATE
   fail_stack_type fail_stack;
+#endif
 #ifndef REGEX_MALLOC
   char *destination;
 #endif
@@ -2532,7 +2735,7 @@ re_compile_fastmap (bufp)
   register char *fastmap = bufp->fastmap;
   unsigned char *pattern = bufp->buffer;
   unsigned long size = bufp->used;
-  const unsigned char *p = pattern;
+  unsigned char *p = pattern;
   register unsigned char *pend = pattern + size;
 
   /* Assume that each path through the pattern can be null until
@@ -2820,7 +3023,7 @@ re_set_registers (bufp, regs, num_regs, starts, ends)
     {
       bufp->regs_allocated = REGS_UNALLOCATED;
       regs->num_regs = 0;
-      regs->start = regs->end = (regoff_t) 0;
+      regs->start = regs->end = (regoff_t *) 0;
     }
 }
 \f
@@ -2928,7 +3131,8 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
                  inside the loop.  */
              if (translate)
                 while (range > lim
-                       && !fastmap[(unsigned char) translate[*d++]])
+                       && !fastmap[(unsigned char)
+                                  translate[(unsigned char) *d++]])
                   range--;
              else
                 while (range > lim && !fastmap[(unsigned char) *d++])
@@ -2984,64 +3188,12 @@ static boolean alt_match_null_string_p (),
                common_op_match_null_string_p (),
                group_match_null_string_p ();
 
-/* Structure for per-register (a.k.a. per-group) information.
-   This must not be longer than one word, because we push this value
-   onto the failure stack.  Other register information, such as the
-   starting and ending positions (which are addresses), and the list of
-   inner groups (which is a bits list) are maintained in separate
-   variables.  
-   
-   We are making a (strictly speaking) nonportable assumption here: that
-   the compiler will pack our bit fields into something that fits into
-   the type of `word', i.e., is something that fits into one item on the
-   failure stack.  */
-typedef union
-{
-  fail_stack_elt_t word;
-  struct
-  {
-      /* This field is one if this group can match the empty string,
-         zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
-#define MATCH_NULL_UNSET_VALUE 3
-    unsigned match_null_string_p : 2;
-    unsigned is_active : 1;
-    unsigned matched_something : 1;
-    unsigned ever_matched_something : 1;
-  } bits;
-} register_info_type;
-
-#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
-#define IS_ACTIVE(R)  ((R).bits.is_active)
-#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
-#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
-
-
-/* Call this when have matched a real character; it sets `matched' flags
-   for the subexpressions which we are currently inside.  Also records
-   that those subexprs have matched.  */
-#define SET_REGS_MATCHED()                                             \
-  do                                                                   \
-    {                                                                  \
-      unsigned r;                                                      \
-      for (r = lowest_active_reg; r <= highest_active_reg; r++)                \
-        {                                                              \
-          MATCHED_SOMETHING (reg_info[r])                              \
-            = EVER_MATCHED_SOMETHING (reg_info[r])                     \
-            = 1;                                                       \
-        }                                                              \
-    }                                                                  \
-  while (0)
-
-
 /* This converts PTR, a pointer into one of the search strings `string1'
    and `string2' into an offset from the beginning of that string.  */
-#define POINTER_TO_OFFSET(ptr)                                         \
-  (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
-
-/* Registers are set to a sentinel when they haven't yet matched.  */
-#define REG_UNSET_VALUE ((char *) -1)
-#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
-
+#define POINTER_TO_OFFSET(ptr)                 \
+  (FIRST_STRING_P (ptr)                                \
+   ? ((regoff_t) ((ptr) - string1))            \
+   : ((regoff_t) ((ptr) - string2 + size1)))
 
 /* Macros for dealing with the split strings in re_match_2.  */
 
@@ -3084,6 +3236,7 @@ typedef union
 
 
 /* Free everything we malloc.  */
+#ifdef MATCH_MAY_ALLOCATE
 #ifdef REGEX_MALLOC
 #define FREE_VAR(var) if (var) free (var); var = NULL
 #define FREE_VARIABLES()                                               \
@@ -3103,7 +3256,9 @@ typedef union
 /* Some MIPS systems (at least) want this to free alloca'd storage.  */
 #define FREE_VARIABLES() alloca (0)
 #endif /* not REGEX_MALLOC */
-
+#else
+#define FREE_VARIABLES() /* Do nothing!  */
+#endif /* not MATCH_MAY_ALLOCATE */
 
 /* These values must meet several constraints.  They must not be valid
    register values; since we have a limit of 255 registers (because
@@ -3184,7 +3339,9 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
      scanning the strings.  If the latter is zero, the failure point is
      a ``dummy''; if a failure happens and the failure point is a dummy,
      it gets discarded and the next next one is tried.  */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
   fail_stack_type fail_stack;
+#endif
 #ifdef DEBUG
   static unsigned failure_id = 0;
   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
@@ -3206,14 +3363,18 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
      matching and the regnum-th regend points to right after where we
      stopped matching the regnum-th subexpression.  (The zeroth register
      keeps track of what the whole pattern matches.)  */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   const char **regstart, **regend;
+#endif
 
   /* If a group that's operated upon by a repetition operator fails to
      match anything, then the register for its start will need to be
      restored because it will have been set to wherever in the string we
      are when we last see its open-group operator.  Similarly for a
      register's end.  */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   const char **old_regstart, **old_regend;
+#endif
 
   /* The is_active field of reg_info helps us keep track of which (possibly
      nested) subexpressions we are currently in. The matched_something
@@ -3221,14 +3382,18 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
      matched any of the pattern so far this time through the reg_num-th
      subexpression.  These two fields get reset each time through any
      loop their register is in.  */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
   register_info_type *reg_info; 
+#endif
 
   /* The following record the register info as found in the above
      variables when we find a match better than any we've seen before. 
      This happens as we backtrack through the failure points, which in
      turn happens only if we have not yet matched the entire string. */
   unsigned best_regs_set = false;
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   const char **best_regstart, **best_regend;
+#endif
   
   /* Logically, this is `best_regend[0]'.  But we don't want to have to
      allocate space for that if we're not allocating space for anything
@@ -3241,8 +3406,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
   const char *match_end = NULL;
 
   /* Used when we pop values we don't care about.  */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   const char **reg_dummy;
   register_info_type *reg_info_dummy;
+#endif
 
 #ifdef DEBUG
   /* Counts the total number of registers pushed.  */
@@ -3253,6 +3420,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
   
   INIT_FAIL_STACK ();
   
+#ifdef MATCH_MAY_ALLOCATE
   /* Do not bother to initialize all the register variables if there are
      no groups in the pattern, as it takes a fair amount of time.  If
      there are groups, we include space for register 0 (the whole
@@ -3277,7 +3445,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
           return -2;
         }
     }
-#ifdef REGEX_MALLOC
+#if defined (REGEX_MALLOC)
   else
     {
       /* We must initialize all our variables to NULL, so that
@@ -3287,6 +3455,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
       reg_info = reg_info_dummy = (register_info_type *) NULL;
     }
 #endif /* REGEX_MALLOC */
+#endif /* MATCH_MAY_ALLOCATE */
 
   /* The starting position is bogus.  */
   if (pos < 0 || pos > size1 + size2)
@@ -3451,7 +3620,11 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
                     }
                 }
               else
-                assert (bufp->regs_allocated == REGS_FIXED);
+               {
+                 /* These braces fend off an "empty body in an else-statement"
+                    warning under GCC when assert expands to nothing.  */
+                 assert (bufp->regs_allocated == REGS_FIXED);
+               }
 
               /* Convert the pointer data in `regstart' and `regend' to
                  indices.  Register zero has to be set differently,
@@ -3459,8 +3632,9 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
               if (regs->num_regs > 0)
                 {
                   regs->start[0] = pos;
-                  regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
-                                 : d - string2 + size1);
+                  regs->end[0] = (MATCHING_IN_FIRST_STRING
+                                 ? ((regoff_t) (d - string1))
+                                 : ((regoff_t) (d - string2 + size1)));
                 }
               
               /* Go through the first `min (num_regs, regs->num_regs)'
@@ -3471,8 +3645,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
                     regs->start[mcnt] = regs->end[mcnt] = -1;
                   else
                     {
-                     regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
-                      regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
+                     regs->start[mcnt]
+                       = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
+                      regs->end[mcnt]
+                       = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
                     }
                }
               
@@ -3981,11 +4157,27 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
                detect that here, the alternative has put on a dummy
                failure point which is what we will end up popping.  */
 
-           /* Skip over open/close-group commands.  */
-           while (p2 + 2 < pend
-                  && ((re_opcode_t) *p2 == stop_memory
-                      || (re_opcode_t) *p2 == start_memory))
-             p2 += 3;                  /* Skip over args, too.  */
+           /* Skip over open/close-group commands.
+              If what follows this loop is a ...+ construct,
+              look at what begins its body, since we will have to
+              match at least one of that.  */
+           while (1)
+             {
+               if (p2 + 2 < pend
+                   && ((re_opcode_t) *p2 == stop_memory
+                       || (re_opcode_t) *p2 == start_memory))
+                 p2 += 3;
+               else if (p2 + 6 < pend
+                        && (re_opcode_t) *p2 == dummy_failure_jump)
+                 p2 += 6;
+               else
+                 break;
+             }
+
+           p1 = p + mcnt;
+           /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
+              to the `maybe_finalize_jump' of this case.  Examine what 
+              follows.  */
 
             /* If we're at the end of the pattern, we can change.  */
             if (p2 == pend)
@@ -4003,11 +4195,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
              {
                register unsigned char c
                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
-               p1 = p + mcnt;
 
-                /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
-                   to the `maybe_finalize_jump' of this case.  Examine what 
-                   follows.  */
                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
                   {
                    p[-3] = (unsigned char) pop_failure_jump;
@@ -4033,6 +4221,54 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
                       }
                  }
              }
+            else if ((re_opcode_t) *p2 == charset)
+             {
+               register unsigned char c
+                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
+
+                if ((re_opcode_t) p1[3] == exactn
+                   && ! (p2[1] * BYTEWIDTH > p1[4]
+                         && (p2[1 + p1[4] / BYTEWIDTH]
+                             & (1 << (p1[4] % BYTEWIDTH)))))
+                  {
+                   p[-3] = (unsigned char) pop_failure_jump;
+                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
+                                  c, p1[5]);
+                  }
+                  
+               else if ((re_opcode_t) p1[3] == charset_not)
+                 {
+                   int idx;
+                   /* We win if the charset_not inside the loop
+                      lists every character listed in the charset after.  */
+                   for (idx = 0; idx < p2[1]; idx++)
+                     if (! (p2[2 + idx] == 0
+                            || (idx < p1[4]
+                                && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
+                       break;
+
+                   if (idx == p2[1])
+                      {
+                       p[-3] = (unsigned char) pop_failure_jump;
+                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
+                      }
+                 }
+               else if ((re_opcode_t) p1[3] == charset)
+                 {
+                   int idx;
+                   /* We win if the charset inside the loop
+                      has no overlap with the one after the loop.  */
+                   for (idx = 0; idx < p2[1] && idx < p1[4]; idx++)
+                     if ((p2[2 + idx] & p1[5 + idx]) != 0)
+                       break;
+
+                   if (idx == p2[1] || idx == p1[4])
+                      {
+                       p[-3] = (unsigned char) pop_failure_jump;
+                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
+                      }
+                 }
+             }
          }
          p -= 2;               /* Point at relative address again.  */
          if ((re_opcode_t) p[-1] != pop_failure_jump)
@@ -4734,6 +4970,7 @@ regcomp (preg, pattern, cflags)
   /* regex_compile will allocate the space for the compiled pattern.  */
   preg->buffer = 0;
   preg->allocated = 0;
+  preg->used = 0;
   
   /* Don't bother to use a fastmap when searching.  This simplifies the
      REG_NEWLINE case: if we used a fastmap, we'd have to put all the
@@ -4867,9 +5104,25 @@ regerror (errcode, preg, errbuf, errbuf_size)
     char *errbuf;
     size_t errbuf_size;
 {
-  const char *msg
-    = re_error_msg[errcode] == NULL ? "Success" : re_error_msg[errcode];
-  size_t msg_size = strlen (msg) + 1; /* Includes the null.  */
+  const char *msg;
+  size_t msg_size;
+
+  if (errcode < 0
+      || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
+    /* Only error codes returned by the rest of the code should be passed 
+       to this routine.  If we are given anything else, or if other regex
+       code generates an invalid error code, then the program has a bug.
+       Dump core so we can fix it.  */
+    abort ();
+
+  msg = re_error_msg[errcode];
+
+  /* POSIX doesn't require that we do anything in this case, but why
+     not be nice.  */
+  if (! msg)
+    msg = "Success";
+
+  msg_size = strlen (msg) + 1; /* Includes the null.  */
   
   if (errbuf_size != 0)
     {