X-Git-Url: https://erislabs.net/gitweb/?a=blobdiff_plain;f=regex.c;h=a57ae00d14fbc967c0ac56259aedd46960fa8026;hb=17a80d8d6ed5943d0785c9534f8fa922d0577aac;hp=71c9dfe4507cb88a08bebc758c221d0ac9b80371;hpb=7566b76a02e12fe4b44bb3762d1dd653ba960975;p=gnulib.git

diff --git a/regex.c b/regex.c
index 71c9dfe45..a57ae00d1 100644
--- a/regex.c
+++ b/regex.c
@@ -22,20 +22,17 @@
 /* TODO:
    - structure the opcode space into opcode+flag.
    - merge with glibc's regex.[ch].
-   - replace succeed_n + jump_n with a combined operation so that the counter
-     can simply be decremented when popping the failure_point without having
-     to stack up failure_count entries.
-   - get rid of `newline_anchor'.
- */
+   - replace (succeed_n + jump_n + set_number_at) with something that doesn't
+     need to modify the compiled regexp so that re_match can be reentrant.
+   - get rid of on_failure_jump_smart by doing the optimization in re_comp
+     rather than at run-time, so that re_match can be reentrant.
+*/
 
 /* AIX requires this to be the first thing in the file. */
 #if defined _AIX && !defined REGEX_MALLOC
   #pragma alloca
 #endif
 
-#undef	_GNU_SOURCE
-#define _GNU_SOURCE
-
 #ifdef HAVE_CONFIG_H
 # include <config.h>
 #endif
@@ -47,6 +44,60 @@
 # include <sys/types.h>
 #endif
 
+/* Whether to use ISO C Amendment 1 wide char functions.
+   Those should not be used for Emacs since it uses its own.  */
+#if defined _LIBC
+#define WIDE_CHAR_SUPPORT 1
+#else
+#define WIDE_CHAR_SUPPORT \
+	(HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC && !emacs)
+#endif
+
+/* For platforms which support the ISO C Amendment 1 functionality we
+   support user defined character classes.  */
+#if WIDE_CHAR_SUPPORT
+/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
+# include <wchar.h>
+# include <wctype.h>
+#endif
+
+#ifdef _LIBC
+/* We have to keep the namespace clean.  */
+# define regfree(preg) __regfree (preg)
+# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
+# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
+# define regerror(errcode, preg, errbuf, errbuf_size) \
+	__regerror(errcode, preg, errbuf, errbuf_size)
+# define re_set_registers(bu, re, nu, st, en) \
+	__re_set_registers (bu, re, nu, st, en)
+# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
+	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+# define re_match(bufp, string, size, pos, regs) \
+	__re_match (bufp, string, size, pos, regs)
+# define re_search(bufp, string, size, startpos, range, regs) \
+	__re_search (bufp, string, size, startpos, range, regs)
+# define re_compile_pattern(pattern, length, bufp) \
+	__re_compile_pattern (pattern, length, bufp)
+# define re_set_syntax(syntax) __re_set_syntax (syntax)
+# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
+	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
+# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
+
+/* Make sure we call libc's function even if the user overrides them.  */
+# define btowc __btowc
+# define iswctype __iswctype
+# define wctype __wctype
+
+# define WEAK_ALIAS(a,b) weak_alias (a, b)
+
+/* We are also using some library internals.  */
+# include <locale/localeinfo.h>
+# include <locale/elem-hash.h>
+# include <langinfo.h>
+#else
+# define WEAK_ALIAS(a,b)
+#endif
+
 /* This is for other GNU distributions with internationalized messages.  */
 #if HAVE_LIBINTL_H || defined _LIBC
 # include <libintl.h>
@@ -74,8 +125,17 @@
 # include "charset.h"
 # include "category.h"
 
+# ifdef malloc
+#  undef malloc
+# endif
 # define malloc xmalloc
+# ifdef realloc
+#  undef realloc
+# endif
 # define realloc xrealloc
+# ifdef free
+#  undef free
+# endif
 # define free xfree
 
 /* Converts the pointer to the char to BEG-based offset from the start.	 */
@@ -97,8 +157,9 @@
        {						    		\
 	 re_char *dtemp = (p) == (str2) ? (end1) : (p);		    	\
 	 re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
-	 while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp));		\
-	 c = STRING_CHAR (dtemp, (p) - dtemp);				\
+	 re_char *d0 = dtemp;						\
+	 PREV_CHAR_BOUNDARY (d0, dlimit);				\
+	 c = STRING_CHAR (d0, dtemp - d0);				\
        }						    		\
      else						    		\
        (c = ((p) == (str2) ? (end1) : (p))[-1]);			\
@@ -175,6 +236,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1 };
 # define SINGLE_BYTE_CHAR_P(c) (1)
 # define SAME_CHARSET_P(c1, c2) (1)
 # define MULTIBYTE_FORM_LENGTH(p, s) (1)
+# define PREV_CHAR_BOUNDARY(p, limit) ((p)--)
 # define STRING_CHAR(p, s) (*(p))
 # define RE_STRING_CHAR STRING_CHAR
 # define CHAR_STRING(c, s) (*(s) = (c), 1)
@@ -222,7 +284,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1 };
 		    ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237)	\
 		    : 1)
 
-# define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c)		\
+# define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c)				\
 		    ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237)	\
 		    : 1)
 
@@ -504,7 +566,7 @@ typedef enum
 	   is followed by a range table:
 	       2 bytes of flags for character sets (low 8 bits, high 8 bits)
 		   See RANGE_TABLE_WORK_BITS below.
-	       2 bytes, the number of pairs that follow
+	       2 bytes, the number of pairs that follow (up to 32767)
 	       pairs, each 2 multibyte characters,
 		   each multibyte character represented as 3 bytes.  */
   charset,
@@ -651,7 +713,7 @@ static void extract_number _RE_ARGS ((int *dest, re_char *source));
 static void
 extract_number (dest, source)
     int *dest;
-    unsigned char *source;
+    re_char *source;
 {
   int temp = SIGN_EXTEND_CHAR (*(source + 1));
   *dest = *source & 0377;
@@ -680,7 +742,7 @@ static void extract_number_and_incr _RE_ARGS ((int *destination,
 static void
 extract_number_and_incr (destination, source)
     int *destination;
-    unsigned char **source;
+    re_char **source;
 {
   extract_number (destination, *source);
   *source += 2;
@@ -754,9 +816,9 @@ extract_number_and_incr (destination, source)
 #define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count)	\
   do									\
     {									\
-      int range_start, range_end;					\
-      unsigned char *p;							\
-      unsigned char *range_table_end					\
+      re_wchar_t range_start, range_end;				\
+      re_char *p;							\
+      re_char *range_table_end						\
 	= CHARSET_RANGE_TABLE_END ((range_table), (count));		\
 									\
       for (p = (range_table); p < range_table_end; p += 2 * 3)		\
@@ -780,8 +842,8 @@ extract_number_and_incr (destination, source)
     {									\
       /* Number of ranges in range table. */				\
       int count;							\
-      unsigned char *range_table = CHARSET_RANGE_TABLE (charset);	\
-									\
+      re_char *range_table = CHARSET_RANGE_TABLE (charset);		\
+      									\
       EXTRACT_NUMBER_AND_INCR (count, range_table);			\
       CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count);	\
     }									\
@@ -850,59 +912,58 @@ print_fastmap (fastmap)
 
 void
 print_partial_compiled_pattern (start, end)
-    unsigned char *start;
-    unsigned char *end;
+    re_char *start;
+    re_char *end;
 {
   int mcnt, mcnt2;
-  unsigned char *p = start;
-  unsigned char *pend = end;
+  re_char *p = start;
+  re_char *pend = end;
 
   if (start == NULL)
     {
-      printf ("(null)\n");
+      fprintf (stderr, "(null)\n");
       return;
     }
 
   /* Loop over pattern commands.  */
   while (p < pend)
     {
-      printf ("%d:\t", p - start);
+      fprintf (stderr, "%d:\t", p - start);
 
       switch ((re_opcode_t) *p++)
 	{
 	case no_op:
-	  printf ("/no_op");
+	  fprintf (stderr, "/no_op");
 	  break;
 
 	case succeed:
-	  printf ("/succeed");
+	  fprintf (stderr, "/succeed");
 	  break;
 
 	case exactn:
 	  mcnt = *p++;
-	  printf ("/exactn/%d", mcnt);
+	  fprintf (stderr, "/exactn/%d", mcnt);
 	  do
 	    {
-	      putchar ('/');
-	      putchar (*p++);
+	      fprintf (stderr, "/%c", *p++);
 	    }
 	  while (--mcnt);
 	  break;
 
 	case start_memory:
-	  printf ("/start_memory/%d", *p++);
+	  fprintf (stderr, "/start_memory/%d", *p++);
 	  break;
 
 	case stop_memory:
-	  printf ("/stop_memory/%d", *p++);
+	  fprintf (stderr, "/stop_memory/%d", *p++);
 	  break;
 
 	case duplicate:
-	  printf ("/duplicate/%d", *p++);
+	  fprintf (stderr, "/duplicate/%d", *p++);
 	  break;
 
 	case anychar:
-	  printf ("/anychar");
+	  fprintf (stderr, "/anychar");
 	  break;
 
 	case charset:
@@ -913,7 +974,7 @@ print_partial_compiled_pattern (start, end)
 	    int length = CHARSET_BITMAP_SIZE (p - 1);
 	    int has_range_table = CHARSET_RANGE_TABLE_EXISTS_P (p - 1);
 
-	    printf ("/charset [%s",
+	    fprintf (stderr, "/charset [%s",
 		    (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
 
 	    assert (p + *p < pend);
@@ -925,33 +986,33 @@ print_partial_compiled_pattern (start, end)
 		  /* Are we starting a range?  */
 		  if (last + 1 == c && ! in_range)
 		    {
-		      putchar ('-');
+		      fprintf (stderr, "-");
 		      in_range = 1;
 		    }
 		  /* Have we broken a range?  */
 		  else if (last + 1 != c && in_range)
 		    {
-		      putchar (last);
+		      fprintf (stderr, "%c", last);
 		      in_range = 0;
 		    }
 
 		  if (! in_range)
-		    putchar (c);
+		    fprintf (stderr, "%c", c);
 
 		  last = c;
 	      }
 
 	    if (in_range)
-	      putchar (last);
+	      fprintf (stderr, "%c", last);
 
-	    putchar (']');
+	    fprintf (stderr, "]");
 
 	    p += 1 + length;
 
 	    if (has_range_table)
 	      {
 		int count;
-		printf ("has-range-table");
+		fprintf (stderr, "has-range-table");
 
 		/* ??? Should print the range table; for now, just skip it.  */
 		p += 2;		/* skip range table bits */
@@ -962,130 +1023,130 @@ print_partial_compiled_pattern (start, end)
 	  break;
 
 	case begline:
-	  printf ("/begline");
+	  fprintf (stderr, "/begline");
 	  break;
 
 	case endline:
-	  printf ("/endline");
+	  fprintf (stderr, "/endline");
 	  break;
 
 	case on_failure_jump:
 	  extract_number_and_incr (&mcnt, &p);
-	  printf ("/on_failure_jump to %d", p + mcnt - start);
+	  fprintf (stderr, "/on_failure_jump to %d", p + mcnt - start);
 	  break;
 
 	case on_failure_keep_string_jump:
 	  extract_number_and_incr (&mcnt, &p);
-	  printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
+	  fprintf (stderr, "/on_failure_keep_string_jump to %d", p + mcnt - start);
 	  break;
 
 	case on_failure_jump_nastyloop:
 	  extract_number_and_incr (&mcnt, &p);
-	  printf ("/on_failure_jump_nastyloop to %d", p + mcnt - start);
+	  fprintf (stderr, "/on_failure_jump_nastyloop to %d", p + mcnt - start);
 	  break;
 
 	case on_failure_jump_loop:
 	  extract_number_and_incr (&mcnt, &p);
-	  printf ("/on_failure_jump_loop to %d", p + mcnt - start);
+	  fprintf (stderr, "/on_failure_jump_loop to %d", p + mcnt - start);
 	  break;
 
 	case on_failure_jump_smart:
 	  extract_number_and_incr (&mcnt, &p);
-	  printf ("/on_failure_jump_smart to %d", p + mcnt - start);
+	  fprintf (stderr, "/on_failure_jump_smart to %d", p + mcnt - start);
 	  break;
 
 	case jump:
 	  extract_number_and_incr (&mcnt, &p);
-	  printf ("/jump to %d", p + mcnt - start);
+	  fprintf (stderr, "/jump to %d", p + mcnt - start);
 	  break;
 
 	case succeed_n:
 	  extract_number_and_incr (&mcnt, &p);
 	  extract_number_and_incr (&mcnt2, &p);
-	  printf ("/succeed_n to %d, %d times", p - 2 + mcnt - start, mcnt2);
+	  fprintf (stderr, "/succeed_n to %d, %d times", p - 2 + mcnt - start, mcnt2);
 	  break;
 
 	case jump_n:
 	  extract_number_and_incr (&mcnt, &p);
 	  extract_number_and_incr (&mcnt2, &p);
-	  printf ("/jump_n to %d, %d times", p - 2 + mcnt - start, mcnt2);
+	  fprintf (stderr, "/jump_n to %d, %d times", p - 2 + mcnt - start, mcnt2);
 	  break;
 
 	case set_number_at:
 	  extract_number_and_incr (&mcnt, &p);
 	  extract_number_and_incr (&mcnt2, &p);
-	  printf ("/set_number_at location %d to %d", p - 2 + mcnt - start, mcnt2);
+	  fprintf (stderr, "/set_number_at location %d to %d", p - 2 + mcnt - start, mcnt2);
 	  break;
 
 	case wordbound:
-	  printf ("/wordbound");
+	  fprintf (stderr, "/wordbound");
 	  break;
 
 	case notwordbound:
-	  printf ("/notwordbound");
+	  fprintf (stderr, "/notwordbound");
 	  break;
 
 	case wordbeg:
-	  printf ("/wordbeg");
+	  fprintf (stderr, "/wordbeg");
 	  break;
 
 	case wordend:
-	  printf ("/wordend");
+	  fprintf (stderr, "/wordend");
 
 	case syntaxspec:
-	  printf ("/syntaxspec");
+	  fprintf (stderr, "/syntaxspec");
 	  mcnt = *p++;
-	  printf ("/%d", mcnt);
+	  fprintf (stderr, "/%d", mcnt);
 	  break;
 
 	case notsyntaxspec:
-	  printf ("/notsyntaxspec");
+	  fprintf (stderr, "/notsyntaxspec");
 	  mcnt = *p++;
-	  printf ("/%d", mcnt);
+	  fprintf (stderr, "/%d", mcnt);
 	  break;
 
 # ifdef emacs
 	case before_dot:
-	  printf ("/before_dot");
+	  fprintf (stderr, "/before_dot");
 	  break;
 
 	case at_dot:
-	  printf ("/at_dot");
+	  fprintf (stderr, "/at_dot");
 	  break;
 
 	case after_dot:
-	  printf ("/after_dot");
+	  fprintf (stderr, "/after_dot");
 	  break;
 
 	case categoryspec:
-	  printf ("/categoryspec");
+	  fprintf (stderr, "/categoryspec");
 	  mcnt = *p++;
-	  printf ("/%d", mcnt);
+	  fprintf (stderr, "/%d", mcnt);
 	  break;
 
 	case notcategoryspec:
-	  printf ("/notcategoryspec");
+	  fprintf (stderr, "/notcategoryspec");
 	  mcnt = *p++;
-	  printf ("/%d", mcnt);
+	  fprintf (stderr, "/%d", mcnt);
 	  break;
 # endif /* emacs */
 
 	case begbuf:
-	  printf ("/begbuf");
+	  fprintf (stderr, "/begbuf");
 	  break;
 
 	case endbuf:
-	  printf ("/endbuf");
+	  fprintf (stderr, "/endbuf");
 	  break;
 
 	default:
-	  printf ("?%d", *(p-1));
+	  fprintf (stderr, "?%d", *(p-1));
 	}
 
-      putchar ('\n');
+      fprintf (stderr, "\n");
     }
 
-  printf ("%d:\tend of pattern.\n", p - start);
+  fprintf (stderr, "%d:\tend of pattern.\n", p - start);
 }
 
 
@@ -1093,7 +1154,7 @@ void
 print_compiled_pattern (bufp)
     struct re_pattern_buffer *bufp;
 {
-  unsigned char *buffer = bufp->buffer;
+  re_char *buffer = bufp->buffer;
 
   print_partial_compiled_pattern (buffer, buffer + bufp->used);
   printf ("%ld bytes used/%ld bytes allocated.\n",
@@ -1108,7 +1169,6 @@ print_compiled_pattern (bufp)
   printf ("re_nsub: %d\t", bufp->re_nsub);
   printf ("regs_alloc: %d\t", bufp->regs_allocated);
   printf ("can_be_null: %d\t", bufp->can_be_null);
-  printf ("newline_anchor: %d\n", bufp->newline_anchor);
   printf ("no_sub: %d\t", bufp->no_sub);
   printf ("not_bol: %d\t", bufp->not_bol);
   printf ("not_eol: %d\t", bufp->not_eol);
@@ -1184,6 +1244,7 @@ re_set_syntax (syntax)
   re_syntax_options = syntax;
   return ret;
 }
+WEAK_ALIAS (__re_set_syntax, re_set_syntax)
 
 /* This table gives an error message for each of the error codes listed
    in regex.h.  Obviously the order here has to be same as there.
@@ -1264,21 +1325,23 @@ static const char *re_error_msgid[] =
 /* Roughly the maximum number of failure points on the stack.  Would be
    exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
    This is a variable only so users of regex can assign to it; we never
-   change it ourselves.	 */
-#if defined MATCH_MAY_ALLOCATE
-/* Note that 4400 is enough to cause a crash on Alpha OSF/1,
+   change it ourselves.  We always multiply it by TYPICAL_FAILURE_SIZE
+   before using it, so it should probably be a byte-count instead.  */
+# if defined MATCH_MAY_ALLOCATE
+/* Note that 4400 was enough to cause a crash on Alpha OSF/1,
    whose default stack limit is 2mb.  In order for a larger
    value to work reliably, you have to try to make it accord
    with the process stack limit.  */
-int re_max_failures = 40000;
-#else
-int re_max_failures = 4000;
-#endif
+size_t re_max_failures = 40000;
+# else
+size_t re_max_failures = 4000;
+# endif
 
 union fail_stack_elt
 {
-   const unsigned char *pointer;
-  unsigned int integer;
+  re_char *pointer;
+  /* This should be the biggest `int' that's no bigger than a pointer.  */
+  long integer;
 };
 
 typedef union fail_stack_elt fail_stack_elt_t;
@@ -1286,12 +1349,11 @@ typedef union fail_stack_elt fail_stack_elt_t;
 typedef struct
 {
   fail_stack_elt_t *stack;
-  unsigned size;
-  unsigned avail;		/* Offset of next open position.  */
-  unsigned frame;		/* Offset of the cur constructed frame.  */
+  size_t size;
+  size_t avail;	/* Offset of next open position.  */
+  size_t frame;	/* Offset of the cur constructed frame.  */
 } fail_stack_type;
 
-#define PATTERN_STACK_EMPTY()     (fail_stack.avail == 0)
 #define FAIL_STACK_EMPTY()     (fail_stack.frame == 0)
 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
 
@@ -1363,22 +1425,11 @@ typedef struct
 	 1)))
 
 
-/* Push pointer POINTER on FAIL_STACK.
-   Return 1 if was able to do so and 0 if ran out of memory allocating
-   space to do so.  */
-#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
-  ((FAIL_STACK_FULL ()							\
-    && !GROW_FAIL_STACK (FAIL_STACK))					\
-   ? 0									\
-   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
-      1))
-#define POP_PATTERN_OP() POP_FAILURE_POINTER ()
-
 /* Push a pointer value onto the failure stack.
    Assumes the variable `fail_stack'.  Probably should only
    be called from within `PUSH_FAILURE_POINT'.  */
 #define PUSH_FAILURE_POINTER(item)					\
-  fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
+  fail_stack.stack[fail_stack.avail++].pointer = (item)
 
 /* This pushes an integer-valued item onto the failure stack.
    Assumes the variable `fail_stack'.  Probably should only
@@ -1428,16 +1479,19 @@ do {									\
   PUSH_FAILURE_INT (num);						\
 } while (0)
 
-#define PUSH_FAILURE_COUNT(ptr)						\
+/* Change the counter's value to VAL, but make sure that it will
+   be reset when backtracking.  */
+#define PUSH_NUMBER(ptr,val)						\
 do {									\
   char *destination;							\
   int c;								\
   ENSURE_FAIL_STACK(3);							\
   EXTRACT_NUMBER (c, ptr);						\
-  DEBUG_PRINT3 ("    Push counter %p = %d\n", ptr, c);			\
+  DEBUG_PRINT4 ("    Push number %p = %d -> %d\n", ptr, c, val);	\
   PUSH_FAILURE_INT (c);							\
   PUSH_FAILURE_POINTER (ptr);						\
   PUSH_FAILURE_INT (-1);						\
+  STORE_NUMBER (ptr, val);						\
 } while (0)
 
 /* Pop a saved register off the stack.  */
@@ -1447,6 +1501,7 @@ do {									\
   if (reg == -1)							\
     {									\
       /* It's a counter.  */						\
+      /* Here, we discard `const', making re_match non-reentrant.  */	\
       unsigned char *ptr = (unsigned char*) POP_FAILURE_POINTER ();	\
       reg = POP_FAILURE_INT ();						\
       STORE_NUMBER (ptr, reg);						\
@@ -1464,22 +1519,25 @@ do {									\
 /* Check that we are not stuck in an infinite loop.  */
 #define CHECK_INFINITE_LOOP(pat_cur, string_place)			\
 do {									\
-  int failure = TOP_FAILURE_HANDLE();					\
+  int failure = TOP_FAILURE_HANDLE ();					\
   /* Check for infinite matching loops */				\
-  while (failure > 0 &&							\
-	 (FAILURE_STR (failure) == string_place				\
-	  || FAILURE_STR (failure) == NULL))				\
+  while (failure > 0							\
+	 && (FAILURE_STR (failure) == string_place			\
+	     || FAILURE_STR (failure) == NULL))				\
     {									\
       assert (FAILURE_PAT (failure) >= bufp->buffer			\
 	      && FAILURE_PAT (failure) <= bufp->buffer + bufp->used);	\
       if (FAILURE_PAT (failure) == pat_cur)				\
-	goto fail;							\
+	{								\
+	  cycle = 1;							\
+	  break;							\
+	}								\
       DEBUG_PRINT2 ("  Other pattern: %p\n", FAILURE_PAT (failure));	\
       failure = NEXT_FAILURE_HANDLE(failure);				\
     }									\
   DEBUG_PRINT2 ("  Other string: %p\n", FAILURE_STR (failure));		\
 } while (0)
-    
+
 /* Push the information about the state we will need
    if we ever fail back to it.
 
@@ -1523,7 +1581,7 @@ do {									\
 /* Estimate the size of data pushed by a typical failure stack entry.
    An estimate is all we need, because all we use this for
    is to choose a limit for how big to make the failure stack.  */
-
+/* BEWARE, the value `20' is hard-coded in emacs.c:main().  */
 #define TYPICAL_FAILURE_SIZE 20
 
 /* How many items can still be added to the stack without overflowing it.  */
@@ -1553,14 +1611,14 @@ do {									\
   while (fail_stack.frame < fail_stack.avail)				\
     POP_FAILURE_REG_OR_COUNT ();					\
 									\
-  pat = (unsigned char *) POP_FAILURE_POINTER ();			\
+  pat = POP_FAILURE_POINTER ();				\
   DEBUG_PRINT2 ("  Popping pattern %p: ", pat);				\
   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
 									\
   /* If the saved string location is NULL, it came from an		\
      on_failure_keep_string_jump opcode, and we want to throw away the	\
      saved NULL, thus retaining our current position in the string.  */	\
-  str = (re_char *) POP_FAILURE_POINTER ();				\
+  str = POP_FAILURE_POINTER ();						\
   DEBUG_PRINT2 ("  Popping string %p: `", str);				\
   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
   DEBUG_PRINT1 ("'\n");							\
@@ -1591,29 +1649,19 @@ static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
 				  int arg, unsigned char *end));
 static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
 				  int arg1, int arg2, unsigned char *end));
-static boolean at_begline_loc_p _RE_ARGS ((const unsigned char *pattern,
-					   const unsigned char *p,
+static boolean at_begline_loc_p _RE_ARGS ((re_char *pattern,
+					   re_char *p,
 					   reg_syntax_t syntax));
-static boolean at_endline_loc_p _RE_ARGS ((const unsigned char *p,
-					   const unsigned char *pend,
+static boolean at_endline_loc_p _RE_ARGS ((re_char *p,
+					   re_char *pend,
 					   reg_syntax_t syntax));
-static unsigned char *skip_one_char _RE_ARGS ((unsigned char *p));
-static int analyse_first _RE_ARGS ((unsigned char *p, unsigned char *pend,
+static re_char *skip_one_char _RE_ARGS ((re_char *p));
+static int analyse_first _RE_ARGS ((re_char *p, re_char *pend,
 				    char *fastmap, const int multibyte));
 
-/* Fetch the next character in the uncompiled pattern---translating it
-   if necessary.  Also cast from a signed character in the constant
-   string passed to us by the user to an unsigned char that we can use
-   as an array index (in, e.g., `translate').  */
-#define PATFETCH(c)							\
-  do {									\
-    PATFETCH_RAW (c);							\
-    c = TRANSLATE (c);							\
-  } while (0)
-
 /* Fetch the next character in the uncompiled pattern, with no
    translation.  */
-#define PATFETCH_RAW(c)							\
+#define PATFETCH(c)							\
   do {									\
     int len;								\
     if (p == pend) return REG_EEND;					\
@@ -1639,7 +1687,7 @@ static int analyse_first _RE_ARGS ((unsigned char *p, unsigned char *pend,
 
 /* Make sure we have at least N more bytes of space in buffer.  */
 #define GET_BUFFER_SPACE(n)						\
-    while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
+    while ((size_t) (b - bufp->buffer + (n)) > bufp->allocated)		\
       EXTEND_BUFFER ()
 
 /* Make sure we have one more byte of buffer space and then add C to it.  */
@@ -1728,13 +1776,13 @@ static int analyse_first _RE_ARGS ((unsigned char *p, unsigned char *pend,
 #endif
 #define EXTEND_BUFFER()							\
   do {									\
-    unsigned char *old_buffer = bufp->buffer;				\
+    re_char *old_buffer = bufp->buffer;					\
     if (bufp->allocated == MAX_BUF_SIZE)				\
       return REG_ESIZE;							\
     bufp->allocated <<= 1;						\
     if (bufp->allocated > MAX_BUF_SIZE)					\
       bufp->allocated = MAX_BUF_SIZE;					\
-    bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
+    RETALLOC (bufp->buffer, bufp->allocated, unsigned char);		\
     if (bufp->buffer == NULL)						\
       return REG_ESPACE;						\
     /* If the buffer moved, move all the pointers into it.  */		\
@@ -1761,7 +1809,7 @@ static int analyse_first _RE_ARGS ((unsigned char *p, unsigned char *pend,
 
 /* But patterns can have more than `MAX_REGNUM' registers.  We just
    ignore the excess.  */
-typedef unsigned regnum_t;
+typedef int regnum_t;
 
 
 /* Macros for the compile stack.  */
@@ -1796,7 +1844,17 @@ typedef struct
 /* The next available element.  */
 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
 
-
+/* Explicit quit checking is only used on NTemacs.  */
+#if defined WINDOWSNT && defined emacs && defined QUIT
+extern int immediate_quit;
+# define IMMEDIATE_QUIT_CHECK			\
+    do {					\
+      if (immediate_quit) QUIT;			\
+    } while (0)
+#else
+# define IMMEDIATE_QUIT_CHECK    ((void)0)
+#endif
+
 /* Structure to manage work area for range table.  */
 struct range_table_work_area
 {
@@ -1806,48 +1864,42 @@ struct range_table_work_area
   int bits;			/* flag to record character classes */
 };
 
-/* Make sure that WORK_AREA can hold more N multibyte characters.  */
-#define EXTEND_RANGE_TABLE_WORK_AREA(work_area, n)			  \
-  do {									  \
-    if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated)  \
-      {									  \
-	(work_area).allocated += 16 * sizeof (int);			  \
-	if ((work_area).table)						  \
-	  (work_area).table						  \
-	    = (int *) realloc ((work_area).table, (work_area).allocated); \
-	else								  \
-	  (work_area).table						  \
-	    = (int *) malloc ((work_area).allocated);			  \
-	if ((work_area).table == 0)					  \
-	  FREE_STACK_RETURN (REG_ESPACE);				  \
-      }									  \
+/* Make sure that WORK_AREA can hold N more multibyte characters.
+   This is used only in set_image_of_range and set_image_of_range_1.
+   It expects WORK_AREA to be a pointer.
+   If it can't get the space, it returns from the surrounding function.  */
+
+#define EXTEND_RANGE_TABLE(work_area, n)				\
+  do {									\
+    if (((work_area)->used + (n)) * sizeof (int) > (work_area)->allocated) \
+      {									\
+        extend_range_table_work_area (work_area);			\
+        if ((work_area)->table == 0)					\
+          return (REG_ESPACE);						\
+      }									\
   } while (0)
 
 #define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit)		\
   (work_area).bits |= (bit)
 
-/* These bits represent the various character classes such as [:alnum:]
-   in a charset's range table.  */
-#define BIT_ALNUM 0x1
-#define BIT_ALPHA 0x2
-#define BIT_WORD  0x4
-#define BIT_ASCII 0x8
-#define BIT_NONASCII 0x10
-#define BIT_GRAPH 0x20
-#define BIT_LOWER 0x40
-#define BIT_PRINT 0x80
-#define BIT_PUNCT 0x100
-#define BIT_SPACE 0x200
-#define BIT_UPPER 0x400
-#define BIT_UNIBYTE 0x800
-#define BIT_MULTIBYTE 0x1000
-
-/* Set a range (RANGE_START, RANGE_END) to WORK_AREA.  */
-#define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end)	\
+/* Bits used to implement the multibyte-part of the various character classes
+   such as [:alnum:] in a charset's range table.  */
+#define BIT_WORD	0x1
+#define BIT_LOWER	0x2
+#define BIT_PUNCT	0x4
+#define BIT_SPACE	0x8
+#define BIT_UPPER	0x10
+#define BIT_MULTIBYTE	0x20
+
+/* Set a range START..END to WORK_AREA.
+   The range is passed through TRANSLATE, so START and END
+   should be untranslated.  */
+#define SET_RANGE_TABLE_WORK_AREA(work_area, start, end)		\
   do {									\
-    EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2);			\
-    (work_area).table[(work_area).used++] = (range_start);		\
-    (work_area).table[(work_area).used++] = (range_end);		\
+    int tem;								\
+    tem = set_image_of_range (&work_area, start, end, translate);	\
+    if (tem > 0)							\
+      FREE_STACK_RETURN (tem);						\
   } while (0)
 
 /* Free allocated memory for WORK_AREA.	 */
@@ -1861,12 +1913,10 @@ struct range_table_work_area
 #define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
 #define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits)
 #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
-
+
 
 /* Set the bit for character C in a list.  */
-#define SET_LIST_BIT(c)							\
-  (b[((unsigned char) (c)) / BYTEWIDTH]					\
-   |= 1 << (((unsigned char) c) % BYTEWIDTH))
+#define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
 
 
 /* Get the next unsigned number in the uncompiled pattern.  */
@@ -1874,37 +1924,389 @@ struct range_table_work_area
  do { if (p != pend)							\
      {									\
        PATFETCH (c);							\
+       if (c == ' ')							\
+	 FREE_STACK_RETURN (REG_BADBR);					\
        while ('0' <= c && c <= '9')					\
 	 {								\
+           int prev;							\
 	   if (num < 0)							\
-	      num = 0;							\
+	     num = 0;							\
+	   prev = num;							\
 	   num = num * 10 + c - '0';					\
+	   if (num / 10 != prev)					\
+	     FREE_STACK_RETURN (REG_BADBR);				\
 	   if (p == pend)						\
-	      break;							\
+	     break;							\
 	   PATFETCH (c);						\
 	 }								\
+       if (c == ' ')							\
+	 FREE_STACK_RETURN (REG_BADBR);					\
        }								\
     } while (0)
+
+#if WIDE_CHAR_SUPPORT
+/* The GNU C library provides support for user-defined character classes
+   and the functions from ISO C amendment 1.  */
+# ifdef CHARCLASS_NAME_MAX
+#  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
+# else
+/* This shouldn't happen but some implementation might still have this
+   problem.  Use a reasonable default value.  */
+#  define CHAR_CLASS_MAX_LENGTH 256
+# endif
+typedef wctype_t re_wctype_t;
+typedef wchar_t re_wchar_t;
+# define re_wctype wctype
+# define re_iswctype iswctype
+# define re_wctype_to_bit(cc) 0
+#else
+# define CHAR_CLASS_MAX_LENGTH  9 /* Namely, `multibyte'.  */
+# define btowc(c) c
+
+/* Character classes.  RECC_ERROR (0) stands for "no such class".  */
+typedef enum { RECC_ERROR = 0,
+	       RECC_ALNUM, RECC_ALPHA, RECC_WORD,
+	       RECC_GRAPH, RECC_PRINT,
+	       RECC_LOWER, RECC_UPPER,
+	       RECC_PUNCT, RECC_CNTRL,
+	       RECC_DIGIT, RECC_XDIGIT,
+	       RECC_BLANK, RECC_SPACE,
+	       RECC_MULTIBYTE, RECC_NONASCII,
+	       RECC_ASCII, RECC_UNIBYTE
+} re_wctype_t;
+
+typedef int re_wchar_t;
+
+/* Map the class name STR to its RECC_* code; RECC_ERROR if it names none.  */
+static re_wctype_t
+re_wctype (str)
+     re_char *str;
+{
+  const char *string = (const char *) str; /* re_char * is not char *.  */
+  if      (STREQ (string, "alnum"))	return RECC_ALNUM;
+  else if (STREQ (string, "alpha"))	return RECC_ALPHA;
+  else if (STREQ (string, "word"))	return RECC_WORD;
+  else if (STREQ (string, "ascii"))	return RECC_ASCII;
+  else if (STREQ (string, "nonascii"))	return RECC_NONASCII;
+  else if (STREQ (string, "graph"))	return RECC_GRAPH;
+  else if (STREQ (string, "lower"))	return RECC_LOWER;
+  else if (STREQ (string, "print"))	return RECC_PRINT;
+  else if (STREQ (string, "punct"))	return RECC_PUNCT;
+  else if (STREQ (string, "space"))	return RECC_SPACE;
+  else if (STREQ (string, "upper"))	return RECC_UPPER;
+  else if (STREQ (string, "unibyte"))	return RECC_UNIBYTE;
+  else if (STREQ (string, "multibyte"))	return RECC_MULTIBYTE;
+  else if (STREQ (string, "digit"))	return RECC_DIGIT;
+  else if (STREQ (string, "xdigit"))	return RECC_XDIGIT;
+  else if (STREQ (string, "cntrl"))	return RECC_CNTRL;
+  else if (STREQ (string, "blank"))	return RECC_BLANK;
+  else return RECC_ERROR;
+}
+
+/* Report whether character CH belongs to the character class CC.
+   RECC_ERROR has no members.  A CC outside the enumeration is a
+   programming error and aborts.  */
+static boolean
+re_iswctype (ch, cc)
+     int ch;
+     re_wctype_t cc;
+{
+  if (cc == RECC_ERROR)		return false;
+  if (cc == RECC_ALNUM)		return ISALNUM (ch);
+  if (cc == RECC_ALPHA)		return ISALPHA (ch);
+  if (cc == RECC_WORD)		return ISWORD (ch);
+  if (cc == RECC_GRAPH)		return ISGRAPH (ch);
+  if (cc == RECC_PRINT)		return ISPRINT (ch);
+  if (cc == RECC_LOWER)		return ISLOWER (ch);
+  if (cc == RECC_UPPER)		return ISUPPER (ch);
+  if (cc == RECC_PUNCT)		return ISPUNCT (ch);
+  if (cc == RECC_CNTRL)		return ISCNTRL (ch);
+  if (cc == RECC_DIGIT)		return ISDIGIT (ch);
+  if (cc == RECC_XDIGIT)	return ISXDIGIT (ch);
+  if (cc == RECC_BLANK)		return ISBLANK (ch);
+  if (cc == RECC_SPACE)		return ISSPACE (ch);
+  if (cc == RECC_MULTIBYTE)	return !ISUNIBYTE (ch);
+  if (cc == RECC_NONASCII)	return !IS_REAL_ASCII (ch);
+  if (cc == RECC_ASCII)		return IS_REAL_ASCII (ch);
+  if (cc == RECC_UNIBYTE)	return ISUNIBYTE (ch);
+
+  /* Not a class code that re_wctype can produce.  */
+  abort();
+}
 
-#define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
-
-#define IS_CHAR_CLASS(string)						\
-   (STREQ (string, "alpha") || STREQ (string, "upper")			\
-    || STREQ (string, "lower") || STREQ (string, "digit")		\
-    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
-    || STREQ (string, "space") || STREQ (string, "print")		\
-    || STREQ (string, "punct") || STREQ (string, "graph")		\
-    || STREQ (string, "cntrl") || STREQ (string, "blank")		\
-    || STREQ (string, "word")						\
-    || STREQ (string, "ascii") || STREQ (string, "nonascii")		\
-    || STREQ (string, "unibyte") || STREQ (string, "multibyte"))
-
-/* QUIT is only used on NTemacs.  */
-#if !defined WINDOWSNT || !defined emacs || !defined QUIT
-# undef QUIT
-# define QUIT
+/* Give the range-table flag bit that a charset needs in order to match
+   the multibyte chars of class CC; 0 when the class sets no such bit.  */
+static int
+re_wctype_to_bit (cc)
+     re_wctype_t cc;
+{
+  if (cc == RECC_LOWER) return BIT_LOWER;
+  if (cc == RECC_UPPER) return BIT_UPPER;
+  if (cc == RECC_PUNCT) return BIT_PUNCT;
+  if (cc == RECC_SPACE) return BIT_SPACE;
+  if (cc == RECC_ALPHA || cc == RECC_ALNUM || cc == RECC_WORD)
+    return BIT_WORD;
+  if (cc == RECC_NONASCII || cc == RECC_PRINT || cc == RECC_GRAPH
+      || cc == RECC_MULTIBYTE)
+    return BIT_MULTIBYTE;
+  if (cc == RECC_ASCII || cc == RECC_DIGIT || cc == RECC_XDIGIT
+      || cc == RECC_CNTRL || cc == RECC_BLANK || cc == RECC_UNIBYTE
+      || cc == RECC_ERROR)
+    return 0;
+  abort();
+}
 #endif
 
+/* Filling in the work area of a range.  */
+
+/* Grow WORK_AREA's table by room for 16 more ints.  On allocation failure
+   the table is left NULL and the previous block is freed, not leaked.  */
+static void
+extend_range_table_work_area (work_area)
+     struct range_table_work_area *work_area;
+{
+  int *old_table = work_area->table;
+  work_area->allocated += 16 * sizeof (int);
+  work_area->table = (old_table
+		      ? (int *) realloc (old_table, work_area->allocated)
+		      : (int *) malloc (work_area->allocated));
+  if (work_area->table == NULL && old_table != NULL)
+    free (old_table);
+}
+
+#ifdef emacs
+
+/* Record in WORK_AREA the ranges of codes that are equivalent under
+   case conversion to the range START..END, using the equivalence table
+   taken from TRANSLATE.  Handle the case where non-letters can come in
+   between two upper-case letters (which happens in Latin-1).
+   Also handle the case of groups of more than 2 case-equivalent chars.
+
+   The basic method is to look at consecutive characters and see
+   if they can form a run that can be handled as one.
+
+   Returns -1 if successful, REG_ESPACE if ran out of space.  */
+
+static int
+set_image_of_range_1 (work_area, start, end, translate)
+     RE_TRANSLATE_TYPE translate;
+     struct range_table_work_area *work_area;
+     re_wchar_t start, end;
+{
+  /* `one_case' indicates a character, or a run of characters,
+     each of which is an isolate (no case-equivalents).
+     This includes all ASCII non-letters.
+
+     `two_case' indicates a character, or a run of characters,
+     each of which has two case-equivalent forms.
+     This includes all ASCII letters.
+
+     `strange' indicates a character that has more than one
+     case-equivalent.  */
+
+  enum case_type {one_case, two_case, strange};
+
+  /* Describe the run that is in progress,
+     which the next character can try to extend.
+     If run_type is strange, that means there really is no run.
+     If run_type is one_case, then run_start...run_end is the run.
+     If run_type is two_case, then the run is run_start...run_end,
+     and the case-equivalents end at run_eqv_end.  */
+
+  enum case_type run_type = strange;
+  int run_start, run_end, run_eqv_end; /* Unset while run_type is strange.  */
+
+  Lisp_Object eqv_table;
+
+  if (!RE_TRANSLATE_P (translate))
+    { /* No translation table: record START..END unchanged.  */
+      EXTEND_RANGE_TABLE (work_area, 2);
+      work_area->table[work_area->used++] = (start);
+      work_area->table[work_area->used++] = (end);
+      return -1;
+    }
+
+  eqv_table = XCHAR_TABLE (translate)->extras[2];
+
+  for (; start <= end; start++)
+    {
+      enum case_type this_type;
+      int eqv = RE_TRANSLATE (eqv_table, start);
+      int minchar, maxchar;
+
+      /* Classify this character by the length of its equivalence cycle.  */
+      if (eqv == start)
+	this_type = one_case;
+      else if (RE_TRANSLATE (eqv_table, eqv) == start)
+	this_type = two_case;
+      else
+	this_type = strange;
+
+      if (start < eqv)
+	minchar = start, maxchar = eqv;
+      else
+	minchar = eqv, maxchar = start;
+
+      /* Can this character extend the run in progress?  */
+      if (this_type == strange || this_type != run_type
+	  || !(minchar == run_end + 1
+	       && (run_type == two_case
+		   ? maxchar == run_eqv_end + 1 : 1)))
+	{
+	  /* No, end the run.
+	     Record each of its equivalent ranges.  */
+	  if (run_type == one_case)
+	    {
+	      EXTEND_RANGE_TABLE (work_area, 2);
+	      work_area->table[work_area->used++] = run_start;
+	      work_area->table[work_area->used++] = run_end;
+	    }
+	  else if (run_type == two_case)
+	    {
+	      EXTEND_RANGE_TABLE (work_area, 4);
+	      work_area->table[work_area->used++] = run_start;
+	      work_area->table[work_area->used++] = run_end;
+	      work_area->table[work_area->used++]
+		= RE_TRANSLATE (eqv_table, run_start);
+	      work_area->table[work_area->used++]
+		= RE_TRANSLATE (eqv_table, run_end);
+	    }
+	  run_type = strange;
+	}
+
+      if (this_type == strange)
+	{
+	  /* For a strange character, add each of its equivalents, one
+	     by one.  Don't start a range.  */
+	  do
+	    {
+	      EXTEND_RANGE_TABLE (work_area, 2);
+	      work_area->table[work_area->used++] = eqv;
+	      work_area->table[work_area->used++] = eqv;
+	      eqv = RE_TRANSLATE (eqv_table, eqv);
+	    }
+	  while (eqv != start);
+	}
+
+      /* Add this char to the run, or start a new run.  */
+      else if (run_type == strange)
+	{
+	  /* Initialize a new range.  */
+	  run_type = this_type;
+	  run_start = start;
+	  run_end = start;
+	  run_eqv_end = RE_TRANSLATE (eqv_table, run_end);
+	}
+      else
+	{
+	  /* Extend a running range.  */
+	  run_end = minchar;
+	  run_eqv_end = RE_TRANSLATE (eqv_table, run_end);
+	}
+    }
+
+  /* If a run is still in progress at the end, finish it now
+     by recording its equivalent ranges.  */
+  if (run_type == one_case)
+    {
+      EXTEND_RANGE_TABLE (work_area, 2);
+      work_area->table[work_area->used++] = run_start;
+      work_area->table[work_area->used++] = run_end;
+    }
+  else if (run_type == two_case)
+    {
+      EXTEND_RANGE_TABLE (work_area, 4);
+      work_area->table[work_area->used++] = run_start;
+      work_area->table[work_area->used++] = run_end;
+      work_area->table[work_area->used++]
+	= RE_TRANSLATE (eqv_table, run_start);
+      work_area->table[work_area->used++]
+	= RE_TRANSLATE (eqv_table, run_end);
+    }
+
+  return -1;
+}
+
+#endif /* emacs */
+
+/* Record in WORK_AREA the image of the range START..END when passed
+   through TRANSLATE.  This is not necessarily
+   TRANSLATE(start)..TRANSLATE(end) and is not even necessarily contiguous.
+   Normally we approximate it with the smallest contiguous range that contains
+   all the chars we need.  However, for Latin-1 we go to extra effort
+   to do a better job.
+
+   This function is not called for ASCII ranges.
+
+   Returns -1 if successful, REG_ESPACE if ran out of space.  */
+
+static int
+set_image_of_range (work_area, start, end, translate)
+     RE_TRANSLATE_TYPE translate;
+     struct range_table_work_area *work_area;
+     re_wchar_t start, end;
+{
+  re_wchar_t cmin, cmax;
+
+#ifdef emacs
+  /* For Latin-1 ranges, use set_image_of_range_1
+     to get proper handling of ranges that include letters and nonletters.
+     For a range that includes the whole of Latin-1, this is not necessary.
+     For other character sets, we don't bother to get this right.  */
+  if (RE_TRANSLATE_P (translate) && start < 04400
+      && !(start < 04200 && end >= 04377))
+    {
+      int newend;
+      int tem;
+      newend = end;
+      if (newend > 04377)
+	newend = 04377;
+      tem = set_image_of_range_1 (work_area, start, newend, translate);
+      if (tem > 0)
+	return tem;
+
+      start = 04400; /* The part below 04400 is recorded.  */
+      if (end < 04400)
+	return -1;
+    }
+#endif
+
+  EXTEND_RANGE_TABLE (work_area, 2);
+  work_area->table[work_area->used++] = (start);
+  work_area->table[work_area->used++] = (end);
+
+  cmin = -1, cmax = -1; /* -1: no image outside START..END seen yet.  */
+
+  if (RE_TRANSLATE_P (translate))
+    {
+      int ch;
+
+      for (ch = start; ch <= end; ch++)
+	{
+	  re_wchar_t c = TRANSLATE (ch);
+	  if (! (start <= c && c <= end))
+	    {
+	      if (cmin == -1)
+		cmin = c, cmax = c;
+	      else
+		{
+		  cmin = MIN (cmin, c);
+		  cmax = MAX (cmax, c);
+		}
+	    }
+	}
+
+      if (cmin != -1)
+	{
+	  EXTEND_RANGE_TABLE (work_area, 2);
+	  work_area->table[work_area->used++] = (cmin);
+	  work_area->table[work_area->used++] = (cmax);
+	}
+    }
+
+  return -1;
+}
+
 #ifndef MATCH_MAY_ALLOCATE
 
 /* If we cannot allocate large objects within re_match_2_internal,
@@ -1963,8 +2365,7 @@ static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
      `re_nsub' is the number of subexpressions in PATTERN;
      `not_bol' and `not_eol' are zero;
 
-   The `fastmap' and `newline_anchor' fields are neither
-   examined nor set.  */
+   The `fastmap' field is neither examined nor set.  */
 
 /* Insert the `jump' from the end of last alternative to "here".
    The space for the jump has already been allocated. */
@@ -1990,10 +2391,8 @@ regex_compile (pattern, size, syntax, bufp)
      reg_syntax_t syntax;
      struct re_pattern_buffer *bufp;
 {
-  /* We fetch characters from PATTERN here.  Even though PATTERN is
-     `char *' (i.e., signed), we declare these variables as unsigned, so
-     they can be reliably used as array indices.  */
-  register unsigned int c, c1;
+  /* We fetch characters from PATTERN here.  */
+  register re_wchar_t c, c1;
 
   /* A random temporary spot in PATTERN.  */
   re_char *p1;
@@ -2126,7 +2525,7 @@ regex_compile (pattern, size, syntax, bufp)
 		|| syntax & RE_CONTEXT_INDEP_ANCHORS
 		   /* Otherwise, depends on what's come before.	 */
 		|| at_begline_loc_p (pattern, p, syntax))
-	      BUF_PUSH (begline);
+	      BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? begbuf : begline);
 	    else
 	      goto normal_char;
 	  }
@@ -2141,7 +2540,7 @@ regex_compile (pattern, size, syntax, bufp)
 		|| syntax & RE_CONTEXT_INDEP_ANCHORS
 		   /* Otherwise, depends on what's next.  */
 		|| at_endline_loc_p (p, pend, syntax))
-	       BUF_PUSH (endline);
+	       BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? endbuf : endline);
 	     else
 	       goto normal_char;
 	   }
@@ -2220,10 +2619,11 @@ regex_compile (pattern, size, syntax, bufp)
 		    boolean simple = skip_one_char (laststart) == b;
 		    unsigned int startoffset = 0;
 		    re_opcode_t ofj =
-		      (simple || !analyse_first (laststart, b, NULL, 0)) ?
-		      on_failure_jump : on_failure_jump_loop;
+		      /* Check if the loop can match the empty string.  */
+		      (simple || !analyse_first (laststart, b, NULL, 0))
+		      ? on_failure_jump : on_failure_jump_loop;
 		    assert (skip_one_char (laststart) <= b);
-		    
+
 		    if (!zero_times_ok && simple)
 		      { /* Since simple * loops can be made faster by using
 		    	   on_failure_keep_string_jump, we turn simple P+
@@ -2269,8 +2669,9 @@ regex_compile (pattern, size, syntax, bufp)
 		  {
 		    boolean emptyp = analyse_first (laststart, b, NULL, 0);
 
-		    /* The non-greedy multiple match looks like a repeat..until:
-		       we only need a conditional jump at the end of the loop */
+		    /* The non-greedy multiple match looks like
+		       a repeat..until: we only need a conditional jump
+		       at the end of the loop.  */
 		    if (emptyp) BUF_PUSH (no_op);
 		    STORE_JUMP (emptyp ? on_failure_jump_nastyloop
 				: on_failure_jump, b, laststart);
@@ -2279,7 +2680,7 @@ regex_compile (pattern, size, syntax, bufp)
 		      {
 			/* The repeat...until naturally matches one or more.
 			   To also match zero times, we need to first jump to
-			   the end of the loop (its conditional jump). */
+			   the end of the loop (its conditional jump).  */
 			INSERT_JUMP (jump, laststart, b);
 			b += 3;
 		      }
@@ -2344,6 +2745,10 @@ regex_compile (pattern, size, syntax, bufp)
 
 		if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
 
+		/* Don't translate yet.  The range TRANSLATE(X..Y) cannot
+		   always be determined from TRANSLATE(X) and TRANSLATE(Y).
+		   So the translation is done later in a loop.  Example:
+		   (let ((case-fold-search t)) (string-match "[A-_]" "A"))  */
 		PATFETCH (c);
 
 		/* \ might escape characters inside [...] and [^...].  */
@@ -2374,7 +2779,7 @@ regex_compile (pattern, size, syntax, bufp)
 		    syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
 		  {
 		    /* Leave room for the null.	 */
-		    char str[CHAR_CLASS_MAX_LENGTH + 1];
+		    unsigned char str[CHAR_CLASS_MAX_LENGTH + 1];
 		    const unsigned char *class_beg;
 
 		    PATFETCH (c);
@@ -2386,11 +2791,14 @@ regex_compile (pattern, size, syntax, bufp)
 
 		    for (;;)
 		      {
-			PATFETCH (c);
-			if (c == ':' || c == ']' || p == pend
-			    || c1 == CHAR_CLASS_MAX_LENGTH)
-			  break;
-			str[c1++] = c;
+		        PATFETCH (c);
+		        if ((c == ':' && *p == ']') || p == pend)
+		          break;
+			if (c1 < CHAR_CLASS_MAX_LENGTH)
+			  str[c1++] = c;
+			else
+			  /* This is in any case an invalid class name.  */
+			  str[0] = '\0';
 		      }
 		    str[c1] = '\0';
 
@@ -2400,90 +2808,35 @@ regex_compile (pattern, size, syntax, bufp)
 		       them).  */
 		    if (c == ':' && *p == ']')
 		      {
-			int ch;
-			boolean is_alnum = STREQ (str, "alnum");
-			boolean is_alpha = STREQ (str, "alpha");
-			boolean is_ascii = STREQ (str, "ascii");
-			boolean is_blank = STREQ (str, "blank");
-			boolean is_cntrl = STREQ (str, "cntrl");
-			boolean is_digit = STREQ (str, "digit");
-			boolean is_graph = STREQ (str, "graph");
-			boolean is_lower = STREQ (str, "lower");
-			boolean is_multibyte = STREQ (str, "multibyte");
-			boolean is_nonascii = STREQ (str, "nonascii");
-			boolean is_print = STREQ (str, "print");
-			boolean is_punct = STREQ (str, "punct");
-			boolean is_space = STREQ (str, "space");
-			boolean is_unibyte = STREQ (str, "unibyte");
-			boolean is_upper = STREQ (str, "upper");
-			boolean is_word = STREQ (str, "word");
-			boolean is_xdigit = STREQ (str, "xdigit");
-
-			if (!IS_CHAR_CLASS (str))
+			re_wchar_t ch;
+			re_wctype_t cc;
+
+			cc = re_wctype (str);
+
+			if (cc == 0)
 			  FREE_STACK_RETURN (REG_ECTYPE);
 
-			/* Throw away the ] at the end of the character
-			   class.  */
-			PATFETCH (c);
+                        /* Throw away the ] at the end of the character
+                           class.  */
+                        PATFETCH (c);
 
-			if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
 
 			/* Most character classes in a multibyte match
 			   just set a flag.  Exceptions are is_blank,
 			   is_digit, is_cntrl, and is_xdigit, since
 			   they can only match ASCII characters.  We
-			   don't need to handle them for multibyte.  */
+			   don't need to handle them for multibyte.
+			   For them re_wctype_to_bit returns 0.  */
 
 			if (multibyte)
-			  {
-			    int bit = 0;
-
-			    if (is_alnum) bit = BIT_ALNUM;
-			    if (is_alpha) bit = BIT_ALPHA;
-			    if (is_ascii) bit = BIT_ASCII;
-			    if (is_graph) bit = BIT_GRAPH;
-			    if (is_lower) bit = BIT_LOWER;
-			    if (is_multibyte) bit = BIT_MULTIBYTE;
-			    if (is_nonascii) bit = BIT_NONASCII;
-			    if (is_print) bit = BIT_PRINT;
-			    if (is_punct) bit = BIT_PUNCT;
-			    if (is_space) bit = BIT_SPACE;
-			    if (is_unibyte) bit = BIT_UNIBYTE;
-			    if (is_upper) bit = BIT_UPPER;
-			    if (is_word) bit = BIT_WORD;
-			    if (bit)
-			      SET_RANGE_TABLE_WORK_AREA_BIT (range_table_work,
-							     bit);
-			  }
+			  SET_RANGE_TABLE_WORK_AREA_BIT (range_table_work,
+							 re_wctype_to_bit (cc));
 
-			/* Handle character classes for ASCII characters.  */
-			for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
+                        for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
 			  {
 			    int translated = TRANSLATE (ch);
-			    /* This was split into 3 if's to
-			       avoid an arbitrary limit in some compiler.  */
-			    if (   (is_alnum  && ISALNUM (ch))
-				|| (is_alpha  && ISALPHA (ch))
-				|| (is_blank  && ISBLANK (ch))
-				|| (is_cntrl  && ISCNTRL (ch)))
-			      SET_LIST_BIT (translated);
-			    if (   (is_digit  && ISDIGIT (ch))
-				|| (is_graph  && ISGRAPH (ch))
-				|| (is_lower  && ISLOWER (ch))
-				|| (is_print  && ISPRINT (ch)))
-			      SET_LIST_BIT (translated);
-			    if (   (is_punct  && ISPUNCT (ch))
-				|| (is_space  && ISSPACE (ch))
-				|| (is_upper  && ISUPPER (ch))
-				|| (is_xdigit && ISXDIGIT (ch)))
-			      SET_LIST_BIT (translated);
-			    if (   (is_ascii  && IS_REAL_ASCII (ch))
-				|| (is_nonascii && !IS_REAL_ASCII (ch))
-				|| (is_unibyte && ISUNIBYTE (ch))
-				|| (is_multibyte && !ISUNIBYTE (ch)))
-			      SET_LIST_BIT (translated);
-
-			    if (   (is_word   && ISWORD (ch)))
+			    if (re_iswctype (btowc (ch), cc))
 			      SET_LIST_BIT (translated);
 			  }
 
@@ -2516,18 +2869,19 @@ regex_compile (pattern, size, syntax, bufp)
 		      {
 			if (! SINGLE_BYTE_CHAR_P (c1))
 			  {
-			    /* Handle a range such as \177-\377 in
-			       multibyte mode.  Split that into two
-			       ranges, the low one ending at 0237, and
-			       the high one starting at the smallest
-			       character in the charset of C1 and
-			       ending at C1.  */
+			    /* Handle a range starting with a
+			       character of less than 256, and ending
+			       with a character of not less than 256.
+			       Split that into two ranges, the low one
+			       ending at 0377, and the high one
+			       starting at the smallest character in
+			       the charset of C1 and ending at C1.  */
 			    int charset = CHAR_CHARSET (c1);
-			    int c2 = MAKE_CHAR (charset, 0, 0);
-			    
+			    re_wchar_t c2 = MAKE_CHAR (charset, 0, 0);
+
 			    SET_RANGE_TABLE_WORK_AREA (range_table_work,
 						       c2, c1);
-			    c1 = 0237;
+			    c1 = 0377;
 			  }
 		      }
 		    else if (!SAME_CHARSET_P (c, c1))
@@ -2541,8 +2895,8 @@ regex_compile (pattern, size, syntax, bufp)
 		if (SINGLE_BYTE_CHAR_P (c))
 		  /* ... into bitmap.  */
 		  {
-		    unsigned this_char;
-		    int range_start = c, range_end = c1;
+		    re_wchar_t this_char;
+		    re_wchar_t range_start = c, range_end = c1;
 
 		    /* If the start is after the end, the range is empty.  */
 		    if (range_start > range_end)
@@ -2639,7 +2993,7 @@ regex_compile (pattern, size, syntax, bufp)
 	  /* Do not translate the character after the \, so that we can
 	     distinguish, e.g., \B from \b, even if we normally would
 	     translate, e.g., B to b.  */
-	  PATFETCH_RAW (c);
+	  PATFETCH (c);
 
 	  switch (c)
 	    {
@@ -2863,99 +3217,99 @@ regex_compile (pattern, size, syntax, bufp)
 		      goto unfetch_interval;
 		  }
 
-		 if (upper_bound == 0)
-		   /* If the upper bound is zero, just drop the sub pattern
-		      altogether.  */
-		   b = laststart;
-		 else if (lower_bound == 1 && upper_bound == 1)
-		   /* Just match it once: nothing to do here.  */
-		   ;
-
-		 /* Otherwise, we have a nontrivial interval.  When
-		    we're all done, the pattern will look like:
-		      set_number_at <jump count> <upper bound>
-		      set_number_at <succeed_n count> <lower bound>
-		      succeed_n <after jump addr> <succeed_n count>
-		      <body of loop>
-		      jump_n <succeed_n addr> <jump count>
-		    (The upper bound and `jump_n' are omitted if
-		    `upper_bound' is 1, though.)  */
-		 else
-		   { /* If the upper bound is > 1, we need to insert
-			more at the end of the loop.  */
-		     unsigned int nbytes = (upper_bound < 0 ? 3
-					    : upper_bound > 1 ? 5 : 0);
-		     unsigned int startoffset = 0;
-
-		     GET_BUFFER_SPACE (20); /* We might use less.  */
-
-		     if (lower_bound == 0)
-		       {
-			 /* A succeed_n that starts with 0 is really a
-			    a simple on_failure_jump_loop.  */
-			 INSERT_JUMP (on_failure_jump_loop, laststart,
-				      b + 3 + nbytes);
-			 b += 3;
-		       }
-		     else
-		       {
-			 /* Initialize lower bound of the `succeed_n', even
-			    though it will be set during matching by its
-			    attendant `set_number_at' (inserted next),
-			    because `re_compile_fastmap' needs to know.
-			    Jump to the `jump_n' we might insert below.  */
-			 INSERT_JUMP2 (succeed_n, laststart,
-				       b + 5 + nbytes,
-				       lower_bound);
-			 b += 5;
-
-			 /* Code to initialize the lower bound.  Insert
-			    before the `succeed_n'.	 The `5' is the last two
-			    bytes of this `set_number_at', plus 3 bytes of
-			    the following `succeed_n'.  */
-			 insert_op2 (set_number_at, laststart, 5, lower_bound, b);
-			 b += 5;
-			 startoffset += 5;
-		       }
-
-		     if (upper_bound < 0)
-		       {
-			 /* A negative upper bound stands for infinity,
-			    in which case it degenerates to a plain jump.  */
-			 STORE_JUMP (jump, b, laststart + startoffset);
-			 b += 3;
-		       }
-		     else if (upper_bound > 1)
-		       { /* More than one repetition is allowed, so
-			    append a backward jump to the `succeed_n'
-			    that starts this interval.
-
-			    When we've reached this during matching,
-			    we'll have matched the interval once, so
-			    jump back only `upper_bound - 1' times.  */
-			 STORE_JUMP2 (jump_n, b, laststart + startoffset,
-				      upper_bound - 1);
-			 b += 5;
-
-			 /* The location we want to set is the second
-			    parameter of the `jump_n'; that is `b-2' as
-			    an absolute address.  `laststart' will be
-			    the `set_number_at' we're about to insert;
-			    `laststart+3' the number to set, the source
-			    for the relative address.  But we are
-			    inserting into the middle of the pattern --
-			    so everything is getting moved up by 5.
-			    Conclusion: (b - 2) - (laststart + 3) + 5,
-			    i.e., b - laststart.
-
-			    We insert this at the beginning of the loop
-			    so that if we fail during matching, we'll
-			    reinitialize the bounds.  */
-			 insert_op2 (set_number_at, laststart, b - laststart,
-				     upper_bound - 1, b);
-			 b += 5;
-		       }
-		   }
+		if (upper_bound == 0)
+		  /* If the upper bound is zero, just drop the sub pattern
+		     altogether.  */
+		  b = laststart;
+		else if (lower_bound == 1 && upper_bound == 1)
+		  /* Just match it once: nothing to do here.  */
+		  ;
+
+		/* Otherwise, we have a nontrivial interval.  When
+		   we're all done, the pattern will look like:
+		   set_number_at <jump count> <upper bound>
+		   set_number_at <succeed_n count> <lower bound>
+		   succeed_n <after jump addr> <succeed_n count>
+		   <body of loop>
+		   jump_n <succeed_n addr> <jump count>
+		   (The upper bound and `jump_n' are omitted if
+		   `upper_bound' is 1, though.)  */
+		else
+		  { /* If the upper bound is > 1, we need to insert
+		       more at the end of the loop.  */
+		    unsigned int nbytes = (upper_bound < 0 ? 3
+					   : upper_bound > 1 ? 5 : 0);
+		    unsigned int startoffset = 0;
+
+		    GET_BUFFER_SPACE (20); /* We might use less.  */
+
+		    if (lower_bound == 0)
+		      {
+			/* A succeed_n that starts with 0 is really a
+			   simple on_failure_jump_loop.  */
+			INSERT_JUMP (on_failure_jump_loop, laststart,
+				     b + 3 + nbytes);
+			b += 3;
+		      }
+		    else
+		      {
+			/* Initialize lower bound of the `succeed_n', even
+			   though it will be set during matching by its
+			   attendant `set_number_at' (inserted next),
+			   because `re_compile_fastmap' needs to know.
+			   Jump to the `jump_n' we might insert below.  */
+			INSERT_JUMP2 (succeed_n, laststart,
+				      b + 5 + nbytes,
+				      lower_bound);
+			b += 5;
+
+			/* Code to initialize the lower bound.  Insert
+			   before the `succeed_n'.	 The `5' is the last two
+			   bytes of this `set_number_at', plus 3 bytes of
+			   the following `succeed_n'.  */
+			insert_op2 (set_number_at, laststart, 5, lower_bound, b);
+			b += 5;
+			startoffset += 5;
+		      }
+
+		    if (upper_bound < 0)
+		      {
+			/* A negative upper bound stands for infinity,
+			   in which case it degenerates to a plain jump.  */
+			STORE_JUMP (jump, b, laststart + startoffset);
+			b += 3;
+		      }
+		    else if (upper_bound > 1)
+		      { /* More than one repetition is allowed, so
+			   append a backward jump to the `succeed_n'
+			   that starts this interval.
+
+			   When we've reached this during matching,
+			   we'll have matched the interval once, so
+			   jump back only `upper_bound - 1' times.  */
+			STORE_JUMP2 (jump_n, b, laststart + startoffset,
+				     upper_bound - 1);
+			b += 5;
+
+			/* The location we want to set is the second
+			   parameter of the `jump_n'; that is `b-2' as
+			   an absolute address.  `laststart' will be
+			   the `set_number_at' we're about to insert;
+			   `laststart+3' the number to set, the source
+			   for the relative address.  But we are
+			   inserting into the middle of the pattern --
+			   so everything is getting moved up by 5.
+			   Conclusion: (b - 2) - (laststart + 3) + 5,
+			   i.e., b - laststart.
+
+			   We insert this at the beginning of the loop
+			   so that if we fail during matching, we'll
+			   reinitialize the bounds.  */
+			insert_op2 (set_number_at, laststart, b - laststart,
+				    upper_bound - 1, b);
+			b += 5;
+		      }
+		  }
 		pending_exact = 0;
 		beg_interval = NULL;
 	      }
@@ -2999,13 +3353,13 @@ regex_compile (pattern, size, syntax, bufp)
 
 	    case 'c':
 	      laststart = b;
-	      PATFETCH_RAW (c);
+	      PATFETCH (c);
 	      BUF_PUSH_2 (categoryspec, c);
 	      break;
 
 	    case 'C':
 	      laststart = b;
-	      PATFETCH_RAW (c);
+	      PATFETCH (c);
 	      BUF_PUSH_2 (notcategoryspec, c);
 	      break;
 #endif /* emacs */
@@ -3065,20 +3419,21 @@ regex_compile (pattern, size, syntax, bufp)
 
 	    case '1': case '2': case '3': case '4': case '5':
 	    case '6': case '7': case '8': case '9':
-	      if (syntax & RE_NO_BK_REFS)
-		goto normal_char;
+	      {
+		regnum_t reg;
 
-	      c1 = c - '0';
+		if (syntax & RE_NO_BK_REFS)
+		  goto normal_backslash;
 
-	      if (c1 > regnum)
-		FREE_STACK_RETURN (REG_ESUBREG);
+		reg = c - '0';
 
-	      /* Can't back reference to a subexpression if inside of it.  */
-	      if (group_in_compile_stack (compile_stack, (regnum_t) c1))
-		goto normal_char;
+		/* Can't back reference to a subexpression before its end.  */
+		if (reg > regnum || group_in_compile_stack (compile_stack, reg))
+		  FREE_STACK_RETURN (REG_ESUBREG);
 
-	      laststart = b;
-	      BUF_PUSH_2 (duplicate, c1);
+		laststart = b;
+		BUF_PUSH_2 (duplicate, reg);
+	      }
 	      break;
 
 
@@ -3094,7 +3449,6 @@ regex_compile (pattern, size, syntax, bufp)
 	      /* You might think it would be useful for \ to mean
 		 not to translate; but if we don't translate it
 		 it will never match anything.  */
-	      c = TRANSLATE (c);
 	      goto normal_char;
 	    }
 	  break;
@@ -3103,7 +3457,7 @@ regex_compile (pattern, size, syntax, bufp)
 	default:
 	/* Expects the character in `c'.  */
 	normal_char:
-	      /* If no exactn currently being built.  */
+	  /* If no exactn currently being built.  */
 	  if (!pending_exact
 
 	      /* If last exactn not at current position.  */
@@ -3134,6 +3488,7 @@ regex_compile (pattern, size, syntax, bufp)
 	  {
 	    int len;
 
+	    c = TRANSLATE (c);
 	    if (multibyte)
 	      len = CHAR_STRING (c, b);
 	    else
@@ -3159,8 +3514,6 @@ regex_compile (pattern, size, syntax, bufp)
   if (syntax & RE_NO_POSIX_BACKTRACKING)
     BUF_PUSH (succeed);
 
-  free (compile_stack.stack);
-
   /* We have succeeded; set the length of the buffer.  */
   bufp->used = b - bufp->buffer;
 
@@ -3200,7 +3553,7 @@ regex_compile (pattern, size, syntax, bufp)
   }
 #endif /* not MATCH_MAY_ALLOCATE */
 
-  return REG_NOERROR;
+  FREE_STACK_RETURN (REG_NOERROR);
 } /* regex_compile */
 
 /* Subroutines for `regex_compile'.  */
@@ -3277,10 +3630,10 @@ insert_op2 (op, loc, arg1, arg2, end)
 
 static boolean
 at_begline_loc_p (pattern, p, syntax)
-    const unsigned char *pattern, *p;
+    re_char *pattern, *p;
     reg_syntax_t syntax;
 {
-  const unsigned char *prev = p - 2;
+  re_char *prev = p - 2;
   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
 
   return
@@ -3301,12 +3654,12 @@ at_begline_loc_p (pattern, p, syntax)
 
 static boolean
 at_endline_loc_p (p, pend, syntax)
-    const unsigned char *p, *pend;
+    re_char *p, *pend;
     reg_syntax_t syntax;
 {
-  const unsigned char *next = p;
+  re_char *next = p;
   boolean next_backslash = *next == '\\';
-  const unsigned char *next_next = p + 1 < pend ? p + 1 : 0;
+  re_char *next_next = p + 1 < pend ? p + 1 : 0;
 
   return
        /* Before a subexpression?  */
@@ -3345,36 +3698,16 @@ group_in_compile_stack (compile_stack, regnum)
 
    Return 1  if p..pend might match the empty string.
    Return 0  if p..pend matches at least one char.
-   Return -1 if p..pend matches at least one char, but fastmap was not
-      updated accurately.
-   Return -2 if an error occurred.  */
+   Return -1 if fastmap was not updated accurately.  */
 
 static int
 analyse_first (p, pend, fastmap, multibyte)
-     unsigned char *p, *pend;
+     re_char *p, *pend;
      char *fastmap;
      const int multibyte;
 {
   int j, k;
   boolean not;
-#ifdef MATCH_MAY_ALLOCATE
-  fail_stack_type fail_stack;
-#endif
-#ifndef REGEX_MALLOC
-  char *destination;
-#endif
-
-#if defined REL_ALLOC && defined REGEX_MALLOC
-  /* This holds the pointer to the failure stack, when
-     it is allocated relocatably.  */
-  fail_stack_elt_t *failure_stack_ptr;
-#endif
-
-  /* Assume that each path through the pattern can be null until
-     proven otherwise.  We set this false at the bottom of switch
-     statement, to which we get only if a particular path doesn't
-     match the empty string.  */
-  boolean path_can_be_null = true;
 
   /* If all elements for base leading-codes in fastmap is set, this
      flag is set true.	*/
@@ -3382,8 +3715,6 @@ analyse_first (p, pend, fastmap, multibyte)
 
   assert (p);
 
-  INIT_FAIL_STACK ();
-
   /* The loop below works as follows:
      - It has a working-list kept in the PATTERN_STACK and which basically
        starts by only containing a pointer to the first operation.
@@ -3399,8 +3730,7 @@ analyse_first (p, pend, fastmap, multibyte)
      so that `p' is monotonically increasing.  More to the point, we
      never set `p' (or push) anything `<= p1'.  */
 
-  /* If can_be_null is set, then the fastmap will not be used anyway.  */
-  while (1)
+  while (p < pend)
     {
       /* `p1' is used as a marker of how far back a `on_failure_jump'
 	 can go without being ignored.  It is normally equal to `p'
@@ -3410,29 +3740,12 @@ analyse_first (p, pend, fastmap, multibyte)
 	    3..9: <body>
 	    10: on_failure_jump 3
 	 as used for the *? operator.  */
-      unsigned char *p1 = p;
-
-      if (p >= pend)
-	{
-	  if (path_can_be_null)
-	    return (RESET_FAIL_STACK (), 1);
-
-	  /* We have reached the (effective) end of pattern.  */
-	  if (PATTERN_STACK_EMPTY ())
-	    return (RESET_FAIL_STACK (), 0);
-
-	  p = (unsigned char*) POP_PATTERN_OP ();
-	  path_can_be_null = true;
-	  continue;
-	}
-
-      /* We should never be about to go beyond the end of the pattern.	*/
-      assert (p < pend);
+      re_char *p1 = p;
 
       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
 	{
 	case succeed:
-	  p = pend;
+	  return 1;
 	  continue;
 
 	case duplicate:
@@ -3464,7 +3777,7 @@ analyse_first (p, pend, fastmap, multibyte)
 	  /* We could put all the chars except for \n (and maybe \0)
 	     but we don't bother since it is generally not worth it.  */
 	  if (!fastmap) break;
-	  return (RESET_FAIL_STACK (), -1);
+	  return -1;
 
 
 	case charset_not:
@@ -3539,7 +3852,7 @@ analyse_first (p, pend, fastmap, multibyte)
 #else  /* emacs */
 	  /* This match depends on text properties.  These end with
 	     aborting optimizations.  */
-	  return (RESET_FAIL_STACK (), -1);
+	  return -1;
 
 	case categoryspec:
 	case notcategoryspec:
@@ -3606,8 +3919,14 @@ analyse_first (p, pend, fastmap, multibyte)
 	  EXTRACT_NUMBER_AND_INCR (j, p);
 	  if (p + j <= p1)
 	    ; /* Backward jump to be ignored.  */
-	  else if (!PUSH_PATTERN_OP (p + j, fail_stack))
-	    return (RESET_FAIL_STACK (), -2);
+	  else
+	    { /* We have to look down both arms.
+		 We first go down the "straight" path so as to minimize
+		 stack usage when going through alternatives.  */
+	      int r = analyse_first (p, pend, fastmap, multibyte);
+	      if (r) return r;
+	      p += j;
+	    }
 	  continue;
 
 
@@ -3619,7 +3938,7 @@ analyse_first (p, pend, fastmap, multibyte)
 	     case has already been handled, so we only need to look at the
 	     fallthrough case.  */
 	  continue;
-	  
+
 	case succeed_n:
 	  /* If N == 0, it should be an on_failure_jump_loop instead.  */
 	  DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); assert (j > 0));
@@ -3647,15 +3966,13 @@ analyse_first (p, pend, fastmap, multibyte)
 
       /* Getting here means we have found the possible starting
 	 characters for one path of the pattern -- and that the empty
-	 string does not match.	 We need not follow this path further.
-	 Instead, look at the next alternative (remembered on the
-	 stack), or quit if no more.  The test at the top of the loop
-	 does these things.  */
-      path_can_be_null = false;
-      p = pend;
+	 string does not match.	 We need not follow this path further.  */
+      return 0;
     } /* while p */
 
-  return (RESET_FAIL_STACK (), 0);
+  /* We reached the end without matching anything.  */
+  return 1;
+
 } /* analyse_first */
 
 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
@@ -3689,8 +4006,6 @@ re_compile_fastmap (bufp)
 
   analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
 			    fastmap, RE_MULTIBYTE_P (bufp));
-  if (analysis < -1)
-    return analysis;
   bufp->can_be_null = (analysis != 0);
   return 0;
 } /* re_compile_fastmap */
@@ -3729,6 +4044,7 @@ re_set_registers (bufp, regs, num_regs, starts, ends)
       regs->start = regs->end = (regoff_t *) 0;
     }
 }
+WEAK_ALIAS (__re_set_registers, re_set_registers)
 
 /* Searching routines.	*/
 
@@ -3745,6 +4061,11 @@ re_search (bufp, string, size, startpos, range, regs)
   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
 		      regs, size);
 }
+WEAK_ALIAS (__re_search, re_search)
+
+/* Head address of virtual concatenation of string.  */
+#define HEAD_ADDR_VSTRING(P)		\
+  (((P) >= size1 ? string2 : string1))
 
 /* End address of virtual concatenation of string.  */
 #define STOP_ADDR_VSTRING(P)				\
@@ -3792,7 +4113,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
   register RE_TRANSLATE_TYPE translate = bufp->translate;
   int total_size = size1 + size2;
   int endpos = startpos + range;
-  int anchored_start = 0;
+  boolean anchored_start;
 
   /* Nonzero if we have to concern multibyte character.	 */
   const boolean multibyte = RE_MULTIBYTE_P (bufp);
@@ -3832,12 +4153,10 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
 
   /* Update the fastmap now if not correct already.  */
   if (fastmap && !bufp->fastmap_accurate)
-    if (re_compile_fastmap (bufp) == -2)
-      return -2;
+    re_compile_fastmap (bufp);
 
   /* See whether the pattern is anchored.  */
-  if (bufp->buffer[0] == begline)
-    anchored_start = 1;
+  anchored_start = (bufp->buffer[0] == begline);
 
 #ifdef emacs
   gl_state.object = re_match_object;
@@ -3857,10 +4176,9 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
 	 because that case doesn't repeat.  */
       if (anchored_start && startpos > 0)
 	{
-	  if (! (bufp->newline_anchor
-		 && ((startpos <= size1 ? string1[startpos - 1]
-		      : string2[startpos - size1 - 1])
-		     == '\n')))
+	  if (! ((startpos <= size1 ? string1[startpos - 1]
+		  : string2[startpos - size1 - 1])
+		 == '\n'))
 	    goto advance;
 	}
 
@@ -3871,7 +4189,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
       if (fastmap && startpos < total_size && !bufp->can_be_null)
 	{
 	  register re_char *d;
-	  register unsigned int buf_ch;
+	  register re_wchar_t buf_ch;
 
 	  d = POS_ADDR_VSTRING (startpos);
 
@@ -3984,31 +4302,23 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
 	  /* Update STARTPOS to the previous character boundary.  */
 	  if (multibyte)
 	    {
-	      re_char *p = POS_ADDR_VSTRING (startpos);
-	      int len = 0;
+	      re_char *p = POS_ADDR_VSTRING (startpos) + 1;
+	      re_char *p0 = p;
+	      re_char *phead = HEAD_ADDR_VSTRING (startpos);
 
 	      /* Find the head of multibyte form.  */
-	      while (!CHAR_HEAD_P (*p))
-		p--, len++;
-
-	      /* Adjust it. */
-#if 0				/* XXX */
-	      if (MULTIBYTE_FORM_LENGTH (p, len + 1) != (len + 1))
-		;
-	      else
-#endif
-		{
-		  range += len;
-		  if (range > 0)
-		    break;
+	      PREV_CHAR_BOUNDARY (p, phead);
+	      range += p0 - 1 - p;
+	      if (range > 0)
+		break;
 
-		  startpos -= len;
-		}
+	      startpos -= p0 - 1 - p;
 	    }
 	}
     }
   return -1;
 } /* re_search_2 */
+WEAK_ALIAS (__re_search_2, re_search_2)
 
 /* Declarations and macros for re_match_2.  */
 
@@ -4103,15 +4413,15 @@ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
 
 /* If the operation is a match against one or more chars,
    return a pointer to the next operation, else return NULL.  */
-static unsigned char *
+static re_char *
 skip_one_char (p)
-     unsigned char *p;
+     re_char *p;
 {
   switch (SWITCH_ENUM_CAST (*p++))
     {
     case anychar:
       break;
-      
+
     case exactn:
       p += *p + 1;
       break;
@@ -4128,7 +4438,7 @@ skip_one_char (p)
       else
 	p += 1 + CHARSET_BITMAP_SIZE (p - 1);
       break;
-      
+
     case syntaxspec:
     case notsyntaxspec:
 #ifdef emacs
@@ -4211,16 +4521,13 @@ mutually_exclusive_p (bufp, p1, p2)
 	  return 1;
 	}
       break;
-      
+
     case endline:
-      if (!bufp->newline_anchor)
-	break;
-      /* Fallthrough */
     case exactn:
       {
-	register unsigned int c
+	register re_wchar_t c
 	  = (re_opcode_t) *p2 == endline ? '\n'
-	  : RE_STRING_CHAR(p2 + 2, pend - p2 - 2);
+	  : RE_STRING_CHAR (p2 + 2, pend - p2 - 2);
 
 	if ((re_opcode_t) *p1 == exactn)
 	  {
@@ -4265,13 +4572,11 @@ mutually_exclusive_p (bufp, p1, p2)
       break;
 
     case charset:
-    case charset_not:
       {
 	if ((re_opcode_t) *p1 == exactn)
 	  /* Reuse the code above.  */
 	  return mutually_exclusive_p (bufp, p2, p1);
 
-
       /* It is hard to list up all the character in charset
 	 P2 if it includes multibyte character.  Give up in
 	 such case.  */
@@ -4287,7 +4592,7 @@ mutually_exclusive_p (bufp, p1, p2)
 	     P2 is ASCII, it is enough to test only bitmap
 	     table of P1.  */
 
-	  if (*p1 == *p2)
+	  if ((re_opcode_t) *p1 == charset)
 	    {
 	      int idx;
 	      /* We win if the charset inside the loop
@@ -4306,8 +4611,7 @@ mutually_exclusive_p (bufp, p1, p2)
 		  return 1;
 		}
 	    }
-	  else if ((re_opcode_t) *p1 == charset
-		   || (re_opcode_t) *p1 == charset_not)
+	  else if ((re_opcode_t) *p1 == charset_not)
 	    {
 	      int idx;
 	      /* We win if the charset_not inside the loop lists
@@ -4326,7 +4630,24 @@ mutually_exclusive_p (bufp, p1, p2)
 	      }
 	  }
       }
-      
+      break;
+
+    case charset_not:
+      switch (SWITCH_ENUM_CAST (*p1))
+	{
+	case exactn:
+	case charset:
+	  /* Reuse the code above.  */
+	  return mutually_exclusive_p (bufp, p2, p1);
+	case charset_not:
+	  /* When we have two charset_not, it's very unlikely that
+	     they don't overlap.  The union of the two sets of excluded
+	     chars should cover all possible chars, which, as a matter of
+	     fact, is virtually impossible in multibyte buffers.  */
+	  break;
+	}
+      break;
+
     case wordend:
     case notsyntaxspec:
       return ((re_opcode_t) *p1 == syntaxspec
@@ -4377,6 +4698,7 @@ re_match (bufp, string, size, pos, regs)
 # endif
   return result;
 }
+WEAK_ALIAS (__re_match, re_match)
 #endif /* not emacs */
 
 #ifdef emacs
@@ -4424,6 +4746,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
 #endif
   return result;
 }
+WEAK_ALIAS (__re_match_2, re_match_2)
 
 /* This is a separate function so that we can force an alloca cleanup
    afterwards.	*/
@@ -4438,8 +4761,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 {
   /* General temporaries.  */
   int mcnt;
+  size_t reg;
   boolean not;
-  unsigned char *p1;
 
   /* Just past the end of the corresponding string.  */
   re_char *end1, *end2;
@@ -4458,8 +4781,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
   re_char *dfail;
 
   /* Where we are in the pattern, and the end of the pattern.  */
-  unsigned char *p = bufp->buffer;
-  register unsigned char *pend = p + bufp->used;
+  re_char *p = bufp->buffer;
+  re_char *pend = p + bufp->used;
 
   /* We use this to map every character in the string.	*/
   RE_TRANSLATE_TYPE translate = bufp->translate;
@@ -4568,8 +4891,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
   /* Initialize subexpression text positions to -1 to mark ones that no
      start_memory/stop_memory has been seen for. Also initialize the
      register information struct.  */
-  for (mcnt = 1; mcnt < num_regs; mcnt++)
-    regstart[mcnt] = regend[mcnt] = NULL;
+  for (reg = 1; reg < num_regs; reg++)
+    regstart[reg] = regend[reg] = NULL;
 
   /* We move `string1' into `string2' if the latter's empty -- but not if
      `string1' is null.	 */
@@ -4671,10 +4994,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 
 		      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
 
-		      for (mcnt = 1; mcnt < num_regs; mcnt++)
+		      for (reg = 1; reg < num_regs; reg++)
 			{
-			  best_regstart[mcnt] = regstart[mcnt];
-			  best_regend[mcnt] = regend[mcnt];
+			  best_regstart[reg] = regstart[reg];
+			  best_regend[reg] = regend[reg];
 			}
 		    }
 		  goto fail;
@@ -4697,10 +5020,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		  dend = ((d >= string1 && d <= end1)
 			   ? end_match_1 : end_match_2);
 
-		  for (mcnt = 1; mcnt < num_regs; mcnt++)
+		  for (reg = 1; reg < num_regs; reg++)
 		    {
-		      regstart[mcnt] = best_regstart[mcnt];
-		      regend[mcnt] = best_regend[mcnt];
+		      regstart[reg] = best_regstart[reg];
+		      regend[reg] = best_regend[reg];
 		    }
 		}
 	    } /* d != end_match_2 */
@@ -4760,16 +5083,16 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 
 	      /* Go through the first `min (num_regs, regs->num_regs)'
 		 registers, since that is all we initialized.  */
-	      for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
+	      for (reg = 1; reg < MIN (num_regs, regs->num_regs); reg++)
 		{
-		  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
-		    regs->start[mcnt] = regs->end[mcnt] = -1;
+		  if (REG_UNSET (regstart[reg]) || REG_UNSET (regend[reg]))
+		    regs->start[reg] = regs->end[reg] = -1;
 		  else
 		    {
-		      regs->start[mcnt]
-			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
-		      regs->end[mcnt]
-			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
+		      regs->start[reg]
+			= (regoff_t) POINTER_TO_OFFSET (regstart[reg]);
+		      regs->end[reg]
+			= (regoff_t) POINTER_TO_OFFSET (regend[reg]);
 		    }
 		}
 
@@ -4778,8 +5101,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		 we (re)allocated the registers, this is the case,
 		 because we always allocate enough to have at least one
 		 -1 at the end.	 */
-	      for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
-		regs->start[mcnt] = regs->end[mcnt] = -1;
+	      for (reg = num_regs; reg < regs->num_regs; reg++)
+		regs->start[reg] = regs->end[reg] = -1;
 	    } /* regs && !bufp->no_sub */
 
 	  DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
@@ -4877,7 +5200,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	case anychar:
 	  {
 	    int buf_charlen;
-	    unsigned int buf_ch;
+	    re_wchar_t buf_ch;
 
 	    DEBUG_PRINT1 ("EXECUTING anychar.\n");
 
@@ -4906,7 +5229,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 
 	    /* Start of actual range_table, or end of bitmap if there is no
 	       range table.  */
-	    unsigned char *range_table;
+	    re_char *range_table;
 
 	    /* Nonzero if there is a range table.  */
 	    int range_table_exists;
@@ -4942,17 +5265,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	      {
 		int class_bits = CHARSET_RANGE_TABLE_BITS (&p[-1]);
 
-		if (  (class_bits & BIT_ALNUM && ISALNUM (c))
-		    | (class_bits & BIT_ALPHA && ISALPHA (c))
-		    | (class_bits & BIT_ASCII && IS_REAL_ASCII (c))
-		    | (class_bits & BIT_GRAPH && ISGRAPH (c))
-		    | (class_bits & BIT_LOWER && ISLOWER (c))
-		    | (class_bits & BIT_MULTIBYTE && !ISUNIBYTE (c))
-		    | (class_bits & BIT_NONASCII && !IS_REAL_ASCII (c))
-		    | (class_bits & BIT_PRINT && ISPRINT (c))
+		if (  (class_bits & BIT_LOWER && ISLOWER (c))
+		    | (class_bits & BIT_MULTIBYTE)
 		    | (class_bits & BIT_PUNCT && ISPUNCT (c))
 		    | (class_bits & BIT_SPACE && ISSPACE (c))
-		    | (class_bits & BIT_UNIBYTE && ISUNIBYTE (c))
 		    | (class_bits & BIT_UPPER && ISUPPER (c))
 		    | (class_bits & BIT_WORD  && ISWORD (c)))
 		  not = !not;
@@ -4999,7 +5315,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 
 	  assert (!REG_UNSET (regstart[*p]));
 	  /* Strictly speaking, there should be code such as:
-	     
+
 		assert (REG_UNSET (regend[*p]));
 		PUSH_FAILURE_REGSTOP ((unsigned int)*p);
 
@@ -5089,8 +5405,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 
 
 	/* begline matches the empty string at the beginning of the string
-	   (unless `not_bol' is set in `bufp'), and, if
-	   `newline_anchor' is set, after newlines.  */
+	   (unless `not_bol' is set in `bufp'), and after newlines.  */
 	case begline:
 	  DEBUG_PRINT1 ("EXECUTING begline.\n");
 
@@ -5102,7 +5417,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	    {
 	      unsigned char c;
 	      GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2);
-	      if (c == '\n' && bufp->newline_anchor)
+	      if (c == '\n')
 		break;
 	    }
 	  /* In all other cases, we fail.  */
@@ -5120,7 +5435,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	  else
 	    {
 	      PREFETCH_NOLIMIT ();
-	      if (*d == '\n' && bufp->newline_anchor)
+	      if (*d == '\n')
 		break;
 	    }
 	  goto fail;
@@ -5172,7 +5487,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	     cycle detection cannot work.  Worse yet, such a detection
 	     can not only fail to detect a cycle, but it can also wrongly
 	     detect a cycle (between different instantiations of the same
-	     loop.
+	     loop).
 	     So the method used for those nasty loops is a little different:
 	     We use a special cycle-detection-stack-frame which is pushed
 	     when the on_failure_jump_nastyloop failure-point is *popped*.
@@ -5186,11 +5501,18 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 			mcnt, p + mcnt);
 
 	  assert ((re_opcode_t)p[-4] == no_op);
-	  CHECK_INFINITE_LOOP (p - 4, d);
-	  PUSH_FAILURE_POINT (p - 3, d);
+	  {
+	    int cycle = 0;
+	    CHECK_INFINITE_LOOP (p - 4, d);
+	    if (!cycle)
+	      /* If there's a cycle, just continue without pushing
+		 this failure point.  The failure point is the "try again"
+		 option, which shouldn't be tried.
+		 We want (x?)*?y\1z to match both xxyz and xxyxz.  */
+	      PUSH_FAILURE_POINT (p - 3, d);
+	  }
 	  break;
 
-
 	  /* Simple loop detecting on_failure_jump:  just check on the
 	     failure stack if the same spot was already hit earlier.  */
 	case on_failure_jump_loop:
@@ -5198,9 +5520,19 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
 	  DEBUG_PRINT3 ("EXECUTING on_failure_jump_loop %d (to %p):\n",
 			mcnt, p + mcnt);
-
-	  CHECK_INFINITE_LOOP (p - 3, d);
-	  PUSH_FAILURE_POINT (p - 3, d);
+	  {
+	    int cycle = 0;
+	    CHECK_INFINITE_LOOP (p - 3, d);
+	    if (cycle)
+	      /* If there's a cycle, get out of the loop, as if the matching
+		 had failed.  We used to just `goto fail' here, but that was
+		 aborting the search a bit too early: we want to keep the
+		 empty-loop-match and keep matching after the loop.
+		 We want (x?)*y\1z to match both xxyz and xxyxz.  */
+	      p += mcnt;
+	    else
+	      PUSH_FAILURE_POINT (p - 3, d);
+	  }
 	  break;
 
 
@@ -5217,7 +5549,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	   the repetition text and either the following jump or
 	   pop_failure_jump back to this on_failure_jump.  */
 	case on_failure_jump:
-	  QUIT;
+	  IMMEDIATE_QUIT_CHECK;
 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
 	  DEBUG_PRINT3 ("EXECUTING on_failure_jump %d (to %p):\n",
 			mcnt, p + mcnt);
@@ -5233,13 +5565,15 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	   then we can use a non-backtracking loop based on
 	   on_failure_keep_string_jump instead of on_failure_jump.  */
 	case on_failure_jump_smart:
-	  QUIT;
+	  IMMEDIATE_QUIT_CHECK;
 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
 	  DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n",
 			mcnt, p + mcnt);
 	  {
-	    unsigned char *p1 = p; /* Next operation.  */
-	    unsigned char *p2 = p + mcnt; /* Destination of the jump.  */
+	    re_char *p1 = p; /* Next operation.  */
+	    /* Here, we discard `const', making re_match non-reentrant.  */
+	    unsigned char *p2 = (unsigned char*) p + mcnt; /* Jump dest.  */
+	    unsigned char *p3 = (unsigned char*) p - 3; /* opcode location.  */
 
 	    p -= 3;		/* Reset so that we will re-execute the
 				   instruction once it's been changed. */
@@ -5255,14 +5589,14 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	      {
 		/* Use a fast `on_failure_keep_string_jump' loop.  */
 		DEBUG_PRINT1 ("  smart exclusive => fast loop.\n");
-		*p = (unsigned char) on_failure_keep_string_jump;
+		*p3 = (unsigned char) on_failure_keep_string_jump;
 		STORE_NUMBER (p2 - 2, mcnt + 3);
 	      }
 	    else
 	      {
 		/* Default to a safe `on_failure_jump' loop.  */
 		DEBUG_PRINT1 ("  smart default => slow loop.\n");
-		*p = (unsigned char) on_failure_jump;
+		*p3 = (unsigned char) on_failure_jump;
 	      }
 	    DEBUG_STATEMENT (debug -= 2);
 	  }
@@ -5271,7 +5605,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	/* Unconditionally jump (without popping any failure points).  */
 	case jump:
 	unconditional_jump:
-	  QUIT;
+	  IMMEDIATE_QUIT_CHECK;
 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
 	  DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
 	  p += mcnt;				/* Do the jump.	 */
@@ -5282,17 +5616,18 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	/* Have to succeed matching what follows at least n times.
 	   After that, handle like `on_failure_jump'.  */
 	case succeed_n:
+	  /* Signedness doesn't matter since we only compare MCNT to 0.  */
 	  EXTRACT_NUMBER (mcnt, p + 2);
 	  DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
 
 	  /* Originally, mcnt is how many times we HAVE to succeed.  */
 	  if (mcnt != 0)
 	    {
+	      /* Here, we discard `const', making re_match non-reentrant.  */
+	      unsigned char *p2 = (unsigned char*) p + 2; /* counter loc.  */
 	      mcnt--;
-	      p += 2;
-	      PUSH_FAILURE_COUNT (p);
-	      DEBUG_PRINT3 ("	Setting %p to %d.\n", p, mcnt);
-	      STORE_NUMBER_AND_INCR (p, mcnt);
+	      p += 4;
+	      PUSH_NUMBER (p2, mcnt);
 	    }
 	  else
 	    /* The two bytes encoding mcnt == 0 are two no_op opcodes.  */
@@ -5300,15 +5635,17 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	  break;
 
 	case jump_n:
+	  /* Signedness doesn't matter since we only compare MCNT to 0.  */
 	  EXTRACT_NUMBER (mcnt, p + 2);
 	  DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
 
 	  /* Originally, this is how many times we CAN jump.  */
 	  if (mcnt != 0)
 	    {
+	      /* Here, we discard `const', making re_match non-reentrant.  */
+	      unsigned char *p2 = (unsigned char*) p + 2; /* counter loc.  */
 	      mcnt--;
-	      PUSH_FAILURE_COUNT (p + 2);
-	      STORE_NUMBER (p + 2, mcnt);
+	      PUSH_NUMBER (p2, mcnt);
 	      goto unconditional_jump;
 	    }
 	  /* If don't have to jump any more, skip over the rest of command.  */
@@ -5318,14 +5655,16 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 
 	case set_number_at:
 	  {
+	    unsigned char *p2;	/* Location of the counter.  */
 	    DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
 
 	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
-	    p1 = p + mcnt;
+	    /* Here, we discard `const', making re_match non-reentrant.  */
+	    p2 = (unsigned char*) p + mcnt;
+	    /* Signedness doesn't matter since we only copy MCNT's bits.  */
 	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
-	    DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
-	    PUSH_FAILURE_COUNT (p1);
-	    STORE_NUMBER (p1, mcnt);
+	    DEBUG_PRINT3 ("  Setting %p to %d.\n", p2, mcnt);
+	    PUSH_NUMBER (p2, mcnt);
 	    break;
 	  }
 
@@ -5343,7 +5682,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	    {
 	      /* C1 is the character before D, S1 is the syntax of C1, C2
 		 is the character at D, and S2 is the syntax of C2.  */
-	      int c1, c2, s1, s2;
+	      re_wchar_t c1, c2;
+	      int s1, s2;
 #ifdef emacs
 	      int offset = PTR_TO_OFFSET (d - 1);
 	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
@@ -5382,7 +5722,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	    {
 	      /* C1 is the character before D, S1 is the syntax of C1, C2
 		 is the character at D, and S2 is the syntax of C2.  */
-	      int c1, c2, s1, s2;
+	      re_wchar_t c1, c2;
+	      int s1, s2;
 #ifdef emacs
 	      int offset = PTR_TO_OFFSET (d);
 	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
@@ -5391,7 +5732,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	      PREFETCH ();
 	      c2 = RE_STRING_CHAR (d, dend - d);
 	      s2 = SYNTAX (c2);
-	
+
 	      /* Case 2: S2 is not Sword. */
 	      if (s2 != Sword)
 		goto fail;
@@ -5425,7 +5766,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	    {
 	      /* C1 is the character before D, S1 is the syntax of C1, C2
 		 is the character at D, and S2 is the syntax of C2.  */
-	      int c1, c2, s1, s2;
+	      re_wchar_t c1, c2;
+	      int s1, s2;
 #ifdef emacs
 	      int offset = PTR_TO_OFFSET (d) - 1;
 	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
@@ -5470,7 +5812,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	  }
 #endif
 	  {
-	    int c, len;
+	    int len;
+	    re_wchar_t c;
 
 	    c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
 
@@ -5506,7 +5849,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	  DEBUG_PRINT3 ("EXECUTING %scategoryspec %d.\n", not?"not":"", mcnt);
 	  PREFETCH ();
 	  {
-	    int c, len;
+	    int len;
+	    re_wchar_t c;
+
 	    c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
 
 	    if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
@@ -5525,11 +5870,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 
     /* We goto here if a matching operation fails. */
     fail:
-      QUIT;
+      IMMEDIATE_QUIT_CHECK;
       if (!FAIL_STACK_EMPTY ())
 	{
-	  re_char *str;
-	  unsigned char *pat;
+	  re_char *str, *pat;
 	  /* A restart point is known.  Restore to that state.  */
 	  DEBUG_PRINT1 ("\nFAIL:\n");
 	  POP_FAILURE_POINT (str, pat);
@@ -5599,7 +5943,7 @@ bcmp_translate (s1, s2, len, translate, multibyte)
   while (p1 < p1_end && p2 < p2_end)
     {
       int p1_charlen, p2_charlen;
-      int p1_ch, p2_ch;
+      re_wchar_t p1_ch, p2_ch;
 
       p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
       p2_ch = RE_STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
@@ -5645,15 +5989,13 @@ re_compile_pattern (pattern, length, bufp)
      setting no_sub.  */
   bufp->no_sub = 0;
 
-  /* Match anchors at newline.  */
-  bufp->newline_anchor = 1;
-
   ret = regex_compile ((re_char*) pattern, length, re_syntax_options, bufp);
 
   if (!ret)
     return NULL;
   return gettext (re_error_msgid[(int) ret]);
 }
+WEAK_ALIAS (__re_compile_pattern, re_compile_pattern)
 
 /* Entry points compatible with 4.2 BSD regex library.  We don't define
    them unless specifically requested.  */
@@ -5700,9 +6042,6 @@ re_comp (s)
   /* Since `re_exec' always passes NULL for the `regs' argument, we
      don't need to initialize the pattern buffer fields which affect it.  */
 
-  /* Match anchors at newlines.  */
-  re_comp_buf.newline_anchor = 1;
-
   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
 
   if (!ret)
@@ -5740,8 +6079,8 @@ re_exec (s)
      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
        REG_EXTENDED bit in CFLAGS is set; otherwise, to
        RE_SYNTAX_POSIX_BASIC;
-     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
-     `fastmap' and `fastmap_accurate' to zero;
+     `fastmap' to an allocated space for the fastmap;
+     `fastmap_accurate' to zero;
      `re_nsub' to the number of subexpressions in PATTERN.
 
    PATTERN is the address of the pattern string.
@@ -5766,8 +6105,8 @@ re_exec (s)
 
 int
 regcomp (preg, pattern, cflags)
-    regex_t *preg;
-    const char *pattern;
+    regex_t *__restrict preg;
+    const char *__restrict pattern;
     int cflags;
 {
   reg_errcode_t ret;
@@ -5780,11 +6119,8 @@ regcomp (preg, pattern, cflags)
   preg->allocated = 0;
   preg->used = 0;
 
-  /* Don't bother to use a fastmap when searching.  This simplifies the
-     REG_NEWLINE case: if we used a fastmap, we'd have to put all the
-     characters after newlines into the fastmap.  This way, we just try
-     every character.  */
-  preg->fastmap = 0;
+  /* Try to allocate space for the fastmap.  */
+  preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
 
   if (cflags & REG_ICASE)
     {
@@ -5808,11 +6144,9 @@ regcomp (preg, pattern, cflags)
     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
       syntax &= ~RE_DOT_NEWLINE;
       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
-      /* It also changes the matching behavior.  */
-      preg->newline_anchor = 1;
     }
   else
-    preg->newline_anchor = 0;
+    syntax |= RE_NO_NEWLINE_ANCHOR;
 
   preg->no_sub = !!(cflags & REG_NOSUB);
 
@@ -5822,10 +6156,22 @@ regcomp (preg, pattern, cflags)
 
   /* POSIX doesn't distinguish between an unmatched open-group and an
      unmatched close-group: both are REG_EPAREN.  */
-  if (ret == REG_ERPAREN) ret = REG_EPAREN;
-
+  if (ret == REG_ERPAREN)
+    ret = REG_EPAREN;
+
+  if (ret == REG_NOERROR && preg->fastmap)
+    { /* Compute the fastmap now, since regexec cannot modify the pattern
+	 buffer.  */
+      re_compile_fastmap (preg);
+      if (preg->can_be_null)
+	{ /* The fastmap can't be used anyway.  */
+	  free (preg->fastmap);
+	  preg->fastmap = NULL;
+	}
+    }
   return (int) ret;
 }
+WEAK_ALIAS (__regcomp, regcomp)
 
 
 /* regexec searches for a given pattern, specified by PREG, in the
@@ -5844,17 +6190,17 @@ regcomp (preg, pattern, cflags)
 
 int
 regexec (preg, string, nmatch, pmatch, eflags)
-    const regex_t *preg;
-    const char *string;
+    const regex_t *__restrict preg;
+    const char *__restrict string;
     size_t nmatch;
-    regmatch_t pmatch[];
+    regmatch_t pmatch[__restrict_arr];
     int eflags;
 {
   int ret;
   struct re_registers regs;
   regex_t private_preg;
   int len = strlen (string);
-  boolean want_reg_info = !preg->no_sub && nmatch > 0;
+  boolean want_reg_info = !preg->no_sub && nmatch > 0 && pmatch;
 
   private_preg = *preg;
 
@@ -5875,6 +6221,15 @@ regexec (preg, string, nmatch, pmatch, eflags)
       regs.end = regs.start + nmatch;
     }
 
+  /* Instead of using not_eol to implement REG_NOTEOL, we could simply
+     pass (&private_preg, string, len + 1, 0, len, ...) pretending the string
+     was a little bit longer but still only matching the real part.
+     This works because the `endline' will check for a '\n' and will find a
+     '\0', correctly deciding that this is not the end of a line.
+     But it doesn't work out so nicely for REG_NOTBOL, since we don't have
+     a convenient '\0' there.  For all we know, the string could be preceded
+     by '\n' which would throw things off.  */
+
   /* Perform the searching operation.  */
   ret = re_search (&private_preg, string, len,
 		   /* start: */ 0, /* range: */ len,
@@ -5901,6 +6256,7 @@ regexec (preg, string, nmatch, pmatch, eflags)
   /* We want zero return to mean success, unlike `re_search'.  */
   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
 }
+WEAK_ALIAS (__regexec, regexec)
 
 
 /* Returns a message corresponding to an error code, ERRCODE, returned
@@ -5941,6 +6297,7 @@ regerror (errcode, preg, errbuf, errbuf_size)
 
   return msg_size;
 }
+WEAK_ALIAS (__regerror, regerror)
 
 
 /* Free dynamically allocated space used by PREG.  */
@@ -5965,5 +6322,9 @@ regfree (preg)
     free (preg->translate);
   preg->translate = NULL;
 }
+WEAK_ALIAS (__regfree, regfree)
 
 #endif /* not emacs  */
+
+/* arch-tag: 4ffd68ba-2a9e-435b-a21a-018990f9eeb2
+   (do not change this comment) */