X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=regex.c;h=3b4eb502596b4b6a0689620cff984a8da5e0c6d6;hb=871e5b2ce2391c85ae9288ea1e92895005817e21;hp=711f7c7afa9fe847b52e5f6f3442795db00c143d;hpb=7540e1406f34e8083740a2954fd2e97b9878e5e6;p=gnulib.git

diff --git a/regex.c b/regex.c
index 711f7c7af..3b4eb5025 100644
--- a/regex.c
+++ b/regex.c
@@ -20,12 +20,9 @@
    USA.	 */
 
 /* TODO:
-   - detect nasty infinite loops like "\\(\\)+?ab" when matching "ac".
-   - use `keep_string' more often than just .*\n.
    - structure the opcode space into opcode+flag.
-   - merge with glic's regex.[ch]
-
-   That's it for now    -sm */
+   - merge with glibc's regex.[ch]
+ */
 
 /* AIX requires this to be the first thing in the file. */
 #if defined (_AIX) && !defined (REGEX_MALLOC)
@@ -39,8 +36,6 @@
 /* Converts the pointer to the char to BEG-based offset from the start.	 */
 #define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
 #define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
-#else
-#define PTR_TO_OFFSET(d) 0
 #endif
 
 #ifdef HAVE_CONFIG_H
@@ -81,8 +76,28 @@
 #define realloc xrealloc
 #define free xfree
 
+#define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
 #define RE_STRING_CHAR(p, s) \
   (multibyte ? (STRING_CHAR (p, s)) : (*(p)))
+#define RE_STRING_CHAR_AND_LENGTH(p, s, len) \
+  (multibyte ? (STRING_CHAR_AND_LENGTH (p, s, len)) : ((len) = 1, *(p)))
+
+/* Set C a (possibly multibyte) character before P.  P points into a
+   string which is the virtual concatenation of STR1 (which ends at
+   END1) or STR2 (which ends at END2).  */
+#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2)			\
+  do {									\
+    if (multibyte)							\
+       {						    		\
+         re_char *dtemp = (p) == (str2) ? (end1) : (p);		    	\
+         re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
+         while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp));		\
+         c = STRING_CHAR (dtemp, (p) - dtemp);				\
+       }						    		\
+     else						    		\
+       (c = ((p) == (str2) ? (end1) : (p))[-1]);			\
+  } while (0)
+
 
 #else  /* not emacs */
 
@@ -130,11 +145,8 @@ char *realloc ();
 
 /* Define the syntax stuff for \<, \>, etc.  */
 
-/* This must be nonzero for the wordchar and notwordchar pattern
-   commands in re_match_2.  */
-#ifndef Sword
-#define Sword 1
-#endif
+/* Sword must be nonzero for the wordchar pattern commands in re_match_2.  */
+enum syntaxcode { Swhitespace = 0, Sword = 1 };
 
 #ifdef SWITCH_ENUM_BUG
 #define SWITCH_ENUM_CAST(x) ((int)(x))
@@ -184,6 +196,10 @@ init_syntax_once ()
 
 /* Dummy macros for non-Emacs environments.  */
 #define BASE_LEADING_CODE_P(c) (0)
+#define CHAR_CHARSET(c) 0
+#define CHARSET_LEADING_CODE_BASE(c) 0
+#define MAX_MULTIBYTE_LENGTH 1
+#define RE_MULTIBYTE_P(x) 0
 #define WORD_BOUNDARY_P(c1, c2) (0)
 #define CHAR_HEAD_P(p) (1)
 #define SINGLE_BYTE_CHAR_P(c) (1)
@@ -191,7 +207,9 @@ init_syntax_once ()
 #define MULTIBYTE_FORM_LENGTH(p, s) (1)
 #define STRING_CHAR(p, s) (*(p))
 #define RE_STRING_CHAR STRING_CHAR
+#define CHAR_STRING(c, s) (*(s) = (c), 1)
 #define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
+#define RE_STRING_CHAR_AND_LENGTH STRING_CHAR_AND_LENGTH
 #define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
   (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
 #endif /* not emacs */
@@ -411,7 +429,7 @@ char *alloca ();
 #define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
    REGEX_REALLOCATE (source, osize, nsize)
 /* No need to explicitly free anything.	 */
-#define REGEX_FREE_STACK(arg)
+#define REGEX_FREE_STACK(arg) ((void)0)
 
 #endif /* not REGEX_MALLOC */
 #endif /* not using relocating allocator */
@@ -531,6 +549,12 @@ typedef enum
 	   indefinitely).  */
   on_failure_jump_loop,
 
+	/* Just like `on_failure_jump_loop', except that it checks for
+	   a different kind of loop (the kind that shows up with non-greedy
+	   operators).  This operation has to be immediately preceded
+	   by a `no_op'.  */
+  on_failure_jump_nastyloop,
+
         /* A smart `on_failure_jump' used for greedy * and + operators.
 	   It analyses the loop before which it is put and if the
 	   loop does not require backtracking, it changes itself to
@@ -540,7 +564,9 @@ typedef enum
   on_failure_jump_smart,
 
 	/* Followed by two-byte relative address and two-byte number n.
-	   After matching N times, jump to the address upon failure.  */
+	   After matching N times, jump to the address upon failure.
+	   Does not work if N starts at 0: use on_failure_jump_loop
+	   instead.  */
   succeed_n,
 
 	/* Followed by two-byte relative address, and two-byte number n.
@@ -552,26 +578,23 @@ typedef enum
 	   bytes of number.  */
   set_number_at,
 
-  wordchar,	/* Matches any word-constituent character.  */
-  notwordchar,	/* Matches any char that is not a word-constituent.  */
-
   wordbeg,	/* Succeeds if at word beginning.  */
   wordend,	/* Succeeds if at word end.  */
 
   wordbound,	/* Succeeds if at a word boundary.  */
-  notwordbound	/* Succeeds if not at a word boundary.	*/
-
-#ifdef emacs
-  ,before_dot,	/* Succeeds if before point.  */
-  at_dot,	/* Succeeds if at point.  */
-  after_dot,	/* Succeeds if after point.  */
+  notwordbound,	/* Succeeds if not at a word boundary.	*/
 
 	/* Matches any character whose syntax is specified.  Followed by
 	   a byte which contains a syntax code, e.g., Sword.  */
   syntaxspec,
 
 	/* Matches any character whose syntax is not that specified.  */
-  notsyntaxspec,
+  notsyntaxspec
+
+#ifdef emacs
+  ,before_dot,	/* Succeeds if before point.  */
+  at_dot,	/* Succeeds if at point.  */
+  after_dot,	/* Succeeds if after point.  */
 
   /* Matches any character whose category-set contains the specified
      category.	The operator is followed by a byte which contains a
@@ -944,6 +967,11 @@ print_partial_compiled_pattern (start, end)
 	  printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
 	  break;
 
+	case on_failure_jump_nastyloop:
+	  extract_number_and_incr (&mcnt, &p);
+	  printf ("/on_failure_jump_nastyloop to %d", p + mcnt - start);
+	  break;
+
 	case on_failure_jump_loop:
 	  extract_number_and_incr (&mcnt, &p);
 	  printf ("/on_failure_jump_loop to %d", p + mcnt - start);
@@ -992,6 +1020,18 @@ print_partial_compiled_pattern (start, end)
 	case wordend:
 	  printf ("/wordend");
 
+	case syntaxspec:
+	  printf ("/syntaxspec");
+	  mcnt = *p++;
+	  printf ("/%d", mcnt);
+	  break;
+
+	case notsyntaxspec:
+	  printf ("/notsyntaxspec");
+	  mcnt = *p++;
+	  printf ("/%d", mcnt);
+	  break;
+
 #ifdef emacs
 	case before_dot:
 	  printf ("/before_dot");
@@ -1005,27 +1045,19 @@ print_partial_compiled_pattern (start, end)
 	  printf ("/after_dot");
 	  break;
 
-	case syntaxspec:
-	  printf ("/syntaxspec");
+	case categoryspec:
+	  printf ("/categoryspec");
 	  mcnt = *p++;
 	  printf ("/%d", mcnt);
 	  break;
 
-	case notsyntaxspec:
-	  printf ("/notsyntaxspec");
+	case notcategoryspec:
+	  printf ("/notcategoryspec");
 	  mcnt = *p++;
 	  printf ("/%d", mcnt);
 	  break;
 #endif /* emacs */
 
-	case wordchar:
-	  printf ("/wordchar");
-	  break;
-
-	case notwordchar:
-	  printf ("/notwordchar");
-	  break;
-
 	case begbuf:
 	  printf ("/begbuf");
 	  break;
@@ -1277,7 +1309,7 @@ typedef struct
     fail_stack.frame = 0;						\
   } while (0)
 
-#define RESET_FAIL_STACK()
+#define RESET_FAIL_STACK() ((void)0)
 #endif
 
 
@@ -1529,29 +1561,29 @@ static boolean at_endline_loc_p _RE_ARGS((const unsigned char *p,
 					  const unsigned char *pend,
 					  reg_syntax_t syntax));
 static unsigned char *skip_one_char _RE_ARGS((unsigned char *p));
+static int analyse_first _RE_ARGS((unsigned char *p, unsigned char *pend,
+				   char *fastmap, const int multibyte));
 
 /* Fetch the next character in the uncompiled pattern---translating it
    if necessary.  Also cast from a signed character in the constant
    string passed to us by the user to an unsigned char that we can use
    as an array index (in, e.g., `translate').  */
-#ifndef PATFETCH
 #define PATFETCH(c)							\
   do {									\
     PATFETCH_RAW (c);							\
-    if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c);	\
+    c = TRANSLATE (c);							\
   } while (0)
-#endif
 
 /* Fetch the next character in the uncompiled pattern, with no
    translation.	 */
 #define PATFETCH_RAW(c)							\
-  do {if (p == pend) return REG_EEND;					\
-    c = *p++;								\
+  do {									\
+    int len;								\
+    if (p == pend) return REG_EEND;					\
+    c = RE_STRING_CHAR_AND_LENGTH (p, pend - p, len);			\
+    p += len;								\
   } while (0)
 
-/* Go backwards one character in the pattern.  */
-#define PATUNFETCH p--
-
 
 /* If `translate' is non-null, return translate[D], else just D.  We
    cast the subscript to translate because some data is declared as
@@ -1946,6 +1978,9 @@ regex_compile (pattern, size, syntax, bufp)
   /* Work area for range table of charset.  */
   struct range_table_work_area range_table_work;
 
+  /* If the object matched can contain multibyte characters.  */
+  const boolean multibyte = RE_MULTIBYTE_P (bufp);
+
 #ifdef DEBUG
   debug++;
   DEBUG_PRINT1 ("\nCompiling pattern: ");
@@ -1983,14 +2018,6 @@ regex_compile (pattern, size, syntax, bufp)
   /* Always count groups, whether or not bufp->no_sub is set.  */
   bufp->re_nsub = 0;
 
-#ifdef emacs
-  /* bufp->multibyte is set before regex_compile is called, so don't alter
-     it. */
-#else  /* not emacs */
-  /* Nothing is recognized as a multibyte character.  */
-  bufp->multibyte = 0;
-#endif
-
 #if !defined (emacs) && !defined (SYNTAX_TABLE)
   /* Initialize the syntax table.  */
    init_syntax_once ();
@@ -2091,35 +2118,24 @@ regex_compile (pattern, size, syntax, bufp)
 
 		if (p == pend)
 		  break;
-
-		PATFETCH (c);
-
-		if (c == '*'
-		    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
+		else if (*p == '*'
+			 || (!(syntax & RE_BK_PLUS_QM)
+			     && (*p == '+' || *p == '?')))
 		  ;
-
-		else if (syntax & RE_BK_PLUS_QM	 &&  c == '\\')
+		else if (syntax & RE_BK_PLUS_QM	 && *p == '\\')
 		  {
-		    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
-
-		    PATFETCH (c1);
-		    if (!(c1 == '+' || c1 == '?'))
-		      {
-			PATUNFETCH;
-			PATUNFETCH;
-			break;
-		      }
-
-		    c = c1;
+		    if (p+1 == pend)
+		      FREE_STACK_RETURN (REG_EESCAPE);
+		    if (p[1] == '+' || p[1] == '?')
+		      PATFETCH (c); /* Gobble up the backslash.  */
+		    else
+		      break;
 		  }
 		else
-		  {
-		    PATUNFETCH;
-		    break;
-		  }
-
+		  break;
 		/* If we get here, we found another repeat character.  */
-	      }
+		PATFETCH (c);
+	       }
 
 	    /* Star, etc. applied to an empty pattern is equivalent
 	       to an empty pattern.  */
@@ -2134,6 +2150,9 @@ regex_compile (pattern, size, syntax, bufp)
 		  {
 		    boolean simple = skip_one_char (laststart) == b;
 		    unsigned int startoffset = 0;
+		    re_opcode_t ofj =
+		      (simple || !analyse_first (laststart, b, NULL, 0)) ?
+		      on_failure_jump : on_failure_jump_loop;
 		    assert (skip_one_char (laststart) <= b);
 		    
 		    if (!zero_times_ok && simple)
@@ -2152,14 +2171,13 @@ regex_compile (pattern, size, syntax, bufp)
 		    GET_BUFFER_SPACE (6);
 		    if (!zero_times_ok)
 		      /* A + loop.  */
-		      STORE_JUMP (on_failure_jump_loop, b, b + 6);
+		      STORE_JUMP (ofj, b, b + 6);
 		    else
 		      /* Simple * loops can use on_failure_keep_string_jump
 			 depending on what follows.  But since we don't know
 			 that yet, we leave the decision up to
 			 on_failure_jump_smart.  */
-		      INSERT_JUMP (simple ? on_failure_jump_smart
-				   : on_failure_jump_loop,
+		      INSERT_JUMP (simple ? on_failure_jump_smart : ofj,
 				   laststart + startoffset, b + 6);
 		    b += 3;
 		    STORE_JUMP (jump, b, laststart + startoffset);
@@ -2177,19 +2195,22 @@ regex_compile (pattern, size, syntax, bufp)
 	    else		/* not greedy */
 	      { /* I wish the greedy and non-greedy cases could be merged. */
 
+		GET_BUFFER_SPACE (7); /* We might use less.  */
 		if (many_times_ok)
 		  {
+		    boolean emptyp = analyse_first (laststart, b, NULL, 0);
+
 		    /* The non-greedy multiple match looks like a repeat..until:
 		       we only need a conditional jump at the end of the loop */
-		    GET_BUFFER_SPACE (3);
-		    STORE_JUMP (on_failure_jump, b, laststart);
+		    if (emptyp) BUF_PUSH (no_op);
+		    STORE_JUMP (emptyp ? on_failure_jump_nastyloop
+				: on_failure_jump, b, laststart);
 		    b += 3;
 		    if (zero_times_ok)
 		      {
 			/* The repeat...until naturally matches one or more.
 			   To also match zero times, we need to first jump to
 			   the end of the loop (its conditional jump). */
-			GET_BUFFER_SPACE (3);
 			INSERT_JUMP (jump, laststart, b);
 			b += 3;
 		      }
@@ -2197,7 +2218,6 @@ regex_compile (pattern, size, syntax, bufp)
 		else
 		  {
 		    /* non-greedy a?? */
-		    GET_BUFFER_SPACE (6);
 		    INSERT_JUMP (jump, laststart, b + 3);
 		    b += 3;
 		    INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
@@ -2250,8 +2270,8 @@ regex_compile (pattern, size, syntax, bufp)
 	    /* Read in characters and ranges, setting map bits.	 */
 	    for (;;)
 	      {
-		int len;
 		boolean escaped_char = false;
+		const unsigned char *p2 = p;
 
 		if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
 
@@ -2270,19 +2290,10 @@ regex_compile (pattern, size, syntax, bufp)
 		    /* Could be the end of the bracket expression.	If it's
 		       not (i.e., when the bracket expression is `[]' so
 		       far), the ']' character bit gets set way below.  */
-		    if (c == ']' && p != p1 + 1)
+		    if (c == ']' && p2 != p1)
 		      break;
 		  }
 
-		/* If C indicates start of multibyte char, get the
-		   actual character code in C, and set the pattern
-		   pointer P to the next character boundary.  */
-		if (bufp->multibyte && BASE_LEADING_CODE_P (c))
-		  {
-		    PATUNFETCH;
-		    c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
-		    p += len;
-		  }
 		/* What should we do for the character which is
 		   greater than 0x7F, but not BASE_LEADING_CODE_P?
 		   XXX */
@@ -2290,14 +2301,16 @@ regex_compile (pattern, size, syntax, bufp)
 		/* See if we're at the beginning of a possible character
 		   class.  */
 
-		else if (!escaped_char &&
-			 syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+		if (!escaped_char &&
+		    syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
 		  {
 		    /* Leave room for the null.	 */
 		    char str[CHAR_CLASS_MAX_LENGTH + 1];
+		    const unsigned char *class_beg;
 
 		    PATFETCH (c);
 		    c1 = 0;
+		    class_beg = p;
 
 		    /* If pattern is `[[:'.  */
 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
@@ -2352,7 +2365,7 @@ regex_compile (pattern, size, syntax, bufp)
 			   they can only match ASCII characters.  We
 			   don't need to handle them for multibyte.  */
 
-			if (bufp->multibyte)
+			if (multibyte)
 			  {
 			    int bit = 0;
 
@@ -2410,9 +2423,8 @@ regex_compile (pattern, size, syntax, bufp)
 		      }
 		    else
 		      {
-			c1++;
-			while (c1--)
-			  PATUNFETCH;
+			/* Go back to right after the "[:".  */
+			p = class_beg;
 			SET_LIST_BIT ('[');
 
 			/* Because the `:' may starts the range, we
@@ -2430,12 +2442,6 @@ regex_compile (pattern, size, syntax, bufp)
 
 		    /* Fetch the character which ends the range. */
 		    PATFETCH (c1);
-		    if (bufp->multibyte && BASE_LEADING_CODE_P (c1))
-		      {
-			PATUNFETCH;
-			c1 = STRING_CHAR_AND_LENGTH (p, pend - p, len);
-			p += len;
-		      }
 
 		    if (SINGLE_BYTE_CHAR_P (c)
 			&& ! SINGLE_BYTE_CHAR_P (c1))
@@ -2573,9 +2579,9 @@ regex_compile (pattern, size, syntax, bufp)
 		if (p+1 < pend)
 		  {
 		    /* Look for a special (?...) construct */
-		    PATFETCH (c);
-		    if ((syntax & RE_SHY_GROUPS) && c == '?')
+		    if ((syntax & RE_SHY_GROUPS) && *p == '?')
 		      {
+			PATFETCH (c); /* Gobble up the '?'.  */
 			PATFETCH (c);
 			switch (c)
 			  {
@@ -2585,7 +2591,6 @@ regex_compile (pattern, size, syntax, bufp)
 			    FREE_STACK_RETURN (REG_BADPAT);
 			  }
 		      }
-		    else PATUNFETCH;
 		  }
 
 		if (!shy)
@@ -2733,8 +2738,9 @@ regex_compile (pattern, size, syntax, bufp)
 	      if (!(syntax & RE_INTERVALS)
 		     /* If we're at `\{' and it's not the open-interval
 			operator.  */
-		  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
-		  || (p - 2 == pattern	&&  p == pend))
+		  || (syntax & RE_NO_BK_BRACES)
+		  /* What is that? -sm  */
+		  /* || (p - 2 == pattern && p == pend) */)
 		goto normal_backslash;
 
 	    handle_interval:
@@ -2744,7 +2750,7 @@ regex_compile (pattern, size, syntax, bufp)
 		/* At least (most) this many matches must be made.  */
 		int lower_bound = 0, upper_bound = -1;
 
-		beg_interval = p - 1;
+		beg_interval = p;
 
 		if (p == pend)
 		  {
@@ -2757,16 +2763,13 @@ regex_compile (pattern, size, syntax, bufp)
 		GET_UNSIGNED_NUMBER (lower_bound);
 
 		if (c == ',')
-		  {
-		    GET_UNSIGNED_NUMBER (upper_bound);
-		    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
-		  }
+		  GET_UNSIGNED_NUMBER (upper_bound);
 		else
 		  /* Interval such as `{1}' => match exactly once. */
 		  upper_bound = lower_bound;
 
 		if (lower_bound < 0 || upper_bound > RE_DUP_MAX
-		    || lower_bound > upper_bound)
+		    || (upper_bound >= 0 && lower_bound > upper_bound))
 		  {
 		    if (syntax & RE_NO_BK_BRACES)
 		      goto unfetch_interval;
@@ -2802,15 +2805,13 @@ regex_compile (pattern, size, syntax, bufp)
 		      goto unfetch_interval;
 		  }
 
-		/* If the upper bound is zero, don't want to succeed at
-		   all; jump from `laststart' to `b + 3', which will be
-		   the end of the buffer after we insert the jump.  */
 		 if (upper_bound == 0)
-		   {
-		     GET_BUFFER_SPACE (3);
-		     INSERT_JUMP (jump, laststart, b + 3);
-		     b += 3;
-		   }
+		   /* If the upper bound is zero, just drop the sub pattern
+		      altogether.  */
+		   b = laststart;
+		 else if (lower_bound == 1 && upper_bound == 1)
+		   /* Just match it once: nothing to do here.  */
+		   ;
 
 		 /* Otherwise, we have a nontrivial interval.  When
 		    we're all done, the pattern will look like:
@@ -2824,28 +2825,49 @@ regex_compile (pattern, size, syntax, bufp)
 		 else
 		   { /* If the upper bound is > 1, we need to insert
 			more at the end of the loop.  */
-		     unsigned nbytes = 10 + (upper_bound > 1) * 10;
-
-		     GET_BUFFER_SPACE (nbytes);
-
-		     /* Initialize lower bound of the `succeed_n', even
-			though it will be set during matching by its
-			attendant `set_number_at' (inserted next),
-			because `re_compile_fastmap' needs to know.
-			Jump to the `jump_n' we might insert below.  */
-		     INSERT_JUMP2 (succeed_n, laststart,
-				   b + 5 + (upper_bound > 1) * 5,
-				   lower_bound);
-		     b += 5;
-
-		     /* Code to initialize the lower bound.  Insert
-			before the `succeed_n'.	 The `5' is the last two
-			bytes of this `set_number_at', plus 3 bytes of
-			the following `succeed_n'.  */
-		     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
-		     b += 5;
-
-		     if (upper_bound > 1)
+		     unsigned int nbytes = (upper_bound < 0 ? 3
+					    : upper_bound > 1 ? 5 : 0);
+		     unsigned int startoffset = 0;
+
+		     GET_BUFFER_SPACE (20); /* We might use less.  */
+
+		     if (lower_bound == 0)
+		       {
+			 /* A succeed_n that starts with 0 is really a
+			    a simple on_failure_jump_loop.  */
+			 INSERT_JUMP (on_failure_jump_loop, laststart,
+				      b + 3 + nbytes);
+			 b += 3;
+		       }
+		     else
+		       {
+			 /* Initialize lower bound of the `succeed_n', even
+			    though it will be set during matching by its
+			    attendant `set_number_at' (inserted next),
+			    because `re_compile_fastmap' needs to know.
+			    Jump to the `jump_n' we might insert below.  */
+			 INSERT_JUMP2 (succeed_n, laststart,
+				       b + 5 + nbytes,
+				       lower_bound);
+			 b += 5;
+
+			 /* Code to initialize the lower bound.  Insert
+			    before the `succeed_n'.	 The `5' is the last two
+			    bytes of this `set_number_at', plus 3 bytes of
+			    the following `succeed_n'.  */
+			 insert_op2 (set_number_at, laststart, 5, lower_bound, b);
+			 b += 5;
+			 startoffset += 5;
+		       }
+
+		     if (upper_bound < 0)
+		       {
+			 /* A negative upper bound stands for infinity,
+			    in which case it degenerates to a plain jump.  */
+			 STORE_JUMP (jump, b, laststart + startoffset);
+			 b += 3;
+		       }
+		     else if (upper_bound > 1)
 		       { /* More than one repetition is allowed, so
 			    append a backward jump to the `succeed_n'
 			    that starts this interval.
@@ -2853,7 +2875,7 @@ regex_compile (pattern, size, syntax, bufp)
 			    When we've reached this during matching,
 			    we'll have matched the interval once, so
 			    jump back only `upper_bound - 1' times.  */
-			 STORE_JUMP2 (jump_n, b, laststart + 5,
+			 STORE_JUMP2 (jump_n, b, laststart + startoffset,
 				      upper_bound - 1);
 			 b += 5;
 
@@ -2888,14 +2910,15 @@ regex_compile (pattern, size, syntax, bufp)
 	       beg_interval = NULL;
 
 	       /* normal_char and normal_backslash need `c'.  */
-	       PATFETCH (c);
+	       c = '{';
 
 	       if (!(syntax & RE_NO_BK_BRACES))
 		 {
-		   if (p > pattern  &&	p[-1] == '\\')
-		     goto normal_backslash;
+		   assert (p > pattern && p[-1] == '\\');
+		   goto normal_backslash;
 		 }
-	       goto normal_char;
+	       else
+		 goto normal_char;
 
 #ifdef emacs
 	    /* There is no way to specify the before_dot and after_dot
@@ -2932,13 +2955,13 @@ regex_compile (pattern, size, syntax, bufp)
 
 	    case 'w':
 	      laststart = b;
-	      BUF_PUSH (wordchar);
+	      BUF_PUSH_2 (syntaxspec, Sword);
 	      break;
 
 
 	    case 'W':
 	      laststart = b;
-	      BUF_PUSH (notwordchar);
+	      BUF_PUSH_2 (notsyntaxspec, Sword);
 	      break;
 
 
@@ -3006,16 +3029,6 @@ regex_compile (pattern, size, syntax, bufp)
 	default:
 	/* Expects the character in `c'.  */
 	normal_char:
-	  p1 = p - 1;		/* P1 points the head of C.  */
-#ifdef emacs
-	  if (bufp->multibyte)
-	    {
-	      c = STRING_CHAR (p1, pend - p1);
-	      c = TRANSLATE (c);
-	      /* Set P to the next character boundary.  */
-	      p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1;
-	    }
-#endif
 	      /* If no exactn currently being built.  */
 	  if (!pending_exact
 
@@ -3023,7 +3036,7 @@ regex_compile (pattern, size, syntax, bufp)
 	      || pending_exact + *pending_exact + 1 != b
 
 	      /* We have only one byte following the exactn for the count.  */
-	      || *pending_exact >= (1 << BYTEWIDTH) - (p - p1)
+	      || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH
 
 	      /* If followed by a repetition operator.	*/
 	      || (p != pend && (*p == '*' || *p == '^'))
@@ -3043,24 +3056,13 @@ regex_compile (pattern, size, syntax, bufp)
 	      pending_exact = b - 1;
 	    }
 
-#ifdef emacs
-	  if (! SINGLE_BYTE_CHAR_P (c))
-	    {
-	      unsigned char str[MAX_MULTIBYTE_LENGTH];
-	      int i = CHAR_STRING (c, str);
-	      int j;
-	      for (j = 0; j < i; j++)
-		{
-		  BUF_PUSH (str[j]);
-		  (*pending_exact)++;
-		}
-	    }
-	  else
-#endif
-	    {
-	      BUF_PUSH (c);
-	      (*pending_exact)++;
-	    }
+	  GET_BUFFER_SPACE (MAX_MULTIBYTE_LENGTH);
+	  {
+	    int len = CHAR_STRING (c, b);
+	    b += len;
+	    (*pending_exact) += len;
+	  }
+
 	  break;
 	} /* switch (c) */
     } /* while p != pend */
@@ -3251,28 +3253,26 @@ group_in_compile_stack (compile_stack, regnum)
   return false;
 }
 
-/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
-   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
-   characters can start a string that matches the pattern.  This fastmap
-   is used by re_search to skip quickly over impossible starting points.
+/* analyse_first.
+   If fastmap is non-NULL, go through the pattern and fill fastmap
+   with all the possible leading chars.  If fastmap is NULL, don't
+   bother filling it up (obviously) and only return whether the
+   pattern could potentially match the empty string.
+
+   Return 1  if p..pend might match the empty string.
+   Return 0  if p..pend matches at least one char.
+   Return -1 if p..pend matches at least one char, but fastmap was not
+      updated accurately.
+   Return -2 if an error occurred.  */
 
-   Character codes above (1 << BYTEWIDTH) are not represented in the
-   fastmap, but the leading codes are represented.  Thus, the fastmap
-   indicates which character sets could start a match.
-
-   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
-   area as BUFP->fastmap.
-
-   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
-   the pattern buffer.
-
-   Returns 0 if we succeed, -2 if an internal error.   */
-
-int
-re_compile_fastmap (bufp)
-     struct re_pattern_buffer *bufp;
+static int
+analyse_first (p, pend, fastmap, multibyte)
+     unsigned char *p, *pend;
+     char *fastmap;
+     const int multibyte;
 {
   int j, k;
+  boolean not;
 #ifdef MATCH_MAY_ALLOCATE
   fail_stack_type fail_stack;
 #endif
@@ -3280,12 +3280,6 @@ re_compile_fastmap (bufp)
   char *destination;
 #endif
 
-  register char *fastmap = bufp->fastmap;
-  unsigned char *pattern = bufp->buffer;
-  unsigned long size = bufp->used;
-  unsigned char *p = pattern;
-  register unsigned char *pend = pattern + size;
-
 #if defined (REL_ALLOC) && defined (REGEX_MALLOC)
   /* This holds the pointer to the failure stack, when
      it is allocated relocatably.  */
@@ -3298,22 +3292,13 @@ re_compile_fastmap (bufp)
      match the empty string.  */
   boolean path_can_be_null = true;
 
-  /* We aren't doing a `succeed_n' to begin with.  */
-  boolean succeed_n_p = false;
-
   /* If all elements for base leading-codes in fastmap is set, this
      flag is set true.	*/
   boolean match_any_multibyte_characters = false;
 
-  /* Maximum code of simple (single byte) character. */
-  int simple_char_max;
-
-  assert (fastmap != NULL && p != NULL);
+  assert (p);
 
   INIT_FAIL_STACK ();
-  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.	*/
-  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
-  bufp->can_be_null = 0;
 
   /* The loop below works as follows:
      - It has a working-list kept in the PATTERN_STACK and which basically
@@ -3331,7 +3316,7 @@ re_compile_fastmap (bufp)
      never set `p' (or push) anything `<= p1'.  */
 
   /* If can_be_null is set, then the fastmap will not be used anyway.  */
-  while (!bufp->can_be_null)
+  while (1)
     {
       /* `p1' is used as a marker of how far back a `on_failure_jump'
 	 can go without being ignored.  It is normally equal to `p'
@@ -3343,22 +3328,18 @@ re_compile_fastmap (bufp)
 	 as used for the *? operator.  */
       unsigned char *p1 = p;
 
-      if (p == pend || *p == succeed)
+      if (p >= pend)
 	{
-	  /* We have reached the (effective) end of pattern.  */
-	  if (!PATTERN_STACK_EMPTY ())
-	    {
-	      bufp->can_be_null |= path_can_be_null;
-
-	      /* Reset for next path.  */
-	      path_can_be_null = true;
+	  if (path_can_be_null)
+	    return (RESET_FAIL_STACK (), 1);
 
-	      p = (unsigned char*) POP_PATTERN_OP ();
+	  /* We have reached the (effective) end of pattern.  */
+	  if (PATTERN_STACK_EMPTY ())
+	    return (RESET_FAIL_STACK (), 0);
 
-	      continue;
-	    }
-	  else
-	    break;
+	  p = (unsigned char*) POP_PATTERN_OP ();
+	  path_can_be_null = true;
+	  continue;
 	}
 
       /* We should never be about to go beyond the end of the pattern.	*/
@@ -3366,6 +3347,9 @@ re_compile_fastmap (bufp)
 
       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
 	{
+	case succeed:
+	  p = pend;
+	  continue;
 
 	case duplicate:
 	  /* If the first character has to match a backreference, that means
@@ -3380,83 +3364,15 @@ re_compile_fastmap (bufp)
 	 with `break'.	*/
 
 	case exactn:
-	  fastmap[p[1]] = 1;
-	  break;
-
-
-#ifndef emacs
-	case charset:
-	  {
-	    int length = (*p & 0x7f);;
-	    p++;
-
-	    for (j = length * BYTEWIDTH - 1; j >= 0; j--)
-	      if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
-		fastmap[j] = 1;
-	  }
-	  break;
-
-	case charset_not:
-	  /* Chars beyond end of map must be allowed.  */
-	  {
-	    int length = (*p & 0x7f);;
-	    p++;
-
-	    for (j = length * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
-	      fastmap[j] = 1;
-
-	    for (j = length * BYTEWIDTH - 1; j >= 0; j--)
-	      if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
-		fastmap[j] = 1;
-	  }
-	  break;
-
-	case wordchar:
-	  for (j = 0; j < (1 << BYTEWIDTH); j++)
-	    if (SYNTAX (j) == Sword)
-	      fastmap[j] = 1;
+	  if (fastmap) fastmap[p[1]] = 1;
 	  break;
 
 
-	case notwordchar:
-	  for (j = 0; j < (1 << BYTEWIDTH); j++)
-	    if (SYNTAX (j) != Sword)
-	      fastmap[j] = 1;
-	  break;
-#else  /* emacs */
-	case charset:
-	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
-	       j >= 0; j--)
-	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
-	      fastmap[j] = 1;
-
-	  /* If we can match a character class, we can match
-	     any character set.  */
-	  if (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
-	      && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0)
-	    goto set_fastmap_for_multibyte_characters;
-
-	  if (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
-	      && match_any_multibyte_characters == false)
-	    {
-	      /* Set fastmap[I] 1 where I is a base leading code of each
-		 multibyte character in the range table. */
-	      int c, count;
-
-	      /* Make P points the range table. */
-	      p += CHARSET_BITMAP_SIZE (&p[-2]);
-
-	      /* Extract the number of ranges in range table into COUNT.  */
-	      EXTRACT_NUMBER_AND_INCR (count, p);
-	      for (; count > 0; count--, p += 2 * 3) /* XXX */
-		{
-		  /* Extract the start of each range.  */
-		  EXTRACT_CHARACTER (c, p);
-		  j = CHAR_CHARSET (c);
-		  fastmap[CHARSET_LEADING_CODE_BASE (j)] = 1;
-		}
-	    }
-	  break;
+	case anychar:
+	  /* We could put all the chars except for \n (and maybe \0)
+	     but we don't bother since it is generally not worth it.  */
+	  if (!fastmap) break;
+	  return (RESET_FAIL_STACK (), -1);
 
 
 	case charset_not:
@@ -3464,19 +3380,26 @@ re_compile_fastmap (bufp)
 	     All the single-byte codes can occur in multibyte buffers.
 	     So any that are not listed in the charset
 	     are possible matches, even in multibyte buffers.  */
-	  simple_char_max = (1 << BYTEWIDTH);
+	  if (!fastmap) break;
 	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
-	       j < simple_char_max; j++)
+	       j < (1 << BYTEWIDTH); j++)
 	    fastmap[j] = 1;
-
+	  /* Fallthrough */
+	case charset:
+	  if (!fastmap) break;
+	  not = (re_opcode_t) *(p - 1) == charset_not;
 	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
 	       j >= 0; j--)
-	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
+	    if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
 	      fastmap[j] = 1;
 
-	  if (bufp->multibyte)
-	    /* Any character set can possibly contain a character
-	       which doesn't match the specified set of characters.  */
+	  if ((not && multibyte)
+	      /* Any character set can possibly contain a character
+		 which doesn't match the specified set of characters.  */
+	      || (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
+		  && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
+	    /* If we can match a character class, we can match
+	       any character set.  */
 	    {
 	    set_fastmap_for_multibyte_characters:
 	      if (match_any_multibyte_characters == false)
@@ -3487,153 +3410,79 @@ re_compile_fastmap (bufp)
 		  match_any_multibyte_characters = true;
 		}
 	    }
-	  break;
-
-
-	case wordchar:
-	  /* All the single-byte codes can occur in multibyte buffers,
-	     and they may have word syntax.  So do consider them.  */
-	  simple_char_max = (1 << BYTEWIDTH);
-	  for (j = 0; j < simple_char_max; j++)
-	    if (SYNTAX (j) == Sword)
-	      fastmap[j] = 1;
-
-	  if (bufp->multibyte)
-	    /* Any character set can possibly contain a character
-	       whose syntax is `Sword'.	 */
-	    goto set_fastmap_for_multibyte_characters;
-	  break;
 
+	  else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
+		   && match_any_multibyte_characters == false)
+	    {
+	      /* Set fastmap[I] 1 where I is a base leading code of each
+		 multibyte character in the range table. */
+	      int c, count;
 
-	case notwordchar:
-	  /* All the single-byte codes can occur in multibyte buffers,
-	     and they may not have word syntax.  So do consider them.  */
-	  simple_char_max = (1 << BYTEWIDTH);
-	  for (j = 0; j < simple_char_max; j++)
-	    if (SYNTAX (j) != Sword)
-	      fastmap[j] = 1;
+	      /* Make P points the range table.  `+ 2' is to skip flag
+                 bits for a character class.  */
+	      p += CHARSET_BITMAP_SIZE (&p[-2]) + 2;
 
-	  if (bufp->multibyte)
-	    /* Any character set can possibly contain a character
-	       whose syntax is not `Sword'.  */
-	    goto set_fastmap_for_multibyte_characters;
+	      /* Extract the number of ranges in range table into COUNT.  */
+	      EXTRACT_NUMBER_AND_INCR (count, p);
+	      for (; count > 0; count--, p += 2 * 3) /* XXX */
+		{
+		  /* Extract the start of each range.  */
+		  EXTRACT_CHARACTER (c, p);
+		  j = CHAR_CHARSET (c);
+		  fastmap[CHARSET_LEADING_CODE_BASE (j)] = 1;
+		}
+	    }
 	  break;
-#endif
-
-	case anychar:
-	  {
-	    int fastmap_newline = fastmap['\n'];
-
-	    /* `.' matches anything, except perhaps newline.
-	       Even in a multibyte buffer, it should match any
-	       conceivable byte value for the fastmap.  */
-	    if (bufp->multibyte)
-	      match_any_multibyte_characters = true;
-
-	    simple_char_max = (1 << BYTEWIDTH);
-	    for (j = 0; j < simple_char_max; j++)
-	      fastmap[j] = 1;
-
-	    /* ... except perhaps newline.  */
-	    if (!(bufp->syntax & RE_DOT_NEWLINE))
-	      fastmap['\n'] = fastmap_newline;
-
-	    /* Otherwise, have to check alternative paths.  */
-	    break;
-	  }
 
-#ifdef emacs
-	case wordbound:
-	case notwordbound:
-	case wordbeg:
-	case wordend:
-	case notsyntaxspec:
 	case syntaxspec:
-	  /* This match depends on text properties.  These end with
-	     aborting optimizations.  */
-	  bufp->can_be_null = 1;
-	  continue;
-#if 0
-	  k = *p++;
-	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
-	  for (j = 0; j < simple_char_max; j++)
-	    if (SYNTAX (j) == (enum syntaxcode) k)
-	      fastmap[j] = 1;
-
-	  if (bufp->multibyte)
-	    /* Any character set can possibly contain a character
-	       whose syntax is K.  */
-	    goto set_fastmap_for_multibyte_characters;
-	  break;
-
 	case notsyntaxspec:
+	  if (!fastmap) break;
+#ifndef emacs
+	  not = (re_opcode_t)p[-1] == notsyntaxspec;
 	  k = *p++;
-	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
-	  for (j = 0; j < simple_char_max; j++)
-	    if (SYNTAX (j) != (enum syntaxcode) k)
+	  for (j = 0; j < (1 << BYTEWIDTH); j++)
+	    if ((SYNTAX (j) == (enum syntaxcode) k) ^ not)
 	      fastmap[j] = 1;
-
-	  if (bufp->multibyte)
-	    /* Any character set can possibly contain a character
-	       whose syntax is not K.  */
-	    goto set_fastmap_for_multibyte_characters;
 	  break;
-#endif
-
+#else  /* emacs */
+	  /* This match depends on text properties.  These end with
+	     aborting optimizations.  */
+	  return (RESET_FAIL_STACK (), -1);
 
 	case categoryspec:
-	  k = *p++;
-	  simple_char_max = (1 << BYTEWIDTH);
-	  for (j = 0; j < simple_char_max; j++)
-	    if (CHAR_HAS_CATEGORY (j, k))
-	      fastmap[j] = 1;
-
-	  if (bufp->multibyte)
-	    /* Any character set can possibly contain a character
-	       whose category is K.  */
-	    goto set_fastmap_for_multibyte_characters;
-	  break;
-
-
 	case notcategoryspec:
+	  if (!fastmap) break;
+	  not = (re_opcode_t)p[-1] == notcategoryspec;
 	  k = *p++;
-	  simple_char_max = (1 << BYTEWIDTH);
-	  for (j = 0; j < simple_char_max; j++)
-	    if (!CHAR_HAS_CATEGORY (j, k))
+	  for (j = 0; j < (1 << BYTEWIDTH); j++)
+	    if ((CHAR_HAS_CATEGORY (j, k)) ^ not)
 	      fastmap[j] = 1;
 
-	  if (bufp->multibyte)
+	  if (multibyte)
 	    /* Any character set can possibly contain a character
-	       whose category is not K.	 */
+	       whose category is K (or not).  */
 	    goto set_fastmap_for_multibyte_characters;
 	  break;
 
       /* All cases after this match the empty string.  These end with
 	 `continue'.  */
 
-
 	case before_dot:
 	case at_dot:
 	case after_dot:
-	  continue;
-#endif /* emacs */
-
-
+#endif /* !emacs */
 	case no_op:
 	case begline:
 	case endline:
 	case begbuf:
 	case endbuf:
-#ifndef emacs
 	case wordbound:
 	case notwordbound:
 	case wordbeg:
 	case wordend:
-#endif
 	  continue;
 
 
-	case jump_n:
 	case jump:
 	  EXTRACT_NUMBER_AND_INCR (j, p);
 	  if (j < 0)
@@ -3646,6 +3495,7 @@ re_compile_fastmap (bufp)
 	    case on_failure_jump:
 	    case on_failure_keep_string_jump:
 	    case on_failure_jump_loop:
+	    case on_failure_jump_nastyloop:
 	    case on_failure_jump_smart:
 	      p++;
 	      break;
@@ -3658,53 +3508,33 @@ re_compile_fastmap (bufp)
 
 	case on_failure_jump:
 	case on_failure_keep_string_jump:
+	case on_failure_jump_nastyloop:
 	case on_failure_jump_loop:
 	case on_failure_jump_smart:
-	handle_on_failure_jump:
 	  EXTRACT_NUMBER_AND_INCR (j, p);
-
-	  /* For some patterns, e.g., `(a?)?', `p+j' here points to the
-	     end of the pattern.  We don't want to push such a point,
-	     since when we restore it above, entering the switch will
-	     increment `p' past the end of the pattern.	 We don't need
-	     to push such a point since we obviously won't find any more
-	     fastmap entries beyond `pend'.  Such a pattern can match
-	     the null string, though.  */
 	  if (p + j <= p1)
-	    /* Backward jump to be ignored.  */
-	    ;
-	  else if (p + j < pend)
-	    {
-	      if (!PUSH_PATTERN_OP (p + j, fail_stack))
-		{
-		  RESET_FAIL_STACK ();
-		  return -2;
-		}
-	    }
-	  else
-	    bufp->can_be_null = 1;
-
-	  if (succeed_n_p)
-	    {
-	      EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.	*/
-	      succeed_n_p = false;
-	    }
-
+	    ; /* Backward jump to be ignored.  */
+	  else if (!PUSH_PATTERN_OP (p + j, fail_stack))
+	    return (RESET_FAIL_STACK (), -2);
 	  continue;
 
 
+	case jump_n:
+	  /* This code simply does not properly handle forward jump_n.  */
+	  DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); assert (j < 0));
+	  p += 4;
+	  /* jump_n can either jump or fall through.  The (backward) jump
+	     case has already been handled, so we only need to look at the
+	     fallthrough case.  */
+	  continue;
+	  
 	case succeed_n:
-	  /* Get to the number of times to succeed.  */
-	  p += 2;
-
-	  /* Increment p past the n for when k != 0.  */
-	  EXTRACT_NUMBER_AND_INCR (k, p);
-	  if (k == 0)
-	    {
-	      p -= 4;
-	      succeed_n_p = true;  /* Spaghetti code alert.  */
-	      goto handle_on_failure_jump;
-	    }
+	  /* If N == 0, it should be an on_failure_jump_loop instead.  */
+	  DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); assert (j > 0));
+	  p += 4;
+	  /* We only care about one iteration of the loop, so we don't
+	     need to consider the case where this behaves like an
+	     on_failure_jump.  */
 	  continue;
 
 
@@ -3733,10 +3563,43 @@ re_compile_fastmap (bufp)
       p = pend;
     } /* while p */
 
-  /* Set `can_be_null' for the last path (also the first path, if the
-     pattern is empty).	 */
-  bufp->can_be_null |= path_can_be_null;
-  RESET_FAIL_STACK ();
+  return (RESET_FAIL_STACK (), 0);
+} /* analyse_first */
+
+/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
+   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
+   characters can start a string that matches the pattern.  This fastmap
+   is used by re_search to skip quickly over impossible starting points.
+
+   Character codes above (1 << BYTEWIDTH) are not represented in the
+   fastmap, but the leading codes are represented.  Thus, the fastmap
+   indicates which character sets could start a match.
+
+   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
+   area as BUFP->fastmap.
+
+   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
+   the pattern buffer.
+
+   Returns 0 if we succeed, -2 if an internal error.   */
+
+int
+re_compile_fastmap (bufp)
+     struct re_pattern_buffer *bufp;
+{
+  char *fastmap = bufp->fastmap;
+  int analysis;
+
+  assert (fastmap && bufp->buffer);
+
+  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.	*/
+  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
+
+  analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
+			    fastmap, RE_MULTIBYTE_P (bufp));
+  if (analysis < -1)
+    return analysis;
+  bufp->can_be_null = (analysis != 0);
   return 0;
 } /* re_compile_fastmap */
 
@@ -3840,7 +3703,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
   int anchored_start = 0;
 
   /* Nonzero if we have to concern multibyte character.	 */
-  int multibyte = bufp->multibyte;
+  const boolean multibyte = RE_MULTIBYTE_P (bufp);
 
   /* Check for out-of-range STARTPOS.  */
   if (startpos < 0 || startpos > total_size)
@@ -3967,11 +3830,11 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
 	    }
 	  else				/* Searching backwards.	 */
 	    {
-	      buf_ch = STRING_CHAR (d, (startpos >= size1
-					? size2 + size1 - startpos
-					: size1 - startpos));
-	      if (RE_TRANSLATE_P (translate))
-		buf_ch = RE_TRANSLATE (translate, buf_ch);
+	      int room = (startpos >= size1
+			  ? size2 + size1 - startpos
+			  : size1 - startpos);
+	      buf_ch = RE_STRING_CHAR (d, room);
+	      buf_ch = TRANSLATE (buf_ch);
 
 	      if (! (buf_ch >= 0400
 		     || fastmap[buf_ch]))
@@ -4057,7 +3920,10 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
 
 /* Declarations and macros for re_match_2.  */
 
-static int bcmp_translate ();
+static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
+				    register int len,
+				    RE_TRANSLATE_TYPE translate,
+				    const int multibyte));
 
 /* This converts PTR, a pointer into one of the search strings `string1'
    and `string2' into an offset from the beginning of that string.  */
@@ -4067,7 +3933,9 @@ static int bcmp_translate ();
    : ((regoff_t) ((ptr) - string2 + size1)))
 
 /* Call before fetching a character with *d.  This switches over to
-   string2 if necessary.  */
+   string2 if necessary.
+   Check re_match_2_internal for a discussion of why end_match_2 might
+   not be within string2 (but be equal to end_match_1 instead).  */
 #define PREFETCH()							\
   while (d == dend)							\
     {									\
@@ -4159,9 +4027,9 @@ skip_one_char (p)
 	p += 1 + CHARSET_BITMAP_SIZE (p - 1);
       break;
       
-#ifdef emacs
     case syntaxspec:
     case notsyntaxspec:
+#ifdef emacs
     case categoryspec:
     case notcategoryspec:
 #endif /* emacs */
@@ -4210,7 +4078,7 @@ mutually_exclusive_p (bufp, p1, p2)
      unsigned char *p1, *p2;
 {
   re_opcode_t op2;
-  const boolean multibyte = bufp->multibyte;
+  const boolean multibyte = RE_MULTIBYTE_P (bufp);
   unsigned char *pend = bufp->buffer + bufp->used;
 
   assert (p1 >= bufp->buffer && p1 < pend
@@ -4357,7 +4225,6 @@ mutually_exclusive_p (bufp, p1, p2)
 	  }
       }
       
-#ifdef emacs
     case wordend:
     case notsyntaxspec:
       return ((re_opcode_t) *p1 == syntaxspec
@@ -4373,6 +4240,7 @@ mutually_exclusive_p (bufp, p1, p2)
 	       || (re_opcode_t) *p1 == syntaxspec)
 	      && p1[1] == Sword);
 
+#ifdef emacs
     case categoryspec:
       return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]);
     case notcategoryspec:
@@ -4490,7 +4358,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
   RE_TRANSLATE_TYPE translate = bufp->translate;
 
   /* Nonzero if we have to concern multibyte character.	 */
-  int multibyte = bufp->multibyte;
+  const boolean multibyte = RE_MULTIBYTE_P (bufp);
 
   /* Failure point stack.  Each place that can handle a failure further
      down the line pushes a failure point on this stack.  It consists of
@@ -4597,15 +4465,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
   for (mcnt = 1; mcnt < num_regs; mcnt++)
     regstart[mcnt] = regend[mcnt] = REG_UNSET_VALUE;
 
-  /* Shorten strings to `stop'.  */
-  if (stop <= size1)
-    {
-      size1 = stop;
-      size2 = 0;
-    }
-  else if (stop <= size1 + size2)
-    size2 = stop - size1;
-
   /* We move `string1' into `string2' if the latter's empty -- but not if
      `string1' is null.	 */
   if (size2 == 0 && string1 != NULL)
@@ -4618,25 +4477,42 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
   end1 = string1 + size1;
   end2 = string2 + size2;
 
-  /* Compute where to stop matching, within the two strings.  */
-  end_match_1 = end1;
-  end_match_2 = end2;
-
   /* `p' scans through the pattern as `d' scans through the data.
      `dend' is the end of the input string that `d' points within.  `d'
      is advanced into the following input string whenever necessary, but
      this happens before fetching; therefore, at the beginning of the
      loop, `d' can be pointing at the end of a string, but it cannot
      equal `string2'.  */
-  if (size1 > 0 && pos <= size1)
+  if (pos >= size1)
     {
-      d = string1 + pos;
-      dend = end_match_1;
+      /* Only match within string2.  */
+      d = string2 + pos - size1;
+      dend = end_match_2 = string2 + stop - size1;
+      end_match_1 = end1;	/* Just to give it a value.  */
     }
   else
     {
-      d = string2 + pos - size1;
-      dend = end_match_2;
+      if (stop <= size1)
+	{
+	  /* Only match within string1.  */
+	  end_match_1 = string1 + stop;
+	  /* BEWARE!
+	     When we reach end_match_1, PREFETCH normally switches to string2.
+	     But in the present case, this means that just doing a PREFETCH
+	     makes us jump from `stop' to `gap' within the string.
+	     What we really want here is for the search to stop as
+	     soon as we hit end_match_1.  That's why we set end_match_2
+	     to end_match_1 (since PREFETCH fails as soon as we hit
+	     end_match_2).  */
+	  end_match_2 = end_match_1;
+	}
+      else
+	{
+	  end_match_1 = end1;
+	  end_match_2 = string2 + stop - size1;
+	}
+      d = string1 + pos;
+      dend = end_match_1;
     }
 
   DEBUG_PRINT1 ("The compiled pattern is: ");
@@ -4838,7 +4714,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	     testing `translate' inside the loop.  */
 	  if (RE_TRANSLATE_P (translate))
 	    {
-#ifdef emacs
 	      if (multibyte)
 		do
 		  {
@@ -4862,7 +4737,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		  }
 		while (mcnt > 0);
 	      else
-#endif /* not emacs */
 		do
 		  {
 		    PREFETCH ();
@@ -4900,17 +4774,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	    DEBUG_PRINT1 ("EXECUTING anychar.\n");
 
 	    PREFETCH ();
-
-#ifdef emacs
-	    if (multibyte)
-	      buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
-	    else
-#endif /* not emacs */
-	      {
-		buf_ch = *d;
-		buf_charlen = 1;
-	      }
-
+	    buf_ch = RE_STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
 	    buf_ch = TRANSLATE (buf_ch);
 
 	    if ((!(bufp->syntax & RE_DOT_NEWLINE)
@@ -4945,27 +4809,20 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 
 	    DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
 
-	    PREFETCH ();
-	    c = *d;
-
 	    range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
 
-#ifdef emacs
 	    if (range_table_exists)
 	      {
 		range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap.  */
 		EXTRACT_NUMBER_AND_INCR (count, range_table);
 	      }
 
-	    if (multibyte && BASE_LEADING_CODE_P (c))
-	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
-#endif /* emacs */
+	    PREFETCH ();
+	    c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
+	    c = TRANSLATE (c); /* The character to match.  */
 
 	    if (SINGLE_BYTE_CHAR_P (c))
 	      {			/* Lookup bitmap.  */
-		c = TRANSLATE (c); /* The character to match.  */
-		len = 1;
-
 		/* Cast to `unsigned' instead of `unsigned char' in
 		   case the bit list is a full 32 bytes long.  */
 		if (c < (unsigned) (CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH)
@@ -5111,7 +4968,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		/* Compare that many; failure if mismatch, else move
 		   past them.  */
 		if (RE_TRANSLATE_P (translate)
-		    ? bcmp_translate (d, d2, mcnt, translate)
+		    ? bcmp_translate (d, d2, mcnt, translate, multibyte)
 		    : bcmp (d, d2, mcnt))
 		  {
 		    d = dfail;
@@ -5133,9 +4990,12 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	    {
 	      if (!bufp->not_bol) break;
 	    }
-	  else if (d[-1] == '\n' && bufp->newline_anchor)
+	  else
 	    {
-	      break;
+	      unsigned char c;
+	      GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2);
+	      if (c == '\n' && bufp->newline_anchor)
+		break;
 	    }
 	  /* In all other cases, we fail.  */
 	  goto fail;
@@ -5199,6 +5059,30 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	  PUSH_FAILURE_POINT (p - 3, NULL);
 	  break;
 
+	  /* A nasty loop is introduced by the non-greedy *? and +?.
+	     With such loops, the stack only ever contains one failure point
+	     at a time, so that a plain on_failure_jump_loop kind of
+	     cycle detection cannot work.  Worse yet, such a detection
+	     can not only fail to detect a cycle, but it can also wrongly
+	     detect a cycle (between different instantiations of the same
+	     loop.
+	     So the method used for those nasty loops is a little different:
+	     We use a special cycle-detection-stack-frame which is pushed
+	     when the on_failure_jump_nastyloop failure-point is *popped*.
+	     This special frame thus marks the beginning of one iteration
+	     through the loop and we can hence easily check right here
+	     whether something matched between the beginning and the end of
+	     the loop.  */
+	case on_failure_jump_nastyloop:
+	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
+	  DEBUG_PRINT3 ("EXECUTING on_failure_jump_nastyloop %d (to %p):\n",
+			mcnt, p + mcnt);
+
+	  assert ((re_opcode_t)p[-4] == no_op);
+	  CHECK_INFINITE_LOOP (p - 4, d);
+	  PUSH_FAILURE_POINT (p - 3, d);
+	  break;
+
 
 	  /* Simple loop detecting on_failure_jump:  just check on the
 	     failure stack if the same spot was already hit earlier.  */
@@ -5356,18 +5240,17 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		 is the character at D, and S2 is the syntax of C2.  */
 	      int c1, c2, s1, s2;
 #ifdef emacs
-	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d - 1));
+	      int offset = PTR_TO_OFFSET (d - 1);
+	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
 	      UPDATE_SYNTAX_TABLE (charpos);
 #endif
-	      /* FIXME: This does a STRING_CHAR even for unibyte buffers.  */
 	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
 	      s1 = SYNTAX (c1);
 #ifdef emacs
 	      UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
 #endif
 	      PREFETCH ();
-	      /* FIXME: This does a STRING_CHAR even for unibyte buffers.  */
-	      c2 = STRING_CHAR (d, dend - d);
+	      c2 = RE_STRING_CHAR (d, dend - d);
 	      s2 = SYNTAX (c2);
 
 	      if (/* Case 2: Only one of S1 and S2 is Sword.  */
@@ -5396,12 +5279,12 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		 is the character at D, and S2 is the syntax of C2.  */
 	      int c1, c2, s1, s2;
 #ifdef emacs
-	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
+	      int offset = PTR_TO_OFFSET (d);
+	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
 	      UPDATE_SYNTAX_TABLE (charpos);
 #endif
 	      PREFETCH ();
-	      /* FIXME: This does a STRING_CHAR even for unibyte buffers.  */
-	      c2 = STRING_CHAR (d, dend - d);
+	      c2 = RE_STRING_CHAR (d, dend - d);
 	      s2 = SYNTAX (c2);
 	
 	      /* Case 2: S2 is not Sword. */
@@ -5439,7 +5322,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		 is the character at D, and S2 is the syntax of C2.  */
 	      int c1, c2, s1, s2;
 #ifdef emacs
-	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d) - 1);
+	      int offset = PTR_TO_OFFSET (d) - 1;
+	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
 	      UPDATE_SYNTAX_TABLE (charpos);
 #endif
 	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
@@ -5453,8 +5337,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	      if (!AT_STRINGS_END (d))
 		{
 		  PREFETCH ();
-		  /* FIXME: This does a STRING_CHAR even for unibyte buffers.  */
-		  c2 = STRING_CHAR (d, dend - d);
+		  c2 = RE_STRING_CHAR (d, dend - d);
 #ifdef emacs
 		  UPDATE_SYNTAX_TABLE_FORWARD (charpos);
 #endif
@@ -5468,141 +5351,66 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	    }
 	  break;
 
-#ifdef emacs
-	case before_dot:
-	  DEBUG_PRINT1 ("EXECUTING before_dot.\n");
-	  if (PTR_BYTE_POS (d) >= PT_BYTE)
-	    goto fail;
-	  break;
-
-	case at_dot:
-	  DEBUG_PRINT1 ("EXECUTING at_dot.\n");
-	  if (PTR_BYTE_POS (d) != PT_BYTE)
-	    goto fail;
-	  break;
-
-	case after_dot:
-	  DEBUG_PRINT1 ("EXECUTING after_dot.\n");
-	  if (PTR_BYTE_POS (d) <= PT_BYTE)
-	    goto fail;
-	  break;
-
 	case syntaxspec:
-	  DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
+	case notsyntaxspec:
+	  not = (re_opcode_t) *(p - 1) == notsyntaxspec;
 	  mcnt = *p++;
-	  goto matchsyntax;
-
-	case wordchar:
-	  DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
-	  mcnt = (int) Sword;
-	matchsyntax:
+	  DEBUG_PRINT3 ("EXECUTING %ssyntaxspec %d.\n", not?"not":"", mcnt);
 	  PREFETCH ();
 #ifdef emacs
 	  {
-	    int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
+	    int offset = PTR_TO_OFFSET (d);
+	    int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
 	    UPDATE_SYNTAX_TABLE (pos1);
 	  }
 #endif
 	  {
 	    int c, len;
 
-	    if (multibyte)
-	      /* we must concern about multibyte form, ... */
-	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
-	    else
-	      /* everything should be handled as ASCII, even though it
-		 looks like multibyte form.  */
-	      c = *d, len = 1;
+	    c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
 
-	    if (SYNTAX (c) != (enum syntaxcode) mcnt)
-	    goto fail;
+	    if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not)
+	      goto fail;
 	    d += len;
 	  }
 	  break;
 
-	case notsyntaxspec:
-	  DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
-	  mcnt = *p++;
-	  goto matchnotsyntax;
-
-	case notwordchar:
-	  DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
-	  mcnt = (int) Sword;
-	matchnotsyntax:
-	  PREFETCH ();
 #ifdef emacs
-	  {
-	    int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
-	    UPDATE_SYNTAX_TABLE (pos1);
-	  }
-#endif
-	  {
-	    int c, len;
-
-	    if (multibyte)
-	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
-	    else
-	      c = *d, len = 1;
-
-	    if (SYNTAX (c) == (enum syntaxcode) mcnt)
+	case before_dot:
+	  DEBUG_PRINT1 ("EXECUTING before_dot.\n");
+	  if (PTR_BYTE_POS (d) >= PT_BYTE)
 	    goto fail;
-	    d += len;
-	  }
 	  break;
 
-	case categoryspec:
-	  DEBUG_PRINT2 ("EXECUTING categoryspec %d.\n", *p);
-	  mcnt = *p++;
-	  PREFETCH ();
-	  {
-	    int c, len;
-
-	    if (multibyte)
-	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
-	    else
-	      c = *d, len = 1;
+	case at_dot:
+	  DEBUG_PRINT1 ("EXECUTING at_dot.\n");
+	  if (PTR_BYTE_POS (d) != PT_BYTE)
+	    goto fail;
+	  break;
 
-	    if (!CHAR_HAS_CATEGORY (c, mcnt))
-	      goto fail;
-	    d += len;
-	  }
+	case after_dot:
+	  DEBUG_PRINT1 ("EXECUTING after_dot.\n");
+	  if (PTR_BYTE_POS (d) <= PT_BYTE)
+	    goto fail;
 	  break;
 
+	case categoryspec:
 	case notcategoryspec:
-	  DEBUG_PRINT2 ("EXECUTING notcategoryspec %d.\n", *p);
+	  not = (re_opcode_t) *(p - 1) == notcategoryspec;
 	  mcnt = *p++;
+	  DEBUG_PRINT3 ("EXECUTING %scategoryspec %d.\n", not?"not":"", mcnt);
 	  PREFETCH ();
 	  {
 	    int c, len;
+	    c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
 
-	    if (multibyte)
-	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
-	    else
-	      c = *d, len = 1;
-
-	    if (CHAR_HAS_CATEGORY (c, mcnt))
+	    if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
 	      goto fail;
 	    d += len;
 	  }
-          break;
-
-#else /* not emacs */
-	case wordchar:
-          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
-	  PREFETCH ();
-          if (!WORDCHAR_P (d))
-            goto fail;
-          d++;
 	  break;
 
-	case notwordchar:
-          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
-	  PREFETCH ();
-	  if (WORDCHAR_P (d))
-            goto fail;
-          d++;
-	  break;
-#endif /* not emacs */
+#endif /* emacs */
 
         default:
           abort ();
@@ -5626,6 +5434,11 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	      assert (str == NULL);
 	      goto continue_failure_jump;
 
+	    case on_failure_jump_nastyloop:
+	      assert ((re_opcode_t)pat[-2] == no_op);
+	      PUSH_FAILURE_POINT (pat - 2, str);
+	      /* Fallthrough */
+
 	    case on_failure_jump_loop:
 	    case on_failure_jump:
 	    case succeed_n:
@@ -5635,6 +5448,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	      p = pat + mcnt;
 	      break;
 
+	    case no_op:
+	      /* A special frame used for nastyloops. */
+	      goto fail;
+
 	    default:
 	      abort();
 	    }
@@ -5662,23 +5479,23 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
    bytes; nonzero otherwise.  */
 
 static int
-bcmp_translate (s1, s2, len, translate)
-     unsigned char *s1, *s2;
+bcmp_translate (s1, s2, len, translate, multibyte)
+     re_char *s1, *s2;
      register int len;
      RE_TRANSLATE_TYPE translate;
+     const int multibyte;
 {
-  register unsigned char *p1 = s1, *p2 = s2;
-  unsigned char *p1_end = s1 + len;
-  unsigned char *p2_end = s2 + len;
+  register re_char *p1 = s1, *p2 = s2;
+  re_char *p1_end = s1 + len;
+  re_char *p2_end = s2 + len;
 
   while (p1 != p1_end && p2 != p2_end)
     {
       int p1_charlen, p2_charlen;
       int p1_ch, p2_ch;
 
-      /* FIXME: This assumes `multibyte = true'.  */
-      p1_ch = STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
-      p2_ch = STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
+      p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
+      p2_ch = RE_STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
 
       if (RE_TRANSLATE (translate, p1_ch)
 	  != RE_TRANSLATE (translate, p2_ch))