X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=regex.c;h=c9219c66c83624e8f2273ed11577ef22967a558b;hb=19e03725a5160847ec25e1ac559cedb86bf67205;hp=6a45db69b8fce0a4c2802ac8ad996d981a909647;hpb=837c5301b458371192e6bd370dac274b070c8dc2;p=gnulib.git

diff --git a/regex.c b/regex.c
index 6a45db69b..c9219c66c 100644
--- a/regex.c
+++ b/regex.c
@@ -2,7 +2,7 @@
    0.12.  (Implements POSIX draft P10003.2/D11.2, except for
    internationalization features.)
 
-   Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
+   Copyright (C) 1993, 1994-1998, 1999 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -27,11 +27,15 @@
 #undef	_GNU_SOURCE
 #define _GNU_SOURCE
 
+#ifdef emacs
 /* Converts the pointer to the char to BEG-based offset from the start.	 */
 #define PTR_TO_OFFSET(d)						\
 	POS_AS_IN_BUFFER (MATCHING_IN_FIRST_STRING			\
 			  ? (d) - string1 : (d) - (string2 - size1))
-#define POS_AS_IN_BUFFER(p) ((p) + 1)
+#define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
+#else
+#define PTR_TO_OFFSET(d) 0
+#endif
 
 #ifdef HAVE_CONFIG_H
 #include <config.h>
@@ -68,6 +72,7 @@
 #include "category.h"
 
 #define malloc xmalloc
+#define realloc xrealloc
 #define free xfree
 
 #else  /* not emacs */
@@ -168,7 +173,7 @@ init_syntax_once ()
 
 #define SYNTAX(c) re_syntax_table[c]
 
-/* Dummy macro for non emacs environments.  */
+/* Dummy macros for non-Emacs environments.  */
 #define BASE_LEADING_CODE_P(c) (0)
 #define WORD_BOUNDARY_P(c1, c2) (0)
 #define CHAR_HEAD_P(p) (1)
@@ -189,6 +194,64 @@ init_syntax_once ()
 /* isalpha etc. are used for the character classes.  */
 #include <ctype.h>
 
+#ifdef emacs
+
+/* 1 if C is an ASCII character.  */
+#define IS_REAL_ASCII(c) ((c) < 0200)
+
+/* 1 if C is a unibyte character.  */
+#define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
+
+/* The Emacs definitions should not be directly affected by locales.  */
+
+/* In Emacs, these are only used for single-byte characters.  */
+#define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
+#define ISCNTRL(c) ((c) < ' ')
+#define ISXDIGIT(c) (((c) >= '0' && (c) <= '9')		\
+		     || ((c) >= 'a' && (c) <= 'f')	\
+		     || ((c) >= 'A' && (c) <= 'F'))
+
+/* This is only used for single-byte characters.  */
+#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+
+/* The rest must handle multibyte characters.  */
+
+#define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c)				\
+		    ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237)	\
+		    : 1)
+
+#define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c)		\
+		    ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237)	\
+		    : 1)
+
+#define ISALNUM(c) (IS_REAL_ASCII (c)			\
+		    ? (((c) >= 'a' && (c) <= 'z')	\
+		       || ((c) >= 'A' && (c) <= 'Z')	\
+		       || ((c) >= '0' && (c) <= '9'))	\
+		    : SYNTAX (c) == Sword)
+
+#define ISALPHA(c) (IS_REAL_ASCII (c)			\
+		    ? (((c) >= 'a' && (c) <= 'z')	\
+		       || ((c) >= 'A' && (c) <= 'Z'))	\
+		    : SYNTAX (c) == Sword)
+
+#define ISLOWER(c) (LOWERCASEP (c))
+
+#define ISPUNCT(c) (IS_REAL_ASCII (c)				\
+		    ? ((c) > ' ' && (c) < 0177			\
+		       && !(((c) >= 'a' && (c) <= 'z')		\
+		            || ((c) >= 'A' && (c) <= 'Z')	\
+		            || ((c) >= '0' && (c) <= '9')))	\
+		    : SYNTAX (c) != Sword)
+
+#define ISSPACE(c) (SYNTAX (c) == Swhitespace)
+
+#define ISUPPER(c) (UPPERCASEP (c))
+
+#define ISWORD(c) (SYNTAX (c) == Sword)
+
+#else /* not emacs */
+
 /* Jim Meyering writes:
 
    "... Some ctype macros are valid only for character codes that
@@ -206,6 +269,16 @@ init_syntax_once ()
 #define ISASCII(c) isascii(c)
 #endif
 
+/* 1 if C is an ASCII character.  */
+#define IS_REAL_ASCII(c) ((c) < 0200)
+
+/* This distinction is not meaningful, except in Emacs.  */
+#define ISUNIBYTE(c) 1
+
+#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
+#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
+#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
+
 #ifdef isblank
 #define ISBLANK(c) (ISASCII (c) && isblank (c))
 #else
@@ -228,6 +301,10 @@ init_syntax_once ()
 #define ISUPPER(c) (ISASCII (c) && isupper (c))
 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
 
+#define ISWORD(c) ISALPHA(c)
+
+#endif /* not emacs */
+
 #ifndef NULL
 #define NULL (void *)0
 #endif
@@ -378,7 +455,15 @@ typedef enum
 	   for a bitmap saying which chars are in.  Bits in each byte
 	   are ordered low-bit-first.  A character is in the set if its
 	   bit is 1.  A character too large to have a bit in the map is
-	   automatically not in the set.  */
+	   automatically not in the set.
+
+	   If the length byte has the 0x80 bit set, then that stuff
+	   is followed by a range table:
+	       2 bytes of flags for character sets (low 8 bits, high 8 bits)
+	           See RANGE_TABLE_WORK_BITS below.
+	       2 bytes, the number of pairs that follow
+	       pairs, each 2 multibyte characters,
+	           each multibyte character represented as 3 bytes.  */
   charset,
 
 	/* Same parameters as charset, but match any character that is
@@ -612,8 +697,14 @@ extract_number_and_incr (destination, source)
 
 /* Return the address of range table of charset P.  But not the start
    of table itself, but the before where the number of ranges is
-   stored.  `2 +' means to skip re_opcode_t and size of bitmap.	 */
-#define CHARSET_RANGE_TABLE(p) (&(p)[2 + CHARSET_BITMAP_SIZE (p)])
+   stored.  `4 +' means to skip re_opcode_t, the size of bitmap,
+   and the 2 bytes of flags at the start of the range table.  */
+#define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)])
+
+/* Extract the bit flags that start a range table.  */
+#define CHARSET_RANGE_TABLE_BITS(p)		\
+  ((p)[2 + CHARSET_BITMAP_SIZE (p)]		\
+   + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100)
 
 /* Test if C is listed in the bitmap of charset P.  */
 #define CHARSET_LOOKUP_BITMAP(p, c)				\
@@ -786,6 +877,9 @@ print_partial_compiled_pattern (start, end)
 	  {
 	    register int c, last = -100;
 	    register int in_range = 0;
+	    int length = *p & 0x7f;
+	    int has_range_table = *p & 0x80;
+	    int range_length = p[length + 2] + p[length + 3] * 0x100;
 
 	    printf ("/charset [%s",
 		    (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
@@ -793,7 +887,7 @@ print_partial_compiled_pattern (start, end)
 	    assert (p + *p < pend);
 
 	    for (c = 0; c < 256; c++)
-	      if (c / 8 < *p
+	      if (c / 8 < length
 		  && (p[1 + (c/8)] & (1 << (c % 8))))
 		{
 		  /* Are we starting a range?  */
@@ -804,7 +898,7 @@ print_partial_compiled_pattern (start, end)
 		    }
 		  /* Have we broken a range?  */
 		  else if (last + 1 != c && in_range)
-	      {
+		    {
 		      putchar (last);
 		      in_range = 0;
 		    }
@@ -815,12 +909,20 @@ print_partial_compiled_pattern (start, end)
 		  last = c;
 	      }
 
+	    p += 1 + length;
+
 	    if (in_range)
 	      putchar (last);
 
 	    putchar (']');
 
-	    p += 1 + *p;
+	    if (has_range_table)
+	      printf ("has-range-table");
+
+	    /* ??? Should print the range table; for now,
+	       just skip it.  */
+	    if (has_range_table)
+	      p += 4 + 6 * range_length;
 	  }
 	  break;
 
@@ -1120,23 +1222,25 @@ static const char *re_error_msgid[] =
    REGEX_ALLOCATE_STACK.  */
 
 
-/* Number of failure points for which to initially allocate space
+/* Approximate number of failure points for which to initially allocate space
    when matching.  If this number is exceeded, we allocate more
    space, so it is not a hard limit.  */
 #ifndef INIT_FAILURE_ALLOC
-#define INIT_FAILURE_ALLOC 5
+#define INIT_FAILURE_ALLOC 20
 #endif
 
 /* Roughly the maximum number of failure points on the stack.  Would be
-   exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
+   exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
    This is a variable only so users of regex can assign to it; we never
    change it ourselves.	 */
 #if defined (MATCH_MAY_ALLOCATE)
-/* 4400 was enough to cause a crash on Alpha OSF/1,
-   whose default stack limit is 2mb.  */
-int re_max_failures = 20000;
+/* Note that 4400 is enough to cause a crash on Alpha OSF/1,
+   whose default stack limit is 2mb.  In order for a larger
+   value to work reliably, you have to try to make it accord
+   with the process stack limit.  */
+int re_max_failures = 40000;
 #else
-int re_max_failures = 2000;
+int re_max_failures = 4000;
 #endif
 
 union fail_stack_elt
@@ -1166,7 +1270,8 @@ typedef struct
 #define INIT_FAIL_STACK()						\
   do {									\
     fail_stack.stack = (fail_stack_elt_t *)				\
-      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));	\
+      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE	\
+			    * sizeof (fail_stack_elt_t));		\
 									\
     if (fail_stack.stack == NULL)					\
       return -2;							\
@@ -1186,24 +1291,40 @@ typedef struct
 #endif
 
 
-/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
+/* Double the size of FAIL_STACK, up to a limit
+   which allows approximately `re_max_failures' items.
 
    Return 1 if succeeds, and 0 if either ran out of memory
    allocating space for it or it was already too large.
 
    REGEX_REALLOCATE_STACK requires `destination' be declared.	*/
 
-#define DOUBLE_FAIL_STACK(fail_stack)					\
-  ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS		\
+/* Factor to increase the failure stack size by
+   when we increase it.
+   This used to be 2, but 2 was too wasteful
+   because the old discarded stacks added up to as much space
+   as the ultimate, maximum-size stack.  */
+#define FAIL_STACK_GROWTH_FACTOR 4
+
+#define GROW_FAIL_STACK(fail_stack)					\
+  (((fail_stack).size * sizeof (fail_stack_elt_t)			\
+    >= re_max_failures * TYPICAL_FAILURE_SIZE)				\
    ? 0									\
-   : ((fail_stack).stack = (fail_stack_elt_t *)				\
+   : ((fail_stack).stack						\
+      = (fail_stack_elt_t *)						\
 	REGEX_REALLOCATE_STACK ((fail_stack).stack,			\
 	  (fail_stack).size * sizeof (fail_stack_elt_t),		\
-	  ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
+	  MIN (re_max_failures * TYPICAL_FAILURE_SIZE,			\
+	       ((fail_stack).size * sizeof (fail_stack_elt_t)		\
+		* FAIL_STACK_GROWTH_FACTOR))),				\
 									\
       (fail_stack).stack == NULL					\
       ? 0								\
-      : ((fail_stack).size <<= 1,					\
+      : ((fail_stack).size						\
+	 = (MIN (re_max_failures * TYPICAL_FAILURE_SIZE,		\
+		 ((fail_stack).size * sizeof (fail_stack_elt_t)		\
+		  * FAIL_STACK_GROWTH_FACTOR))				\
+	    / sizeof (fail_stack_elt_t)),				\
 	 1)))
 
 
@@ -1212,7 +1333,7 @@ typedef struct
    space to do so.  */
 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
   ((FAIL_STACK_FULL ()							\
-    && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
+    && !GROW_FAIL_STACK (FAIL_STACK))					\
    ? 0									\
    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
       1))
@@ -1255,7 +1376,7 @@ typedef struct
    if we ever fail back to it.
 
    Requires variables fail_stack, regstart, regend, reg_info, and
-   num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
+   num_regs be declared.  GROW_FAIL_STACK requires `destination' be
    declared.
 
    Does `return FAILURE_CODE' if runs out of memory.  */
@@ -1279,7 +1400,7 @@ typedef struct
     /* Ensure we have enough space allocated for what we will push.  */	\
     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
       {									\
-	if (!DOUBLE_FAIL_STACK (fail_stack))				\
+	if (!GROW_FAIL_STACK (fail_stack))				\
 	  return failure_code;						\
 									\
 	DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
@@ -1346,13 +1467,14 @@ typedef struct
 #define NUM_NONREG_ITEMS 4
 #endif
 
-/* We push at most this many items on the stack.  */
-/* We used to use (num_regs - 1), which is the number of registers
-   this regexp will save; but that was changed to 5
-   to avoid stack overflow for a regexp with lots of parens.  */
-#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
+/* Estimate the size of data pushed by a typical failure stack entry.
+   An estimate is all we need, because all we use this for
+   is to choose a limit for how big to make the failure stack.  */
 
-/* We actually push this many items.  */
+#define TYPICAL_FAILURE_SIZE 20
+
+/* This is how many items we actually use for a failure point.
+   It depends on the regexp.  */
 #define NUM_FAILURE_ITEMS				\
   (((0							\
      ? 0 : highest_active_reg - lowest_active_reg + 1)	\
@@ -1390,8 +1512,8 @@ typedef struct
 									\
   assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
 									\
-  DEBUG_POP (&failure_id);						\
-  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
+  DEBUG_POP (&failure_id.integer);					\
+  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id.integer);	\
 									\
   /* If the saved string location is NULL, it came from an		\
      on_failure_keep_string_jump opcode, and we want to throw away the	\
@@ -1519,7 +1641,7 @@ static reg_errcode_t compile_range ();
 #define PATFETCH(c)							\
   do {if (p == pend) return REG_EEND;					\
     c = (unsigned char) *p++;						\
-    if (translate) c = (unsigned char) translate[c];			\
+    if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c);	\
   } while (0)
 #endif
 
@@ -1540,7 +1662,8 @@ static reg_errcode_t compile_range ();
    when we use a character as a subscript we must make it unsigned.  */
 #ifndef TRANSLATE
 #define TRANSLATE(d) \
-  (translate ? (unsigned char) translate[(unsigned char) (d)] : (d))
+  (RE_TRANSLATE_P (translate) \
+   ? (unsigned) RE_TRANSLATE (translate, (unsigned) (d)) : (d))
 #endif
 
 
@@ -1684,6 +1807,7 @@ struct range_table_work_area
   int *table;			/* actual work area.  */
   int allocated;		/* allocated size for work area in bytes.  */
   int used;			/* actually used size in words.	 */
+  int bits;			/* flag to record character classes */
 };
 
 /* Make sure that WORK_AREA can hold more N multibyte characters.  */
@@ -1703,6 +1827,25 @@ struct range_table_work_area
       }									  \
   } while (0)
 
+#define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit)		\
+  (work_area).bits |= (bit)
+
+/* These bits represent the various character classes such as [:alnum:]
+   in a charset's range table.  */
+#define BIT_ALNUM 0x1
+#define BIT_ALPHA 0x2
+#define BIT_WORD  0x4
+#define BIT_ASCII 0x8
+#define BIT_NONASCII 0x10
+#define BIT_GRAPH 0x20
+#define BIT_LOWER 0x40
+#define BIT_PRINT 0x80
+#define BIT_PUNCT 0x100
+#define BIT_SPACE 0x200
+#define BIT_UPPER 0x400
+#define BIT_UNIBYTE 0x800
+#define BIT_MULTIBYTE 0x1000
+
 /* Set a range (RANGE_START, RANGE_END) to WORK_AREA.  */
 #define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end)	\
   do {									\
@@ -1718,8 +1861,9 @@ struct range_table_work_area
       free ((work_area).table);			\
   } while (0)
 
-#define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0)
+#define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0, (work_area).bits = 0)
 #define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
+#define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits)
 #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
 
 
@@ -1754,7 +1898,10 @@ struct range_table_work_area
     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
     || STREQ (string, "space") || STREQ (string, "print")		\
     || STREQ (string, "punct") || STREQ (string, "graph")		\
-    || STREQ (string, "cntrl") || STREQ (string, "blank"))
+    || STREQ (string, "cntrl") || STREQ (string, "blank")		\
+    || STREQ (string, "word")						\
+    || STREQ (string, "ascii") || STREQ (string, "nonascii")		\
+    || STREQ (string, "unibyte") || STREQ (string, "multibyte"))
 
 #ifndef MATCH_MAY_ALLOCATE
 
@@ -1852,7 +1999,12 @@ regex_compile (pattern, size, syntax, bufp)
   compile_stack_type compile_stack;
 
   /* Points to the current (ending) position in the pattern.  */
+#ifdef AIX
+  /* `const' makes AIX compiler fail.  */
+  char *p = pattern;
+#else
   const char *p = pattern;
+#endif
   const char *pend = pattern + size;
 
   /* How to translate the characters in the pattern.  */
@@ -2016,6 +2168,7 @@ regex_compile (pattern, size, syntax, bufp)
 
 	    /* 1 means zero (many) matches is allowed.	*/
 	    char zero_times_ok = 0, many_times_ok = 0;
+	    char greedy = 1;
 
 	    /* If there is a sequence of repetition chars, collapse it
 	       down to just one (the right one).  We can't combine
@@ -2024,8 +2177,14 @@ regex_compile (pattern, size, syntax, bufp)
 
 	    for (;;)
 	      {
-		zero_times_ok |= c != '+';
-		many_times_ok |= c != '?';
+		if (!(syntax & RE_ALL_GREEDY)
+		    && c == '?' && (zero_times_ok || many_times_ok))
+		  greedy = 0;
+		else
+		  {
+		    zero_times_ok |= c != '+';
+		    many_times_ok |= c != '?';
+		  }
 
 		if (p == pend)
 		  break;
@@ -2066,6 +2225,8 @@ regex_compile (pattern, size, syntax, bufp)
 
 	    /* Now we know whether or not zero matches is allowed
 	       and also whether or not two or more matches is allowed.	*/
+	    if (greedy)
+	      {
 	    if (many_times_ok)
 	      { /* More than one repetition is allowed, so put in at the
 		   end a backward relative jump from `b' to before the next
@@ -2087,9 +2248,10 @@ regex_compile (pattern, size, syntax, bufp)
 		   incremented `p', by the way, to be the character after
 		   the `*'.  Do we have to do something analogous here
 		   for null bytes, because of RE_DOT_NOT_NULL?	*/
-		if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
+		if (TRANSLATE ((unsigned char)*(p - 2)) == TRANSLATE ('.')
 		    && zero_times_ok
-		    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
+		    && p < pend
+		    && TRANSLATE ((unsigned char)*p) == TRANSLATE ('\n')
 		    && !(syntax & RE_DOT_NEWLINE))
 		  { /* We have .*\n.  */
 		    STORE_JUMP (jump, b, laststart);
@@ -2123,7 +2285,39 @@ regex_compile (pattern, size, syntax, bufp)
 		INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
 		b += 3;
 	      }
-	    }
+
+	      }
+	    else		/* not greedy */
+	      { /* I wish the greedy and non-greedy cases could be merged. */
+
+		if (many_times_ok)
+		  {
+		    /* The non-greedy multiple match looks like a repeat..until:
+		       we only need a conditional jump at the end of the loop.  */
+		    GET_BUFFER_SPACE (3);
+		    STORE_JUMP (on_failure_jump, b, laststart);
+		    b += 3;
+		    if (zero_times_ok)
+		      {
+			/* The repeat...until naturally matches one or more.
+			   To also match zero times, we need to first jump to
+			   the end of the loop (its conditional jump). */
+			GET_BUFFER_SPACE (3);
+			INSERT_JUMP (jump, laststart, b);
+			b += 3;
+		      }
+		  }
+		else
+		  {
+		    /* non-greedy a?? */
+		    GET_BUFFER_SPACE (6);
+		    INSERT_JUMP (jump, laststart, b + 3);
+		    b += 3;
+		    INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
+		    b += 3;
+		  }
+	      }
+	  }
 	  break;
 
 
@@ -2185,11 +2379,11 @@ regex_compile (pattern, size, syntax, bufp)
 		  }
 		else
 		  {
-		/* Could be the end of the bracket expression.	If it's
-		   not (i.e., when the bracket expression is `[]' so
-		   far), the ']' character bit gets set way below.  */
-		if (c == ']' && p != p1 + 1)
-		  break;
+		    /* Could be the end of the bracket expression.	If it's
+		       not (i.e., when the bracket expression is `[]' so
+		       far), the ']' character bit gets set way below.  */
+		    if (c == ']' && p != p1 + 1)
+		      break;
 		  }
 
 		/* If C indicates start of multibyte char, get the
@@ -2210,7 +2404,8 @@ regex_compile (pattern, size, syntax, bufp)
 
 		else if (!escaped_char &&
 			 syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
-		  { /* Leave room for the null.	 */
+		  {
+		    /* Leave room for the null.	 */
 		    char str[CHAR_CLASS_MAX_LENGTH + 1];
 
 		    PATFETCH (c);
@@ -2238,15 +2433,20 @@ regex_compile (pattern, size, syntax, bufp)
 			int ch;
 			boolean is_alnum = STREQ (str, "alnum");
 			boolean is_alpha = STREQ (str, "alpha");
+			boolean is_ascii = STREQ (str, "ascii");
 			boolean is_blank = STREQ (str, "blank");
 			boolean is_cntrl = STREQ (str, "cntrl");
 			boolean is_digit = STREQ (str, "digit");
 			boolean is_graph = STREQ (str, "graph");
 			boolean is_lower = STREQ (str, "lower");
+			boolean is_multibyte = STREQ (str, "multibyte");
+			boolean is_nonascii = STREQ (str, "nonascii");
 			boolean is_print = STREQ (str, "print");
 			boolean is_punct = STREQ (str, "punct");
 			boolean is_space = STREQ (str, "space");
+			boolean is_unibyte = STREQ (str, "unibyte");
 			boolean is_upper = STREQ (str, "upper");
+			boolean is_word = STREQ (str, "word");
 			boolean is_xdigit = STREQ (str, "xdigit");
 
 			if (!IS_CHAR_CLASS (str))
@@ -2258,6 +2458,35 @@ regex_compile (pattern, size, syntax, bufp)
 
 			if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
 
+			/* Most character classes in a multibyte match
+			   just set a flag.  Exceptions are is_blank,
+			   is_digit, is_cntrl, and is_xdigit, since
+			   they can only match ASCII characters.  We
+			   don't need to handle them for multibyte.  */
+
+			if (bufp->multibyte)
+			  {
+			    int bit = 0;
+
+			    if (is_alnum) bit = BIT_ALNUM;
+			    if (is_alpha) bit = BIT_ALPHA;
+			    if (is_ascii) bit = BIT_ASCII;
+			    if (is_graph) bit = BIT_GRAPH;
+			    if (is_lower) bit = BIT_LOWER;
+			    if (is_multibyte) bit = BIT_MULTIBYTE;
+			    if (is_nonascii) bit = BIT_NONASCII;
+			    if (is_print) bit = BIT_PRINT;
+			    if (is_punct) bit = BIT_PUNCT;
+			    if (is_space) bit = BIT_SPACE;
+			    if (is_unibyte) bit = BIT_UNIBYTE;
+			    if (is_upper) bit = BIT_UPPER;
+			    if (is_word) bit = BIT_WORD;
+			    if (bit)
+			      SET_RANGE_TABLE_WORK_AREA_BIT (range_table_work,
+							     bit);
+			  }
+
+			/* Handle character classes for ASCII characters.  */
 			for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
 			  {
 			    int translated = TRANSLATE (ch);
@@ -2278,6 +2507,14 @@ regex_compile (pattern, size, syntax, bufp)
 				|| (is_upper  && ISUPPER (ch))
 				|| (is_xdigit && ISXDIGIT (ch)))
 			      SET_LIST_BIT (translated);
+			    if (   (is_ascii  && IS_REAL_ASCII (ch))
+				|| (is_nonascii && !IS_REAL_ASCII (ch))
+				|| (is_unibyte && ISUNIBYTE (ch))
+				|| (is_multibyte && !ISUNIBYTE (ch)))
+			      SET_LIST_BIT (translated);
+
+			    if (   (is_word   && ISWORD (ch)))
+			      SET_LIST_BIT (translated);
 			  }
 
 			/* Repeat the loop. */
@@ -2312,7 +2549,18 @@ regex_compile (pattern, size, syntax, bufp)
 			p += len;
 		      }
 
-		    if (!SAME_CHARSET_P (c, c1))
+		    if (SINGLE_BYTE_CHAR_P (c)
+			&& ! SINGLE_BYTE_CHAR_P (c1))
+		      {
+			/* Handle a range such as \177-\377 in multibyte mode.
+			   Split it into two ranges: the high one, starting at
+			   ...040 (C1_BASE) and ending at C1, is recorded here;
+			   the low one ends at 0237, which is left in C1.  */
+			int c1_base = (c1 & ~0177) | 040;
+			SET_RANGE_TABLE_WORK_AREA (range_table_work, c1_base, c1);
+			c1 = 0237;
+		      }
+		    else if (!SAME_CHARSET_P (c, c1))
 		      FREE_STACK_RETURN (REG_ERANGE);
 		  }
 		else
@@ -2338,8 +2586,8 @@ regex_compile (pattern, size, syntax, bufp)
 			for (this_char = range_start; this_char <= range_end;
 			     this_char++)
 			  SET_LIST_BIT (TRANSLATE (this_char));
+		      }
 		  }
-	      }
 		else
 		  /* ... into range table.  */
 		  SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
@@ -2351,19 +2599,26 @@ regex_compile (pattern, size, syntax, bufp)
 	      b[-1]--;
 	    b += b[-1];
 
-	    /* Build real range table from work area. */
-	    if (RANGE_TABLE_WORK_USED (range_table_work))
+	    /* Build real range table from work area.  */
+	    if (RANGE_TABLE_WORK_USED (range_table_work)
+		|| RANGE_TABLE_WORK_BITS (range_table_work))
 	      {
 		int i;
 		int used = RANGE_TABLE_WORK_USED (range_table_work);
 
 		/* Allocate space for COUNT + RANGE_TABLE.  Needs two
-		   bytes for COUNT and three bytes for each character.	*/
-		GET_BUFFER_SPACE (2 + used * 3);
+		   bytes for flags, two for COUNT, and three bytes for
+		   each character. */
+		GET_BUFFER_SPACE (4 + used * 3);
 
 		/* Indicate the existence of range table.  */
 		laststart[1] |= 0x80;
 
+		/* Store the character class flag bits into the range table.
+		   If not in emacs, these flag bits are always 0.  */
+		*b++ = RANGE_TABLE_WORK_BITS (range_table_work) & 0xff;
+		*b++ = RANGE_TABLE_WORK_BITS (range_table_work) >> 8;
+
 		STORE_NUMBER_AND_INCR (b, used / 2);
 		for (i = 0; i < used; i++)
 		  STORE_CHARACTER_AND_INCR
@@ -2859,8 +3114,12 @@ regex_compile (pattern, size, syntax, bufp)
 	  p1 = p - 1;		/* P1 points the head of C.  */
 #ifdef emacs
 	  if (bufp->multibyte)
-	    /* Set P to the next character boundary.  */
-	    p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1;
+	    {
+	      c = STRING_CHAR (p1, pend - p1);
+	      c = TRANSLATE (c);
+	      /* Set P to the next character boundary.  */
+	      p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1;
+	    }
 #endif
 	      /* If no exactn currently being built.  */
 	  if (!pending_exact
@@ -2872,14 +3131,14 @@ regex_compile (pattern, size, syntax, bufp)
 	      || *pending_exact >= (1 << BYTEWIDTH) - (p - p1)
 
 	      /* If followed by a repetition operator.	*/
-	      || *p == '*' || *p == '^'
+	      || (p != pend && (*p == '*' || *p == '^'))
 	      || ((syntax & RE_BK_PLUS_QM)
-		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
-		  : (*p == '+' || *p == '?'))
+		  ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?')
+		  : p != pend && (*p == '+' || *p == '?'))
 	      || ((syntax & RE_INTERVALS)
 		  && ((syntax & RE_NO_BK_BRACES)
-		      ? *p == '{'
-		      : (p[0] == '\\' && p[1] == '{'))))
+		      ? p != pend && *p == '{'
+		      : p + 1 < pend && p[0] == '\\' && p[1] == '{')))
 	    {
 	      /* Start building a new exactn.  */
 
@@ -2889,16 +3148,23 @@ regex_compile (pattern, size, syntax, bufp)
 	      pending_exact = b - 1;
 	    }
 
-	  /* Here, C may translated, therefore C may not equal to *P1. */
-	  while (1)
+#ifdef emacs
+	  if (! SINGLE_BYTE_CHAR_P (c))
 	    {
-	  BUF_PUSH (c);
-	  (*pending_exact)++;
-	      if (++p1 == p)
-		break;
-
-	      /* Rest of multibyte form should be copied literally. */
-	      c = *(unsigned char *)p1;
+	      unsigned char str[MAX_MULTIBYTE_LENGTH];
+	      int i = CHAR_STRING (c, str);
+	      int j;
+	      for (j = 0; j < i; j++)
+		{
+		  BUF_PUSH (str[j]);
+		  (*pending_exact)++;
+		}
+	    }
+	  else
+#endif
+	    {
+	      BUF_PUSH (c);
+	      (*pending_exact)++;
 	    }
 	  break;
 	} /* switch (c) */
@@ -2938,12 +3204,9 @@ regex_compile (pattern, size, syntax, bufp)
   {
     int num_regs = bufp->re_nsub + 1;
 
-    /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
-       is strictly greater than re_max_failures, the largest possible stack
-       is 2 * re_max_failures failure points.  */
-    if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
+    if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE)
       {
-	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
+	fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE;
 
 #ifdef emacs
 	if (! fail_stack.stack)
@@ -3103,70 +3366,16 @@ group_in_compile_stack (compile_stack, regnum)
 
   return false;
 }
-
-
-/* Read the ending character of a range (in a bracket expression) from the
-   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
-   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
-   Then we set the translation of all bits between the starting and
-   ending characters (inclusive) in the compiled pattern B.
-
-   Return an error code.
-
-   We use these short variable names so we can use the same macros as
-   `regex_compile' itself.  */
-
-static reg_errcode_t
-compile_range (p_ptr, pend, translate, syntax, b)
-    const char **p_ptr, *pend;
-    RE_TRANSLATE_TYPE translate;
-    reg_syntax_t syntax;
-    unsigned char *b;
-{
-  unsigned this_char;
-
-  const char *p = *p_ptr;
-  int range_start, range_end;
-
-  if (p == pend)
-    return REG_ERANGE;
-
-  /* Even though the pattern is a signed `char *', we need to fetch
-     with unsigned char *'s; if the high bit of the pattern character
-     is set, the range endpoints will be negative if we fetch using a
-     signed char *.
-
-     We also want to fetch the endpoints without translating them; the
-     appropriate translation is done in the bit-setting loop below.  */
-  /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
-  range_start = ((const unsigned char *) p)[-2];
-  range_end   = ((const unsigned char *) p)[0];
-
-  /* Have to increment the pointer into the pattern string, so the
-     caller isn't still at the ending character.  */
-  (*p_ptr)++;
-
-  /* If the start is after the end, the range is empty.	 */
-  if (range_start > range_end)
-    return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
-
-  /* Here we see why `this_char' has to be larger than an `unsigned
-     char' -- the range is inclusive, so if `range_end' == 0xff
-     (assuming 8-bit characters), we would otherwise go into an infinite
-     loop, since all characters <= 0xff.  */
-  for (this_char = range_start; this_char <= range_end; this_char++)
-    {
-      SET_LIST_BIT (TRANSLATE (this_char));
-    }
-
-  return REG_NOERROR;
-}
 
 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
    characters can start a string that matches the pattern.  This fastmap
    is used by re_search to skip quickly over impossible starting points.
 
+   Character codes above (1 << BYTEWIDTH) are not represented in the
+   fastmap, but the leading codes are represented.  Thus, the fastmap
+   indicates which character sets could start a match.
+
    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
    area as BUFP->fastmap.
 
@@ -3268,22 +3477,30 @@ re_compile_fastmap (bufp)
 
 #ifndef emacs
 	case charset:
-	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
-	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
-	      fastmap[j] = 1;
-	  break;
+	  {
+	    int length = (*p & 0x7f);;
+	    p++;
 
+	    for (j = length * BYTEWIDTH - 1; j >= 0; j--)
+	      if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
+		fastmap[j] = 1;
+	  }
+	  break;
 
 	case charset_not:
 	  /* Chars beyond end of map must be allowed.  */
-	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
-	    fastmap[j] = 1;
+	  {
+	    int length = (*p & 0x7f);;
+	    p++;
 
-	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
-	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
+	    for (j = length * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
 	      fastmap[j] = 1;
-	  break;
 
+	    for (j = length * BYTEWIDTH - 1; j >= 0; j--)
+	      if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
+		fastmap[j] = 1;
+	  }
+	  break;
 
 	case wordchar:
 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
@@ -3304,6 +3521,12 @@ re_compile_fastmap (bufp)
 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
 	      fastmap[j] = 1;
 
+	  /* If we can match a character class, we can match
+	     any character set.  */
+	  if (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
+	      && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0)
+	    goto set_fastmap_for_multibyte_characters;
+
 	  if (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
 	      && match_any_multibyte_characters == false)
 	    {
@@ -3314,8 +3537,7 @@ re_compile_fastmap (bufp)
 	      /* Make P points the range table. */
 	      p += CHARSET_BITMAP_SIZE (&p[-2]);
 
-	      /* Extract the number of ranges in range table into
-		 COUNT.	 */
+	      /* Extract the number of ranges in range table into COUNT.  */
 	      EXTRACT_NUMBER_AND_INCR (count, p);
 	      for (; count > 0; count--, p += 2 * 3) /* XXX */
 		{
@@ -3329,9 +3551,11 @@ re_compile_fastmap (bufp)
 
 
 	case charset_not:
-	  /* Chars beyond end of map must be allowed.  End of map is
-	     `127' if bufp->multibyte is nonzero.  */
-	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+	  /* Chars beyond end of bitmap are possible matches.
+	     All the single-byte codes can occur in multibyte buffers.
+	     So any that are not listed in the charset
+	     are possible matches, even in multibyte buffers.  */
+	  simple_char_max = (1 << BYTEWIDTH);
 	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
 	       j < simple_char_max; j++)
 	    fastmap[j] = 1;
@@ -3358,7 +3582,9 @@ re_compile_fastmap (bufp)
 
 
 	case wordchar:
-	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+	  /* All the single-byte codes can occur in multibyte buffers,
+	     and they may have word syntax.  So do consider them.  */
+	  simple_char_max = (1 << BYTEWIDTH);
 	  for (j = 0; j < simple_char_max; j++)
 	    if (SYNTAX (j) == Sword)
 	      fastmap[j] = 1;
@@ -3371,7 +3597,9 @@ re_compile_fastmap (bufp)
 
 
 	case notwordchar:
-	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+	  /* All the single-byte codes can occur in multibyte buffers,
+	     and they may not have word syntax.  So do consider them.  */
+	  simple_char_max = (1 << BYTEWIDTH);
 	  for (j = 0; j < simple_char_max; j++)
 	    if (SYNTAX (j) != Sword)
 	      fastmap[j] = 1;
@@ -3387,21 +3615,13 @@ re_compile_fastmap (bufp)
 	  {
 	    int fastmap_newline = fastmap['\n'];
 
-	    /* `.' matches anything (but if bufp->multibyte is
-	       nonzero, matches `\000' .. `\127' and possible multibyte
-	       character) ...  */
+	    /* `.' matches anything, except perhaps newline.
+	       Even in a multibyte buffer, it should match any
+	       conceivable byte value for the fastmap.  */
 	    if (bufp->multibyte)
-	      {
-		simple_char_max = 0x80;
-
-		for (j = 0x80; j < 0xA0; j++)
-		  if (BASE_LEADING_CODE_P (j))
-		    fastmap[j] = 1;
-		match_any_multibyte_characters = true;
-	      }
-	    else
-	      simple_char_max = (1 << BYTEWIDTH);
+	      match_any_multibyte_characters = true;
 
+	    simple_char_max = (1 << BYTEWIDTH);
 	    for (j = 0; j < simple_char_max; j++)
 	      fastmap[j] = 1;
 
@@ -3459,7 +3679,7 @@ re_compile_fastmap (bufp)
 
 	case categoryspec:
 	  k = *p++;
-	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+	  simple_char_max = (1 << BYTEWIDTH);
 	  for (j = 0; j < simple_char_max; j++)
 	    if (CHAR_HAS_CATEGORY (j, k))
 	      fastmap[j] = 1;
@@ -3473,7 +3693,7 @@ re_compile_fastmap (bufp)
 
 	case notcategoryspec:
 	  k = *p++;
-	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+	  simple_char_max = (1 << BYTEWIDTH);
 	  for (j = 0; j < simple_char_max; j++)
 	    if (!CHAR_HAS_CATEGORY (j, k))
 	      fastmap[j] = 1;
@@ -3736,13 +3956,13 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
     range = total_size - startpos;
 
   /* If the search isn't to be a backwards one, don't waste time in a
-     search for a pattern that must be anchored.  */
+     search for a pattern anchored at beginning of buffer.  */
   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
     {
       if (startpos > 0)
 	return -1;
       else
-	range = 1;
+	range = 0;
     }
 
 #ifdef emacs
@@ -3750,8 +3970,8 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
      don't keep searching past point.  */
   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
     {
-      range = PT - startpos;
-      if (range <= 0)
+      range = PT_BYTE - BEGV_BYTE - startpos;
+      if (range < 0)
 	return -1;
     }
 #endif /* emacs */
@@ -3766,10 +3986,13 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
     anchored_start = 1;
 
 #ifdef emacs
-  SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object,
-				 POS_AS_IN_BUFFER (startpos > 0
-						   ? startpos - 1 : startpos),
-				 1);
+  gl_state.object = re_match_object;
+  {
+    int adjpos = NILP (re_match_object) || BUFFERP (re_match_object);
+    int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (startpos + adjpos);
+
+    SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
+  }
 #endif
 
   /* Loop through the string, looking for a place to start matching.  */
@@ -3794,37 +4017,69 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
 	 the first null string.	 */
       if (fastmap && startpos < total_size && !bufp->can_be_null)
 	{
+	  register const char *d;
+	  register unsigned int buf_ch;
+
+	  d = POS_ADDR_VSTRING (startpos);
+
 	  if (range > 0)	/* Searching forwards.	*/
 	    {
-	      register const char *d;
 	      register int lim = 0;
 	      int irange = range;
 
 	      if (startpos < size1 && startpos + range >= size1)
 		lim = range - (size1 - startpos);
 
-	      d = POS_ADDR_VSTRING (startpos);
-
 	      /* Written out as an if-else to avoid testing `translate'
 		 inside the loop.  */
-	      if (translate)
-		while (range > lim
-		       && !fastmap[(unsigned char)
-				   translate[(unsigned char) *d++]])
-		  range--;
+	      if (RE_TRANSLATE_P (translate))
+		{
+		  if (multibyte)
+		    while (range > lim)
+		      {
+			int buf_charlen;
+
+			buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
+							 buf_charlen);
+
+			buf_ch = RE_TRANSLATE (translate, buf_ch);
+			if (buf_ch >= 0400
+			    || fastmap[buf_ch])
+			  break;
+
+			range -= buf_charlen;
+			d += buf_charlen;
+		      }
+		  else
+		    while (range > lim
+			   && !fastmap[(unsigned char)
+				       RE_TRANSLATE (translate, (unsigned char) *d)])
+		      {
+			d++;
+			range--;
+		      }
+		}
 	      else
-		while (range > lim && !fastmap[(unsigned char) *d++])
-		  range--;
+		while (range > lim && !fastmap[(unsigned char) *d])
+		  {
+		    d++;
+		    range--;
+		  }
 
 	      startpos += irange - range;
 	    }
 	  else				/* Searching backwards.	 */
 	    {
-	      register char c = (size1 == 0 || startpos >= size1
-				 ? string2[startpos - size1]
-				 : string1[startpos]);
+	      int room = (size1 == 0 || startpos >= size1
+			  ? size2 + size1 - startpos
+			  : size1 - startpos);
+
+	      buf_ch = STRING_CHAR (d, room);
+	      if (RE_TRANSLATE_P (translate))
+		buf_ch = RE_TRANSLATE (translate, buf_ch);
 
-	      if (!fastmap[(unsigned char) TRANSLATE (c)])
+	      if (! (buf_ch >= 0400
+		     || fastmap[buf_ch]))
 		goto advance;
 	    }
 	}
@@ -3886,7 +4141,7 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
 	      int len = 0;
 
 	      /* Find the head of multibyte form.  */
-	      while (!CHAR_HEAD_P (p))
+	      while (!CHAR_HEAD_P (*p))
 		p--, len++;
 
 	      /* Adjust it. */
@@ -4053,13 +4308,15 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
   int result;
 
 #ifdef emacs
-  SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object,
-				 POS_AS_IN_BUFFER (pos > 0 ? pos - 1 : pos),
-				 1);
+  int charpos;
+  int adjpos = NILP (re_match_object) || BUFFERP (re_match_object);
+  gl_state.object = re_match_object;
+  charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos + adjpos);
+  SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
 #endif
 
   result = re_match_2_internal (bufp, string1, size1, string2, size2,
-				    pos, regs, stop);
+				pos, regs, stop);
   alloca (0);
   return result;
 }
@@ -4495,16 +4752,39 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 
 	  /* This is written out as an if-else so we don't waste time
 	     testing `translate' inside the loop.  */
-	  if (translate)
+	  if (RE_TRANSLATE_P (translate))
 	    {
-	      do
-		{
-		  PREFETCH ();
-		  if ((unsigned char) translate[(unsigned char) *d++]
-		      != (unsigned char) *p++)
-		    goto fail;
-		}
-	      while (--mcnt);
+#ifdef emacs
+	      if (multibyte)
+		do
+		  {
+		    int pat_charlen, buf_charlen;
+		    unsigned int pat_ch, buf_ch;
+
+		    PREFETCH ();
+		    pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
+		    buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
+
+		    if (RE_TRANSLATE (translate, buf_ch)
+			!= pat_ch)
+		      goto fail;
+
+		    p += pat_charlen;
+		    d += buf_charlen;
+		    mcnt -= pat_charlen;
+		  }
+		while (mcnt > 0);
+	      else
+#endif /* emacs */
+		do
+		  {
+		    PREFETCH ();
+		    if ((unsigned char) RE_TRANSLATE (translate, (unsigned char) *d)
+			!= (unsigned char) *p++)
+		      goto fail;
+		    d++;
+		  }
+		while (--mcnt);
 	    }
 	  else
 	    {
@@ -4521,17 +4801,36 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 
 	/* Match any character except possibly a newline or a null.  */
 	case anychar:
-	  DEBUG_PRINT1 ("EXECUTING anychar.\n");
+	  {
+	    int buf_charlen;
+	    unsigned int buf_ch;
 
-	  PREFETCH ();
+	    DEBUG_PRINT1 ("EXECUTING anychar.\n");
 
-	  if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
-	      || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
-	    goto fail;
+	    PREFETCH ();
 
-	  SET_REGS_MATCHED ();
-	  DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
-	  d += multibyte ? MULTIBYTE_FORM_LENGTH (d, dend - d) : 1;
+#ifdef emacs
+	    if (multibyte)
+	      buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
+	    else
+#endif /* emacs */
+	      {
+		buf_ch = (unsigned char) *d;
+		buf_charlen = 1;
+	      }
+
+	    buf_ch = TRANSLATE (buf_ch);
+
+	    if ((!(bufp->syntax & RE_DOT_NEWLINE)
+		 && buf_ch == '\n')
+		|| ((bufp->syntax & RE_DOT_NOT_NULL)
+		    && buf_ch == '\000'))
+	      goto fail;
+
+	    SET_REGS_MATCHED ();
+	    DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
+	    d += buf_charlen;
+	  }
 	  break;
 
 
@@ -4546,26 +4845,30 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	       range table.  */
 	    unsigned char *range_table;
 
-	    /* Nonzero if there is range table.	 */
+	    /* Nonzero if there is a range table.  */
 	    int range_table_exists;
 
-	    /* Number of ranges of range table.	 Not in bytes.	*/
-	    int count;
+	    /* Number of ranges of range table.  This is not included
+	       in the initial byte-length of the command.  */
+	    int count = 0;
 
 	    DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
 
 	    PREFETCH ();
 	    c = (unsigned char) *d;
 
-	    range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap.  */
 	    range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
+
+#ifdef emacs
 	    if (range_table_exists)
-	      EXTRACT_NUMBER_AND_INCR (count, range_table);
-	    else
-	      count = 0;
+	      {
+		range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap.  */
+		EXTRACT_NUMBER_AND_INCR (count, range_table);
+	      }
 
 	    if (multibyte && BASE_LEADING_CODE_P (c))
 	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
+#endif /* emacs */
 
 	    if (SINGLE_BYTE_CHAR_P (c))
 	      {			/* Lookup bitmap.  */
@@ -4575,13 +4878,37 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		/* Cast to `unsigned' instead of `unsigned char' in
 		   case the bit list is a full 32 bytes long.  */
 		if (c < (unsigned) (CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH)
-		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
-	      not = !not;
+		    && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+		  not = !not;
 	      }
+#ifdef emacs
 	    else if (range_table_exists)
-	      CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);
+	      {
+		int class_bits = CHARSET_RANGE_TABLE_BITS (&p[-1]);
+
+		if (  (class_bits & BIT_ALNUM && ISALNUM (c))
+		    | (class_bits & BIT_ALPHA && ISALPHA (c))
+		    | (class_bits & BIT_ASCII && IS_REAL_ASCII (c))
+		    | (class_bits & BIT_GRAPH && ISGRAPH (c))
+		    | (class_bits & BIT_LOWER && ISLOWER (c))
+		    | (class_bits & BIT_MULTIBYTE && !ISUNIBYTE (c))
+		    | (class_bits & BIT_NONASCII && !IS_REAL_ASCII (c))
+		    | (class_bits & BIT_PRINT && ISPRINT (c))
+		    | (class_bits & BIT_PUNCT && ISPUNCT (c))
+		    | (class_bits & BIT_SPACE && ISSPACE (c))
+		    | (class_bits & BIT_UNIBYTE && ISUNIBYTE (c))
+		    | (class_bits & BIT_UPPER && ISUPPER (c))
+		    | (class_bits & BIT_WORD  && ISWORD (c)))
+		  not = !not;
+		else
+		  CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);
+	      }
+#endif /* emacs */
 
-	    p = CHARSET_RANGE_TABLE_END (range_table, count);
+	    if (range_table_exists)
+	      p = CHARSET_RANGE_TABLE_END (range_table, count);
+	    else
+	      p += CHARSET_BITMAP_SIZE (&p[-1]) + 1;
 
 	    if (!not) goto fail;
 
@@ -4829,7 +5156,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 
 		/* Compare that many; failure if mismatch, else move
 		   past them.  */
-		if (translate
+		if (RE_TRANSLATE_P (translate)
 		    ? bcmp_translate (d, d2, mcnt, translate)
 		    : bcmp (d, d2, mcnt))
 		  goto fail;
@@ -4936,6 +5263,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	on_failure:
 	  DEBUG_PRINT1 ("EXECUTING on_failure_jump");
 
+#if defined (WINDOWSNT) && defined (emacs)
+	  QUIT;
+#endif
+
 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
 	  DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
 
@@ -4976,6 +5307,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	/* A smart repeat ends with `maybe_pop_jump'.
 	   We change it to either `pop_failure_jump' or `jump'.	 */
 	case maybe_pop_jump:
+#if defined (WINDOWSNT) && defined (emacs)
+	  QUIT;
+#endif
 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
 	  DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
 	  {
@@ -5196,6 +5530,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	/* Unconditionally jump (without popping any failure points).  */
 	case jump:
 	unconditional_jump:
+#if defined (WINDOWSNT) && defined (emacs)
+	  QUIT;
+#endif
 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
 	  DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
 	  p += mcnt;				/* Do the jump.	 */
@@ -5301,15 +5638,17 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		 is the character at D, and S2 is the syntax of C2.  */
 	      int c1, c2, s1, s2;
 	      int pos1 = PTR_TO_OFFSET (d - 1);
+	      int charpos;
 
 	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
 	      GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
 #ifdef emacs
-	      UPDATE_SYNTAX_TABLE (pos1 ? pos1 : 1);
+	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
+	      UPDATE_SYNTAX_TABLE (charpos);
 #endif
 	      s1 = SYNTAX (c1);
 #ifdef emacs
-	      UPDATE_SYNTAX_TABLE_FORWARD (pos1 + 1);
+	      UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
 #endif
 	      s2 = SYNTAX (c2);
 
@@ -5336,15 +5675,17 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		 is the character at D, and S2 is the syntax of C2.  */
 	      int c1, c2, s1, s2;
 	      int pos1 = PTR_TO_OFFSET (d - 1);
+	      int charpos;
 
 	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
 	      GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
 #ifdef emacs
-	      UPDATE_SYNTAX_TABLE (pos1);
+	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
+	      UPDATE_SYNTAX_TABLE (charpos);
 #endif
 	      s1 = SYNTAX (c1);
 #ifdef emacs
-	      UPDATE_SYNTAX_TABLE_FORWARD (pos1 + 1);
+	      UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
 #endif
 	      s2 = SYNTAX (c2);
 
@@ -5371,10 +5712,12 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		 is the character at D, and S2 is the syntax of C2.  */
 	      int c1, c2, s1, s2;
 	      int pos1 = PTR_TO_OFFSET (d);
+	      int charpos;
 
 	      GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
 #ifdef emacs
-	      UPDATE_SYNTAX_TABLE (pos1);
+	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
+	      UPDATE_SYNTAX_TABLE (charpos);
 #endif
 	      s2 = SYNTAX (c2);
 	
@@ -5387,7 +5730,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		{
 		  GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
 #ifdef emacs
-		  UPDATE_SYNTAX_TABLE_BACKWARD (pos1 - 1);
+		  UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
 #endif
 		  s1 = SYNTAX (c1);
 
@@ -5412,8 +5755,14 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	      /* C1 is the character before D, S1 is the syntax of C1, C2
 		 is the character at D, and S2 is the syntax of C2.  */
 	      int c1, c2, s1, s2;
+	      int pos1 = PTR_TO_OFFSET (d);
+	      int charpos;
 
 	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+#ifdef emacs
+	      charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1 - 1);
+	      UPDATE_SYNTAX_TABLE (charpos);
+#endif
 	      s1 = SYNTAX (c1);
 
 	      /* Case 2: S1 is not Sword.  */
@@ -5424,6 +5773,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	      if (!AT_STRINGS_END (d))
 		{
 		  GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
+#ifdef emacs
+		  UPDATE_SYNTAX_TABLE_FORWARD (charpos);
+#endif
 		  s2 = SYNTAX (c2);
 
 		  /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
@@ -5437,19 +5789,19 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 #ifdef emacs
 	case before_dot:
 	  DEBUG_PRINT1 ("EXECUTING before_dot.\n");
-	  if (PTR_CHAR_POS ((unsigned char *) d) >= PT)
+	  if (PTR_BYTE_POS ((unsigned char *) d) >= PT_BYTE)
 	    goto fail;
 	  break;
 
 	case at_dot:
 	  DEBUG_PRINT1 ("EXECUTING at_dot.\n");
-	  if (PTR_CHAR_POS ((unsigned char *) d) != PT)
+	  if (PTR_BYTE_POS ((unsigned char *) d) != PT_BYTE)
 	    goto fail;
 	  break;
 
 	case after_dot:
 	  DEBUG_PRINT1 ("EXECUTING after_dot.\n");
-	  if (PTR_CHAR_POS ((unsigned char *) d) <= PT)
+	  if (PTR_BYTE_POS ((unsigned char *) d) <= PT_BYTE)
 	    goto fail;
 	  break;
 
@@ -5465,7 +5817,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	  PREFETCH ();
 #ifdef emacs
 	  {
-	    int pos1 = PTR_TO_OFFSET (d);
+	    int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
 	    UPDATE_SYNTAX_TABLE (pos1);
 	  }
 #endif
@@ -5499,7 +5851,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	  PREFETCH ();
 #ifdef emacs
 	  {
-	    int pos1 = PTR_TO_OFFSET (d);
+	    int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
 	    UPDATE_SYNTAX_TABLE (pos1);
 	  }
 #endif
@@ -5584,6 +5936,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 
     /* We goto here if a matching operation fails. */
     fail:
+#if defined (WINDOWSNT) && defined (emacs)
+      QUIT;
+#endif
       if (!FAIL_STACK_EMPTY ())
 	{ /* A restart point is known.  Restore to that state.  */
           DEBUG_PRINT1 ("\nFAIL:\n");
@@ -5893,11 +6248,27 @@ bcmp_translate (s1, s2, len, translate)
      RE_TRANSLATE_TYPE translate;
 {
   register unsigned char *p1 = s1, *p2 = s2;
-  while (len)
+  unsigned char *p1_end = s1 + len;
+  unsigned char *p2_end = s2 + len;
+
+  while (p1 != p1_end && p2 != p2_end)
     {
-      if (translate[*p1++] != translate[*p2++]) return 1;
-      len--;
+      int p1_charlen, p2_charlen;
+      int p1_ch, p2_ch;
+
+      p1_ch = STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
+      p2_ch = STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
+
+      if (RE_TRANSLATE (translate, p1_ch)
+	  != RE_TRANSLATE (translate, p2_ch))
+	return 1;
+
+      p1 += p1_charlen, p2 += p2_charlen;
     }
+
+  if (p1 != p1_end || p2 != p2_end)
+    return 1;
+
   return 0;
 }