.

[gnulib.git] / regex.c
diff --git a/regex.c b/regex.c

index 4f222a5..c9219c6 100644 (file)
--- a/regex.c
+++ b/regex.c
@@ -2,7 +2,7 @@
     0.12.  (Implements POSIX draft P10003.2/D11.2, except for
     internationalization features.)
  
     0.12.  (Implements POSIX draft P10003.2/D11.2, except for
     internationalization features.)
  
-   Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
+   Copyright (C) 1993, 1994-1998, 1999 Free Software Foundation, Inc.
  
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
  
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -27,6 +27,16 @@
  #undef _GNU_SOURCE
  #define _GNU_SOURCE
  
  #undef _GNU_SOURCE
  #define _GNU_SOURCE
  
+#ifdef emacs
+/* Converts the pointer to the char to BEG-based offset from the start.         */
+#define PTR_TO_OFFSET(d)                                               \
+       POS_AS_IN_BUFFER (MATCHING_IN_FIRST_STRING                      \
+                         ? (d) - string1 : (d) - (string2 - size1))
+#define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
+#else
+#define PTR_TO_OFFSET(d) 0
+#endif
+
  #ifdef HAVE_CONFIG_H
  #include <config.h>
  #endif
  #ifdef HAVE_CONFIG_H
  #include <config.h>
  #endif
@@ -53,7 +63,17 @@
  
  #include "lisp.h"
  #include "buffer.h"
  
  #include "lisp.h"
  #include "buffer.h"
+
+/* Make syntax table lookup grant data in gl_state.  */
+#define SYNTAX_ENTRY_VIA_PROPERTY
+
  #include "syntax.h"
  #include "syntax.h"
+#include "charset.h"
+#include "category.h"
+
+#define malloc xmalloc
+#define realloc xrealloc
+#define free xfree
  
  #else  /* not emacs */
  
  
  #else  /* not emacs */
  
@@ -153,6 +173,19 @@ init_syntax_once ()
  
  #define SYNTAX(c) re_syntax_table[c]
  
  
  #define SYNTAX(c) re_syntax_table[c]
  
+/* Dummy macros for non-Emacs environments.  */
+#define BASE_LEADING_CODE_P(c) (0)
+#define WORD_BOUNDARY_P(c1, c2) (0)
+#define CHAR_HEAD_P(p) (1)
+#define SINGLE_BYTE_CHAR_P(c) (1)
+#define SAME_CHARSET_P(c1, c2) (1)
+#define MULTIBYTE_FORM_LENGTH(p, s) (1)
+#define STRING_CHAR(p, s) (*(p))
+#define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
+#define GET_CHAR_AFTER_2(c, p, str1, end1, str2, end2) \
+  (c = ((p) == (end1) ? *(str2) : *(p)))
+#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
+  (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
  #endif /* not emacs */
  \f
  /* Get the interface, including the syntax bits.  */
  #endif /* not emacs */
  \f
  /* Get the interface, including the syntax bits.  */
@@ -161,6 +194,64 @@ init_syntax_once ()
  /* isalpha etc. are used for the character classes.  */
  #include <ctype.h>
  
  /* isalpha etc. are used for the character classes.  */
  #include <ctype.h>
  
+#ifdef emacs
+
+/* 1 if C is an ASCII character.  */
+#define IS_REAL_ASCII(c) ((c) < 0200)
+
+/* 1 if C is a unibyte character.  */
+#define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
+
+/* The Emacs definitions should not be directly affected by locales.  */
+
+/* In Emacs, these are only used for single-byte characters.  */
+#define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
+#define ISCNTRL(c) ((c) < ' ')
+#define ISXDIGIT(c) (((c) >= '0' && (c) <= '9')                \
+                    || ((c) >= 'a' && (c) <= 'f')      \
+                    || ((c) >= 'A' && (c) <= 'F'))
+
+/* This is only used for single-byte characters.  */
+#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+
+/* The rest must handle multibyte characters.  */
+
+#define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c)                             \
+                   ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237)        \
+                   : 1)
+
+#define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c)             \
+                   ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237)       \
+                   : 1)
+
+#define ISALNUM(c) (IS_REAL_ASCII (c)                  \
+                   ? (((c) >= 'a' && (c) <= 'z')       \
+                      || ((c) >= 'A' && (c) <= 'Z')    \
+                      || ((c) >= '0' && (c) <= '9'))   \
+                   : SYNTAX (c) == Sword)
+
+#define ISALPHA(c) (IS_REAL_ASCII (c)                  \
+                   ? (((c) >= 'a' && (c) <= 'z')       \
+                      || ((c) >= 'A' && (c) <= 'Z'))   \
+                   : SYNTAX (c) == Sword)
+
+#define ISLOWER(c) (LOWERCASEP (c))
+
+#define ISPUNCT(c) (IS_REAL_ASCII (c)                          \
+                   ? ((c) > ' ' && (c) < 0177                  \
+                      && !(((c) >= 'a' && (c) <= 'z')          \
+                           || ((c) >= 'A' && (c) <= 'Z')       \
+                           || ((c) >= '0' && (c) <= '9')))     \
+                   : SYNTAX (c) != Sword)
+
+#define ISSPACE(c) (SYNTAX (c) == Swhitespace)
+
+#define ISUPPER(c) (UPPERCASEP (c))
+
+#define ISWORD(c) (SYNTAX (c) == Sword)
+
+#else /* not emacs */
+
  /* Jim Meyering writes:
  
     "... Some ctype macros are valid only for character codes that
  /* Jim Meyering writes:
  
     "... Some ctype macros are valid only for character codes that
@@ -178,6 +269,16 @@ init_syntax_once ()
  #define ISASCII(c) isascii(c)
  #endif
  
  #define ISASCII(c) isascii(c)
  #endif
  
+/* 1 if C is an ASCII character.  */
+#define IS_REAL_ASCII(c) ((c) < 0200)
+
+/* This distinction is not meaningful, except in Emacs.  */
+#define ISUNIBYTE(c) 1
+
+#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
+#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
+#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
+
  #ifdef isblank
  #define ISBLANK(c) (ISASCII (c) && isblank (c))
  #else
  #ifdef isblank
  #define ISBLANK(c) (ISASCII (c) && isblank (c))
  #else
@@ -200,6 +301,10 @@ init_syntax_once ()
  #define ISUPPER(c) (ISASCII (c) && isupper (c))
  #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
  
  #define ISUPPER(c) (ISASCII (c) && isupper (c))
  #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
  
+#define ISWORD(c) ISALPHA(c)
+
+#endif /* not emacs */
+\f
  #ifndef NULL
  #define NULL (void *)0
  #endif
  #ifndef NULL
  #define NULL (void *)0
  #endif
@@ -350,7 +455,15 @@ typedef enum
            for a bitmap saying which chars are in.  Bits in each byte
            are ordered low-bit-first.  A character is in the set if its
            bit is 1.  A character too large to have a bit in the map is
            for a bitmap saying which chars are in.  Bits in each byte
            are ordered low-bit-first.  A character is in the set if its
            bit is 1.  A character too large to have a bit in the map is
-          automatically not in the set.  */
+          automatically not in the set.
+
+          If the length byte has the 0x80 bit set, then that stuff
+          is followed by a range table:
+              2 bytes of flags for character sets (low 8 bits, high 8 bits)
+                  See RANGE_TABLE_WORK_BITS below.
+              2 bytes, the number of pairs that follow
+              pairs, each 2 multibyte characters,
+                  each multibyte character represented as 3 bytes.  */
    charset,
  
         /* Same parameters as charset, but match any character that is
    charset,
  
         /* Same parameters as charset, but match any character that is
@@ -462,7 +575,17 @@ typedef enum
    syntaxspec,
  
         /* Matches any character whose syntax is not that specified.  */
    syntaxspec,
  
         /* Matches any character whose syntax is not that specified.  */
-  notsyntaxspec
+  notsyntaxspec,
+
+  /* Matches any character whose category-set contains the specified
+     category. The operator is followed by a byte which contains a
+     category code (mnemonic ASCII character). */
+  categoryspec,
+
+  /* Matches any character whose category-set does not contain the
+     specified category.  The operator is followed by a byte which
+     contains the category code (mnemonic ASCII character).  */
+  notcategoryspec
  #endif /* emacs */
  } re_opcode_t;
  \f
  #endif /* emacs */
  } re_opcode_t;
  \f
@@ -540,6 +663,99 @@ extract_number_and_incr (destination, source)
  
  #endif /* DEBUG */
  \f
  
  #endif /* DEBUG */
  \f
+/* Store a multibyte character in three contiguous bytes starting
+   DESTINATION, and increment DESTINATION to the byte after where the
+   character is stored.         Therefore, DESTINATION must be an lvalue.  */
+
+#define STORE_CHARACTER_AND_INCR(destination, character)       \
+  do {                                                         \
+    (destination)[0] = (character) & 0377;                     \
+    (destination)[1] = ((character) >> 8) & 0377;              \
+    (destination)[2] = (character) >> 16;                      \
+    (destination) += 3;                                                \
+  } while (0)
+
+/* Put into DESTINATION a character stored in three contiguous bytes
+   starting at SOURCE. */
+
+#define EXTRACT_CHARACTER(destination, source) \
+  do {                                         \
+    (destination) = ((source)[0]               \
+                    | ((source)[1] << 8)       \
+                    | ((source)[2] << 16));    \
+  } while (0)
+
+
+/* Macros for charset. */
+
+/* Size of bitmap of charset P in bytes.  P is a start of charset,
+   i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not.  */
+#define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)
+
+/* Nonzero if charset P has range table.  */
+#define CHARSET_RANGE_TABLE_EXISTS_P(p)         ((p)[1] & 0x80)
+
+/* Return the address of range table of charset P.  But not the start
+   of table itself, but the before where the number of ranges is
+   stored.  `2 +' means to skip re_opcode_t and size of bitmap,
+   and the 2 bytes of flags at the start of the range table.  */
+#define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)])
+
+/* Extract the bit flags that start a range table.  */
+#define CHARSET_RANGE_TABLE_BITS(p)            \
+  ((p)[2 + CHARSET_BITMAP_SIZE (p)]            \
+   + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100)
+
+/* Test if C is listed in the bitmap of charset P.  */
+#define CHARSET_LOOKUP_BITMAP(p, c)                            \
+  ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH                   \
+   && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH)))
+
+/* Return the address of end of RANGE_TABLE.  COUNT is number of
+   ranges (which is a pair of (start, end)) in the RANGE_TABLE.         `* 2'
+   is start of range and end of range. `* 3' is size of each start
+   and end.  */
+#define CHARSET_RANGE_TABLE_END(range_table, count)    \
+  ((range_table) + (count) * 2 * 3)
+
+/* Test if C is in RANGE_TABLE.         A flag NOT is negated if C is in.
+   COUNT is number of ranges in RANGE_TABLE.  */
+#define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count)     \
+  do                                                                   \
+    {                                                                  \
+      int range_start, range_end;                                      \
+      unsigned char *p;                                                        \
+      unsigned char *range_table_end                                   \
+       = CHARSET_RANGE_TABLE_END ((range_table), (count));             \
+                                                                       \
+      for (p = (range_table); p < range_table_end; p += 2 * 3)         \
+       {                                                               \
+         EXTRACT_CHARACTER (range_start, p);                           \
+         EXTRACT_CHARACTER (range_end, p + 3);                         \
+                                                                       \
+         if (range_start <= (c) && (c) <= range_end)                   \
+           {                                                           \
+             (not) = !(not);                                           \
+             break;                                                    \
+           }                                                           \
+       }                                                               \
+    }                                                                  \
+  while (0)
+
+/* Test if C is in range table of CHARSET.  The flag NOT is negated if
+   C is listed in it.  */
+#define CHARSET_LOOKUP_RANGE_TABLE(not, c, charset)                    \
+  do                                                                   \
+    {                                                                  \
+      /* Number of ranges in range table. */                           \
+      int count;                                                       \
+      unsigned char *range_table = CHARSET_RANGE_TABLE (charset);      \
+                                                                       \
+      EXTRACT_NUMBER_AND_INCR (count, range_table);                    \
+      CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count); \
+    }                                                                  \
+  while (0)
+\f
  /* If DEBUG is defined, Regex prints many voluminous messages about what
     it is doing (if the variable `debug' is nonzero).  If linked with the
     main program in `iregex.c', you can enter patterns and strings
  /* If DEBUG is defined, Regex prints many voluminous messages about what
     it is doing (if the variable `debug' is nonzero).  If linked with the
     main program in `iregex.c', you can enter patterns and strings
@@ -661,6 +877,9 @@ print_partial_compiled_pattern (start, end)
           {
             register int c, last = -100;
             register int in_range = 0;
           {
             register int c, last = -100;
             register int in_range = 0;
+           int length = *p & 0x7f;
+           int has_range_table = *p & 0x80;
+           int range_length = p[length + 2] + p[length + 3] * 0x100;
  
             printf ("/charset [%s",
                     (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
  
             printf ("/charset [%s",
                     (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
@@ -668,7 +887,7 @@ print_partial_compiled_pattern (start, end)
             assert (p + *p < pend);
  
             for (c = 0; c < 256; c++)
             assert (p + *p < pend);
  
             for (c = 0; c < 256; c++)
-             if (c / 8 < *p
+             if (c / 8 < length
                   && (p[1 + (c/8)] & (1 << (c % 8))))
                 {
                   /* Are we starting a range?  */
                   && (p[1 + (c/8)] & (1 << (c % 8))))
                 {
                   /* Are we starting a range?  */
@@ -679,7 +898,7 @@ print_partial_compiled_pattern (start, end)
                     }
                   /* Have we broken a range?  */
                   else if (last + 1 != c && in_range)
                     }
                   /* Have we broken a range?  */
                   else if (last + 1 != c && in_range)
-             {
+                   {
                       putchar (last);
                       in_range = 0;
                     }
                       putchar (last);
                       in_range = 0;
                     }
@@ -690,12 +909,20 @@ print_partial_compiled_pattern (start, end)
                   last = c;
               }
  
                   last = c;
               }
  
+           p += 1 + length;
+
             if (in_range)
               putchar (last);
  
             putchar (']');
  
             if (in_range)
               putchar (last);
  
             putchar (']');
  
-           p += 1 + *p;
+           if (has_range_table)
+             printf ("has-range-table");
+
+           /* ??? Should print the range table; for now,
+              just skip it.  */
+           if (has_range_table)
+             p += 4 + 6 * range_length;
           }
           break;
  
           }
           break;
  
@@ -995,23 +1222,25 @@ static const char *re_error_msgid[] =
     REGEX_ALLOCATE_STACK.  */
  
  
     REGEX_ALLOCATE_STACK.  */
  
  
-/* Number of failure points for which to initially allocate space
+/* Approximate number of failure points for which to initially allocate space
     when matching.  If this number is exceeded, we allocate more
     space, so it is not a hard limit.  */
  #ifndef INIT_FAILURE_ALLOC
     when matching.  If this number is exceeded, we allocate more
     space, so it is not a hard limit.  */
  #ifndef INIT_FAILURE_ALLOC
-#define INIT_FAILURE_ALLOC 5
+#define INIT_FAILURE_ALLOC 20
  #endif
  
  /* Roughly the maximum number of failure points on the stack.  Would be
  #endif
  
  /* Roughly the maximum number of failure points on the stack.  Would be
-   exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
+   exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
     This is a variable only so users of regex can assign to it; we never
     change it ourselves.         */
  #if defined (MATCH_MAY_ALLOCATE)
     This is a variable only so users of regex can assign to it; we never
     change it ourselves.         */
  #if defined (MATCH_MAY_ALLOCATE)
-/* 4400 was enough to cause a crash on Alpha OSF/1,
-   whose default stack limit is 2mb.  */
-int re_max_failures = 20000;
+/* Note that 4400 is enough to cause a crash on Alpha OSF/1,
+   whose default stack limit is 2mb.  In order for a larger
+   value to work reliably, you have to try to make it accord
+   with the process stack limit.  */
+int re_max_failures = 40000;
  #else
  #else
-int re_max_failures = 2000;
+int re_max_failures = 4000;
  #endif
  
  union fail_stack_elt
  #endif
  
  union fail_stack_elt
@@ -1041,7 +1270,8 @@ typedef struct
  #define INIT_FAIL_STACK()                                              \
    do {                                                                 \
      fail_stack.stack = (fail_stack_elt_t *)                            \
  #define INIT_FAIL_STACK()                                              \
    do {                                                                 \
      fail_stack.stack = (fail_stack_elt_t *)                            \
-      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));   \
+      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE  \
+                           * sizeof (fail_stack_elt_t));               \
                                                                         \
      if (fail_stack.stack == NULL)                                      \
        return -2;                                                       \
                                                                         \
      if (fail_stack.stack == NULL)                                      \
        return -2;                                                       \
@@ -1061,24 +1291,40 @@ typedef struct
  #endif
  
  
  #endif
  
  
-/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
+/* Double the size of FAIL_STACK, up to a limit
+   which allows approximately `re_max_failures' items.
  
     Return 1 if succeeds, and 0 if either ran out of memory
     allocating space for it or it was already too large.
  
     REGEX_REALLOCATE_STACK requires `destination' be declared.  */
  
  
     Return 1 if succeeds, and 0 if either ran out of memory
     allocating space for it or it was already too large.
  
     REGEX_REALLOCATE_STACK requires `destination' be declared.  */
  
-#define DOUBLE_FAIL_STACK(fail_stack)                                  \
-  ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS             \
+/* Factor to increase the failure stack size by
+   when we increase it.
+   This used to be 2, but 2 was too wasteful
+   because the old discarded stacks added up to as much space
+   were as ultimate, maximum-size stack.  */
+#define FAIL_STACK_GROWTH_FACTOR 4
+
+#define GROW_FAIL_STACK(fail_stack)                                    \
+  (((fail_stack).size * sizeof (fail_stack_elt_t)                      \
+    >= re_max_failures * TYPICAL_FAILURE_SIZE)                         \
     ? 0                                                                 \
     ? 0                                                                 \
-   : ((fail_stack).stack = (fail_stack_elt_t *)                                \
+   : ((fail_stack).stack                                               \
+      = (fail_stack_elt_t *)                                           \
         REGEX_REALLOCATE_STACK ((fail_stack).stack,                     \
           (fail_stack).size * sizeof (fail_stack_elt_t),                \
         REGEX_REALLOCATE_STACK ((fail_stack).stack,                     \
           (fail_stack).size * sizeof (fail_stack_elt_t),                \
-         ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),        \
+         MIN (re_max_failures * TYPICAL_FAILURE_SIZE,                  \
+              ((fail_stack).size * sizeof (fail_stack_elt_t)           \
+               * FAIL_STACK_GROWTH_FACTOR))),                          \
                                                                         \
        (fail_stack).stack == NULL                                       \
        ? 0                                                              \
                                                                         \
        (fail_stack).stack == NULL                                       \
        ? 0                                                              \
-      : ((fail_stack).size <<= 1,                                      \
+      : ((fail_stack).size                                             \
+        = (MIN (re_max_failures * TYPICAL_FAILURE_SIZE,                \
+                ((fail_stack).size * sizeof (fail_stack_elt_t)         \
+                 * FAIL_STACK_GROWTH_FACTOR))                          \
+           / sizeof (fail_stack_elt_t)),                               \
          1)))
  
  
          1)))
  
  
@@ -1087,7 +1333,7 @@ typedef struct
     space to do so.  */
  #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)                           \
    ((FAIL_STACK_FULL ()                                                 \
     space to do so.  */
  #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)                           \
    ((FAIL_STACK_FULL ()                                                 \
-    && !DOUBLE_FAIL_STACK (FAIL_STACK))                                        \
+    && !GROW_FAIL_STACK (FAIL_STACK))                                  \
     ? 0                                                                 \
     : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,      \
        1))
     ? 0                                                                 \
     : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,      \
        1))
@@ -1130,7 +1376,7 @@ typedef struct
     if we ever fail back to it.
  
     Requires variables fail_stack, regstart, regend, reg_info, and
     if we ever fail back to it.
  
     Requires variables fail_stack, regstart, regend, reg_info, and
-   num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
+   num_regs be declared.  GROW_FAIL_STACK requires `destination' be
     declared.
  
     Does `return FAILURE_CODE' if runs out of memory.  */
     declared.
  
     Does `return FAILURE_CODE' if runs out of memory.  */
@@ -1154,7 +1400,7 @@ typedef struct
      /* Ensure we have enough space allocated for what we will push.  */        \
      while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                  \
        {                                                                        \
      /* Ensure we have enough space allocated for what we will push.  */        \
      while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                  \
        {                                                                        \
-       if (!DOUBLE_FAIL_STACK (fail_stack))                            \
+       if (!GROW_FAIL_STACK (fail_stack))                              \
           return failure_code;                                          \
                                                                         \
         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",              \
           return failure_code;                                          \
                                                                         \
         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",              \
@@ -1221,13 +1467,14 @@ typedef struct
  #define NUM_NONREG_ITEMS 4
  #endif
  
  #define NUM_NONREG_ITEMS 4
  #endif
  
-/* We push at most this many items on the stack.  */
-/* We used to use (num_regs - 1), which is the number of registers
-   this regexp will save; but that was changed to 5
-   to avoid stack overflow for a regexp with lots of parens.  */
-#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
+/* Estimate the size of data pushed by a typical failure stack entry.
+   An estimate is all we need, because all we use this for
+   is to choose a limit for how big to make the failure stack.  */
  
  
-/* We actually push this many items.  */
+#define TYPICAL_FAILURE_SIZE 20
+
+/* This is how many items we actually use for a failure point.
+   It depends on the regexp.  */
  #define NUM_FAILURE_ITEMS                              \
    (((0                                                 \
       ? 0 : highest_active_reg - lowest_active_reg + 1) \
  #define NUM_FAILURE_ITEMS                              \
    (((0                                                 \
       ? 0 : highest_active_reg - lowest_active_reg + 1) \
@@ -1265,8 +1512,8 @@ typedef struct
                                                                         \
    assert (fail_stack.avail >= NUM_NONREG_ITEMS);                       \
                                                                         \
                                                                         \
    assert (fail_stack.avail >= NUM_NONREG_ITEMS);                       \
                                                                         \
-  DEBUG_POP (&failure_id);                                             \
-  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);             \
+  DEBUG_POP (&failure_id.integer);                                     \
+  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id.integer);     \
                                                                         \
    /* If the saved string location is NULL, it came from an             \
       on_failure_keep_string_jump opcode, and we want to throw away the \
                                                                         \
    /* If the saved string location is NULL, it came from an             \
       on_failure_keep_string_jump opcode, and we want to throw away the \
@@ -1394,7 +1641,7 @@ static reg_errcode_t compile_range ();
  #define PATFETCH(c)                                                    \
    do {if (p == pend) return REG_EEND;                                  \
      c = (unsigned char) *p++;                                          \
  #define PATFETCH(c)                                                    \
    do {if (p == pend) return REG_EEND;                                  \
      c = (unsigned char) *p++;                                          \
-    if (translate) c = (unsigned char) translate[c];                   \
+    if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c);   \
    } while (0)
  #endif
  
    } while (0)
  #endif
  
@@ -1415,7 +1662,8 @@ static reg_errcode_t compile_range ();
     when we use a character as a subscript we must make it unsigned.  */
  #ifndef TRANSLATE
  #define TRANSLATE(d) \
     when we use a character as a subscript we must make it unsigned.  */
  #ifndef TRANSLATE
  #define TRANSLATE(d) \
-  (translate ? (char) translate[(unsigned char) (d)] : (d))
+  (RE_TRANSLATE_P (translate) \
+   ? (unsigned) RE_TRANSLATE (translate, (unsigned) (d)) : (d))
  #endif
  
  
  #endif
  
  
@@ -1553,6 +1801,72 @@ typedef struct
  #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
  
  
  #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
  
  
+/* Structure to manage work area for range table.  */
+struct range_table_work_area
+{
+  int *table;                  /* actual work area.  */
+  int allocated;               /* allocated size for work area in bytes.  */
+  int used;                    /* actually used size in words.  */
+  int bits;                    /* flag to record character classes */
+};
+
+/* Make sure that WORK_AREA can hold more N multibyte characters.  */
+#define EXTEND_RANGE_TABLE_WORK_AREA(work_area, n)                       \
+  do {                                                                   \
+    if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated)  \
+      {                                                                          \
+       (work_area).allocated += 16 * sizeof (int);                       \
+       if ((work_area).table)                                            \
+         (work_area).table                                               \
+           = (int *) realloc ((work_area).table, (work_area).allocated); \
+       else                                                              \
+         (work_area).table                                               \
+           = (int *) malloc ((work_area).allocated);                     \
+       if ((work_area).table == 0)                                       \
+         FREE_STACK_RETURN (REG_ESPACE);                                 \
+      }                                                                          \
+  } while (0)
+
+#define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit)          \
+  (work_area).bits |= (bit)
+
+/* These bits represent the various character classes such as [:alnum:]
+   in a charset's range table.  */
+#define BIT_ALNUM 0x1
+#define BIT_ALPHA 0x2
+#define BIT_WORD  0x4
+#define BIT_ASCII 0x8
+#define BIT_NONASCII 0x10
+#define BIT_GRAPH 0x20
+#define BIT_LOWER 0x40
+#define BIT_PRINT 0x80
+#define BIT_PUNCT 0x100
+#define BIT_SPACE 0x200
+#define BIT_UPPER 0x400
+#define BIT_UNIBYTE 0x800
+#define BIT_MULTIBYTE 0x1000
+
+/* Set a range (RANGE_START, RANGE_END) to WORK_AREA.  */
+#define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end)   \
+  do {                                                                 \
+    EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2);                     \
+    (work_area).table[(work_area).used++] = (range_start);             \
+    (work_area).table[(work_area).used++] = (range_end);               \
+  } while (0)
+
+/* Free allocated memory for WORK_AREA.         */
+#define FREE_RANGE_TABLE_WORK_AREA(work_area)  \
+  do {                                         \
+    if ((work_area).table)                     \
+      free ((work_area).table);                        \
+  } while (0)
+
+#define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0, (work_area).bits = 0)
+#define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
+#define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits)
+#define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
+
+
  /* Set the bit for character C in a list.  */
  #define SET_LIST_BIT(c)                                      \
    (b[((unsigned char) (c)) / BYTEWIDTH]                      \
  /* Set the bit for character C in a list.  */
  #define SET_LIST_BIT(c)                                      \
    (b[((unsigned char) (c)) / BYTEWIDTH]                      \
@@ -1584,7 +1898,10 @@ typedef struct
      || STREQ (string, "alnum") || STREQ (string, "xdigit")             \
      || STREQ (string, "space") || STREQ (string, "print")              \
      || STREQ (string, "punct") || STREQ (string, "graph")              \
      || STREQ (string, "alnum") || STREQ (string, "xdigit")             \
      || STREQ (string, "space") || STREQ (string, "print")              \
      || STREQ (string, "punct") || STREQ (string, "graph")              \
-    || STREQ (string, "cntrl") || STREQ (string, "blank"))
+    || STREQ (string, "cntrl") || STREQ (string, "blank")              \
+    || STREQ (string, "word")                                          \
+    || STREQ (string, "ascii") || STREQ (string, "nonascii")           \
+    || STREQ (string, "unibyte") || STREQ (string, "multibyte"))
  \f
  #ifndef MATCH_MAY_ALLOCATE
  
  \f
  #ifndef MATCH_MAY_ALLOCATE
  
@@ -1654,7 +1971,11 @@ regex_grow_registers (num_regs)
  
  /* Return, freeing storage we allocated.  */
  #define FREE_STACK_RETURN(value)               \
  
  /* Return, freeing storage we allocated.  */
  #define FREE_STACK_RETURN(value)               \
-  return (free (compile_stack.stack), value)
+  do {                                                 \
+    FREE_RANGE_TABLE_WORK_AREA (range_table_work);     \
+    free (compile_stack.stack);                                \
+    return value;                                      \
+  } while (0)
  
  static reg_errcode_t
  regex_compile (pattern, size, syntax, bufp)
  
  static reg_errcode_t
  regex_compile (pattern, size, syntax, bufp)
@@ -1666,7 +1987,7 @@ regex_compile (pattern, size, syntax, bufp)
    /* We fetch characters from PATTERN here.  Even though PATTERN is
       `char *' (i.e., signed), we declare these variables as unsigned, so
       they can be reliably used as array indices.  */
    /* We fetch characters from PATTERN here.  Even though PATTERN is
       `char *' (i.e., signed), we declare these variables as unsigned, so
       they can be reliably used as array indices.  */
-  register unsigned char c, c1;
+  register unsigned int c, c1;
  
    /* A random temporary spot in PATTERN.  */
    const char *p1;
  
    /* A random temporary spot in PATTERN.  */
    const char *p1;
@@ -1678,7 +1999,12 @@ regex_compile (pattern, size, syntax, bufp)
    compile_stack_type compile_stack;
  
    /* Points to the current (ending) position in the pattern.  */
    compile_stack_type compile_stack;
  
    /* Points to the current (ending) position in the pattern.  */
+#ifdef AIX
+  /* `const' makes AIX compiler fail.  */
+  char *p = pattern;
+#else
    const char *p = pattern;
    const char *p = pattern;
+#endif
    const char *pend = pattern + size;
  
    /* How to translate the characters in the pattern.  */
    const char *pend = pattern + size;
  
    /* How to translate the characters in the pattern.  */
@@ -1712,6 +2038,9 @@ regex_compile (pattern, size, syntax, bufp)
       number is put in the stop_memory as the start_memory.  */
    regnum_t regnum = 0;
  
       number is put in the stop_memory as the start_memory.  */
    regnum_t regnum = 0;
  
+  /* Work area for range table of charset.  */
+  struct range_table_work_area range_table_work;
+
  #ifdef DEBUG
    DEBUG_PRINT1 ("\nCompiling pattern: ");
    if (debug)
  #ifdef DEBUG
    DEBUG_PRINT1 ("\nCompiling pattern: ");
    if (debug)
@@ -1732,6 +2061,9 @@ regex_compile (pattern, size, syntax, bufp)
    compile_stack.size = INIT_COMPILE_STACK_SIZE;
    compile_stack.avail = 0;
  
    compile_stack.size = INIT_COMPILE_STACK_SIZE;
    compile_stack.avail = 0;
  
+  range_table_work.table = 0;
+  range_table_work.allocated = 0;
+
    /* Initialize the pattern buffer.  */
    bufp->syntax = syntax;
    bufp->fastmap_accurate = 0;
    /* Initialize the pattern buffer.  */
    bufp->syntax = syntax;
    bufp->fastmap_accurate = 0;
@@ -1745,6 +2077,14 @@ regex_compile (pattern, size, syntax, bufp)
    /* Always count groups, whether or not bufp->no_sub is set.  */
    bufp->re_nsub = 0;
  
    /* Always count groups, whether or not bufp->no_sub is set.  */
    bufp->re_nsub = 0;
  
+#ifdef emacs
+  /* bufp->multibyte is set before regex_compile is called, so don't alter
+     it. */
+#else  /* not emacs */
+  /* Nothing is recognized as a multibyte character.  */
+  bufp->multibyte = 0;
+#endif
+
  #if !defined (emacs) && !defined (SYNTAX_TABLE)
    /* Initialize the syntax table.  */
     init_syntax_once ();
  #if !defined (emacs) && !defined (SYNTAX_TABLE)
    /* Initialize the syntax table.  */
     init_syntax_once ();
@@ -1828,6 +2168,7 @@ regex_compile (pattern, size, syntax, bufp)
  
             /* 1 means zero (many) matches is allowed.  */
             char zero_times_ok = 0, many_times_ok = 0;
  
             /* 1 means zero (many) matches is allowed.  */
             char zero_times_ok = 0, many_times_ok = 0;
+           char greedy = 1;
  
             /* If there is a sequence of repetition chars, collapse it
                down to just one (the right one).  We can't combine
  
             /* If there is a sequence of repetition chars, collapse it
                down to just one (the right one).  We can't combine
@@ -1836,8 +2177,14 @@ regex_compile (pattern, size, syntax, bufp)
  
             for (;;)
               {
  
             for (;;)
               {
-               zero_times_ok |= c != '+';
-               many_times_ok |= c != '?';
+               if (!(syntax & RE_ALL_GREEDY)
+                   && c == '?' && (zero_times_ok || many_times_ok))
+                 greedy = 0;
+               else
+                 {
+                   zero_times_ok |= c != '+';
+                   many_times_ok |= c != '?';
+                 }
  
                 if (p == pend)
                   break;
  
                 if (p == pend)
                   break;
@@ -1878,6 +2225,8 @@ regex_compile (pattern, size, syntax, bufp)
  
             /* Now we know whether or not zero matches is allowed
                and also whether or not two or more matches is allowed.  */
  
             /* Now we know whether or not zero matches is allowed
                and also whether or not two or more matches is allowed.  */
+           if (greedy)
+             {
             if (many_times_ok)
               { /* More than one repetition is allowed, so put in at the
                    end a backward relative jump from `b' to before the next
             if (many_times_ok)
               { /* More than one repetition is allowed, so put in at the
                    end a backward relative jump from `b' to before the next
@@ -1899,9 +2248,10 @@ regex_compile (pattern, size, syntax, bufp)
                    incremented `p', by the way, to be the character after
                    the `*'.  Do we have to do something analogous here
                    for null bytes, because of RE_DOT_NOT_NULL?  */
                    incremented `p', by the way, to be the character after
                    the `*'.  Do we have to do something analogous here
                    for null bytes, because of RE_DOT_NOT_NULL?  */
-               if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
+               if (TRANSLATE ((unsigned char)*(p - 2)) == TRANSLATE ('.')
                     && zero_times_ok
                     && zero_times_ok
-                   && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
+                   && p < pend
+                   && TRANSLATE ((unsigned char)*p) == TRANSLATE ('\n')
                     && !(syntax & RE_DOT_NEWLINE))
                   { /* We have .*\n.  */
                     STORE_JUMP (jump, b, laststart);
                     && !(syntax & RE_DOT_NEWLINE))
                   { /* We have .*\n.  */
                     STORE_JUMP (jump, b, laststart);
@@ -1935,7 +2285,39 @@ regex_compile (pattern, size, syntax, bufp)
                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
                 b += 3;
               }
                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
                 b += 3;
               }
-           }
+
+             }
+           else                /* not greedy */
+             { /* I wish the greedy and non-greedy cases could be merged. */
+
+               if (many_times_ok)
+                 {
+                   /* The greedy multiple match looks like a repeat..until:
+                      we only need a conditional jump at the end of the loop */
+                   GET_BUFFER_SPACE (3);
+                   STORE_JUMP (on_failure_jump, b, laststart);
+                   b += 3;
+                   if (zero_times_ok)
+                     {
+                       /* The repeat...until naturally matches one or more.
+                          To also match zero times, we need to first jump to
+                          the end of the loop (its conditional jump). */
+                       GET_BUFFER_SPACE (3);
+                       INSERT_JUMP (jump, laststart, b);
+                       b += 3;
+                     }
+                 }
+               else
+                 {
+                   /* non-greedy a?? */
+                   GET_BUFFER_SPACE (6);
+                   INSERT_JUMP (jump, laststart, b + 3);
+                   b += 3;
+                   INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
+                   b += 3;
+                 }
+             }
+         }
           break;
  
  
           break;
  
  
@@ -1947,7 +2329,7 @@ regex_compile (pattern, size, syntax, bufp)
  
         case '[':
           {
  
         case '[':
           {
-           boolean had_char_class = false;
+           CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
  
             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
  
  
             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
  
@@ -1980,6 +2362,9 @@ regex_compile (pattern, size, syntax, bufp)
             /* Read in characters and ranges, setting map bits.  */
             for (;;)
               {
             /* Read in characters and ranges, setting map bits.  */
             for (;;)
               {
+               int len;
+               boolean escaped_char = false;
+
                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
  
                 PATFETCH (c);
                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
  
                 PATFETCH (c);
@@ -1989,52 +2374,38 @@ regex_compile (pattern, size, syntax, bufp)
                   {
                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
  
                   {
                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
  
-                   PATFETCH (c1);
-                   SET_LIST_BIT (c1);
-                   continue;
+                   PATFETCH (c);
+                   escaped_char = true;
                   }
                   }
-
-               /* Could be the end of the bracket expression.  If it's
-                  not (i.e., when the bracket expression is `[]' so
-                  far), the ']' character bit gets set way below.  */
-               if (c == ']' && p != p1 + 1)
-                 break;
-
-               /* Look ahead to see if it's a range when the last thing
-                  was a character class.  */
-               if (had_char_class && c == '-' && *p != ']')
-                 FREE_STACK_RETURN (REG_ERANGE);
-
-               /* Look ahead to see if it's a range when the last thing
-                  was a character: if this is a hyphen not at the
-                  beginning or the end of a list, then it's the range
-                  operator.  */
-               if (c == '-'
-                   && !(p - 2 >= pattern && p[-2] == '[')
-                   && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
-                   && *p != ']')
+               else
                   {
                   {
-                   reg_errcode_t ret
-                     = compile_range (&p, pend, translate, syntax, b);
-                   if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+                   /* Could be the end of the bracket expression.      If it's
+                      not (i.e., when the bracket expression is `[]' so
+                      far), the ']' character bit gets set way below.  */
+                   if (c == ']' && p != p1 + 1)
+                     break;
                   }
  
                   }
  
-               else if (p[0] == '-' && p[1] != ']')
-                 { /* This handles ranges made up of characters only.  */
-                   reg_errcode_t ret;
-
-                   /* Move past the `-'.  */
-                   PATFETCH (c1);
-
-                   ret = compile_range (&p, pend, translate, syntax, b);
-                   if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+               /* If C indicates start of multibyte char, get the
+                  actual character code in C, and set the pattern
+                  pointer P to the next character boundary.  */
+               if (bufp->multibyte && BASE_LEADING_CODE_P (c))
+                 {
+                   PATUNFETCH;
+                   c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
+                   p += len;
                   }
                   }
+               /* What should we do for the character which is
+                  greater than 0x7F, but not BASE_LEADING_CODE_P?
+                  XXX */
  
                 /* See if we're at the beginning of a possible character
                    class.  */
  
  
                 /* See if we're at the beginning of a possible character
                    class.  */
  
-               else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
-                 { /* Leave room for the null.  */
+               else if (!escaped_char &&
+                        syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+                 {
+                   /* Leave room for the null.  */
                     char str[CHAR_CLASS_MAX_LENGTH + 1];
  
                     PATFETCH (c);
                     char str[CHAR_CLASS_MAX_LENGTH + 1];
  
                     PATFETCH (c);
@@ -2053,23 +2424,29 @@ regex_compile (pattern, size, syntax, bufp)
                       }
                     str[c1] = '\0';
  
                       }
                     str[c1] = '\0';
  
-                   /* If isn't a word bracketed by `[:' and:`]':
-                      undo the ending character, the letters, and leave
-                      the leading `:' and `[' (but set bits for them).  */
+                   /* If isn't a word bracketed by `[:' and `:]':
+                      undo the ending character, the letters, and
+                      leave the leading `:' and `[' (but set bits for
+                      them).  */
                     if (c == ':' && *p == ']')
                       {
                         int ch;
                         boolean is_alnum = STREQ (str, "alnum");
                         boolean is_alpha = STREQ (str, "alpha");
                     if (c == ':' && *p == ']')
                       {
                         int ch;
                         boolean is_alnum = STREQ (str, "alnum");
                         boolean is_alpha = STREQ (str, "alpha");
+                       boolean is_ascii = STREQ (str, "ascii");
                         boolean is_blank = STREQ (str, "blank");
                         boolean is_cntrl = STREQ (str, "cntrl");
                         boolean is_digit = STREQ (str, "digit");
                         boolean is_graph = STREQ (str, "graph");
                         boolean is_lower = STREQ (str, "lower");
                         boolean is_blank = STREQ (str, "blank");
                         boolean is_cntrl = STREQ (str, "cntrl");
                         boolean is_digit = STREQ (str, "digit");
                         boolean is_graph = STREQ (str, "graph");
                         boolean is_lower = STREQ (str, "lower");
+                       boolean is_multibyte = STREQ (str, "multibyte");
+                       boolean is_nonascii = STREQ (str, "nonascii");
                         boolean is_print = STREQ (str, "print");
                         boolean is_punct = STREQ (str, "punct");
                         boolean is_space = STREQ (str, "space");
                         boolean is_print = STREQ (str, "print");
                         boolean is_punct = STREQ (str, "punct");
                         boolean is_space = STREQ (str, "space");
+                       boolean is_unibyte = STREQ (str, "unibyte");
                         boolean is_upper = STREQ (str, "upper");
                         boolean is_upper = STREQ (str, "upper");
+                       boolean is_word = STREQ (str, "word");
                         boolean is_xdigit = STREQ (str, "xdigit");
  
                         if (!IS_CHAR_CLASS (str))
                         boolean is_xdigit = STREQ (str, "xdigit");
  
                         if (!IS_CHAR_CLASS (str))
@@ -2081,6 +2458,35 @@ regex_compile (pattern, size, syntax, bufp)
  
                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
  
  
                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
  
+                       /* Most character classes in a multibyte match
+                          just set a flag.  Exceptions are is_blank,
+                          is_digit, is_cntrl, and is_xdigit, since
+                          they can only match ASCII characters.  We
+                          don't need to handle them for multibyte.  */
+
+                       if (bufp->multibyte)
+                         {
+                           int bit = 0;
+
+                           if (is_alnum) bit = BIT_ALNUM;
+                           if (is_alpha) bit = BIT_ALPHA;
+                           if (is_ascii) bit = BIT_ASCII;
+                           if (is_graph) bit = BIT_GRAPH;
+                           if (is_lower) bit = BIT_LOWER;
+                           if (is_multibyte) bit = BIT_MULTIBYTE;
+                           if (is_nonascii) bit = BIT_NONASCII;
+                           if (is_print) bit = BIT_PRINT;
+                           if (is_punct) bit = BIT_PUNCT;
+                           if (is_space) bit = BIT_SPACE;
+                           if (is_unibyte) bit = BIT_UNIBYTE;
+                           if (is_upper) bit = BIT_UPPER;
+                           if (is_word) bit = BIT_WORD;
+                           if (bit)
+                             SET_RANGE_TABLE_WORK_AREA_BIT (range_table_work,
+                                                            bit);
+                         }
+
+                       /* Handle character classes for ASCII characters.  */
                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
                           {
                             int translated = TRANSLATE (ch);
                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
                           {
                             int translated = TRANSLATE (ch);
@@ -2101,8 +2507,18 @@ regex_compile (pattern, size, syntax, bufp)
                                 || (is_upper  && ISUPPER (ch))
                                 || (is_xdigit && ISXDIGIT (ch)))
                               SET_LIST_BIT (translated);
                                 || (is_upper  && ISUPPER (ch))
                                 || (is_xdigit && ISXDIGIT (ch)))
                               SET_LIST_BIT (translated);
+                           if (   (is_ascii  && IS_REAL_ASCII (ch))
+                               || (is_nonascii && !IS_REAL_ASCII (ch))
+                               || (is_unibyte && ISUNIBYTE (ch))
+                               || (is_multibyte && !ISUNIBYTE (ch)))
+                             SET_LIST_BIT (translated);
+
+                           if (   (is_word   && ISWORD (ch)))
+                             SET_LIST_BIT (translated);
                           }
                           }
-                       had_char_class = true;
+
+                       /* Repeat the loop. */
+                       continue;
                       }
                     else
                       {
                       }
                     else
                       {
@@ -2110,15 +2526,71 @@ regex_compile (pattern, size, syntax, bufp)
                         while (c1--)
                           PATUNFETCH;
                         SET_LIST_BIT ('[');
                         while (c1--)
                           PATUNFETCH;
                         SET_LIST_BIT ('[');
-                       SET_LIST_BIT (':');
-                       had_char_class = false;
+
+                       /* Because the `:' may starts the range, we
+                          can't simply set bit and repeat the loop.
+                          Instead, just set it to C and handle below.  */
+                       c = ':';
+                     }
+                 }
+
+               if (p < pend && p[0] == '-' && p[1] != ']')
+                 {
+
+                   /* Discard the `-'. */
+                   PATFETCH (c1);
+
+                   /* Fetch the character which ends the range. */
+                   PATFETCH (c1);
+                   if (bufp->multibyte && BASE_LEADING_CODE_P (c1))
+                     {
+                       PATUNFETCH;
+                       c1 = STRING_CHAR_AND_LENGTH (p, pend - p, len);
+                       p += len;
+                     }
+
+                   if (SINGLE_BYTE_CHAR_P (c)
+                       && ! SINGLE_BYTE_CHAR_P (c1))
+                     {
+                       /* Handle a range such as \177-\377 in multibyte mode.
+                          Split that into two ranges,,
+                          the low one ending at 0237, and the high one
+                          starting at ...040.  */
+                       int c1_base = (c1 & ~0177) | 040;
+                       SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
+                       c1 = 0237;
                       }
                       }
+                   else if (!SAME_CHARSET_P (c, c1))
+                     FREE_STACK_RETURN (REG_ERANGE);
                   }
                 else
                   }
                 else
+                 /* Range from C to C. */
+                 c1 = c;
+
+               /* Set the range ... */
+               if (SINGLE_BYTE_CHAR_P (c))
+                 /* ... into bitmap.  */
                   {
                   {
-                   had_char_class = false;
-                   SET_LIST_BIT (c);
+                   unsigned this_char;
+                   int range_start = c, range_end = c1;
+
+                   /* If the start is after the end, the range is empty.  */
+                   if (range_start > range_end)
+                     {
+                       if (syntax & RE_NO_EMPTY_RANGES)
+                         FREE_STACK_RETURN (REG_ERANGE);
+                       /* Else, repeat the loop.  */
+                     }
+                   else
+                     {
+                       for (this_char = range_start; this_char <= range_end;
+                            this_char++)
+                         SET_LIST_BIT (TRANSLATE (this_char));
+                     }
                   }
                   }
+               else
+                 /* ... into range table.  */
+                 SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
               }
  
             /* Discard any (non)matching list bytes that are all 0 at the
               }
  
             /* Discard any (non)matching list bytes that are all 0 at the
@@ -2126,6 +2598,32 @@ regex_compile (pattern, size, syntax, bufp)
             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
               b[-1]--;
             b += b[-1];
             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
               b[-1]--;
             b += b[-1];
+
+           /* Build real range table from work area.  */
+           if (RANGE_TABLE_WORK_USED (range_table_work)
+               || RANGE_TABLE_WORK_BITS (range_table_work))
+             {
+               int i;
+               int used = RANGE_TABLE_WORK_USED (range_table_work);
+
+               /* Allocate space for COUNT + RANGE_TABLE.  Needs two
+                  bytes for flags, two for COUNT, and three bytes for
+                  each character. */
+               GET_BUFFER_SPACE (4 + used * 3);
+
+               /* Indicate the existence of range table.  */
+               laststart[1] |= 0x80;
+
+               /* Store the character class flag bits into the range table.
+                  If not in emacs, these flag bits are always 0.  */
+               *b++ = RANGE_TABLE_WORK_BITS (range_table_work) & 0xff;
+               *b++ = RANGE_TABLE_WORK_BITS (range_table_work) >> 8;
+
+               STORE_NUMBER_AND_INCR (b, used / 2);
+               for (i = 0; i < used; i++)
+                 STORE_CHARACTER_AND_INCR
+                   (b, RANGE_TABLE_WORK_ELT (range_table_work, i));
+             }
           }
           break;
  
           }
           break;
  
@@ -2522,6 +3020,18 @@ regex_compile (pattern, size, syntax, bufp)
               PATFETCH (c);
               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
               break;
               PATFETCH (c);
               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
               break;
+
+           case 'c':
+             laststart = b;
+             PATFETCH_RAW (c);
+             BUF_PUSH_2 (categoryspec, c);
+             break;
+
+           case 'C':
+             laststart = b;
+             PATFETCH_RAW (c);
+             BUF_PUSH_2 (notcategoryspec, c);
+             break;
  #endif /* emacs */
  
  
  #endif /* emacs */
  
  
@@ -2601,6 +3111,16 @@ regex_compile (pattern, size, syntax, bufp)
         default:
         /* Expects the character in `c'.  */
         normal_char:
         default:
         /* Expects the character in `c'.  */
         normal_char:
+         p1 = p - 1;           /* P1 points the head of C.  */
+#ifdef emacs
+         if (bufp->multibyte)
+           {
+             c = STRING_CHAR (p1, pend - p1);
+             c = TRANSLATE (c);
+             /* Set P to the next character boundary.  */
+             p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1;
+           }
+#endif
               /* If no exactn currently being built.  */
           if (!pending_exact
  
               /* If no exactn currently being built.  */
           if (!pending_exact
  
@@ -2608,17 +3128,17 @@ regex_compile (pattern, size, syntax, bufp)
               || pending_exact + *pending_exact + 1 != b
  
               /* We have only one byte following the exactn for the count.  */
               || pending_exact + *pending_exact + 1 != b
  
               /* We have only one byte following the exactn for the count.  */
-             || *pending_exact == (1 << BYTEWIDTH) - 1
+             || *pending_exact >= (1 << BYTEWIDTH) - (p - p1)
  
               /* If followed by a repetition operator.  */
  
               /* If followed by a repetition operator.  */
-             || *p == '*' || *p == '^'
+             || (p != pend && (*p == '*' || *p == '^'))
               || ((syntax & RE_BK_PLUS_QM)
               || ((syntax & RE_BK_PLUS_QM)
-                 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
-                 : (*p == '+' || *p == '?'))
+                 ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?')
+                 : p != pend && (*p == '+' || *p == '?'))
               || ((syntax & RE_INTERVALS)
                   && ((syntax & RE_NO_BK_BRACES)
               || ((syntax & RE_INTERVALS)
                   && ((syntax & RE_NO_BK_BRACES)
-                     ? *p == '{'
-                     : (p[0] == '\\' && p[1] == '{'))))
+                     ? p != pend && *p == '{'
+                     : p + 1 < pend && p[0] == '\\' && p[1] == '{')))
             {
               /* Start building a new exactn.  */
  
             {
               /* Start building a new exactn.  */
  
@@ -2628,8 +3148,24 @@ regex_compile (pattern, size, syntax, bufp)
               pending_exact = b - 1;
             }
  
               pending_exact = b - 1;
             }
  
-         BUF_PUSH (c);
-         (*pending_exact)++;
+#ifdef emacs
+         if (! SINGLE_BYTE_CHAR_P (c))
+           {
+             unsigned char str[MAX_MULTIBYTE_LENGTH];
+             int i = CHAR_STRING (c, str);
+             int j;
+             for (j = 0; j < i; j++)
+               {
+                 BUF_PUSH (str[j]);
+                 (*pending_exact)++;
+               }
+           }
+         else
+#endif
+           {
+             BUF_PUSH (c);
+             (*pending_exact)++;
+           }
           break;
         } /* switch (c) */
      } /* while p != pend */
           break;
         } /* switch (c) */
      } /* while p != pend */
@@ -2668,12 +3204,9 @@ regex_compile (pattern, size, syntax, bufp)
    {
      int num_regs = bufp->re_nsub + 1;
  
    {
      int num_regs = bufp->re_nsub + 1;
  
-    /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
-       is strictly greater than re_max_failures, the largest possible stack
-       is 2 * re_max_failures failure points.  */
-    if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
+    if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE)
        {
        {
-       fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
+       fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE;
  
  #ifdef emacs
         if (! fail_stack.stack)
  
  #ifdef emacs
         if (! fail_stack.stack)
@@ -2833,70 +3366,16 @@ group_in_compile_stack (compile_stack, regnum)
  
    return false;
  }
  
    return false;
  }
-
-
-/* Read the ending character of a range (in a bracket expression) from the
-   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
-   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
-   Then we set the translation of all bits between the starting and
-   ending characters (inclusive) in the compiled pattern B.
-
-   Return an error code.
-
-   We use these short variable names so we can use the same macros as
-   `regex_compile' itself.  */
-
-static reg_errcode_t
-compile_range (p_ptr, pend, translate, syntax, b)
-    const char **p_ptr, *pend;
-    RE_TRANSLATE_TYPE translate;
-    reg_syntax_t syntax;
-    unsigned char *b;
-{
-  unsigned this_char;
-
-  const char *p = *p_ptr;
-  int range_start, range_end;
-
-  if (p == pend)
-    return REG_ERANGE;
-
-  /* Even though the pattern is a signed `char *', we need to fetch
-     with unsigned char *'s; if the high bit of the pattern character
-     is set, the range endpoints will be negative if we fetch using a
-     signed char *.
-
-     We also want to fetch the endpoints without translating them; the
-     appropriate translation is done in the bit-setting loop below.  */
-  /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
-  range_start = ((const unsigned char *) p)[-2];
-  range_end   = ((const unsigned char *) p)[0];
-
-  /* Have to increment the pointer into the pattern string, so the
-     caller isn't still at the ending character.  */
-  (*p_ptr)++;
-
-  /* If the start is after the end, the range is empty.         */
-  if (range_start > range_end)
-    return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
-
-  /* Here we see why `this_char' has to be larger than an `unsigned
-     char' -- the range is inclusive, so if `range_end' == 0xff
-     (assuming 8-bit characters), we would otherwise go into an infinite
-     loop, since all characters <= 0xff.  */
-  for (this_char = range_start; this_char <= range_end; this_char++)
-    {
-      SET_LIST_BIT (TRANSLATE (this_char));
-    }
-
-  return REG_NOERROR;
-}
  \f
  /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
     BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
     characters can start a string that matches the pattern.  This fastmap
     is used by re_search to skip quickly over impossible starting points.
  
  \f
  /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
     BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
     characters can start a string that matches the pattern.  This fastmap
     is used by re_search to skip quickly over impossible starting points.
  
+   Character codes above (1 << BYTEWIDTH) are not represented in the
+   fastmap, but the leading codes are represented.  Thus, the fastmap
+   indicates which character sets could start a match.
+
     The caller must supply the address of a (1 << BYTEWIDTH)-byte data
     area as BUFP->fastmap.
  
     The caller must supply the address of a (1 << BYTEWIDTH)-byte data
     area as BUFP->fastmap.
  
@@ -2909,7 +3388,7 @@ int
  re_compile_fastmap (bufp)
       struct re_pattern_buffer *bufp;
  {
  re_compile_fastmap (bufp)
       struct re_pattern_buffer *bufp;
  {
-  int j, k;
+  int i, j, k;
  #ifdef MATCH_MAY_ALLOCATE
    fail_stack_type fail_stack;
  #endif
  #ifdef MATCH_MAY_ALLOCATE
    fail_stack_type fail_stack;
  #endif
@@ -2938,6 +3417,13 @@ re_compile_fastmap (bufp)
    /* We aren't doing a `succeed_n' to begin with.  */
    boolean succeed_n_p = false;
  
    /* We aren't doing a `succeed_n' to begin with.  */
    boolean succeed_n_p = false;
  
+  /* If all elements for base leading-codes in fastmap is set, this
+     flag is set true. */
+  boolean match_any_multibyte_characters = false;
+
+  /* Maximum code of simple (single byte) character. */
+  int simple_char_max;
+
    assert (fastmap != NULL && p != NULL);
  
    INIT_FAIL_STACK ();
    assert (fastmap != NULL && p != NULL);
  
    INIT_FAIL_STACK ();
@@ -2989,44 +3475,154 @@ re_compile_fastmap (bufp)
           break;
  
  
           break;
  
  
+#ifndef emacs
+       case charset:
+         {
+           int length = (*p & 0x7f);;
+           p++;
+
+           for (j = length * BYTEWIDTH - 1; j >= 0; j--)
+             if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
+               fastmap[j] = 1;
+         }
+         break;
+
+       case charset_not:
+         /* Chars beyond end of map must be allowed.  */
+         {
+           int length = (*p & 0x7f);;
+           p++;
+
+           for (j = length * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
+             fastmap[j] = 1;
+
+           for (j = length * BYTEWIDTH - 1; j >= 0; j--)
+             if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
+               fastmap[j] = 1;
+         }
+         break;
+
+       case wordchar:
+         for (j = 0; j < (1 << BYTEWIDTH); j++)
+           if (SYNTAX (j) == Sword)
+             fastmap[j] = 1;
+         break;
+
+
+       case notwordchar:
+         for (j = 0; j < (1 << BYTEWIDTH); j++)
+           if (SYNTAX (j) != Sword)
+             fastmap[j] = 1;
+         break;
+#else  /* emacs */
         case charset:
         case charset:
-         for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+         for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
+              j >= 0; j--)
             if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
               fastmap[j] = 1;
             if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
               fastmap[j] = 1;
+
+         /* If we can match a character class, we can match
+            any character set.  */
+         if (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
+             && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0)
+           goto set_fastmap_for_multibyte_characters;
+
+         if (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
+             && match_any_multibyte_characters == false)
+           {
+             /* Set fastmap[I] 1 where I is a base leading code of each
+                multibyte character in the range table. */
+             int c, count;
+
+             /* Make P points the range table. */
+             p += CHARSET_BITMAP_SIZE (&p[-2]);
+
+             /* Extract the number of ranges in range table into COUNT.  */
+             EXTRACT_NUMBER_AND_INCR (count, p);
+             for (; count > 0; count--, p += 2 * 3) /* XXX */
+               {
+                 /* Extract the start of each range.  */
+                 EXTRACT_CHARACTER (c, p);
+                 j = CHAR_CHARSET (c);
+                 fastmap[CHARSET_LEADING_CODE_BASE (j)] = 1;
+               }
+           }
           break;
  
  
         case charset_not:
           break;
  
  
         case charset_not:
-         /* Chars beyond end of map must be allowed.  */
-         for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
+         /* Chars beyond end of bitmap are possible matches.
+            All the single-byte codes can occur in multibyte buffers.
+            So any that are not listed in the charset
+            are possible matches, even in multibyte buffers.  */
+         simple_char_max = (1 << BYTEWIDTH);
+         for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
+              j < simple_char_max; j++)
             fastmap[j] = 1;
  
             fastmap[j] = 1;
  
-         for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+         for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
+              j >= 0; j--)
             if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
               fastmap[j] = 1;
             if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
               fastmap[j] = 1;
+
+         if (bufp->multibyte)
+           /* Any character set can possibly contain a character
+              which doesn't match the specified set of characters.  */
+           {
+           set_fastmap_for_multibyte_characters:
+             if (match_any_multibyte_characters == false)
+               {
+                 for (j = 0x80; j < 0xA0; j++) /* XXX */
+                   if (BASE_LEADING_CODE_P (j))
+                     fastmap[j] = 1;
+                 match_any_multibyte_characters = true;
+               }
+           }
           break;
  
  
         case wordchar:
           break;
  
  
         case wordchar:
-         for (j = 0; j < (1 << BYTEWIDTH); j++)
+         /* All the single-byte codes can occur in multibyte buffers,
+            and they may have word syntax.  So do consider them.  */
+         simple_char_max = (1 << BYTEWIDTH);
+         for (j = 0; j < simple_char_max; j++)
             if (SYNTAX (j) == Sword)
               fastmap[j] = 1;
             if (SYNTAX (j) == Sword)
               fastmap[j] = 1;
+
+         if (bufp->multibyte)
+           /* Any character set can possibly contain a character
+              whose syntax is `Sword'.  */
+           goto set_fastmap_for_multibyte_characters;
           break;
  
  
         case notwordchar:
           break;
  
  
         case notwordchar:
-         for (j = 0; j < (1 << BYTEWIDTH); j++)
+         /* All the single-byte codes can occur in multibyte buffers,
+            and they may not have word syntax.  So do consider them.  */
+         simple_char_max = (1 << BYTEWIDTH);
+         for (j = 0; j < simple_char_max; j++)
             if (SYNTAX (j) != Sword)
               fastmap[j] = 1;
             if (SYNTAX (j) != Sword)
               fastmap[j] = 1;
-         break;
  
  
+         if (bufp->multibyte)
+           /* Any character set can possibly contain a character
+              whose syntax is not `Sword'.  */
+           goto set_fastmap_for_multibyte_characters;
+         break;
+#endif
  
         case anychar:
           {
             int fastmap_newline = fastmap['\n'];
  
  
         case anychar:
           {
             int fastmap_newline = fastmap['\n'];
  
-           /* `.' matches anything ...  */
-           for (j = 0; j < (1 << BYTEWIDTH); j++)
+           /* `.' matches anything, except perhaps newline.
+              Even in a multibyte buffer, it should match any
+              conceivable byte value for the fastmap.  */
+           if (bufp->multibyte)
+             match_any_multibyte_characters = true;
+
+           simple_char_max = (1 << BYTEWIDTH);
+           for (j = 0; j < simple_char_max; j++)
               fastmap[j] = 1;
  
             /* ... except perhaps newline.  */
               fastmap[j] = 1;
  
             /* ... except perhaps newline.  */
@@ -3043,22 +3639,71 @@ re_compile_fastmap (bufp)
           }
  
  #ifdef emacs
           }
  
  #ifdef emacs
-       case syntaxspec:
-         k = *p++;
-         for (j = 0; j < (1 << BYTEWIDTH); j++)
+       case wordbound:
+       case notwordbound:
+       case wordbeg:
+       case wordend:
+       case notsyntaxspec:
+       case syntaxspec:
+         /* This match depends on text properties.  These end with
+            aborting optimizations.  */
+         bufp->can_be_null = 1;
+         goto done;
+#if 0
+         k = *p++;
+         simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+         for (j = 0; j < simple_char_max; j++)
             if (SYNTAX (j) == (enum syntaxcode) k)
               fastmap[j] = 1;
             if (SYNTAX (j) == (enum syntaxcode) k)
               fastmap[j] = 1;
-         break;
  
  
+         if (bufp->multibyte)
+           /* Any character set can possibly contain a character
+              whose syntax is K.  */
+           goto set_fastmap_for_multibyte_characters;
+         break;
  
         case notsyntaxspec:
           k = *p++;
  
         case notsyntaxspec:
           k = *p++;
-         for (j = 0; j < (1 << BYTEWIDTH); j++)
+         simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+         for (j = 0; j < simple_char_max; j++)
             if (SYNTAX (j) != (enum syntaxcode) k)
               fastmap[j] = 1;
             if (SYNTAX (j) != (enum syntaxcode) k)
               fastmap[j] = 1;
+
+         if (bufp->multibyte)
+           /* Any character set can possibly contain a character
+              whose syntax is not K.  */
+           goto set_fastmap_for_multibyte_characters;
           break;
           break;
+#endif
  
  
  
  
+       case categoryspec:
+         k = *p++;
+         simple_char_max = (1 << BYTEWIDTH);
+         for (j = 0; j < simple_char_max; j++)
+           if (CHAR_HAS_CATEGORY (j, k))
+             fastmap[j] = 1;
+
+         if (bufp->multibyte)
+           /* Any character set can possibly contain a character
+              whose category is K.  */
+           goto set_fastmap_for_multibyte_characters;
+         break;
+
+
+       case notcategoryspec:
+         k = *p++;
+         simple_char_max = (1 << BYTEWIDTH);
+         for (j = 0; j < simple_char_max; j++)
+           if (!CHAR_HAS_CATEGORY (j, k))
+             fastmap[j] = 1;
+
+         if (bufp->multibyte)
+           /* Any character set can possibly contain a character
+              whose category is not K.  */
+           goto set_fastmap_for_multibyte_characters;
+         break;
+
        /* All cases after this match the empty string.  These end with
          `continue'.  */
  
        /* All cases after this match the empty string.  These end with
          `continue'.  */
  
@@ -3075,10 +3720,12 @@ re_compile_fastmap (bufp)
         case endline:
         case begbuf:
         case endbuf:
         case endline:
         case begbuf:
         case endbuf:
+#ifndef emacs
         case wordbound:
         case notwordbound:
         case wordbeg:
         case wordend:
         case wordbound:
         case notwordbound:
         case wordbeg:
         case wordend:
+#endif
         case push_dummy_failure:
           continue;
  
         case push_dummy_failure:
           continue;
  
@@ -3247,6 +3894,13 @@ re_search (bufp, string, size, startpos, range, regs)
                       regs, size);
  }
  
                       regs, size);
  }
  
+/* End address of virtual concatenation of string.  */
+#define STOP_ADDR_VSTRING(P)                           \
+  (((P) >= size1 ? string2 + size2 : string1 + size1))
+
+/* Address of POS in the concatenation of virtual string. */
+#define POS_ADDR_VSTRING(POS)                                  \
+  (((POS) >= size1 ? string2 - size1 : string1) + (POS))
  
  /* Using the compiled pattern in BUFP->buffer, first tries to match the
     virtual concatenation of STRING1 and STRING2, starting first at index
  
  /* Using the compiled pattern in BUFP->buffer, first tries to match the
     virtual concatenation of STRING1 and STRING2, starting first at index
@@ -3286,6 +3940,9 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
    int endpos = startpos + range;
    int anchored_start = 0;
  
    int endpos = startpos + range;
    int anchored_start = 0;
  
+  /* Nonzero if we have to concern multibyte character.         */
+  int multibyte = bufp->multibyte;
+
    /* Check for out-of-range STARTPOS.  */
    if (startpos < 0 || startpos > total_size)
      return -1;
    /* Check for out-of-range STARTPOS.  */
    if (startpos < 0 || startpos > total_size)
      return -1;
@@ -3299,13 +3956,13 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
      range = total_size - startpos;
  
    /* If the search isn't to be a backwards one, don't waste time in a
      range = total_size - startpos;
  
    /* If the search isn't to be a backwards one, don't waste time in a
-     search for a pattern that must be anchored.  */
+     search for a pattern anchored at beginning of buffer.  */
    if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
      {
        if (startpos > 0)
         return -1;
        else
    if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
      {
        if (startpos > 0)
         return -1;
        else
-       range = 1;
+       range = 0;
      }
  
  #ifdef emacs
      }
  
  #ifdef emacs
@@ -3313,8 +3970,8 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
       don't keep searching past point.  */
    if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
      {
       don't keep searching past point.  */
    if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
      {
-      range = PT - startpos;
-      if (range <= 0)
+      range = PT_BYTE - BEGV_BYTE - startpos;
+      if (range < 0)
         return -1;
      }
  #endif /* emacs */
         return -1;
      }
  #endif /* emacs */
@@ -3328,6 +3985,16 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
    if (bufp->buffer[0] == begline)
      anchored_start = 1;
  
    if (bufp->buffer[0] == begline)
      anchored_start = 1;
  
+#ifdef emacs
+  gl_state.object = re_match_object;
+  {
+    int adjpos = NILP (re_match_object) || BUFFERP (re_match_object);
+    int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (startpos + adjpos);
+
+    SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
+  }
+#endif
+
    /* Loop through the string, looking for a place to start matching.  */
    for (;;)
      {
    /* Loop through the string, looking for a place to start matching.  */
    for (;;)
      {
@@ -3350,37 +4017,69 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
          the first null string.  */
        if (fastmap && startpos < total_size && !bufp->can_be_null)
         {
          the first null string.  */
        if (fastmap && startpos < total_size && !bufp->can_be_null)
         {
+         register const char *d;
+         register unsigned int buf_ch;
+
+         d = POS_ADDR_VSTRING (startpos);
+
           if (range > 0)        /* Searching forwards.  */
             {
           if (range > 0)        /* Searching forwards.  */
             {
-             register const char *d;
               register int lim = 0;
               int irange = range;
  
               if (startpos < size1 && startpos + range >= size1)
                 lim = range - (size1 - startpos);
  
               register int lim = 0;
               int irange = range;
  
               if (startpos < size1 && startpos + range >= size1)
                 lim = range - (size1 - startpos);
  
-             d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
-
               /* Written out as an if-else to avoid testing `translate'
                  inside the loop.  */
               /* Written out as an if-else to avoid testing `translate'
                  inside the loop.  */
-             if (translate)
-               while (range > lim
-                      && !fastmap[(unsigned char)
-                                  translate[(unsigned char) *d++]])
-                 range--;
+             if (RE_TRANSLATE_P (translate))
+               {
+                 if (multibyte)
+                   while (range > lim)
+                     {
+                       int buf_charlen;
+
+                       buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
+                                                        buf_charlen);
+
+                       buf_ch = RE_TRANSLATE (translate, buf_ch);
+                       if (buf_ch >= 0400
+                           || fastmap[buf_ch])
+                         break;
+
+                       range -= buf_charlen;
+                       d += buf_charlen;
+                     }
+                 else
+                   while (range > lim
+                          && !fastmap[(unsigned char)
+                                      RE_TRANSLATE (translate, (unsigned char) *d)])
+                     {
+                       d++;
+                       range--;
+                     }
+               }
               else
               else
-               while (range > lim && !fastmap[(unsigned char) *d++])
-                 range--;
+               while (range > lim && !fastmap[(unsigned char) *d])
+                 {
+                   d++;
+                   range--;
+                 }
  
               startpos += irange - range;
             }
           else                          /* Searching backwards.  */
             {
  
               startpos += irange - range;
             }
           else                          /* Searching backwards.  */
             {
-             register char c = (size1 == 0 || startpos >= size1
-                                ? string2[startpos - size1]
-                                : string1[startpos]);
+             int room = (size1 == 0 || startpos >= size1
+                         ? size2 + size1 - startpos
+                         : size1 - startpos);
  
  
-             if (!fastmap[(unsigned char) TRANSLATE (c)])
+             buf_ch = STRING_CHAR (d, room);
+             if (RE_TRANSLATE_P (translate))
+               buf_ch = RE_TRANSLATE (translate, buf_ch);
+
+             if (! (buf_ch >= 0400
+                    || fastmap[buf_ch]))
                 goto advance;
             }
         }
                 goto advance;
             }
         }
@@ -3409,13 +4108,56 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
         break;
        else if (range > 0)
         {
         break;
        else if (range > 0)
         {
-         range--;
-         startpos++;
+         /* Update STARTPOS to the next character boundary.  */
+         if (multibyte)
+           {
+             const unsigned char *p
+               = (const unsigned char *) POS_ADDR_VSTRING (startpos);
+             const unsigned char *pend
+               = (const unsigned char *) STOP_ADDR_VSTRING (startpos);
+             int len = MULTIBYTE_FORM_LENGTH (p, pend - p);
+
+             range -= len;
+             if (range < 0)
+               break;
+             startpos += len;
+           }
+         else
+           {
+             range--;
+             startpos++;
+           }
         }
        else
         {
           range++;
           startpos--;
         }
        else
         {
           range++;
           startpos--;
+
+         /* Update STARTPOS to the previous character boundary.  */
+         if (multibyte)
+           {
+             const unsigned char *p
+               = (const unsigned char *) POS_ADDR_VSTRING (startpos);
+             int len = 0;
+
+             /* Find the head of multibyte form.  */
+             while (!CHAR_HEAD_P (*p))
+               p--, len++;
+
+             /* Adjust it. */
+#if 0                          /* XXX */
+             if (MULTIBYTE_FORM_LENGTH (p, len + 1) != (len + 1))
+               ;
+             else
+#endif
+               {
+                 range += len;
+                 if (range > 0)
+                   break;
+
+                 startpos -= len;
+               }
+           }
         }
      }
    return -1;
         }
      }
    return -1;
@@ -3469,6 +4211,15 @@ static boolean alt_match_null_string_p (),
     == Sword)
  
  /* Disabled due to a compiler bug -- see comment at case wordbound */
     == Sword)
  
  /* Disabled due to a compiler bug -- see comment at case wordbound */
+
+/* The comment at case wordbound is following one, but we don't use
+   AT_WORD_BOUNDARY anymore to support multibyte form.
+
+   The DEC Alpha C compiler 3.x generates incorrect code for the
+   test         WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
+   AT_WORD_BOUNDARY, so this code is disabled. Expanding the
+   macro and introducing temporary variables works around the bug.  */
+
  #if 0
  /* Test if the character before D and the one at D differ with respect
     to being word-constituent.  */
  #if 0
  /* Test if the character before D and the one at D differ with respect
     to being word-constituent.  */
@@ -3526,6 +4277,11 @@ re_match (bufp, string, size, pos, regs)
  }
  #endif /* not emacs */
  
  }
  #endif /* not emacs */
  
+#ifdef emacs
+/* In Emacs, this is the string or buffer in which we
+   are matching.  It is used for looking up syntax properties. */
+Lisp_Object re_match_object;
+#endif
  
  /* re_match_2 matches the compiled pattern in BUFP against the
     the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
  
  /* re_match_2 matches the compiled pattern in BUFP against the
     the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
@@ -3549,8 +4305,18 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
       struct re_registers *regs;
       int stop;
  {
       struct re_registers *regs;
       int stop;
  {
-  int result = re_match_2_internal (bufp, string1, size1, string2, size2,
-                                   pos, regs, stop);
+  int result;
+
+#ifdef emacs
+  int charpos;
+  int adjpos = NILP (re_match_object) || BUFFERP (re_match_object);
+  gl_state.object = re_match_object;
+  charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos + adjpos);
+  SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
+#endif
+
+  result = re_match_2_internal (bufp, string1, size1, string2, size2,
+                               pos, regs, stop);
    alloca (0);
    return result;
  }
    alloca (0);
    return result;
  }
@@ -3591,6 +4357,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
    /* We use this to map every character in the string. */
    RE_TRANSLATE_TYPE translate = bufp->translate;
  
    /* We use this to map every character in the string. */
    RE_TRANSLATE_TYPE translate = bufp->translate;
  
+  /* Nonzero if we have to concern multibyte character.         */
+  int multibyte = bufp->multibyte;
+
    /* Failure point stack.  Each place that can handle a failure further
       down the line pushes a failure point on this stack.  It consists of
       restart, regend, and reg_info for all registers corresponding to
    /* Failure point stack.  Each place that can handle a failure further
       down the line pushes a failure point on this stack.  It consists of
       restart, regend, and reg_info for all registers corresponding to
@@ -3983,16 +4752,39 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
  
           /* This is written out as an if-else so we don't waste time
              testing `translate' inside the loop.  */
  
           /* This is written out as an if-else so we don't waste time
              testing `translate' inside the loop.  */
-         if (translate)
+         if (RE_TRANSLATE_P (translate))
             {
             {
-             do
-               {
-                 PREFETCH ();
-                 if ((unsigned char) translate[(unsigned char) *d++]
-                     != (unsigned char) *p++)
-                   goto fail;
-               }
-             while (--mcnt);
+#ifdef emacs
+             if (multibyte)
+               do
+                 {
+                   int pat_charlen, buf_charlen;
+                   unsigned int pat_ch, buf_ch;
+
+                   PREFETCH ();
+                   pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
+                   buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
+
+                   if (RE_TRANSLATE (translate, buf_ch)
+                       != pat_ch)
+                     goto fail;
+
+                   p += pat_charlen;
+                   d += buf_charlen;
+                   mcnt -= pat_charlen;
+                 }
+               while (mcnt > 0);
+             else
+#endif /* not emacs */
+               do
+                 {
+                   PREFETCH ();
+                   if ((unsigned char) RE_TRANSLATE (translate, (unsigned char) *d)
+                       != (unsigned char) *p++)
+                     goto fail;
+                   d++;
+                 }
+               while (--mcnt);
             }
           else
             {
             }
           else
             {
@@ -4009,43 +4801,119 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
  
         /* Match any character except possibly a newline or a null.  */
         case anychar:
  
         /* Match any character except possibly a newline or a null.  */
         case anychar:
-         DEBUG_PRINT1 ("EXECUTING anychar.\n");
+         {
+           int buf_charlen;
+           unsigned int buf_ch;
  
  
-         PREFETCH ();
+           DEBUG_PRINT1 ("EXECUTING anychar.\n");
  
  
-         if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
-             || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
-           goto fail;
+           PREFETCH ();
  
  
-         SET_REGS_MATCHED ();
-         DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
-         d++;
+#ifdef emacs
+           if (multibyte)
+             buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
+           else
+#endif /* not emacs */
+             {
+               buf_ch = (unsigned char) *d;
+               buf_charlen = 1;
+             }
+
+           buf_ch = TRANSLATE (buf_ch);
+
+           if ((!(bufp->syntax & RE_DOT_NEWLINE)
+                && buf_ch == '\n')
+               || ((bufp->syntax & RE_DOT_NOT_NULL)
+                   && buf_ch == '\000'))
+             goto fail;
+
+           SET_REGS_MATCHED ();
+           DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
+           d += buf_charlen;
+         }
           break;
  
  
         case charset:
         case charset_not:
           {
           break;
  
  
         case charset:
         case charset_not:
           {
-           register unsigned char c;
+           register unsigned int c;
             boolean not = (re_opcode_t) *(p - 1) == charset_not;
             boolean not = (re_opcode_t) *(p - 1) == charset_not;
+           int len;
+
+           /* Start of actual range_table, or end of bitmap if there is no
+              range table.  */
+           unsigned char *range_table;
+
+           /* Nonzero if there is a range table.  */
+           int range_table_exists;
+
+           /* Number of ranges of range table.  This is not included
+              in the initial byte-length of the command.  */
+           int count = 0;
  
             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
  
             PREFETCH ();
  
             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
  
             PREFETCH ();
-           c = TRANSLATE (*d); /* The character to match.  */
+           c = (unsigned char) *d;
  
  
-           /* Cast to `unsigned' instead of `unsigned char' in case the
-              bit list is a full 32 bytes long.  */
-           if (c < (unsigned) (*p * BYTEWIDTH)
-               && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
-             not = !not;
+           range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
  
  
-           p += 1 + *p;
+#ifdef emacs
+           if (range_table_exists)
+             {
+               range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap.  */
+               EXTRACT_NUMBER_AND_INCR (count, range_table);
+             }
+
+           if (multibyte && BASE_LEADING_CODE_P (c))
+             c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
+#endif /* emacs */
+
+           if (SINGLE_BYTE_CHAR_P (c))
+             {                 /* Lookup bitmap.  */
+               c = TRANSLATE (c); /* The character to match.  */
+               len = 1;
+
+               /* Cast to `unsigned' instead of `unsigned char' in
+                  case the bit list is a full 32 bytes long.  */
+               if (c < (unsigned) (CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH)
+                   && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+                 not = !not;
+             }
+#ifdef emacs
+           else if (range_table_exists)
+             {
+               int class_bits = CHARSET_RANGE_TABLE_BITS (&p[-1]);
+
+               if (  (class_bits & BIT_ALNUM && ISALNUM (c))
+                   | (class_bits & BIT_ALPHA && ISALPHA (c))
+                   | (class_bits & BIT_ASCII && IS_REAL_ASCII (c))
+                   | (class_bits & BIT_GRAPH && ISGRAPH (c))
+                   | (class_bits & BIT_LOWER && ISLOWER (c))
+                   | (class_bits & BIT_MULTIBYTE && !ISUNIBYTE (c))
+                   | (class_bits & BIT_NONASCII && !IS_REAL_ASCII (c))
+                   | (class_bits & BIT_PRINT && ISPRINT (c))
+                   | (class_bits & BIT_PUNCT && ISPUNCT (c))
+                   | (class_bits & BIT_SPACE && ISSPACE (c))
+                   | (class_bits & BIT_UNIBYTE && ISUNIBYTE (c))
+                   | (class_bits & BIT_UPPER && ISUPPER (c))
+                   | (class_bits & BIT_WORD  && ISWORD (c)))
+                 not = !not;
+               else
+                 CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);
+             }
+#endif /* emacs */
+
+           if (range_table_exists)
+             p = CHARSET_RANGE_TABLE_END (range_table, count);
+           else
+             p += CHARSET_BITMAP_SIZE (&p[-1]) + 1;
  
             if (!not) goto fail;
  
             SET_REGS_MATCHED ();
  
             if (!not) goto fail;
  
             SET_REGS_MATCHED ();
-           d++;
+           d += len;
             break;
           }
  
             break;
           }
  
@@ -4288,7 +5156,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
  
                 /* Compare that many; failure if mismatch, else move
                    past them.  */
  
                 /* Compare that many; failure if mismatch, else move
                    past them.  */
-               if (translate
+               if (RE_TRANSLATE_P (translate)
                     ? bcmp_translate (d, d2, mcnt, translate)
                     : bcmp (d, d2, mcnt))
                   goto fail;
                     ? bcmp_translate (d, d2, mcnt, translate)
                     : bcmp (d, d2, mcnt))
                   goto fail;
@@ -4395,6 +5263,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
         on_failure:
           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
  
         on_failure:
           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
  
+#if defined (WINDOWSNT) && defined (emacs)
+         QUIT;
+#endif
+
           EXTRACT_NUMBER_AND_INCR (mcnt, p);
           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
  
           EXTRACT_NUMBER_AND_INCR (mcnt, p);
           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
  
@@ -4435,6 +5307,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
         /* A smart repeat ends with `maybe_pop_jump'.
            We change it to either `pop_failure_jump' or `jump'.  */
         case maybe_pop_jump:
         /* A smart repeat ends with `maybe_pop_jump'.
            We change it to either `pop_failure_jump' or `jump'.  */
         case maybe_pop_jump:
+#if defined (WINDOWSNT) && defined (emacs)
+         QUIT;
+#endif
           EXTRACT_NUMBER_AND_INCR (mcnt, p);
           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
           {
           EXTRACT_NUMBER_AND_INCR (mcnt, p);
           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
           {
@@ -4489,24 +5364,42 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
             else if ((re_opcode_t) *p2 == exactn
                      || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
               {
             else if ((re_opcode_t) *p2 == exactn
                      || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
               {
-               register unsigned char c
+               register unsigned int c
                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
  
                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
  
-               if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
+               if ((re_opcode_t) p1[3] == exactn)
+                 {
+                   if (!(multibyte /* && (c != '\n') */
+                         && BASE_LEADING_CODE_P (c))
+                       ? c != p1[5]
+                       : (STRING_CHAR (&p2[2], pend - &p2[2])
+                          != STRING_CHAR (&p1[5], pend - &p1[5])))
                   {
                     p[-3] = (unsigned char) pop_failure_jump;
                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
                                   c, p1[5]);
                   }
                   {
                     p[-3] = (unsigned char) pop_failure_jump;
                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
                                   c, p1[5]);
                   }
+                 }
  
                 else if ((re_opcode_t) p1[3] == charset
                          || (re_opcode_t) p1[3] == charset_not)
                   {
                     int not = (re_opcode_t) p1[3] == charset_not;
  
  
                 else if ((re_opcode_t) p1[3] == charset
                          || (re_opcode_t) p1[3] == charset_not)
                   {
                     int not = (re_opcode_t) p1[3] == charset_not;
  
-                   if (c < (unsigned char) (p1[4] * BYTEWIDTH)
+                   if (multibyte /* && (c != '\n') */
+                       && BASE_LEADING_CODE_P (c))
+                     c = STRING_CHAR (&p2[2], pend - &p2[2]);
+
+                   /* Test if C is listed in charset (or charset_not)
+                      at `&p1[3]'.  */
+                   if (SINGLE_BYTE_CHAR_P (c))
+                     {
+                       if (c < CHARSET_BITMAP_SIZE (&p1[3]) * BYTEWIDTH
                         && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
                       not = !not;
                         && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
                       not = !not;
+                     }
+                   else if (CHARSET_RANGE_TABLE_EXISTS_P (&p1[3]))
+                     CHARSET_LOOKUP_RANGE_TABLE (not, c, &p1[3]);
  
                     /* `not' is equal to 1 if c would match, which means
                         that we can't change to pop_failure_jump.  */
  
                     /* `not' is equal to 1 if c would match, which means
                         that we can't change to pop_failure_jump.  */
@@ -4519,29 +5412,55 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
               }
             else if ((re_opcode_t) *p2 == charset)
               {
               }
             else if ((re_opcode_t) *p2 == charset)
               {
-#ifdef DEBUG
-               register unsigned char c
-                 = *p2 == (unsigned char) endline ? '\n' : p2[2];
-#endif
+               if ((re_opcode_t) p1[3] == exactn)
+                 {
+                   register unsigned int c = p1[5];
+                   int not = 0;
+
+                   if (multibyte && BASE_LEADING_CODE_P (c))
+                     c = STRING_CHAR (&p1[5], pend - &p1[5]);
+
+                   /* Test if C is listed in charset at `p2'.  */
+                   if (SINGLE_BYTE_CHAR_P (c))
+                     {
+                       if (c < CHARSET_BITMAP_SIZE (p2) * BYTEWIDTH
+                           && (p2[2 + c / BYTEWIDTH]
+                               & (1 << (c % BYTEWIDTH))))
+                         not = !not;
+                     }
+                   else if (CHARSET_RANGE_TABLE_EXISTS_P (p2))
+                     CHARSET_LOOKUP_RANGE_TABLE (not, c, p2);
  
  
-               if ((re_opcode_t) p1[3] == exactn
-                   && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
-                         && (p2[2 + p1[5] / BYTEWIDTH]
-                             & (1 << (p1[5] % BYTEWIDTH)))))
+                   if (!not)
                   {
                     p[-3] = (unsigned char) pop_failure_jump;
                   {
                     p[-3] = (unsigned char) pop_failure_jump;
-                   DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
-                                 c, p1[5]);
+                       DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
+                     }
                   }
  
                   }
  
-               else if ((re_opcode_t) p1[3] == charset_not)
+               /* It is hard to list up all the character in charset
+                  P2 if it includes multibyte character.  Give up in
+                  such case.  */
+               else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
+                 {
+                   /* Now, we are sure that P2 has no range table.
+                      So, for the size of bitmap in P2, `p2[1]' is
+                      enough.  But P1 may have range table, so the
+                      size of bitmap table of P1 is extracted by
+                      using macro `CHARSET_BITMAP_SIZE'.
+
+                      Since we know that all the character listed in
+                      P2 is ASCII, it is enough to test only bitmap
+                      table of P1.  */
+
+                   if ((re_opcode_t) p1[3] == charset_not)
                   {
                     int idx;
                   {
                     int idx;
-                   /* We win if the charset_not inside the loop
-                      lists every character listed in the charset after.  */
+                       /* We win if the charset_not inside the loop lists
+                          every character listed in the charset after.  */
                     for (idx = 0; idx < (int) p2[1]; idx++)
                       if (! (p2[2 + idx] == 0
                     for (idx = 0; idx < (int) p2[1]; idx++)
                       if (! (p2[2 + idx] == 0
-                            || (idx < (int) p1[4]
+                                || (idx < CHARSET_BITMAP_SIZE (&p1[3])
                                  && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
                         break;
  
                                  && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
                         break;
  
@@ -4557,12 +5476,14 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
                     /* We win if the charset inside the loop
                        has no overlap with the one after the loop.  */
                     for (idx = 0;
                     /* We win if the charset inside the loop
                        has no overlap with the one after the loop.  */
                     for (idx = 0;
-                        idx < (int) p2[1] && idx < (int) p1[4];
+                            (idx < (int) p2[1]
+                             && idx < CHARSET_BITMAP_SIZE (&p1[3]));
                          idx++)
                       if ((p2[2 + idx] & p1[5 + idx]) != 0)
                         break;
  
                          idx++)
                       if ((p2[2 + idx] & p1[5 + idx]) != 0)
                         break;
  
-                   if (idx == p2[1] || idx == p1[4])
+                       if (idx == p2[1]
+                           || idx == CHARSET_BITMAP_SIZE (&p1[3]))
                       {
                         p[-3] = (unsigned char) pop_failure_jump;
                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
                       {
                         p[-3] = (unsigned char) pop_failure_jump;
                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
@@ -4570,6 +5491,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
                   }
               }
           }
                   }
               }
           }
+         }
           p -= 2;               /* Point at relative address again.  */
           if ((re_opcode_t) p[-1] != pop_failure_jump)
             {
           p -= 2;               /* Point at relative address again.  */
           if ((re_opcode_t) p[-1] != pop_failure_jump)
             {
@@ -4608,6 +5530,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
         /* Unconditionally jump (without popping any failure points).  */
         case jump:
         unconditional_jump:
         /* Unconditionally jump (without popping any failure points).  */
         case jump:
         unconditional_jump:
+#if defined (WINDOWSNT) && defined (emacs)
+         QUIT;
+#endif
           EXTRACT_NUMBER_AND_INCR (mcnt, p);    /* Get the amount to jump.  */
           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
           p += mcnt;                            /* Do the jump.  */
           EXTRACT_NUMBER_AND_INCR (mcnt, p);    /* Get the amount to jump.  */
           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
           p += mcnt;                            /* Do the jump.  */
@@ -4699,84 +5624,184 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
             break;
           }
  
             break;
           }
  
-#if 0
-       /* The DEC Alpha C compiler 3.x generates incorrect code for the
-          test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
-          AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
-          macro and introducing temporary variables works around the bug.  */
-
         case wordbound:
           DEBUG_PRINT1 ("EXECUTING wordbound.\n");
         case wordbound:
           DEBUG_PRINT1 ("EXECUTING wordbound.\n");
-         if (AT_WORD_BOUNDARY (d))
-           break;
-         goto fail;
  
  
-       case notwordbound:
-         DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
-         if (AT_WORD_BOUNDARY (d))
-           goto fail;
-         break;
-#else
-       case wordbound:
-       {
-         boolean prevchar, thischar;
+         /* We SUCCEED in one of the following cases: */
  
  
-         DEBUG_PRINT1 ("EXECUTING wordbound.\n");
+         /* Case 1: D is at the beginning or the end of string.  */
           if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
             break;
           if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
             break;
+         else
+           {
+             /* C1 is the character before D, S1 is the syntax of C1, C2
+                is the character at D, and S2 is the syntax of C2.  */
+             int c1, c2, s1, s2;
+             int pos1 = PTR_TO_OFFSET (d - 1);
+             int charpos;
+
+             GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+             GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
+#ifdef emacs
+             charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
+             UPDATE_SYNTAX_TABLE (charpos);
+#endif
+             s1 = SYNTAX (c1);
+#ifdef emacs
+             UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
+#endif
+             s2 = SYNTAX (c2);
  
  
-         prevchar = WORDCHAR_P (d - 1);
-         thischar = WORDCHAR_P (d);
-         if (prevchar != thischar)
+             if (/* Case 2: Only one of S1 and S2 is Sword.  */
+                 ((s1 == Sword) != (s2 == Sword))
+                 /* Case 3: Both of S1 and S2 are Sword, and macro
+                    WORD_BOUNDARY_P (C1, C2) returns nonzero.  */
+                 || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
             break;
             break;
-         goto fail;
         }
         }
+         goto fail;
  
        case notwordbound:
  
        case notwordbound:
-       {
-         boolean prevchar, thischar;
-
           DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
           DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
+
+         /* We FAIL in one of the following cases: */
+
+         /* Case 1: D is at the beginning or the end of string.  */
           if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
             goto fail;
           if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
             goto fail;
+         else
+           {
+             /* C1 is the character before D, S1 is the syntax of C1, C2
+                is the character at D, and S2 is the syntax of C2.  */
+             int c1, c2, s1, s2;
+             int pos1 = PTR_TO_OFFSET (d - 1);
+             int charpos;
+
+             GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+             GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
+#ifdef emacs
+             charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
+             UPDATE_SYNTAX_TABLE (charpos);
+#endif
+             s1 = SYNTAX (c1);
+#ifdef emacs
+             UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
+#endif
+             s2 = SYNTAX (c2);
  
  
-         prevchar = WORDCHAR_P (d - 1);
-         thischar = WORDCHAR_P (d);
-         if (prevchar != thischar)
+             if (/* Case 2: Only one of S1 and S2 is Sword.  */
+                 ((s1 == Sword) != (s2 == Sword))
+                 /* Case 3: Both of S1 and S2 are Sword, and macro
+                    WORD_BOUNDARY_P (C1, C2) returns nonzero.  */
+                 || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
             goto fail;
             goto fail;
-         break;
         }
         }
-#endif
+         break;
  
         case wordbeg:
           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
  
         case wordbeg:
           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
-         if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
-           break;
+
+         /* We FAIL in one of the following cases: */
+
+         /* Case 1: D is at the end of string.  */
+         if (AT_STRINGS_END (d))
           goto fail;
           goto fail;
+         else
+           {
+             /* C1 is the character before D, S1 is the syntax of C1, C2
+                is the character at D, and S2 is the syntax of C2.  */
+             int c1, c2, s1, s2;
+             int pos1 = PTR_TO_OFFSET (d);
+             int charpos;
+
+             GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
+#ifdef emacs
+             charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
+             UPDATE_SYNTAX_TABLE (charpos);
+#endif
+             s2 = SYNTAX (c2);
+       
+             /* Case 2: S2 is not Sword. */
+             if (s2 != Sword)
+               goto fail;
+
+             /* Case 3: D is not at the beginning of string ... */
+             if (!AT_STRINGS_BEG (d))
+               {
+                 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+#ifdef emacs
+                 UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
+#endif
+                 s1 = SYNTAX (c1);
+
+                 /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2)
+                    returns 0.  */
+                 if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2))
+                   goto fail;
+               }
+           }
+         break;
  
         case wordend:
           DEBUG_PRINT1 ("EXECUTING wordend.\n");
  
         case wordend:
           DEBUG_PRINT1 ("EXECUTING wordend.\n");
-         if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
-             && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
-           break;
+
+         /* We FAIL in one of the following cases: */
+
+         /* Case 1: D is at the beginning of string.  */
+         if (AT_STRINGS_BEG (d))
+           goto fail;
+         else
+           {
+             /* C1 is the character before D, S1 is the syntax of C1, C2
+                is the character at D, and S2 is the syntax of C2.  */
+             int c1, c2, s1, s2;
+             int pos1 = PTR_TO_OFFSET (d);
+             int charpos;
+
+             GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+#ifdef emacs
+             charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1 - 1);
+             UPDATE_SYNTAX_TABLE (charpos);
+#endif
+             s1 = SYNTAX (c1);
+
+             /* Case 2: S1 is not Sword.  */
+             if (s1 != Sword)
+               goto fail;
+
+             /* Case 3: D is not at the end of string ... */
+             if (!AT_STRINGS_END (d))
+               {
+                 GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
+#ifdef emacs
+                 UPDATE_SYNTAX_TABLE_FORWARD (charpos);
+#endif
+                 s2 = SYNTAX (c2);
+
+                 /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
+                    returns 0.  */
+                 if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2))
           goto fail;
           goto fail;
+               }
+           }
+         break;
  
  #ifdef emacs
         case before_dot:
           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
  
  #ifdef emacs
         case before_dot:
           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
-         if (PTR_CHAR_POS ((unsigned char *) d) >= PT)
+         if (PTR_BYTE_POS ((unsigned char *) d) >= PT_BYTE)
             goto fail;
           break;
  
         case at_dot:
           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
             goto fail;
           break;
  
         case at_dot:
           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
-         if (PTR_CHAR_POS ((unsigned char *) d) != PT)
+         if (PTR_BYTE_POS ((unsigned char *) d) != PT_BYTE)
             goto fail;
           break;
  
         case after_dot:
           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
             goto fail;
           break;
  
         case after_dot:
           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
-         if (PTR_CHAR_POS ((unsigned char *) d) <= PT)
+         if (PTR_BYTE_POS ((unsigned char *) d) <= PT_BYTE)
             goto fail;
           break;
  
             goto fail;
           break;
  
@@ -4790,10 +5815,27 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
           mcnt = (int) Sword;
         matchsyntax:
           PREFETCH ();
           mcnt = (int) Sword;
         matchsyntax:
           PREFETCH ();
-         /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
-         d++;
-         if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
+#ifdef emacs
+         {
+           int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
+           UPDATE_SYNTAX_TABLE (pos1);
+         }
+#endif
+         {
+           int c, len;
+
+           if (multibyte)
+             /* we must concern about multibyte form, ... */
+             c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
+           else
+             /* everything should be handled as ASCII, even though it
+                looks like multibyte form.  */
+             c = *d, len = 1;
+
+           if (SYNTAX (c) != (enum syntaxcode) mcnt)
             goto fail;
             goto fail;
+           d += len;
+         }
           SET_REGS_MATCHED ();
           break;
  
           SET_REGS_MATCHED ();
           break;
  
@@ -4807,86 +5849,141 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
           mcnt = (int) Sword;
         matchnotsyntax:
           PREFETCH ();
           mcnt = (int) Sword;
         matchnotsyntax:
           PREFETCH ();
-         /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
-         d++;
-         if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
+#ifdef emacs
+         {
+           int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
+           UPDATE_SYNTAX_TABLE (pos1);
+         }
+#endif
+         {
+           int c, len;
+
+           if (multibyte)
+             c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
+           else
+             c = *d, len = 1;
+
+           if (SYNTAX (c) == (enum syntaxcode) mcnt)
             goto fail;
             goto fail;
+           d += len;
+         }
+         SET_REGS_MATCHED ();
+         break;
+
+       case categoryspec:
+         DEBUG_PRINT2 ("EXECUTING categoryspec %d.\n", *p);
+         mcnt = *p++;
+         PREFETCH ();
+         {
+           int c, len;
+
+           if (multibyte)
+             c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
+           else
+             c = *d, len = 1;
+
+           if (!CHAR_HAS_CATEGORY (c, mcnt))
+             goto fail;
+           d += len;
+         }
           SET_REGS_MATCHED ();
           break;
  
           SET_REGS_MATCHED ();
           break;
  
+       case notcategoryspec:
+         DEBUG_PRINT2 ("EXECUTING notcategoryspec %d.\n", *p);
+         mcnt = *p++;
+         PREFETCH ();
+         {
+           int c, len;
+
+           if (multibyte)
+             c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
+           else
+             c = *d, len = 1;
+
+           if (CHAR_HAS_CATEGORY (c, mcnt))
+             goto fail;
+           d += len;
+         }
+         SET_REGS_MATCHED ();
+          break;
+
  #else /* not emacs */
         case wordchar:
  #else /* not emacs */
         case wordchar:
-         DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
+          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
           PREFETCH ();
           PREFETCH ();
-         if (!WORDCHAR_P (d))
-           goto fail;
+          if (!WORDCHAR_P (d))
+            goto fail;
           SET_REGS_MATCHED ();
           SET_REGS_MATCHED ();
-         d++;
+          d++;
           break;
  
         case notwordchar:
           break;
  
         case notwordchar:
-         DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
+          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
           PREFETCH ();
           if (WORDCHAR_P (d))
           PREFETCH ();
           if (WORDCHAR_P (d))
-           goto fail;
-         SET_REGS_MATCHED ();
-         d++;
+            goto fail;
+          SET_REGS_MATCHED ();
+          d++;
           break;
  #endif /* not emacs */
  
           break;
  #endif /* not emacs */
  
-       default:
-         abort ();
+        default:
+          abort ();
         }
         }
-      continue;         /* Successfully executed one pattern command; keep going.  */
+      continue;  /* Successfully executed one pattern command; keep going.  */
  
  
      /* We goto here if a matching operation fails. */
      fail:
  
  
      /* We goto here if a matching operation fails. */
      fail:
+#if defined (WINDOWSNT) && defined (emacs)
+      QUIT;
+#endif
        if (!FAIL_STACK_EMPTY ())
        if (!FAIL_STACK_EMPTY ())
-       { /* A restart point is known.  Restore to that state.  */
-         DEBUG_PRINT1 ("\nFAIL:\n");
-         POP_FAILURE_POINT (d, p,
-                            lowest_active_reg, highest_active_reg,
-                            regstart, regend, reg_info);
-
-         /* If this failure point is a dummy, try the next one.  */
-         if (!p)
+       { /* A restart point is known.  Restore to that state.  */
+          DEBUG_PRINT1 ("\nFAIL:\n");
+          POP_FAILURE_POINT (d, p,
+                             lowest_active_reg, highest_active_reg,
+                             regstart, regend, reg_info);
+
+          /* If this failure point is a dummy, try the next one.  */
+          if (!p)
             goto fail;
  
             goto fail;
  
-         /* If we failed to the end of the pattern, don't examine *p.  */
+          /* If we failed to the end of the pattern, don't examine *p.  */
           assert (p <= pend);
           assert (p <= pend);
-         if (p < pend)
-           {
-             boolean is_a_jump_n = false;
-
-             /* If failed to a backwards jump that's part of a repetition
-                loop, need to pop this failure point and use the next one.  */
-             switch ((re_opcode_t) *p)
-               {
-               case jump_n:
-                 is_a_jump_n = true;
-               case maybe_pop_jump:
-               case pop_failure_jump:
-               case jump:
-                 p1 = p + 1;
-                 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-                 p1 += mcnt;
-
-                 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
-                     || (!is_a_jump_n
-                         && (re_opcode_t) *p1 == on_failure_jump))
-                   goto fail;
-                 break;
-               default:
-                 /* do nothing */ ;
-               }
-           }
-
-         if (d >= string1 && d <= end1)
+          if (p < pend)
+            {
+              boolean is_a_jump_n = false;
+
+              /* If failed to a backwards jump that's part of a repetition
+                 loop, need to pop this failure point and use the next one.  */
+              switch ((re_opcode_t) *p)
+                {
+                case jump_n:
+                  is_a_jump_n = true;
+                case maybe_pop_jump:
+                case pop_failure_jump:
+                case jump:
+                  p1 = p + 1;
+                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+                  p1 += mcnt;
+
+                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
+                      || (!is_a_jump_n
+                          && (re_opcode_t) *p1 == on_failure_jump))
+                    goto fail;
+                  break;
+                default:
+                  /* do nothing */ ;
+                }
+            }
+
+          if (d >= string1 && d <= end1)
             dend = end_match_1;
             dend = end_match_1;
-       }
+        }
        else
        else
-       break;   /* Matching at this starting point really fails.  */
+        break;   /* Matching at this starting point really fails.  */
      } /* for (;;) */
  
    if (best_regs_set)
      } /* for (;;) */
  
    if (best_regs_set)
@@ -4894,7 +5991,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
  
    FREE_VARIABLES ();
  
  
    FREE_VARIABLES ();
  
-  return -1;                           /* Failure to match.  */
+  return -1;                           /* Failure to match.  */
  } /* re_match_2 */
  \f
  /* Subroutine definitions for re_match_2.  */
  } /* re_match_2 */
  \f
  /* Subroutine definitions for re_match_2.  */
@@ -4923,92 +6020,92 @@ group_match_null_string_p (p, end, reg_info)
      {
        /* Skip over opcodes that can match nothing, and return true or
          false, as appropriate, when we get to one that can't, or to the
      {
        /* Skip over opcodes that can match nothing, and return true or
          false, as appropriate, when we get to one that can't, or to the
-        matching stop_memory.  */
+         matching stop_memory.  */
  
        switch ((re_opcode_t) *p1)
  
        switch ((re_opcode_t) *p1)
-       {
-       /* Could be either a loop or a series of alternatives.  */
-       case on_failure_jump:
-         p1++;
-         EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+        {
+        /* Could be either a loop or a series of alternatives.  */
+        case on_failure_jump:
+          p1++;
+          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
  
  
-         /* If the next operation is not a jump backwards in the
+          /* If the next operation is not a jump backwards in the
              pattern.  */
  
           if (mcnt >= 0)
             {
              pattern.  */
  
           if (mcnt >= 0)
             {
-             /* Go through the on_failure_jumps of the alternatives,
-                seeing if any of the alternatives cannot match nothing.
-                The last alternative starts with only a jump,
-                whereas the rest start with on_failure_jump and end
-                with a jump, e.g., here is the pattern for `a|b|c':
+              /* Go through the on_failure_jumps of the alternatives,
+                 seeing if any of the alternatives cannot match nothing.
+                 The last alternative starts with only a jump,
+                 whereas the rest start with on_failure_jump and end
+                 with a jump, e.g., here is the pattern for `a|b|c':
  
  
-                /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
-                /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
-                /exactn/1/c
+                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
+                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
+                 /exactn/1/c
  
  
-                So, we have to first go through the first (n-1)
-                alternatives and then deal with the last one separately.  */
+                 So, we have to first go through the first (n-1)
+                 alternatives and then deal with the last one separately.  */
  
  
  
  
-             /* Deal with the first (n-1) alternatives, which start
-                with an on_failure_jump (see above) that jumps to right
-                past a jump_past_alt.  */
+              /* Deal with the first (n-1) alternatives, which start
+                 with an on_failure_jump (see above) that jumps to right
+                 past a jump_past_alt.  */
  
  
-             while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
-               {
-                 /* `mcnt' holds how many bytes long the alternative
-                    is, including the ending `jump_past_alt' and
-                    its number.  */
+              while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
+                {
+                  /* `mcnt' holds how many bytes long the alternative
+                     is, including the ending `jump_past_alt' and
+                     its number.  */
  
  
-                 if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
-                                                     reg_info))
-                   return false;
+                  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
+                                                     reg_info))
+                    return false;
  
  
-                 /* Move to right after this alternative, including the
+                  /* Move to right after this alternative, including the
                      jump_past_alt.  */
                      jump_past_alt.  */
-                 p1 += mcnt;
+                  p1 += mcnt;
  
  
-                 /* Break if it's the beginning of an n-th alternative
-                    that doesn't begin with an on_failure_jump.  */
-                 if ((re_opcode_t) *p1 != on_failure_jump)
-                   break;
+                  /* Break if it's the beginning of an n-th alternative
+                     that doesn't begin with an on_failure_jump.  */
+                  if ((re_opcode_t) *p1 != on_failure_jump)
+                    break;
  
                   /* Still have to check that it's not an n-th
                      alternative that starts with an on_failure_jump.  */
                   p1++;
  
                   /* Still have to check that it's not an n-th
                      alternative that starts with an on_failure_jump.  */
                   p1++;
-                 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-                 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
-                   {
-                     /* Get to the beginning of the n-th alternative.  */
-                     p1 -= 3;
-                     break;
-                   }
-               }
+                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+                  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
+                    {
+                     /* Get to the beginning of the n-th alternative.  */
+                      p1 -= 3;
+                      break;
+                    }
+                }
  
  
-             /* Deal with the last alternative: go back and get number
-                of the `jump_past_alt' just before it.  `mcnt' contains
-                the length of the alternative.  */
-             EXTRACT_NUMBER (mcnt, p1 - 2);
+              /* Deal with the last alternative: go back and get number
+                 of the `jump_past_alt' just before it.  `mcnt' contains
+                 the length of the alternative.  */
+              EXTRACT_NUMBER (mcnt, p1 - 2);
  
  
-             if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
-               return false;
+              if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
+                return false;
  
  
-             p1 += mcnt;       /* Get past the n-th alternative.  */
-           } /* if mcnt > 0 */
-         break;
+              p1 += mcnt;      /* Get past the n-th alternative.  */
+            } /* if mcnt > 0 */
+          break;
  
  
  
  
-       case stop_memory:
+        case stop_memory:
           assert (p1[1] == **p);
           assert (p1[1] == **p);
-         *p = p1 + 2;
-         return true;
+          *p = p1 + 2;
+          return true;
  
  
  
  
-       default:
-         if (!common_op_match_null_string_p (&p1, end, reg_info))
-           return false;
-       }
+        default:
+          if (!common_op_match_null_string_p (&p1, end, reg_info))
+            return false;
+        }
      } /* while p1 < end */
  
    return false;
      } /* while p1 < end */
  
    return false;
@@ -5030,21 +6127,21 @@ alt_match_null_string_p (p, end, reg_info)
    while (p1 < end)
      {
        /* Skip over opcodes that can match nothing, and break when we get
    while (p1 < end)
      {
        /* Skip over opcodes that can match nothing, and break when we get
-        to one that can't.  */
+         to one that can't.  */
  
        switch ((re_opcode_t) *p1)
  
        switch ((re_opcode_t) *p1)
-       {
-       /* It's a loop.  */
-       case on_failure_jump:
-         p1++;
-         EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-         p1 += mcnt;
-         break;
+        {
+       /* It's a loop.  */
+        case on_failure_jump:
+          p1++;
+          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+          p1 += mcnt;
+          break;
  
         default:
  
         default:
-         if (!common_op_match_null_string_p (&p1, end, reg_info))
-           return false;
-       }
+          if (!common_op_match_null_string_p (&p1, end, reg_info))
+            return false;
+        }
      }  /* while p1 < end */
  
    return true;
      }  /* while p1 < end */
  
    return true;
@@ -5090,42 +6187,42 @@ common_op_match_null_string_p (p, end, reg_info)
        ret = group_match_null_string_p (&p1, end, reg_info);
  
        /* Have to set this here in case we're checking a group which
        ret = group_match_null_string_p (&p1, end, reg_info);
  
        /* Have to set this here in case we're checking a group which
-        contains a group and a back reference to it.  */
+         contains a group and a back reference to it.  */
  
        if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
  
        if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
-       REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
+        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
  
        if (!ret)
  
        if (!ret)
-       return false;
+        return false;
        break;
  
        break;
  
-    /* If this is an optimized succeed_n for zero times, make the jump.         */
+    /* If this is an optimized succeed_n for zero times, make the jump.  */
      case jump:
        EXTRACT_NUMBER_AND_INCR (mcnt, p1);
        if (mcnt >= 0)
      case jump:
        EXTRACT_NUMBER_AND_INCR (mcnt, p1);
        if (mcnt >= 0)
-       p1 += mcnt;
+        p1 += mcnt;
        else
        else
-       return false;
+        return false;
        break;
  
      case succeed_n:
        break;
  
      case succeed_n:
-      /* Get to the number of times to succeed.         */
+      /* Get to the number of times to succeed.  */
        p1 += 2;
        EXTRACT_NUMBER_AND_INCR (mcnt, p1);
  
        if (mcnt == 0)
        p1 += 2;
        EXTRACT_NUMBER_AND_INCR (mcnt, p1);
  
        if (mcnt == 0)
-       {
-         p1 -= 4;
-         EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-         p1 += mcnt;
-       }
+        {
+          p1 -= 4;
+          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+          p1 += mcnt;
+        }
        else
        else
-       return false;
+        return false;
        break;
  
      case duplicate:
        if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
        break;
  
      case duplicate:
        if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
-       return false;
+        return false;
        break;
  
      case set_number_at:
        break;
  
      case set_number_at:
@@ -5151,11 +6248,27 @@ bcmp_translate (s1, s2, len, translate)
       RE_TRANSLATE_TYPE translate;
  {
    register unsigned char *p1 = s1, *p2 = s2;
       RE_TRANSLATE_TYPE translate;
  {
    register unsigned char *p1 = s1, *p2 = s2;
-  while (len)
+  unsigned char *p1_end = s1 + len;
+  unsigned char *p2_end = s2 + len;
+
+  while (p1 != p1_end && p2 != p2_end)
      {
      {
-      if (translate[*p1++] != translate[*p2++]) return 1;
-      len--;
+      int p1_charlen, p2_charlen;
+      int p1_ch, p2_ch;
+
+      p1_ch = STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
+      p2_ch = STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
+
+      if (RE_TRANSLATE (translate, p1_ch)
+         != RE_TRANSLATE (translate, p2_ch))
+       return 1;
+
+      p1 += p1_charlen, p2 += p2_charlen;
      }
      }
+
+  if (p1 != p1_end || p2 != p2_end)
+    return 1;
+
    return 0;
  }
  \f
    return 0;
  }
  \f
@@ -5168,7 +6281,7 @@ bcmp_translate (s1, s2, len, translate)
     Assumes the `allocated' (and perhaps `buffer') and `translate' fields
     are set in BUFP on entry.
  
     Assumes the `allocated' (and perhaps `buffer') and `translate' fields
     are set in BUFP on entry.
  
-   We call regex_compile to do the actual compilation. */
+   We call regex_compile to do the actual compilation.  */
  
  const char *
  re_compile_pattern (pattern, length, bufp)
  
  const char *
  re_compile_pattern (pattern, length, bufp)
@@ -5187,7 +6300,7 @@ re_compile_pattern (pattern, length, bufp)
       setting no_sub.  */
    bufp->no_sub = 0;
  
       setting no_sub.  */
    bufp->no_sub = 0;
  
-  /* Match anchors at newline. */
+  /* Match anchors at newline.  */
    bufp->newline_anchor = 1;
  
    ret = regex_compile (pattern, length, re_syntax_options, bufp);
    bufp->newline_anchor = 1;
  
    ret = regex_compile (pattern, length, re_syntax_options, bufp);
@@ -5197,8 +6310,8 @@ re_compile_pattern (pattern, length, bufp)
    return gettext (re_error_msgid[(int) ret]);
  }
  \f
    return gettext (re_error_msgid[(int) ret]);
  }
  \f
-/* Entry points compatible with 4.2 BSD regex library. We don't define
-   them unless specifically requested. */
+/* Entry points compatible with 4.2 BSD regex library.  We don't define
+   them unless specifically requested.  */
  
  #if defined (_REGEX_RE_COMP) || defined (_LIBC)
  
  
  #if defined (_REGEX_RE_COMP) || defined (_LIBC)
  
@@ -5228,7 +6341,7 @@ re_comp (s)
      {
        re_comp_buf.buffer = (unsigned char *) malloc (200);
        if (re_comp_buf.buffer == NULL)
      {
        re_comp_buf.buffer = (unsigned char *) malloc (200);
        if (re_comp_buf.buffer == NULL)
-       return gettext (re_error_msgid[(int) REG_ESPACE]);
+        return gettext (re_error_msgid[(int) REG_ESPACE]);
        re_comp_buf.allocated = 200;
  
        re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
        re_comp_buf.allocated = 200;
  
        re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
@@ -5239,7 +6352,7 @@ re_comp (s)
    /* Since `re_exec' always passes NULL for the `regs' argument, we
       don't need to initialize the pattern buffer fields which affect it.  */
  
    /* Since `re_exec' always passes NULL for the `regs' argument, we
       don't need to initialize the pattern buffer fields which affect it.  */
  
-  /* Match anchors at newlines.         */
+  /* Match anchors at newlines.  */
    re_comp_buf.newline_anchor = 1;
  
    ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
    re_comp_buf.newline_anchor = 1;
  
    ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
@@ -5271,7 +6384,7 @@ re_exec (s)
  
  /* regcomp takes a regular expression as a string and compiles it.
  
  
  /* regcomp takes a regular expression as a string and compiles it.
  
-   PREG is a regex_t *.         We do not expect any fields to be initialized,
+   PREG is a regex_t *.  We do not expect any fields to be initialized,
     since POSIX says we shouldn't.  Thus, we set
  
       `buffer' to the compiled pattern;
     since POSIX says we shouldn't.  Thus, we set
  
       `buffer' to the compiled pattern;
@@ -5300,7 +6413,7 @@ re_exec (s)
       routine will report only success or failure, and nothing about the
       registers.
  
       routine will report only success or failure, and nothing about the
       registers.
  
-   It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
+   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
     the return codes and their meanings.)  */
  
  int
     the return codes and their meanings.)  */
  
  int
@@ -5333,11 +6446,11 @@ regcomp (preg, pattern, cflags)
         = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
                                       * sizeof (*(RE_TRANSLATE_TYPE)0));
        if (preg->translate == NULL)
         = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
                                       * sizeof (*(RE_TRANSLATE_TYPE)0));
        if (preg->translate == NULL)
-       return (int) REG_ESPACE;
+        return (int) REG_ESPACE;
  
        /* Map uppercase characters to corresponding lowercase ones.  */
        for (i = 0; i < CHAR_SET_SIZE; i++)
  
        /* Map uppercase characters to corresponding lowercase ones.  */
        for (i = 0; i < CHAR_SET_SIZE; i++)
-       preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
+        preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
      }
    else
      preg->translate = NULL;
      }
    else
      preg->translate = NULL;
@@ -5347,7 +6460,7 @@ regcomp (preg, pattern, cflags)
      { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
        syntax &= ~RE_DOT_NEWLINE;
        syntax |= RE_HAT_LISTS_NOT_NEWLINE;
      { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
        syntax &= ~RE_DOT_NEWLINE;
        syntax |= RE_HAT_LISTS_NOT_NEWLINE;
-      /* It also changes the matching behavior.         */
+      /* It also changes the matching behavior.  */
        preg->newline_anchor = 1;
      }
    else
        preg->newline_anchor = 1;
      }
    else
@@ -5371,7 +6484,7 @@ regcomp (preg, pattern, cflags)
     string STRING.
  
     If NMATCH is zero or REG_NOSUB was set in the cflags argument to
     string STRING.
  
     If NMATCH is zero or REG_NOSUB was set in the cflags argument to
-   `regcomp', we ignore PMATCH.         Otherwise, we assume PMATCH has at
+   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
     least NMATCH elements, and we set them to the offsets of the
     corresponding matched substrings.
  
     least NMATCH elements, and we set them to the offsets of the
     corresponding matched substrings.
  
@@ -5402,7 +6515,7 @@ regexec (preg, string, nmatch, pmatch, eflags)
  
    /* The user has told us exactly how many registers to return
       information about, via `nmatch'.  We have to pass that on to the
  
    /* The user has told us exactly how many registers to return
       information about, via `nmatch'.  We have to pass that on to the
-     matching routines.         */
+     matching routines.  */
    private_preg.regs_allocated = REGS_FIXED;
  
    if (want_reg_info)
    private_preg.regs_allocated = REGS_FIXED;
  
    if (want_reg_info)
@@ -5411,29 +6524,29 @@ regexec (preg, string, nmatch, pmatch, eflags)
        regs.start = TALLOC (nmatch, regoff_t);
        regs.end = TALLOC (nmatch, regoff_t);
        if (regs.start == NULL || regs.end == NULL)
        regs.start = TALLOC (nmatch, regoff_t);
        regs.end = TALLOC (nmatch, regoff_t);
        if (regs.start == NULL || regs.end == NULL)
-       return (int) REG_NOMATCH;
+        return (int) REG_NOMATCH;
      }
  
    /* Perform the searching operation.  */
    ret = re_search (&private_preg, string, len,
      }
  
    /* Perform the searching operation.  */
    ret = re_search (&private_preg, string, len,
-                  /* start: */ 0, /* range: */ len,
-                  want_reg_info ? &regs : (struct re_registers *) 0);
+                   /* start: */ 0, /* range: */ len,
+                   want_reg_info ? &regs : (struct re_registers *) 0);
  
    /* Copy the register information to the POSIX structure.  */
    if (want_reg_info)
      {
        if (ret >= 0)
  
    /* Copy the register information to the POSIX structure.  */
    if (want_reg_info)
      {
        if (ret >= 0)
-       {
-         unsigned r;
+        {
+          unsigned r;
  
  
-         for (r = 0; r < nmatch; r++)
-           {
-             pmatch[r].rm_so = regs.start[r];
-             pmatch[r].rm_eo = regs.end[r];
-           }
-       }
+          for (r = 0; r < nmatch; r++)
+            {
+              pmatch[r].rm_so = regs.start[r];
+              pmatch[r].rm_eo = regs.end[r];
+            }
+        }
  
  
-      /* If we needed the temporary register info, free the space now. */
+      /* If we needed the temporary register info, free the space now.  */
        free (regs.start);
        free (regs.end);
      }
        free (regs.start);
        free (regs.end);
      }
@@ -5459,7 +6572,7 @@ regerror (errcode, preg, errbuf, errbuf_size)
    if (errcode < 0
        || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
      /* Only error codes returned by the rest of the code should be passed
    if (errcode < 0
        || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
      /* Only error codes returned by the rest of the code should be passed
-       to this routine.         If we are given anything else, or if other regex
+       to this routine.  If we are given anything else, or if other regex
         code generates an invalid error code, then the program has a bug.
         Dump core so we can fix it.  */
      abort ();
         code generates an invalid error code, then the program has a bug.
         Dump core so we can fix it.  */
      abort ();
@@ -5471,12 +6584,12 @@ regerror (errcode, preg, errbuf, errbuf_size)
    if (errbuf_size != 0)
      {
        if (msg_size > errbuf_size)
    if (errbuf_size != 0)
      {
        if (msg_size > errbuf_size)
-       {
-         strncpy (errbuf, msg, errbuf_size - 1);
-         errbuf[errbuf_size - 1] = 0;
-       }
+        {
+          strncpy (errbuf, msg, errbuf_size - 1);
+          errbuf[errbuf_size - 1] = 0;
+        }
        else
        else
-       strcpy (errbuf, msg);
+        strcpy (errbuf, msg);
      }
  
    return msg_size;
      }
  
    return msg_size;