Declare localtime_r if necessary.

[gnulib.git] / regex.c
diff --git a/regex.c b/regex.c

index e26641b..cf89000 100644 (file)
--- a/regex.c
+++ b/regex.c
@@ -2,7 +2,7 @@
     0.12.  (Implements POSIX draft P10003.2/D11.2, except for
     internationalization features.)
  
-   Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
+   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
  
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -27,6 +27,16 @@
  #undef _GNU_SOURCE
  #define _GNU_SOURCE
  
+#ifdef emacs
+/* Converts the pointer to the char to BEG-based offset from the start.         */
+#define PTR_TO_OFFSET(d)                                               \
+       POS_AS_IN_BUFFER (MATCHING_IN_FIRST_STRING                      \
+                         ? (d) - string1 : (d) - (string2 - size1))
+#define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
+#else
+#define PTR_TO_OFFSET(d) 0
+#endif
+
  #ifdef HAVE_CONFIG_H
  #include <config.h>
  #endif
@@ -53,9 +63,16 @@
  
  #include "lisp.h"
  #include "buffer.h"
+
+/* Make syntax table lookup grant data in gl_state.  */
+#define SYNTAX_ENTRY_VIA_PROPERTY
+
  #include "syntax.h"
+#include "charset.h"
+#include "category.h"
  
  #define malloc xmalloc
+#define realloc xrealloc
  #define free xfree
  
  #else  /* not emacs */
@@ -156,6 +173,19 @@ init_syntax_once ()
  
  #define SYNTAX(c) re_syntax_table[c]
  
+/* Dummy macros for non-Emacs environments.  */
+#define BASE_LEADING_CODE_P(c) (0)
+#define WORD_BOUNDARY_P(c1, c2) (0)
+#define CHAR_HEAD_P(p) (1)
+#define SINGLE_BYTE_CHAR_P(c) (1)
+#define SAME_CHARSET_P(c1, c2) (1)
+#define MULTIBYTE_FORM_LENGTH(p, s) (1)
+#define STRING_CHAR(p, s) (*(p))
+#define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
+#define GET_CHAR_AFTER_2(c, p, str1, end1, str2, end2) \
+  (c = ((p) == (end1) ? *(str2) : *(p)))
+#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
+  (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
  #endif /* not emacs */
  \f
  /* Get the interface, including the syntax bits.  */
@@ -465,7 +495,17 @@ typedef enum
    syntaxspec,
  
         /* Matches any character whose syntax is not that specified.  */
-  notsyntaxspec
+  notsyntaxspec,
+
+  /* Matches any character whose category-set contains the specified
+     category. The operator is followed by a byte which contains a
+     category code (mnemonic ASCII character). */
+  categoryspec,
+
+  /* Matches any character whose category-set does not contain the
+     specified category.  The operator is followed by a byte which
+     contains the category code (mnemonic ASCII character).  */
+  notcategoryspec
  #endif /* emacs */
  } re_opcode_t;
  \f
@@ -543,6 +583,93 @@ extract_number_and_incr (destination, source)
  
  #endif /* DEBUG */
  \f
+/* Store a multibyte character in three contiguous bytes starting at
+   DESTINATION, and increment DESTINATION to the byte after where the
+   character is stored.  Therefore, DESTINATION must be an lvalue.  */
+
+#define STORE_CHARACTER_AND_INCR(destination, character)       \
+  do {                                                         \
+    (destination)[0] = (character) & 0377;                     \
+    (destination)[1] = ((character) >> 8) & 0377;              \
+    (destination)[2] = (character) >> 16;                      \
+    (destination) += 3;                                                \
+  } while (0)
+
+/* Put into DESTINATION a character stored in three contiguous bytes
+   starting at SOURCE. */
+
+#define EXTRACT_CHARACTER(destination, source) \
+  do {                                         \
+    (destination) = ((source)[0]               \
+                    | ((source)[1] << 8)       \
+                    | ((source)[2] << 16));    \
+  } while (0)
+
+
+/* Macros for charset. */
+
+/* Size of bitmap of charset P in bytes.  P is a start of charset,
+   i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not.  */
+#define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)
+
+/* Nonzero if charset P has range table.  */
+#define CHARSET_RANGE_TABLE_EXISTS_P(p)         ((p)[1] & 0x80)
+
+/* Return the address of the range table of charset P.  This is not the
+   first range itself but the field before it, where the number of ranges
+   is stored.  `2 +' means to skip re_opcode_t and size of bitmap.  */
+#define CHARSET_RANGE_TABLE(p) (&(p)[2 + CHARSET_BITMAP_SIZE (p)])
+
+/* Test if C is listed in the bitmap of charset P.  */
+#define CHARSET_LOOKUP_BITMAP(p, c)                            \
+  ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH                   \
+   && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH)))
+
+/* Return the address of the end of RANGE_TABLE.  COUNT is the number
+   of ranges (each a pair of (start, end)) in RANGE_TABLE.  `* 2'
+   accounts for the start and the end of each range; `* 3' is the size
+   in bytes of each start and end.  */
+#define CHARSET_RANGE_TABLE_END(range_table, count)    \
+  ((range_table) + (count) * 2 * 3)
+
+/* Test if C is in RANGE_TABLE.         A flag NOT is negated if C is in.
+   COUNT is number of ranges in RANGE_TABLE.  */
+#define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count)     \
+  do                                                                   \
+    {                                                                  \
+      int range_start, range_end;                                      \
+      unsigned char *p;                                                        \
+      unsigned char *range_table_end                                   \
+       = CHARSET_RANGE_TABLE_END ((range_table), (count));             \
+                                                                       \
+      for (p = (range_table); p < range_table_end; p += 2 * 3)         \
+       {                                                               \
+         EXTRACT_CHARACTER (range_start, p);                           \
+         EXTRACT_CHARACTER (range_end, p + 3);                         \
+                                                                       \
+         if (range_start <= (c) && (c) <= range_end)                   \
+           {                                                           \
+             (not) = !(not);                                           \
+             break;                                                    \
+           }                                                           \
+       }                                                               \
+    }                                                                  \
+  while (0)
+
+/* Test if C is in range table of CHARSET.  The flag NOT is negated if
+   C is listed in it.  */
+#define CHARSET_LOOKUP_RANGE_TABLE(not, c, charset)                    \
+  do                                                                   \
+    {                                                                  \
+      /* Number of ranges in range table. */                           \
+      int count;                                                       \
+      unsigned char *range_table = CHARSET_RANGE_TABLE (charset);      \
+                                                                       \
+      EXTRACT_NUMBER_AND_INCR (count, range_table);                    \
+      CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count); \
+    }                                                                  \
+  while (0)
+\f
  /* If DEBUG is defined, Regex prints many voluminous messages about what
     it is doing (if the variable `debug' is nonzero).  If linked with the
     main program in `iregex.c', you can enter patterns and strings
@@ -998,23 +1125,25 @@ static const char *re_error_msgid[] =
     REGEX_ALLOCATE_STACK.  */
  
  
-/* Number of failure points for which to initially allocate space
+/* Approximate number of failure points for which to initially allocate space
     when matching.  If this number is exceeded, we allocate more
     space, so it is not a hard limit.  */
  #ifndef INIT_FAILURE_ALLOC
-#define INIT_FAILURE_ALLOC 5
+#define INIT_FAILURE_ALLOC 20
  #endif
  
  /* Roughly the maximum number of failure points on the stack.  Would be
-   exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
+   exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
     This is a variable only so users of regex can assign to it; we never
     change it ourselves.         */
  #if defined (MATCH_MAY_ALLOCATE)
-/* 4400 was enough to cause a crash on Alpha OSF/1,
-   whose default stack limit is 2mb.  */
-int re_max_failures = 20000;
+/* Note that 4400 is enough to cause a crash on Alpha OSF/1,
+   whose default stack limit is 2mb.  In order for a larger
+   value to work reliably, you have to try to make it accord
+   with the process stack limit.  */
+int re_max_failures = 40000;
  #else
-int re_max_failures = 2000;
+int re_max_failures = 4000;
  #endif
  
  union fail_stack_elt
@@ -1044,7 +1173,8 @@ typedef struct
  #define INIT_FAIL_STACK()                                              \
    do {                                                                 \
      fail_stack.stack = (fail_stack_elt_t *)                            \
-      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));   \
+      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE  \
+                           * sizeof (fail_stack_elt_t));               \
                                                                         \
      if (fail_stack.stack == NULL)                                      \
        return -2;                                                       \
@@ -1064,24 +1194,40 @@ typedef struct
  #endif
  
  
-/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
+/* Double the size of FAIL_STACK, up to a limit
+   which allows approximately `re_max_failures' items.
  
     Return 1 if succeeds, and 0 if either ran out of memory
     allocating space for it or it was already too large.
  
     REGEX_REALLOCATE_STACK requires `destination' be declared.  */
  
-#define DOUBLE_FAIL_STACK(fail_stack)                                  \
-  ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS             \
+/* Factor to increase the failure stack size by
+   when we increase it.
+   This used to be 2, but 2 was too wasteful
+   because the old discarded stacks added up to as much space
+   as the ultimate, maximum-size stack.  */
+#define FAIL_STACK_GROWTH_FACTOR 4
+
+#define GROW_FAIL_STACK(fail_stack)                                    \
+  (((fail_stack).size * sizeof (fail_stack_elt_t)                      \
+    >= re_max_failures * TYPICAL_FAILURE_SIZE)                         \
     ? 0                                                                 \
-   : ((fail_stack).stack = (fail_stack_elt_t *)                                \
+   : ((fail_stack).stack                                               \
+      = (fail_stack_elt_t *)                                           \
         REGEX_REALLOCATE_STACK ((fail_stack).stack,                     \
           (fail_stack).size * sizeof (fail_stack_elt_t),                \
-         ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),        \
+         MIN (re_max_failures * TYPICAL_FAILURE_SIZE,                  \
+              ((fail_stack).size * sizeof (fail_stack_elt_t)           \
+               * FAIL_STACK_GROWTH_FACTOR))),                          \
                                                                         \
        (fail_stack).stack == NULL                                       \
        ? 0                                                              \
-      : ((fail_stack).size <<= 1,                                      \
+      : ((fail_stack).size                                             \
+        = (MIN (re_max_failures * TYPICAL_FAILURE_SIZE,                \
+                ((fail_stack).size * sizeof (fail_stack_elt_t)         \
+                 * FAIL_STACK_GROWTH_FACTOR))                          \
+           / sizeof (fail_stack_elt_t)),                               \
          1)))
  
  
@@ -1090,7 +1236,7 @@ typedef struct
     space to do so.  */
  #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)                           \
    ((FAIL_STACK_FULL ()                                                 \
-    && !DOUBLE_FAIL_STACK (FAIL_STACK))                                        \
+    && !GROW_FAIL_STACK (FAIL_STACK))                                  \
     ? 0                                                                 \
     : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,      \
        1))
@@ -1133,7 +1279,7 @@ typedef struct
     if we ever fail back to it.
  
     Requires variables fail_stack, regstart, regend, reg_info, and
-   num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
+   num_regs be declared.  GROW_FAIL_STACK requires `destination' be
     declared.
  
     Does `return FAILURE_CODE' if runs out of memory.  */
@@ -1157,7 +1303,7 @@ typedef struct
      /* Ensure we have enough space allocated for what we will push.  */        \
      while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                  \
        {                                                                        \
-       if (!DOUBLE_FAIL_STACK (fail_stack))                            \
+       if (!GROW_FAIL_STACK (fail_stack))                              \
           return failure_code;                                          \
                                                                         \
         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",              \
@@ -1224,13 +1370,14 @@ typedef struct
  #define NUM_NONREG_ITEMS 4
  #endif
  
-/* We push at most this many items on the stack.  */
-/* We used to use (num_regs - 1), which is the number of registers
-   this regexp will save; but that was changed to 5
-   to avoid stack overflow for a regexp with lots of parens.  */
-#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
+/* Estimate the size of data pushed by a typical failure stack entry.
+   An estimate is all we need, because all we use this for
+   is to choose a limit for how big to make the failure stack.  */
+
+#define TYPICAL_FAILURE_SIZE 20
  
-/* We actually push this many items.  */
+/* This is how many items we actually use for a failure point.
+   It depends on the regexp.  */
  #define NUM_FAILURE_ITEMS                              \
    (((0                                                 \
       ? 0 : highest_active_reg - lowest_active_reg + 1) \
@@ -1397,7 +1544,7 @@ static reg_errcode_t compile_range ();
  #define PATFETCH(c)                                                    \
    do {if (p == pend) return REG_EEND;                                  \
      c = (unsigned char) *p++;                                          \
-    if (translate) c = (unsigned char) translate[c];                   \
+    if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c);   \
    } while (0)
  #endif
  
@@ -1418,7 +1565,8 @@ static reg_errcode_t compile_range ();
     when we use a character as a subscript we must make it unsigned.  */
  #ifndef TRANSLATE
  #define TRANSLATE(d) \
-  (translate ? (char) translate[(unsigned char) (d)] : (d))
+  (RE_TRANSLATE_P (translate) \
+   ? (unsigned) RE_TRANSLATE (translate, (unsigned) (d)) : (d))
  #endif
  
  
@@ -1556,6 +1704,51 @@ typedef struct
  #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
  
  
+/* Structure to manage work area for range table.  */
+struct range_table_work_area
+{
+  int *table;                  /* actual work area.  */
+  int allocated;               /* allocated size for work area in bytes.  */
+  int used;                    /* actually used size in words.  */
+};
+
+/* Make sure that WORK_AREA can hold N more multibyte characters.  */
+#define EXTEND_RANGE_TABLE_WORK_AREA(work_area, n)                       \
+  do {                                                                   \
+    if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated)  \
+      {                                                                          \
+       (work_area).allocated += 16 * sizeof (int);                       \
+       if ((work_area).table)                                            \
+         (work_area).table                                               \
+           = (int *) realloc ((work_area).table, (work_area).allocated); \
+       else                                                              \
+         (work_area).table                                               \
+           = (int *) malloc ((work_area).allocated);                     \
+       if ((work_area).table == 0)                                       \
+         FREE_STACK_RETURN (REG_ESPACE);                                 \
+      }                                                                          \
+  } while (0)
+
+/* Set a range (RANGE_START, RANGE_END) to WORK_AREA.  */
+#define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end)   \
+  do {                                                                 \
+    EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2);                     \
+    (work_area).table[(work_area).used++] = (range_start);             \
+    (work_area).table[(work_area).used++] = (range_end);               \
+  } while (0)
+
+/* Free allocated memory for WORK_AREA.         */
+#define FREE_RANGE_TABLE_WORK_AREA(work_area)  \
+  do {                                         \
+    if ((work_area).table)                     \
+      free ((work_area).table);                        \
+  } while (0)
+
+#define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0)
+#define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
+#define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
+
+
  /* Set the bit for character C in a list.  */
  #define SET_LIST_BIT(c)                                      \
    (b[((unsigned char) (c)) / BYTEWIDTH]                      \
@@ -1657,7 +1850,11 @@ regex_grow_registers (num_regs)
  
  /* Return, freeing storage we allocated.  */
  #define FREE_STACK_RETURN(value)               \
-  return (free (compile_stack.stack), value)
+  do {                                                 \
+    FREE_RANGE_TABLE_WORK_AREA (range_table_work);     \
+    free (compile_stack.stack);                                \
+    return value;                                      \
+  } while (0)
  
  static reg_errcode_t
  regex_compile (pattern, size, syntax, bufp)
@@ -1669,7 +1866,7 @@ regex_compile (pattern, size, syntax, bufp)
    /* We fetch characters from PATTERN here.  Even though PATTERN is
       `char *' (i.e., signed), we declare these variables as unsigned, so
       they can be reliably used as array indices.  */
-  register unsigned char c, c1;
+  register unsigned int c, c1;
  
    /* A random temporary spot in PATTERN.  */
    const char *p1;
@@ -1681,7 +1878,12 @@ regex_compile (pattern, size, syntax, bufp)
    compile_stack_type compile_stack;
  
    /* Points to the current (ending) position in the pattern.  */
+#ifdef AIX
+  /* `const' makes AIX compiler fail.  */
+  char *p = pattern;
+#else
    const char *p = pattern;
+#endif
    const char *pend = pattern + size;
  
    /* How to translate the characters in the pattern.  */
@@ -1715,6 +1917,9 @@ regex_compile (pattern, size, syntax, bufp)
       number is put in the stop_memory as the start_memory.  */
    regnum_t regnum = 0;
  
+  /* Work area for range table of charset.  */
+  struct range_table_work_area range_table_work;
+
  #ifdef DEBUG
    DEBUG_PRINT1 ("\nCompiling pattern: ");
    if (debug)
@@ -1735,6 +1940,9 @@ regex_compile (pattern, size, syntax, bufp)
    compile_stack.size = INIT_COMPILE_STACK_SIZE;
    compile_stack.avail = 0;
  
+  range_table_work.table = 0;
+  range_table_work.allocated = 0;
+
    /* Initialize the pattern buffer.  */
    bufp->syntax = syntax;
    bufp->fastmap_accurate = 0;
@@ -1748,6 +1956,14 @@ regex_compile (pattern, size, syntax, bufp)
    /* Always count groups, whether or not bufp->no_sub is set.  */
    bufp->re_nsub = 0;
  
+#ifdef emacs
+  /* bufp->multibyte is set before regex_compile is called, so don't alter
+     it. */
+#else  /* not emacs */
+  /* Nothing is recognized as a multibyte character.  */
+  bufp->multibyte = 0;
+#endif
+
  #if !defined (emacs) && !defined (SYNTAX_TABLE)
    /* Initialize the syntax table.  */
     init_syntax_once ();
@@ -1902,9 +2118,10 @@ regex_compile (pattern, size, syntax, bufp)
                    incremented `p', by the way, to be the character after
                    the `*'.  Do we have to do something analogous here
                    for null bytes, because of RE_DOT_NOT_NULL?  */
-               if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
+               if (TRANSLATE ((unsigned char)*(p - 2)) == TRANSLATE ('.')
                     && zero_times_ok
-                   && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
+                   && p < pend
+                   && TRANSLATE ((unsigned char)*p) == TRANSLATE ('\n')
                     && !(syntax & RE_DOT_NEWLINE))
                   { /* We have .*\n.  */
                     STORE_JUMP (jump, b, laststart);
@@ -1950,7 +2167,7 @@ regex_compile (pattern, size, syntax, bufp)
  
         case '[':
           {
-           boolean had_char_class = false;
+           CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
  
             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
  
@@ -1983,6 +2200,9 @@ regex_compile (pattern, size, syntax, bufp)
             /* Read in characters and ranges, setting map bits.  */
             for (;;)
               {
+               int len;
+               boolean escaped_char = false;
+
                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
  
                 PATFETCH (c);
@@ -1992,52 +2212,38 @@ regex_compile (pattern, size, syntax, bufp)
                   {
                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
  
-                   PATFETCH (c1);
-                   SET_LIST_BIT (c1);
-                   continue;
+                   PATFETCH (c);
+                   escaped_char = true;
                   }
-
-               /* Could be the end of the bracket expression.  If it's
-                  not (i.e., when the bracket expression is `[]' so
-                  far), the ']' character bit gets set way below.  */
-               if (c == ']' && p != p1 + 1)
-                 break;
-
-               /* Look ahead to see if it's a range when the last thing
-                  was a character class.  */
-               if (had_char_class && c == '-' && *p != ']')
-                 FREE_STACK_RETURN (REG_ERANGE);
-
-               /* Look ahead to see if it's a range when the last thing
-                  was a character: if this is a hyphen not at the
-                  beginning or the end of a list, then it's the range
-                  operator.  */
-               if (c == '-'
-                   && !(p - 2 >= pattern && p[-2] == '[')
-                   && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
-                   && *p != ']')
+               else
                   {
-                   reg_errcode_t ret
-                     = compile_range (&p, pend, translate, syntax, b);
-                   if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+                   /* Could be the end of the bracket expression.      If it's
+                      not (i.e., when the bracket expression is `[]' so
+                      far), the ']' character bit gets set way below.  */
+                   if (c == ']' && p != p1 + 1)
+                     break;
                   }
  
-               else if (p[0] == '-' && p[1] != ']')
-                 { /* This handles ranges made up of characters only.  */
-                   reg_errcode_t ret;
-
-                   /* Move past the `-'.  */
-                   PATFETCH (c1);
-
-                   ret = compile_range (&p, pend, translate, syntax, b);
-                   if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+               /* If C indicates start of multibyte char, get the
+                  actual character code in C, and set the pattern
+                  pointer P to the next character boundary.  */
+               if (bufp->multibyte && BASE_LEADING_CODE_P (c))
+                 {
+                   PATUNFETCH;
+                   c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
+                   p += len;
                   }
+               /* What should we do for the character which is
+                  greater than 0x7F, but not BASE_LEADING_CODE_P?
+                  XXX */
  
                 /* See if we're at the beginning of a possible character
                    class.  */
  
-               else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
-                 { /* Leave room for the null.  */
+               else if (!escaped_char &&
+                        syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+                 {
+                   /* Leave room for the null.  */
                     char str[CHAR_CLASS_MAX_LENGTH + 1];
  
                     PATFETCH (c);
@@ -2056,9 +2262,10 @@ regex_compile (pattern, size, syntax, bufp)
                       }
                     str[c1] = '\0';
  
-                   /* If isn't a word bracketed by `[:' and:`]':
-                      undo the ending character, the letters, and leave
-                      the leading `:' and `[' (but set bits for them).  */
+                   /* If isn't a word bracketed by `[:' and `:]':
+                      undo the ending character, the letters, and
+                      leave the leading `:' and `[' (but set bits for
+                      them).  */
                     if (c == ':' && *p == ']')
                       {
                         int ch;
@@ -2105,7 +2312,9 @@ regex_compile (pattern, size, syntax, bufp)
                                 || (is_xdigit && ISXDIGIT (ch)))
                               SET_LIST_BIT (translated);
                           }
-                       had_char_class = true;
+
+                       /* Repeat the loop. */
+                       continue;
                       }
                     else
                       {
@@ -2113,15 +2322,71 @@ regex_compile (pattern, size, syntax, bufp)
                         while (c1--)
                           PATUNFETCH;
                         SET_LIST_BIT ('[');
-                       SET_LIST_BIT (':');
-                       had_char_class = false;
+
+                       /* Because the `:' may start a range, we
+                          can't simply set its bit and repeat the loop.
+                          Instead, just store it in C and handle it below.  */
+                       c = ':';
+                     }
+                 }
+
+               if (p < pend && p[0] == '-' && p[1] != ']')
+                 {
+
+                   /* Discard the `-'. */
+                   PATFETCH (c1);
+
+                   /* Fetch the character which ends the range. */
+                   PATFETCH (c1);
+                   if (bufp->multibyte && BASE_LEADING_CODE_P (c1))
+                     {
+                       PATUNFETCH;
+                       c1 = STRING_CHAR_AND_LENGTH (p, pend - p, len);
+                       p += len;
+                     }
+
+                   if (SINGLE_BYTE_CHAR_P (c)
+                       && ! SINGLE_BYTE_CHAR_P (c1))
+                     {
+                       /* Handle a range such as \177-\377 in multibyte mode.
+                          Split that into two ranges: the low one from C
+                          to 0237 (set in the bitmap below), and the high
+                          one from ...040 (C1_BASE) to C1 (range table).  */
+                       int c1_base = (c1 & ~0177) | 040;
+                       SET_RANGE_TABLE_WORK_AREA (range_table_work, c1_base, c1);
+                       c1 = 0237;
                       }
+                   else if (!SAME_CHARSET_P (c, c1))
+                     FREE_STACK_RETURN (REG_ERANGE);
                   }
                 else
+                 /* Range from C to C. */
+                 c1 = c;
+
+               /* Set the range ... */
+               if (SINGLE_BYTE_CHAR_P (c))
+                 /* ... into bitmap.  */
                   {
-                   had_char_class = false;
-                   SET_LIST_BIT (c);
+                   unsigned this_char;
+                   int range_start = c, range_end = c1;
+
+                   /* If the start is after the end, the range is empty.  */
+                   if (range_start > range_end)
+                     {
+                       if (syntax & RE_NO_EMPTY_RANGES)
+                         FREE_STACK_RETURN (REG_ERANGE);
+                       /* Else, repeat the loop.  */
+                     }
+                   else
+                     {
+                       for (this_char = range_start; this_char <= range_end;
+                            this_char++)
+                         SET_LIST_BIT (TRANSLATE (this_char));
+                     }
                   }
+               else
+                 /* ... into range table.  */
+                 SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
               }
  
             /* Discard any (non)matching list bytes that are all 0 at the
@@ -2129,6 +2394,25 @@ regex_compile (pattern, size, syntax, bufp)
             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
               b[-1]--;
             b += b[-1];
+
+           /* Build real range table from work area. */
+           if (RANGE_TABLE_WORK_USED (range_table_work))
+             {
+               int i;
+               int used = RANGE_TABLE_WORK_USED (range_table_work);
+
+               /* Allocate space for COUNT + RANGE_TABLE.  Needs two
+                  bytes for COUNT and three bytes for each character.  */
+               GET_BUFFER_SPACE (2 + used * 3);
+
+               /* Indicate the existence of range table.  */
+               laststart[1] |= 0x80;
+
+               STORE_NUMBER_AND_INCR (b, used / 2);
+               for (i = 0; i < used; i++)
+                 STORE_CHARACTER_AND_INCR
+                   (b, RANGE_TABLE_WORK_ELT (range_table_work, i));
+             }
           }
           break;
  
@@ -2525,6 +2809,18 @@ regex_compile (pattern, size, syntax, bufp)
               PATFETCH (c);
               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
               break;
+
+           case 'c':
+             laststart = b;
+             PATFETCH_RAW (c);
+             BUF_PUSH_2 (categoryspec, c);
+             break;
+
+           case 'C':
+             laststart = b;
+             PATFETCH_RAW (c);
+             BUF_PUSH_2 (notcategoryspec, c);
+             break;
  #endif /* emacs */
  
  
@@ -2604,6 +2900,12 @@ regex_compile (pattern, size, syntax, bufp)
         default:
         /* Expects the character in `c'.  */
         normal_char:
+         p1 = p - 1;           /* P1 points the head of C.  */
+#ifdef emacs
+         if (bufp->multibyte)
+           /* Set P to the next character boundary.  */
+           p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1;
+#endif
               /* If no exactn currently being built.  */
           if (!pending_exact
  
@@ -2611,17 +2913,17 @@ regex_compile (pattern, size, syntax, bufp)
               || pending_exact + *pending_exact + 1 != b
  
               /* We have only one byte following the exactn for the count.  */
-             || *pending_exact == (1 << BYTEWIDTH) - 1
+             || *pending_exact >= (1 << BYTEWIDTH) - (p - p1)
  
               /* If followed by a repetition operator.  */
-             || *p == '*' || *p == '^'
+             || (p != pend && (*p == '*' || *p == '^'))
               || ((syntax & RE_BK_PLUS_QM)
-                 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
-                 : (*p == '+' || *p == '?'))
+                 ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?')
+                 : p != pend && (*p == '+' || *p == '?'))
               || ((syntax & RE_INTERVALS)
                   && ((syntax & RE_NO_BK_BRACES)
-                     ? *p == '{'
-                     : (p[0] == '\\' && p[1] == '{'))))
+                     ? p != pend && *p == '{'
+                     : p + 1 < pend && p[0] == '\\' && p[1] == '{')))
             {
               /* Start building a new exactn.  */
  
@@ -2631,8 +2933,17 @@ regex_compile (pattern, size, syntax, bufp)
               pending_exact = b - 1;
             }
  
-         BUF_PUSH (c);
-         (*pending_exact)++;
+         /* Here, C may have been translated, so C may differ from *P1.  */
+         while (1)
+           {
+             BUF_PUSH (c);
+             (*pending_exact)++;
+             if (++p1 == p)
+               break;
+
+             /* Rest of multibyte form should be copied literally. */
+             c = *(unsigned char *)p1;
+           }
           break;
         } /* switch (c) */
      } /* while p != pend */
@@ -2671,12 +2982,9 @@ regex_compile (pattern, size, syntax, bufp)
    {
      int num_regs = bufp->re_nsub + 1;
  
-    /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
-       is strictly greater than re_max_failures, the largest possible stack
-       is 2 * re_max_failures failure points.  */
-    if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
+    if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE)
        {
-       fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
+       fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE;
  
  #ifdef emacs
         if (! fail_stack.stack)
@@ -2836,64 +3144,6 @@ group_in_compile_stack (compile_stack, regnum)
  
    return false;
  }
-
-
-/* Read the ending character of a range (in a bracket expression) from the
-   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
-   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
-   Then we set the translation of all bits between the starting and
-   ending characters (inclusive) in the compiled pattern B.
-
-   Return an error code.
-
-   We use these short variable names so we can use the same macros as
-   `regex_compile' itself.  */
-
-static reg_errcode_t
-compile_range (p_ptr, pend, translate, syntax, b)
-    const char **p_ptr, *pend;
-    RE_TRANSLATE_TYPE translate;
-    reg_syntax_t syntax;
-    unsigned char *b;
-{
-  unsigned this_char;
-
-  const char *p = *p_ptr;
-  int range_start, range_end;
-
-  if (p == pend)
-    return REG_ERANGE;
-
-  /* Even though the pattern is a signed `char *', we need to fetch
-     with unsigned char *'s; if the high bit of the pattern character
-     is set, the range endpoints will be negative if we fetch using a
-     signed char *.
-
-     We also want to fetch the endpoints without translating them; the
-     appropriate translation is done in the bit-setting loop below.  */
-  /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
-  range_start = ((const unsigned char *) p)[-2];
-  range_end   = ((const unsigned char *) p)[0];
-
-  /* Have to increment the pointer into the pattern string, so the
-     caller isn't still at the ending character.  */
-  (*p_ptr)++;
-
-  /* If the start is after the end, the range is empty.         */
-  if (range_start > range_end)
-    return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
-
-  /* Here we see why `this_char' has to be larger than an `unsigned
-     char' -- the range is inclusive, so if `range_end' == 0xff
-     (assuming 8-bit characters), we would otherwise go into an infinite
-     loop, since all characters <= 0xff.  */
-  for (this_char = range_start; this_char <= range_end; this_char++)
-    {
-      SET_LIST_BIT (TRANSLATE (this_char));
-    }
-
-  return REG_NOERROR;
-}
  \f
  /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
     BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
@@ -2912,7 +3162,7 @@ int
  re_compile_fastmap (bufp)
       struct re_pattern_buffer *bufp;
  {
-  int j, k;
+  int i, j, k;
  #ifdef MATCH_MAY_ALLOCATE
    fail_stack_type fail_stack;
  #endif
@@ -2941,6 +3191,13 @@ re_compile_fastmap (bufp)
    /* We aren't doing a `succeed_n' to begin with.  */
    boolean succeed_n_p = false;
  
+  /* This flag is set to true once the fastmap elements for all base
+     leading codes have been set.  */
+  boolean match_any_multibyte_characters = false;
+
+  /* Maximum code of simple (single byte) character. */
+  int simple_char_max;
+
    assert (fastmap != NULL && p != NULL);
  
    INIT_FAIL_STACK ();
@@ -2992,6 +3249,7 @@ re_compile_fastmap (bufp)
           break;
  
  
+#ifndef emacs
         case charset:
           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
             if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
@@ -3022,14 +3280,112 @@ re_compile_fastmap (bufp)
             if (SYNTAX (j) != Sword)
               fastmap[j] = 1;
           break;
+#else  /* emacs */
+       case charset:
+         for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
+              j >= 0; j--)
+           if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
+             fastmap[j] = 1;
+
+         if (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
+             && match_any_multibyte_characters == false)
+           {
+             /* Set fastmap[I] to 1, where I is the base leading code of
+                each multibyte character in the range table.  */
+             int c, count;
+
+             /* Make P point to the range table.  */
+             p += CHARSET_BITMAP_SIZE (&p[-2]);
+
+             /* Extract the number of ranges in range table into
+                COUNT.  */
+             EXTRACT_NUMBER_AND_INCR (count, p);
+             for (; count > 0; count--, p += 2 * 3) /* XXX */
+               {
+                 /* Extract the start of each range.  */
+                 EXTRACT_CHARACTER (c, p);
+                 j = CHAR_CHARSET (c);
+                 fastmap[CHARSET_LEADING_CODE_BASE (j)] = 1;
+               }
+           }
+         break;
+
+
+       case charset_not:
+         /* Chars beyond end of map must be allowed.  End of map is
+            `127' if bufp->multibyte is nonzero.  */
+         simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+         for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
+              j < simple_char_max; j++)
+           fastmap[j] = 1;
+
+         for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
+              j >= 0; j--)
+           if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
+             fastmap[j] = 1;
+
+         if (bufp->multibyte)
+           /* Any character set can possibly contain a character
+              which doesn't match the specified set of characters.  */
+           {
+           set_fastmap_for_multibyte_characters:
+             if (match_any_multibyte_characters == false)
+               {
+                 for (j = 0x80; j < 0xA0; j++) /* XXX */
+                   if (BASE_LEADING_CODE_P (j))
+                     fastmap[j] = 1;
+                 match_any_multibyte_characters = true;
+               }
+           }
+         break;
+
+
+       case wordchar:
+         simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+         for (j = 0; j < simple_char_max; j++)
+           if (SYNTAX (j) == Sword)
+             fastmap[j] = 1;
+
+         if (bufp->multibyte)
+           /* Any character set can possibly contain a character
+              whose syntax is `Sword'.  */
+           goto set_fastmap_for_multibyte_characters;
+         break;
  
  
+       case notwordchar:
+         simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+         for (j = 0; j < simple_char_max; j++)
+           if (SYNTAX (j) != Sword)
+             fastmap[j] = 1;
+
+         if (bufp->multibyte)
+           /* Any character set can possibly contain a character
+              whose syntax is not `Sword'.  */
+           goto set_fastmap_for_multibyte_characters;
+         break;
+#endif
+
         case anychar:
           {
             int fastmap_newline = fastmap['\n'];
  
-           /* `.' matches anything ...  */
-           for (j = 0; j < (1 << BYTEWIDTH); j++)
+           /* `.' matches anything (but if bufp->multibyte is
+              nonzero, it matches `\000' .. `\177' and any possible
+              multibyte character) ...  */
+           if (bufp->multibyte)
+             {
+               simple_char_max = 0x80;
+
+               for (j = 0x80; j < 0xA0; j++)
+                 if (BASE_LEADING_CODE_P (j))
+                   fastmap[j] = 1;
+               match_any_multibyte_characters = true;
+             }
+           else
+             simple_char_max = (1 << BYTEWIDTH);
+
+           for (j = 0; j < simple_char_max; j++)
               fastmap[j] = 1;
  
             /* ... except perhaps newline.  */
@@ -3046,22 +3402,71 @@ re_compile_fastmap (bufp)
           }
  
  #ifdef emacs
+       case wordbound:
+       case notwordbound:
+       case wordbeg:
+       case wordend:
+       case notsyntaxspec:
         case syntaxspec:
+         /* This match depends on text properties.  These cases end by
+            abandoning the fastmap optimization.  */
+         bufp->can_be_null = 1;
+         goto done;
+#if 0
           k = *p++;
-         for (j = 0; j < (1 << BYTEWIDTH); j++)
+         simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+         for (j = 0; j < simple_char_max; j++)
             if (SYNTAX (j) == (enum syntaxcode) k)
               fastmap[j] = 1;
-         break;
  
+         if (bufp->multibyte)
+           /* Any character set can possibly contain a character
+              whose syntax is K.  */
+           goto set_fastmap_for_multibyte_characters;
+         break;
  
         case notsyntaxspec:
           k = *p++;
-         for (j = 0; j < (1 << BYTEWIDTH); j++)
+         simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+         for (j = 0; j < simple_char_max; j++)
             if (SYNTAX (j) != (enum syntaxcode) k)
               fastmap[j] = 1;
+
+         if (bufp->multibyte)
+           /* Any character set can possibly contain a character
+              whose syntax is not K.  */
+           goto set_fastmap_for_multibyte_characters;
+         break;
+#endif
+
+
+       case categoryspec:
+         k = *p++;
+         simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+         for (j = 0; j < simple_char_max; j++)
+           if (CHAR_HAS_CATEGORY (j, k))
+             fastmap[j] = 1;
+
+         if (bufp->multibyte)
+           /* Any character set can possibly contain a character
+              whose category is K.  */
+           goto set_fastmap_for_multibyte_characters;
           break;
  
  
+       case notcategoryspec:
+         k = *p++;
+         simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+         for (j = 0; j < simple_char_max; j++)
+           if (!CHAR_HAS_CATEGORY (j, k))
+             fastmap[j] = 1;
+
+         if (bufp->multibyte)
+           /* Any character set can possibly contain a character
+              whose category is not K.  */
+           goto set_fastmap_for_multibyte_characters;
+         break;
+
        /* All cases after this match the empty string.  These end with
          `continue'.  */
  
@@ -3078,10 +3483,12 @@ re_compile_fastmap (bufp)
         case endline:
         case begbuf:
         case endbuf:
+#ifndef emacs
         case wordbound:
         case notwordbound:
         case wordbeg:
         case wordend:
+#endif
         case push_dummy_failure:
           continue;
  
@@ -3250,6 +3657,13 @@ re_search (bufp, string, size, startpos, range, regs)
                       regs, size);
  }
  
+/* End address of whichever string of the virtual concatenation holds P.  */
+#define STOP_ADDR_VSTRING(P)                           \
+  (((P) >= size1 ? string2 + size2 : string1 + size1))
+
+/* Address of position POS in the virtual concatenation of the strings.  */
+#define POS_ADDR_VSTRING(POS)                                  \
+  (((POS) >= size1 ? string2 - size1 : string1) + (POS))
  
  /* Using the compiled pattern in BUFP->buffer, first tries to match the
     virtual concatenation of STRING1 and STRING2, starting first at index
@@ -3289,6 +3703,9 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
    int endpos = startpos + range;
    int anchored_start = 0;
  
+  /* Nonzero if we have to take multibyte characters into account.  */
+  int multibyte = bufp->multibyte;
+
    /* Check for out-of-range STARTPOS.  */
    if (startpos < 0 || startpos > total_size)
      return -1;
@@ -3302,13 +3719,13 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
      range = total_size - startpos;
  
    /* If the search isn't to be a backwards one, don't waste time in a
-     search for a pattern that must be anchored.  */
+     search for a pattern anchored at beginning of buffer.  */
    if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
      {
        if (startpos > 0)
         return -1;
        else
-       range = 1;
+       range = 0;
      }
  
  #ifdef emacs
@@ -3316,8 +3733,8 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
       don't keep searching past point.  */
    if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
      {
-      range = PT - startpos;
-      if (range <= 0)
+      range = PT_BYTE - BEGV_BYTE - startpos;
+      if (range < 0)
         return -1;
      }
  #endif /* emacs */
@@ -3331,6 +3748,16 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
    if (bufp->buffer[0] == begline)
      anchored_start = 1;
  
+#ifdef emacs
+  gl_state.object = re_match_object;
+  {
+    int adjpos = NILP (re_match_object) || BUFFERP (re_match_object);
+    int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (startpos + adjpos);
+
+    SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
+  }
+#endif
+
    /* Loop through the string, looking for a place to start matching.  */
    for (;;)
      {
@@ -3353,37 +3780,69 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
          the first null string.  */
        if (fastmap && startpos < total_size && !bufp->can_be_null)
         {
+         register const char *d;
+         register unsigned int buf_ch;
+
+         d = POS_ADDR_VSTRING (startpos);
+
           if (range > 0)        /* Searching forwards.  */
             {
-             register const char *d;
               register int lim = 0;
               int irange = range;
  
               if (startpos < size1 && startpos + range >= size1)
                 lim = range - (size1 - startpos);
  
-             d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
-
               /* Written out as an if-else to avoid testing `translate'
                  inside the loop.  */
-             if (translate)
-               while (range > lim
-                      && !fastmap[(unsigned char)
-                                  translate[(unsigned char) *d++]])
-                 range--;
+             if (RE_TRANSLATE_P (translate))
+               {
+                 if (multibyte)
+                   while (range > lim)
+                     {
+                       int buf_charlen;
+
+                       buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
+                                                        buf_charlen);
+
+                       buf_ch = RE_TRANSLATE (translate, buf_ch);
+                       if (buf_ch >= 0400
+                           || fastmap[buf_ch])
+                         break;
+
+                       range -= buf_charlen;
+                       d += buf_charlen;
+                     }
+                 else
+                   while (range > lim
+                          && !fastmap[(unsigned char)
+                                      RE_TRANSLATE (translate, (unsigned char) *d)])
+                     {
+                       d++;
+                       range--;
+                     }
+               }
               else
-               while (range > lim && !fastmap[(unsigned char) *d++])
-                 range--;
+               while (range > lim && !fastmap[(unsigned char) *d])
+                 {
+                   d++;
+                   range--;
+                 }
  
               startpos += irange - range;
             }
           else                          /* Searching backwards.  */
             {
-             register char c = (size1 == 0 || startpos >= size1
-                                ? string2[startpos - size1]
-                                : string1[startpos]);
+             int room = (size1 == 0 || startpos >= size1
+                         ? size2 + size1 - startpos
+                         : size1 - startpos);
+
+             buf_ch = STRING_CHAR (d, room);
+             if (RE_TRANSLATE_P (translate))
+               buf_ch = RE_TRANSLATE (translate, buf_ch);
  
-             if (!fastmap[(unsigned char) TRANSLATE (c)])
+             if (! (buf_ch >= 0400
+                    || fastmap[buf_ch]))
                 goto advance;
             }
         }
@@ -3412,13 +3871,56 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
         break;
        else if (range > 0)
         {
-         range--;
-         startpos++;
+         /* Update STARTPOS to the next character boundary.  */
+         if (multibyte)
+           {
+             const unsigned char *p
+               = (const unsigned char *) POS_ADDR_VSTRING (startpos);
+             const unsigned char *pend
+               = (const unsigned char *) STOP_ADDR_VSTRING (startpos);
+             int len = MULTIBYTE_FORM_LENGTH (p, pend - p);
+
+             range -= len;
+             if (range < 0)
+               break;
+             startpos += len;
+           }
+         else
+           {
+             range--;
+             startpos++;
+           }
         }
        else
         {
           range++;
           startpos--;
+
+         /* Update STARTPOS to the previous character boundary.  */
+         if (multibyte)
+           {
+             const unsigned char *p
+               = (const unsigned char *) POS_ADDR_VSTRING (startpos);
+             int len = 0;
+
+             /* Find the head of multibyte form.  */
+             while (!CHAR_HEAD_P (*p))
+               p--, len++;
+
+             /* Adjust it. */
+#if 0                          /* XXX */
+             if (MULTIBYTE_FORM_LENGTH (p, len + 1) != (len + 1))
+               ;
+             else
+#endif
+               {
+                 range += len;
+                 if (range > 0)
+                   break;
+
+                 startpos -= len;
+               }
+           }
         }
      }
    return -1;
@@ -3472,6 +3974,15 @@ static boolean alt_match_null_string_p (),
     == Sword)
  
  /* Disabled due to a compiler bug -- see comment at case wordbound */
+
+/* The comment at case wordbound is the following one, but we no longer
+   use AT_WORD_BOUNDARY, in order to support multibyte forms.
+
+   The DEC Alpha C compiler 3.x generates incorrect code for the
+   test         WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
+   AT_WORD_BOUNDARY, so this code is disabled. Expanding the
+   macro and introducing temporary variables works around the bug.  */
+
  #if 0
  /* Test if the character before D and the one at D differ with respect
     to being word-constituent.  */
@@ -3529,6 +4040,11 @@ re_match (bufp, string, size, pos, regs)
  }
  #endif /* not emacs */
  
+#ifdef emacs
+/* In Emacs, this is the string or buffer in which we
+   are matching.  It is used for looking up syntax properties. */
+Lisp_Object re_match_object;
+#endif
  
  /* re_match_2 matches the compiled pattern in BUFP against the
     the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
@@ -3552,8 +4068,18 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
       struct re_registers *regs;
       int stop;
  {
-  int result = re_match_2_internal (bufp, string1, size1, string2, size2,
-                                   pos, regs, stop);
+  int result;
+
+#ifdef emacs
+  int charpos;
+  int adjpos = NILP (re_match_object) || BUFFERP (re_match_object);
+  gl_state.object = re_match_object;
+  charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos + adjpos);
+  SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
+#endif
+
+  result = re_match_2_internal (bufp, string1, size1, string2, size2,
+                               pos, regs, stop);
    alloca (0);
    return result;
  }
@@ -3594,6 +4120,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
    /* We use this to map every character in the string. */
    RE_TRANSLATE_TYPE translate = bufp->translate;
  
+  /* Nonzero if we have to take multibyte characters into account.  */
+  int multibyte = bufp->multibyte;
+
    /* Failure point stack.  Each place that can handle a failure further
       down the line pushes a failure point on this stack.  It consists of
       restart, regend, and reg_info for all registers corresponding to
@@ -3986,16 +4515,39 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
  
           /* This is written out as an if-else so we don't waste time
              testing `translate' inside the loop.  */
-         if (translate)
+         if (RE_TRANSLATE_P (translate))
             {
-             do
-               {
-                 PREFETCH ();
-                 if ((unsigned char) translate[(unsigned char) *d++]
-                     != (unsigned char) *p++)
-                   goto fail;
-               }
-             while (--mcnt);
+#ifdef emacs
+             if (multibyte)
+               do
+                 {
+                   int pat_charlen, buf_charlen;
+                   unsigned int pat_ch, buf_ch;
+
+                   PREFETCH ();
+                   pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
+                   buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
+
+                   if (RE_TRANSLATE (translate, buf_ch)
+                       != pat_ch)
+                     goto fail;
+
+                   p += pat_charlen;
+                   d += buf_charlen;
+                   mcnt -= pat_charlen;
+                 }
+               while (mcnt > 0);
+             else
+#endif /* emacs */
+               do
+                 {
+                   PREFETCH ();
+                   if ((unsigned char) RE_TRANSLATE (translate, (unsigned char) *d)
+                       != (unsigned char) *p++)
+                     goto fail;
+                   d++;
+                 }
+               while (--mcnt);
             }
           else
             {
@@ -4012,43 +4564,91 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
  
         /* Match any character except possibly a newline or a null.  */
         case anychar:
-         DEBUG_PRINT1 ("EXECUTING anychar.\n");
+         {
+           int buf_charlen;
+           unsigned int buf_ch;
  
-         PREFETCH ();
+           DEBUG_PRINT1 ("EXECUTING anychar.\n");
  
-         if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
-             || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
-           goto fail;
+           PREFETCH ();
  
-         SET_REGS_MATCHED ();
-         DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
-         d++;
+#ifdef emacs
+           if (multibyte)
+             buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
+           else
+#endif /* emacs */
+             {
+               buf_ch = (unsigned char) *d;
+               buf_charlen = 1;
+             }
+
+           buf_ch = TRANSLATE (buf_ch);
+
+           if ((!(bufp->syntax & RE_DOT_NEWLINE)
+                && buf_ch == '\n')
+               || ((bufp->syntax & RE_DOT_NOT_NULL)
+                   && buf_ch == '\000'))
+             goto fail;
+
+           SET_REGS_MATCHED ();
+           DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
+           d += buf_charlen;
+         }
           break;
  
  
         case charset:
         case charset_not:
           {
-           register unsigned char c;
+           register unsigned int c;
             boolean not = (re_opcode_t) *(p - 1) == charset_not;
+           int len;
+
+           /* Start of actual range_table, or end of bitmap if there is no
+              range table.  */
+           unsigned char *range_table;
+
+           /* Nonzero if there is range table.  */
+           int range_table_exists;
+
+           /* Number of ranges of range table.  Not in bytes.  */
+           int count;
  
             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
  
             PREFETCH ();
-           c = TRANSLATE (*d); /* The character to match.  */
+           c = (unsigned char) *d;
+
+           range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap.  */
+           range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
+           if (range_table_exists)
+             EXTRACT_NUMBER_AND_INCR (count, range_table);
+           else
+             count = 0;
+
+           if (multibyte && BASE_LEADING_CODE_P (c))
+             c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
  
-           /* Cast to `unsigned' instead of `unsigned char' in case the
-              bit list is a full 32 bytes long.  */
-           if (c < (unsigned) (*p * BYTEWIDTH)
+           if (SINGLE_BYTE_CHAR_P (c))
+             {                 /* Lookup bitmap.  */
+               c = TRANSLATE (c); /* The character to match.  */
+               len = 1;
+
+               /* Cast to `unsigned' instead of `unsigned char' in
+                  case the bit list is a full 32 bytes long.  */
+               if (c < (unsigned) (CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH)
                 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
               not = !not;
+             }
+           else if (range_table_exists)
+             CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);
  
-           p += 1 + *p;
+           p = CHARSET_RANGE_TABLE_END (range_table, count);
  
             if (!not) goto fail;
  
             SET_REGS_MATCHED ();
-           d++;
+           d += len;
             break;
           }
  
@@ -4291,7 +4891,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
  
                 /* Compare that many; failure if mismatch, else move
                    past them.  */
-               if (translate
+               if (RE_TRANSLATE_P (translate)
                     ? bcmp_translate (d, d2, mcnt, translate)
                     : bcmp (d, d2, mcnt))
                   goto fail;
@@ -4492,24 +5092,42 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
             else if ((re_opcode_t) *p2 == exactn
                      || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
               {
-               register unsigned char c
+               register unsigned int c
                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
  
-               if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
+               if ((re_opcode_t) p1[3] == exactn)
+                 {
+                   if (!(multibyte /* && (c != '\n') */
+                         && BASE_LEADING_CODE_P (c))
+                       ? c != p1[5]
+                       : (STRING_CHAR (&p2[2], pend - &p2[2])
+                          != STRING_CHAR (&p1[5], pend - &p1[5])))
                   {
                     p[-3] = (unsigned char) pop_failure_jump;
                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
                                   c, p1[5]);
                   }
+                 }
  
                 else if ((re_opcode_t) p1[3] == charset
                          || (re_opcode_t) p1[3] == charset_not)
                   {
                     int not = (re_opcode_t) p1[3] == charset_not;
  
-                   if (c < (unsigned char) (p1[4] * BYTEWIDTH)
+                   if (multibyte /* && (c != '\n') */
+                       && BASE_LEADING_CODE_P (c))
+                     c = STRING_CHAR (&p2[2], pend - &p2[2]);
+
+                   /* Test if C is listed in charset (or charset_not)
+                      at `&p1[3]'.  */
+                   if (SINGLE_BYTE_CHAR_P (c))
+                     {
+                       if (c < CHARSET_BITMAP_SIZE (&p1[3]) * BYTEWIDTH
                         && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
                       not = !not;
+                     }
+                   else if (CHARSET_RANGE_TABLE_EXISTS_P (&p1[3]))
+                     CHARSET_LOOKUP_RANGE_TABLE (not, c, &p1[3]);
  
                     /* `not' is equal to 1 if c would match, which means
                         that we can't change to pop_failure_jump.  */
@@ -4522,29 +5140,55 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
               }
             else if ((re_opcode_t) *p2 == charset)
               {
-#ifdef DEBUG
-               register unsigned char c
-                 = *p2 == (unsigned char) endline ? '\n' : p2[2];
-#endif
+               if ((re_opcode_t) p1[3] == exactn)
+                 {
+                   register unsigned int c = p1[5];
+                   int not = 0;
+
+                   if (multibyte && BASE_LEADING_CODE_P (c))
+                     c = STRING_CHAR (&p1[5], pend - &p1[5]);
  
-               if ((re_opcode_t) p1[3] == exactn
-                   && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
-                         && (p2[2 + p1[5] / BYTEWIDTH]
-                             & (1 << (p1[5] % BYTEWIDTH)))))
+                   /* Test if C is listed in charset at `p2'.  */
+                   if (SINGLE_BYTE_CHAR_P (c))
+                     {
+                       if (c < CHARSET_BITMAP_SIZE (p2) * BYTEWIDTH
+                           && (p2[2 + c / BYTEWIDTH]
+                               & (1 << (c % BYTEWIDTH))))
+                         not = !not;
+                     }
+                   else if (CHARSET_RANGE_TABLE_EXISTS_P (p2))
+                     CHARSET_LOOKUP_RANGE_TABLE (not, c, p2);
+
+                   if (!not)
                   {
                     p[-3] = (unsigned char) pop_failure_jump;
-                   DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
-                                 c, p1[5]);
+                       DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
+                     }
                   }
  
-               else if ((re_opcode_t) p1[3] == charset_not)
+               /* It is hard to enumerate all the characters in charset
+                  P2 if it includes multibyte characters.  Give up in
+                  that case.  */
+               else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
+                 {
+                   /* Now, we are sure that P2 has no range table.
+                      So, for the size of bitmap in P2, `p2[1]' is
+                      enough.  But P1 may have range table, so the
+                      size of bitmap table of P1 is extracted by
+                      using macro `CHARSET_BITMAP_SIZE'.
+
+                      Since we know that all the characters listed in
+                      P2 are ASCII, it is enough to test only the bitmap
+                      table of P1.  */
+
+                   if ((re_opcode_t) p1[3] == charset_not)
                   {
                     int idx;
-                   /* We win if the charset_not inside the loop
-                      lists every character listed in the charset after.  */
+                       /* We win if the charset_not inside the loop lists
+                          every character listed in the charset after.  */
                     for (idx = 0; idx < (int) p2[1]; idx++)
                       if (! (p2[2 + idx] == 0
-                            || (idx < (int) p1[4]
+                                || (idx < CHARSET_BITMAP_SIZE (&p1[3])
                                  && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
                         break;
  
@@ -4560,12 +5204,14 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
                     /* We win if the charset inside the loop
                        has no overlap with the one after the loop.  */
                     for (idx = 0;
-                        idx < (int) p2[1] && idx < (int) p1[4];
+                            (idx < (int) p2[1]
+                             && idx < CHARSET_BITMAP_SIZE (&p1[3]));
                          idx++)
                       if ((p2[2 + idx] & p1[5 + idx]) != 0)
                         break;
  
-                   if (idx == p2[1] || idx == p1[4])
+                       if (idx == p2[1]
+                           || idx == CHARSET_BITMAP_SIZE (&p1[3]))
                       {
                         p[-3] = (unsigned char) pop_failure_jump;
                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
@@ -4573,6 +5219,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
                   }
               }
           }
+         }
           p -= 2;               /* Point at relative address again.  */
           if ((re_opcode_t) p[-1] != pop_failure_jump)
             {
@@ -4702,84 +5349,184 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
             break;
           }
  
-#if 0
-       /* The DEC Alpha C compiler 3.x generates incorrect code for the
-          test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
-          AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
-          macro and introducing temporary variables works around the bug.  */
-
         case wordbound:
           DEBUG_PRINT1 ("EXECUTING wordbound.\n");
-         if (AT_WORD_BOUNDARY (d))
-           break;
-         goto fail;
  
-       case notwordbound:
-         DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
-         if (AT_WORD_BOUNDARY (d))
-           goto fail;
-         break;
-#else
-       case wordbound:
-       {
-         boolean prevchar, thischar;
+         /* We SUCCEED in one of the following cases: */
  
-         DEBUG_PRINT1 ("EXECUTING wordbound.\n");
+         /* Case 1: D is at the beginning or the end of string.  */
           if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
             break;
+         else
+           {
+             /* C1 is the character before D, S1 is the syntax of C1, C2
+                is the character at D, and S2 is the syntax of C2.  */
+             int c1, c2, s1, s2;
+             int pos1 = PTR_TO_OFFSET (d - 1);
+             int charpos;
+
+             GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+             GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
+#ifdef emacs
+             charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
+             UPDATE_SYNTAX_TABLE (charpos);
+#endif
+             s1 = SYNTAX (c1);
+#ifdef emacs
+             UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
+#endif
+             s2 = SYNTAX (c2);
  
-         prevchar = WORDCHAR_P (d - 1);
-         thischar = WORDCHAR_P (d);
-         if (prevchar != thischar)
+             if (/* Case 2: Only one of S1 and S2 is Sword.  */
+                 ((s1 == Sword) != (s2 == Sword))
+                 /* Case 3: Both of S1 and S2 are Sword, and macro
+                    WORD_BOUNDARY_P (C1, C2) returns nonzero.  */
+                 || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
             break;
-         goto fail;
         }
+         goto fail;
  
        case notwordbound:
-       {
-         boolean prevchar, thischar;
-
           DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
+
+         /* We FAIL in one of the following cases: */
+
+         /* Case 1: D is at the beginning or the end of string.  */
           if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
             goto fail;
+         else
+           {
+             /* C1 is the character before D, S1 is the syntax of C1, C2
+                is the character at D, and S2 is the syntax of C2.  */
+             int c1, c2, s1, s2;
+             int pos1 = PTR_TO_OFFSET (d - 1);
+             int charpos;
+
+             GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+             GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
+#ifdef emacs
+             charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
+             UPDATE_SYNTAX_TABLE (charpos);
+#endif
+             s1 = SYNTAX (c1);
+#ifdef emacs
+             UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
+#endif
+             s2 = SYNTAX (c2);
  
-         prevchar = WORDCHAR_P (d - 1);
-         thischar = WORDCHAR_P (d);
-         if (prevchar != thischar)
+             if (/* Case 2: Only one of S1 and S2 is Sword.  */
+                 ((s1 == Sword) != (s2 == Sword))
+                 /* Case 3: Both of S1 and S2 are Sword, and macro
+                    WORD_BOUNDARY_P (C1, C2) returns nonzero.  */
+                 || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
             goto fail;
-         break;
         }
-#endif
+         break;
  
         case wordbeg:
           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
-         if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
-           break;
+
+         /* We FAIL in one of the following cases: */
+
+         /* Case 1: D is at the end of string.  */
+         if (AT_STRINGS_END (d))
           goto fail;
+         else
+           {
+             /* C1 is the character before D, S1 is the syntax of C1, C2
+                is the character at D, and S2 is the syntax of C2.  */
+             int c1, c2, s1, s2;
+             int pos1 = PTR_TO_OFFSET (d);
+             int charpos;
+
+             GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
+#ifdef emacs
+             charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
+             UPDATE_SYNTAX_TABLE (charpos);
+#endif
+             s2 = SYNTAX (c2);
+       
+             /* Case 2: S2 is not Sword. */
+             if (s2 != Sword)
+               goto fail;
+
+             /* Case 3: D is not at the beginning of string ... */
+             if (!AT_STRINGS_BEG (d))
+               {
+                 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+#ifdef emacs
+                 UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
+#endif
+                 s1 = SYNTAX (c1);
+
+                 /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2)
+                    returns 0.  */
+                 if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2))
+                   goto fail;
+               }
+           }
+         break;
  
         case wordend:
           DEBUG_PRINT1 ("EXECUTING wordend.\n");
-         if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
-             && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
-           break;
+
+         /* We FAIL in one of the following cases: */
+
+         /* Case 1: D is at the beginning of string.  */
+         if (AT_STRINGS_BEG (d))
+           goto fail;
+         else
+           {
+             /* C1 is the character before D, S1 is the syntax of C1, C2
+                is the character at D, and S2 is the syntax of C2.  */
+             int c1, c2, s1, s2;
+             int pos1 = PTR_TO_OFFSET (d);
+             int charpos;
+
+             GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+#ifdef emacs
+             charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1 - 1);
+             UPDATE_SYNTAX_TABLE (charpos);
+#endif
+             s1 = SYNTAX (c1);
+
+             /* Case 2: S1 is not Sword.  */
+             if (s1 != Sword)
+               goto fail;
+
+             /* Case 3: D is not at the end of string ... */
+             if (!AT_STRINGS_END (d))
+               {
+                 GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
+#ifdef emacs
+                 UPDATE_SYNTAX_TABLE_FORWARD (charpos);
+#endif
+                 s2 = SYNTAX (c2);
+
+                 /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
+                    returns 0.  */
+                 if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2))
           goto fail;
+               }
+           }
+         break;
  
  #ifdef emacs
         case before_dot:
           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
-         if (PTR_CHAR_POS ((unsigned char *) d) >= PT)
+         if (PTR_BYTE_POS ((unsigned char *) d) >= PT_BYTE)
             goto fail;
           break;
  
         case at_dot:
           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
-         if (PTR_CHAR_POS ((unsigned char *) d) != PT)
+         if (PTR_BYTE_POS ((unsigned char *) d) != PT_BYTE)
             goto fail;
           break;
  
         case after_dot:
           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
-         if (PTR_CHAR_POS ((unsigned char *) d) <= PT)
+         if (PTR_BYTE_POS ((unsigned char *) d) <= PT_BYTE)
             goto fail;
           break;
  
@@ -4793,10 +5540,27 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
           mcnt = (int) Sword;
         matchsyntax:
           PREFETCH ();
-         /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
-         d++;
-         if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
+#ifdef emacs
+         {
+           int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
+           UPDATE_SYNTAX_TABLE (pos1);
+         }
+#endif
+         {
+           int c, len;
+
+           if (multibyte)
+             /* we must take the multibyte form into account, ... */
+             c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
+           else
+             /* everything should be handled as ASCII, even though it
+                looks like multibyte form.  */
+             c = *d, len = 1;
+
+           if (SYNTAX (c) != (enum syntaxcode) mcnt)
             goto fail;
+           d += len;
+         }
           SET_REGS_MATCHED ();
           break;
  
@@ -4810,86 +5574,138 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
           mcnt = (int) Sword;
         matchnotsyntax:
           PREFETCH ();
-         /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
-         d++;
-         if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
+#ifdef emacs
+         {
+           int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
+           UPDATE_SYNTAX_TABLE (pos1);
+         }
+#endif
+         {
+           int c, len;
+
+           if (multibyte)
+             c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
+           else
+             c = *d, len = 1;
+
+           if (SYNTAX (c) == (enum syntaxcode) mcnt)
             goto fail;
+           d += len;
+         }
           SET_REGS_MATCHED ();
           break;
  
+       case categoryspec:
+         DEBUG_PRINT2 ("EXECUTING categoryspec %d.\n", *p);
+         mcnt = *p++;
+         PREFETCH ();
+         {
+           int c, len;
+
+           if (multibyte)
+             c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
+           else
+             c = *d, len = 1;
+
+           if (!CHAR_HAS_CATEGORY (c, mcnt))
+             goto fail;
+           d += len;
+         }
+         SET_REGS_MATCHED ();
+         break;
+
+       case notcategoryspec:
+         DEBUG_PRINT2 ("EXECUTING notcategoryspec %d.\n", *p);
+         mcnt = *p++;
+         PREFETCH ();
+         {
+           int c, len;
+
+           if (multibyte)
+             c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
+           else
+             c = *d, len = 1;
+
+           if (CHAR_HAS_CATEGORY (c, mcnt))
+             goto fail;
+           d += len;
+         }
+         SET_REGS_MATCHED ();
+          break;
+
  #else /* not emacs */
         case wordchar:
-         DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
+          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
           PREFETCH ();
-         if (!WORDCHAR_P (d))
-           goto fail;
+          if (!WORDCHAR_P (d))
+            goto fail;
           SET_REGS_MATCHED ();
-         d++;
+          d++;
           break;
  
         case notwordchar:
-         DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
+          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
           PREFETCH ();
           if (WORDCHAR_P (d))
-           goto fail;
-         SET_REGS_MATCHED ();
-         d++;
+            goto fail;
+          SET_REGS_MATCHED ();
+          d++;
           break;
  #endif /* not emacs */
  
-       default:
-         abort ();
+        default:
+          abort ();
         }
-      continue;         /* Successfully executed one pattern command; keep going.  */
+      continue;  /* Successfully executed one pattern command; keep going.  */
  
  
      /* We goto here if a matching operation fails. */
      fail:
        if (!FAIL_STACK_EMPTY ())
-       { /* A restart point is known.  Restore to that state.  */
-         DEBUG_PRINT1 ("\nFAIL:\n");
-         POP_FAILURE_POINT (d, p,
-                            lowest_active_reg, highest_active_reg,
-                            regstart, regend, reg_info);
-
-         /* If this failure point is a dummy, try the next one.  */
-         if (!p)
+       { /* A restart point is known.  Restore to that state.  */
+          DEBUG_PRINT1 ("\nFAIL:\n");
+          POP_FAILURE_POINT (d, p,
+                             lowest_active_reg, highest_active_reg,
+                             regstart, regend, reg_info);
+
+          /* If this failure point is a dummy, try the next one.  */
+          if (!p)
             goto fail;
  
-         /* If we failed to the end of the pattern, don't examine *p.  */
+          /* If we failed to the end of the pattern, don't examine *p.  */
           assert (p <= pend);
-         if (p < pend)
-           {
-             boolean is_a_jump_n = false;
-
-             /* If failed to a backwards jump that's part of a repetition
-                loop, need to pop this failure point and use the next one.  */
-             switch ((re_opcode_t) *p)
-               {
-               case jump_n:
-                 is_a_jump_n = true;
-               case maybe_pop_jump:
-               case pop_failure_jump:
-               case jump:
-                 p1 = p + 1;
-                 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-                 p1 += mcnt;
-
-                 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
-                     || (!is_a_jump_n
-                         && (re_opcode_t) *p1 == on_failure_jump))
-                   goto fail;
-                 break;
-               default:
-                 /* do nothing */ ;
-               }
-           }
-
-         if (d >= string1 && d <= end1)
+          if (p < pend)
+            {
+              boolean is_a_jump_n = false;
+
+              /* If failed to a backwards jump that's part of a repetition
+                 loop, need to pop this failure point and use the next one.  */
+              switch ((re_opcode_t) *p)
+                {
+                case jump_n:
+                  is_a_jump_n = true;
+                case maybe_pop_jump:
+                case pop_failure_jump:
+                case jump:
+                  p1 = p + 1;
+                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+                  p1 += mcnt;
+
+                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
+                      || (!is_a_jump_n
+                          && (re_opcode_t) *p1 == on_failure_jump))
+                    goto fail;
+                  break;
+                default:
+                  /* do nothing */ ;
+                }
+            }
+
+          if (d >= string1 && d <= end1)
             dend = end_match_1;
-       }
+        }
        else
-       break;   /* Matching at this starting point really fails.  */
+        break;   /* Matching at this starting point really fails.  */
      } /* for (;;) */
  
    if (best_regs_set)
@@ -4897,7 +5713,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
  
    FREE_VARIABLES ();
  
-  return -1;                           /* Failure to match.  */
+  return -1;                           /* Failure to match.  */
  } /* re_match_2 */
  \f
  /* Subroutine definitions for re_match_2.  */
@@ -4926,92 +5742,92 @@ group_match_null_string_p (p, end, reg_info)
      {
        /* Skip over opcodes that can match nothing, and return true or
          false, as appropriate, when we get to one that can't, or to the
-        matching stop_memory.  */
+         matching stop_memory.  */
  
        switch ((re_opcode_t) *p1)
-       {
-       /* Could be either a loop or a series of alternatives.  */
-       case on_failure_jump:
-         p1++;
-         EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+        {
+        /* Could be either a loop or a series of alternatives.  */
+        case on_failure_jump:
+          p1++;
+          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
  
-         /* If the next operation is not a jump backwards in the
+          /* If the next operation is not a jump backwards in the
              pattern.  */
  
           if (mcnt >= 0)
             {
-             /* Go through the on_failure_jumps of the alternatives,
-                seeing if any of the alternatives cannot match nothing.
-                The last alternative starts with only a jump,
-                whereas the rest start with on_failure_jump and end
-                with a jump, e.g., here is the pattern for `a|b|c':
+              /* Go through the on_failure_jumps of the alternatives,
+                 seeing if any of the alternatives cannot match nothing.
+                 The last alternative starts with only a jump,
+                 whereas the rest start with on_failure_jump and end
+                 with a jump, e.g., here is the pattern for `a|b|c':
  
-                /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
-                /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
-                /exactn/1/c
+                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
+                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
+                 /exactn/1/c
  
-                So, we have to first go through the first (n-1)
-                alternatives and then deal with the last one separately.  */
+                 So, we have to first go through the first (n-1)
+                 alternatives and then deal with the last one separately.  */
  
  
-             /* Deal with the first (n-1) alternatives, which start
-                with an on_failure_jump (see above) that jumps to right
-                past a jump_past_alt.  */
+              /* Deal with the first (n-1) alternatives, which start
+                 with an on_failure_jump (see above) that jumps to right
+                 past a jump_past_alt.  */
  
-             while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
-               {
-                 /* `mcnt' holds how many bytes long the alternative
-                    is, including the ending `jump_past_alt' and
-                    its number.  */
+              while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
+                {
+                  /* `mcnt' holds how many bytes long the alternative
+                     is, including the ending `jump_past_alt' and
+                     its number.  */
  
-                 if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
-                                                     reg_info))
-                   return false;
+                  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
+                                                     reg_info))
+                    return false;
  
-                 /* Move to right after this alternative, including the
+                  /* Move to right after this alternative, including the
                      jump_past_alt.  */
-                 p1 += mcnt;
+                  p1 += mcnt;
  
-                 /* Break if it's the beginning of an n-th alternative
-                    that doesn't begin with an on_failure_jump.  */
-                 if ((re_opcode_t) *p1 != on_failure_jump)
-                   break;
+                  /* Break if it's the beginning of an n-th alternative
+                     that doesn't begin with an on_failure_jump.  */
+                  if ((re_opcode_t) *p1 != on_failure_jump)
+                    break;
  
                   /* Still have to check that it's not an n-th
                      alternative that starts with an on_failure_jump.  */
                   p1++;
-                 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-                 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
-                   {
-                     /* Get to the beginning of the n-th alternative.  */
-                     p1 -= 3;
-                     break;
-                   }
-               }
+                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+                  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
+                    {
+                     /* Get to the beginning of the n-th alternative.  */
+                      p1 -= 3;
+                      break;
+                    }
+                }
  
-             /* Deal with the last alternative: go back and get number
-                of the `jump_past_alt' just before it.  `mcnt' contains
-                the length of the alternative.  */
-             EXTRACT_NUMBER (mcnt, p1 - 2);
+              /* Deal with the last alternative: go back and get number
+                 of the `jump_past_alt' just before it.  `mcnt' contains
+                 the length of the alternative.  */
+              EXTRACT_NUMBER (mcnt, p1 - 2);
  
-             if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
-               return false;
+              if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
+                return false;
  
-             p1 += mcnt;       /* Get past the n-th alternative.  */
-           } /* if mcnt > 0 */
-         break;
+              p1 += mcnt;      /* Get past the n-th alternative.  */
+            } /* if mcnt > 0 */
+          break;
  
  
-       case stop_memory:
+        case stop_memory:
           assert (p1[1] == **p);
-         *p = p1 + 2;
-         return true;
+          *p = p1 + 2;
+          return true;
  
  
-       default:
-         if (!common_op_match_null_string_p (&p1, end, reg_info))
-           return false;
-       }
+        default:
+          if (!common_op_match_null_string_p (&p1, end, reg_info))
+            return false;
+        }
      } /* while p1 < end */
  
    return false;
@@ -5033,21 +5849,21 @@ alt_match_null_string_p (p, end, reg_info)
    while (p1 < end)
      {
        /* Skip over opcodes that can match nothing, and break when we get
-        to one that can't.  */
+         to one that can't.  */
  
        switch ((re_opcode_t) *p1)
-       {
-       /* It's a loop.  */
-       case on_failure_jump:
-         p1++;
-         EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-         p1 += mcnt;
-         break;
+        {
+       /* It's a loop.  */
+        case on_failure_jump:
+          p1++;
+          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+          p1 += mcnt;
+          break;
  
         default:
-         if (!common_op_match_null_string_p (&p1, end, reg_info))
-           return false;
-       }
+          if (!common_op_match_null_string_p (&p1, end, reg_info))
+            return false;
+        }
      }  /* while p1 < end */
  
    return true;
@@ -5093,42 +5909,42 @@ common_op_match_null_string_p (p, end, reg_info)
        ret = group_match_null_string_p (&p1, end, reg_info);
  
        /* Have to set this here in case we're checking a group which
-        contains a group and a back reference to it.  */
+         contains a group and a back reference to it.  */
  
        if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
-       REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
+        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
  
        if (!ret)
-       return false;
+        return false;
        break;
  
-    /* If this is an optimized succeed_n for zero times, make the jump.         */
+    /* If this is an optimized succeed_n for zero times, make the jump.  */
      case jump:
        EXTRACT_NUMBER_AND_INCR (mcnt, p1);
        if (mcnt >= 0)
-       p1 += mcnt;
+        p1 += mcnt;
        else
-       return false;
+        return false;
        break;
  
      case succeed_n:
-      /* Get to the number of times to succeed.         */
+      /* Get to the number of times to succeed.  */
        p1 += 2;
        EXTRACT_NUMBER_AND_INCR (mcnt, p1);
  
        if (mcnt == 0)
-       {
-         p1 -= 4;
-         EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-         p1 += mcnt;
-       }
+        {
+          p1 -= 4;
+          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+          p1 += mcnt;
+        }
        else
-       return false;
+        return false;
        break;
  
      case duplicate:
        if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
-       return false;
+        return false;
        break;
  
      case set_number_at:
@@ -5154,11 +5970,27 @@ bcmp_translate (s1, s2, len, translate)
       RE_TRANSLATE_TYPE translate;
  {
    register unsigned char *p1 = s1, *p2 = s2;
-  while (len)
+  unsigned char *p1_end = s1 + len;
+  unsigned char *p2_end = s2 + len;
+
+  while (p1 != p1_end && p2 != p2_end)
      {
-      if (translate[*p1++] != translate[*p2++]) return 1;
-      len--;
+      int p1_charlen, p2_charlen;
+      int p1_ch, p2_ch;
+
+      p1_ch = STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
+      p2_ch = STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
+
+      if (RE_TRANSLATE (translate, p1_ch)
+         != RE_TRANSLATE (translate, p2_ch))
+       return 1;
+
+      p1 += p1_charlen, p2 += p2_charlen;
      }
+
+  if (p1 != p1_end || p2 != p2_end)
+    return 1;
+
    return 0;
  }
  \f
@@ -5171,7 +6003,7 @@ bcmp_translate (s1, s2, len, translate)
     Assumes the `allocated' (and perhaps `buffer') and `translate' fields
     are set in BUFP on entry.
  
-   We call regex_compile to do the actual compilation. */
+   We call regex_compile to do the actual compilation.  */
  
  const char *
  re_compile_pattern (pattern, length, bufp)
@@ -5190,7 +6022,7 @@ re_compile_pattern (pattern, length, bufp)
       setting no_sub.  */
    bufp->no_sub = 0;
  
-  /* Match anchors at newline. */
+  /* Match anchors at newline.  */
    bufp->newline_anchor = 1;
  
    ret = regex_compile (pattern, length, re_syntax_options, bufp);
@@ -5200,8 +6032,8 @@ re_compile_pattern (pattern, length, bufp)
    return gettext (re_error_msgid[(int) ret]);
  }
  \f
-/* Entry points compatible with 4.2 BSD regex library. We don't define
-   them unless specifically requested. */
+/* Entry points compatible with 4.2 BSD regex library.  We don't define
+   them unless specifically requested.  */
  
  #if defined (_REGEX_RE_COMP) || defined (_LIBC)
  
@@ -5231,7 +6063,7 @@ re_comp (s)
      {
        re_comp_buf.buffer = (unsigned char *) malloc (200);
        if (re_comp_buf.buffer == NULL)
-       return gettext (re_error_msgid[(int) REG_ESPACE]);
+        return gettext (re_error_msgid[(int) REG_ESPACE]);
        re_comp_buf.allocated = 200;
  
        re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
@@ -5242,7 +6074,7 @@ re_comp (s)
    /* Since `re_exec' always passes NULL for the `regs' argument, we
       don't need to initialize the pattern buffer fields which affect it.  */
  
-  /* Match anchors at newlines.         */
+  /* Match anchors at newlines.  */
    re_comp_buf.newline_anchor = 1;
  
    ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
@@ -5274,7 +6106,7 @@ re_exec (s)
  
  /* regcomp takes a regular expression as a string and compiles it.
  
-   PREG is a regex_t *.         We do not expect any fields to be initialized,
+   PREG is a regex_t *.  We do not expect any fields to be initialized,
     since POSIX says we shouldn't.  Thus, we set
  
       `buffer' to the compiled pattern;
@@ -5303,7 +6135,7 @@ re_exec (s)
       routine will report only success or failure, and nothing about the
       registers.
  
-   It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
+   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
     the return codes and their meanings.)  */
  
  int
@@ -5336,11 +6168,11 @@ regcomp (preg, pattern, cflags)
         = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
                                       * sizeof (*(RE_TRANSLATE_TYPE)0));
        if (preg->translate == NULL)
-       return (int) REG_ESPACE;
+        return (int) REG_ESPACE;
  
        /* Map uppercase characters to corresponding lowercase ones.  */
        for (i = 0; i < CHAR_SET_SIZE; i++)
-       preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
+        preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
      }
    else
      preg->translate = NULL;
@@ -5350,7 +6182,7 @@ regcomp (preg, pattern, cflags)
      { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
        syntax &= ~RE_DOT_NEWLINE;
        syntax |= RE_HAT_LISTS_NOT_NEWLINE;
-      /* It also changes the matching behavior.         */
+      /* It also changes the matching behavior.  */
        preg->newline_anchor = 1;
      }
    else
@@ -5374,7 +6206,7 @@ regcomp (preg, pattern, cflags)
     string STRING.
  
     If NMATCH is zero or REG_NOSUB was set in the cflags argument to
-   `regcomp', we ignore PMATCH.         Otherwise, we assume PMATCH has at
+   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
     least NMATCH elements, and we set them to the offsets of the
     corresponding matched substrings.
  
@@ -5405,7 +6237,7 @@ regexec (preg, string, nmatch, pmatch, eflags)
  
    /* The user has told us exactly how many registers to return
       information about, via `nmatch'.  We have to pass that on to the
-     matching routines.         */
+     matching routines.  */
    private_preg.regs_allocated = REGS_FIXED;
  
    if (want_reg_info)
@@ -5414,29 +6246,29 @@ regexec (preg, string, nmatch, pmatch, eflags)
        regs.start = TALLOC (nmatch, regoff_t);
        regs.end = TALLOC (nmatch, regoff_t);
        if (regs.start == NULL || regs.end == NULL)
-       return (int) REG_NOMATCH;
+        return (int) REG_NOMATCH;
      }
  
    /* Perform the searching operation.  */
    ret = re_search (&private_preg, string, len,
-                  /* start: */ 0, /* range: */ len,
-                  want_reg_info ? &regs : (struct re_registers *) 0);
+                   /* start: */ 0, /* range: */ len,
+                   want_reg_info ? &regs : (struct re_registers *) 0);
  
    /* Copy the register information to the POSIX structure.  */
    if (want_reg_info)
      {
        if (ret >= 0)
-       {
-         unsigned r;
+        {
+          unsigned r;
  
-         for (r = 0; r < nmatch; r++)
-           {
-             pmatch[r].rm_so = regs.start[r];
-             pmatch[r].rm_eo = regs.end[r];
-           }
-       }
+          for (r = 0; r < nmatch; r++)
+            {
+              pmatch[r].rm_so = regs.start[r];
+              pmatch[r].rm_eo = regs.end[r];
+            }
+        }
  
-      /* If we needed the temporary register info, free the space now. */
+      /* If we needed the temporary register info, free the space now.  */
        free (regs.start);
        free (regs.end);
      }
@@ -5462,7 +6294,7 @@ regerror (errcode, preg, errbuf, errbuf_size)
    if (errcode < 0
        || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
      /* Only error codes returned by the rest of the code should be passed
-       to this routine.         If we are given anything else, or if other regex
+       to this routine.  If we are given anything else, or if other regex
         code generates an invalid error code, then the program has a bug.
         Dump core so we can fix it.  */
      abort ();
@@ -5474,12 +6306,12 @@ regerror (errcode, preg, errbuf, errbuf_size)
    if (errbuf_size != 0)
      {
        if (msg_size > errbuf_size)
-       {
-         strncpy (errbuf, msg, errbuf_size - 1);
-         errbuf[errbuf_size - 1] = 0;
-       }
+        {
+          strncpy (errbuf, msg, errbuf_size - 1);
+          errbuf[errbuf_size - 1] = 0;
+        }
        else
-       strcpy (errbuf, msg);
+        strcpy (errbuf, msg);
      }
  
    return msg_size;