X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=regex.c;h=d30a922abdf2be4c49ea39e9e35d34b162be9107;hb=58dc681a9b810db4f7fc8b47d6c216bb169b7da4;hp=e26641bfcd9cb05ff7223fd020353f40ac14626d;hpb=c90c4710cee3f4882a4fa193c112f5efb6c93de7;p=gnulib.git

diff --git a/regex.c b/regex.c
index e26641bfc..d30a922ab 100644
--- a/regex.c
+++ b/regex.c
@@ -2,7 +2,7 @@
    0.12.  (Implements POSIX draft P10003.2/D11.2, except for
    internationalization features.)
 
-   Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
+   Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -27,6 +27,12 @@
 #undef	_GNU_SOURCE
 #define _GNU_SOURCE
 
+/* Convert the character pointer D to a BEG-based offset from the start.  */
+#define PTR_TO_OFFSET(d)						\
+	POS_AS_IN_BUFFER (MATCHING_IN_FIRST_STRING			\
+			  ? (d) - string1 : (d) - (string2 - size1))
+#define POS_AS_IN_BUFFER(p) ((p) + 1)
+
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -53,7 +59,13 @@
 
 #include "lisp.h"
 #include "buffer.h"
+
+/* Make syntax table lookup grant data in gl_state.  */
+#define SYNTAX_ENTRY_VIA_PROPERTY
+
 #include "syntax.h"
+#include "charset.h"
+#include "category.h"
 
 #define malloc xmalloc
 #define free xfree
@@ -156,6 +168,19 @@ init_syntax_once ()
 
 #define SYNTAX(c) re_syntax_table[c]
 
+/* Dummy macros for non-Emacs environments.  */
+#define BASE_LEADING_CODE_P(c) (0)
+#define WORD_BOUNDARY_P(c1, c2) (0)
+#define CHAR_HEAD_P(p) (1)
+#define SINGLE_BYTE_CHAR_P(c) (1)
+#define SAME_CHARSET_P(c1, c2) (1)
+#define MULTIBYTE_FORM_LENGTH(p, s) (1)
+#define STRING_CHAR(p, s) (*(p))
+#define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
+#define GET_CHAR_AFTER_2(c, p, str1, end1, str2, end2) \
+  (c = ((p) == (end1) ? *(str2) : *(p)))
+#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
+  (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
 #endif /* not emacs */
 
 /* Get the interface, including the syntax bits.  */
@@ -465,7 +490,17 @@ typedef enum
   syntaxspec,
 
 	/* Matches any character whose syntax is not that specified.  */
-  notsyntaxspec
+  notsyntaxspec,
+
+  /* Matches any character whose category-set contains the specified
+     category.	The operator is followed by a byte which contains a
+     category code (mnemonic ASCII character).	*/
+  categoryspec,
+
+  /* Matches any character whose category-set does not contain the
+     specified category.  The operator is followed by a byte which
+     contains the category code (mnemonic ASCII character).  */
+  notcategoryspec
 #endif /* emacs */
 } re_opcode_t;
 
@@ -543,6 +578,93 @@ extract_number_and_incr (destination, source)
 
 #endif /* DEBUG */
 
+/* Store a multibyte character in three contiguous bytes starting at
+   DESTINATION, and advance DESTINATION to the byte just after the
+   stored character.  Therefore, DESTINATION must be an lvalue.  */
+
+#define STORE_CHARACTER_AND_INCR(destination, character)	\
+  do {								\
+    (destination)[0] = (character) & 0377;			\
+    (destination)[1] = ((character) >> 8) & 0377;		\
+    (destination)[2] = (character) >> 16;			\
+    (destination) += 3;						\
+  } while (0)
+
+/* Put into DESTINATION a character stored in three contiguous bytes
+   starting at SOURCE.	*/
+
+#define EXTRACT_CHARACTER(destination, source)	\
+  do {						\
+    (destination) = ((source)[0]		\
+		     | ((source)[1] << 8)	\
+		     | ((source)[2] << 16));	\
+  } while (0)
+
+
+/* Macros for charset. */
+
+/* Size of bitmap of charset P in bytes.  P is a start of charset,
+   i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not.  */
+#define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)
+
+/* Nonzero if charset P has range table.  */
+#define CHARSET_RANGE_TABLE_EXISTS_P(p)	 ((p)[1] & 0x80)
+
+/* Return the address of the range table of charset P -- not the first
+   range itself, but the preceding slot where the number of ranges is
+   stored.  `2 +' means to skip re_opcode_t and size of bitmap.  */
+#define CHARSET_RANGE_TABLE(p) (&(p)[2 + CHARSET_BITMAP_SIZE (p)])
+
+/* Test if C is listed in the bitmap of charset P.  */
+#define CHARSET_LOOKUP_BITMAP(p, c)				\
+  ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH			\
+   && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH)))
+
+/* Return the address of the end of RANGE_TABLE.  COUNT is the number
+   of ranges (each a (start, end) pair) in RANGE_TABLE.  `* 2'
+   accounts for the start and the end of each range, and `* 3' for
+   the three bytes used to store each of them.  */
+#define CHARSET_RANGE_TABLE_END(range_table, count)	\
+  ((range_table) + (count) * 2 * 3)
+
+/* Test if C is in RANGE_TABLE.  The flag NOT is negated if C lies
+   within one of its ranges.  COUNT is the number of ranges.  */
+#define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count)	\
+  do									\
+    {									\
+      int range_start, range_end;					\
+      unsigned char *p;							\
+      unsigned char *range_table_end					\
+	= CHARSET_RANGE_TABLE_END ((range_table), (count));		\
+									\
+      for (p = (range_table); p < range_table_end; p += 2 * 3)		\
+	{								\
+	  EXTRACT_CHARACTER (range_start, p);				\
+	  EXTRACT_CHARACTER (range_end, p + 3);				\
+									\
+	  if (range_start <= (c) && (c) <= range_end)			\
+	    {								\
+	      (not) = !(not);						\
+	      break;							\
+	    }								\
+	}								\
+    }									\
+  while (0)
+
+/* Test if C is in range table of CHARSET.  The flag NOT is negated if
+   C is listed in it.  */
+#define CHARSET_LOOKUP_RANGE_TABLE(not, c, charset)			\
+  do									\
+    {									\
+      /* Number of ranges in range table. */				\
+      int count;							\
+      unsigned char *range_table = CHARSET_RANGE_TABLE (charset);	\
+									\
+      EXTRACT_NUMBER_AND_INCR (count, range_table);			\
+      CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count);	\
+    }									\
+  while (0)
+
 /* If DEBUG is defined, Regex prints many voluminous messages about what
    it is doing (if the variable `debug' is nonzero).  If linked with the
    main program in `iregex.c', you can enter patterns and strings
@@ -1418,7 +1540,7 @@ static reg_errcode_t compile_range ();
    when we use a character as a subscript we must make it unsigned.  */
 #ifndef TRANSLATE
 #define TRANSLATE(d) \
-  (translate ? (char) translate[(unsigned char) (d)] : (d))
+  (translate ? (unsigned char) translate[(unsigned char) (d)] : (d))
 #endif
 
 
@@ -1556,6 +1678,51 @@ typedef struct
 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
 
 
+/* Structure to manage the work area for a charset's range table.  */
+struct range_table_work_area
+{
+  int *table;			/* actual work area (array of int).  */
+  int allocated;		/* allocated size of TABLE, in bytes.  */
+  int used;			/* number of int elements of TABLE in use.  */
+};
+
+/* Make sure that WORK_AREA can hold N more multibyte characters.  */
+#define EXTEND_RANGE_TABLE_WORK_AREA(work_area, n)			  \
+  do {									  \
+    if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated)  \
+      {									  \
+	(work_area).allocated += 16 * sizeof (int);			  \
+	if ((work_area).table)						  \
+	  (work_area).table						  \
+	    = (int *) realloc ((work_area).table, (work_area).allocated); \
+	else								  \
+	  (work_area).table						  \
+	    = (int *) malloc ((work_area).allocated);			  \
+	if ((work_area).table == 0)					  \
+	  FREE_STACK_RETURN (REG_ESPACE);				  \
+      }									  \
+  } while (0)
+
+/* Set a range (RANGE_START, RANGE_END) to WORK_AREA.  */
+#define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end)	\
+  do {									\
+    EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2);			\
+    (work_area).table[(work_area).used++] = (range_start);		\
+    (work_area).table[(work_area).used++] = (range_end);		\
+  } while (0)
+
+/* Free allocated memory for WORK_AREA.	 */
+#define FREE_RANGE_TABLE_WORK_AREA(work_area)	\
+  do {						\
+    if ((work_area).table)			\
+      free ((work_area).table);			\
+  } while (0)
+
+#define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0)
+#define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
+#define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
+
+
 /* Set the bit for character C in a list.  */
 #define SET_LIST_BIT(c)				      \
   (b[((unsigned char) (c)) / BYTEWIDTH]		      \
@@ -1657,7 +1824,11 @@ regex_grow_registers (num_regs)
 
 /* Return, freeing storage we allocated.  */
 #define FREE_STACK_RETURN(value)		\
-  return (free (compile_stack.stack), value)
+  do {							\
+    FREE_RANGE_TABLE_WORK_AREA (range_table_work);	\
+    free (compile_stack.stack);				\
+    return value;					\
+  } while (0)
 
 static reg_errcode_t
 regex_compile (pattern, size, syntax, bufp)
@@ -1669,7 +1840,7 @@ regex_compile (pattern, size, syntax, bufp)
   /* We fetch characters from PATTERN here.  Even though PATTERN is
      `char *' (i.e., signed), we declare these variables as unsigned, so
      they can be reliably used as array indices.  */
-  register unsigned char c, c1;
+  register unsigned int c, c1;
 
   /* A random temporary spot in PATTERN.  */
   const char *p1;
@@ -1715,6 +1886,9 @@ regex_compile (pattern, size, syntax, bufp)
      number is put in the stop_memory as the start_memory.  */
   regnum_t regnum = 0;
 
+  /* Work area for range table of charset.  */
+  struct range_table_work_area range_table_work;
+
 #ifdef DEBUG
   DEBUG_PRINT1 ("\nCompiling pattern: ");
   if (debug)
@@ -1735,6 +1909,9 @@ regex_compile (pattern, size, syntax, bufp)
   compile_stack.size = INIT_COMPILE_STACK_SIZE;
   compile_stack.avail = 0;
 
+  range_table_work.table = 0;
+  range_table_work.allocated = 0;
+
   /* Initialize the pattern buffer.  */
   bufp->syntax = syntax;
   bufp->fastmap_accurate = 0;
@@ -1748,6 +1925,14 @@ regex_compile (pattern, size, syntax, bufp)
   /* Always count groups, whether or not bufp->no_sub is set.  */
   bufp->re_nsub = 0;
 
+#ifdef emacs
+  /* bufp->multibyte is set before regex_compile is called, so don't alter
+     it. */
+#else  /* not emacs */
+  /* Nothing is recognized as a multibyte character.  */
+  bufp->multibyte = 0;
+#endif
+
 #if !defined (emacs) && !defined (SYNTAX_TABLE)
   /* Initialize the syntax table.  */
    init_syntax_once ();
@@ -1950,7 +2135,7 @@ regex_compile (pattern, size, syntax, bufp)
 
 	case '[':
 	  {
-	    boolean had_char_class = false;
+	    CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
 
 	    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
 
@@ -1983,6 +2168,9 @@ regex_compile (pattern, size, syntax, bufp)
 	    /* Read in characters and ranges, setting map bits.	 */
 	    for (;;)
 	      {
+		int len;
+		boolean escaped_char = false;
+
 		if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
 
 		PATFETCH (c);
@@ -1992,51 +2180,36 @@ regex_compile (pattern, size, syntax, bufp)
 		  {
 		    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
 
-		    PATFETCH (c1);
-		    SET_LIST_BIT (c1);
-		    continue;
+		    PATFETCH (c);
+		    escaped_char = true;
 		  }
-
+		else
+		  {
 		/* Could be the end of the bracket expression.	If it's
 		   not (i.e., when the bracket expression is `[]' so
 		   far), the ']' character bit gets set way below.  */
 		if (c == ']' && p != p1 + 1)
 		  break;
-
-		/* Look ahead to see if it's a range when the last thing
-		   was a character class.  */
-		if (had_char_class && c == '-' && *p != ']')
-		  FREE_STACK_RETURN (REG_ERANGE);
-
-		/* Look ahead to see if it's a range when the last thing
-		   was a character: if this is a hyphen not at the
-		   beginning or the end of a list, then it's the range
-		   operator.  */
-		if (c == '-'
-		    && !(p - 2 >= pattern && p[-2] == '[')
-		    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
-		    && *p != ']')
-		  {
-		    reg_errcode_t ret
-		      = compile_range (&p, pend, translate, syntax, b);
-		    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
 		  }
 
-		else if (p[0] == '-' && p[1] != ']')
-		  { /* This handles ranges made up of characters only.	*/
-		    reg_errcode_t ret;
-
-		    /* Move past the `-'.  */
-		    PATFETCH (c1);
-
-		    ret = compile_range (&p, pend, translate, syntax, b);
-		    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
+		/* If C indicates start of multibyte char, get the
+		   actual character code in C, and set the pattern
+		   pointer P to the next character boundary.  */
+		if (bufp->multibyte && BASE_LEADING_CODE_P (c))
+		  {
+		    PATUNFETCH;
+		    c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
+		    p += len;
 		  }
+		/* What should we do for the character which is
+		   greater than 0x7F, but not BASE_LEADING_CODE_P?
+		   XXX */
 
 		/* See if we're at the beginning of a possible character
 		   class.  */
 
-		else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+		else if (!escaped_char &&
+			 syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
 		  { /* Leave room for the null.	 */
 		    char str[CHAR_CLASS_MAX_LENGTH + 1];
 
@@ -2056,9 +2229,10 @@ regex_compile (pattern, size, syntax, bufp)
 		      }
 		    str[c1] = '\0';
 
-		    /* If isn't a word bracketed by `[:' and:`]':
-		       undo the ending character, the letters, and leave
-		       the leading `:' and `[' (but set bits for them).	 */
+		    /* If isn't a word bracketed by `[:' and `:]':
+		       undo the ending character, the letters, and
+		       leave the leading `:' and `[' (but set bits for
+		       them).  */
 		    if (c == ':' && *p == ']')
 		      {
 			int ch;
@@ -2105,7 +2279,9 @@ regex_compile (pattern, size, syntax, bufp)
 				|| (is_xdigit && ISXDIGIT (ch)))
 			      SET_LIST_BIT (translated);
 			  }
-			had_char_class = true;
+
+			/* Repeat the loop. */
+			continue;
 		      }
 		    else
 		      {
@@ -2113,22 +2289,86 @@ regex_compile (pattern, size, syntax, bufp)
 			while (c1--)
 			  PATUNFETCH;
 			SET_LIST_BIT ('[');
-			SET_LIST_BIT (':');
-			had_char_class = false;
+
+			/* Because the `:' may start a range, we
+			   can't simply set its bit and repeat the loop.
+			   Instead, just store it in C and handle it below.  */
+			c = ':';
+		      }
+		  }
+
+		if (p < pend && p[0] == '-' && p[1] != ']')
+		  {
+
+		    /* Discard the `-'. */
+		    PATFETCH (c1);
+
+		    /* Fetch the character which ends the range. */
+		    PATFETCH (c1);
+		    if (bufp->multibyte && BASE_LEADING_CODE_P (c1))
+		      {
+			PATUNFETCH;
+			c1 = STRING_CHAR_AND_LENGTH (p, pend - p, len);
+			p += len;
 		      }
+
+		    if (!SAME_CHARSET_P (c, c1))
+		      FREE_STACK_RETURN (REG_ERANGE);
 		  }
 		else
+		  /* Range from C to C. */
+		  c1 = c;
+
+		/* Set the range ... */
+		if (SINGLE_BYTE_CHAR_P (c))
+		  /* ... into bitmap.  */
 		  {
-		    had_char_class = false;
-		    SET_LIST_BIT (c);
+		    unsigned this_char;
+		    int range_start = c, range_end = c1;
+
+		    /* If the start is after the end, the range is empty.  */
+		    if (range_start > range_end)
+		      {
+			if (syntax & RE_NO_EMPTY_RANGES)
+			  FREE_STACK_RETURN (REG_ERANGE);
+			/* Else, repeat the loop.  */
+		      }
+		    else
+		      {
+			for (this_char = range_start; this_char <= range_end;
+			     this_char++)
+			  SET_LIST_BIT (TRANSLATE (this_char));
 		  }
 	      }
+		else
+		  /* ... into range table.  */
+		  SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
+	      }
 
 	    /* Discard any (non)matching list bytes that are all 0 at the
 	       end of the map.	Decrease the map-length byte too.  */
 	    while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
 	      b[-1]--;
 	    b += b[-1];
+
+	    /* Build real range table from work area. */
+	    if (RANGE_TABLE_WORK_USED (range_table_work))
+	      {
+		int i;
+		int used = RANGE_TABLE_WORK_USED (range_table_work);
+
+		/* Allocate space for COUNT + RANGE_TABLE.  Needs two
+		   bytes for COUNT and three bytes for each character.	*/
+		GET_BUFFER_SPACE (2 + used * 3);
+
+		/* Indicate the existence of range table.  */
+		laststart[1] |= 0x80;
+
+		STORE_NUMBER_AND_INCR (b, used / 2);
+		for (i = 0; i < used; i++)
+		  STORE_CHARACTER_AND_INCR
+		    (b, RANGE_TABLE_WORK_ELT (range_table_work, i));
+	      }
 	  }
 	  break;
 
@@ -2525,6 +2765,18 @@ regex_compile (pattern, size, syntax, bufp)
 	      PATFETCH (c);
 	      BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
 	      break;
+
+	    case 'c':
+	      laststart = b;
+	      PATFETCH_RAW (c);
+	      BUF_PUSH_2 (categoryspec, c);
+	      break;
+
+	    case 'C':
+	      laststart = b;
+	      PATFETCH_RAW (c);
+	      BUF_PUSH_2 (notcategoryspec, c);
+	      break;
 #endif /* emacs */
 
 
@@ -2604,6 +2856,12 @@ regex_compile (pattern, size, syntax, bufp)
 	default:
 	/* Expects the character in `c'.  */
 	normal_char:
+	  p1 = p - 1;		/* P1 points the head of C.  */
+#ifdef emacs
+	  if (bufp->multibyte)
+	    /* Set P to the next character boundary.  */
+	    p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1;
+#endif
 	      /* If no exactn currently being built.  */
 	  if (!pending_exact
 
@@ -2611,7 +2869,7 @@ regex_compile (pattern, size, syntax, bufp)
 	      || pending_exact + *pending_exact + 1 != b
 
 	      /* We have only one byte following the exactn for the count.  */
-	      || *pending_exact == (1 << BYTEWIDTH) - 1
+	      || *pending_exact >= (1 << BYTEWIDTH) - (p - p1)
 
 	      /* If followed by a repetition operator.	*/
 	      || *p == '*' || *p == '^'
@@ -2631,8 +2889,17 @@ regex_compile (pattern, size, syntax, bufp)
 	      pending_exact = b - 1;
 	    }
 
+	  /* Here, C may have been translated, so C may not be equal to *P1.  */
+	  while (1)
+	    {
 	  BUF_PUSH (c);
 	  (*pending_exact)++;
+	      if (++p1 == p)
+		break;
+
+	      /* Rest of multibyte form should be copied literally. */
+	      c = *(unsigned char *)p1;
+	    }
 	  break;
 	} /* switch (c) */
     } /* while p != pend */
@@ -2912,7 +3179,7 @@ int
 re_compile_fastmap (bufp)
      struct re_pattern_buffer *bufp;
 {
-  int j, k;
+  int i, j, k;
 #ifdef MATCH_MAY_ALLOCATE
   fail_stack_type fail_stack;
 #endif
@@ -2941,6 +3208,13 @@ re_compile_fastmap (bufp)
   /* We aren't doing a `succeed_n' to begin with.  */
   boolean succeed_n_p = false;
 
+  /* This flag is set true once the fastmap elements for all base
+     leading-codes have been set.  */
+  boolean match_any_multibyte_characters = false;
+
+  /* Maximum code of simple (single byte) character. */
+  int simple_char_max;
+
   assert (fastmap != NULL && p != NULL);
 
   INIT_FAIL_STACK ();
@@ -2992,6 +3266,7 @@ re_compile_fastmap (bufp)
 	  break;
 
 
+#ifndef emacs
 	case charset:
 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
@@ -3022,14 +3297,112 @@ re_compile_fastmap (bufp)
 	    if (SYNTAX (j) != Sword)
 	      fastmap[j] = 1;
 	  break;
+#else  /* emacs */
+	case charset:
+	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
+	       j >= 0; j--)
+	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
+	      fastmap[j] = 1;
+
+	  if (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
+	      && match_any_multibyte_characters == false)
+	    {
+	      /* Set fastmap[I] 1 where I is a base leading code of each
+		 multibyte character in the range table. */
+	      int c, count;
+
+	      /* Make P point to the range table.  */
+	      p += CHARSET_BITMAP_SIZE (&p[-2]);
+
+	      /* Extract the number of ranges in range table into
+		 COUNT.	 */
+	      EXTRACT_NUMBER_AND_INCR (count, p);
+	      for (; count > 0; count--, p += 2 * 3) /* XXX */
+		{
+		  /* Extract the start of each range.  */
+		  EXTRACT_CHARACTER (c, p);
+		  j = CHAR_CHARSET (c);
+		  fastmap[CHARSET_LEADING_CODE_BASE (j)] = 1;
+		}
+	    }
+	  break;
 
 
+	case charset_not:
+	  /* Chars beyond end of map must be allowed.  End of map is
+	     `127' if bufp->multibyte is nonzero.  */
+	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
+	       j < simple_char_max; j++)
+	    fastmap[j] = 1;
+
+	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
+	       j >= 0; j--)
+	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
+	      fastmap[j] = 1;
+
+	  if (bufp->multibyte)
+	    /* Any character set can possibly contain a character
+	       which doesn't match the specified set of characters.  */
+	    {
+	    set_fastmap_for_multibyte_characters:
+	      if (match_any_multibyte_characters == false)
+		{
+		  for (j = 0x80; j < 0xA0; j++)	/* XXX */
+		    if (BASE_LEADING_CODE_P (j))
+		      fastmap[j] = 1;
+		  match_any_multibyte_characters = true;
+		}
+	    }
+	  break;
+
+
+	case wordchar:
+	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+	  for (j = 0; j < simple_char_max; j++)
+	    if (SYNTAX (j) == Sword)
+	      fastmap[j] = 1;
+
+	  if (bufp->multibyte)
+	    /* Any character set can possibly contain a character
+	       whose syntax is `Sword'.	 */
+	    goto set_fastmap_for_multibyte_characters;
+	  break;
+
+
+	case notwordchar:
+	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+	  for (j = 0; j < simple_char_max; j++)
+	    if (SYNTAX (j) != Sword)
+	      fastmap[j] = 1;
+
+	  if (bufp->multibyte)
+	    /* Any character set can possibly contain a character
+	       whose syntax is not `Sword'.  */
+	    goto set_fastmap_for_multibyte_characters;
+	  break;
+#endif
+
 	case anychar:
 	  {
 	    int fastmap_newline = fastmap['\n'];
 
-	    /* `.' matches anything ...	 */
-	    for (j = 0; j < (1 << BYTEWIDTH); j++)
+	    /* `.' matches anything (but if bufp->multibyte is
+	       nonzero, matches `\000' .. `\177' and possibly a multibyte
+	       character) ...  */
+	    if (bufp->multibyte)
+	      {
+		simple_char_max = 0x80;
+
+		for (j = 0x80; j < 0xA0; j++)
+		  if (BASE_LEADING_CODE_P (j))
+		    fastmap[j] = 1;
+		match_any_multibyte_characters = true;
+	      }
+	    else
+	      simple_char_max = (1 << BYTEWIDTH);
+
+	    for (j = 0; j < simple_char_max; j++)
 	      fastmap[j] = 1;
 
 	    /* ... except perhaps newline.  */
@@ -3046,22 +3419,71 @@ re_compile_fastmap (bufp)
 	  }
 
 #ifdef emacs
+	case wordbound:
+	case notwordbound:
+	case wordbeg:
+	case wordend:
+	case notsyntaxspec:
 	case syntaxspec:
+	  /* These matches depend on text properties.  They end by
+	     aborting the optimization.  */
+	  bufp->can_be_null = 1;
+	  goto done;
+#if 0
 	  k = *p++;
-	  for (j = 0; j < (1 << BYTEWIDTH); j++)
+	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+	  for (j = 0; j < simple_char_max; j++)
 	    if (SYNTAX (j) == (enum syntaxcode) k)
 	      fastmap[j] = 1;
-	  break;
 
+	  if (bufp->multibyte)
+	    /* Any character set can possibly contain a character
+	       whose syntax is K.  */
+	    goto set_fastmap_for_multibyte_characters;
+	  break;
 
 	case notsyntaxspec:
 	  k = *p++;
-	  for (j = 0; j < (1 << BYTEWIDTH); j++)
+	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+	  for (j = 0; j < simple_char_max; j++)
 	    if (SYNTAX (j) != (enum syntaxcode) k)
 	      fastmap[j] = 1;
+
+	  if (bufp->multibyte)
+	    /* Any character set can possibly contain a character
+	       whose syntax is not K.  */
+	    goto set_fastmap_for_multibyte_characters;
+	  break;
+#endif
+
+
+	case categoryspec:
+	  k = *p++;
+	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+	  for (j = 0; j < simple_char_max; j++)
+	    if (CHAR_HAS_CATEGORY (j, k))
+	      fastmap[j] = 1;
+
+	  if (bufp->multibyte)
+	    /* Any character set can possibly contain a character
+	       whose category is K.  */
+	    goto set_fastmap_for_multibyte_characters;
 	  break;
 
 
+	case notcategoryspec:
+	  k = *p++;
+	  simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
+	  for (j = 0; j < simple_char_max; j++)
+	    if (!CHAR_HAS_CATEGORY (j, k))
+	      fastmap[j] = 1;
+
+	  if (bufp->multibyte)
+	    /* Any character set can possibly contain a character
+	       whose category is not K.	 */
+	    goto set_fastmap_for_multibyte_characters;
+	  break;
+
       /* All cases after this match the empty string.  These end with
 	 `continue'.  */
 
@@ -3078,10 +3500,12 @@ re_compile_fastmap (bufp)
 	case endline:
 	case begbuf:
 	case endbuf:
+#ifndef emacs
 	case wordbound:
 	case notwordbound:
 	case wordbeg:
 	case wordend:
+#endif
 	case push_dummy_failure:
 	  continue;
 
@@ -3250,6 +3674,13 @@ re_search (bufp, string, size, startpos, range, regs)
 		      regs, size);
 }
 
+/* End address of the string (STRING1 or STRING2) containing position P.  */
+#define STOP_ADDR_VSTRING(P)				\
+  (((P) >= size1 ? string2 + size2 : string1 + size1))
+
+/* Address of position POS in the virtual concatenation of the strings.  */
+#define POS_ADDR_VSTRING(POS)					\
+  (((POS) >= size1 ? string2 - size1 : string1) + (POS))
 
 /* Using the compiled pattern in BUFP->buffer, first tries to match the
    virtual concatenation of STRING1 and STRING2, starting first at index
@@ -3289,6 +3720,9 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
   int endpos = startpos + range;
   int anchored_start = 0;
 
+  /* Nonzero if we have to take multibyte characters into account.  */
+  int multibyte = bufp->multibyte;
+
   /* Check for out-of-range STARTPOS.  */
   if (startpos < 0 || startpos > total_size)
     return -1;
@@ -3331,6 +3765,13 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
   if (bufp->buffer[0] == begline)
     anchored_start = 1;
 
+#ifdef emacs
+  SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object,
+				 POS_AS_IN_BUFFER (startpos > 0
+						   ? startpos - 1 : startpos),
+				 1);
+#endif
+
   /* Loop through the string, looking for a place to start matching.  */
   for (;;)
     {
@@ -3362,7 +3803,7 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
 	      if (startpos < size1 && startpos + range >= size1)
 		lim = range - (size1 - startpos);
 
-	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
+	      d = POS_ADDR_VSTRING (startpos);
 
 	      /* Written out as an if-else to avoid testing `translate'
 		 inside the loop.  */
@@ -3412,13 +3853,53 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
 	break;
       else if (range > 0)
 	{
+	  /* Update STARTPOS to the next character boundary.  */
+	  if (multibyte)
+	    {
+	      const unsigned char *p = POS_ADDR_VSTRING (startpos);
+	      const unsigned char *pend = STOP_ADDR_VSTRING (startpos);
+	      int len = MULTIBYTE_FORM_LENGTH (p, pend - p);
+
+	      range -= len;
+	      if (range < 0)
+		break;
+	      startpos += len;
+	    }
+	  else
+	    {
 	  range--;
 	  startpos++;
 	}
+	}
       else
 	{
 	  range++;
 	  startpos--;
+
+	  /* Update STARTPOS to the previous character boundary.  */
+	  if (multibyte)
+	    {
+	      const unsigned char *p = POS_ADDR_VSTRING (startpos);
+	      int len = 0;
+
+	      /* Find the head of multibyte form.  */
+	      while (!CHAR_HEAD_P (p))
+		p--, len++;
+
+	      /* Adjust it. */
+#if 0				/* XXX */
+	      if (MULTIBYTE_FORM_LENGTH (p, len + 1) != (len + 1))
+		;
+	      else
+#endif
+		{
+		  range += len;
+		  if (range > 0)
+		    break;
+
+		  startpos -= len;
+		}
+	    }
 	}
     }
   return -1;
@@ -3472,6 +3953,15 @@ static boolean alt_match_null_string_p (),
    == Sword)
 
 /* Disabled due to a compiler bug -- see comment at case wordbound */
+
+/* The comment at case wordbound is the following one, but we no longer
+   use AT_WORD_BOUNDARY, in order to support multibyte form.
+
+   The DEC Alpha C compiler 3.x generates incorrect code for the
+   test	 WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
+   AT_WORD_BOUNDARY, so this code is disabled.	Expanding the
+   macro and introducing temporary variables works around the bug.  */
+
 #if 0
 /* Test if the character before D and the one at D differ with respect
    to being word-constituent.  */
@@ -3529,6 +4019,11 @@ re_match (bufp, string, size, pos, regs)
 }
 #endif /* not emacs */
 
+#ifdef emacs
+/* In Emacs, this is the string or buffer in which we
+   are matching.  It is used for looking up syntax properties.	*/
+Lisp_Object re_match_object;
+#endif
 
 /* re_match_2 matches the compiled pattern in BUFP against the
    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
@@ -3552,7 +4047,15 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
      struct re_registers *regs;
      int stop;
 {
-  int result = re_match_2_internal (bufp, string1, size1, string2, size2,
+  int result;
+
+#ifdef emacs
+  SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object,
+				 POS_AS_IN_BUFFER (pos > 0 ? pos - 1 : pos),
+				 1);
+#endif
+
+  result = re_match_2_internal (bufp, string1, size1, string2, size2,
 				    pos, regs, stop);
   alloca (0);
   return result;
@@ -3594,6 +4097,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
   /* We use this to map every character in the string.	*/
   RE_TRANSLATE_TYPE translate = bufp->translate;
 
+  /* Nonzero if we have to take multibyte characters into account.  */
+  int multibyte = bufp->multibyte;
+
   /* Failure point stack.  Each place that can handle a failure further
      down the line pushes a failure point on this stack.  It consists of
      restart, regend, and reg_info for all registers corresponding to
@@ -4022,33 +4528,62 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 
 	  SET_REGS_MATCHED ();
 	  DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
-	  d++;
+	  d += multibyte ? MULTIBYTE_FORM_LENGTH (d, dend - d) : 1;
 	  break;
 
 
 	case charset:
 	case charset_not:
 	  {
-	    register unsigned char c;
+	    register unsigned int c;
 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
+	    int len;
+
+	    /* Start of actual range_table, or end of bitmap if there is no
+	       range table.  */
+	    unsigned char *range_table;
+
+	    /* Nonzero if there is range table.	 */
+	    int range_table_exists;
+
+	    /* Number of ranges of range table.	 Not in bytes.	*/
+	    int count;
 
 	    DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
 
 	    PREFETCH ();
-	    c = TRANSLATE (*d); /* The character to match.  */
+	    c = (unsigned char) *d;
+
+	    range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap.  */
+	    range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
+	    if (range_table_exists)
+	      EXTRACT_NUMBER_AND_INCR (count, range_table);
+	    else
+	      count = 0;
+
+	    if (multibyte && BASE_LEADING_CODE_P (c))
+	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
+
+	    if (SINGLE_BYTE_CHAR_P (c))
+	      {			/* Lookup bitmap.  */
+		c = TRANSLATE (c); /* The character to match.  */
+		len = 1;
 
-	    /* Cast to `unsigned' instead of `unsigned char' in case the
-	       bit list is a full 32 bytes long.  */
-	    if (c < (unsigned) (*p * BYTEWIDTH)
+		/* Cast to `unsigned' instead of `unsigned char' in
+		   case the bit list is a full 32 bytes long.  */
+		if (c < (unsigned) (CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH)
 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
 	      not = !not;
+	      }
+	    else if (range_table_exists)
+	      CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);
 
-	    p += 1 + *p;
+	    p = CHARSET_RANGE_TABLE_END (range_table, count);
 
 	    if (!not) goto fail;
 
 	    SET_REGS_MATCHED ();
-	    d++;
+	    d += len;
 	    break;
 	  }
 
@@ -4492,24 +5027,42 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	    else if ((re_opcode_t) *p2 == exactn
 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
 	      {
-		register unsigned char c
+		register unsigned int c
 		  = *p2 == (unsigned char) endline ? '\n' : p2[2];
 
-		if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
+		if ((re_opcode_t) p1[3] == exactn)
+		  {
+		    if (!(multibyte /* && (c != '\n') */
+			  && BASE_LEADING_CODE_P (c))
+			? c != p1[5]
+			: (STRING_CHAR (&p2[2], pend - &p2[2])
+			   != STRING_CHAR (&p1[5], pend - &p1[5])))
 		  {
 		    p[-3] = (unsigned char) pop_failure_jump;
 		    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
 				  c, p1[5]);
 		  }
+		  }
 
 		else if ((re_opcode_t) p1[3] == charset
 			 || (re_opcode_t) p1[3] == charset_not)
 		  {
 		    int not = (re_opcode_t) p1[3] == charset_not;
 
-		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
+		    if (multibyte /* && (c != '\n') */
+			&& BASE_LEADING_CODE_P (c))
+		      c = STRING_CHAR (&p2[2], pend - &p2[2]);
+
+		    /* Test if C is listed in charset (or charset_not)
+		       at `&p1[3]'.  */
+		    if (SINGLE_BYTE_CHAR_P (c))
+		      {
+			if (c < CHARSET_BITMAP_SIZE (&p1[3]) * BYTEWIDTH
 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
 		      not = !not;
+		      }
+		    else if (CHARSET_RANGE_TABLE_EXISTS_P (&p1[3]))
+		      CHARSET_LOOKUP_RANGE_TABLE (not, c, &p1[3]);
 
 		    /* `not' is equal to 1 if c would match, which means
 			that we can't change to pop_failure_jump.  */
@@ -4522,29 +5075,55 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	      }
 	    else if ((re_opcode_t) *p2 == charset)
 	      {
-#ifdef DEBUG
-		register unsigned char c
-		  = *p2 == (unsigned char) endline ? '\n' : p2[2];
-#endif
+		if ((re_opcode_t) p1[3] == exactn)
+		  {
+		    register unsigned int c = p1[5];
+		    int not = 0;
+
+		    if (multibyte && BASE_LEADING_CODE_P (c))
+		      c = STRING_CHAR (&p1[5], pend - &p1[5]);
+
+		    /* Test if C is listed in charset at `p2'.	*/
+		    if (SINGLE_BYTE_CHAR_P (c))
+		      {
+			if (c < CHARSET_BITMAP_SIZE (p2) * BYTEWIDTH
+			    && (p2[2 + c / BYTEWIDTH]
+				& (1 << (c % BYTEWIDTH))))
+			  not = !not;
+		      }
+		    else if (CHARSET_RANGE_TABLE_EXISTS_P (p2))
+		      CHARSET_LOOKUP_RANGE_TABLE (not, c, p2);
 
-		if ((re_opcode_t) p1[3] == exactn
-		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
-			  && (p2[2 + p1[5] / BYTEWIDTH]
-			      & (1 << (p1[5] % BYTEWIDTH)))))
+		    if (!not)
 		  {
 		    p[-3] = (unsigned char) pop_failure_jump;
-		    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
-				  c, p1[5]);
+			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
+		      }
 		  }
 
-		else if ((re_opcode_t) p1[3] == charset_not)
+		/* It is hard to enumerate all the characters in charset
+		   P2 if it includes multibyte characters.  Give up in
+		   that case.  */
+		else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
+		  {
+		    /* Now, we are sure that P2 has no range table.
+		       So, for the size of the bitmap in P2, `p2[1]' is
+		       enough.  But P1 may have a range table, so the
+		       size of the bitmap table of P1 is extracted by
+		       using the macro `CHARSET_BITMAP_SIZE'.
+
+		       Since we know that every character listed in
+		       P2 is ASCII, it is enough to test only the
+		       bitmap table of P1.  */
+
+		    if ((re_opcode_t) p1[3] == charset_not)
 		  {
 		    int idx;
-		    /* We win if the charset_not inside the loop
-		       lists every character listed in the charset after.  */
+			/* We win if the charset_not inside the loop lists
+			   every character listed in the charset after.	 */
 		    for (idx = 0; idx < (int) p2[1]; idx++)
 		      if (! (p2[2 + idx] == 0
-			     || (idx < (int) p1[4]
+				 || (idx < CHARSET_BITMAP_SIZE (&p1[3])
 				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
 			break;
 
@@ -4560,12 +5139,14 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		    /* We win if the charset inside the loop
 		       has no overlap with the one after the loop.  */
 		    for (idx = 0;
-			 idx < (int) p2[1] && idx < (int) p1[4];
+			     (idx < (int) p2[1]
+			      && idx < CHARSET_BITMAP_SIZE (&p1[3]));
 			 idx++)
 		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
 			break;
 
-		    if (idx == p2[1] || idx == p1[4])
+			if (idx == p2[1]
+			    || idx == CHARSET_BITMAP_SIZE (&p1[3]))
 		      {
 			p[-3] = (unsigned char) pop_failure_jump;
 			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
@@ -4573,6 +5154,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		  }
 	      }
 	  }
+	  }
 	  p -= 2;		/* Point at relative address again.  */
 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
 	    {
@@ -4702,67 +5284,152 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	    break;
 	  }
 
-#if 0
-	/* The DEC Alpha C compiler 3.x generates incorrect code for the
-	   test	 WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
-	   AT_WORD_BOUNDARY, so this code is disabled.	Expanding the
-	   macro and introducing temporary variables works around the bug.  */
-
 	case wordbound:
 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
-	  if (AT_WORD_BOUNDARY (d))
-	    break;
-	  goto fail;
 
-	case notwordbound:
-	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
-	  if (AT_WORD_BOUNDARY (d))
-	    goto fail;
-	  break;
-#else
-	case wordbound:
-	{
-	  boolean prevchar, thischar;
+	  /* We SUCCEED in one of the following cases: */
 
-	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
+	  /* Case 1: D is at the beginning or the end of string.  */
 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
 	    break;
+	  else
+	    {
+	      /* C1 is the character before D, S1 is the syntax of C1, C2
+		 is the character at D, and S2 is the syntax of C2.  */
+	      int c1, c2, s1, s2;
+	      int pos1 = PTR_TO_OFFSET (d - 1);
+
+	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+	      GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
+#ifdef emacs
+	      UPDATE_SYNTAX_TABLE (pos1 ? pos1 : 1);
+#endif
+	      s1 = SYNTAX (c1);
+#ifdef emacs
+	      UPDATE_SYNTAX_TABLE_FORWARD (pos1 + 1);
+#endif
+	      s2 = SYNTAX (c2);
 
-	  prevchar = WORDCHAR_P (d - 1);
-	  thischar = WORDCHAR_P (d);
-	  if (prevchar != thischar)
+	      if (/* Case 2: Only one of S1 and S2 is Sword.  */
+		  ((s1 == Sword) != (s2 == Sword))
+		  /* Case 3: Both of S1 and S2 are Sword, and macro
+		     WORD_BOUNDARY_P (C1, C2) returns nonzero.	*/
+		  || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
 	    break;
-	  goto fail;
 	}
+	  goto fail;
 
       case notwordbound:
-	{
-	  boolean prevchar, thischar;
-
 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
+
+	  /* We FAIL in one of the following cases: */
+
+	  /* Case 1: D is at the beginning or the end of string.  */
 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
 	    goto fail;
+	  else
+	    {
+	      /* C1 is the character before D, S1 is the syntax of C1, C2
+		 is the character at D, and S2 is the syntax of C2.  */
+	      int c1, c2, s1, s2;
+	      int pos1 = PTR_TO_OFFSET (d - 1);
 
-	  prevchar = WORDCHAR_P (d - 1);
-	  thischar = WORDCHAR_P (d);
-	  if (prevchar != thischar)
+	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+	      GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
+#ifdef emacs
+	      UPDATE_SYNTAX_TABLE (pos1);
+#endif
+	      s1 = SYNTAX (c1);
+#ifdef emacs
+	      UPDATE_SYNTAX_TABLE_FORWARD (pos1 + 1);
+#endif
+	      s2 = SYNTAX (c2);
+
+	      if (/* Case 2: Only one of S1 and S2 is Sword.  */
+		  ((s1 == Sword) != (s2 == Sword))
+		  /* Case 3: Both of S1 and S2 are Sword, and macro
+		     WORD_BOUNDARY_P (C1, C2) returns nonzero.	*/
+		  || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
 	    goto fail;
-	  break;
 	}
-#endif
+	  break;
 
 	case wordbeg:
 	  DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
-	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
-	    break;
+
+	  /* We FAIL in one of the following cases: */
+
+	  /* Case 1: D is at the end of string.	 */
+	  if (AT_STRINGS_END (d))
 	  goto fail;
+	  else
+	    {
+	      /* C1 is the character before D, S1 is the syntax of C1, C2
+		 is the character at D, and S2 is the syntax of C2.  */
+	      int c1, c2, s1, s2;
+	      int pos1 = PTR_TO_OFFSET (d);
+
+	      GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
+#ifdef emacs
+	      UPDATE_SYNTAX_TABLE (pos1);
+#endif
+	      s2 = SYNTAX (c2);
+	
+	      /* Case 2: S2 is not Sword. */
+	      if (s2 != Sword)
+		goto fail;
+
+	      /* Case 3: D is not at the beginning of string ... */
+	      if (!AT_STRINGS_BEG (d))
+		{
+		  GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+#ifdef emacs
+		  UPDATE_SYNTAX_TABLE_BACKWARD (pos1 - 1);
+#endif
+		  s1 = SYNTAX (c1);
+
+		  /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2)
+		     returns 0.	 */
+		  if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2))
+		    goto fail;
+		}
+	    }
+	  break;
 
 	case wordend:
 	  DEBUG_PRINT1 ("EXECUTING wordend.\n");
-	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
-	      && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
-	    break;
+
+	  /* We FAIL in one of the following cases: */
+
+	  /* Case 1: D is at the beginning of string.  */
+	  if (AT_STRINGS_BEG (d))
+	    goto fail;
+	  else
+	    {
+	      /* C1 is the character before D, S1 is the syntax of C1, C2
+		 is the character at D, and S2 is the syntax of C2.  */
+	      int c1, c2, s1, s2;
+
+	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+	      s1 = SYNTAX (c1);
+
+	      /* Case 2: S1 is not Sword.  */
+	      if (s1 != Sword)
+		goto fail;
+
+	      /* Case 3: D is not at the end of string ... */
+	      if (!AT_STRINGS_END (d))
+		{
+		  GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
+		  s2 = SYNTAX (c2);
+
+		  /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
+		     returns 0.	 */
+		  if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2))
 	  goto fail;
+		}
+	    }
+	  break;
 
 #ifdef emacs
 	case before_dot:
@@ -4793,10 +5460,27 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	  mcnt = (int) Sword;
 	matchsyntax:
 	  PREFETCH ();
-	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
-	  d++;
-	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
+#ifdef emacs
+	  {
+	    int pos1 = PTR_TO_OFFSET (d);
+	    UPDATE_SYNTAX_TABLE (pos1);
+	  }
+#endif
+	  {
+	    int c, len;
+
+	    if (multibyte)
+	      /* we must take the multibyte form into account, ... */
+	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
+	    else
+	      /* everything should be handled as ASCII, even if it
+		 looks like a multibyte form.  */
+	      c = *d, len = 1;
+
+	    if (SYNTAX (c) != (enum syntaxcode) mcnt)
 	    goto fail;
+	    d += len;
+	  }
 	  SET_REGS_MATCHED ();
 	  break;
 
@@ -4810,86 +5494,138 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	  mcnt = (int) Sword;
 	matchnotsyntax:
 	  PREFETCH ();
-	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
-	  d++;
-	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
+#ifdef emacs
+	  {
+	    int pos1 = PTR_TO_OFFSET (d);
+	    UPDATE_SYNTAX_TABLE (pos1);
+	  }
+#endif
+	  {
+	    int c, len;
+
+	    if (multibyte)
+	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
+	    else
+	      c = *d, len = 1;
+
+	    if (SYNTAX (c) == (enum syntaxcode) mcnt)
 	    goto fail;
+	    d += len;
+	  }
+	  SET_REGS_MATCHED ();
+	  break;
+
+	case categoryspec:
+	  DEBUG_PRINT2 ("EXECUTING categoryspec %d.\n", *p);
+	  mcnt = *p++;
+	  PREFETCH ();
+	  {
+	    int c, len;
+
+	    if (multibyte)
+	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
+	    else
+	      c = *d, len = 1;
+
+	    if (!CHAR_HAS_CATEGORY (c, mcnt))
+	      goto fail;
+	    d += len;
+	  }
 	  SET_REGS_MATCHED ();
 	  break;
 
+	case notcategoryspec:
+	  DEBUG_PRINT2 ("EXECUTING notcategoryspec %d.\n", *p);
+	  mcnt = *p++;
+	  PREFETCH ();
+	  {
+	    int c, len;
+
+	    if (multibyte)
+	      c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
+	    else
+	      c = *d, len = 1;
+
+	    if (CHAR_HAS_CATEGORY (c, mcnt))
+	      goto fail;
+	    d += len;
+	  }
+	  SET_REGS_MATCHED ();
+          break;
+
 #else /* not emacs */
 	case wordchar:
-	  DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
+          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
 	  PREFETCH ();
-	  if (!WORDCHAR_P (d))
-	    goto fail;
+          if (!WORDCHAR_P (d))
+            goto fail;
 	  SET_REGS_MATCHED ();
-	  d++;
+          d++;
 	  break;
 
 	case notwordchar:
-	  DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
+          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
 	  PREFETCH ();
 	  if (WORDCHAR_P (d))
-	    goto fail;
-	  SET_REGS_MATCHED ();
-	  d++;
+            goto fail;
+          SET_REGS_MATCHED ();
+          d++;
 	  break;
 #endif /* not emacs */
 
-	default:
-	  abort ();
+        default:
+          abort ();
 	}
-      continue;	 /* Successfully executed one pattern command; keep going.  */
+      continue;  /* Successfully executed one pattern command; keep going.  */
 
 
     /* We goto here if a matching operation fails. */
     fail:
       if (!FAIL_STACK_EMPTY ())
-	{ /* A restart point is known.	Restore to that state.	*/
-	  DEBUG_PRINT1 ("\nFAIL:\n");
-	  POP_FAILURE_POINT (d, p,
-			     lowest_active_reg, highest_active_reg,
-			     regstart, regend, reg_info);
-
-	  /* If this failure point is a dummy, try the next one.  */
-	  if (!p)
+	{ /* A restart point is known.  Restore to that state.  */
+          DEBUG_PRINT1 ("\nFAIL:\n");
+          POP_FAILURE_POINT (d, p,
+                             lowest_active_reg, highest_active_reg,
+                             regstart, regend, reg_info);
+
+          /* If this failure point is a dummy, try the next one.  */
+          if (!p)
 	    goto fail;
 
-	  /* If we failed to the end of the pattern, don't examine *p.	*/
+          /* If we failed to the end of the pattern, don't examine *p.  */
 	  assert (p <= pend);
-	  if (p < pend)
-	    {
-	      boolean is_a_jump_n = false;
-
-	      /* If failed to a backwards jump that's part of a repetition
-		 loop, need to pop this failure point and use the next one.  */
-	      switch ((re_opcode_t) *p)
-		{
-		case jump_n:
-		  is_a_jump_n = true;
-		case maybe_pop_jump:
-		case pop_failure_jump:
-		case jump:
-		  p1 = p + 1;
-		  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-		  p1 += mcnt;
-
-		  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
-		      || (!is_a_jump_n
-			  && (re_opcode_t) *p1 == on_failure_jump))
-		    goto fail;
-		  break;
-		default:
-		  /* do nothing */ ;
-		}
-	    }
-
-	  if (d >= string1 && d <= end1)
+          if (p < pend)
+            {
+              boolean is_a_jump_n = false;
+
+              /* If failed to a backwards jump that's part of a repetition
+                 loop, need to pop this failure point and use the next one.  */
+              switch ((re_opcode_t) *p)
+                {
+                case jump_n:
+                  is_a_jump_n = true;
+                case maybe_pop_jump:
+                case pop_failure_jump:
+                case jump:
+                  p1 = p + 1;
+                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+                  p1 += mcnt;
+
+                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
+                      || (!is_a_jump_n
+                          && (re_opcode_t) *p1 == on_failure_jump))
+                    goto fail;
+                  break;
+                default:
+                  /* do nothing */ ;
+                }
+            }
+
+          if (d >= string1 && d <= end1)
 	    dend = end_match_1;
-	}
+        }
       else
-	break;	 /* Matching at this starting point really fails.  */
+        break;   /* Matching at this starting point really fails.  */
     } /* for (;;) */
 
   if (best_regs_set)
@@ -4897,7 +5633,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 
   FREE_VARIABLES ();
 
-  return -1;				/* Failure to match.  */
+  return -1;         			/* Failure to match.  */
 } /* re_match_2 */
 
 /* Subroutine definitions for re_match_2.  */
@@ -4926,92 +5662,92 @@ group_match_null_string_p (p, end, reg_info)
     {
       /* Skip over opcodes that can match nothing, and return true or
 	 false, as appropriate, when we get to one that can't, or to the
-	 matching stop_memory.	*/
+         matching stop_memory.  */
 
       switch ((re_opcode_t) *p1)
-	{
-	/* Could be either a loop or a series of alternatives.	*/
-	case on_failure_jump:
-	  p1++;
-	  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+        {
+        /* Could be either a loop or a series of alternatives.  */
+        case on_failure_jump:
+          p1++;
+          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
 
-	  /* If the next operation is not a jump backwards in the
+          /* If the next operation is not a jump backwards in the
 	     pattern.  */
 
 	  if (mcnt >= 0)
 	    {
-	      /* Go through the on_failure_jumps of the alternatives,
-		 seeing if any of the alternatives cannot match nothing.
-		 The last alternative starts with only a jump,
-		 whereas the rest start with on_failure_jump and end
-		 with a jump, e.g., here is the pattern for `a|b|c':
+              /* Go through the on_failure_jumps of the alternatives,
+                 seeing if any of the alternatives cannot match nothing.
+                 The last alternative starts with only a jump,
+                 whereas the rest start with on_failure_jump and end
+                 with a jump, e.g., here is the pattern for `a|b|c':
 
-		 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
-		 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
-		 /exactn/1/c
+                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
+                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
+                 /exactn/1/c
 
-		 So, we have to first go through the first (n-1)
-		 alternatives and then deal with the last one separately.  */
+                 So, we have to first go through the first (n-1)
+                 alternatives and then deal with the last one separately.  */
 
 
-	      /* Deal with the first (n-1) alternatives, which start
-		 with an on_failure_jump (see above) that jumps to right
-		 past a jump_past_alt.	*/
+              /* Deal with the first (n-1) alternatives, which start
+                 with an on_failure_jump (see above) that jumps to right
+                 past a jump_past_alt.  */
 
-	      while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
-		{
-		  /* `mcnt' holds how many bytes long the alternative
-		     is, including the ending `jump_past_alt' and
-		     its number.  */
+              while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
+                {
+                  /* `mcnt' holds how many bytes long the alternative
+                     is, including the ending `jump_past_alt' and
+                     its number.  */
 
-		  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
-						      reg_info))
-		    return false;
+                  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
+				                      reg_info))
+                    return false;
 
-		  /* Move to right after this alternative, including the
+                  /* Move to right after this alternative, including the
 		     jump_past_alt.  */
-		  p1 += mcnt;
+                  p1 += mcnt;
 
-		  /* Break if it's the beginning of an n-th alternative
-		     that doesn't begin with an on_failure_jump.  */
-		  if ((re_opcode_t) *p1 != on_failure_jump)
-		    break;
+                  /* Break if it's the beginning of an n-th alternative
+                     that doesn't begin with an on_failure_jump.  */
+                  if ((re_opcode_t) *p1 != on_failure_jump)
+                    break;
 
 		  /* Still have to check that it's not an n-th
 		     alternative that starts with an on_failure_jump.  */
 		  p1++;
-		  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-		  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
-		    {
-		      /* Get to the beginning of the n-th alternative.	*/
-		      p1 -= 3;
-		      break;
-		    }
-		}
+                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+                  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
+                    {
+		      /* Get to the beginning of the n-th alternative.  */
+                      p1 -= 3;
+                      break;
+                    }
+                }
 
-	      /* Deal with the last alternative: go back and get number
-		 of the `jump_past_alt' just before it.	 `mcnt' contains
-		 the length of the alternative.	 */
-	      EXTRACT_NUMBER (mcnt, p1 - 2);
+              /* Deal with the last alternative: go back and get number
+                 of the `jump_past_alt' just before it.  `mcnt' contains
+                 the length of the alternative.  */
+              EXTRACT_NUMBER (mcnt, p1 - 2);
 
-	      if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
-		return false;
+              if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
+                return false;
 
-	      p1 += mcnt;	/* Get past the n-th alternative.  */
-	    } /* if mcnt > 0 */
-	  break;
+              p1 += mcnt;	/* Get past the n-th alternative.  */
+            } /* if mcnt > 0 */
+          break;
 
 
-	case stop_memory:
+        case stop_memory:
 	  assert (p1[1] == **p);
-	  *p = p1 + 2;
-	  return true;
+          *p = p1 + 2;
+          return true;
 
 
-	default:
-	  if (!common_op_match_null_string_p (&p1, end, reg_info))
-	    return false;
-	}
+        default:
+          if (!common_op_match_null_string_p (&p1, end, reg_info))
+            return false;
+        }
     } /* while p1 < end */
 
   return false;
@@ -5033,21 +5769,21 @@ alt_match_null_string_p (p, end, reg_info)
   while (p1 < end)
     {
       /* Skip over opcodes that can match nothing, and break when we get
-	 to one that can't.  */
+         to one that can't.  */
 
       switch ((re_opcode_t) *p1)
-	{
-	/* It's a loop.	 */
-	case on_failure_jump:
-	  p1++;
-	  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-	  p1 += mcnt;
-	  break;
+        {
+	/* It's a loop.  */
+        case on_failure_jump:
+          p1++;
+          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+          p1 += mcnt;
+          break;
 
 	default:
-	  if (!common_op_match_null_string_p (&p1, end, reg_info))
-	    return false;
-	}
+          if (!common_op_match_null_string_p (&p1, end, reg_info))
+            return false;
+        }
     }  /* while p1 < end */
 
   return true;
@@ -5093,42 +5829,42 @@ common_op_match_null_string_p (p, end, reg_info)
       ret = group_match_null_string_p (&p1, end, reg_info);
 
       /* Have to set this here in case we're checking a group which
-	 contains a group and a back reference to it.  */
+         contains a group and a back reference to it.  */
 
       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
-	REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
+        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
 
       if (!ret)
-	return false;
+        return false;
       break;
 
-    /* If this is an optimized succeed_n for zero times, make the jump.	 */
+    /* If this is an optimized succeed_n for zero times, make the jump.  */
     case jump:
       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
       if (mcnt >= 0)
-	p1 += mcnt;
+        p1 += mcnt;
       else
-	return false;
+        return false;
       break;
 
     case succeed_n:
-      /* Get to the number of times to succeed.	 */
+      /* Get to the number of times to succeed.  */
       p1 += 2;
       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
 
       if (mcnt == 0)
-	{
-	  p1 -= 4;
-	  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-	  p1 += mcnt;
-	}
+        {
+          p1 -= 4;
+          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+          p1 += mcnt;
+        }
       else
-	return false;
+        return false;
       break;
 
     case duplicate:
       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
-	return false;
+        return false;
       break;
 
     case set_number_at:
@@ -5171,7 +5907,7 @@ bcmp_translate (s1, s2, len, translate)
    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
    are set in BUFP on entry.
 
-   We call regex_compile to do the actual compilation.	*/
+   We call regex_compile to do the actual compilation.  */
 
 const char *
 re_compile_pattern (pattern, length, bufp)
@@ -5190,7 +5926,7 @@ re_compile_pattern (pattern, length, bufp)
      setting no_sub.  */
   bufp->no_sub = 0;
 
-  /* Match anchors at newline.	*/
+  /* Match anchors at newline.  */
   bufp->newline_anchor = 1;
 
   ret = regex_compile (pattern, length, re_syntax_options, bufp);
@@ -5200,8 +5936,8 @@ re_compile_pattern (pattern, length, bufp)
   return gettext (re_error_msgid[(int) ret]);
 }
 
-/* Entry points compatible with 4.2 BSD regex library.	We don't define
-   them unless specifically requested.	*/
+/* Entry points compatible with 4.2 BSD regex library.  We don't define
+   them unless specifically requested.  */
 
 #if defined (_REGEX_RE_COMP) || defined (_LIBC)
 
@@ -5231,7 +5967,7 @@ re_comp (s)
     {
       re_comp_buf.buffer = (unsigned char *) malloc (200);
       if (re_comp_buf.buffer == NULL)
-	return gettext (re_error_msgid[(int) REG_ESPACE]);
+        return gettext (re_error_msgid[(int) REG_ESPACE]);
       re_comp_buf.allocated = 200;
 
       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
@@ -5242,7 +5978,7 @@ re_comp (s)
   /* Since `re_exec' always passes NULL for the `regs' argument, we
      don't need to initialize the pattern buffer fields which affect it.  */
 
-  /* Match anchors at newlines.	 */
+  /* Match anchors at newlines.  */
   re_comp_buf.newline_anchor = 1;
 
   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
@@ -5274,7 +6010,7 @@ re_exec (s)
 
 /* regcomp takes a regular expression as a string and compiles it.
 
-   PREG is a regex_t *.	 We do not expect any fields to be initialized,
+   PREG is a regex_t *.  We do not expect any fields to be initialized,
    since POSIX says we shouldn't.  Thus, we set
 
      `buffer' to the compiled pattern;
@@ -5303,7 +6039,7 @@ re_exec (s)
      routine will report only success or failure, and nothing about the
      registers.
 
-   It returns 0 if it succeeds, nonzero if it doesn't.	(See regex.h for
+   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
    the return codes and their meanings.)  */
 
 int
@@ -5336,11 +6072,11 @@ regcomp (preg, pattern, cflags)
 	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
 				      * sizeof (*(RE_TRANSLATE_TYPE)0));
       if (preg->translate == NULL)
-	return (int) REG_ESPACE;
+        return (int) REG_ESPACE;
 
       /* Map uppercase characters to corresponding lowercase ones.  */
       for (i = 0; i < CHAR_SET_SIZE; i++)
-	preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
+        preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
     }
   else
     preg->translate = NULL;
@@ -5350,7 +6086,7 @@ regcomp (preg, pattern, cflags)
     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
       syntax &= ~RE_DOT_NEWLINE;
       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
-      /* It also changes the matching behavior.	 */
+      /* It also changes the matching behavior.  */
       preg->newline_anchor = 1;
     }
   else
@@ -5374,7 +6110,7 @@ regcomp (preg, pattern, cflags)
    string STRING.
 
    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
-   `regcomp', we ignore PMATCH.	 Otherwise, we assume PMATCH has at
+   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
    least NMATCH elements, and we set them to the offsets of the
    corresponding matched substrings.
 
@@ -5405,7 +6141,7 @@ regexec (preg, string, nmatch, pmatch, eflags)
 
   /* The user has told us exactly how many registers to return
      information about, via `nmatch'.  We have to pass that on to the
-     matching routines.	 */
+     matching routines.  */
   private_preg.regs_allocated = REGS_FIXED;
 
   if (want_reg_info)
@@ -5414,29 +6150,29 @@ regexec (preg, string, nmatch, pmatch, eflags)
       regs.start = TALLOC (nmatch, regoff_t);
       regs.end = TALLOC (nmatch, regoff_t);
       if (regs.start == NULL || regs.end == NULL)
-	return (int) REG_NOMATCH;
+        return (int) REG_NOMATCH;
     }
 
   /* Perform the searching operation.  */
   ret = re_search (&private_preg, string, len,
-		   /* start: */ 0, /* range: */ len,
-		   want_reg_info ? &regs : (struct re_registers *) 0);
+                   /* start: */ 0, /* range: */ len,
+                   want_reg_info ? &regs : (struct re_registers *) 0);
 
   /* Copy the register information to the POSIX structure.  */
   if (want_reg_info)
     {
       if (ret >= 0)
-	{
-	  unsigned r;
+        {
+          unsigned r;
 
-	  for (r = 0; r < nmatch; r++)
-	    {
-	      pmatch[r].rm_so = regs.start[r];
-	      pmatch[r].rm_eo = regs.end[r];
-	    }
-	}
+          for (r = 0; r < nmatch; r++)
+            {
+              pmatch[r].rm_so = regs.start[r];
+              pmatch[r].rm_eo = regs.end[r];
+            }
+        }
 
-      /* If we needed the temporary register info, free the space now.	*/
+      /* If we needed the temporary register info, free the space now.  */
       free (regs.start);
       free (regs.end);
     }
@@ -5462,7 +6198,7 @@ regerror (errcode, preg, errbuf, errbuf_size)
   if (errcode < 0
       || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
     /* Only error codes returned by the rest of the code should be passed
-       to this routine.	 If we are given anything else, or if other regex
+       to this routine.  If we are given anything else, or if other regex
        code generates an invalid error code, then the program has a bug.
        Dump core so we can fix it.  */
     abort ();
@@ -5474,12 +6210,12 @@ regerror (errcode, preg, errbuf, errbuf_size)
   if (errbuf_size != 0)
     {
       if (msg_size > errbuf_size)
-	{
-	  strncpy (errbuf, msg, errbuf_size - 1);
-	  errbuf[errbuf_size - 1] = 0;
-	}
+        {
+          strncpy (errbuf, msg, errbuf_size - 1);
+          errbuf[errbuf_size - 1] = 0;
+        }
       else
-	strcpy (errbuf, msg);
+        strcpy (errbuf, msg);
     }
 
   return msg_size;