(re_match, re_match_2): Protect calls to alloca (0).

[gnulib.git] / regex.c
diff --git a/regex.c b/regex.c

index 911daed..1b796c0 100644 (file)
--- a/regex.c
+++ b/regex.c
@@ -20,7 +20,6 @@
     USA.         */
  
  /* TODO:
-   - clean up multibyte issues
     - structure the opcode space into opcode+flag.
     - merge with glibc's regex.[ch]
   */
@@ -37,8 +36,6 @@
  /* Converts the pointer to the char to BEG-based offset from the start.         */
  #define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
  #define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
-#else
-#define PTR_TO_OFFSET(d) 0
  #endif
  
  #ifdef HAVE_CONFIG_H
@@ -79,8 +76,28 @@
  #define realloc xrealloc
  #define free xfree
  
+#define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
  #define RE_STRING_CHAR(p, s) \
    (multibyte ? (STRING_CHAR (p, s)) : (*(p)))
+#define RE_STRING_CHAR_AND_LENGTH(p, s, len) \
+  (multibyte ? (STRING_CHAR_AND_LENGTH (p, s, len)) : ((len) = 1, *(p)))
+
+/* Set C to the (possibly multibyte) character before P.  P points into a
+   string which is the virtual concatenation of STR1 (which ends at
+   END1) and STR2 (which ends at END2).  */
+#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2)                        \
+  do {                                                                 \
+    if (multibyte)                                                     \
+       {                                                               \
+         re_char *dtemp = (p) == (str2) ? (end1) : (p);                        \
+         re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
+         while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp));            \
+         c = STRING_CHAR (dtemp, (p) - dtemp);                         \
+       }                                                               \
+     else                                                              \
+       (c = ((p) == (str2) ? (end1) : (p))[-1]);                       \
+  } while (0)
+
  
  #else  /* not emacs */
  
@@ -181,6 +198,8 @@ init_syntax_once ()
  #define BASE_LEADING_CODE_P(c) (0)
  #define CHAR_CHARSET(c) 0
  #define CHARSET_LEADING_CODE_BASE(c) 0
+#define MAX_MULTIBYTE_LENGTH 1
+#define RE_MULTIBYTE_P(x) 0
  #define WORD_BOUNDARY_P(c1, c2) (0)
  #define CHAR_HEAD_P(p) (1)
  #define SINGLE_BYTE_CHAR_P(c) (1)
@@ -188,9 +207,12 @@ init_syntax_once ()
  #define MULTIBYTE_FORM_LENGTH(p, s) (1)
  #define STRING_CHAR(p, s) (*(p))
  #define RE_STRING_CHAR STRING_CHAR
+#define CHAR_STRING(c, s) (*(s) = (c), 1)
  #define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
+#define RE_STRING_CHAR_AND_LENGTH STRING_CHAR_AND_LENGTH
  #define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
    (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
+#define MAKE_CHAR(charset, c1, c2) (c1)
  #endif /* not emacs */
  
  #ifndef RE_TRANSLATE
@@ -1550,19 +1572,19 @@ static int analyse_first _RE_ARGS((unsigned char *p, unsigned char *pend,
  #define PATFETCH(c)                                                    \
    do {                                                                 \
      PATFETCH_RAW (c);                                                  \
-    if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c);   \
+    c = TRANSLATE (c);                                                 \
    } while (0)
  
  /* Fetch the next character in the uncompiled pattern, with no
     translation.         */
  #define PATFETCH_RAW(c)                                                        \
-  do {if (p == pend) return REG_EEND;                                  \
-    c = *p++;                                                          \
+  do {                                                                 \
+    int len;                                                           \
+    if (p == pend) return REG_EEND;                                    \
+    c = RE_STRING_CHAR_AND_LENGTH (p, pend - p, len);                  \
+    p += len;                                                          \
    } while (0)
  
-/* Go backwards one character in the pattern.  */
-#define PATUNFETCH p--
-
  
  /* If `translate' is non-null, return translate[D], else just D.  We
     cast the subscript to translate because some data is declared as
@@ -1957,6 +1979,9 @@ regex_compile (pattern, size, syntax, bufp)
    /* Work area for range table of charset.  */
    struct range_table_work_area range_table_work;
  
+  /* If the object matched can contain multibyte characters.  */
+  const boolean multibyte = RE_MULTIBYTE_P (bufp);
+
  #ifdef DEBUG
    debug++;
    DEBUG_PRINT1 ("\nCompiling pattern: ");
@@ -1994,14 +2019,6 @@ regex_compile (pattern, size, syntax, bufp)
    /* Always count groups, whether or not bufp->no_sub is set.  */
    bufp->re_nsub = 0;
  
-#ifdef emacs
-  /* bufp->multibyte is set before regex_compile is called, so don't alter
-     it. */
-#else  /* not emacs */
-  /* Nothing is recognized as a multibyte character.  */
-  bufp->multibyte = 0;
-#endif
-
  #if !defined (emacs) && !defined (SYNTAX_TABLE)
    /* Initialize the syntax table.  */
     init_syntax_once ();
@@ -2254,8 +2271,8 @@ regex_compile (pattern, size, syntax, bufp)
             /* Read in characters and ranges, setting map bits.  */
             for (;;)
               {
-               int len;
                 boolean escaped_char = false;
+               const unsigned char *p2 = p;
  
                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
  
@@ -2274,19 +2291,10 @@ regex_compile (pattern, size, syntax, bufp)
                     /* Could be the end of the bracket expression.      If it's
                        not (i.e., when the bracket expression is `[]' so
                        far), the ']' character bit gets set way below.  */
-                   if (c == ']' && p != p1 + 1)
+                   if (c == ']' && p2 != p1)
                       break;
                   }
  
-               /* If C indicates start of multibyte char, get the
-                  actual character code in C, and set the pattern
-                  pointer P to the next character boundary.  */
-               if (bufp->multibyte && BASE_LEADING_CODE_P (c))
-                 {
-                   PATUNFETCH;
-                   c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
-                   p += len;
-                 }
                 /* What should we do for the character which is
                    greater than 0x7F, but not BASE_LEADING_CODE_P?
                    XXX */
@@ -2294,8 +2302,8 @@ regex_compile (pattern, size, syntax, bufp)
                 /* See if we're at the beginning of a possible character
                    class.  */
  
-               else if (!escaped_char &&
-                        syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+               if (!escaped_char &&
+                   syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
                   {
                     /* Leave room for the null.  */
                     char str[CHAR_CLASS_MAX_LENGTH + 1];
@@ -2358,7 +2366,7 @@ regex_compile (pattern, size, syntax, bufp)
                            they can only match ASCII characters.  We
                            don't need to handle them for multibyte.  */
  
-                       if (bufp->multibyte)
+                       if (multibyte)
                           {
                             int bit = 0;
  
@@ -2435,25 +2443,24 @@ regex_compile (pattern, size, syntax, bufp)
  
                     /* Fetch the character which ends the range. */
                     PATFETCH (c1);
-                   if (bufp->multibyte && BASE_LEADING_CODE_P (c1))
-                     {
-                       PATUNFETCH;
-                       c1 = STRING_CHAR_AND_LENGTH (p, pend - p, len);
-                       p += len;
-                     }
  
-                   if (SINGLE_BYTE_CHAR_P (c)
-                       && ! SINGLE_BYTE_CHAR_P (c1))
+                   if (SINGLE_BYTE_CHAR_P (c))
                       {
-                       /* Handle a range such as \177-\377 in multibyte mode.
-                          Split that into two ranges,,
-                          the low one ending at 0237, and the high one
-                          starting at ...040.  */
-                       /*   Unless I'm missing something,
-                            this line is useless.  -sm
-                          int c1_base = (c1 & ~0177) | 040; */
-                       SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
-                       c1 = 0237;
+                       if (! SINGLE_BYTE_CHAR_P (c1))
+                         {
+                           /* Handle a range such as \177-\377 in
+                              multibyte mode.  Split that into two
+                              ranges, the low one ending at 0237, and
+                              the high one starting at the smallest
+                              character in the charset of C1 and
+                              ending at C1.  */
+                           int charset = CHAR_CHARSET (c1);
+                           int c2 = MAKE_CHAR (charset, 0, 0);
+                           
+                           SET_RANGE_TABLE_WORK_AREA (range_table_work,
+                                                      c2, c1);
+                           c1 = 0237;
+                         }
                       }
                     else if (!SAME_CHARSET_P (c, c1))
                       FREE_STACK_RETURN (REG_ERANGE);
@@ -3028,16 +3035,6 @@ regex_compile (pattern, size, syntax, bufp)
         default:
         /* Expects the character in `c'.  */
         normal_char:
-         p1 = p - 1;           /* P1 points the head of C.  */
-#ifdef emacs
-         if (bufp->multibyte)
-           {
-             c = STRING_CHAR (p1, pend - p1);
-             c = TRANSLATE (c);
-             /* Set P to the next character boundary.  */
-             p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1;
-           }
-#endif
               /* If no exactn currently being built.  */
           if (!pending_exact
  
@@ -3045,7 +3042,7 @@ regex_compile (pattern, size, syntax, bufp)
               || pending_exact + *pending_exact + 1 != b
  
               /* We have only one byte following the exactn for the count.  */
-             || *pending_exact >= (1 << BYTEWIDTH) - (p - p1)
+             || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH
  
               /* If followed by a repetition operator.  */
               || (p != pend && (*p == '*' || *p == '^'))
@@ -3065,24 +3062,13 @@ regex_compile (pattern, size, syntax, bufp)
               pending_exact = b - 1;
             }
  
-#ifdef emacs
-         if (! SINGLE_BYTE_CHAR_P (c))
-           {
-             unsigned char str[MAX_MULTIBYTE_LENGTH];
-             int i = CHAR_STRING (c, str);
-             int j;
-             for (j = 0; j < i; j++)
-               {
-                 BUF_PUSH (str[j]);
-                 (*pending_exact)++;
-               }
-           }
-         else
-#endif
-           {
-             BUF_PUSH (c);
-             (*pending_exact)++;
-           }
+         GET_BUFFER_SPACE (MAX_MULTIBYTE_LENGTH);
+         {
+           int len = CHAR_STRING (c, b);
+           b += len;
+           (*pending_exact) += len;
+         }
+
           break;
         } /* switch (c) */
      } /* while p != pend */
@@ -3228,7 +3214,12 @@ at_begline_loc_p (pattern, p, syntax)
         /* After a subexpression?  */
         (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
         /* After an alternative?         */
-    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
+    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash))
+       /* After a shy subexpression?  */
+    || ((syntax & RE_SHY_GROUPS) && prev - 2 >= pattern
+       && prev[-1] == '?' && prev[-2] == '('
+       && (syntax & RE_NO_BK_PARENS
+           || (prev - 3 >= pattern && prev[-3] == '\\')));
  }
  
  
@@ -3616,7 +3607,7 @@ re_compile_fastmap (bufp)
    bufp->fastmap_accurate = 1;      /* It will be when we're done.  */
  
    analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
-                           fastmap, bufp->multibyte);
+                           fastmap, RE_MULTIBYTE_P (bufp));
    if (analysis < -1)
      return analysis;
    bufp->can_be_null = (analysis != 0);
@@ -3723,7 +3714,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
    int anchored_start = 0;
  
    /* Nonzero if we have to concern multibyte character.         */
-  const boolean multibyte = bufp->multibyte;
+  const boolean multibyte = RE_MULTIBYTE_P (bufp);
  
    /* Check for out-of-range STARTPOS.  */
    if (startpos < 0 || startpos > total_size)
@@ -3850,11 +3841,11 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
             }
           else                          /* Searching backwards.  */
             {
-             buf_ch = STRING_CHAR (d, (startpos >= size1
-                                       ? size2 + size1 - startpos
-                                       : size1 - startpos));
-             if (RE_TRANSLATE_P (translate))
-               buf_ch = RE_TRANSLATE (translate, buf_ch);
+             int room = (startpos >= size1
+                         ? size2 + size1 - startpos
+                         : size1 - startpos);
+             buf_ch = RE_STRING_CHAR (d, room);
+             buf_ch = TRANSLATE (buf_ch);
  
               if (! (buf_ch >= 0400
                      || fastmap[buf_ch]))
@@ -3940,7 +3931,10 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
  \f
  /* Declarations and macros for re_match_2.  */
  
-static int bcmp_translate ();
+static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
+                                   register int len,
+                                   RE_TRANSLATE_TYPE translate,
+                                   const int multibyte));
  
  /* This converts PTR, a pointer into one of the search strings `string1'
     and `string2' into an offset from the beginning of that string.  */
@@ -3950,7 +3944,9 @@ static int bcmp_translate ();
     : ((regoff_t) ((ptr) - string2 + size1)))
  
  /* Call before fetching a character with *d.  This switches over to
-   string2 if necessary.  */
+   string2 if necessary.
+   Check re_match_2_internal for a discussion of why end_match_2 might
+   not be within string2 (but be equal to end_match_1 instead).  */
  #define PREFETCH()                                                     \
    while (d == dend)                                                    \
      {                                                                  \
@@ -3962,6 +3958,16 @@ static int bcmp_translate ();
        dend = end_match_2;                                              \
      }
  
+/* Call before fetching a char with *d if you already checked other limits.
+   This is meant for use in lookahead operations like wordend, etc.,
+   where we might need to look at parts of the string that might be
+   outside of the LIMITs (i.e. past `stop').  */
+#define PREFETCH_NOLIMIT()                                             \
+  if (d == end1)                                                       \
+     {                                                                 \
+       d = string2;                                                    \
+       dend = end_match_2;                                             \
+     }                                                                 \
  
  /* Test if at very beginning or at very end of the virtual concatenation
     of `string1' and `string2'. If only one string, it's `string2'.  */
@@ -4093,7 +4099,7 @@ mutually_exclusive_p (bufp, p1, p2)
       unsigned char *p1, *p2;
  {
    re_opcode_t op2;
-  const boolean multibyte = bufp->multibyte;
+  const boolean multibyte = RE_MULTIBYTE_P (bufp);
    unsigned char *pend = bufp->buffer + bufp->used;
  
    assert (p1 >= bufp->buffer && p1 < pend
@@ -4285,7 +4291,9 @@ re_match (bufp, string, size, pos, regs)
  {
    int result = re_match_2_internal (bufp, NULL, 0, string, size,
                                     pos, regs, size);
+#if defined (C_ALLOCA) && !defined (REGEX_MALLOC)
    alloca (0);
+#endif
    return result;
  }
  #endif /* not emacs */
@@ -4329,7 +4337,9 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
  
    result = re_match_2_internal (bufp, string1, size1, string2, size2,
                                 pos, regs, stop);
+#if defined (C_ALLOCA) && !defined (REGEX_MALLOC)
    alloca (0);
+#endif
    return result;
  }
  
@@ -4373,7 +4383,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
    RE_TRANSLATE_TYPE translate = bufp->translate;
  
    /* Nonzero if we have to concern multibyte character.         */
-  const boolean multibyte = bufp->multibyte;
+  const boolean multibyte = RE_MULTIBYTE_P (bufp);
  
    /* Failure point stack.  Each place that can handle a failure further
       down the line pushes a failure point on this stack.  It consists of
@@ -4480,15 +4490,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
    for (mcnt = 1; mcnt < num_regs; mcnt++)
      regstart[mcnt] = regend[mcnt] = REG_UNSET_VALUE;
  
-  /* Shorten strings to `stop'.  */
-  if (stop <= size1)
-    {
-      size1 = stop;
-      size2 = 0;
-    }
-  else if (stop <= size1 + size2)
-    size2 = stop - size1;
-
    /* We move `string1' into `string2' if the latter's empty -- but not if
       `string1' is null.         */
    if (size2 == 0 && string1 != NULL)
@@ -4501,25 +4502,44 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
    end1 = string1 + size1;
    end2 = string2 + size2;
  
-  /* Compute where to stop matching, within the two strings.  */
-  end_match_1 = end1;
-  end_match_2 = end2;
-
    /* `p' scans through the pattern as `d' scans through the data.
       `dend' is the end of the input string that `d' points within.  `d'
       is advanced into the following input string whenever necessary, but
       this happens before fetching; therefore, at the beginning of the
       loop, `d' can be pointing at the end of a string, but it cannot
       equal `string2'.  */
-  if (size1 > 0 && pos <= size1)
+  if (pos >= size1)
      {
-      d = string1 + pos;
-      dend = end_match_1;
+      /* Only match within string2.  */
+      d = string2 + pos - size1;
+      dend = end_match_2 = string2 + stop - size1;
+      end_match_1 = end1;      /* Just to give it a value.  */
      }
    else
      {
-      d = string2 + pos - size1;
-      dend = end_match_2;
+      if (stop < size1)
+       {
+         /* Only match within string1.  */
+         end_match_1 = string1 + stop;
+         /* BEWARE!
+            When we reach end_match_1, PREFETCH normally switches to string2.
+            But in the present case, this means that just doing a PREFETCH
+            makes us jump from `stop' to `gap' within the string.
+            What we really want here is for the search to stop as
+            soon as we hit end_match_1.  That's why we set end_match_2
+            to end_match_1 (since PREFETCH fails as soon as we hit
+            end_match_2).  */
+         end_match_2 = end_match_1;
+       }
+      else
+       { /* It's important to use this code when stop == size1 so that
+            moving `d' from end1 to string2 will not prevent the d == dend
+            check from catching the end of string.  */
+         end_match_1 = end1;
+         end_match_2 = string2 + stop - size1;
+       }
+      d = string1 + pos;
+      dend = end_match_1;
      }
  
    DEBUG_PRINT1 ("The compiled pattern is: ");
@@ -4721,7 +4741,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
              testing `translate' inside the loop.  */
           if (RE_TRANSLATE_P (translate))
             {
-#ifdef emacs
               if (multibyte)
                 do
                   {
@@ -4745,7 +4764,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
                   }
                 while (mcnt > 0);
               else
-#endif /* not emacs */
                 do
                   {
                     PREFETCH ();
@@ -4783,17 +4801,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
             DEBUG_PRINT1 ("EXECUTING anychar.\n");
  
             PREFETCH ();
-
-#ifdef emacs
-           if (multibyte)
-             buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
-           else
-#endif /* not emacs */
-             {
-               buf_ch = *d;
-               buf_charlen = 1;
-             }
-
+           buf_ch = RE_STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
             buf_ch = TRANSLATE (buf_ch);
  
             if ((!(bufp->syntax & RE_DOT_NEWLINE)
@@ -4828,27 +4836,20 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
  
             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
  
-           PREFETCH ();
-           c = *d;
-
             range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
  
-#ifdef emacs
             if (range_table_exists)
               {
                 range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap.  */
                 EXTRACT_NUMBER_AND_INCR (count, range_table);
               }
  
-           if (multibyte && BASE_LEADING_CODE_P (c))
-             c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
-#endif /* emacs */
+           PREFETCH ();
+           c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
+           c = TRANSLATE (c); /* The character to match.  */
  
             if (SINGLE_BYTE_CHAR_P (c))
               {                 /* Lookup bitmap.  */
-               c = TRANSLATE (c); /* The character to match.  */
-               len = 1;
-
                 /* Cast to `unsigned' instead of `unsigned char' in
                    case the bit list is a full 32 bytes long.  */
                 if (c < (unsigned) (CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH)
@@ -4994,7 +4995,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
                 /* Compare that many; failure if mismatch, else move
                    past them.  */
                 if (RE_TRANSLATE_P (translate)
-                   ? bcmp_translate (d, d2, mcnt, translate)
+                   ? bcmp_translate (d, d2, mcnt, translate, multibyte)
                     : bcmp (d, d2, mcnt))
                   {
                     d = dfail;
@@ -5016,9 +5017,12 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
             {
               if (!bufp->not_bol) break;
             }
-         else if (d[-1] == '\n' && bufp->newline_anchor)
+         else
             {
-             break;
+             unsigned char c;
+             GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2);
+             if (c == '\n' && bufp->newline_anchor)
+               break;
             }
           /* In all other cases, we fail.  */
           goto fail;
@@ -5032,12 +5036,11 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
             {
               if (!bufp->not_eol) break;
             }
-
-         /* We have to ``prefetch'' the next character.  */
-         else if ((d == end1 ? *string2 : *d) == '\n'
-                  && bufp->newline_anchor)
+         else
             {
-             break;
+             PREFETCH_NOLIMIT ();
+             if (*d == '\n' && bufp->newline_anchor)
+               break;
             }
           goto fail;
  
@@ -5263,18 +5266,17 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
                  is the character at D, and S2 is the syntax of C2.  */
               int c1, c2, s1, s2;
  #ifdef emacs
-             int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d - 1));
+             int offset = PTR_TO_OFFSET (d - 1);
+             int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
               UPDATE_SYNTAX_TABLE (charpos);
  #endif
-             /* FIXME: This does a STRING_CHAR even for unibyte buffers.  */
               GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
               s1 = SYNTAX (c1);
  #ifdef emacs
               UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
  #endif
-             PREFETCH ();
-             /* FIXME: This does a STRING_CHAR even for unibyte buffers.  */
-             c2 = STRING_CHAR (d, dend - d);
+             PREFETCH_NOLIMIT ();
+             c2 = RE_STRING_CHAR (d, dend - d);
               s2 = SYNTAX (c2);
  
               if (/* Case 2: Only one of S1 and S2 is Sword.  */
@@ -5303,12 +5305,12 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
                  is the character at D, and S2 is the syntax of C2.  */
               int c1, c2, s1, s2;
  #ifdef emacs
-             int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
+             int offset = PTR_TO_OFFSET (d);
+             int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
               UPDATE_SYNTAX_TABLE (charpos);
  #endif
               PREFETCH ();
-             /* FIXME: This does a STRING_CHAR even for unibyte buffers.  */
-             c2 = STRING_CHAR (d, dend - d);
+             c2 = RE_STRING_CHAR (d, dend - d);
               s2 = SYNTAX (c2);
         
               /* Case 2: S2 is not Sword. */
@@ -5346,7 +5348,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
                  is the character at D, and S2 is the syntax of C2.  */
               int c1, c2, s1, s2;
  #ifdef emacs
-             int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d) - 1);
+             int offset = PTR_TO_OFFSET (d) - 1;
+             int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
               UPDATE_SYNTAX_TABLE (charpos);
  #endif
               GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
@@ -5359,9 +5362,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
               /* Case 3: D is not at the end of string ... */
               if (!AT_STRINGS_END (d))
                 {
-                 PREFETCH ();
-                 /* FIXME: This does a STRING_CHAR even for unibyte buffers.  */
-                 c2 = STRING_CHAR (d, dend - d);
+                 PREFETCH_NOLIMIT ();
+                 c2 = RE_STRING_CHAR (d, dend - d);
  #ifdef emacs
                   UPDATE_SYNTAX_TABLE_FORWARD (charpos);
  #endif
@@ -5383,20 +5385,15 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
           PREFETCH ();
  #ifdef emacs
           {
-           int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
+           int offset = PTR_TO_OFFSET (d);
+           int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
             UPDATE_SYNTAX_TABLE (pos1);
           }
  #endif
           {
             int c, len;
  
-           if (multibyte)
-             /* we must concern about multibyte form, ... */
-             c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
-           else
-             /* everything should be handled as ASCII, even though it
-                looks like multibyte form.  */
-             c = *d, len = 1;
+           c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
  
             if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not)
               goto fail;
@@ -5431,11 +5428,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
           PREFETCH ();
           {
             int c, len;
-
-           if (multibyte)
-             c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
-           else
-             c = *d, len = 1;
+           c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
  
             if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
               goto fail;
@@ -5512,23 +5505,23 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
     bytes; nonzero otherwise.  */
  
  static int
-bcmp_translate (s1, s2, len, translate)
-     unsigned char *s1, *s2;
+bcmp_translate (s1, s2, len, translate, multibyte)
+     re_char *s1, *s2;
       register int len;
       RE_TRANSLATE_TYPE translate;
+     const int multibyte;
  {
-  register unsigned char *p1 = s1, *p2 = s2;
-  unsigned char *p1_end = s1 + len;
-  unsigned char *p2_end = s2 + len;
+  register re_char *p1 = s1, *p2 = s2;
+  re_char *p1_end = s1 + len;
+  re_char *p2_end = s2 + len;
  
    while (p1 != p1_end && p2 != p2_end)
      {
        int p1_charlen, p2_charlen;
        int p1_ch, p2_ch;
  
-      /* FIXME: This assumes `multibyte = true'.  */
-      p1_ch = STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
-      p2_ch = STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
+      p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
+      p2_ch = RE_STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
  
        if (RE_TRANSLATE (translate, p1_ch)
           != RE_TRANSLATE (translate, p2_ch))
@@ -5604,7 +5597,8 @@ re_comp (s)
    if (!s)
      {
        if (!re_comp_buf.buffer)
-       return gettext ("No previous regular expression");
+        /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
+       return (char *) gettext ("No previous regular expression");
        return 0;
      }
  
@@ -5612,12 +5606,14 @@ re_comp (s)
      {
        re_comp_buf.buffer = (unsigned char *) malloc (200);
        if (re_comp_buf.buffer == NULL)
-        return gettext (re_error_msgid[(int) REG_ESPACE]);
+        /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
+        return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
        re_comp_buf.allocated = 200;
  
        re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
        if (re_comp_buf.fastmap == NULL)
-       return gettext (re_error_msgid[(int) REG_ESPACE]);
+       /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
+       return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
      }
  
    /* Since `re_exec' always passes NULL for the `regs' argument, we