*** empty log message ***

[gnulib.git] / regex.c
diff --git a/regex.c b/regex.c

index f3e3829..37436ff 100644 (file)
--- a/regex.c
+++ b/regex.c
@@ -49,8 +49,12 @@
  
  /* Whether to use ISO C Amendment 1 wide char functions.
     Those should not be used for Emacs since it uses its own.  */
+#if defined _LIBC
+#define WIDE_CHAR_SUPPORT 1
+#else
  #define WIDE_CHAR_SUPPORT \
-  (defined _LIBC || HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC && !emacs)
+       (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC && !emacs)
+#endif
  
  /* For platform which support the ISO C amendement 1 functionality we
     support user defined character classes.  */
@@ -124,8 +128,17 @@
  # include "charset.h"
  # include "category.h"
  
+# ifdef malloc
+#  undef malloc
+# endif
  # define malloc xmalloc
+# ifdef realloc
+#  undef realloc
+# endif
  # define realloc xrealloc
+# ifdef free
+#  undef free
+# endif
  # define free xfree
  
  /* Converts the pointer to the char to BEG-based offset from the start.         */
@@ -1314,7 +1327,8 @@ static const char *re_error_msgid[] =
  /* Roughly the maximum number of failure points on the stack.  Would be
     exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
     This is a variable only so users of regex can assign to it; we never
-   change it ourselves.  */
+   change it ourselves.  We always multiply it by TYPICAL_FAILURE_SIZE
+   before using it, so it should probably be a byte-count instead.  */
  # if defined MATCH_MAY_ALLOCATE
  /* Note that 4400 was enough to cause a crash on Alpha OSF/1,
     whose default stack limit is 2mb.  In order for a larger
@@ -1566,7 +1580,7 @@ do {                                                                      \
  /* Estimate the size of data pushed by a typical failure stack entry.
     An estimate is all we need, because all we use this for
     is to choose a limit for how big to make the failure stack.  */
-
+/* BEWARE, the value `20' is hard-coded in emacs.c:main().  */
  #define TYPICAL_FAILURE_SIZE 20
  
  /* How many items can still be added to the stack without overflowing it.  */
@@ -1953,9 +1967,10 @@ typedef int re_wchar_t;
  
  /* Map a string to the char class it names (if any).  */
  static re_wctype_t
-re_wctype (string)
-     re_char *string;
+re_wctype (str)
+     re_char *str;
  {
+  const char *string = str;
    if      (STREQ (string, "alnum"))    return RECC_ALNUM;
    else if (STREQ (string, "alpha"))    return RECC_ALPHA;
    else if (STREQ (string, "word"))     return RECC_WORD;
@@ -1982,30 +1997,29 @@ re_iswctype (ch, cc)
       int ch;
       re_wctype_t cc;
  {
-  boolean ret = false;
-
    switch (cc)
      {
-    case RECC_ALNUM: ret = ISALNUM (ch);
-    case RECC_ALPHA: ret = ISALPHA (ch);
-    case RECC_BLANK: ret = ISBLANK (ch);
-    case RECC_CNTRL: ret = ISCNTRL (ch);
-    case RECC_DIGIT: ret = ISDIGIT (ch);
-    case RECC_GRAPH: ret = ISGRAPH (ch);
-    case RECC_LOWER: ret = ISLOWER (ch);
-    case RECC_PRINT: ret = ISPRINT (ch);
-    case RECC_PUNCT: ret = ISPUNCT (ch);
-    case RECC_SPACE: ret = ISSPACE (ch);
-    case RECC_UPPER: ret = ISUPPER (ch);
-    case RECC_XDIGIT: ret = ISXDIGIT (ch);
-    case RECC_ASCII: ret = IS_REAL_ASCII (ch);
-    case RECC_NONASCII: ret = !IS_REAL_ASCII (ch);
-    case RECC_UNIBYTE: ret = ISUNIBYTE (ch);
-    case RECC_MULTIBYTE: ret = !ISUNIBYTE (ch);
-    case RECC_WORD: ret = ISWORD (ch);
-    case RECC_ERROR: ret = false;
+    case RECC_ALNUM: return ISALNUM (ch);
+    case RECC_ALPHA: return ISALPHA (ch);
+    case RECC_BLANK: return ISBLANK (ch);
+    case RECC_CNTRL: return ISCNTRL (ch);
+    case RECC_DIGIT: return ISDIGIT (ch);
+    case RECC_GRAPH: return ISGRAPH (ch);
+    case RECC_LOWER: return ISLOWER (ch);
+    case RECC_PRINT: return ISPRINT (ch);
+    case RECC_PUNCT: return ISPUNCT (ch);
+    case RECC_SPACE: return ISSPACE (ch);
+    case RECC_UPPER: return ISUPPER (ch);
+    case RECC_XDIGIT: return ISXDIGIT (ch);
+    case RECC_ASCII: return IS_REAL_ASCII (ch);
+    case RECC_NONASCII: return !IS_REAL_ASCII (ch);
+    case RECC_UNIBYTE: return ISUNIBYTE (ch);
+    case RECC_MULTIBYTE: return !ISUNIBYTE (ch);
+    case RECC_WORD: return ISWORD (ch);
+    case RECC_ERROR: return false;
+    default:
+      abort();
      }
-  return ret;
  }
  
  /* Return a bit-pattern to use in the range-table bits to match multibyte
@@ -2014,21 +2028,20 @@ static int
  re_wctype_to_bit (cc)
       re_wctype_t cc;
  {
-  int ret = 0;
-
    switch (cc)
      {
      case RECC_NONASCII: case RECC_PRINT: case RECC_GRAPH:
-    case RECC_MULTIBYTE: ret = BIT_MULTIBYTE;
-    case RECC_ALPHA: case RECC_ALNUM: case RECC_WORD: ret = BIT_WORD;
-    case RECC_LOWER: ret = BIT_LOWER;
-    case RECC_UPPER: ret = BIT_UPPER;
-    case RECC_PUNCT: ret = BIT_PUNCT;
-    case RECC_SPACE: ret = BIT_SPACE;
+    case RECC_MULTIBYTE: return BIT_MULTIBYTE;
+    case RECC_ALPHA: case RECC_ALNUM: case RECC_WORD: return BIT_WORD;
+    case RECC_LOWER: return BIT_LOWER;
+    case RECC_UPPER: return BIT_UPPER;
+    case RECC_PUNCT: return BIT_PUNCT;
+    case RECC_SPACE: return BIT_SPACE;
      case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
-    case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: ret = 0;
+    case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
+    default:
+      abort();
      }
-  return ret;
  }
  #endif
  
@@ -3150,20 +3163,21 @@ regex_compile (pattern, size, syntax, bufp)
  
             case '1': case '2': case '3': case '4': case '5':
             case '6': case '7': case '8': case '9':
-             if (syntax & RE_NO_BK_REFS)
-               goto normal_char;
+             {
+               regnum_t reg;
  
-             c1 = c - '0';
+               if (syntax & RE_NO_BK_REFS)
+                 goto normal_backslash;
  
-             if (c1 > regnum)
-               FREE_STACK_RETURN (REG_ESUBREG);
+               reg = c - '0';
  
-             /* Can't back reference to a subexpression if inside of it.  */
-             if (group_in_compile_stack (compile_stack, (regnum_t) c1))
-               goto normal_char;
+               /* Can't back reference to a subexpression before its end.  */
+               if (reg > regnum || group_in_compile_stack (compile_stack, reg))
+                 FREE_STACK_RETURN (REG_ESUBREG);
  
-             laststart = b;
-             BUF_PUSH_2 (duplicate, c1);
+               laststart = b;
+               BUF_PUSH_2 (duplicate, reg);
+             }
               break;
  
  
@@ -4264,7 +4278,7 @@ mutually_exclusive_p (bufp, p1, p2)
        {
         register re_wchar_t c
           = (re_opcode_t) *p2 == endline ? '\n'
-         : RE_STRING_CHAR(p2 + 2, pend - p2 - 2);
+         : RE_STRING_CHAR (p2 + 2, pend - p2 - 2);
  
         if ((re_opcode_t) *p1 == exactn)
           {
@@ -4309,13 +4323,11 @@ mutually_exclusive_p (bufp, p1, p2)
        break;
  
      case charset:
-    case charset_not:
        {
         if ((re_opcode_t) *p1 == exactn)
           /* Reuse the code above.  */
           return mutually_exclusive_p (bufp, p2, p1);
  
-
        /* It is hard to list up all the character in charset
          P2 if it includes multibyte character.  Give up in
          such case.  */
@@ -4331,7 +4343,7 @@ mutually_exclusive_p (bufp, p1, p2)
              P2 is ASCII, it is enough to test only bitmap
              table of P1.  */
  
-         if (*p1 == *p2)
+         if ((re_opcode_t) *p1 == charset)
             {
               int idx;
               /* We win if the charset inside the loop
@@ -4350,8 +4362,7 @@ mutually_exclusive_p (bufp, p1, p2)
                   return 1;
                 }
             }
-         else if ((re_opcode_t) *p1 == charset
-                  || (re_opcode_t) *p1 == charset_not)
+         else if ((re_opcode_t) *p1 == charset_not)
             {
               int idx;
               /* We win if the charset_not inside the loop lists
@@ -4370,7 +4381,24 @@ mutually_exclusive_p (bufp, p1, p2)
               }
           }
        }
+      break;
        
+    case charset_not:
+      switch (SWITCH_ENUM_CAST (*p1))
+       {
+       case exactn:
+       case charset:
+         /* Reuse the code above.  */
+         return mutually_exclusive_p (bufp, p2, p1);
+       case charset_not:
+         /* When we have two charset_not, it's very unlikely that
+            they don't overlap.  The union of the two sets of excluded
+            chars should cover all possible chars, which, as a matter of
+            fact, is virtually impossible in multibyte buffers.  */
+         ;
+       }
+      break;
+
      case wordend:
      case notsyntaxspec:
        return ((re_opcode_t) *p1 == syntaxspec
@@ -5811,8 +5839,8 @@ re_exec (s)
  
  int
  regcomp (preg, pattern, cflags)
-    regex_t *preg;
-    const char *pattern;
+    regex_t *__restrict preg;
+    const char *__restrict pattern;
      int cflags;
  {
    reg_errcode_t ret;
@@ -5896,8 +5924,8 @@ WEAK_ALIAS (__regcomp, regcomp)
  
  int
  regexec (preg, string, nmatch, pmatch, eflags)
-    const regex_t *preg;
-    const char *string;
+    const regex_t *__restrict preg;
+    const char *__restrict string;
      size_t nmatch;
      regmatch_t pmatch[];
      int eflags;