X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fregex.c;h=74fcf9c45ebb593a9c53c923d18fe00e4cc1301f;hb=ade79def12b42c40afa2d4bd47989ee40df7fe98;hp=d16bd60381bdf82be7c2203b1bde3916c9de014d;hpb=f711379c33b61c8258767154bebed5be41355a8e;p=gnulib.git

diff --git a/lib/regex.c b/lib/regex.c
index d16bd6038..74fcf9c45 100644
--- a/lib/regex.c
+++ b/lib/regex.c
@@ -2,25 +2,22 @@
    version 0.12.
    (Implements POSIX draft P1003.2/D11.2, except for some of the
    internationalization features.)
-   Copyright (C) 1993, 94, 95, 96, 97, 98 Free Software Foundation, Inc.
+   Copyright (C) 1993, 94, 95, 96, 97, 98, 99 Free Software Foundation, Inc.
 
-   NOTE: The canonical source of this file is maintained with the GNU C Library.
-   Bugs can be reported to bug-glibc@prep.ai.mit.edu.
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
 
-   This program is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by the
-   Free Software Foundation; either version 2, or (at your option) any
-   later version.
-
-   This program is distributed in the hope that it will be useful,
+   The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
 
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-   USA.  */
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
 
 /* AIX requires this to be the first thing in the file. */
 #if defined _AIX && !defined REGEX_MALLOC
@@ -49,16 +46,41 @@
 # include <sys/types.h>
 #endif
 
-#define WIDE_CHAR_SUPPORT \
-  defined _LIBC || (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
+#define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
 
 /* For platform which support the ISO C amendement 1 functionality we
    support user defined character classes.  */
-#if WIDE_CHAR_SUPPORT
+#if defined _LIBC || WIDE_CHAR_SUPPORT
+/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
 # include <wchar.h>
 # include <wctype.h>
 #endif
 
+#ifdef _LIBC
+/* We have to keep the namespace clean.  */
+# define regfree(preg) __regfree (preg)
+# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
+# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
+# define regerror(errcode, preg, errbuf, errbuf_size) \
+	__regerror(errcode, preg, errbuf, errbuf_size)
+# define re_set_registers(bu, re, nu, st, en) \
+	__re_set_registers (bu, re, nu, st, en)
+# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
+	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+# define re_match(bufp, string, size, pos, regs) \
+	__re_match (bufp, string, size, pos, regs)
+# define re_search(bufp, string, size, startpos, range, regs) \
+	__re_search (bufp, string, size, startpos, range, regs)
+# define re_compile_pattern(pattern, length, bufp) \
+	__re_compile_pattern (pattern, length, bufp)
+# define re_set_syntax(syntax) __re_set_syntax (syntax)
+# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
+	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
+# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
+
+#define btowc __btowc
+#endif
+
 /* This is for other GNU distributions with internationalized messages.  */
 #if HAVE_LIBINTL_H || defined _LIBC
 # include <libintl.h>
@@ -110,8 +132,12 @@ char *realloc ();
 # ifndef INHIBIT_STRING_HEADER
 #  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
 #   include <string.h>
-#   if !defined bzero && !defined _LIBC
-#    define bzero(s, n)		(memset (s, '\0', n), (s))
+#   ifndef bzero
+#    ifndef _LIBC
+#     define bzero(s, n)	(memset (s, '\0', n), (s))
+#    else
+#     define bzero(s, n)	__bzero (s, n)
+#    endif
 #   endif
 #  else
 #   include <strings.h>
@@ -181,7 +207,7 @@ init_syntax_once ()
 #endif /* not emacs */
 
 /* Get the interface, including the syntax bits.  */
-#include "regex.h"
+#include <regex.h>
 
 /* isalpha etc. are used for the character classes.  */
 #include <ctype.h>
@@ -195,7 +221,8 @@ init_syntax_once ()
    STDC_HEADERS is defined, then autoconf has verified that the ctype
    macros don't need to be guarded with references to isascii. ...
    Defining isascii to 1 should let any compiler worth its salt
-   eliminate the && through constant folding."  */
+   eliminate the && through constant folding."
+   Solaris defines some of these symbols so we must undefine them first.  */
 
 #undef ISASCII
 #if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
@@ -227,6 +254,12 @@ init_syntax_once ()
 #define ISUPPER(c) (ISASCII (c) && isupper (c))
 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
 
+#ifdef _tolower
+# define TOLOWER(c) _tolower(c)
+#else
+# define TOLOWER(c) tolower(c)
+#endif
+
 #ifndef NULL
 # define NULL (void *)0
 #endif
@@ -971,6 +1004,9 @@ re_set_syntax (syntax)
 #endif /* DEBUG */
   return ret;
 }
+#ifdef _LIBC
+weak_alias (__re_set_syntax, re_set_syntax)
+#endif
 
 /* This table gives an error message for each of the error codes listed
    in regex.h.  Obviously the order here has to be same as there.
@@ -1688,7 +1724,7 @@ typedef struct
        } 								\
     }
 
-#if WIDE_CHAR_SUPPORT
+#if defined _LIBC || WIDE_CHAR_SUPPORT
 /* The GNU C library provides support for user-defined character classes
    and the functions from ISO C amendement 1.  */
 # ifdef CHARCLASS_NAME_MAX
@@ -1699,7 +1735,11 @@ typedef struct
 #  define CHAR_CLASS_MAX_LENGTH 256
 # endif
 
-# define IS_CHAR_CLASS(string) wctype (string)
+# ifdef _LIBC
+#  define IS_CHAR_CLASS(string) __wctype (string)
+# else
+#  define IS_CHAR_CLASS(string) wctype (string)
+# endif
 #else
 # define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
 
@@ -2176,25 +2216,28 @@ regex_compile (pattern, size, syntax, bufp)
                     for (;;)
                       {
                         PATFETCH (c);
-                        if (c == ':' || c == ']' || p == pend
-                            || c1 == CHAR_CLASS_MAX_LENGTH)
+                        if ((c == ':' && *p == ']') || p == pend)
                           break;
-                        str[c1++] = c;
+			if (c1 < CHAR_CLASS_MAX_LENGTH)
+			  str[c1++] = c;
+			else
+			  /* This is in any case an invalid class name.  */
+			  str[0] = '\0';
                       }
                     str[c1] = '\0';
 
-                    /* If isn't a word bracketed by `[:' and:`]':
+                    /* If isn't a word bracketed by `[:' and `:]':
                        undo the ending character, the letters, and leave
                        the leading `:' and `[' (but set bits for them).  */
                     if (c == ':' && *p == ']')
                       {
-#if WIDE_CHAR_SUPPORT
+#if defined _LIBC || WIDE_CHAR_SUPPORT
                         boolean is_lower = STREQ (str, "lower");
                         boolean is_upper = STREQ (str, "upper");
 			wctype_t wt;
                         int ch;
 
-			wt = wctype (str);
+			wt = IS_CHAR_CLASS (str);
 			if (wt == 0)
 			  FREE_STACK_RETURN (REG_ECTYPE);
 
@@ -2206,8 +2249,13 @@ regex_compile (pattern, size, syntax, bufp)
 
                         for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
 			  {
+# ifdef _LIBC
+			    if (__iswctype (__btowc (ch), wt))
+			      SET_LIST_BIT (ch);
+# else
 			    if (iswctype (btowc (ch), wt))
 			      SET_LIST_BIT (ch);
+# endif
 
 			    if (translate && (is_upper || is_lower)
 				&& (ISUPPER (ch) || ISLOWER (ch)))
@@ -2691,7 +2739,7 @@ regex_compile (pattern, size, syntax, bufp)
 
 
             case 'w':
-	      if (re_syntax_options & RE_NO_GNU_OPS)
+	      if (syntax & RE_NO_GNU_OPS)
 		goto normal_char;
               laststart = b;
               BUF_PUSH (wordchar);
@@ -2699,7 +2747,7 @@ regex_compile (pattern, size, syntax, bufp)
 
 
             case 'W':
-	      if (re_syntax_options & RE_NO_GNU_OPS)
+	      if (syntax & RE_NO_GNU_OPS)
 		goto normal_char;
               laststart = b;
               BUF_PUSH (notwordchar);
@@ -2707,37 +2755,37 @@ regex_compile (pattern, size, syntax, bufp)
 
 
             case '<':
-	      if (re_syntax_options & RE_NO_GNU_OPS)
+	      if (syntax & RE_NO_GNU_OPS)
 		goto normal_char;
               BUF_PUSH (wordbeg);
               break;
 
             case '>':
-	      if (re_syntax_options & RE_NO_GNU_OPS)
+	      if (syntax & RE_NO_GNU_OPS)
 		goto normal_char;
               BUF_PUSH (wordend);
               break;
 
             case 'b':
-	      if (re_syntax_options & RE_NO_GNU_OPS)
+	      if (syntax & RE_NO_GNU_OPS)
 		goto normal_char;
               BUF_PUSH (wordbound);
               break;
 
             case 'B':
-	      if (re_syntax_options & RE_NO_GNU_OPS)
+	      if (syntax & RE_NO_GNU_OPS)
 		goto normal_char;
               BUF_PUSH (notwordbound);
               break;
 
             case '`':
-	      if (re_syntax_options & RE_NO_GNU_OPS)
+	      if (syntax & RE_NO_GNU_OPS)
 		goto normal_char;
               BUF_PUSH (begbuf);
               break;
 
             case '\'':
-	      if (re_syntax_options & RE_NO_GNU_OPS)
+	      if (syntax & RE_NO_GNU_OPS)
 		goto normal_char;
               BUF_PUSH (endbuf);
               break;
@@ -3375,6 +3423,9 @@ re_compile_fastmap (bufp)
   RESET_FAIL_STACK ();
   return 0;
 } /* re_compile_fastmap */
+#ifdef _LIBC
+weak_alias (__re_compile_fastmap, re_compile_fastmap)
+#endif
 
 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
@@ -3410,6 +3461,9 @@ re_set_registers (bufp, regs, num_regs, starts, ends)
       regs->start = regs->end = (regoff_t *) 0;
     }
 }
+#ifdef _LIBC
+weak_alias (__re_set_registers, re_set_registers)
+#endif
 
 /* Searching routines.  */
 
@@ -3426,6 +3480,9 @@ re_search (bufp, string, size, startpos, range, regs)
   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
 		      regs, size);
 }
+#ifdef _LIBC
+weak_alias (__re_search, re_search)
+#endif
 
 
 /* Using the compiled pattern in BUFP->buffer, first tries to match the
@@ -3479,7 +3536,11 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
 
   /* If the search isn't to be a backwards one, don't waste time in a
      search for a pattern that must be anchored.  */
-  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
+  if (bufp->used > 0 && range > 0
+      && ((re_opcode_t) bufp->buffer[0] == begbuf
+	  /* `begline' is like `begbuf' if it cannot match at newlines.  */
+	  || ((re_opcode_t) bufp->buffer[0] == begline
+	      && !bufp->newline_anchor)))
     {
       if (startpos > 0)
 	return -1;
@@ -3582,6 +3643,9 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
     }
   return -1;
 } /* re_search_2 */
+#ifdef _LIBC
+weak_alias (__re_search_2, re_search_2)
+#endif
 
 /* This converts PTR, a pointer into one of the search strings `string1'
    and `string2' into an offset from the beginning of that string.  */
@@ -3683,6 +3747,9 @@ re_match (bufp, string, size, pos, regs)
 # endif
   return result;
 }
+# ifdef _LIBC
+weak_alias (__re_match, re_match)
+# endif
 #endif /* not emacs */
 
 static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
@@ -3728,6 +3795,9 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
 #endif
   return result;
 }
+#ifdef _LIBC
+weak_alias (__re_match_2, re_match_2)
+#endif
 
 /* This is a separate function so that we can force an alloca cleanup
    afterwards.  */
@@ -5421,6 +5491,9 @@ re_compile_pattern (pattern, length, bufp)
     return NULL;
   return gettext (re_error_msgid[(int) ret]);
 }
+#ifdef _LIBC
+weak_alias (__re_compile_pattern, re_compile_pattern)
+#endif
 
 /* Entry points compatible with 4.2 BSD regex library.  We don't define
    them unless specifically requested.  */
@@ -5453,12 +5526,12 @@ re_comp (s)
     {
       re_comp_buf.buffer = (unsigned char *) malloc (200);
       if (re_comp_buf.buffer == NULL)
-        return gettext (re_error_msgid[(int) REG_ESPACE]);
+        return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
       re_comp_buf.allocated = 200;
 
       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
       if (re_comp_buf.fastmap == NULL)
-	return gettext (re_error_msgid[(int) REG_ESPACE]);
+	return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
     }
 
   /* Since `re_exec' always passes NULL for the `regs' argument, we
@@ -5506,7 +5579,8 @@ re_exec (s)
        REG_EXTENDED bit in CFLAGS is set; otherwise, to
        RE_SYNTAX_POSIX_BASIC;
      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
-     `fastmap' and `fastmap_accurate' to zero;
+     `fastmap' to an allocated space for the fastmap;
+     `fastmap_accurate' to zero;
      `re_nsub' to the number of subexpressions in PATTERN.
 
    PATTERN is the address of the pattern string.
@@ -5545,11 +5619,8 @@ regcomp (preg, pattern, cflags)
   preg->allocated = 0;
   preg->used = 0;
 
-  /* Don't bother to use a fastmap when searching.  This simplifies the
-     REG_NEWLINE case: if we used a fastmap, we'd have to put all the
-     characters after newlines into the fastmap.  This way, we just try
-     every character.  */
-  preg->fastmap = 0;
+  /* Try to allocate space for the fastmap.  */
+  preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
 
   if (cflags & REG_ICASE)
     {
@@ -5563,7 +5634,7 @@ regcomp (preg, pattern, cflags)
 
       /* Map uppercase characters to corresponding lowercase ones.  */
       for (i = 0; i < CHAR_SET_SIZE; i++)
-        preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
+        preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
     }
   else
     preg->translate = NULL;
@@ -5589,8 +5660,24 @@ regcomp (preg, pattern, cflags)
      unmatched close-group: both are REG_EPAREN.  */
   if (ret == REG_ERPAREN) ret = REG_EPAREN;
 
+  if (ret == REG_NOERROR && preg->fastmap)
+    {
+      /* Compute the fastmap now, since regexec cannot modify the pattern
+	 buffer.  */
+      if (re_compile_fastmap (preg) == -2)
+	{
+	  /* Some error occured while computing the fastmap, just forget
+	     about it.  */
+	  free (preg->fastmap);
+	  preg->fastmap = NULL;
+	}
+    }
+
   return (int) ret;
 }
+#ifdef _LIBC
+weak_alias (__regcomp, regcomp)
+#endif
 
 
 /* regexec searches for a given pattern, specified by PREG, in the
@@ -5634,10 +5721,10 @@ regexec (preg, string, nmatch, pmatch, eflags)
   if (want_reg_info)
     {
       regs.num_regs = nmatch;
-      regs.start = TALLOC (nmatch, regoff_t);
-      regs.end = TALLOC (nmatch, regoff_t);
-      if (regs.start == NULL || regs.end == NULL)
+      regs.start = TALLOC (nmatch * 2, regoff_t);
+      if (regs.start == NULL)
         return (int) REG_NOMATCH;
+      regs.end = regs.start + nmatch;
     }
 
   /* Perform the searching operation.  */
@@ -5661,12 +5748,14 @@ regexec (preg, string, nmatch, pmatch, eflags)
 
       /* If we needed the temporary register info, free the space now.  */
       free (regs.start);
-      free (regs.end);
     }
 
   /* We want zero return to mean success, unlike `re_search'.  */
   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
 }
+#ifdef _LIBC
+weak_alias (__regexec, regexec)
+#endif
 
 
 /* Returns a message corresponding to an error code, ERRCODE, returned
@@ -5712,6 +5801,9 @@ regerror (errcode, preg, errbuf, errbuf_size)
 
   return msg_size;
 }
+#ifdef _LIBC
+weak_alias (__regerror, regerror)
+#endif
 
 
 /* Free dynamically allocated space used by PREG.  */
@@ -5736,5 +5828,8 @@ regfree (preg)
     free (preg->translate);
   preg->translate = NULL;
 }
+#ifdef _LIBC
+weak_alias (__regfree, regfree)
+#endif
 
 #endif /* not emacs  */