version 0.12.
(Implements POSIX draft P1003.2/D11.2, except for some of the
internationalization features.)
- Copyright (C) 1993, 94, 95, 96, 97, 98 Free Software Foundation, Inc.
+ Copyright (C) 1993, 94, 95, 96, 97, 98, 99 Free Software Foundation, Inc.
- NOTE: The canonical source of this file is maintained with the GNU C Library.
- Bugs can be reported to bug-glibc@prep.ai.mit.edu.
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
- This program is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by the
- Free Software Foundation; either version 2, or (at your option) any
- later version.
-
- This program is distributed in the hope that it will be useful,
+ The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- USA. */
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
/* AIX requires this to be the first thing in the file. */
#if defined _AIX && !defined REGEX_MALLOC
# include <sys/types.h>
#endif
-#define WIDE_CHAR_SUPPORT \
- defined _LIBC || (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
+#define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
/* For platforms which support the ISO C amendment 1 functionality we
support user-defined character classes. */
-#if WIDE_CHAR_SUPPORT
+#if defined _LIBC || WIDE_CHAR_SUPPORT
+/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
# include <wchar.h>
# include <wctype.h>
#endif
+#ifdef _LIBC
+/* We have to keep the namespace clean. */
+# define regfree(preg) __regfree (preg)
+# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
+# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
+# define regerror(errcode, preg, errbuf, errbuf_size) \
+ __regerror(errcode, preg, errbuf, errbuf_size)
+# define re_set_registers(bu, re, nu, st, en) \
+ __re_set_registers (bu, re, nu, st, en)
+# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
+ __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+# define re_match(bufp, string, size, pos, regs) \
+ __re_match (bufp, string, size, pos, regs)
+# define re_search(bufp, string, size, startpos, range, regs) \
+ __re_search (bufp, string, size, startpos, range, regs)
+# define re_compile_pattern(pattern, length, bufp) \
+ __re_compile_pattern (pattern, length, bufp)
+# define re_set_syntax(syntax) __re_set_syntax (syntax)
+# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
+ __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
+# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
+
+#define btowc __btowc
+#endif
+
/* This is for other GNU distributions with internationalized messages. */
#if HAVE_LIBINTL_H || defined _LIBC
# include <libintl.h>
# ifndef INHIBIT_STRING_HEADER
# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
# include <string.h>
-# if !defined bzero && !defined _LIBC
-# define bzero(s, n) (memset (s, '\0', n), (s))
+# ifndef bzero
+# ifndef _LIBC
+# define bzero(s, n) (memset (s, '\0', n), (s))
+# else
+# define bzero(s, n) __bzero (s, n)
+# endif
# endif
# else
# include <strings.h>
#endif /* not emacs */
\f
/* Get the interface, including the syntax bits. */
-#include "regex.h"
+#include <regex.h>
/* isalpha etc. are used for the character classes. */
#include <ctype.h>
STDC_HEADERS is defined, then autoconf has verified that the ctype
macros don't need to be guarded with references to isascii. ...
Defining isascii to 1 should let any compiler worth its salt
- eliminate the && through constant folding." */
+ eliminate the && through constant folding."
+ Solaris defines some of these symbols so we must undefine them first. */
#undef ISASCII
#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
#define ISUPPER(c) (ISASCII (c) && isupper (c))
#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
+#ifdef _tolower
+# define TOLOWER(c) _tolower(c)
+#else
+# define TOLOWER(c) tolower(c)
+#endif
+
#ifndef NULL
# define NULL (void *)0
#endif
#endif /* DEBUG */
return ret;
}
+#ifdef _LIBC
+weak_alias (__re_set_syntax, re_set_syntax)
+#endif
\f
/* This table gives an error message for each of the error codes listed
in regex.h. Obviously the order here has to be same as there.
} \
}
-#if WIDE_CHAR_SUPPORT
+#if defined _LIBC || WIDE_CHAR_SUPPORT
/* The GNU C library provides support for user-defined character classes
and the functions from ISO C amendment 1. */
# ifdef CHARCLASS_NAME_MAX
# define CHAR_CLASS_MAX_LENGTH 256
# endif
-# define IS_CHAR_CLASS(string) wctype (string)
+# ifdef _LIBC
+# define IS_CHAR_CLASS(string) __wctype (string)
+# else
+# define IS_CHAR_CLASS(string) wctype (string)
+# endif
#else
# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
for (;;)
{
PATFETCH (c);
- if (c == ':' || c == ']' || p == pend
- || c1 == CHAR_CLASS_MAX_LENGTH)
+ if ((c == ':' && *p == ']') || p == pend)
break;
- str[c1++] = c;
+ if (c1 < CHAR_CLASS_MAX_LENGTH)
+ str[c1++] = c;
+ else
+ /* This is in any case an invalid class name. */
+ str[0] = '\0';
}
str[c1] = '\0';
- /* If isn't a word bracketed by `[:' and:`]':
+ /* If it isn't a word bracketed by `[:' and `:]':
undo the ending character, the letters, and leave
the leading `:' and `[' (but set bits for them). */
if (c == ':' && *p == ']')
{
-#if WIDE_CHAR_SUPPORT
+#if defined _LIBC || WIDE_CHAR_SUPPORT
boolean is_lower = STREQ (str, "lower");
boolean is_upper = STREQ (str, "upper");
wctype_t wt;
int ch;
- wt = wctype (str);
+ wt = IS_CHAR_CLASS (str);
if (wt == 0)
FREE_STACK_RETURN (REG_ECTYPE);
for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
{
+# ifdef _LIBC
+ if (__iswctype (__btowc (ch), wt))
+ SET_LIST_BIT (ch);
+# else
if (iswctype (btowc (ch), wt))
SET_LIST_BIT (ch);
+# endif
if (translate && (is_upper || is_lower)
&& (ISUPPER (ch) || ISLOWER (ch)))
case 'w':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
laststart = b;
BUF_PUSH (wordchar);
case 'W':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
laststart = b;
BUF_PUSH (notwordchar);
case '<':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (wordbeg);
break;
case '>':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (wordend);
break;
case 'b':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (wordbound);
break;
case 'B':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (notwordbound);
break;
case '`':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (begbuf);
break;
case '\'':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (endbuf);
break;
RESET_FAIL_STACK ();
return 0;
} /* re_compile_fastmap */
+#ifdef _LIBC
+weak_alias (__re_compile_fastmap, re_compile_fastmap)
+#endif
\f
/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
regs->start = regs->end = (regoff_t *) 0;
}
}
+#ifdef _LIBC
+weak_alias (__re_set_registers, re_set_registers)
+#endif
\f
/* Searching routines. */
return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
regs, size);
}
+#ifdef _LIBC
+weak_alias (__re_search, re_search)
+#endif
/* Using the compiled pattern in BUFP->buffer, first tries to match the
/* If the search isn't to be a backwards one, don't waste time in a
search for a pattern that must be anchored. */
- if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
+ if (bufp->used > 0 && range > 0
+ && ((re_opcode_t) bufp->buffer[0] == begbuf
+ /* `begline' is like `begbuf' if it cannot match at newlines. */
+ || ((re_opcode_t) bufp->buffer[0] == begline
+ && !bufp->newline_anchor)))
{
if (startpos > 0)
return -1;
}
return -1;
} /* re_search_2 */
+#ifdef _LIBC
+weak_alias (__re_search_2, re_search_2)
+#endif
\f
/* This converts PTR, a pointer into one of the search strings `string1'
and `string2' into an offset from the beginning of that string. */
# endif
return result;
}
+# ifdef _LIBC
+weak_alias (__re_match, re_match)
+# endif
#endif /* not emacs */
static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
#endif
return result;
}
+#ifdef _LIBC
+weak_alias (__re_match_2, re_match_2)
+#endif
/* This is a separate function so that we can force an alloca cleanup
afterwards. */
return NULL;
return gettext (re_error_msgid[(int) ret]);
}
+#ifdef _LIBC
+weak_alias (__re_compile_pattern, re_compile_pattern)
+#endif
\f
/* Entry points compatible with 4.2 BSD regex library. We don't define
them unless specifically requested. */
{
re_comp_buf.buffer = (unsigned char *) malloc (200);
if (re_comp_buf.buffer == NULL)
- return gettext (re_error_msgid[(int) REG_ESPACE]);
+ return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
re_comp_buf.allocated = 200;
re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
if (re_comp_buf.fastmap == NULL)
- return gettext (re_error_msgid[(int) REG_ESPACE]);
+ return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
}
/* Since `re_exec' always passes NULL for the `regs' argument, we
REG_EXTENDED bit in CFLAGS is set; otherwise, to
RE_SYNTAX_POSIX_BASIC;
`newline_anchor' to REG_NEWLINE being set in CFLAGS;
- `fastmap' and `fastmap_accurate' to zero;
+ `fastmap' to an allocated space for the fastmap;
+ `fastmap_accurate' to zero;
`re_nsub' to the number of subexpressions in PATTERN.
PATTERN is the address of the pattern string.
preg->allocated = 0;
preg->used = 0;
- /* Don't bother to use a fastmap when searching. This simplifies the
- REG_NEWLINE case: if we used a fastmap, we'd have to put all the
- characters after newlines into the fastmap. This way, we just try
- every character. */
- preg->fastmap = 0;
+ /* Try to allocate space for the fastmap. */
+ preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
if (cflags & REG_ICASE)
{
/* Map uppercase characters to corresponding lowercase ones. */
for (i = 0; i < CHAR_SET_SIZE; i++)
- preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
+ preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
}
else
preg->translate = NULL;
unmatched close-group: both are REG_EPAREN. */
if (ret == REG_ERPAREN) ret = REG_EPAREN;
+ if (ret == REG_NOERROR && preg->fastmap)
+ {
+ /* Compute the fastmap now, since regexec cannot modify the pattern
+ buffer. */
+ if (re_compile_fastmap (preg) == -2)
+ {
+ /* Some error occurred while computing the fastmap, just forget
+ about it. */
+ free (preg->fastmap);
+ preg->fastmap = NULL;
+ }
+ }
+
return (int) ret;
}
+#ifdef _LIBC
+weak_alias (__regcomp, regcomp)
+#endif
/* regexec searches for a given pattern, specified by PREG, in the
if (want_reg_info)
{
regs.num_regs = nmatch;
- regs.start = TALLOC (nmatch, regoff_t);
- regs.end = TALLOC (nmatch, regoff_t);
- if (regs.start == NULL || regs.end == NULL)
+ regs.start = TALLOC (nmatch * 2, regoff_t);
+ if (regs.start == NULL)
return (int) REG_NOMATCH;
+ regs.end = regs.start + nmatch;
}
/* Perform the searching operation. */
/* If we needed the temporary register info, free the space now. */
free (regs.start);
- free (regs.end);
}
/* We want zero return to mean success, unlike `re_search'. */
return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
}
+#ifdef _LIBC
+weak_alias (__regexec, regexec)
+#endif
/* Returns a message corresponding to an error code, ERRCODE, returned
return msg_size;
}
+#ifdef _LIBC
+weak_alias (__regerror, regerror)
+#endif
/* Free dynamically allocated space used by PREG. */
free (preg->translate);
preg->translate = NULL;
}
+#ifdef _LIBC
+weak_alias (__regfree, regfree)
+#endif
#endif /* not emacs */