version 0.12.
(Implements POSIX draft P1003.2/D11.2, except for some of the
internationalization features.)
- Copyright (C) 1993, 94, 95, 96, 97, 98 Free Software Foundation, Inc.
+ Copyright (C) 1993, 94, 95, 96, 97, 98, 99 Free Software Foundation, Inc.
- NOTE: The canonical source of this file is maintained with the GNU C Library.
- Bugs can be reported to bug-glibc@prep.ai.mit.edu.
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
- This program is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by the
- Free Software Foundation; either version 2, or (at your option) any
- later version.
-
- This program is distributed in the hope that it will be useful,
+ The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- USA. */
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
/* AIX requires this to be the first thing in the file. */
#if defined _AIX && !defined REGEX_MALLOC
# include <sys/types.h>
#endif
+#define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
+
/* For platform which support the ISO C amendement 1 functionality we
support user defined character classes. */
-#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
-# include <wctype.h>
+#if defined _LIBC || WIDE_CHAR_SUPPORT
+/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
# include <wchar.h>
+# include <wctype.h>
+#endif
+
+#ifdef _LIBC
+/* We have to keep the namespace clean. */
+# define regfree(preg) __regfree (preg)
+# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
+# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
+# define regerror(errcode, preg, errbuf, errbuf_size) \
+ __regerror(errcode, preg, errbuf, errbuf_size)
+# define re_set_registers(bu, re, nu, st, en) \
+ __re_set_registers (bu, re, nu, st, en)
+# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
+ __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+# define re_match(bufp, string, size, pos, regs) \
+ __re_match (bufp, string, size, pos, regs)
+# define re_search(bufp, string, size, startpos, range, regs) \
+ __re_search (bufp, string, size, startpos, range, regs)
+# define re_compile_pattern(pattern, length, bufp) \
+ __re_compile_pattern (pattern, length, bufp)
+# define re_set_syntax(syntax) __re_set_syntax (syntax)
+# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
+ __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
+# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
+
+#define btowc __btowc
#endif
/* This is for other GNU distributions with internationalized messages. */
# ifndef INHIBIT_STRING_HEADER
# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
# include <string.h>
-# if !defined bzero && !defined _LIBC
-# define bzero(s, n) (memset (s, '\0', n), (s))
+# ifndef bzero
+# ifndef _LIBC
+# define bzero(s, n) (memset (s, '\0', n), (s))
+# else
+# define bzero(s, n) __bzero (s, n)
+# endif
# endif
# else
# include <strings.h>
#endif /* not emacs */
\f
/* Get the interface, including the syntax bits. */
-#include "regex.h"
+#include <regex.h>
/* isalpha etc. are used for the character classes. */
#include <ctype.h>
STDC_HEADERS is defined, then autoconf has verified that the ctype
macros don't need to be guarded with references to isascii. ...
Defining isascii to 1 should let any compiler worth its salt
- eliminate the && through constant folding." */
+ eliminate the && through constant folding."
+ Solaris defines some of these symbols so we must undefine them first. */
+#undef ISASCII
#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
# define ISASCII(c) 1
#else
# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
#endif
+#undef ISPRINT
#define ISPRINT(c) (ISASCII (c) && isprint (c))
#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
#define ISALNUM(c) (ISASCII (c) && isalnum (c))
#define ISUPPER(c) (ISASCII (c) && isupper (c))
#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
+#ifdef _tolower
+# define TOLOWER(c) _tolower(c)
+#else
+# define TOLOWER(c) tolower(c)
+#endif
+
#ifndef NULL
# define NULL (void *)0
#endif
#endif /* DEBUG */
return ret;
}
+#ifdef _LIBC
+weak_alias (__re_set_syntax, re_set_syntax)
+#endif
\f
/* This table gives an error message for each of the error codes listed
in regex.h. Obviously the order here has to be same as there.
if we ever fail back to it.
Requires variables fail_stack, regstart, regend, reg_info, and
- num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be
- declared.
+ num_regs_pushed be declared. DOUBLE_FAIL_STACK requires `destination'
+ be declared.
Does `return FAILURE_CODE' if runs out of memory. */
} \
}
-#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+#if defined _LIBC || WIDE_CHAR_SUPPORT
/* The GNU C library provides support for user-defined character classes
and the functions from ISO C amendement 1. */
# ifdef CHARCLASS_NAME_MAX
# define CHAR_CLASS_MAX_LENGTH 256
# endif
-# define IS_CHAR_CLASS(string) wctype (string)
+# ifdef _LIBC
+# define IS_CHAR_CLASS(string) __wctype (string)
+# else
+# define IS_CHAR_CLASS(string) wctype (string)
+# endif
#else
# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
for (;;)
{
PATFETCH (c);
- if (c == ':' || c == ']' || p == pend
- || c1 == CHAR_CLASS_MAX_LENGTH)
+ if ((c == ':' && *p == ']') || p == pend)
break;
- str[c1++] = c;
+ if (c1 < CHAR_CLASS_MAX_LENGTH)
+ str[c1++] = c;
+ else
+ /* This is in any case an invalid class name. */
+ str[0] = '\0';
}
str[c1] = '\0';
- /* If isn't a word bracketed by `[:' and:`]':
+ /* If isn't a word bracketed by `[:' and `:]':
undo the ending character, the letters, and leave
the leading `:' and `[' (but set bits for them). */
if (c == ':' && *p == ']')
{
-#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+#if defined _LIBC || WIDE_CHAR_SUPPORT
boolean is_lower = STREQ (str, "lower");
boolean is_upper = STREQ (str, "upper");
wctype_t wt;
int ch;
- wt = wctype (str);
+ wt = IS_CHAR_CLASS (str);
if (wt == 0)
FREE_STACK_RETURN (REG_ECTYPE);
for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
{
+# ifdef _LIBC
+ if (__iswctype (__btowc (ch), wt))
+ SET_LIST_BIT (ch);
+# else
if (iswctype (btowc (ch), wt))
SET_LIST_BIT (ch);
+# endif
if (translate && (is_upper || is_lower)
&& (ISUPPER (ch) || ISLOWER (ch)))
case 'w':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
laststart = b;
BUF_PUSH (wordchar);
case 'W':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
laststart = b;
BUF_PUSH (notwordchar);
case '<':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (wordbeg);
break;
case '>':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (wordend);
break;
case 'b':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (wordbound);
break;
case 'B':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (notwordbound);
break;
case '`':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (begbuf);
break;
case '\'':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (endbuf);
break;
#ifndef REGEX_MALLOC
char *destination;
#endif
- /* We don't push any register information onto the failure stack. */
- unsigned num_regs = 0;
register char *fastmap = bufp->fastmap;
unsigned char *pattern = bufp->buffer;
RESET_FAIL_STACK ();
return 0;
} /* re_compile_fastmap */
+#ifdef _LIBC
+weak_alias (__re_compile_fastmap, re_compile_fastmap)
+#endif
\f
/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
regs->start = regs->end = (regoff_t *) 0;
}
}
+#ifdef _LIBC
+weak_alias (__re_set_registers, re_set_registers)
+#endif
\f
/* Searching routines. */
return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
regs, size);
}
+#ifdef _LIBC
+weak_alias (__re_search, re_search)
+#endif
/* Using the compiled pattern in BUFP->buffer, first tries to match the
/* If the search isn't to be a backwards one, don't waste time in a
search for a pattern that must be anchored. */
- if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
+ if (bufp->used > 0 && range > 0
+ && ((re_opcode_t) bufp->buffer[0] == begbuf
+ /* `begline' is like `begbuf' if it cannot match at newlines. */
+ || ((re_opcode_t) bufp->buffer[0] == begline
+ && !bufp->newline_anchor)))
{
if (startpos > 0)
return -1;
}
return -1;
} /* re_search_2 */
+#ifdef _LIBC
+weak_alias (__re_search_2, re_search_2)
+#endif
\f
/* This converts PTR, a pointer into one of the search strings `string1'
and `string2' into an offset from the beginning of that string. */
# endif
return result;
}
+# ifdef _LIBC
+weak_alias (__re_match, re_match)
+# endif
#endif /* not emacs */
static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
#endif
return result;
}
+#ifdef _LIBC
+weak_alias (__re_match_2, re_match_2)
+#endif
/* This is a separate function so that we can force an alloca cleanup
afterwards. */
return NULL;
return gettext (re_error_msgid[(int) ret]);
}
+#ifdef _LIBC
+weak_alias (__re_compile_pattern, re_compile_pattern)
+#endif
\f
/* Entry points compatible with 4.2 BSD regex library. We don't define
them unless specifically requested. */
{
re_comp_buf.buffer = (unsigned char *) malloc (200);
if (re_comp_buf.buffer == NULL)
- return gettext (re_error_msgid[(int) REG_ESPACE]);
+ return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
re_comp_buf.allocated = 200;
re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
if (re_comp_buf.fastmap == NULL)
- return gettext (re_error_msgid[(int) REG_ESPACE]);
+ return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
}
/* Since `re_exec' always passes NULL for the `regs' argument, we
REG_EXTENDED bit in CFLAGS is set; otherwise, to
RE_SYNTAX_POSIX_BASIC;
`newline_anchor' to REG_NEWLINE being set in CFLAGS;
- `fastmap' and `fastmap_accurate' to zero;
+ `fastmap' to an allocated space for the fastmap;
+ `fastmap_accurate' to zero;
`re_nsub' to the number of subexpressions in PATTERN.
PATTERN is the address of the pattern string.
preg->allocated = 0;
preg->used = 0;
- /* Don't bother to use a fastmap when searching. This simplifies the
- REG_NEWLINE case: if we used a fastmap, we'd have to put all the
- characters after newlines into the fastmap. This way, we just try
- every character. */
- preg->fastmap = 0;
+ /* Try to allocate space for the fastmap. */
+ preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
if (cflags & REG_ICASE)
{
/* Map uppercase characters to corresponding lowercase ones. */
for (i = 0; i < CHAR_SET_SIZE; i++)
- preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
+ preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
}
else
preg->translate = NULL;
unmatched close-group: both are REG_EPAREN. */
if (ret == REG_ERPAREN) ret = REG_EPAREN;
+ if (ret == REG_NOERROR && preg->fastmap)
+ {
+ /* Compute the fastmap now, since regexec cannot modify the pattern
+ buffer. */
+ if (re_compile_fastmap (preg) == -2)
+ {
+ /* Some error occured while computing the fastmap, just forget
+ about it. */
+ free (preg->fastmap);
+ preg->fastmap = NULL;
+ }
+ }
+
return (int) ret;
}
+#ifdef _LIBC
+weak_alias (__regcomp, regcomp)
+#endif
/* regexec searches for a given pattern, specified by PREG, in the
if (want_reg_info)
{
regs.num_regs = nmatch;
- regs.start = TALLOC (nmatch, regoff_t);
- regs.end = TALLOC (nmatch, regoff_t);
- if (regs.start == NULL || regs.end == NULL)
+ regs.start = TALLOC (nmatch * 2, regoff_t);
+ if (regs.start == NULL)
return (int) REG_NOMATCH;
+ regs.end = regs.start + nmatch;
}
/* Perform the searching operation. */
/* If we needed the temporary register info, free the space now. */
free (regs.start);
- free (regs.end);
}
/* We want zero return to mean success, unlike `re_search'. */
return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
}
+#ifdef _LIBC
+weak_alias (__regexec, regexec)
+#endif
/* Returns a message corresponding to an error code, ERRCODE, returned
return msg_size;
}
+#ifdef _LIBC
+weak_alias (__regerror, regerror)
+#endif
/* Free dynamically allocated space used by PREG. */
free (preg->translate);
preg->translate = NULL;
}
+#ifdef _LIBC
+weak_alias (__regfree, regfree)
+#endif
#endif /* not emacs */