/* Definitions for data structures and routines for the regular
- expression library, version 0.12.
-
- Copyright (C) 1985, 89, 90, 91, 92, 93, 95 Free Software Foundation, Inc.
+ expression library.
+ Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software Foundation,
- Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#ifndef _REGEX_H
+#define _REGEX_H 1
-#ifndef __REGEXP_LIBRARY_H__
-#define __REGEXP_LIBRARY_H__
+#include <sys/types.h>
+
+/* Allow the use in C++ code. */
+#ifdef __cplusplus
+extern "C" {
+#endif
/* POSIX says that <sys/types.h> must be included (by the caller) before
<regex.h>. */
-#if !defined (_POSIX_C_SOURCE) && !defined (_POSIX_SOURCE) && defined (VMS)
+#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
should be there. */
-#include <stddef.h>
+# include <stddef.h>
#endif
+/* The following two types have to be signed and unsigned integer type
+ wide enough to hold a value of a pointer. For most ANSI compilers
+ ptrdiff_t and size_t should be likely OK. Still size of these two
+ types is 2 for Microsoft C. Ugh... */
+typedef long int s_reg_t;
+typedef unsigned long int active_reg_t;
/* The following bits are used to determine the regexp syntax we
recognize. The set/not-set meanings are chosen so that Emacs syntax
remains the value 0. The bits are given in alphabetical order, and
the definitions shifted by one from the previous bit; thus, when we
add or remove a bit, only one other definition need change. */
-typedef unsigned reg_syntax_t;
+typedef unsigned long int reg_syntax_t;
/* If this bit is not set, then \ inside a bracket expression is literal.
If set, then such a \ quotes the following character. */
-#define RE_BACKSLASH_ESCAPE_IN_LISTS (1)
+#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
/* If this bit is not set, then + and ? are operators, and \+ and \? are
literals.
without further backtracking. */
#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+/* If this bit is set, do not process the GNU regex operators.
+ If not set, then the GNU regex operators are recognized. */
+#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
+
+/* If this bit is set, turn on internal regex debugging.
+ If not set, and debugging was on, turn it off.
+ This only works if regex.c is compiled -DDEBUG.
+ We define this bit always, so that all that's needed to turn on
+ debugging is to recompile regex.c; the calling code can always have
+ this bit set, and it won't affect anything in the normal case. */
+#define RE_DEBUG (RE_NO_GNU_OPS << 1)
+
+/* If this bit is set, a syntactically invalid interval is treated as
+ a string of ordinary characters. For example, the ERE 'a{1' is
+ treated as 'a\{1'. */
+#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
+
+/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
+ for ^, because it is difficult to scan the regex backwards to find
+ whether ^ should be special. */
+#define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
+
+/* If this bit is set, then \{ cannot be first in an bre or
+ immediately after an alternation or begin-group operator. */
+#define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
+
+/* If this bit is set, then no_sub will be set to 1 during
+ re_compile_pattern. */
+#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
+
/* This global variable defines the particular regexp syntax to use (for
some interfaces). When a regexp is compiled, the syntax used is
stored in the pattern buffer, so changing this does not affect
#define RE_SYNTAX_EMACS 0
#define RE_SYNTAX_AWK \
- (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
- | RE_NO_BK_PARENS | RE_NO_BK_REFS \
- | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
- | RE_UNMATCHED_RIGHT_PAREN_ORD)
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GNU_AWK \
+ ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \
+ & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \
+ | RE_CONTEXT_INVALID_OPS ))
#define RE_SYNTAX_POSIX_AWK \
- (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
+ | RE_INTERVALS | RE_NO_GNU_OPS)
#define RE_SYNTAX_GREP \
(RE_BK_PLUS_QM | RE_CHAR_CLASSES \
| RE_NO_BK_VBAR)
#define RE_SYNTAX_POSIX_EGREP \
- (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \
+ | RE_INVALID_INTERVAL_ORD)
/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
| RE_INTERVALS | RE_NO_EMPTY_RANGES)
#define RE_SYNTAX_POSIX_BASIC \
- (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
(_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
#define RE_SYNTAX_POSIX_EXTENDED \
- (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
- | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
- | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
- | RE_UNMATCHED_RIGHT_PAREN_ORD)
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
-/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
- replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
+ removed and RE_NO_BK_REFS is added. */
#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
(_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
| RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
(erroneously) define this in other header files, but we want our
value, so remove any previous define. */
#ifdef RE_DUP_MAX
-#undef RE_DUP_MAX
+# undef RE_DUP_MAX
#endif
-#define RE_DUP_MAX ((1 << 15) - 1)
+/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */
+#define RE_DUP_MAX (0x7fff)
/* POSIX `cflags' bits (i.e., information for `regcomp'). */
/* Like REG_NOTBOL, except for the end-of-line. */
#define REG_NOTEOL (1 << 1)
+/* Use PMATCH[0] to delimit the start and end of the search in the
+ buffer. */
+#define REG_STARTEND (1 << 2)
+
/* If any error codes are removed, changed, or added, update the
`re_error_msg' table in regex.c. */
typedef enum
{
+#ifdef _XOPEN_SOURCE
+ REG_ENOSYS = -1, /* This will never happen for this implementation. */
+#endif
+
REG_NOERROR = 0, /* Success. */
REG_NOMATCH, /* Didn't find a match (for regexec). */
/* POSIX regcomp return error codes. (In the order listed in the
standard.) */
REG_BADPAT, /* Invalid pattern. */
- REG_ECOLLATE, /* Not implemented. */
+ REG_ECOLLATE, /* Inalid collating element. */
REG_ECTYPE, /* Invalid character class name. */
REG_EESCAPE, /* Trailing backslash. */
REG_ESUBREG, /* Invalid back reference. */
private to the regex routines. */
#ifndef RE_TRANSLATE_TYPE
-#define RE_TRANSLATE_TYPE char *
+# define RE_TRANSLATE_TYPE char *
#endif
struct re_pattern_buffer
unsigned char *buffer;
/* Number of bytes to which `buffer' points. */
- unsigned long allocated;
+ unsigned long int allocated;
/* Number of bytes actually used in `buffer'. */
- unsigned long used;
+ unsigned long int used;
/* Syntax setting with which the pattern was compiled. */
reg_syntax_t syntax;
`re_match_2' returns information about at least this many registers
the first time a `regs' structure is passed. */
#ifndef RE_NREGS
-#define RE_NREGS 30
+# define RE_NREGS 30
#endif
#if __STDC__
-#define _RE_ARGS(args) args
+# define _RE_ARGS(args) args
#else /* not __STDC__ */
-#define _RE_ARGS(args) ()
+# define _RE_ARGS(args) ()
#endif /* not __STDC__ */
and syntax given by the global `re_syntax_options', into the buffer
BUFFER. Return NULL if successful, and an error string if not. */
extern const char *re_compile_pattern
- _RE_ARGS ((const char *pattern, int length,
+ _RE_ARGS ((const char *pattern, size_t length,
struct re_pattern_buffer *buffer));
_RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
unsigned num_regs, regoff_t *starts, regoff_t *ends));
-#ifdef _REGEX_RE_COMP
+#if defined _REGEX_RE_COMP || defined _LIBC
+# ifndef _CRAY
/* 4.2 bsd compatibility. */
extern char *re_comp _RE_ARGS ((const char *));
extern int re_exec _RE_ARGS ((const char *));
+# endif
+#endif
+
+/* GCC 2.95 and later have "__restrict"; C99 compilers have
+ "restrict", and "configure" may have defined "restrict". */
+#ifndef __restrict
+# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
+# if defined restrict || 199901L <= __STDC_VERSION__
+# define __restrict restrict
+# else
+# define __restrict
+# endif
+# endif
+#endif
+/* gcc 3.1 and up support the [restrict] syntax. */
+#ifndef __restrict_arr
+# if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
+# define __restrict_arr __restrict
+# else
+# define __restrict_arr
+# endif
#endif
/* POSIX compatibility. */
-extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
-extern int regexec
- _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch,
- regmatch_t pmatch[], int eflags));
-extern size_t regerror
- _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf,
- size_t errbuf_size));
-extern void regfree _RE_ARGS ((regex_t *preg));
-
-#endif /* not __REGEXP_LIBRARY_H__ */
+extern int regcomp _RE_ARGS ((regex_t *__restrict __preg,
+ const char *__restrict __pattern,
+ int __cflags));
+
+extern int regexec _RE_ARGS ((const regex_t *__restrict __preg,
+ const char *__restrict __string, size_t __nmatch,
+ regmatch_t __pmatch[__restrict_arr],
+ int __eflags));
+
+extern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg,
+ char *__errbuf, size_t __errbuf_size));
+
+extern void regfree _RE_ARGS ((regex_t *__preg));
+
+
+#ifdef __cplusplus
+}
+#endif /* C++ */
+
+#endif /* regex.h */
\f
/*
Local variables: