X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fregex.h;h=b4bbc01fd9ced99ba3d8d299889dac52b07de57a;hb=f792a03ccf327e0e80b60e6b7a013e7f89f9f5ca;hp=eb1b406ef2c3eaac461169c9945295c64d1d10ff;hpb=37f27c1b74b966c83e6fd2ffa74ed8817bf00e6e;p=gnulib.git diff --git a/lib/regex.h b/lib/regex.h index eb1b406ef..b4bbc01fd 100644 --- a/lib/regex.h +++ b/lib/regex.h @@ -1,7 +1,8 @@ /* Definitions for data structures and routines for the regular - expression library, version 0.12. - - Copyright (C) 1985, 89, 90, 91, 92, 93, 95 Free Software Foundation, Inc. + expression library. + Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003 + Free Software Foundation, Inc. + This file is part of the GNU C Library. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,33 +14,46 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#ifndef __REGEXP_LIBRARY_H__ -#define __REGEXP_LIBRARY_H__ +#ifndef _REGEX_H +#define _REGEX_H 1 + +#include + +/* Allow the use in C++ code. */ +#ifdef __cplusplus +extern "C" { +#endif /* POSIX says that must be included (by the caller) before . */ -#if !defined (_POSIX_C_SOURCE) && !defined (_POSIX_SOURCE) && defined (VMS) +#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS /* VMS doesn't have `size_t' in , even though POSIX says it should be there. */ -#include +# include #endif +/* The following two types have to be signed and unsigned integer type + wide enough to hold a value of a pointer. For most ANSI compilers + ptrdiff_t and size_t should be likely OK. Still size of these two + types is 2 for Microsoft C. Ugh... */ +typedef long int s_reg_t; +typedef unsigned long int active_reg_t; /* The following bits are used to determine the regexp syntax we recognize. The set/not-set meanings are chosen so that Emacs syntax remains the value 0. The bits are given in alphabetical order, and the definitions shifted by one from the previous bit; thus, when we add or remove a bit, only one other definition need change. */ -typedef unsigned reg_syntax_t; +typedef unsigned long int reg_syntax_t; /* If this bit is not set, then \ inside a bracket expression is literal. If set, then such a \ quotes the following character. */ -#define RE_BACKSLASH_ESCAPE_IN_LISTS (1) +#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) /* If this bit is not set, then + and ? are operators, and \+ and \? are literals. @@ -134,6 +148,27 @@ typedef unsigned reg_syntax_t; without further backtracking. */ #define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) +/* If this bit is set, do not process the GNU regex operators. + If not set, then the GNU regex operators are recognized. */ +#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) + +/* If this bit is set, turn on internal regex debugging. + If not set, and debugging was on, turn it off. + This only works if regex.c is compiled -DDEBUG. + We define this bit always, so that all that's needed to turn on + debugging is to recompile regex.c; the calling code can always have + this bit set, and it won't affect anything in the normal case. */ +#define RE_DEBUG (RE_NO_GNU_OPS << 1) + +/* If this bit is set, a syntactically invalid interval is treated as + a string of ordinary characters. For example, the ERE 'a{1' is + treated as 'a\{1'. */ +#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1) + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. */ +#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) + /* This global variable defines the particular regexp syntax to use (for some interfaces). When a regexp is compiled, the syntax used is stored in the pattern buffer, so changing this does not affect @@ -147,13 +182,20 @@ extern reg_syntax_t re_syntax_options; #define RE_SYNTAX_EMACS 0 #define RE_SYNTAX_AWK \ - (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) + (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ + | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ + | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) + +#define RE_SYNTAX_GNU_AWK \ + ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \ + & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \ + | RE_CONTEXT_INVALID_OPS )) #define RE_SYNTAX_POSIX_AWK \ - (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) + (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ + | RE_INTERVALS | RE_NO_GNU_OPS) #define RE_SYNTAX_GREP \ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ @@ -167,7 +209,8 @@ extern reg_syntax_t re_syntax_options; | RE_NO_BK_VBAR) #define RE_SYNTAX_POSIX_EGREP \ - (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) + (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \ + | RE_INVALID_INTERVAL_ORD) /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ #define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC @@ -189,13 +232,13 @@ extern reg_syntax_t re_syntax_options; (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) #define RE_SYNTAX_POSIX_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ + | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD) -/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS - replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ +/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is + removed and RE_NO_BK_REFS is added. */ #define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ @@ -207,9 +250,10 @@ extern reg_syntax_t re_syntax_options; (erroneously) define this in other header files, but we want our value, so remove any previous define. */ #ifdef RE_DUP_MAX -#undef RE_DUP_MAX +# undef RE_DUP_MAX #endif -#define RE_DUP_MAX ((1 << 15) - 1) +/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */ +#define RE_DUP_MAX (0x7fff) /* POSIX `cflags' bits (i.e., information for `regcomp'). */ @@ -249,6 +293,10 @@ extern reg_syntax_t re_syntax_options; `re_error_msg' table in regex.c. */ typedef enum { +#ifdef _XOPEN_SOURCE + REG_ENOSYS = -1, /* This will never happen for this implementation. */ +#endif + REG_NOERROR = 0, /* Success. */ REG_NOMATCH, /* Didn't find a match (for regexec). */ @@ -280,7 +328,7 @@ typedef enum private to the regex routines. */ #ifndef RE_TRANSLATE_TYPE -#define RE_TRANSLATE_TYPE char * +# define RE_TRANSLATE_TYPE char * #endif struct re_pattern_buffer @@ -292,10 +340,10 @@ struct re_pattern_buffer unsigned char *buffer; /* Number of bytes to which `buffer' points. */ - unsigned long allocated; + unsigned long int allocated; /* Number of bytes actually used in `buffer'. */ - unsigned long used; + unsigned long int used; /* Syntax setting with which the pattern was compiled. */ reg_syntax_t syntax; @@ -371,7 +419,7 @@ struct re_registers `re_match_2' returns information about at least this many registers the first time a `regs' structure is passed. */ #ifndef RE_NREGS -#define RE_NREGS 30 +# define RE_NREGS 30 #endif @@ -386,38 +434,21 @@ typedef struct /* Declarations for routines. */ -/* To avoid duplicating every routine declaration -- once with a - prototype (if we are ANSI), and once without (if we aren't) -- we - use the following macro to declare argument types. This - unfortunately clutters up the declarations a bit, but I think it's - worth it. */ - -#if __STDC__ - -#define _RE_ARGS(args) args - -#else /* not __STDC__ */ - -#define _RE_ARGS(args) () - -#endif /* not __STDC__ */ - /* Sets the current default syntax to SYNTAX, and return the old syntax. You can also simply assign to the `re_syntax_options' variable. */ -extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax)); +extern reg_syntax_t re_set_syntax (reg_syntax_t syntax); /* Compile the regular expression PATTERN, with length LENGTH and syntax given by the global `re_syntax_options', into the buffer BUFFER. Return NULL if successful, and an error string if not. */ -extern const char *re_compile_pattern - _RE_ARGS ((const char *pattern, int length, - struct re_pattern_buffer *buffer)); +extern const char *re_compile_pattern (const char *pattern, size_t length, + struct re_pattern_buffer *buffer); /* Compile a fastmap for the compiled pattern in BUFFER; used to accelerate searches. Return 0 if successful and -2 if was an internal error. */ -extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); +extern int re_compile_fastmap (struct re_pattern_buffer *buffer); /* Search in the string STRING (with length LENGTH) for the pattern @@ -425,31 +456,29 @@ extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); characters. Return the starting position of the match, -1 for no match, or -2 for an internal error. Also return register information in REGS (if REGS and BUFFER->no_sub are nonzero). */ -extern int re_search - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, int range, struct re_registers *regs)); +extern int re_search (struct re_pattern_buffer *buffer, const char *string, + int length, int start, int range, + struct re_registers *regs); /* Like `re_search', but search in the concatenation of STRING1 and STRING2. Also, stop searching at index START + STOP. */ -extern int re_search_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, int range, struct re_registers *regs, int stop)); +extern int re_search_2 (struct re_pattern_buffer *buffer, const char *string1, + int length1, const char *string2, int length2, + int start, int range, struct re_registers *regs, + int stop); /* Like `re_search', but return how many characters in STRING the regexp in BUFFER matched, starting at position START. */ -extern int re_match - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, struct re_registers *regs)); +extern int re_match (struct re_pattern_buffer *buffer, const char *string, + int length, int start, struct re_registers *regs); /* Relates to `re_match' as `re_search_2' relates to `re_search'. */ -extern int re_match_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, struct re_registers *regs, int stop)); +extern int re_match_2 (struct re_pattern_buffer *buffer, const char *string1, + int length1, const char *string2, int length2, + int start, struct re_registers *regs, int stop); /* Set REGS to hold NUM_REGS registers, storing them in STARTS and @@ -464,27 +493,59 @@ extern int re_match_2 Unless this function is called, the first search or match using PATTERN_BUFFER will allocate its own register data, without freeing the old data. */ -extern void re_set_registers - _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, - unsigned num_regs, regoff_t *starts, regoff_t *ends)); +extern void re_set_registers (struct re_pattern_buffer *buffer, + struct re_registers *regs, unsigned num_regs, + regoff_t *starts, regoff_t *ends); -#ifdef _REGEX_RE_COMP +#if defined _REGEX_RE_COMP || defined _LIBC +# ifndef _CRAY /* 4.2 bsd compatibility. */ -extern char *re_comp _RE_ARGS ((const char *)); -extern int re_exec _RE_ARGS ((const char *)); +extern char *re_comp (const char *); +extern int re_exec (const char *); +# endif +#endif + +/* GCC 2.95 and later have "__restrict"; C99 compilers have + "restrict", and "configure" may have defined "restrict". */ +#ifndef __restrict +# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__)) +# if defined restrict || 199901L <= __STDC_VERSION__ +# define __restrict restrict +# else +# define __restrict +# endif +# endif +#endif +/* gcc 3.1 and up support the [restrict] syntax. */ +#ifndef __restrict_arr +# if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1) +# define __restrict_arr __restrict +# else +# define __restrict_arr +# endif #endif /* POSIX compatibility. */ -extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags)); -extern int regexec - _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch, - regmatch_t pmatch[], int eflags)); -extern size_t regerror - _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf, - size_t errbuf_size)); -extern void regfree _RE_ARGS ((regex_t *preg)); - -#endif /* not __REGEXP_LIBRARY_H__ */ +extern int regcomp (regex_t *__restrict __preg, + const char *__restrict __pattern, + int __cflags); + +extern int regexec (const regex_t *__restrict __preg, + const char *__restrict __string, size_t __nmatch, + regmatch_t __pmatch[__restrict_arr], + int __eflags); + +extern size_t regerror (int __errcode, const regex_t *__preg, + char *__errbuf, size_t __errbuf_size); + +extern void regfree (regex_t *__preg); + + +#ifdef __cplusplus +} +#endif /* C++ */ + +#endif /* regex.h */ /* Local variables: