X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fregex.h;h=594d5e6aa48b6634e8f6e3b84b3b0c8e6fb9edc5;hb=217ec874c8e4610ce407f0204c4e3fe30b7c76ec;hp=5edeff3551fdec114aac10ed2afb87ef33a06eca;hpb=34d6df51137e66e6bdaf350c4041374d273f1e04;p=gnulib.git diff --git a/lib/regex.h b/lib/regex.h index 5edeff355..594d5e6aa 100644 --- a/lib/regex.h +++ b/lib/regex.h @@ -1,40 +1,83 @@ /* Definitions for data structures and routines for the regular - expression library, version 0.12. - Copyright (C) 1985,1989-1993,1995-1998, 2000 Free Software Foundation, Inc. - - This file is part of the GNU C Library. Its master source is NOT part of - the C library, however. The master source lives in /gd/gnu/lib. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, + expression library. + Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993, 1995, 1996, 1997, 1998, + 2000, 2001, 2002, 2003, 2005, 2006, 2009, 2010 Free Software Foundation, + Inc. + This file is part of the GNU C Library. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef _REGEX_H #define _REGEX_H 1 +#include + /* Allow the use in C++ code. */ #ifdef __cplusplus extern "C" { #endif -/* POSIX says that must be included (by the caller) before - . */ +/* Define __USE_GNU_REGEX to declare GNU extensions that violate the + POSIX name space rules. */ +#undef __USE_GNU_REGEX +#if (defined _GNU_SOURCE \ + || (!defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE \ + && !defined _XOPEN_SOURCE)) +# define __USE_GNU_REGEX 1 +#endif + +#ifdef _REGEX_LARGE_OFFSETS + +/* Use types and values that are wide enough to represent signed and + unsigned byte offsets in memory. This currently works only when + the regex code is used outside of the GNU C library; it is not yet + supported within glibc itself, and glibc users should not define + _REGEX_LARGE_OFFSETS. */ + +/* The type of the offset of a byte within a string. + For historical reasons POSIX 1003.1-2004 requires that regoff_t be + at least as wide as off_t. However, many common POSIX platforms set + regoff_t to the more-sensible ssize_t and the Open Group has + signalled its intention to change the requirement to be that + regoff_t be at least as wide as ptrdiff_t and ssize_t; see XBD ERN + 60 (2005-08-25). We don't know of any hosts where ssize_t or + ptrdiff_t is wider than ssize_t, so ssize_t is safe. */ +typedef ssize_t regoff_t; + +/* The type of nonnegative object indexes. Traditionally, GNU regex + uses 'int' for these. Code that uses __re_idx_t should work + regardless of whether the type is signed. */ +typedef size_t __re_idx_t; + +/* The type of object sizes. */ +typedef size_t __re_size_t; + +/* The type of object sizes, in places where the traditional code + uses unsigned long int. */ +typedef size_t __re_long_size_t; + +#else + +/* Use types that are binary-compatible with the traditional GNU regex + implementation, which mishandles strings longer than INT_MAX. */ + +typedef int regoff_t; +typedef int __re_idx_t; +typedef unsigned int __re_size_t; +typedef unsigned long int __re_long_size_t; -#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS -/* VMS doesn't have `size_t' in , even though POSIX says it - should be there. */ -# include #endif /* The following two types have to be signed and unsigned integer type @@ -51,20 +94,22 @@ typedef unsigned long int active_reg_t; add or remove a bit, only one other definition need change. */ typedef unsigned long int reg_syntax_t; +#ifdef __USE_GNU_REGEX + /* If this bit is not set, then \ inside a bracket expression is literal. If set, then such a \ quotes the following character. */ -#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) +# define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) /* If this bit is not set, then + and ? are operators, and \+ and \? are literals. If set, then \+ and \? are operators and + and ? are literals. */ -#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) +# define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) /* If this bit is set, then character classes are supported. They are: [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. If not set, then character classes are not supported. */ -#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) +# define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) /* If this bit is set, then ^ and $ are always anchors (outside bracket expressions, of course). @@ -78,7 +123,7 @@ typedef unsigned long int reg_syntax_t; POSIX draft 11.2 says that * etc. in leading positions is undefined. We already implemented a previous draft which made those constructs invalid, though, so we haven't changed the code back. */ -#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) +# define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) /* If this bit is set, then special characters are always special regardless of where they are in the pattern. @@ -86,71 +131,71 @@ typedef unsigned long int reg_syntax_t; some contexts; otherwise they are ordinary. Specifically, * + ? and intervals are only special when not after the beginning, open-group, or alternation operator. */ -#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) +# define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) /* If this bit is set, then *, +, ?, and { cannot be first in an re or immediately after an alternation or begin-group operator. */ -#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) +# define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) /* If this bit is set, then . matches newline. If not set, then it doesn't. */ -#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) +# define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) /* If this bit is set, then . doesn't match NUL. If not set, then it does. */ -#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) +# define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) /* If this bit is set, nonmatching lists [^...] do not match newline. If not set, they do. */ -#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) +# define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) /* If this bit is set, either \{...\} or {...} defines an interval, depending on RE_NO_BK_BRACES. If not set, \{, \}, {, and } are literals. */ -#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) +# define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) /* If this bit is set, +, ? and | aren't recognized as operators. If not set, they are. */ -#define RE_LIMITED_OPS (RE_INTERVALS << 1) +# define RE_LIMITED_OPS (RE_INTERVALS << 1) /* If this bit is set, newline is an alternation operator. If not set, newline is literal. */ -#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) +# define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) /* If this bit is set, then `{...}' defines an interval, and \{ and \} are literals. If not set, then `\{...\}' defines an interval. */ -#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) +# define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) /* If this bit is set, (...) defines a group, and \( and \) are literals. If not set, \(...\) defines a group, and ( and ) are literals. */ -#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) +# define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) /* If this bit is set, then \ matches . If not set, then \ is a back-reference. */ -#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) +# define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) /* If this bit is set, then | is an alternation operator, and \| is literal. If not set, then \| is an alternation operator, and | is literal. */ -#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) +# define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) /* If this bit is set, then an ending range point collating higher than the starting range point, as in [z-a], is invalid. If not set, then when ending range point collates higher than the starting range point, the range is ignored. */ -#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) +# define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) /* If this bit is set, then an unmatched ) is ordinary. If not set, then an unmatched ) is invalid. */ -#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) +# define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) /* If this bit is set, succeed as soon as we match the whole pattern, without further backtracking. */ -#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) +# define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) /* If this bit is set, do not process the GNU regex operators. If not set, then the GNU regex operators are recognized. */ -#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) +# define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) /* If this bit is set, turn on internal regex debugging. If not set, and debugging was on, turn it off. @@ -158,7 +203,31 @@ typedef unsigned long int reg_syntax_t; We define this bit always, so that all that's needed to turn on debugging is to recompile regex.c; the calling code can always have this bit set, and it won't affect anything in the normal case. */ -#define RE_DEBUG (RE_NO_GNU_OPS << 1) +# define RE_DEBUG (RE_NO_GNU_OPS << 1) + +/* If this bit is set, a syntactically invalid interval is treated as + a string of ordinary characters. For example, the ERE 'a{1' is + treated as 'a\{1'. */ +# define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1) + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. */ +# define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) + +/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only + for ^, because it is difficult to scan the regex backwards to find + whether ^ should be special. */ +# define RE_CARET_ANCHORS_HERE (RE_ICASE << 1) + +/* If this bit is set, then \{ cannot be first in an bre or + immediately after an alternation or begin-group operator. */ +# define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1) + +/* If this bit is set, then no_sub will be set to 1 during + re_compile_pattern. */ +# define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1) + +#endif /* defined __USE_GNU_REGEX */ /* This global variable defines the particular regexp syntax to use (for some interfaces). When a regexp is compiled, the syntax used is @@ -166,61 +235,64 @@ typedef unsigned long int reg_syntax_t; already-compiled regexps. */ extern reg_syntax_t re_syntax_options; +#ifdef __USE_GNU_REGEX /* Define combinations of the above bits for the standard possibilities. (The [[[ comments delimit what gets put into the Texinfo file, so don't delete them!) */ /* [[[begin syntaxes]]] */ -#define RE_SYNTAX_EMACS 0 +# define RE_SYNTAX_EMACS 0 -#define RE_SYNTAX_AWK \ +# define RE_SYNTAX_AWK \ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ | RE_NO_BK_PARENS | RE_NO_BK_REFS \ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) -#define RE_SYNTAX_GNU_AWK \ +# define RE_SYNTAX_GNU_AWK \ ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \ - & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS)) + & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \ + | RE_CONTEXT_INVALID_OPS )) -#define RE_SYNTAX_POSIX_AWK \ +# define RE_SYNTAX_POSIX_AWK \ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ | RE_INTERVALS | RE_NO_GNU_OPS) -#define RE_SYNTAX_GREP \ +# define RE_SYNTAX_GREP \ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ | RE_NEWLINE_ALT) -#define RE_SYNTAX_EGREP \ +# define RE_SYNTAX_EGREP \ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ | RE_NO_BK_VBAR) -#define RE_SYNTAX_POSIX_EGREP \ - (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) +# define RE_SYNTAX_POSIX_EGREP \ + (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \ + | RE_INVALID_INTERVAL_ORD) /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ -#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC +# define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC -#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC +# define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC /* Syntax bits common to both basic and extended POSIX regex syntax. */ -#define _RE_SYNTAX_POSIX_COMMON \ +# define _RE_SYNTAX_POSIX_COMMON \ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ | RE_INTERVALS | RE_NO_EMPTY_RANGES) -#define RE_SYNTAX_POSIX_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) +# define RE_SYNTAX_POSIX_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP) /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this isn't minimal, since other operators, such as \`, aren't disabled. */ -#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ +# define RE_SYNTAX_POSIX_MINIMAL_BASIC \ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) -#define RE_SYNTAX_POSIX_EXTENDED \ +# define RE_SYNTAX_POSIX_EXTENDED \ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ @@ -228,21 +300,34 @@ extern reg_syntax_t re_syntax_options; /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is removed and RE_NO_BK_REFS is added. */ -#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ +# define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ | RE_NO_BK_PARENS | RE_NO_BK_REFS \ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) /* [[[end syntaxes]]] */ + +#endif /* defined __USE_GNU_REGEX */ -/* Maximum number of duplicates an interval can allow. Some systems - (erroneously) define this in other header files, but we want our +#ifdef __USE_GNU_REGEX + +/* Maximum number of duplicates an interval can allow. POSIX-conforming + systems might define this in , but we want our value, so remove any previous define. */ -#ifdef RE_DUP_MAX -# undef RE_DUP_MAX -#endif -/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */ -#define RE_DUP_MAX (0x7fff) +# ifdef RE_DUP_MAX +# undef RE_DUP_MAX +# endif + +/* RE_DUP_MAX is 2**15 - 1 because an earlier implementation stored + the counter as a 2-byte signed integer. This is no longer true, so + RE_DUP_MAX could be increased to (INT_MAX / 10 - 1), or to + ((SIZE_MAX - 2) / 10 - 1) if _REGEX_LARGE_OFFSETS is defined. + However, there would be a huge performance problem if someone + actually used a pattern like a\{214748363\}, so RE_DUP_MAX retains + its historical value. */ +# define RE_DUP_MAX (0x7fff) + +#endif /* defined __USE_GNU_REGEX */ /* POSIX `cflags' bits (i.e., information for `regcomp'). */ @@ -253,16 +338,16 @@ extern reg_syntax_t re_syntax_options; /* If this bit is set, then ignore case when matching. If not set, then case is significant. */ -#define REG_ICASE (REG_EXTENDED << 1) +#define REG_ICASE (1 << 1) /* If this bit is set, then anchors do not match at newline characters in the string. If not set, then anchors do match at newlines. */ -#define REG_NEWLINE (REG_ICASE << 1) +#define REG_NEWLINE (1 << 2) /* If this bit is set, then report only success or fail in regexec. If not set, then returns differ between not matching and errors. */ -#define REG_NOSUB (REG_NEWLINE << 1) +#define REG_NOSUB (1 << 3) /* POSIX `eflags' bits (i.e., information for regexec). */ @@ -277,137 +362,176 @@ extern reg_syntax_t re_syntax_options; /* Like REG_NOTBOL, except for the end-of-line. */ #define REG_NOTEOL (1 << 1) +/* Use PMATCH[0] to delimit the start and end of the search in the + buffer. */ +#define REG_STARTEND (1 << 2) + /* If any error codes are removed, changed, or added, update the - `re_error_msg' table in regex.c. */ + `__re_error_msgid' table in regcomp.c. */ + typedef enum { -#ifdef _XOPEN_SOURCE - REG_ENOSYS = -1, /* This will never happen for this implementation. */ -#endif - - REG_NOERROR = 0, /* Success. */ - REG_NOMATCH, /* Didn't find a match (for regexec). */ + _REG_ENOSYS = -1, /* This will never happen for this implementation. */ + _REG_NOERROR = 0, /* Success. */ + _REG_NOMATCH, /* Didn't find a match (for regexec). */ /* POSIX regcomp return error codes. (In the order listed in the standard.) */ - REG_BADPAT, /* Invalid pattern. */ - REG_ECOLLATE, /* Not implemented. */ - REG_ECTYPE, /* Invalid character class name. */ - REG_EESCAPE, /* Trailing backslash. */ - REG_ESUBREG, /* Invalid back reference. */ - REG_EBRACK, /* Unmatched left bracket. */ - REG_EPAREN, /* Parenthesis imbalance. */ - REG_EBRACE, /* Unmatched \{. */ - REG_BADBR, /* Invalid contents of \{\}. */ - REG_ERANGE, /* Invalid range end. */ - REG_ESPACE, /* Ran out of memory. */ - REG_BADRPT, /* No preceding re for repetition op. */ + _REG_BADPAT, /* Invalid pattern. */ + _REG_ECOLLATE, /* Invalid collating element. */ + _REG_ECTYPE, /* Invalid character class name. */ + _REG_EESCAPE, /* Trailing backslash. */ + _REG_ESUBREG, /* Invalid back reference. */ + _REG_EBRACK, /* Unmatched left bracket. */ + _REG_EPAREN, /* Parenthesis imbalance. */ + _REG_EBRACE, /* Unmatched \{. */ + _REG_BADBR, /* Invalid contents of \{\}. */ + _REG_ERANGE, /* Invalid range end. */ + _REG_ESPACE, /* Ran out of memory. */ + _REG_BADRPT, /* No preceding re for repetition op. */ /* Error codes we've added. */ - REG_EEND, /* Premature end. */ - REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ - REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ + _REG_EEND, /* Premature end. */ + _REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ + _REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ } reg_errcode_t; + +#ifdef _XOPEN_SOURCE +# define REG_ENOSYS _REG_ENOSYS +#endif +#define REG_NOERROR _REG_NOERROR +#define REG_NOMATCH _REG_NOMATCH +#define REG_BADPAT _REG_BADPAT +#define REG_ECOLLATE _REG_ECOLLATE +#define REG_ECTYPE _REG_ECTYPE +#define REG_EESCAPE _REG_EESCAPE +#define REG_ESUBREG _REG_ESUBREG +#define REG_EBRACK _REG_EBRACK +#define REG_EPAREN _REG_EPAREN +#define REG_EBRACE _REG_EBRACE +#define REG_BADBR _REG_BADBR +#define REG_ERANGE _REG_ERANGE +#define REG_ESPACE _REG_ESPACE +#define REG_BADRPT _REG_BADRPT +#define REG_EEND _REG_EEND +#define REG_ESIZE _REG_ESIZE +#define REG_ERPAREN _REG_ERPAREN +/* struct re_pattern_buffer normally uses member names like `buffer' + that POSIX does not allow. In POSIX mode these members have names + with leading `re_' (e.g., `re_buffer'). */ +#ifdef __USE_GNU_REGEX +# define _REG_RE_NAME(id) id +# define _REG_RM_NAME(id) id +#else +# define _REG_RE_NAME(id) re_##id +# define _REG_RM_NAME(id) rm_##id +#endif + +/* The user can specify the type of the re_translate member by + defining the macro RE_TRANSLATE_TYPE, which defaults to unsigned + char *. This pollutes the POSIX name space, so in POSIX mode just + use unsigned char *. */ +#ifdef __USE_GNU_REGEX +# ifndef RE_TRANSLATE_TYPE +# define RE_TRANSLATE_TYPE unsigned char * +# endif +# define REG_TRANSLATE_TYPE RE_TRANSLATE_TYPE +#else +# define REG_TRANSLATE_TYPE unsigned char * +#endif + /* This data structure represents a compiled pattern. Before calling the pattern compiler, the fields `buffer', `allocated', `fastmap', `translate', and `no_sub' can be set. After the pattern has been compiled, the `re_nsub' field is available. All other fields are private to the regex routines. */ -#ifndef RE_TRANSLATE_TYPE -# define RE_TRANSLATE_TYPE char * -#endif - struct re_pattern_buffer { -/* [[[begin pattern_buffer]]] */ - /* Space that holds the compiled pattern. It is declared as - `unsigned char *' because its elements are - sometimes used as array indexes. */ - unsigned char *buffer; + /* Space that holds the compiled pattern. It is declared as + `unsigned char *' because its elements are sometimes used as + array indexes. */ + unsigned char *_REG_RE_NAME (buffer); - /* Number of bytes to which `buffer' points. */ - unsigned long int allocated; + /* Number of bytes to which `buffer' points. */ + __re_long_size_t _REG_RE_NAME (allocated); - /* Number of bytes actually used in `buffer'. */ - unsigned long int used; + /* Number of bytes actually used in `buffer'. */ + __re_long_size_t _REG_RE_NAME (used); - /* Syntax setting with which the pattern was compiled. */ - reg_syntax_t syntax; + /* Syntax setting with which the pattern was compiled. */ + reg_syntax_t _REG_RE_NAME (syntax); - /* Pointer to a fastmap, if any, otherwise zero. re_search uses - the fastmap, if there is one, to skip over impossible - starting points for matches. */ - char *fastmap; + /* Pointer to a fastmap, if any, otherwise zero. re_search uses the + fastmap, if there is one, to skip over impossible starting points + for matches. */ + char *_REG_RE_NAME (fastmap); - /* Either a translate table to apply to all characters before - comparing them, or zero for no translation. The translation - is applied to a pattern when it is compiled and to a string - when it is matched. */ - RE_TRANSLATE_TYPE translate; + /* Either a translate table to apply to all characters before + comparing them, or zero for no translation. The translation is + applied to a pattern when it is compiled and to a string when it + is matched. */ + REG_TRANSLATE_TYPE _REG_RE_NAME (translate); - /* Number of subexpressions found by the compiler. */ + /* Number of subexpressions found by the compiler. */ size_t re_nsub; - /* Zero if this pattern cannot match the empty string, one else. - Well, in truth it's used only in `re_search_2', to see - whether or not we should use the fastmap, so we don't set - this absolutely perfectly; see `re_compile_fastmap' (the - `duplicate' case). */ - unsigned can_be_null : 1; - - /* If REGS_UNALLOCATED, allocate space in the `regs' structure - for `max (RE_NREGS, re_nsub + 1)' groups. - If REGS_REALLOCATE, reallocate space if necessary. - If REGS_FIXED, use what's there. */ -#define REGS_UNALLOCATED 0 -#define REGS_REALLOCATE 1 -#define REGS_FIXED 2 - unsigned regs_allocated : 2; + /* Zero if this pattern cannot match the empty string, one else. + Well, in truth it's used only in `re_search_2', to see whether or + not we should use the fastmap, so we don't set this absolutely + perfectly; see `re_compile_fastmap' (the `duplicate' case). */ + unsigned int _REG_RE_NAME (can_be_null) : 1; + + /* If REGS_UNALLOCATED, allocate space in the `regs' structure + for `max (RE_NREGS, re_nsub + 1)' groups. + If REGS_REALLOCATE, reallocate space if necessary. + If REGS_FIXED, use what's there. */ +#ifdef __USE_GNU_REGEX +# define REGS_UNALLOCATED 0 +# define REGS_REALLOCATE 1 +# define REGS_FIXED 2 +#endif + unsigned int _REG_RE_NAME (regs_allocated) : 2; - /* Set to zero when `regex_compile' compiles a pattern; set to one - by `re_compile_fastmap' if it updates the fastmap. */ - unsigned fastmap_accurate : 1; + /* Set to zero when `regex_compile' compiles a pattern; set to one + by `re_compile_fastmap' if it updates the fastmap. */ + unsigned int _REG_RE_NAME (fastmap_accurate) : 1; - /* If set, `re_match_2' does not return information about - subexpressions. */ - unsigned no_sub : 1; + /* If set, `re_match_2' does not return information about + subexpressions. */ + unsigned int _REG_RE_NAME (no_sub) : 1; - /* If set, a beginning-of-line anchor doesn't match at the - beginning of the string. */ - unsigned not_bol : 1; + /* If set, a beginning-of-line anchor doesn't match at the beginning + of the string. */ + unsigned int _REG_RE_NAME (not_bol) : 1; - /* Similarly for an end-of-line anchor. */ - unsigned not_eol : 1; + /* Similarly for an end-of-line anchor. */ + unsigned int _REG_RE_NAME (not_eol) : 1; - /* If true, an anchor at a newline matches. */ - unsigned newline_anchor : 1; + /* If true, an anchor at a newline matches. */ + unsigned int _REG_RE_NAME (newline_anchor) : 1; /* [[[end pattern_buffer]]] */ }; typedef struct re_pattern_buffer regex_t; -/* Type for byte offsets within the string. POSIX mandates this. */ -typedef int regoff_t; - - /* This is the structure we store register match data in. See regex.texinfo for a full description of what registers match. */ struct re_registers { - unsigned num_regs; - regoff_t *start; - regoff_t *end; + __re_size_t _REG_RM_NAME (num_regs); + regoff_t *_REG_RM_NAME (start); + regoff_t *_REG_RM_NAME (end); }; /* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, `re_match_2' returns information about at least this many registers the first time a `regs' structure is passed. */ -#ifndef RE_NREGS +#if !defined RE_NREGS && defined __USE_GNU_REGEX # define RE_NREGS 30 #endif @@ -423,38 +547,21 @@ typedef struct /* Declarations for routines. */ -/* To avoid duplicating every routine declaration -- once with a - prototype (if we are ANSI), and once without (if we aren't) -- we - use the following macro to declare argument types. This - unfortunately clutters up the declarations a bit, but I think it's - worth it. */ - -#if __STDC__ - -# define _RE_ARGS(args) args - -#else /* not __STDC__ */ - -# define _RE_ARGS(args) () - -#endif /* not __STDC__ */ - /* Sets the current default syntax to SYNTAX, and return the old syntax. You can also simply assign to the `re_syntax_options' variable. */ -extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax)); +extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax); /* Compile the regular expression PATTERN, with length LENGTH and syntax given by the global `re_syntax_options', into the buffer BUFFER. Return NULL if successful, and an error string if not. */ -extern const char *re_compile_pattern - _RE_ARGS ((const char *pattern, size_t length, - struct re_pattern_buffer *buffer)); +extern const char *re_compile_pattern (const char *__pattern, size_t __length, + struct re_pattern_buffer *__buffer); /* Compile a fastmap for the compiled pattern in BUFFER; used to accelerate searches. Return 0 if successful and -2 if was an internal error. */ -extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); +extern int re_compile_fastmap (struct re_pattern_buffer *__buffer); /* Search in the string STRING (with length LENGTH) for the pattern @@ -462,31 +569,35 @@ extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); characters. Return the starting position of the match, -1 for no match, or -2 for an internal error. Also return register information in REGS (if REGS and BUFFER->no_sub are nonzero). */ -extern int re_search - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, int range, struct re_registers *regs)); +extern regoff_t re_search (struct re_pattern_buffer *__buffer, + const char *__string, __re_idx_t __length, + __re_idx_t __start, regoff_t __range, + struct re_registers *__regs); /* Like `re_search', but search in the concatenation of STRING1 and STRING2. Also, stop searching at index START + STOP. */ -extern int re_search_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, int range, struct re_registers *regs, int stop)); +extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer, + const char *__string1, __re_idx_t __length1, + const char *__string2, __re_idx_t __length2, + __re_idx_t __start, regoff_t __range, + struct re_registers *__regs, + __re_idx_t __stop); /* Like `re_search', but return how many characters in STRING the regexp in BUFFER matched, starting at position START. */ -extern int re_match - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, struct re_registers *regs)); +extern regoff_t re_match (struct re_pattern_buffer *__buffer, + const char *__string, __re_idx_t __length, + __re_idx_t __start, struct re_registers *__regs); /* Relates to `re_match' as `re_search_2' relates to `re_search'. */ -extern int re_match_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, struct re_registers *regs, int stop)); +extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer, + const char *__string1, __re_idx_t __length1, + const char *__string2, __re_idx_t __length2, + __re_idx_t __start, struct re_registers *__regs, + __re_idx_t __stop); /* Set REGS to hold NUM_REGS registers, storing them in STARTS and @@ -501,30 +612,61 @@ extern int re_match_2 Unless this function is called, the first search or match using PATTERN_BUFFER will allocate its own register data, without freeing the old data. */ -extern void re_set_registers - _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, - unsigned num_regs, regoff_t *starts, regoff_t *ends)); +extern void re_set_registers (struct re_pattern_buffer *__buffer, + struct re_registers *__regs, + __re_size_t __num_regs, + regoff_t *__starts, regoff_t *__ends); #if defined _REGEX_RE_COMP || defined _LIBC # ifndef _CRAY /* 4.2 bsd compatibility. */ -extern char *re_comp _RE_ARGS ((const char *)); -extern int re_exec _RE_ARGS ((const char *)); +extern char *re_comp (const char *); +extern int re_exec (const char *); +# endif +#endif + +/* GCC 2.95 and later have "__restrict"; C99 compilers have + "restrict", and "configure" may have defined "restrict". + Other compilers use __restrict, __restrict__, and _Restrict, and + 'configure' might #define 'restrict' to those words, so pick a + different name. */ +#ifndef _Restrict_ +# if 199901L <= __STDC_VERSION__ +# define _Restrict_ restrict +# elif 2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__) +# define _Restrict_ __restrict +# else +# define _Restrict_ +# endif +#endif +/* gcc 3.1 and up support the [restrict] syntax. Don't trust + sys/cdefs.h's definition of __restrict_arr, though, as it + mishandles gcc -ansi -pedantic. */ +#ifndef _Restrict_arr_ +# if ((199901L <= __STDC_VERSION__ \ + || ((3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__)) \ + && !__STRICT_ANSI__)) \ + && !defined __GNUG__) +# define _Restrict_arr_ _Restrict_ +# else +# define _Restrict_arr_ # endif #endif /* POSIX compatibility. */ -extern int regcomp _RE_ARGS ((regex_t *__preg, const char *__pattern, - int __cflags)); +extern int regcomp (regex_t *_Restrict_ __preg, + const char *_Restrict_ __pattern, + int __cflags); -extern int regexec _RE_ARGS ((const regex_t *__preg, - const char *__string, size_t __nmatch, - regmatch_t __pmatch[], int __eflags)); +extern int regexec (const regex_t *_Restrict_ __preg, + const char *_Restrict_ __string, size_t __nmatch, + regmatch_t __pmatch[_Restrict_arr_], + int __eflags); -extern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg, - char *__errbuf, size_t __errbuf_size)); +extern size_t regerror (int __errcode, const regex_t *_Restrict_ __preg, + char *_Restrict_ __errbuf, size_t __errbuf_size); -extern void regfree _RE_ARGS ((regex_t *__preg)); +extern void regfree (regex_t *__preg); #ifdef __cplusplus @@ -532,11 +674,3 @@ extern void regfree _RE_ARGS ((regex_t *__preg)); #endif /* C++ */ #endif /* regex.h */ - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/