From: Paul Eggert Date: Wed, 24 Aug 2005 22:29:38 +0000 (+0000) Subject: * config/srclist.txt: X-Git-Tag: cvs-readonly~3087 X-Git-Url: http://erislabs.net/gitweb/?a=commitdiff_plain;h=d6103c5e50ea7a7d52e98a3112d3d22f3c78c846;p=gnulib.git * config/srclist.txt: Remove glibc bug 1233 and add 1236, which supersedes it. * lib/regex.h: Fix a multitude of POSIX name space violations. These changes have an effect only for programs that define _POSIX_C_SOURCE, _POSIX_SOURCE, or _XOPEN_SOURCE; they do not change anything for programs compiled in the normal way. Also, there is no effect on the ABI. (_REGEX_SOURCE): New macro. Do not include if _XOPEN_SOURCE and VMS are both defined and _GNU_SOURCE is not; this fixes a name space violation. Rename the following macros to obey POSIX requirements. The old names are still visible as macros if _REGEX_SOURCE is defined. (REG_BACKSLASH_ESCAPE_IN_LISTS): renamed from RE_BACKSLASH_ESCAPE_IN_LISTS. (REG_BK_PLUS_QM): renamed from RE_BK_PLUS_QM. (REG_CHAR_CLASSES): renamed from RE_CHAR_CLASSES. (REG_CONTEXT_INDEP_ANCHORS): renamed from RE_CONTEXT_INDEP_ANCHORS. (REG_CONTEXT_INDEP_OPS): renamed from RE_CONTEXT_INDEP_OPS. (REG_CONTEXT_INVALID_OPS): renamed from RE_CONTEXT_INVALID_OPS. (REG_DOT_NEWLINE): renamed from RE_DOT_NEWLINE. (REG_DOT_NOT_NULL): renamed from RE_DOT_NOT_NULL. (REG_HAT_LISTS_NOT_NEWLINE): renamed from RE_HAT_LISTS_NOT_NEWLINE. (REG_INTERVALS): renamed from RE_INTERVALS. (REG_LIMITED_OPS): renamed from RE_LIMITED_OPS. (REG_NEWLINE_ALT): renamed from RE_NEWLINE_ALT. (REG_NO_BK_BRACES): renamed from RE_NO_BK_BRACES. (REG_NO_BK_PARENS): renamed from RE_NO_BK_PARENS. (REG_NO_BK_REFS): renamed from RE_NO_BK_REFS. (REG_NO_BK_VBAR): renamed from RE_NO_BK_VBAR. (REG_NO_EMPTY_RANGES): renamed from RE_NO_EMPTY_RANGES. (REG_UNMATCHED_RIGHT_PAREN_ORD): renamed from RE_UNMATCHED_RIGHT_PAREN_ORD. (REG_NO_POSIX_BACKTRACKING): renamed from RE_NO_POSIX_BACKTRACKING. (REG_NO_GNU_OPS): renamed from RE_NO_GNU_OPS. 
(REG_DEBUG): renamed from RE_DEBUG. (REG_INVALID_INTERVAL_ORD): renamed from RE_INVALID_INTERVAL_ORD. (REG_IGNORE_CASE): renamed from RE_ICASE. This renaming is a bit unusual, since we can't clash with the POSIX REG_ICASE. (REG_CARET_ANCHORS_HERE): renamed from RE_CARET_ANCHORS_HERE. (REG_CONTEXT_INVALID_DUP): renamed from RE_CONTEXT_INVALID_DUP. (REG_NO_SUB): renamed from RE_NO_SUB. (REG_SYNTAX_EMACS): renamed from RE_SYNTAX_EMACS. (REG_SYNTAX_AWK): renamed from RE_SYNTAX_AWK. (REG_SYNTAX_GNU_AWK): renamed from RE_SYNTAX_GNU_AWK. (REG_SYNTAX_POSIX_AWK): renamed from RE_SYNTAX_POSIX_AWK. (REG_SYNTAX_GREP): renamed from RE_SYNTAX_GREP. (REG_SYNTAX_EGREP): renamed from RE_SYNTAX_EGREP. (REG_SYNTAX_POSIX_EGREP): renamed from RE_SYNTAX_POSIX_EGREP. (REG_SYNTAX_ED): renamed from RE_SYNTAX_ED. (REG_SYNTAX_SED): renamed from RE_SYNTAX_SED. (_REG_SYNTAX_POSIX_COMMON): renamed from _RE_SYNTAX_POSIX_COMMON. (REG_SYNTAX_POSIX_BASIC): renamed from RE_SYNTAX_POSIX_BASIC. (REG_SYNTAX_POSIX_MINIMAL_BASIC): renamed from RE_SYNTAX_POSIX_MINIMAL_BASIC. (REG_SYNTAX_POSIX_EXTENDED): renamed from RE_SYNTAX_POSIX_EXTENDED. (REG_SYNTAX_POSIX_MINIMAL_EXTENDED): renamed from RE_SYNTAX_POSIX_MINIMAL_EXTENDED. (REG_DUP_MAX): renamed from RE_DUP_MAX. No need to undef it. (REG_UNALLOCATED): Renamed from REGS_UNALLOCATED. (REG_REALLOCATE): Renamed from REGS_REALLOCATE. (REG_FIXED): Renamed from REGS_FIXED. (REG_NREGS): Renamed from RE_NREGS. (REG_ICASE, REG_NEWLINE, REG_NOSUB): Do not depend on the values of other REG_* macros, since POSIX says the user is allowed to #undef these macros selectively. (reg_errcode_t): Update comment stating what other tables need to be consistent. Rename the following enum values to obey POSIX requirements. The old names are still visible as macros. (_REG_ENOSYS): Renamed from REG_ENOSYS. 
Define even if _XOPEN_SOURCE is not defined, since GNU is supposed to be a superset of POSIX as much as possible, and since we want reg_errcode_t to be a signed type for implementation consistency. (_REG_NOERROR): Renamed from REG_NOERROR. (_REG_NOMATCH): Renamed from REG_NOMATCH. (_REG_BADPAT): Renamed from REG_BADPAT. (_REG_ECOLLATE): Renamed from REG_ECOLLATE. (_REG_ECTYPE): Renamed from REG_ECTYPE. (_REG_EESCAPE): Renamed from REG_EESCAPE. (_REG_ESUBREG): Renamed from REG_ESUBREG. (_REG_EBRACK): Renamed from REG_EBRACK. (_REG_EPAREN): Renamed from REG_EPAREN. (_REG_EBRACE): Renamed from REG_EBRACE. (_REG_BADBR): Renamed from REG_BADBR. (_REG_ERANGE): Renamed from REG_ERANGE. (_REG_ESPACE): Renamed from REG_ESPACE. (_REG_BADRPT): Renamed from REG_BADRPT. (_REG_EEND): Renamed from REG_EEND. (_REG_ESIZE): Renamed from REG_ESIZE. (_REG_ERPAREN): Renamed from REG_ERPAREN. (REG_ENOSYS, REG_NOERROR, REG_NOMATCH, REG_BADPAT, REG_ECOLLATE): (REG_ECTYPE, REG_EESCAPE, REG_ESUBREG, REG_EBRACK, REG_EPAREN): (REG_EBRACE, REG_BADBR, REG_ERANGE, REG_ESPACE, REG_BADRPT, REG_EEND): (REG_ESIZE, REG_ERPAREN): Now macros, not enum constants. (_REG_RE_NAME, _REG_RM_NAME): New macros. (REG_TRANSLATE_TYPE): Renamed from RE_TRANSLATE_TYPE. All uses changed. But support the old name if the new one is not defined and if _REGEX_SOURCE. Change the following member names in struct re_pattern_buffer. The old names are still supported if !_REGEX_SOURCE. The new names are always supported, regardless of _REGEX_SOURCE. (re_buffer): Renamed from buffer. (re_allocated): Renamed from allocated. (re_used): Renamed from used. (re_syntax): Renamed from syntax. (re_fastmap): Renamed from fastmap. (re_translate): Renamed from translate. (re_can_be_null): Renamed from can_be_null. (re_regs_allocated): Renamed from regs_allocated. (re_fastmap_accurate): Renamed from fastmap_accurate. (re_no_sub): Renamed from no_sub. (re_not_bol): Renamed from not_bol. (re_not_eol): Renamed from not_eol. 
(re_newline_anchor): Renamed from newline_anchor. Change the following member names in struct re_registers. The old names are still supported if !_REGEX_SOURCE. The new names are always supported, regardless of _REGEX_SOURCE. (rm_num_regs): Renamed from num_regs. (rm_start): Renamed from start. (rm_end): Renamed from end. (re_set_syntax, re_compile_pattern, re_compile_fastmap): (re_search, re_search_2, re_match, re_match_2, re_set_registers): Prepend __ to parameter names. --- diff --git a/config/srclist.txt b/config/srclist.txt index 5d64ea811..2aa7a2b44 100644 --- a/config/srclist.txt +++ b/config/srclist.txt @@ -1,4 +1,4 @@ -# $Id: srclist.txt,v 1.81 2005-08-23 20:37:24 eggert Exp $ +# $Id: srclist.txt,v 1.82 2005-08-24 22:29:38 eggert Exp $ # Files for which we are not the source. See ./srclistvars.sh for the # variable definitions. @@ -107,7 +107,7 @@ $LIBCSRC/posix/regex.c lib gpl # http://sources.redhat.com/bugzilla/show_bug.cgi?id=1207 # http://sources.redhat.com/bugzilla/show_bug.cgi?id=1222 # http://sources.redhat.com/bugzilla/show_bug.cgi?id=1232 -# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1233 +# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1236 #$LIBCSRC/posix/regex.h lib gpl # # http://sources.redhat.com/bugzilla/show_bug.cgi?id=1215 diff --git a/lib/regex.h b/lib/regex.h index 188577936..8f6dc29bc 100644 --- a/lib/regex.h +++ b/lib/regex.h @@ -28,10 +28,16 @@ extern "C" { #endif -/* POSIX says that must be included (by the caller) before - . */ +/* Define _REGEX_SOURCE to get definitions that are incompatible with + POSIX. */ +#if (!defined _REGEX_SOURCE \ + && (defined _GNU_SOURCE \ + || (!defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE \ + && !defined _XOPEN_SOURCE))) +# define _REGEX_SOURCE 1 +#endif -#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS +#if defined _REGEX_SOURCE && defined VMS /* VMS doesn't have `size_t' in , even though POSIX says it should be there. 
*/ # include <stddef.h> @@ -53,18 +59,18 @@ typedef unsigned long int reg_syntax_t; /* If this bit is not set, then \ inside a bracket expression is literal. If set, then such a \ quotes the following character. */ -#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) +#define REG_BACKSLASH_ESCAPE_IN_LISTS 1ul /* If this bit is not set, then + and ? are operators, and \+ and \? are literals. If set, then \+ and \? are operators and + and ? are literals. */ -#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) +#define REG_BK_PLUS_QM (1ul << 1) /* If this bit is set, then character classes are supported. They are: [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. If not set, then character classes are not supported. */ -#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) +#define REG_CHAR_CLASSES (1ul << 2) /* If this bit is set, then ^ and $ are always anchors (outside bracket expressions, of course). @@ -74,11 +80,11 @@ typedef unsigned long int reg_syntax_t; $ is an anchor if it is at the end of a regular expression, or before a close-group or an alternation operator. - This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because + This bit could be (re)combined with REG_CONTEXT_INDEP_OPS, because POSIX draft 11.2 says that * etc. in leading positions is undefined. We already implemented a previous draft which made those constructs invalid, though, so we haven't changed the code back. */ -#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) +#define REG_CONTEXT_INDEP_ANCHORS (1ul << 3) /* If this bit is set, then special characters are always special regardless of where they are in the pattern. @@ -86,70 +92,70 @@ typedef unsigned long int reg_syntax_t; some contexts; otherwise they are ordinary. Specifically, * + ? and intervals are only special when not after the beginning, open-group, or alternation operator. 
*/ -#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) +#define REG_CONTEXT_INDEP_OPS (1ul << 4) /* If this bit is set, then *, +, ?, and { cannot be first in an re or immediately after an alternation or begin-group operator. */ -#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) +#define REG_CONTEXT_INVALID_OPS (1ul << 5) /* If this bit is set, then . matches newline. If not set, then it doesn't. */ -#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) +#define REG_DOT_NEWLINE (1ul << 6) /* If this bit is set, then . doesn't match NUL. If not set, then it does. */ -#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) +#define REG_DOT_NOT_NULL (1ul << 7) /* If this bit is set, nonmatching lists [^...] do not match newline. If not set, they do. */ -#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) +#define REG_HAT_LISTS_NOT_NEWLINE (1ul << 8) /* If this bit is set, either \{...\} or {...} defines an - interval, depending on RE_NO_BK_BRACES. + interval, depending on REG_NO_BK_BRACES. If not set, \{, \}, {, and } are literals. */ -#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) +#define REG_INTERVALS (1ul << 9) /* If this bit is set, +, ? and | aren't recognized as operators. If not set, they are. */ -#define RE_LIMITED_OPS (RE_INTERVALS << 1) +#define REG_LIMITED_OPS (1ul << 10) /* If this bit is set, newline is an alternation operator. If not set, newline is literal. */ -#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) +#define REG_NEWLINE_ALT (1ul << 11) /* If this bit is set, then `{...}' defines an interval, and \{ and \} are literals. If not set, then `\{...\}' defines an interval. */ -#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) +#define REG_NO_BK_BRACES (1ul << 12) /* If this bit is set, (...) defines a group, and \( and \) are literals. If not set, \(...\) defines a group, and ( and ) are literals. */ -#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) +#define REG_NO_BK_PARENS (1ul << 13) /* If this bit is set, then \<digit> matches <digit>. 
If not set, then \<digit> is a back-reference. */ -#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) +#define REG_NO_BK_REFS (1ul << 14) /* If this bit is set, then | is an alternation operator, and \| is literal. If not set, then \| is an alternation operator, and | is literal. */ -#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) +#define REG_NO_BK_VBAR (1ul << 15) /* If this bit is set, then an ending range point collating higher than the starting range point, as in [z-a], is invalid. If not set, the containing range is empty and does not match any string. */ -#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) +#define REG_NO_EMPTY_RANGES (1ul << 16) /* If this bit is set, then an unmatched ) is ordinary. If not set, then an unmatched ) is invalid. */ -#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) +#define REG_UNMATCHED_RIGHT_PAREN_ORD (1ul << 17) /* If this bit is set, succeed as soon as we match the whole pattern, without further backtracking. */ -#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) +#define REG_NO_POSIX_BACKTRACKING (1ul << 18) /* If this bit is set, do not process the GNU regex operators. If not set, then the GNU regex operators are recognized. */ -#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) +#define REG_NO_GNU_OPS (1ul << 19) /* If this bit is set, turn on internal regex debugging. If not set, and debugging was on, turn it off. @@ -157,29 +163,29 @@ typedef unsigned long int reg_syntax_t; We define this bit always, so that all that's needed to turn on debugging is to recompile regex.c; the calling code can always have this bit set, and it won't affect anything in the normal case. */ -#define RE_DEBUG (RE_NO_GNU_OPS << 1) +#define REG_DEBUG (1ul << 20) /* If this bit is set, a syntactically invalid interval is treated as a string of ordinary characters. For example, the ERE 'a{1' is treated as 'a\{1'. 
*/ -#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1) +#define REG_INVALID_INTERVAL_ORD (1ul << 21) /* If this bit is set, then ignore case when matching. If not set, then case is significant. */ -#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) +#define REG_IGNORE_CASE (1ul << 22) -/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only +/* This bit is used internally like REG_CONTEXT_INDEP_ANCHORS but only for ^, because it is difficult to scan the regex backwards to find whether ^ should be special. */ -#define RE_CARET_ANCHORS_HERE (RE_ICASE << 1) +#define REG_CARET_ANCHORS_HERE (1ul << 23) /* If this bit is set, then \{ cannot be first in an bre or immediately after an alternation or begin-group operator. */ -#define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1) +#define REG_CONTEXT_INVALID_DUP (1ul << 24) /* If this bit is set, then no_sub will be set to 1 during re_compile_pattern. */ -#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1) +#define REG_NO_SUB (1ul << 25) /* This global variable defines the particular regexp syntax to use (for some interfaces). When a regexp is compiled, the syntax used is @@ -191,81 +197,78 @@ extern reg_syntax_t re_syntax_options; (The [[[ comments delimit what gets put into the Texinfo file, so don't delete them!) 
*/ /* [[[begin syntaxes]]] */ -#define RE_SYNTAX_EMACS 0 - -#define RE_SYNTAX_AWK \ - (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ - | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ - | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) - -#define RE_SYNTAX_GNU_AWK \ - ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \ - & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \ - | RE_CONTEXT_INVALID_OPS )) - -#define RE_SYNTAX_POSIX_AWK \ - (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ - | RE_INTERVALS | RE_NO_GNU_OPS) - -#define RE_SYNTAX_GREP \ - (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ - | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ - | RE_NEWLINE_ALT) - -#define RE_SYNTAX_EGREP \ - (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ - | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ - | RE_NO_BK_VBAR) - -#define RE_SYNTAX_POSIX_EGREP \ - (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \ - | RE_INVALID_INTERVAL_ORD) +#define REG_SYNTAX_EMACS 0 + +#define REG_SYNTAX_AWK \ + (REG_BACKSLASH_ESCAPE_IN_LISTS | REG_DOT_NOT_NULL \ + | REG_NO_BK_PARENS | REG_NO_BK_REFS \ + | REG_NO_BK_VBAR | REG_NO_EMPTY_RANGES \ + | REG_DOT_NEWLINE | REG_CONTEXT_INDEP_ANCHORS \ + | REG_UNMATCHED_RIGHT_PAREN_ORD | REG_NO_GNU_OPS) + +#define REG_SYNTAX_GNU_AWK \ + ((REG_SYNTAX_POSIX_EXTENDED | REG_BACKSLASH_ESCAPE_IN_LISTS \ + | REG_DEBUG) \ + & ~(REG_DOT_NOT_NULL | REG_INTERVALS | REG_CONTEXT_INDEP_OPS \ + | REG_CONTEXT_INVALID_OPS )) + +#define REG_SYNTAX_POSIX_AWK \ + (REG_SYNTAX_POSIX_EXTENDED | REG_BACKSLASH_ESCAPE_IN_LISTS \ + | REG_INTERVALS | REG_NO_GNU_OPS) + +#define REG_SYNTAX_GREP \ + (REG_BK_PLUS_QM | REG_CHAR_CLASSES \ + | REG_HAT_LISTS_NOT_NEWLINE | REG_INTERVALS \ + | REG_NEWLINE_ALT) + +#define REG_SYNTAX_EGREP \ + (REG_CHAR_CLASSES | REG_CONTEXT_INDEP_ANCHORS \ + | REG_CONTEXT_INDEP_OPS | REG_HAT_LISTS_NOT_NEWLINE \ + | 
REG_NEWLINE_ALT | REG_NO_BK_PARENS \ + | REG_NO_BK_VBAR) + +#define REG_SYNTAX_POSIX_EGREP \ + (REG_SYNTAX_EGREP | REG_INTERVALS | REG_NO_BK_BRACES \ + | REG_INVALID_INTERVAL_ORD) /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ -#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC +#define REG_SYNTAX_ED REG_SYNTAX_POSIX_BASIC -#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC +#define REG_SYNTAX_SED REG_SYNTAX_POSIX_BASIC /* Syntax bits common to both basic and extended POSIX regex syntax. */ -#define _RE_SYNTAX_POSIX_COMMON \ - (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ - | RE_INTERVALS | RE_NO_EMPTY_RANGES) +#define _REG_SYNTAX_POSIX_COMMON \ + (REG_CHAR_CLASSES | REG_DOT_NEWLINE | REG_DOT_NOT_NULL \ + | REG_INTERVALS | REG_NO_EMPTY_RANGES) -#define RE_SYNTAX_POSIX_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP) +#define REG_SYNTAX_POSIX_BASIC \ + (_REG_SYNTAX_POSIX_COMMON | REG_BK_PLUS_QM | REG_CONTEXT_INVALID_DUP) -/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes - RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this +/* Differs from ..._POSIX_BASIC only in that REG_BK_PLUS_QM becomes + REG_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this isn't minimal, since other operators, such as \`, aren't disabled. */ -#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) - -#define RE_SYNTAX_POSIX_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ - | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD) - -/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is - removed and RE_NO_BK_REFS is added. 
*/ -#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) +#define REG_SYNTAX_POSIX_MINIMAL_BASIC \ + (_REG_SYNTAX_POSIX_COMMON | REG_LIMITED_OPS) + +#define REG_SYNTAX_POSIX_EXTENDED \ + (_REG_SYNTAX_POSIX_COMMON | REG_CONTEXT_INDEP_ANCHORS \ + | REG_CONTEXT_INDEP_OPS | REG_NO_BK_BRACES \ + | REG_NO_BK_PARENS | REG_NO_BK_VBAR \ + | REG_CONTEXT_INVALID_OPS | REG_UNMATCHED_RIGHT_PAREN_ORD) + +/* Differs from ..._POSIX_EXTENDED in that REG_CONTEXT_INDEP_OPS is + removed and REG_NO_BK_REFS is added. */ +#define REG_SYNTAX_POSIX_MINIMAL_EXTENDED \ + (_REG_SYNTAX_POSIX_COMMON | REG_CONTEXT_INDEP_ANCHORS \ + | REG_CONTEXT_INVALID_OPS | REG_NO_BK_BRACES \ + | REG_NO_BK_PARENS | REG_NO_BK_REFS \ + | REG_NO_BK_VBAR | REG_UNMATCHED_RIGHT_PAREN_ORD) /* [[[end syntaxes]]] */ -/* Maximum number of duplicates an interval can allow. Some systems - (erroneously) define this in other header files, but we want our - value, so remove any previous define. */ -#ifdef RE_DUP_MAX -# undef RE_DUP_MAX -#endif -/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */ -#define RE_DUP_MAX (0x7fff) +/* Maximum number of duplicates an interval can allow. This is + distinct from RE_DUP_MAX, to conform to POSIX name space rules and + to avoid collisions with <limits.h>. */ +#define REG_DUP_MAX 32767 /* POSIX `cflags' bits (i.e., information for `regcomp'). */ @@ -276,16 +279,16 @@ extern reg_syntax_t re_syntax_options; /* If this bit is set, then ignore case when matching. If not set, then case is significant. */ -#define REG_ICASE (REG_EXTENDED << 1) +#define REG_ICASE (1 << 1) /* If this bit is set, then anchors do not match at newline characters in the string. If not set, then anchors do match at newlines. 
*/ -#define REG_NEWLINE (REG_ICASE << 1) +#define REG_NEWLINE (1 << 2) /* If this bit is set, then report only success or fail in regexec. If not set, then returns differ between not matching and errors. */ -#define REG_NOSUB (REG_NEWLINE << 1) +#define REG_NOSUB (1 << 3) /* POSIX `eflags' bits (i.e., information for regexec). */ @@ -306,76 +309,131 @@ extern reg_syntax_t re_syntax_options; /* If any error codes are removed, changed, or added, update the - `re_error_msg' table in regex.c. */ + `__re_error_msgid' table in regcomp.c. */ + typedef enum { -#if defined _XOPEN_SOURCE || 200112L <= _POSIX_C_SOURCE - REG_ENOSYS = -1, /* This will never happen for this implementation. */ -#else - _REG_ENOSYS = -1, /* This is so that reg_errcode_t is always signed. */ -#endif + _REG_ENOSYS = -1, /* This will never happen for this implementation. */ +#define REG_ENOSYS _REG_ENOSYS - REG_NOERROR = 0, /* Success. */ - REG_NOMATCH, /* Didn't find a match (for regexec). */ + _REG_NOERROR, /* Success. */ +#define REG_NOERROR _REG_NOERROR + + _REG_NOMATCH, /* Didn't find a match (for regexec). */ +#define REG_NOMATCH _REG_NOMATCH /* POSIX regcomp return error codes. (In the order listed in the standard.) */ - REG_BADPAT, /* Invalid pattern. */ - REG_ECOLLATE, /* Inalid collating element. */ - REG_ECTYPE, /* Invalid character class name. */ - REG_EESCAPE, /* Trailing backslash. */ - REG_ESUBREG, /* Invalid back reference. */ - REG_EBRACK, /* Unmatched left bracket. */ - REG_EPAREN, /* Parenthesis imbalance. */ - REG_EBRACE, /* Unmatched \{. */ - REG_BADBR, /* Invalid contents of \{\}. */ - REG_ERANGE, /* Invalid range end. */ - REG_ESPACE, /* Ran out of memory. */ - REG_BADRPT, /* No preceding re for repetition op. */ + + _REG_BADPAT, /* Invalid pattern. */ +#define REG_BADPAT _REG_BADPAT + + _REG_ECOLLATE, /* Inalid collating element. */ +#define REG_ECOLLATE _REG_ECOLLATE + + _REG_ECTYPE, /* Invalid character class name. 
*/ +#define REG_ECTYPE _REG_ECTYPE + + _REG_EESCAPE, /* Trailing backslash. */ +#define REG_EESCAPE _REG_EESCAPE + + _REG_ESUBREG, /* Invalid back reference. */ +#define REG_ESUBREG _REG_ESUBREG + + _REG_EBRACK, /* Unmatched left bracket. */ +#define REG_EBRACK _REG_EBRACK + + _REG_EPAREN, /* Parenthesis imbalance. */ +#define REG_EPAREN _REG_EPAREN + + _REG_EBRACE, /* Unmatched \{. */ +#define REG_EBRACE _REG_EBRACE + + _REG_BADBR, /* Invalid contents of \{\}. */ +#define REG_BADBR _REG_BADBR + + _REG_ERANGE, /* Invalid range end. */ +#define REG_ERANGE _REG_ERANGE + + _REG_ESPACE, /* Ran out of memory. */ +#define REG_ESPACE _REG_ESPACE + + _REG_BADRPT, /* No preceding re for repetition op. */ +#define REG_BADRPT _REG_BADRPT /* Error codes we've added. */ - REG_EEND, /* Premature end. */ - REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ - REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ + + _REG_EEND, /* Premature end. */ +#define REG_EEND _REG_EEND + + _REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ +#define REG_ESIZE _REG_ESIZE + + _REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ +#define REG_ERPAREN _REG_ERPAREN + } reg_errcode_t; +/* In the traditional GNU implementation, regex.h defined member names + like `buffer' that POSIX does not allow. These members now have + names with leading `re_' (e.g., `re_buffer'). Support the old + names only if _REGEX_SOURCE is defined. New programs should use + the new names. */ +#ifdef _REGEX_SOURCE +# define _REG_RE_NAME(id) id +# define _REG_RM_NAME(id) id +#else +# define _REG_RE_NAME(id) re_##id +# define _REG_RM_NAME(id) rm_##id +#endif + +/* The user can specify the type of the re_translate member by + defining the macro REG_TRANSLATE_TYPE. In the traditional GNU + implementation, this macro was named RE_TRANSLATE_TYPE, but POSIX + does not allow this. Support the old name only if _REGEX_SOURCE + and if the new name is not defined. 
New programs should use the new + name. */ +#ifndef REG_TRANSLATE_TYPE +# if defined _REGEX_SOURCE && defined RE_TRANSLATE_TYPE +# define REG_TRANSLATE_TYPE RE_TRANSLATE_TYPE +# else +# define REG_TRANSLATE_TYPE char * +# endif +#endif + /* This data structure represents a compiled pattern. Before calling - the pattern compiler, the fields `buffer', `allocated', `fastmap', - `translate', and `no_sub' can be set. After the pattern has been + the pattern compiler, the fields `re_buffer', `re_allocated', `re_fastmap', + `re_translate', and `re_no_sub' can be set. After the pattern has been compiled, the `re_nsub' field is available. All other fields are private to the regex routines. */ -#ifndef RE_TRANSLATE_TYPE -# define RE_TRANSLATE_TYPE char * -#endif - struct re_pattern_buffer { /* [[[begin pattern_buffer]]] */ /* Space that holds the compiled pattern. It is declared as `unsigned char *' because its elements are sometimes used as array indexes. */ - unsigned char *buffer; + unsigned char *_REG_RE_NAME (buffer); - /* Number of bytes to which `buffer' points. */ - unsigned long int allocated; + /* Number of bytes to which `re_buffer' points. */ + unsigned long int _REG_RE_NAME (allocated); - /* Number of bytes actually used in `buffer'. */ - unsigned long int used; + /* Number of bytes actually used in `re_buffer'. */ + unsigned long int _REG_RE_NAME (used); /* Syntax setting with which the pattern was compiled. */ - reg_syntax_t syntax; + reg_syntax_t _REG_RE_NAME (syntax); /* Pointer to a fastmap, if any, otherwise zero. re_search uses the fastmap, if there is one, to skip over impossible starting points for matches. */ - char *fastmap; + char *_REG_RE_NAME (fastmap); /* Either a translate table to apply to all characters before comparing them, or zero for no translation. The translation is applied to a pattern when it is compiled and to a string when it is matched. 
*/ - RE_TRANSLATE_TYPE translate; + REG_TRANSLATE_TYPE _REG_RE_NAME (translate); /* Number of subexpressions found by the compiler. */ size_t re_nsub; @@ -385,34 +443,34 @@ struct re_pattern_buffer whether or not we should use the fastmap, so we don't set this absolutely perfectly; see `re_compile_fastmap' (the `duplicate' case). */ - unsigned can_be_null : 1; + unsigned int _REG_RE_NAME (can_be_null) : 1; - /* If REGS_UNALLOCATED, allocate space in the `regs' structure - for `max (RE_NREGS, re_nsub + 1)' groups. - If REGS_REALLOCATE, reallocate space if necessary. - If REGS_FIXED, use what's there. */ -#define REGS_UNALLOCATED 0 -#define REGS_REALLOCATE 1 -#define REGS_FIXED 2 - unsigned regs_allocated : 2; + /* If REG_UNALLOCATED, allocate space in the `regs' structure + for `max (REG_NREGS, re_nsub + 1)' groups. + If REG_REALLOCATE, reallocate space if necessary. + If REG_FIXED, use what's there. */ +#define REG_UNALLOCATED 0 +#define REG_REALLOCATE 1 +#define REG_FIXED 2 + unsigned int _REG_RE_NAME (regs_allocated) : 2; /* Set to zero when `regex_compile' compiles a pattern; set to one by `re_compile_fastmap' if it updates the fastmap. */ - unsigned fastmap_accurate : 1; + unsigned int _REG_RE_NAME (fastmap_accurate) : 1; /* If set, `re_match_2' does not return information about subexpressions. */ - unsigned no_sub : 1; + unsigned int _REG_RE_NAME (no_sub) : 1; /* If set, a beginning-of-line anchor doesn't match at the beginning of the string. */ - unsigned not_bol : 1; + unsigned int _REG_RE_NAME (not_bol) : 1; /* Similarly for an end-of-line anchor. */ - unsigned not_eol : 1; + unsigned int _REG_RE_NAME (not_eol) : 1; /* If true, an anchor at a newline matches. */ - unsigned newline_anchor : 1; + unsigned int _REG_RE_NAME (newline_anchor) : 1; /* [[[end pattern_buffer]]] */ }; @@ -427,17 +485,17 @@ typedef int regoff_t; regex.texinfo for a full description of what registers match. 
*/ struct re_registers { - unsigned num_regs; - regoff_t *start; - regoff_t *end; + unsigned int _REG_RM_NAME (num_regs); + regoff_t *_REG_RM_NAME (start); + regoff_t *_REG_RM_NAME (end); }; -/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, +/* If `regs_allocated' is REG_UNALLOCATED in the pattern buffer, `re_match_2' returns information about at least this many registers the first time a `regs' structure is passed. */ -#ifndef RE_NREGS -# define RE_NREGS 30 +#ifndef REG_NREGS +# define REG_NREGS 30 #endif @@ -454,49 +512,51 @@ typedef struct /* Sets the current default syntax to SYNTAX, and return the old syntax. You can also simply assign to the `re_syntax_options' variable. */ -extern reg_syntax_t re_set_syntax (reg_syntax_t syntax); +extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax); /* Compile the regular expression PATTERN, with length LENGTH and syntax given by the global `re_syntax_options', into the buffer BUFFER. Return NULL if successful, and an error string if not. */ -extern const char *re_compile_pattern (const char *pattern, size_t length, - struct re_pattern_buffer *buffer); +extern const char *re_compile_pattern (const char *__pattern, size_t __length, + struct re_pattern_buffer *__buffer); /* Compile a fastmap for the compiled pattern in BUFFER; used to accelerate searches. Return 0 if successful and -2 if was an internal error. */ -extern int re_compile_fastmap (struct re_pattern_buffer *buffer); +extern int re_compile_fastmap (struct re_pattern_buffer *__buffer); /* Search in the string STRING (with length LENGTH) for the pattern compiled into BUFFER. Start searching at position START, for RANGE characters. Return the starting position of the match, -1 for no match, or -2 for an internal error. Also return register - information in REGS (if REGS and BUFFER->no_sub are nonzero). 
*/ -extern int re_search (struct re_pattern_buffer *buffer, const char *string, - int length, int start, int range, - struct re_registers *regs); + information in REGS (if REGS and BUFFER->re_no_sub are nonzero). */ +extern int re_search (struct re_pattern_buffer *__buffer, const char *__string, + int __length, int __start, int __range, + struct re_registers *__regs); /* Like `re_search', but search in the concatenation of STRING1 and STRING2. Also, stop searching at index START + STOP. */ -extern int re_search_2 (struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, int range, struct re_registers *regs, - int stop); +extern int re_search_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, + int __start, int __range, struct re_registers *__regs, + int __stop); /* Like `re_search', but return how many characters in STRING the regexp in BUFFER matched, starting at position START. */ -extern int re_match (struct re_pattern_buffer *buffer, const char *string, - int length, int start, struct re_registers *regs); +extern int re_match (struct re_pattern_buffer *__buffer, const char *__string, + int __length, int __start, struct re_registers *__regs); /* Relates to `re_match' as `re_search_2' relates to `re_search'. 
*/ -extern int re_match_2 (struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, struct re_registers *regs, int stop); +extern int re_match_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, + int __start, struct re_registers *__regs, int __stop); /* Set REGS to hold NUM_REGS registers, storing them in STARTS and @@ -511,9 +571,10 @@ extern int re_match_2 (struct re_pattern_buffer *buffer, const char *string1, Unless this function is called, the first search or match using PATTERN_BUFFER will allocate its own register data, without freeing the old data. */ -extern void re_set_registers (struct re_pattern_buffer *buffer, - struct re_registers *regs, unsigned num_regs, - regoff_t *starts, regoff_t *ends); +extern void re_set_registers (struct re_pattern_buffer *__buffer, + struct re_registers *__regs, + unsigned int __num_regs, + regoff_t *__starts, regoff_t *__ends); #if defined _REGEX_RE_COMP || defined _LIBC # ifndef _CRAY @@ -559,6 +620,99 @@ extern size_t regerror (int __errcode, const regex_t *__preg, extern void regfree (regex_t *__preg); +#ifdef _REGEX_SOURCE + +/* Define the POSIX-compatible member names in terms of the + incompatible (and deprecated) names established by _REG_RE_NAME. + New programs should use the re_* names. */ + +# define re_allocated allocated +# define re_buffer buffer +# define re_can_be_null can_be_null +# define re_fastmap fastmap +# define re_newline_anchor newline_anchor +# define re_no_sub no_sub +# define re_not_bol not_bol +# define re_not_eol not_eol +# define re_regs_allocated regs_allocated +# define re_syntax syntax +# define re_translate translate +# define re_used used + +/* Similarly for _REG_RM_NAME. */ + +# define rm_end end +# define rm_num_regs num_regs +# define rm_start start + +/* Undef RE_DUP_MAX first, in case the user has already included a + with an incompatible definition. 
+ + On GNU systems, the most common spelling for RE_DUP_MAX's value in + <limits.h> is (0x7fff), so define RE_DUP_MAX to that, not to + REG_DUP_MAX. This avoids some duplicate-macro-definition warnings + with programs that include <limits.h> after this file. + + New programs should not assume that regex.h defines RE_DUP_MAX; to + get the value of RE_DUP_MAX, they should instead include <limits.h> + and possibly invoke the sysconf function. */ + +# undef RE_DUP_MAX +# define RE_DUP_MAX (0x7fff) + +/* Define the following symbols for backward source compatibility. + These symbols violate the POSIX name space rules, and new programs + should avoid them. */ + +# define REGS_FIXED REG_FIXED +# define REGS_REALLOCATE REG_REALLOCATE +# define REGS_UNALLOCATED REG_UNALLOCATED +# define RE_BACKSLASH_ESCAPE_IN_LISTS REG_BACKSLASH_ESCAPE_IN_LISTS +# define RE_BK_PLUS_QM REG_BK_PLUS_QM +# define RE_CARET_ANCHORS_HERE REG_CARET_ANCHORS_HERE +# define RE_CHAR_CLASSES REG_CHAR_CLASSES +# define RE_CONTEXT_INDEP_ANCHORS REG_CONTEXT_INDEP_ANCHORS +# define RE_CONTEXT_INDEP_OPS REG_CONTEXT_INDEP_OPS +# define RE_CONTEXT_INVALID_DUP REG_CONTEXT_INVALID_DUP +# define RE_CONTEXT_INVALID_OPS REG_CONTEXT_INVALID_OPS +# define RE_DEBUG REG_DEBUG +# define RE_DOT_NEWLINE REG_DOT_NEWLINE +# define RE_DOT_NOT_NULL REG_DOT_NOT_NULL +# define RE_HAT_LISTS_NOT_NEWLINE REG_HAT_LISTS_NOT_NEWLINE +# define RE_ICASE REG_IGNORE_CASE /* avoid collision with REG_ICASE */ +# define RE_INTERVALS REG_INTERVALS +# define RE_INVALID_INTERVAL_ORD REG_INVALID_INTERVAL_ORD +# define RE_LIMITED_OPS REG_LIMITED_OPS +# define RE_NEWLINE_ALT REG_NEWLINE_ALT +# define RE_NO_BK_BRACES REG_NO_BK_BRACES +# define RE_NO_BK_PARENS REG_NO_BK_PARENS +# define RE_NO_BK_REFS REG_NO_BK_REFS +# define RE_NO_BK_VBAR REG_NO_BK_VBAR +# define RE_NO_EMPTY_RANGES REG_NO_EMPTY_RANGES +# define RE_NO_GNU_OPS REG_NO_GNU_OPS +# define RE_NO_POSIX_BACKTRACKING REG_NO_POSIX_BACKTRACKING +# define RE_NO_SUB REG_NO_SUB +# define RE_NREGS REG_NREGS +# define
RE_SYNTAX_AWK REG_SYNTAX_AWK +# define RE_SYNTAX_ED REG_SYNTAX_ED +# define RE_SYNTAX_EGREP REG_SYNTAX_EGREP +# define RE_SYNTAX_EMACS REG_SYNTAX_EMACS +# define RE_SYNTAX_GNU_AWK REG_SYNTAX_GNU_AWK +# define RE_SYNTAX_GREP REG_SYNTAX_GREP +# define RE_SYNTAX_POSIX_AWK REG_SYNTAX_POSIX_AWK +# define RE_SYNTAX_POSIX_BASIC REG_SYNTAX_POSIX_BASIC +# define RE_SYNTAX_POSIX_EGREP REG_SYNTAX_POSIX_EGREP +# define RE_SYNTAX_POSIX_EXTENDED REG_SYNTAX_POSIX_EXTENDED +# define RE_SYNTAX_POSIX_MINIMAL_BASIC REG_SYNTAX_POSIX_MINIMAL_BASIC +# define RE_SYNTAX_POSIX_MINIMAL_EXTENDED REG_SYNTAX_POSIX_MINIMAL_EXTENDED +# define RE_SYNTAX_SED REG_SYNTAX_SED +# define RE_UNMATCHED_RIGHT_PAREN_ORD REG_UNMATCHED_RIGHT_PAREN_ORD +# ifndef RE_TRANSLATE_TYPE +# define RE_TRANSLATE_TYPE REG_TRANSLATE_TYPE +# endif + +#endif /* defined _REGEX_SOURCE */ + #ifdef __cplusplus } #endif /* C++ */