X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=regex.h;h=2f6860d9ec5cf4cacfb5dfd43885aa75f522a6ac;hb=9d6639f1402fc8e869bc0c103b62173c57dcb09a;hp=31eaf849f757245a2baf6a8266444b1b3c94857c;hpb=2dd0bd2a2e7b07e3e47e35a4c06eab8b708fe499;p=gnulib.git diff --git a/regex.h b/regex.h index 31eaf849f..2f6860d9e 100644 --- a/regex.h +++ b/regex.h @@ -1,7 +1,7 @@ /* Definitions for data structures and routines for the regular expression library, version 0.12. - Copyright (C) 1985, 89, 90, 91, 92, 93, 95 Free Software Foundation, Inc. + Copyright (C) 1985,89,90,91,92,93,95,2000 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,7 +15,8 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + USA. */ #ifndef __REGEXP_LIBRARY_H__ #define __REGEXP_LIBRARY_H__ @@ -31,15 +32,19 @@ /* The following bits are used to determine the regexp syntax we - recognize. The set/not-set meanings are chosen so that Emacs syntax - remains the value 0. The bits are given in alphabetical order, and + recognize. The set/not-set meanings where historically chosen so + that Emacs syntax had the value 0. + The bits are given in alphabetical order, and the definitions shifted by one from the previous bit; thus, when we add or remove a bit, only one other definition need change. */ typedef unsigned reg_syntax_t; +/* If this bit is set, then even *?, +? and ?? match greedily. */ +#define RE_ALL_GREEDY (1) + /* If this bit is not set, then \ inside a bracket expression is literal. If set, then such a \ quotes the following character. */ -#define RE_BACKSLASH_ESCAPE_IN_LISTS (1) +#define RE_BACKSLASH_ESCAPE_IN_LISTS (RE_ALL_GREEDY << 1) /* If this bit is not set, then + and ? are operators, and \+ and \? are literals. @@ -126,31 +131,41 @@ typedef unsigned reg_syntax_t; starting range point, the range is ignored. */ #define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) -/* If this bit is set, then an unmatched ) is ordinary. - If not set, then an unmatched ) is invalid. */ -#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) - /* If this bit is set, succeed as soon as we match the whole pattern, without further backtracking. */ -#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) +#define RE_NO_POSIX_BACKTRACKING (RE_NO_EMPTY_RANGES << 1) + +/* If this bit is set, then (?:...) is treated as a shy group. */ +#define RE_SHY_GROUPS (RE_NO_POSIX_BACKTRACKING << 1) + +/* If this bit is set, then an unmatched ) is ordinary. + If not set, then an unmatched ) is invalid. */ +#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_SHY_GROUPS << 1) /* This global variable defines the particular regexp syntax to use (for some interfaces). When a regexp is compiled, the syntax used is stored in the pattern buffer, so changing this does not affect already-compiled regexps. */ extern reg_syntax_t re_syntax_options; + +#ifdef emacs +/* In Emacs, this is the string or buffer in which we + are matching. It is used for looking up syntax properties. */ +extern Lisp_Object re_match_object; +#endif + /* Define combinations of the above bits for the standard possibilities. (The [[[ comments delimit what gets put into the Texinfo file, so - don't delete them!) */ + don't delete them!) */ /* [[[begin syntaxes]]] */ -#define RE_SYNTAX_EMACS 0 +#define RE_SYNTAX_EMACS (RE_CHAR_CLASSES | RE_INTERVALS | RE_SHY_GROUPS) #define RE_SYNTAX_AWK \ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ | RE_NO_BK_PARENS | RE_NO_BK_REFS \ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) + | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_ALL_GREEDY) #define RE_SYNTAX_POSIX_AWK \ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) @@ -158,13 +173,13 @@ extern reg_syntax_t re_syntax_options; #define RE_SYNTAX_GREP \ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ - | RE_NEWLINE_ALT) + | RE_NEWLINE_ALT | RE_ALL_GREEDY) #define RE_SYNTAX_EGREP \ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ - | RE_NO_BK_VBAR) + | RE_NO_BK_VBAR | RE_ALL_GREEDY) #define RE_SYNTAX_POSIX_EGREP \ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) @@ -177,7 +192,7 @@ extern reg_syntax_t re_syntax_options; /* Syntax bits common to both basic and extended POSIX regex syntax. */ #define _RE_SYNTAX_POSIX_COMMON \ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ - | RE_INTERVALS | RE_NO_EMPTY_RANGES) + | RE_INTERVALS | RE_NO_EMPTY_RANGES | RE_ALL_GREEDY) #define RE_SYNTAX_POSIX_BASIC \ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) @@ -279,6 +294,10 @@ typedef enum compiled, the `re_nsub' field is available. All other fields are private to the regex routines. */ +#ifndef RE_TRANSLATE_TYPE +#define RE_TRANSLATE_TYPE char * +#endif + struct re_pattern_buffer { /* [[[begin pattern_buffer]]] */ @@ -305,7 +324,7 @@ struct re_pattern_buffer comparing them, or zero for no translation. The translation is applied to a pattern when it is compiled and to a string when it is matched. */ - char *translate; + RE_TRANSLATE_TYPE translate; /* Number of subexpressions found by the compiler. */ size_t re_nsub; @@ -313,8 +332,7 @@ struct re_pattern_buffer /* Zero if this pattern cannot match the empty string, one else. Well, in truth it's used only in `re_search_2', to see whether or not we should use the fastmap, so we don't set - this absolutely perfectly; see `re_compile_fastmap' (the - `duplicate' case). */ + this absolutely perfectly; see `re_compile_fastmap'. */ unsigned can_be_null : 1; /* If REGS_UNALLOCATED, allocate space in the `regs' structure @@ -344,6 +362,12 @@ struct re_pattern_buffer /* If true, an anchor at a newline matches. */ unsigned newline_anchor : 1; +#ifdef emacs + /* If true, multi-byte form in the `buffer' should be recognized as a + multibyte character. */ + unsigned multibyte : 1; +#endif + /* [[[end pattern_buffer]]] */ }; @@ -388,15 +412,15 @@ typedef struct unfortunately clutters up the declarations a bit, but I think it's worth it. */ -#if __STDC__ +#if defined __STDC__ || defined PROTOTYPES #define _RE_ARGS(args) args -#else /* not __STDC__ */ +#else /* not __STDC__ || PROTOTYPES */ #define _RE_ARGS(args) () -#endif /* not __STDC__ */ +#endif /* not __STDC__ || PROTOTYPES */ /* Sets the current default syntax to SYNTAX, and return the old syntax. You can also simply assign to the `re_syntax_options' variable. */