X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=regex.h;h=b39fc1e770e66cfaea38ba2cb10a68bfbf9a30b7;hb=2689f07b92f86ebbf5a328968ebb1e6b9214d357;hp=a495005ce93665611dafd1a267dd8e65f450fb8d;hpb=61c3a3d75ba7053850836e17bdc751d7b269db79;p=gnulib.git diff --git a/regex.h b/regex.h index a495005ce..b39fc1e77 100644 --- a/regex.h +++ b/regex.h @@ -1,7 +1,7 @@ /* Definitions for data structures and routines for the regular expression library, version 0.12. - Copyright (C) 1985, 89, 90, 91, 92, 1993 Free Software Foundation, Inc. + Copyright (C) 1985, 89, 90, 91, 92, 93, 95 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,7 +15,8 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + USA. */ #ifndef __REGEXP_LIBRARY_H__ #define __REGEXP_LIBRARY_H__ @@ -23,7 +24,7 @@ /* POSIX says that <sys/types.h> must be included (by the caller) before <regex.h>. */ -#ifdef VMS +#if !defined (_POSIX_C_SOURCE) && !defined (_POSIX_SOURCE) && defined (VMS) /* VMS doesn't have `size_t' in <types.h>, even though POSIX says it should be there. */ #include <stddef.h> @@ -31,15 +32,19 @@ /* The following bits are used to determine the regexp syntax we - recognize. The set/not-set meanings are chosen so that Emacs syntax - remains the value 0. The bits are given in alphabetical order, and + recognize. The set/not-set meanings were historically chosen so + that Emacs syntax had the value 0. + The bits are given in alphabetical order, and the definitions shifted by one from the previous bit; thus, when we add or remove a bit, only one other definition need change. */ typedef unsigned reg_syntax_t; +/* If this bit is set, then even *?, +? and ?? match greedily. 
*/ +#define RE_ALL_GREEDY (1) + /* If this bit is not set, then \ inside a bracket expression is literal. If set, then such a \ quotes the following character. */ -#define RE_BACKSLASH_ESCAPE_IN_LISTS (1) +#define RE_BACKSLASH_ESCAPE_IN_LISTS (RE_ALL_GREEDY << 1) /* If this bit is not set, then + and ? are operators, and \+ and \? are literals. @@ -126,27 +131,38 @@ typedef unsigned reg_syntax_t; starting range point, the range is ignored. */ #define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) +/* If this bit is set, succeed as soon as we match the whole pattern, + without further backtracking. */ +#define RE_NO_POSIX_BACKTRACKING (RE_NO_EMPTY_RANGES << 1) + /* If this bit is set, then an unmatched ) is ordinary. If not set, then an unmatched ) is invalid. */ -#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) +#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_POSIX_BACKTRACKING << 1) /* This global variable defines the particular regexp syntax to use (for some interfaces). When a regexp is compiled, the syntax used is stored in the pattern buffer, so changing this does not affect already-compiled regexps. */ extern reg_syntax_t re_syntax_options; + +#ifdef emacs +/* In Emacs, this is the string or buffer in which we + are matching. It is used for looking up syntax properties. */ +extern Lisp_Object re_match_object; +#endif + /* Define combinations of the above bits for the standard possibilities. (The [[[ comments delimit what gets put into the Texinfo file, so - don't delete them!) */ + don't delete them!) 
*/ /* [[[begin syntaxes]]] */ -#define RE_SYNTAX_EMACS 0 +#define RE_SYNTAX_EMACS (RE_CHAR_CLASSES | RE_INTERVALS) #define RE_SYNTAX_AWK \ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ | RE_NO_BK_PARENS | RE_NO_BK_REFS \ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) + | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_ALL_GREEDY) #define RE_SYNTAX_POSIX_AWK \ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) @@ -154,13 +170,13 @@ extern reg_syntax_t re_syntax_options; #define RE_SYNTAX_GREP \ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ - | RE_NEWLINE_ALT) + | RE_NEWLINE_ALT | RE_ALL_GREEDY) #define RE_SYNTAX_EGREP \ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ - | RE_NO_BK_VBAR) + | RE_NO_BK_VBAR | RE_ALL_GREEDY) #define RE_SYNTAX_POSIX_EGREP \ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) @@ -173,7 +189,7 @@ extern reg_syntax_t re_syntax_options; /* Syntax bits common to both basic and extended POSIX regex syntax. */ #define _RE_SYNTAX_POSIX_COMMON \ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ - | RE_INTERVALS | RE_NO_EMPTY_RANGES) + | RE_INTERVALS | RE_NO_EMPTY_RANGES | RE_ALL_GREEDY) #define RE_SYNTAX_POSIX_BASIC \ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) @@ -275,6 +291,12 @@ typedef enum compiled, the `re_nsub' field is available. All other fields are private to the regex routines. */ +#ifndef RE_TRANSLATE_TYPE +#define RE_TRANSLATE_TYPE char * +#define RE_TRANSLATE(TBL, C) ((TBL)[C]) +#define RE_TRANSLATE_P(TBL) (TBL) +#endif + struct re_pattern_buffer { /* [[[begin pattern_buffer]]] */ @@ -301,7 +323,7 @@ struct re_pattern_buffer comparing them, or zero for no translation. The translation is applied to a pattern when it is compiled and to a string when it is matched. */ - char *translate; + RE_TRANSLATE_TYPE translate; /* Number of subexpressions found by the compiler. 
*/ size_t re_nsub; @@ -340,15 +362,14 @@ struct re_pattern_buffer /* If true, an anchor at a newline matches. */ unsigned newline_anchor : 1; + /* If true, multi-byte form in the `buffer' should be recognized as a + multibyte character. */ + unsigned multibyte : 1; + /* [[[end pattern_buffer]]] */ }; typedef struct re_pattern_buffer regex_t; - - -/* search.c (search_buffer) in Emacs needs this one opcode value. It is - defined both in `regex.c' and here. */ -#define RE_EXACTN_VALUE 1 /* Type for byte offsets within the string. POSIX mandates this. */ typedef int regoff_t; @@ -465,9 +486,11 @@ extern void re_set_registers _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, unsigned num_regs, regoff_t *starts, regoff_t *ends)); +#ifdef _REGEX_RE_COMP /* 4.2 bsd compatibility. */ extern char *re_comp _RE_ARGS ((const char *)); extern int re_exec _RE_ARGS ((const char *)); +#endif /* POSIX compatibility. */ extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));