/* Definitions for data structures and routines for the regular
expression library, version 0.12.
- Copyright (C) 1985, 89, 90, 91, 92, 1993 Free Software Foundation, Inc.
+ Copyright (C) 1985, 89, 90, 91, 92, 93, 95 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ USA. */
#ifndef __REGEXP_LIBRARY_H__
#define __REGEXP_LIBRARY_H__
/* POSIX says that <sys/types.h> must be included (by the caller) before
<regex.h>. */
-#ifdef VMS
+#if !defined (_POSIX_C_SOURCE) && !defined (_POSIX_SOURCE) && defined (VMS)
/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
should be there. */
#include <stddef.h>
/* The following bits are used to determine the regexp syntax we
- recognize. The set/not-set meanings are chosen so that Emacs syntax
- remains the value 0. The bits are given in alphabetical order, and
+ recognize. The set/not-set meanings where historically chosen so
+ that Emacs syntax had the value 0.
+ The bits are given in alphabetical order, and
the definitions shifted by one from the previous bit; thus, when we
add or remove a bit, only one other definition need change. */
typedef unsigned reg_syntax_t;
+/* If this bit is set, then even *?, +? and ?? match greedily. */
+#define RE_ALL_GREEDY (1)
+
/* If this bit is not set, then \ inside a bracket expression is literal.
If set, then such a \ quotes the following character. */
-#define RE_BACKSLASH_ESCAPE_IN_LISTS (1)
+#define RE_BACKSLASH_ESCAPE_IN_LISTS (RE_ALL_GREEDY << 1)
/* If this bit is not set, then + and ? are operators, and \+ and \? are
literals.
starting range point, the range is ignored. */
#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
-/* If this bit is set, then an unmatched ) is ordinary.
- If not set, then an unmatched ) is invalid. */
-#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
-
/* If this bit is set, succeed as soon as we match the whole pattern,
without further backtracking. */
-#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+#define RE_NO_POSIX_BACKTRACKING (RE_NO_EMPTY_RANGES << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+ If not set, then an unmatched ) is invalid. */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_POSIX_BACKTRACKING << 1)
/* This global variable defines the particular regexp syntax to use (for
some interfaces). When a regexp is compiled, the syntax used is
stored in the pattern buffer, so changing this does not affect
already-compiled regexps. */
extern reg_syntax_t re_syntax_options;
+
+#ifdef emacs
+/* In Emacs, this is the string or buffer in which we
+ are matching. It is used for looking up syntax properties. */
+extern Lisp_Object re_match_object;
+#endif
+
\f
/* Define combinations of the above bits for the standard possibilities.
(The [[[ comments delimit what gets put into the Texinfo file, so
- don't delete them!) */
+ don't delete them!) */
/* [[[begin syntaxes]]] */
-#define RE_SYNTAX_EMACS 0
+#define RE_SYNTAX_EMACS (RE_CHAR_CLASSES | RE_INTERVALS)
#define RE_SYNTAX_AWK \
(RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
| RE_NO_BK_PARENS | RE_NO_BK_REFS \
| RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
- | RE_UNMATCHED_RIGHT_PAREN_ORD)
+ | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_ALL_GREEDY)
#define RE_SYNTAX_POSIX_AWK \
(RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
#define RE_SYNTAX_GREP \
(RE_BK_PLUS_QM | RE_CHAR_CLASSES \
| RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
- | RE_NEWLINE_ALT)
+ | RE_NEWLINE_ALT | RE_ALL_GREEDY)
#define RE_SYNTAX_EGREP \
(RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
| RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
| RE_NEWLINE_ALT | RE_NO_BK_PARENS \
- | RE_NO_BK_VBAR)
+ | RE_NO_BK_VBAR | RE_ALL_GREEDY)
#define RE_SYNTAX_POSIX_EGREP \
(RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
/* Syntax bits common to both basic and extended POSIX regex syntax. */
#define _RE_SYNTAX_POSIX_COMMON \
(RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
- | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES | RE_ALL_GREEDY)
#define RE_SYNTAX_POSIX_BASIC \
(_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
compiled, the `re_nsub' field is available. All other fields are
private to the regex routines. */
+#ifndef RE_TRANSLATE_TYPE
+#define RE_TRANSLATE_TYPE char *
+#define RE_TRANSLATE(TBL, C) ((TBL)[C])
+#define RE_TRANSLATE_P(TBL) (TBL)
+#endif
+
struct re_pattern_buffer
{
/* [[[begin pattern_buffer]]] */
comparing them, or zero for no translation. The translation
is applied to a pattern when it is compiled and to a string
when it is matched. */
- char *translate;
+ RE_TRANSLATE_TYPE translate;
/* Number of subexpressions found by the compiler. */
size_t re_nsub;
/* If true, an anchor at a newline matches. */
unsigned newline_anchor : 1;
+ /* If true, multi-byte form in the `buffer' should be recognized as a
+ multibyte character. */
+ unsigned multibyte : 1;
+
/* [[[end pattern_buffer]]] */
};