/* Definitions for data structures and routines for the regular
- expression library, version 0.11.
+ expression library, version 0.12.
- Copyright (C) 1985, 89, 90, 91, 92 Free Software Foundation, Inc.
+ Copyright (C) 1985, 89, 90, 91, 92, 93, 95 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ USA. */
#ifndef __REGEXP_LIBRARY_H__
#define __REGEXP_LIBRARY_H__
-#ifdef VMS
-/* POSIX says that size_t should be in stddef.h. */
+/* POSIX says that <sys/types.h> must be included (by the caller) before
+ <regex.h>. */
+
+#if !defined (_POSIX_C_SOURCE) && !defined (_POSIX_SOURCE) && defined (VMS)
+/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
+ should be there. */
#include <stddef.h>
#endif
-/* POSIX says that <sys/types.h> must be included before <regex.h>. */
/* The following bits are used to determine the regexp syntax we
recognize. The set/not-set meanings are chosen so that Emacs syntax
add or remove a bit, only one other definition need change. */
typedef unsigned reg_syntax_t;
+/* If this bit is set, then even *?, +? and ?? match greedily. */
+#define RE_ALL_GREEDY (1)
+
/* If this bit is not set, then \ inside a bracket expression is literal.
If set, then such a \ quotes the following character. */
-#define RE_BACKSLASH_ESCAPE_IN_LISTS (1)
+#define RE_BACKSLASH_ESCAPE_IN_LISTS (RE_ALL_GREEDY << 1)
/* If this bit is not set, then + and ? are operators, and \+ and \? are
literals.
starting range point, the range is ignored. */
#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+/* If this bit is set, succeed as soon as we match the whole pattern,
+ without further backtracking. */
+#define RE_NO_POSIX_BACKTRACKING (RE_NO_EMPTY_RANGES << 1)
+
/* If this bit is set, then an unmatched ) is ordinary.
If not set, then an unmatched ) is invalid. */
-#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_POSIX_BACKTRACKING << 1)
/* This global variable defines the particular regexp syntax to use (for
some interfaces). When a regexp is compiled, the syntax used is
stored in the pattern buffer, so changing this does not affect
already-compiled regexps. */
extern reg_syntax_t re_syntax_options;
+
+#ifdef emacs
+/* In Emacs, this is the string or buffer in which we
+ are matching. It is used for looking up syntax properties. */
+extern Lisp_Object re_match_object;
+#endif
+
\f
/* Define combinations of the above bits for the standard possibilities.
(The [[[ comments delimit what gets put into the Texinfo file, so
#define RE_SYNTAX_AWK \
(RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
| RE_NO_BK_PARENS | RE_NO_BK_REFS \
- | RE_NO_BK_VAR | RE_NO_EMPTY_RANGES \
- | RE_UNMATCHED_RIGHT_PAREN_ORD)
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_ALL_GREEDY)
#define RE_SYNTAX_POSIX_AWK \
(RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
#define RE_SYNTAX_GREP \
(RE_BK_PLUS_QM | RE_CHAR_CLASSES \
| RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
- | RE_NEWLINE_ALT)
+ | RE_NEWLINE_ALT | RE_ALL_GREEDY)
#define RE_SYNTAX_EGREP \
(RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
| RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
| RE_NEWLINE_ALT | RE_NO_BK_PARENS \
- | RE_NO_BK_VBAR)
+ | RE_NO_BK_VBAR | RE_ALL_GREEDY)
#define RE_SYNTAX_POSIX_EGREP \
(RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
/* Syntax bits common to both basic and extended POSIX regex syntax. */
#define _RE_SYNTAX_POSIX_COMMON \
(RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
- | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES | RE_ALL_GREEDY)
#define RE_SYNTAX_POSIX_BASIC \
(_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
compiled, the `re_nsub' field is available. All other fields are
private to the regex routines. */
+#ifndef RE_TRANSLATE_TYPE /* Defaults below apply only if RE_TRANSLATE_TYPE is not already defined. */
+#define RE_TRANSLATE_TYPE char * /* Type of the `translate' table. */
+#define RE_TRANSLATE(TBL, C) ((TBL)[C]) /* Entry of table TBL at index C. */
+#define RE_TRANSLATE_P(TBL) (TBL) /* Expands to TBL itself; presumably a "table present" test -- TODO confirm in regex.c. */
+#endif
+
struct re_pattern_buffer
{
/* [[[begin pattern_buffer]]] */
comparing them, or zero for no translation. The translation
is applied to a pattern when it is compiled and to a string
when it is matched. */
- char *translate;
+ RE_TRANSLATE_TYPE translate;
/* Number of subexpressions found by the compiler. */
size_t re_nsub;
#define REGS_FIXED 2
unsigned regs_allocated : 2;
- /* Set to zero when regex_compile compiles a pattern; set to one
- by re_compile_fastmap when it updates the fastmap, if any. */
+ /* Set to zero when `regex_compile' compiles a pattern; set to one
+ by `re_compile_fastmap' if it updates the fastmap. */
unsigned fastmap_accurate : 1;
- /* If set, regexec reports only success or failure and does not
- return anything in pmatch. */
+ /* If set, `re_match_2' does not return information about
+ subexpressions. */
unsigned no_sub : 1;
/* If set, a beginning-of-line anchor doesn't match at the
/* If true, an anchor at a newline matches. */
unsigned newline_anchor : 1;
+ /* If true, a multi-byte form in the `buffer' should be recognized as a
+ single multibyte character. */
+ unsigned multibyte : 1;
+
/* [[[end pattern_buffer]]] */
};
typedef struct re_pattern_buffer regex_t;
-
-
-/* search.c (search_buffer) in Emacs needs this one opcode value. It is
- defined both in `regex.c' and here. */
-#define RE_EXACTN_VALUE 1
\f
/* Type for byte offsets within the string. POSIX mandates this. */
typedef int regoff_t;
prototype (if we are ANSI), and once without (if we aren't) -- we
use the following macro to declare argument types. This
unfortunately clutters up the declarations a bit, but I think it's
- worth it.
-
- We also have to undo `const' if we are not ANSI and if it hasn't
- previously being taken care of. */
+ worth it. */
#if __STDC__
+
#define _RE_ARGS(args) args /* ANSI compiler: keep the argument types in the declaration. */
-#else
+
+#else /* not __STDC__ */
+
#define _RE_ARGS(args) () /* Non-ANSI compiler: declare with an empty argument list. */
-#ifndef const
-#define const
-#endif
-#endif
+
+#endif /* not __STDC__ */
/* Sets the current default syntax to SYNTAX, and returns the old syntax.
You can also simply assign to the `re_syntax_options' variable. */
_RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
unsigned num_regs, regoff_t *starts, regoff_t *ends));
+#ifdef _REGEX_RE_COMP /* These two are declared only when _REGEX_RE_COMP is defined. */
/* 4.2 bsd compatibility. */
extern char *re_comp _RE_ARGS ((const char *));
extern int re_exec _RE_ARGS ((const char *));
+#endif /* _REGEX_RE_COMP */
/* POSIX compatibility. */
extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));