(Implements POSIX draft P10003.2/D11.2, except for
internationalization features.)
- Copyright (C) 1993, 1994 Free Software Foundation, Inc.
+ Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
/* We need this for `regex.h', and perhaps for the Emacs include files. */
#include <sys/types.h>
-/* This is for other GNU distributions with internationalized messages.
- The GNU C Library itself does not yet support such messages. */
-#if HAVE_LIBINTL_H
+/* This is for other GNU distributions with internationalized messages. */
+#if HAVE_LIBINTL_H || defined (_LIBC)
# include <libintl.h>
#else
# define gettext(msgid) (msgid)
#else /* not emacs */
-#ifdef STDC_HEADERS
+/* If we are not linking with Emacs proper,
+ we can't use the relocating allocator
+ even if config.h says that we can. */
+#undef REL_ALLOC
+
+#if defined (STDC_HEADERS) || defined (_LIBC)
#include <stdlib.h>
#else
char *malloc ();
char *realloc ();
#endif
+/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
+ If nothing else has been done, use the method below. */
+#ifdef INHIBIT_STRING_HEADER
+#if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
+#if !defined (bzero) && !defined (bcopy)
+#undef INHIBIT_STRING_HEADER
+#endif
+#endif
+#endif
-/* We used to test for `BSTRING' here, but only GCC and Emacs define
- `BSTRING', as far as I know, and neither of them use this code. */
+/* This is the normal way of making sure we have a bcopy and a bzero.
+ This is used in most programs--a few other programs avoid this
+ by defining INHIBIT_STRING_HEADER. */
#ifndef INHIBIT_STRING_HEADER
-#if HAVE_STRING_H || STDC_HEADERS
+#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC)
#include <string.h>
#ifndef bcmp
#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
#ifndef NULL
-#define NULL 0
+#define NULL (void *)0
#endif
/* We remove any previous definition of `SIGN_EXTEND_CHAR',
#include <alloca.h>
#else /* not __GNUC__ or HAVE_ALLOCA_H */
#ifndef _AIX /* Already did AIX, up at the top. */
+#if defined (__STDC__) && __STDC__
+void *alloca ();
+#else
char *alloca ();
+#endif
#endif /* not _AIX */
#endif /* not HAVE_ALLOCA_H */
#endif /* not __GNUC__ */
destination)
/* No need to do anything to free, after alloca. */
-#define REGEX_FREE(arg)
+#define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
#endif /* not REGEX_MALLOC */
/* Define how to allocate the failure stack. */
-#ifdef REL_ALLOC
+#if defined (REL_ALLOC) && defined (REGEX_MALLOC)
+
#define REGEX_ALLOCATE_STACK(size) \
r_alloc (&failure_stack_ptr, (size))
#define REGEX_REALLOCATE_STACK(source, osize, nsize) \
#define REGEX_FREE_STACK(ptr) \
r_alloc_free (&failure_stack_ptr)
-#else /* not REL_ALLOC */
+#else /* not using relocating allocator */
#ifdef REGEX_MALLOC
#define REGEX_FREE_STACK(arg)
#endif /* not REGEX_MALLOC */
-#endif /* not REL_ALLOC */
+#endif /* not using relocating allocator */
/* True if `size1' is non-NULL and PTR is pointing anywhere inside
#endif
/* The match routines may not allocate if (1) they would do it with malloc
- and (2) it's not safe for them to use malloc. */
-#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && (defined (emacs) || defined (REL_ALLOC))
+ and (2) it's not safe for them to use malloc.
+ Note that if REL_ALLOC is defined, matching would not use malloc for the
+ failure stack, but we would still use it for the register vectors;
+ so REL_ALLOC should not affect this. */
+#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
#undef MATCH_MAY_ALLOCATE
#endif
exactly that if always used MAX_FAILURE_SPACE each time we failed.
This is a variable only so users of regex can assign to it; we never
change it ourselves. */
-#ifdef REL_ALLOC
-int re_max_failures = 20000000;
+#if defined (MATCH_MAY_ALLOCATE)
+int re_max_failures = 200000;
#else
int re_max_failures = 2000;
#endif
-typedef unsigned char *fail_stack_elt_t;
+union fail_stack_elt
+{
+ unsigned char *pointer;
+ int integer;
+};
+
+typedef union fail_stack_elt fail_stack_elt_t;
typedef struct
{
#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
-#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail])
-/* Initialize `fail_stack'. Do `return -2' if the alloc fails. */
+/* Define macros to initialize and free the failure stack.
+ Do `return -2' if the alloc fails. */
#ifdef MATCH_MAY_ALLOCATE
#define INIT_FAIL_STACK() \
fail_stack.size = INIT_FAILURE_ALLOC; \
fail_stack.avail = 0; \
} while (0)
+
+#define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
#else
#define INIT_FAIL_STACK() \
do { \
fail_stack.avail = 0; \
} while (0)
+
+#define RESET_FAIL_STACK()
#endif
1)))
-/* Push PATTERN_OP on FAIL_STACK.
-
+/* Push pointer POINTER on FAIL_STACK.
Return 1 if was able to do so and 0 if ran out of memory allocating
space to do so. */
-#define PUSH_PATTERN_OP(pattern_op, fail_stack) \
+#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
((FAIL_STACK_FULL () \
- && !DOUBLE_FAIL_STACK (fail_stack)) \
+ && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
? 0 \
- : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \
+ : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
1))
/* Push a pointer value onto the failure stack.
Assumes the variable `fail_stack'. Probably should only
be called from within `PUSH_FAILURE_POINT'. */
#define PUSH_FAILURE_POINTER(item) \
- fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) (item)
+ fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
/* This pushes an integer-valued item onto the failure stack.
Assumes the variable `fail_stack'. Probably should only
be called from within `PUSH_FAILURE_POINT'. */
#define PUSH_FAILURE_INT(item) \
- fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) (EMACS_INT) (item)
+ fail_stack.stack[fail_stack.avail++].integer = (item)
-/* The complement operation. Assumes `fail_stack' is nonempty. */
-#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail]
+/* Push a fail_stack_elt_t value onto the failure stack.
+ Assumes the variable `fail_stack'. Probably should only
+ be called from within `PUSH_FAILURE_POINT'. */
+#define PUSH_FAILURE_ELT(item) \
+ fail_stack.stack[fail_stack.avail++] = (item)
-/* The complement operation. Assumes `fail_stack' is nonempty. */
-#define POP_FAILURE_INT() (EMACS_INT) fail_stack.stack[--fail_stack.avail]
+/* These three POP... operations complement the three PUSH... operations.
+ All assume that `fail_stack' is nonempty. */
+#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
+#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
+#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
/* Used to omit pushing failure point id's when we're not debugging. */
#ifdef DEBUG
/* Push the info, starting with the registers. */ \
DEBUG_PRINT1 ("\n"); \
\
- for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
- this_reg++) \
- { \
- DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
- DEBUG_STATEMENT (num_regs_pushed++); \
+ if (!(RE_NO_POSIX_BACKTRACKING & bufp->syntax)) \
+ for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
+ this_reg++) \
+ { \
+ DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
+ DEBUG_STATEMENT (num_regs_pushed++); \
\
- DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
- PUSH_FAILURE_POINTER (regstart[this_reg]); \
- \
- DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
- PUSH_FAILURE_POINTER (regend[this_reg]); \
+ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ PUSH_FAILURE_POINTER (regstart[this_reg]); \
\
- DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
- DEBUG_PRINT2 (" match_null=%d", \
- REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
- DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
- DEBUG_PRINT2 (" matched_something=%d", \
- MATCHED_SOMETHING (reg_info[this_reg])); \
- DEBUG_PRINT2 (" ever_matched=%d", \
- EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
- DEBUG_PRINT1 ("\n"); \
- PUSH_FAILURE_POINTER (reg_info[this_reg].word); \
- } \
+ DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
+ PUSH_FAILURE_POINTER (regend[this_reg]); \
+ \
+ DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
+ DEBUG_PRINT2 (" match_null=%d", \
+ REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" matched_something=%d", \
+ MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" ever_matched=%d", \
+ EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT1 ("\n"); \
+ PUSH_FAILURE_ELT (reg_info[this_reg].word); \
+ } \
\
DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
PUSH_FAILURE_INT (lowest_active_reg); \
#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
/* We actually push this many items. */
-#define NUM_FAILURE_ITEMS \
- ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \
- + NUM_NONREG_ITEMS)
+#define NUM_FAILURE_ITEMS \
+ (((RE_NO_POSIX_BACKTRACKING & bufp->syntax \
+ ? 0 : highest_active_reg - lowest_active_reg + 1) \
+ * NUM_REG_ITEMS) \
+ + NUM_NONREG_ITEMS)
/* How many items can still be added to the stack without overflowing it. */
#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
low_reg = (unsigned) POP_FAILURE_INT (); \
DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
\
- for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
- { \
- DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
+ if (!(RE_NO_POSIX_BACKTRACKING & bufp->syntax)) \
+ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
+ { \
+ DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
\
- reg_info[this_reg].word = POP_FAILURE_POINTER (); \
- DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
+ reg_info[this_reg].word = POP_FAILURE_ELT (); \
+ DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
\
- regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \
- DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
+ regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
\
- regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
- DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ } \
+ else \
+ { \
+ for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
+ { \
+ reg_info[this_reg].word = 0; \
+ regend[this_reg] = 0; \
+ regstart[this_reg] = 0; \
+ } \
+ highest_active_reg = high_reg; \
} \
\
set_regs_matched_done = 0; \
\f
/* Structure for per-register (a.k.a. per-group) information.
- This must not be longer than one word, because we push this value
- onto the failure stack. Other register information, such as the
+ Other register information, such as the
starting and ending positions (which are addresses), and the list of
inner groups (which is a bits list) are maintained in separate
variables.
the compiler will pack our bit fields into something that fits into
the type of `word', i.e., is something that fits into one item on the
failure stack. */
+
typedef union
{
fail_stack_elt_t word;
if necessary. Also cast from a signed character in the constant
string passed to us by the user to an unsigned char that we can use
as an array index (in, e.g., `translate'). */
+#ifndef PATFETCH
#define PATFETCH(c) \
do {if (p == pend) return REG_EEND; \
c = (unsigned char) *p++; \
- if (translate) c = translate[c]; \
+ if (translate) c = (unsigned char) translate[c]; \
} while (0)
+#endif
/* Fetch the next character in the uncompiled pattern, with no
translation. */
cast the subscript to translate because some data is declared as
`char *', to avoid warnings when a string constant is passed. But
when we use a character as a subscript we must make it unsigned. */
-#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
+#ifndef TRANSLATE
+#define TRANSLATE(d) \
+ (translate ? (char) translate[(unsigned char) (d)] : (d))
+#endif
/* Macros for outputting the compiled pattern into `buffer'. */
const char *pend = pattern + size;
/* How to translate the characters in the pattern. */
- char *translate = bufp->translate;
+ RE_TRANSLATE_TYPE translate = bufp->translate;
/* Address of the count-byte of the most recently inserted `exactn'
command. This makes it possible to tell if a new exact-match
{
const char *next = p;
boolean next_backslash = *next == '\\';
- const char *next_next = p + 1 < pend ? p + 1 : NULL;
+ const char *next_next = p + 1 < pend ? p + 1 : 0;
return
/* Before a subexpression? */
static reg_errcode_t
compile_range (p_ptr, pend, translate, syntax, b)
const char **p_ptr, *pend;
- char *translate;
+ RE_TRANSLATE_TYPE translate;
reg_syntax_t syntax;
unsigned char *b;
{
/* This holds the pointer to the failure stack, when
it is allocated relocatably. */
+#ifdef REL_ALLOC
fail_stack_elt_t *failure_stack_ptr;
+#endif
/* Assume that each path through the pattern can be null until
proven otherwise. We set this false at the bottom of switch
/* Reset for next path. */
path_can_be_null = true;
- p = fail_stack.stack[--fail_stack.avail];
+ p = fail_stack.stack[--fail_stack.avail].pointer;
continue;
}
case at_dot:
case after_dot:
continue;
-#endif /* not emacs */
+#endif /* emacs */
case no_op:
/* If what's on the stack is where we are now, pop it. */
if (!FAIL_STACK_EMPTY ()
- && fail_stack.stack[fail_stack.avail - 1] == p)
+ && fail_stack.stack[fail_stack.avail - 1].pointer == p)
fail_stack.avail--;
continue;
{
if (!PUSH_PATTERN_OP (p + j, fail_stack))
{
- REGEX_FREE_STACK (fail_stack.stack);
+ RESET_FAIL_STACK ();
return -2;
}
}
bufp->can_be_null |= path_can_be_null;
done:
- REGEX_FREE_STACK (fail_stack.stack);
+ RESET_FAIL_STACK ();
return 0;
} /* re_compile_fastmap */
\f
{
int val;
register char *fastmap = bufp->fastmap;
- register char *translate = bufp->translate;
+ register RE_TRANSLATE_TYPE translate = bufp->translate;
int total_size = size1 + size2;
int endpos = startpos + range;
return -1;
/* Fix up RANGE if it might eventually take us outside
- the virtual concatenation of STRING1 and STRING2. */
- if (endpos < -1)
- range = -1 - startpos;
+ the virtual concatenation of STRING1 and STRING2.
+ Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */
+ if (endpos < 0)
+ range = 0 - startpos;
else if (endpos > total_size)
range = total_size - startpos;
range = 1;
}
+#ifdef emacs
+ /* In a forward search for something that starts with \=.
+ don't keep searching past point. */
+ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
+ {
+ range = PT - startpos;
+ if (range <= 0)
+ return -1;
+ }
+#endif /* emacs */
+
/* Update the fastmap now if not correct already. */
if (fastmap && !bufp->fastmap_accurate)
if (re_compile_fastmap (bufp) == -2)
FREE_VAR (reg_info_dummy); \
} while (0)
#else
-#define FREE_VARIABLES() /* Do nothing! */
+#define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
#endif /* not MATCH_MAY_ALLOCATE */
/* These values must meet several constraints. They must not be valid
unsigned char *just_past_start_mem = 0;
/* We use this to map every character in the string. */
- char *translate = bufp->translate;
+ RE_TRANSLATE_TYPE translate = bufp->translate;
/* Failure point stack. Each place that can handle a failure further
down the line pushes a failure point on this stack. It consists of
/* This holds the pointer to the failure stack, when
it is allocated relocatably. */
+#ifdef REL_ALLOC
fail_stack_elt_t *failure_stack_ptr;
+#endif
/* We fill all the registers internally, independent of what we
return, for use in backreferences. The number here includes
do
{
PREFETCH ();
- if (translate[(unsigned char) *d++] != (char) *p++)
+ if ((unsigned char) translate[(unsigned char) *d++]
+ != (unsigned char) *p++)
goto fail;
}
while (--mcnt);
for that group and all inner ones, so that if we fail back
to this point, the group's information will be correct.
For example, in \(a*\)*\1, we need the preceding group,
- and in \(\(a*\)b*\)\2, we need the inner group. */
+ and in \(zz\(a*\)b*\)\2, we need the inner group. */
/* We can't use `p' to check ahead because we push
a failure point to `p + mcnt' after we do this. */
if (PTR_CHAR_POS ((unsigned char *) d) <= point)
goto fail;
break;
-#if 0 /* not emacs19 */
- case at_dot:
- DEBUG_PRINT1 ("EXECUTING at_dot.\n");
- if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
- goto fail;
- break;
-#endif /* not emacs19 */
case syntaxspec:
DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
bcmp_translate (s1, s2, len, translate)
unsigned char *s1, *s2;
register int len;
- char *translate;
+ RE_TRANSLATE_TYPE translate;
{
register unsigned char *p1 = s1, *p2 = s2;
while (len)
{
unsigned i;
- preg->translate = (char *) malloc (CHAR_SET_SIZE);
+ preg->translate
+ = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
+ * sizeof (*(RE_TRANSLATE_TYPE)0));
if (preg->translate == NULL)
return (int) REG_ESPACE;