#define _GNU_SOURCE
+#ifdef HAVE_CONFIG_H
+#if defined (CONFIG_BROKETS)
+/* We use <config.h> instead of "config.h" so that a compilation
+ using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
+ (which it would do because it found this file in $srcdir). */
+#include <config.h>
+#else
+#include "config.h"
+#endif
+#endif
+
/* We need this for `regex.h', and perhaps for the Emacs include files. */
#include <sys/types.h>
#else /* not emacs */
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#else
+char *malloc ();
+char *realloc ();
+#endif
+
+
/* We used to test for `BSTRING' here, but only GCC and Emacs define
`BSTRING', as far as I know, and neither of them use this code. */
#if HAVE_STRING_H || STDC_HEADERS
#include <strings.h>
#endif
-#ifdef STDC_HEADERS
-#include <stdlib.h>
-#else
-char *malloc ();
-char *realloc ();
-#endif
-
-
/* Define the syntax stuff for \<, \>, etc. */
/* This must be nonzero for the wordchar and notwordchar pattern
macros don't need to be guarded with references to isascii. ...
Defining isascii to 1 should let any compiler worth its salt
eliminate the && through constant folding." */
-#if ! defined (isascii) || defined (STDC_HEADERS)
-#undef isascii
-#define isascii(c) 1
+
+#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
+#define ISASCII(c) 1
+#else
+#define ISASCII(c) isascii(c)
#endif
#ifdef isblank
-#define ISBLANK(c) (isascii (c) && isblank (c))
+#define ISBLANK(c) (ISASCII (c) && isblank (c))
#else
#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#endif
#ifdef isgraph
-#define ISGRAPH(c) (isascii (c) && isgraph (c))
+#define ISGRAPH(c) (ISASCII (c) && isgraph (c))
#else
-#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
+#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
#endif
-#define ISPRINT(c) (isascii (c) && isprint (c))
-#define ISDIGIT(c) (isascii (c) && isdigit (c))
-#define ISALNUM(c) (isascii (c) && isalnum (c))
-#define ISALPHA(c) (isascii (c) && isalpha (c))
-#define ISCNTRL(c) (isascii (c) && iscntrl (c))
-#define ISLOWER(c) (isascii (c) && islower (c))
-#define ISPUNCT(c) (isascii (c) && ispunct (c))
-#define ISSPACE(c) (isascii (c) && isspace (c))
-#define ISUPPER(c) (isascii (c) && isupper (c))
-#define ISXDIGIT(c) (isascii (c) && isxdigit (c))
+#define ISPRINT(c) (ISASCII (c) && isprint (c))
+#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
+#define ISALNUM(c) (ISASCII (c) && isalnum (c))
+#define ISALPHA(c) (ISASCII (c) && isalpha (c))
+#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
+#define ISLOWER(c) (ISASCII (c) && islower (c))
+#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
+#define ISSPACE(c) (ISASCII (c) && isspace (c))
+#define ISUPPER(c) (ISASCII (c) && isupper (c))
+#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
#ifndef NULL
#define NULL 0
\f
/* Avoiding alloca during matching, to placate r_alloc. */
-/* Define MATCH_SHOULD_NOT_ALLOCA if we need to make sure that the
+/* Define MATCH_MAY_ALLOCATE if we need to make sure that the
searching and matching functions should not call alloca. On some
systems, alloca is implemented in terms of malloc, and if we're
using the relocating allocator routines, then malloc could cause a
relocation, which might (if the strings being searched are in the
ralloc heap) shift the data out from underneath the regexp
- routines. */
-#if defined (REL_ALLOC)
-#if defined (C_ALLOCA)
-#define MATCH_SHOULD_NOT_ALLOCA
-#endif
+ routines.
+
+ Here's another reason to avoid allocation: Emacs insists on
+ processing input from X in a signal handler; processing X input may
+ call malloc; if input arrives while a matching routine is calling
+ malloc, then we're scrod. But Emacs can't just block input while
+ calling matching routines; then we don't notice interrupts when
+ they come in. So, Emacs blocks input around all regexp calls
+ except the matching calls, which it leaves unprotected, in the
+ faith that they will not malloc. */
+
+/* Normally, this is fine. */
+#define MATCH_MAY_ALLOCATE
+
+/* But under some circumstances, it's not. */
+#if defined (emacs) || (defined (REL_ALLOC) && defined (C_ALLOCA))
+#undef MATCH_MAY_ALLOCATE
#endif
\f
change it ourselves. */
int re_max_failures = 2000;
-typedef const unsigned char *fail_stack_elt_t;
+typedef unsigned char *fail_stack_elt_t;
typedef struct
{
/* Initialize `fail_stack'. Do `return -2' if the alloc fails. */
-#ifndef MATCH_SHOULD_NOT_ALLOCA
+#ifdef MATCH_MAY_ALLOCATE
#define INIT_FAIL_STACK() \
do { \
fail_stack.stack = (fail_stack_elt_t *) \
\f
-/* How do we implement MATCH_SHOULD_NOT_ALLOCA?
+/* How do we implement a missing MATCH_MAY_ALLOCATE?
We make the fail stack a global thing, and then grow it to
re_max_failures when we compile. */
-#ifdef MATCH_SHOULD_NOT_ALLOCA
+#ifndef MATCH_MAY_ALLOCATE
static fail_stack_type fail_stack;
static const char ** regstart, ** regend;
}
#endif /* DEBUG */
-#ifdef MATCH_SHOULD_NOT_ALLOCA
+#ifndef MATCH_MAY_ALLOCATE
/* Initialize the failure stack to the largest possible stack. This
isn't necessary unless we're trying to avoid calling alloca in
the search and match routines. */
struct re_pattern_buffer *bufp;
{
int j, k;
-#ifndef MATCH_SHOULD_NOT_ALLOCA
+#ifdef MATCH_MAY_ALLOCATE
fail_stack_type fail_stack;
#endif
#ifndef REGEX_MALLOC
register char *fastmap = bufp->fastmap;
unsigned char *pattern = bufp->buffer;
unsigned long size = bufp->used;
- const unsigned char *p = pattern;
+ unsigned char *p = pattern;
register unsigned char *pend = pattern + size;
/* Assume that each path through the pattern can be null until
{
bufp->regs_allocated = REGS_UNALLOCATED;
regs->num_regs = 0;
- regs->start = regs->end = (regoff_t) 0;
+ regs->start = regs->end = (regoff_t *) 0;
}
}
\f
/* This converts PTR, a pointer into one of the search strings `string1'
and `string2' into an offset from the beginning of that string. */
-#define POINTER_TO_OFFSET(ptr) \
- (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
+#define POINTER_TO_OFFSET(ptr) \
+ (FIRST_STRING_P (ptr) \
+ ? ((regoff_t) ((ptr) - string1)) \
+ : ((regoff_t) ((ptr) - string2 + size1)))
/* Macros for dealing with the split strings in re_match_2. */
/* Free everything we malloc. */
-#ifdef MATCH_SHOULD_NOT_ALLOCA
-#define FREE_VARIABLES() /* Do nothing! */
-#else
+#ifdef MATCH_MAY_ALLOCATE
#ifdef REGEX_MALLOC
#define FREE_VAR(var) if (var) free (var); var = NULL
#define FREE_VARIABLES() \
/* Some MIPS systems (at least) want this to free alloca'd storage. */
#define FREE_VARIABLES() alloca (0)
#endif /* not REGEX_MALLOC */
-#endif /* not MATCH_SHOULD_NOT_ALLOCA */
+#else
+#define FREE_VARIABLES() /* Do nothing! */
+#endif /* not MATCH_MAY_ALLOCATE */
/* These values must meet several constraints. They must not be valid
register values; since we have a limit of 255 registers (because
scanning the strings. If the latter is zero, the failure point is
a ``dummy''; if a failure happens and the failure point is a dummy,
it gets discarded and the next next one is tried. */
-#ifndef MATCH_SHOULD_NOT_ALLOCA /* otherwise, this is global. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
fail_stack_type fail_stack;
#endif
#ifdef DEBUG
matching and the regnum-th regend points to right after where we
stopped matching the regnum-th subexpression. (The zeroth register
keeps track of what the whole pattern matches.) */
-#ifndef MATCH_SHOULD_NOT_ALLOCA /* otherwise, these are global. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
const char **regstart, **regend;
#endif
restored because it will have been set to wherever in the string we
are when we last see its open-group operator. Similarly for a
register's end. */
-#ifndef MATCH_SHOULD_NOT_ALLOCA /* otherwise, these are global. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
const char **old_regstart, **old_regend;
#endif
matched any of the pattern so far this time through the reg_num-th
subexpression. These two fields get reset each time through any
loop their register is in. */
-#ifndef MATCH_SHOULD_NOT_ALLOCA /* otherwise, this is global. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
register_info_type *reg_info;
#endif
This happens as we backtrack through the failure points, which in
turn happens only if we have not yet matched the entire string. */
unsigned best_regs_set = false;
-#ifndef MATCH_SHOULD_NOT_ALLOCA /* otherwise, these are global. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
const char **best_regstart, **best_regend;
#endif
const char *match_end = NULL;
/* Used when we pop values we don't care about. */
-#ifndef MATCH_SHOULD_NOT_ALLOCA /* otherwise, these are global. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
const char **reg_dummy;
register_info_type *reg_info_dummy;
#endif
INIT_FAIL_STACK ();
-#ifndef MATCH_SHOULD_NOT_ALLOCA
+#ifdef MATCH_MAY_ALLOCATE
/* Do not bother to initialize all the register variables if there are
no groups in the pattern, as it takes a fair amount of time. If
there are groups, we include space for register 0 (the whole
reg_info = reg_info_dummy = (register_info_type *) NULL;
}
#endif /* REGEX_MALLOC */
-#endif /* MATCH_SHOULD_NOT_ALLOCA */
+#endif /* MATCH_MAY_ALLOCATE */
/* The starting position is bogus. */
if (pos < 0 || pos > size1 + size2)
if (regs->num_regs > 0)
{
regs->start[0] = pos;
- regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
- : d - string2 + size1);
+ regs->end[0] = (MATCHING_IN_FIRST_STRING
+ ? ((regoff_t) (d - string1))
+ : ((regoff_t) (d - string2 + size1)));
}
/* Go through the first `min (num_regs, regs->num_regs)'
regs->start[mcnt] = regs->end[mcnt] = -1;
else
{
- regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
- regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
+ regs->start[mcnt]
+ = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
+ regs->end[mcnt]
+ = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
}
}
detect that here, the alternative has put on a dummy
failure point which is what we will end up popping. */
- /* Skip over open/close-group commands. */
- while (p2 + 2 < pend
- && ((re_opcode_t) *p2 == stop_memory
- || (re_opcode_t) *p2 == start_memory))
- p2 += 3; /* Skip over args, too. */
+ /* Skip over open/close-group commands.
+ If what follows this loop is a ...+ construct,
+ look at what begins its body, since we will have to
+ match at least one of that. */
+ while (1)
+ {
+ if (p2 + 2 < pend
+ && ((re_opcode_t) *p2 == stop_memory
+ || (re_opcode_t) *p2 == start_memory))
+ p2 += 3;
+ else if (p2 + 6 < pend
+ && (re_opcode_t) *p2 == dummy_failure_jump)
+ p2 += 6;
+ else
+ break;
+ }
+
+ p1 = p + mcnt;
+ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
+ to the `maybe_finalize_jump' of this case. Examine what
+ follows. */
/* If we're at the end of the pattern, we can change. */
if (p2 == pend)
{
register unsigned char c
= *p2 == (unsigned char) endline ? '\n' : p2[2];
- p1 = p + mcnt;
- /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
- to the `maybe_finalize_jump' of this case. Examine what
- follows. */
if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
{
p[-3] = (unsigned char) pop_failure_jump;
}
}
}
+ else if ((re_opcode_t) *p2 == charset)
+ {
+ register unsigned char c
+ = *p2 == (unsigned char) endline ? '\n' : p2[2];
+
+ if ((re_opcode_t) p1[3] == exactn
+ && ! (p2[1] * BYTEWIDTH > p1[4]
+ && (p2[1 + p1[4] / BYTEWIDTH]
+ & (1 << (p1[4] % BYTEWIDTH)))))
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
+ c, p1[5]);
+ }
+
+ else if ((re_opcode_t) p1[3] == charset_not)
+ {
+ int idx;
+ /* We win if the charset_not inside the loop
+ lists every character listed in the charset after. */
+ for (idx = 0; idx < p2[1]; idx++)
+ if (! (p2[2 + idx] == 0
+ || (idx < p1[4]
+ && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
+ break;
+
+ if (idx == p2[1])
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+ }
+ else if ((re_opcode_t) p1[3] == charset)
+ {
+ int idx;
+ /* We win if the charset inside the loop
+ has no overlap with the one after the loop. */
+ for (idx = 0; idx < p2[1] && idx < p1[4]; idx++)
+ if ((p2[2 + idx] & p1[5 + idx]) != 0)
+ break;
+
+ if (idx == p2[1] || idx == p1[4])
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+ }
+ }
}
p -= 2; /* Point at relative address again. */
if ((re_opcode_t) p[-1] != pop_failure_jump)