(Implements POSIX draft P10003.2/D11.2, except for
internationalization features.)
- Copyright (C) 1993 Free Software Foundation, Inc.
+ Copyright (C) 1993, 1994 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
/* We used to test for `BSTRING' here, but only GCC and Emacs define
`BSTRING', as far as I know, and neither of them use this code. */
+#ifndef INHIBIT_STRING_HEADER
#if HAVE_STRING_H || STDC_HEADERS
#include <string.h>
#ifndef bcmp
#else
#include <strings.h>
#endif
+#endif
/* Define the syntax stuff for \<, \>, etc. */
ralloc heap) shift the data out from underneath the regexp
routines.
- Here's another reason to avoid allocation: Emacs insists on
- processing input from X in a signal handler; processing X input may
+ Here's another reason to avoid allocation: Emacs
+ processes input from X in a signal handler; processing X input may
call malloc; if input arrives while a matching routine is calling
malloc, then we're scrod. But Emacs can't just block input while
calling matching routines; then we don't notice interrupts when
/* Normally, this is fine. */
#define MATCH_MAY_ALLOCATE
-/* But under some circumstances, it's not. */
-#if defined (emacs) || (defined (REL_ALLOC) && defined (C_ALLOCA))
+/* The match routines may not allocate if (1) they would do it with malloc
+ and (2) it's not safe for htem to use malloc. */
+#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && (defined (emacs) || defined (REL_ALLOC))
#undef MATCH_MAY_ALLOCATE
#endif
The `fastmap' and `newline_anchor' fields are neither
examined nor set. */
+/* Return, freeing storage we allocated. */
+#define FREE_STACK_RETURN(value) \
+ return (free (compile_stack.stack), value)
+
static reg_errcode_t
regex_compile (pattern, size, syntax, bufp)
const char *pattern;
{ /* Caller did not allocate a buffer. Do it for them. */
bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
}
- if (!bufp->buffer) return REG_ESPACE;
+ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
bufp->allocated = INIT_BUF_SIZE;
}
if (!laststart)
{
if (syntax & RE_CONTEXT_INVALID_OPS)
- return REG_BADRPT;
+ FREE_STACK_RETURN (REG_BADRPT);
else if (!(syntax & RE_CONTEXT_INDEP_OPS))
goto normal_char;
}
else if (syntax & RE_BK_PLUS_QM && c == '\\')
{
- if (p == pend) return REG_EESCAPE;
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
PATFETCH (c1);
if (!(c1 == '+' || c1 == '?'))
{
boolean had_char_class = false;
- if (p == pend) return REG_EBRACK;
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
/* Ensure that we have enough space to push a charset: the
opcode, the length count, and the bitset; 34 bytes in all. */
/* Read in characters and ranges, setting map bits. */
for (;;)
{
- if (p == pend) return REG_EBRACK;
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
PATFETCH (c);
/* \ might escape characters inside [...] and [^...]. */
if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
{
- if (p == pend) return REG_EESCAPE;
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
PATFETCH (c1);
SET_LIST_BIT (c1);
/* Look ahead to see if it's a range when the last thing
was a character class. */
if (had_char_class && c == '-' && *p != ']')
- return REG_ERANGE;
+ FREE_STACK_RETURN (REG_ERANGE);
/* Look ahead to see if it's a range when the last thing
was a character: if this is a hyphen not at the
{
reg_errcode_t ret
= compile_range (&p, pend, translate, syntax, b);
- if (ret != REG_NOERROR) return ret;
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
}
else if (p[0] == '-' && p[1] != ']')
PATFETCH (c1);
ret = compile_range (&p, pend, translate, syntax, b);
- if (ret != REG_NOERROR) return ret;
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
}
/* See if we're at the beginning of a possible character
c1 = 0;
/* If pattern is `[[:'. */
- if (p == pend) return REG_EBRACK;
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
for (;;)
{
boolean is_upper = STREQ (str, "upper");
boolean is_xdigit = STREQ (str, "xdigit");
- if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
+ if (!IS_CHAR_CLASS (str))
+ FREE_STACK_RETURN (REG_ECTYPE);
/* Throw away the ] at the end of the character
class. */
PATFETCH (c);
- if (p == pend) return REG_EBRACK;
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
{
case '\\':
- if (p == pend) return REG_EESCAPE;
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
/* Do not translate the character after the \, so that we can
distinguish, e.g., \B from \b, even if we normally would
if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
goto normal_backslash;
else
- return REG_ERPAREN;
+ FREE_STACK_RETURN (REG_ERPAREN);
handle_close:
if (fixup_alt_jump)
if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
goto normal_char;
else
- return REG_ERPAREN;
+ FREE_STACK_RETURN (REG_ERPAREN);
/* Since we just checked for an empty stack above, this
``can't happen''. */
if (syntax & RE_NO_BK_BRACES)
goto unfetch_interval;
else
- return REG_EBRACE;
+ FREE_STACK_RETURN (REG_EBRACE);
}
GET_UNSIGNED_NUMBER (lower_bound);
if (syntax & RE_NO_BK_BRACES)
goto unfetch_interval;
else
- return REG_BADBR;
+ FREE_STACK_RETURN (REG_BADBR);
}
if (!(syntax & RE_NO_BK_BRACES))
{
- if (c != '\\') return REG_EBRACE;
+ if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
PATFETCH (c);
}
if (syntax & RE_NO_BK_BRACES)
goto unfetch_interval;
else
- return REG_BADBR;
+ FREE_STACK_RETURN (REG_BADBR);
}
/* We just parsed a valid interval. */
if (!laststart)
{
if (syntax & RE_CONTEXT_INVALID_OPS)
- return REG_BADRPT;
+ FREE_STACK_RETURN (REG_BADRPT);
else if (syntax & RE_CONTEXT_INDEP_OPS)
laststart = b;
else
c1 = c - '0';
if (c1 > regnum)
- return REG_ESUBREG;
+ FREE_STACK_RETURN (REG_ESUBREG);
/* Can't back reference to a subexpression if inside of it. */
if (group_in_compile_stack (compile_stack, c1))
STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
if (!COMPILE_STACK_EMPTY)
- return REG_EPAREN;
+ FREE_STACK_RETURN (REG_EPAREN);
free (compile_stack.stack);
if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
{
fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
+
+#ifdef emacs
if (! fail_stack.stack)
- fail_stack.stack =
- (fail_stack_elt_t *) malloc (fail_stack.size
- * sizeof (fail_stack_elt_t));
+ fail_stack.stack
+ = (fail_stack_elt_t *) xmalloc (fail_stack.size
+ * sizeof (fail_stack_elt_t));
else
- fail_stack.stack =
- (fail_stack_elt_t *) realloc (fail_stack.stack,
- (fail_stack.size
- * sizeof (fail_stack_elt_t)));
+ fail_stack.stack
+ = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
+ (fail_stack.size
+ * sizeof (fail_stack_elt_t)));
+#else /* not emacs */
+ if (! fail_stack.stack)
+ fail_stack.stack
+ = (fail_stack_elt_t *) malloc (fail_stack.size
+ * sizeof (fail_stack_elt_t));
+ else
+ fail_stack.stack
+ = (fail_stack_elt_t *) realloc (fail_stack.stack,
+ (fail_stack.size
+ * sizeof (fail_stack_elt_t)));
+#endif /* not emacs */
}
/* Initialize some other variables the matcher uses. */
case anychar:
- /* `.' matches anything ... */
- for (j = 0; j < (1 << BYTEWIDTH); j++)
- fastmap[j] = 1;
+ {
+ int fastmap_newline = fastmap['\n'];
- /* ... except perhaps newline. */
- if (!(bufp->syntax & RE_DOT_NEWLINE))
- fastmap['\n'] = 0;
+ /* `.' matches anything ... */
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ fastmap[j] = 1;
- /* Return if we have already set `can_be_null'; if we have,
- then the fastmap is irrelevant. Something's wrong here. */
- else if (bufp->can_be_null)
- return 0;
+ /* ... except perhaps newline. */
+ if (!(bufp->syntax & RE_DOT_NEWLINE))
+ fastmap['\n'] = fastmap_newline;
- /* Otherwise, have to check alternative paths. */
- break;
+ /* Return if we have already set `can_be_null'; if we have,
+ then the fastmap is irrelevant. Something's wrong here. */
+ else if (bufp->can_be_null)
+ return 0;
+ /* Otherwise, have to check alternative paths. */
+ break;
+ }
#ifdef emacs
case syntaxspec:
val = re_match_2_internal (bufp, string1, size1, string2, size2,
startpos, regs, stop);
+#ifndef REGEX_MALLOC
+#ifdef C_ALLOCA
alloca (0);
+#endif
+#endif
if (val >= 0)
return startpos;
longest match, try backtracking. */
if (d != end_match_2)
{
+ /* 1 if this match ends in the same string (string1 or string2)
+ as the best previous match. */
+ boolean same_str_p = (FIRST_STRING_P (match_end)
+ == MATCHING_IN_FIRST_STRING);
+ /* 1 if this match is the best seen so far. */
+ boolean best_match_p = (same_str_p ? d > match_end
+ : !MATCHING_IN_FIRST_STRING);
+
DEBUG_PRINT1 ("backtracking.\n");
if (!FAIL_STACK_EMPTY ())
{ /* More failure points to try. */
- boolean same_str_p = (FIRST_STRING_P (match_end)
- == MATCHING_IN_FIRST_STRING);
/* If exceeds best match so far, save it. */
- if (!best_regs_set
- || (same_str_p && d > match_end)
- || (!same_str_p && !MATCHING_IN_FIRST_STRING))
+ if (!best_regs_set || best_match_p)
{
best_regs_set = true;
match_end = d;
goto fail;
}
- /* If no failure points, don't restore garbage. */
- else if (best_regs_set)
+ /* If no failure points, don't restore garbage. And if
+ last match is real best match, don't restore second
+ best one. */
+ else if (best_regs_set && !best_match_p)
{
restore_best_regs:
/* Restore best match. It may happen that `dend ==
}
else if ((re_opcode_t) *p2 == charset)
{
+#ifdef DEBUG
register unsigned char c
= *p2 == (unsigned char) endline ? '\n' : p2[2];
+#endif
if ((re_opcode_t) p1[3] == exactn
- && ! (p2[1] * BYTEWIDTH > p1[4]
+ && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
&& (p2[1 + p1[4] / BYTEWIDTH]
& (1 << (p1[4] % BYTEWIDTH)))))
{
int idx;
/* We win if the charset_not inside the loop
lists every character listed in the charset after. */
- for (idx = 0; idx < p2[1]; idx++)
+ for (idx = 0; idx < (int) p2[1]; idx++)
if (! (p2[2 + idx] == 0
- || (idx < p1[4]
+ || (idx < (int) p1[4]
&& ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
break;
int idx;
/* We win if the charset inside the loop
has no overlap with the one after the loop. */
- for (idx = 0; idx < p2[1] && idx < p1[4]; idx++)
+ for (idx = 0;
+ idx < (int) p2[1] && idx < (int) p1[4];
+ idx++)
if ((p2[2 + idx] & p1[5 + idx]) != 0)
break;