REGEX_ALLOCATE_STACK. */
-/* Number of failure points for which to initially allocate space
+/* Approximate number of failure points for which to initially allocate space
when matching. If this number is exceeded, we allocate more
space, so it is not a hard limit. */
#ifndef INIT_FAILURE_ALLOC
-#define INIT_FAILURE_ALLOC 5
+#define INIT_FAILURE_ALLOC 20
#endif
/* Roughly the maximum number of failure points on the stack. Would be
- exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
+ exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
This is a variable only so users of regex can assign to it; we never
change it ourselves. */
#if defined (MATCH_MAY_ALLOCATE)
-/* 4400 was enough to cause a crash on Alpha OSF/1,
- whose default stack limit is 2mb. */
-int re_max_failures = 20000;
+/* Note that 4400 is enough to cause a crash on Alpha OSF/1,
+ whose default stack limit is 2mb. In order for a larger
+ value to work reliably, you have to try to make it accord
+ with the process stack limit. */
+int re_max_failures = 40000;
#else
-int re_max_failures = 2000;
+int re_max_failures = 4000;
#endif
union fail_stack_elt
#define INIT_FAIL_STACK() \
do { \
fail_stack.stack = (fail_stack_elt_t *) \
- REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
+ REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \
+ * sizeof (fail_stack_elt_t)); \
\
if (fail_stack.stack == NULL) \
return -2; \
#endif
-/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
+/* Double the size of FAIL_STACK, up to a limit
+ which allows approximately `re_max_failures' items.
Return 1 if succeeds, and 0 if either ran out of memory
allocating space for it or it was already too large.
REGEX_REALLOCATE_STACK requires `destination' be declared. */
-#define DOUBLE_FAIL_STACK(fail_stack) \
- ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
+/* Factor to increase the failure stack size by
+ when we increase it.
+ This used to be 2, but 2 was too wasteful
+ because the old discarded stacks added up to as much space
+ were as ultimate, maximum-size stack. */
+#define FAIL_STACK_GROWTH_FACTOR 4
+
+#define GROW_FAIL_STACK(fail_stack) \
+ (((fail_stack).size * sizeof (fail_stack_elt_t) \
+ >= re_max_failures * TYPICAL_FAILURE_SIZE) \
? 0 \
- : ((fail_stack).stack = (fail_stack_elt_t *) \
+ : ((fail_stack).stack \
+ = (fail_stack_elt_t *) \
REGEX_REALLOCATE_STACK ((fail_stack).stack, \
(fail_stack).size * sizeof (fail_stack_elt_t), \
- ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
+ MIN (re_max_failures * TYPICAL_FAILURE_SIZE, \
+ ((fail_stack).size * sizeof (fail_stack_elt_t) \
+ * FAIL_STACK_GROWTH_FACTOR))), \
\
(fail_stack).stack == NULL \
? 0 \
- : ((fail_stack).size <<= 1, \
+ : ((fail_stack).size \
+ = (MIN (re_max_failures * TYPICAL_FAILURE_SIZE, \
+ ((fail_stack).size * sizeof (fail_stack_elt_t) \
+ * FAIL_STACK_GROWTH_FACTOR)) \
+ / sizeof (fail_stack_elt_t)), \
1)))
space to do so. */
#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
((FAIL_STACK_FULL () \
- && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
+ && !GROW_FAIL_STACK (FAIL_STACK)) \
? 0 \
: ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
1))
if we ever fail back to it.
Requires variables fail_stack, regstart, regend, reg_info, and
- num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be
+ num_regs be declared. GROW_FAIL_STACK requires `destination' be
declared.
Does `return FAILURE_CODE' if runs out of memory. */
/* Ensure we have enough space allocated for what we will push. */ \
while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
{ \
- if (!DOUBLE_FAIL_STACK (fail_stack)) \
+ if (!GROW_FAIL_STACK (fail_stack)) \
return failure_code; \
\
DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
#define NUM_NONREG_ITEMS 4
#endif
-/* We push at most this many items on the stack. */
-/* We used to use (num_regs - 1), which is the number of registers
- this regexp will save; but that was changed to 5
- to avoid stack overflow for a regexp with lots of parens. */
-#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
+/* Estimate the size of data pushed by a typical failure stack entry.
+ An estimate is all we need, because all we use this for
+ is to choose a limit for how big to make the failure stack. */
+
+#define TYPICAL_FAILURE_SIZE 20
-/* We actually push this many items. */
+/* This is how many items we actually use for a failure point.
+ It depends on the regexp. */
#define NUM_FAILURE_ITEMS \
(((0 \
? 0 : highest_active_reg - lowest_active_reg + 1) \
when we use a character as a subscript we must make it unsigned. */
#ifndef TRANSLATE
#define TRANSLATE(d) \
- (translate ? (unsigned char) translate[(unsigned char) (d)] : (d))
+ (translate ? (unsigned char) RE_TRANSLATE (translate, (unsigned char) (d)) : (d))
#endif
}
else
{
- /* Could be the end of the bracket expression. If it's
- not (i.e., when the bracket expression is `[]' so
- far), the ']' character bit gets set way below. */
- if (c == ']' && p != p1 + 1)
- break;
+ /* Could be the end of the bracket expression. If it's
+ not (i.e., when the bracket expression is `[]' so
+ far), the ']' character bit gets set way below. */
+ if (c == ']' && p != p1 + 1)
+ break;
}
/* If C indicates start of multibyte char, get the
else if (!escaped_char &&
syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
- { /* Leave room for the null. */
+ {
+ /* Leave room for the null. */
char str[CHAR_CLASS_MAX_LENGTH + 1];
PATFETCH (c);
{
int num_regs = bufp->re_nsub + 1;
- /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
- is strictly greater than re_max_failures, the largest possible stack
- is 2 * re_max_failures failure points. */
- if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
+ if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE)
{
- fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
+ fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE);
#ifdef emacs
if (! fail_stack.stack)
if (translate)
while (range > lim
&& !fastmap[(unsigned char)
- translate[(unsigned char) *d++]])
+ RE_TRANSLATE (translate, (unsigned char) *d++)])
range--;
else
while (range > lim && !fastmap[(unsigned char) *d++])
/* Update STARTPOS to the next character boundary. */
if (multibyte)
{
- const unsigned char *p = POS_ADDR_VSTRING (startpos);
- const unsigned char *pend = STOP_ADDR_VSTRING (startpos);
+ const unsigned char *p
+ = (const unsigned char *) POS_ADDR_VSTRING (startpos);
+ const unsigned char *pend
+ = (const unsigned char *) STOP_ADDR_VSTRING (startpos);
int len = MULTIBYTE_FORM_LENGTH (p, pend - p);
range -= len;
}
else
{
- range--;
- startpos++;
- }
+ range--;
+ startpos++;
+ }
}
else
{
/* Update STARTPOS to the previous character boundary. */
if (multibyte)
{
- const unsigned char *p = POS_ADDR_VSTRING (startpos);
+ const unsigned char *p
+ = (const unsigned char *) POS_ADDR_VSTRING (startpos);
int len = 0;
/* Find the head of multibyte form. */
- while (!CHAR_HEAD_P (p))
+ while (!CHAR_HEAD_P (*p))
p--, len++;
/* Adjust it. */
do
{
PREFETCH ();
- if ((unsigned char) translate[(unsigned char) *d++]
+ if ((unsigned char) RE_TRANSLATE (translate, (unsigned char) *d++)
!= (unsigned char) *p++)
goto fail;
}
is the character at D, and S2 is the syntax of C2. */
int c1, c2, s1, s2;
int pos1 = PTR_TO_OFFSET (d - 1);
+ int charpos;
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
#ifdef emacs
- UPDATE_SYNTAX_TABLE (pos1 ? pos1 : 1);
+ charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1 ? pos1 : 1);
+ UPDATE_SYNTAX_TABLE (charpos);
#endif
s1 = SYNTAX (c1);
#ifdef emacs
- UPDATE_SYNTAX_TABLE_FORWARD (pos1 + 1);
+ UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
#endif
s2 = SYNTAX (c2);
is the character at D, and S2 is the syntax of C2. */
int c1, c2, s1, s2;
int pos1 = PTR_TO_OFFSET (d - 1);
+ int charpos;
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
#ifdef emacs
- UPDATE_SYNTAX_TABLE (pos1);
+ charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
+ UPDATE_SYNTAX_TABLE (charpos);
#endif
s1 = SYNTAX (c1);
#ifdef emacs
- UPDATE_SYNTAX_TABLE_FORWARD (pos1 + 1);
+ UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
#endif
s2 = SYNTAX (c2);
is the character at D, and S2 is the syntax of C2. */
int c1, c2, s1, s2;
int pos1 = PTR_TO_OFFSET (d);
+ int charpos;
GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
#ifdef emacs
- UPDATE_SYNTAX_TABLE (pos1);
+ charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1);
+ UPDATE_SYNTAX_TABLE (charpos);
#endif
s2 = SYNTAX (c2);
{
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
#ifdef emacs
- UPDATE_SYNTAX_TABLE_BACKWARD (pos1 - 1);
+ UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
#endif
s1 = SYNTAX (c1);
/* C1 is the character before D, S1 is the syntax of C1, C2
is the character at D, and S2 is the syntax of C2. */
int c1, c2, s1, s2;
+ int pos1 = PTR_TO_OFFSET (d);
+ int charpos;
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+#ifdef emacs
+ charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1 - 1);
+ UPDATE_SYNTAX_TABLE (charpos);
+#endif
s1 = SYNTAX (c1);
/* Case 2: S1 is not Sword. */
if (!AT_STRINGS_END (d))
{
GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
+#ifdef emacs
+ UPDATE_SYNTAX_TABLE_FORWARD (charpos);
+#endif
s2 = SYNTAX (c2);
/* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
#ifdef emacs
case before_dot:
DEBUG_PRINT1 ("EXECUTING before_dot.\n");
- if (PTR_CHAR_POS ((unsigned char *) d) >= PT)
+ if (PTR_BYTE_POS ((unsigned char *) d) >= PT_BYTE)
goto fail;
break;
case at_dot:
DEBUG_PRINT1 ("EXECUTING at_dot.\n");
- if (PTR_CHAR_POS ((unsigned char *) d) != PT)
+ if (PTR_BYTE_POS ((unsigned char *) d) != PT_BYTE)
goto fail;
break;
case after_dot:
DEBUG_PRINT1 ("EXECUTING after_dot.\n");
- if (PTR_CHAR_POS ((unsigned char *) d) <= PT)
+ if (PTR_BYTE_POS ((unsigned char *) d) <= PT_BYTE)
goto fail;
break;
PREFETCH ();
#ifdef emacs
{
- int pos1 = PTR_TO_OFFSET (d);
+ int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
UPDATE_SYNTAX_TABLE (pos1);
}
#endif
PREFETCH ();
#ifdef emacs
{
- int pos1 = PTR_TO_OFFSET (d);
+ int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
UPDATE_SYNTAX_TABLE (pos1);
}
#endif
register unsigned char *p1 = s1, *p2 = s2;
while (len)
{
- if (translate[*p1++] != translate[*p2++]) return 1;
+ if (RE_TRANSLATE (translate, *p1++) != RE_TRANSLATE (translate, *p2++))
+ return 1;
len--;
}
return 0;