X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=regex.c;h=a997402a15d16e72099b2b488e4a3ec6c8e5d5d5;hb=afd172b0725c16dbf96b547339df5b59e96e7907;hp=d30a922abdf2be4c49ea39e9e35d34b162be9107;hpb=3f8c9c6b3e239d83bd7884401cf4efa21050d524;p=gnulib.git diff --git a/regex.c b/regex.c index d30a922ab..a997402a1 100644 --- a/regex.c +++ b/regex.c @@ -1120,23 +1120,25 @@ static const char *re_error_msgid[] = REGEX_ALLOCATE_STACK. */ -/* Number of failure points for which to initially allocate space +/* Approximate number of failure points for which to initially allocate space when matching. If this number is exceeded, we allocate more space, so it is not a hard limit. */ #ifndef INIT_FAILURE_ALLOC -#define INIT_FAILURE_ALLOC 5 +#define INIT_FAILURE_ALLOC 20 #endif /* Roughly the maximum number of failure points on the stack. Would be - exactly that if always used MAX_FAILURE_ITEMS items each time we failed. + exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed. This is a variable only so users of regex can assign to it; we never change it ourselves. */ #if defined (MATCH_MAY_ALLOCATE) -/* 4400 was enough to cause a crash on Alpha OSF/1, - whose default stack limit is 2mb. */ -int re_max_failures = 20000; +/* Note that 4400 is enough to cause a crash on Alpha OSF/1, + whose default stack limit is 2mb. In order for a larger + value to work reliably, you have to try to make it accord + with the process stack limit. */ +int re_max_failures = 40000; #else -int re_max_failures = 2000; +int re_max_failures = 4000; #endif union fail_stack_elt @@ -1166,7 +1168,8 @@ typedef struct #define INIT_FAIL_STACK() \ do { \ fail_stack.stack = (fail_stack_elt_t *) \ - REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \ + REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \ + * sizeof (fail_stack_elt_t)); \ \ if (fail_stack.stack == NULL) \ return -2; \ @@ -1186,24 +1189,40 @@ typedef struct #endif -/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. +/* Double the size of FAIL_STACK, up to a limit + which allows approximately `re_max_failures' items. Return 1 if succeeds, and 0 if either ran out of memory allocating space for it or it was already too large. REGEX_REALLOCATE_STACK requires `destination' be declared. */ -#define DOUBLE_FAIL_STACK(fail_stack) \ - ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \ +/* Factor to increase the failure stack size by + when we increase it. + This used to be 2, but 2 was too wasteful + because the old discarded stacks added up to as much space + were as ultimate, maximum-size stack. */ +#define FAIL_STACK_GROWTH_FACTOR 4 + +#define GROW_FAIL_STACK(fail_stack) \ + (((fail_stack).size * sizeof (fail_stack_elt_t) \ + >= re_max_failures * TYPICAL_FAILURE_SIZE) \ ? 0 \ - : ((fail_stack).stack = (fail_stack_elt_t *) \ + : ((fail_stack).stack \ + = (fail_stack_elt_t *) \ REGEX_REALLOCATE_STACK ((fail_stack).stack, \ (fail_stack).size * sizeof (fail_stack_elt_t), \ - ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ + MIN (re_max_failures * TYPICAL_FAILURE_SIZE, \ + ((fail_stack).size * sizeof (fail_stack_elt_t) \ + * FAIL_STACK_GROWTH_FACTOR))), \ \ (fail_stack).stack == NULL \ ? 0 \ - : ((fail_stack).size <<= 1, \ + : ((fail_stack).size \ + = (MIN (re_max_failures * TYPICAL_FAILURE_SIZE, \ + ((fail_stack).size * sizeof (fail_stack_elt_t) \ + * FAIL_STACK_GROWTH_FACTOR)) \ + / sizeof (fail_stack_elt_t)), \ 1))) @@ -1212,7 +1231,7 @@ typedef struct space to do so. */ #define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ ((FAIL_STACK_FULL () \ - && !DOUBLE_FAIL_STACK (FAIL_STACK)) \ + && !GROW_FAIL_STACK (FAIL_STACK)) \ ? 0 \ : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \ 1)) @@ -1255,7 +1274,7 @@ typedef struct if we ever fail back to it. Requires variables fail_stack, regstart, regend, reg_info, and - num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be + num_regs be declared. GROW_FAIL_STACK requires `destination' be declared. Does `return FAILURE_CODE' if runs out of memory. */ @@ -1279,7 +1298,7 @@ typedef struct /* Ensure we have enough space allocated for what we will push. */ \ while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ { \ - if (!DOUBLE_FAIL_STACK (fail_stack)) \ + if (!GROW_FAIL_STACK (fail_stack)) \ return failure_code; \ \ DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ @@ -1346,13 +1365,14 @@ typedef struct #define NUM_NONREG_ITEMS 4 #endif -/* We push at most this many items on the stack. */ -/* We used to use (num_regs - 1), which is the number of registers - this regexp will save; but that was changed to 5 - to avoid stack overflow for a regexp with lots of parens. */ -#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) +/* Estimate the size of data pushed by a typical failure stack entry. + An estimate is all we need, because all we use this for + is to choose a limit for how big to make the failure stack. */ + +#define TYPICAL_FAILURE_SIZE 20 -/* We actually push this many items. */ +/* This is how many items we actually use for a failure point. + It depends on the regexp. */ #define NUM_FAILURE_ITEMS \ (((0 \ ? 0 : highest_active_reg - lowest_active_reg + 1) \ @@ -1540,7 +1560,7 @@ static reg_errcode_t compile_range (); when we use a character as a subscript we must make it unsigned. */ #ifndef TRANSLATE #define TRANSLATE(d) \ - (translate ? (unsigned char) translate[(unsigned char) (d)] : (d)) + (translate ? (unsigned char) RE_TRANSLATE (translate, (unsigned char) (d)) : (d)) #endif @@ -2185,11 +2205,11 @@ regex_compile (pattern, size, syntax, bufp) } else { - /* Could be the end of the bracket expression. If it's - not (i.e., when the bracket expression is `[]' so - far), the ']' character bit gets set way below. */ - if (c == ']' && p != p1 + 1) - break; + /* Could be the end of the bracket expression. If it's + not (i.e., when the bracket expression is `[]' so + far), the ']' character bit gets set way below. */ + if (c == ']' && p != p1 + 1) + break; } /* If C indicates start of multibyte char, get the @@ -2210,7 +2230,8 @@ regex_compile (pattern, size, syntax, bufp) else if (!escaped_char && syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') - { /* Leave room for the null. */ + { + /* Leave room for the null. */ char str[CHAR_CLASS_MAX_LENGTH + 1]; PATFETCH (c); @@ -2938,12 +2959,9 @@ regex_compile (pattern, size, syntax, bufp) { int num_regs = bufp->re_nsub + 1; - /* Since DOUBLE_FAIL_STACK refuses to double only if the current size - is strictly greater than re_max_failures, the largest possible stack - is 2 * re_max_failures failure points. */ - if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) + if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE) { - fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); + fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE); #ifdef emacs if (! fail_stack.stack) @@ -3810,7 +3828,7 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) if (translate) while (range > lim && !fastmap[(unsigned char) - translate[(unsigned char) *d++]]) + RE_TRANSLATE (translate, (unsigned char) *d++)]) range--; else while (range > lim && !fastmap[(unsigned char) *d++]) @@ -3856,8 +3874,10 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) /* Update STARTPOS to the next character boundary. */ if (multibyte) { - const unsigned char *p = POS_ADDR_VSTRING (startpos); - const unsigned char *pend = STOP_ADDR_VSTRING (startpos); + const unsigned char *p + = (const unsigned char *) POS_ADDR_VSTRING (startpos); + const unsigned char *pend + = (const unsigned char *) STOP_ADDR_VSTRING (startpos); int len = MULTIBYTE_FORM_LENGTH (p, pend - p); range -= len; @@ -3867,9 +3887,9 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) } else { - range--; - startpos++; - } + range--; + startpos++; + } } else { @@ -3879,11 +3899,12 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) /* Update STARTPOS to the previous character boundary. */ if (multibyte) { - const unsigned char *p = POS_ADDR_VSTRING (startpos); + const unsigned char *p + = (const unsigned char *) POS_ADDR_VSTRING (startpos); int len = 0; /* Find the head of multibyte form. */ - while (!CHAR_HEAD_P (p)) + while (!CHAR_HEAD_P (*p)) p--, len++; /* Adjust it. */ @@ -4497,7 +4518,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) do { PREFETCH (); - if ((unsigned char) translate[(unsigned char) *d++] + if ((unsigned char) RE_TRANSLATE (translate, (unsigned char) *d++) != (unsigned char) *p++) goto fail; } @@ -5298,15 +5319,17 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) is the character at D, and S2 is the syntax of C2. */ int c1, c2, s1, s2; int pos1 = PTR_TO_OFFSET (d - 1); + int charpos; GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2); #ifdef emacs - UPDATE_SYNTAX_TABLE (pos1 ? pos1 : 1); + charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1 ? pos1 : 1); + UPDATE_SYNTAX_TABLE (charpos); #endif s1 = SYNTAX (c1); #ifdef emacs - UPDATE_SYNTAX_TABLE_FORWARD (pos1 + 1); + UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); #endif s2 = SYNTAX (c2); @@ -5333,15 +5356,17 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) is the character at D, and S2 is the syntax of C2. */ int c1, c2, s1, s2; int pos1 = PTR_TO_OFFSET (d - 1); + int charpos; GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2); #ifdef emacs - UPDATE_SYNTAX_TABLE (pos1); + charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1); + UPDATE_SYNTAX_TABLE (charpos); #endif s1 = SYNTAX (c1); #ifdef emacs - UPDATE_SYNTAX_TABLE_FORWARD (pos1 + 1); + UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); #endif s2 = SYNTAX (c2); @@ -5368,10 +5393,12 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) is the character at D, and S2 is the syntax of C2. */ int c1, c2, s1, s2; int pos1 = PTR_TO_OFFSET (d); + int charpos; GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2); #ifdef emacs - UPDATE_SYNTAX_TABLE (pos1); + charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1); + UPDATE_SYNTAX_TABLE (charpos); #endif s2 = SYNTAX (c2); @@ -5384,7 +5411,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) { GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); #ifdef emacs - UPDATE_SYNTAX_TABLE_BACKWARD (pos1 - 1); + UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1); #endif s1 = SYNTAX (c1); @@ -5409,8 +5436,14 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) /* C1 is the character before D, S1 is the syntax of C1, C2 is the character at D, and S2 is the syntax of C2. */ int c1, c2, s1, s2; + int pos1 = PTR_TO_OFFSET (d); + int charpos; GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); +#ifdef emacs + charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos1 - 1); + UPDATE_SYNTAX_TABLE (charpos); +#endif s1 = SYNTAX (c1); /* Case 2: S1 is not Sword. */ @@ -5421,6 +5454,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) if (!AT_STRINGS_END (d)) { GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2); +#ifdef emacs + UPDATE_SYNTAX_TABLE_FORWARD (charpos); +#endif s2 = SYNTAX (c2); /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2) @@ -5434,19 +5470,19 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) #ifdef emacs case before_dot: DEBUG_PRINT1 ("EXECUTING before_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) >= PT) + if (PTR_BYTE_POS ((unsigned char *) d) >= PT_BYTE) goto fail; break; case at_dot: DEBUG_PRINT1 ("EXECUTING at_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) != PT) + if (PTR_BYTE_POS ((unsigned char *) d) != PT_BYTE) goto fail; break; case after_dot: DEBUG_PRINT1 ("EXECUTING after_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) <= PT) + if (PTR_BYTE_POS ((unsigned char *) d) <= PT_BYTE) goto fail; break; @@ -5462,7 +5498,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) PREFETCH (); #ifdef emacs { - int pos1 = PTR_TO_OFFSET (d); + int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d)); UPDATE_SYNTAX_TABLE (pos1); } #endif @@ -5496,7 +5532,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) PREFETCH (); #ifdef emacs { - int pos1 = PTR_TO_OFFSET (d); + int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d)); UPDATE_SYNTAX_TABLE (pos1); } #endif @@ -5892,7 +5928,8 @@ bcmp_translate (s1, s2, len, translate) register unsigned char *p1 = s1, *p2 = s2; while (len) { - if (translate[*p1++] != translate[*p2++]) return 1; + if (RE_TRANSLATE (translate, *p1++) != RE_TRANSLATE (translate, *p2++)) + return 1; len--; } return 0;