X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=regex.c;h=88ccc69a208c7b8814cc6bd03facc406c94b0351;hb=refs%2Ftags%2Fss-940725-22h45;hp=5316aaa4eaeacf28b612469aadc70e47e35c7282;hpb=58aaedfe9be29ab5eb9f98a1a4950abb23ed858a;p=gnulib.git diff --git a/regex.c b/regex.c index 5316aaa4e..88ccc69a2 100644 --- a/regex.c +++ b/regex.c @@ -3,7 +3,7 @@ (Implements POSIX draft P10003.2/D11.2, except for internationalization features.) - Copyright (C) 1993 Free Software Foundation, Inc. + Copyright (C) 1993, 1994 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -63,6 +63,7 @@ char *realloc (); /* We used to test for `BSTRING' here, but only GCC and Emacs define `BSTRING', as far as I know, and neither of them use this code. */ +#ifndef INHIBIT_STRING_HEADER #if HAVE_STRING_H || STDC_HEADERS #include #ifndef bcmp @@ -77,6 +78,7 @@ char *realloc (); #else #include #endif +#endif /* Define the syntax stuff for \<, \>, etc. */ @@ -896,8 +898,8 @@ static const char *re_error_msg[] = ralloc heap) shift the data out from underneath the regexp routines. - Here's another reason to avoid allocation: Emacs insists on - processing input from X in a signal handler; processing X input may + Here's another reason to avoid allocation: Emacs + processes input from X in a signal handler; processing X input may call malloc; if input arrives while a matching routine is calling malloc, then we're scrod. But Emacs can't just block input while calling matching routines; then we don't notice interrupts when @@ -908,8 +910,9 @@ static const char *re_error_msg[] = /* Normally, this is fine. */ #define MATCH_MAY_ALLOCATE -/* But under some circumstances, it's not. */ -#if defined (emacs) || (defined (REL_ALLOC) && defined (C_ALLOCA)) +/* The match routines may not allocate if (1) they would do it with malloc + and (2) it's not safe for htem to use malloc. */ +#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && (defined (emacs) || defined (REL_ALLOC)) #undef MATCH_MAY_ALLOCATE #endif @@ -1488,6 +1491,10 @@ typedef struct The `fastmap' and `newline_anchor' fields are neither examined nor set. */ +/* Return, freeing storage we allocated. */ +#define FREE_STACK_RETURN(value) \ + return (free (compile_stack.stack), value) + static reg_errcode_t regex_compile (pattern, size, syntax, bufp) const char *pattern; @@ -1594,7 +1601,7 @@ regex_compile (pattern, size, syntax, bufp) { /* Caller did not allocate a buffer. Do it for them. */ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); } - if (!bufp->buffer) return REG_ESPACE; + if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); bufp->allocated = INIT_BUF_SIZE; } @@ -1649,7 +1656,7 @@ regex_compile (pattern, size, syntax, bufp) if (!laststart) { if (syntax & RE_CONTEXT_INVALID_OPS) - return REG_BADRPT; + FREE_STACK_RETURN (REG_BADRPT); else if (!(syntax & RE_CONTEXT_INDEP_OPS)) goto normal_char; } @@ -1682,7 +1689,7 @@ regex_compile (pattern, size, syntax, bufp) else if (syntax & RE_BK_PLUS_QM && c == '\\') { - if (p == pend) return REG_EESCAPE; + if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); PATFETCH (c1); if (!(c1 == '+' || c1 == '?')) @@ -1781,7 +1788,7 @@ regex_compile (pattern, size, syntax, bufp) { boolean had_char_class = false; - if (p == pend) return REG_EBRACK; + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); /* Ensure that we have enough space to push a charset: the opcode, the length count, and the bitset; 34 bytes in all. */ @@ -1812,14 +1819,14 @@ regex_compile (pattern, size, syntax, bufp) /* Read in characters and ranges, setting map bits. */ for (;;) { - if (p == pend) return REG_EBRACK; + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); PATFETCH (c); /* \ might escape characters inside [...] and [^...]. */ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') { - if (p == pend) return REG_EESCAPE; + if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); PATFETCH (c1); SET_LIST_BIT (c1); @@ -1835,7 +1842,7 @@ regex_compile (pattern, size, syntax, bufp) /* Look ahead to see if it's a range when the last thing was a character class. */ if (had_char_class && c == '-' && *p != ']') - return REG_ERANGE; + FREE_STACK_RETURN (REG_ERANGE); /* Look ahead to see if it's a range when the last thing was a character: if this is a hyphen not at the @@ -1848,7 +1855,7 @@ regex_compile (pattern, size, syntax, bufp) { reg_errcode_t ret = compile_range (&p, pend, translate, syntax, b); - if (ret != REG_NOERROR) return ret; + if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); } else if (p[0] == '-' && p[1] != ']') @@ -1859,7 +1866,7 @@ regex_compile (pattern, size, syntax, bufp) PATFETCH (c1); ret = compile_range (&p, pend, translate, syntax, b); - if (ret != REG_NOERROR) return ret; + if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); } /* See if we're at the beginning of a possible character @@ -1873,7 +1880,7 @@ regex_compile (pattern, size, syntax, bufp) c1 = 0; /* If pattern is `[[:'. */ - if (p == pend) return REG_EBRACK; + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); for (;;) { @@ -1904,13 +1911,14 @@ regex_compile (pattern, size, syntax, bufp) boolean is_upper = STREQ (str, "upper"); boolean is_xdigit = STREQ (str, "xdigit"); - if (!IS_CHAR_CLASS (str)) return REG_ECTYPE; + if (!IS_CHAR_CLASS (str)) + FREE_STACK_RETURN (REG_ECTYPE); /* Throw away the ] at the end of the character class. */ PATFETCH (c); - if (p == pend) return REG_EBRACK; + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); for (ch = 0; ch < 1 << BYTEWIDTH; ch++) { @@ -1992,7 +2000,7 @@ regex_compile (pattern, size, syntax, bufp) case '\\': - if (p == pend) return REG_EESCAPE; + if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); /* Do not translate the character after the \, so that we can distinguish, e.g., \B from \b, even if we normally would @@ -2057,7 +2065,7 @@ regex_compile (pattern, size, syntax, bufp) if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) goto normal_backslash; else - return REG_ERPAREN; + FREE_STACK_RETURN (REG_ERPAREN); handle_close: if (fixup_alt_jump) @@ -2077,7 +2085,7 @@ regex_compile (pattern, size, syntax, bufp) if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) goto normal_char; else - return REG_ERPAREN; + FREE_STACK_RETURN (REG_ERPAREN); /* Since we just checked for an empty stack above, this ``can't happen''. */ @@ -2184,7 +2192,7 @@ regex_compile (pattern, size, syntax, bufp) if (syntax & RE_NO_BK_BRACES) goto unfetch_interval; else - return REG_EBRACE; + FREE_STACK_RETURN (REG_EBRACE); } GET_UNSIGNED_NUMBER (lower_bound); @@ -2204,12 +2212,12 @@ regex_compile (pattern, size, syntax, bufp) if (syntax & RE_NO_BK_BRACES) goto unfetch_interval; else - return REG_BADBR; + FREE_STACK_RETURN (REG_BADBR); } if (!(syntax & RE_NO_BK_BRACES)) { - if (c != '\\') return REG_EBRACE; + if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); PATFETCH (c); } @@ -2219,7 +2227,7 @@ regex_compile (pattern, size, syntax, bufp) if (syntax & RE_NO_BK_BRACES) goto unfetch_interval; else - return REG_BADBR; + FREE_STACK_RETURN (REG_BADBR); } /* We just parsed a valid interval. */ @@ -2228,7 +2236,7 @@ regex_compile (pattern, size, syntax, bufp) if (!laststart) { if (syntax & RE_CONTEXT_INVALID_OPS) - return REG_BADRPT; + FREE_STACK_RETURN (REG_BADRPT); else if (syntax & RE_CONTEXT_INDEP_OPS) laststart = b; else @@ -2395,7 +2403,7 @@ regex_compile (pattern, size, syntax, bufp) c1 = c - '0'; if (c1 > regnum) - return REG_ESUBREG; + FREE_STACK_RETURN (REG_ESUBREG); /* Can't back reference to a subexpression if inside of it. */ if (group_in_compile_stack (compile_stack, c1)) @@ -2467,7 +2475,7 @@ regex_compile (pattern, size, syntax, bufp) STORE_JUMP (jump_past_alt, fixup_alt_jump, b); if (!COMPILE_STACK_EMPTY) - return REG_EPAREN; + FREE_STACK_RETURN (REG_EPAREN); free (compile_stack.stack); @@ -2495,15 +2503,28 @@ regex_compile (pattern, size, syntax, bufp) if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) { fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); + +#ifdef emacs if (! fail_stack.stack) - fail_stack.stack = - (fail_stack_elt_t *) malloc (fail_stack.size - * sizeof (fail_stack_elt_t)); + fail_stack.stack + = (fail_stack_elt_t *) xmalloc (fail_stack.size + * sizeof (fail_stack_elt_t)); else - fail_stack.stack = - (fail_stack_elt_t *) realloc (fail_stack.stack, - (fail_stack.size - * sizeof (fail_stack_elt_t))); + fail_stack.stack + = (fail_stack_elt_t *) xrealloc (fail_stack.stack, + (fail_stack.size + * sizeof (fail_stack_elt_t))); +#else /* not emacs */ + if (! fail_stack.stack) + fail_stack.stack + = (fail_stack_elt_t *) malloc (fail_stack.size + * sizeof (fail_stack_elt_t)); + else + fail_stack.stack + = (fail_stack_elt_t *) realloc (fail_stack.stack, + (fail_stack.size + * sizeof (fail_stack_elt_t))); +#endif /* not emacs */ } /* Initialize some other variables the matcher uses. */ @@ -2830,22 +2851,25 @@ re_compile_fastmap (bufp) case anychar: - /* `.' matches anything ... */ - for (j = 0; j < (1 << BYTEWIDTH); j++) - fastmap[j] = 1; + { + int fastmap_newline = fastmap['\n']; - /* ... except perhaps newline. */ - if (!(bufp->syntax & RE_DOT_NEWLINE)) - fastmap['\n'] = 0; + /* `.' matches anything ... */ + for (j = 0; j < (1 << BYTEWIDTH); j++) + fastmap[j] = 1; - /* Return if we have already set `can_be_null'; if we have, - then the fastmap is irrelevant. Something's wrong here. */ - else if (bufp->can_be_null) - return 0; + /* ... except perhaps newline. */ + if (!(bufp->syntax & RE_DOT_NEWLINE)) + fastmap['\n'] = fastmap_newline; - /* Otherwise, have to check alternative paths. */ - break; + /* Return if we have already set `can_be_null'; if we have, + then the fastmap is irrelevant. Something's wrong here. */ + else if (bufp->can_be_null) + return 0; + /* Otherwise, have to check alternative paths. */ + break; + } #ifdef emacs case syntaxspec: @@ -3161,7 +3185,11 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) val = re_match_2_internal (bufp, string1, size1, string2, size2, startpos, regs, stop); +#ifndef REGEX_MALLOC +#ifdef C_ALLOCA alloca (0); +#endif +#endif if (val >= 0) return startpos; @@ -3569,17 +3597,21 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) longest match, try backtracking. */ if (d != end_match_2) { + /* 1 if this match ends in the same string (string1 or string2) + as the best previous match. */ + boolean same_str_p = (FIRST_STRING_P (match_end) + == MATCHING_IN_FIRST_STRING); + /* 1 if this match is the best seen so far. */ + boolean best_match_p = (same_str_p ? d > match_end + : !MATCHING_IN_FIRST_STRING); + DEBUG_PRINT1 ("backtracking.\n"); if (!FAIL_STACK_EMPTY ()) { /* More failure points to try. */ - boolean same_str_p = (FIRST_STRING_P (match_end) - == MATCHING_IN_FIRST_STRING); /* If exceeds best match so far, save it. */ - if (!best_regs_set - || (same_str_p && d > match_end) - || (!same_str_p && !MATCHING_IN_FIRST_STRING)) + if (!best_regs_set || best_match_p) { best_regs_set = true; match_end = d; @@ -3595,8 +3627,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) goto fail; } - /* If no failure points, don't restore garbage. */ - else if (best_regs_set) + /* If no failure points, don't restore garbage. And if + last match is real best match, don't restore second + best one. */ + else if (best_regs_set && !best_match_p) { restore_best_regs: /* Restore best match. It may happen that `dend == @@ -4253,11 +4287,13 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) } else if ((re_opcode_t) *p2 == charset) { +#ifdef DEBUG register unsigned char c = *p2 == (unsigned char) endline ? '\n' : p2[2]; +#endif if ((re_opcode_t) p1[3] == exactn - && ! (p2[1] * BYTEWIDTH > p1[4] + && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4] && (p2[1 + p1[4] / BYTEWIDTH] & (1 << (p1[4] % BYTEWIDTH))))) { @@ -4271,9 +4307,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) int idx; /* We win if the charset_not inside the loop lists every character listed in the charset after. */ - for (idx = 0; idx < p2[1]; idx++) + for (idx = 0; idx < (int) p2[1]; idx++) if (! (p2[2 + idx] == 0 - || (idx < p1[4] + || (idx < (int) p1[4] && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) break; @@ -4288,7 +4324,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) int idx; /* We win if the charset inside the loop has no overlap with the one after the loop. */ - for (idx = 0; idx < p2[1] && idx < p1[4]; idx++) + for (idx = 0; + idx < (int) p2[1] && idx < (int) p1[4]; + idx++) if ((p2[2 + idx] & p1[5 + idx]) != 0) break;