X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=regex.c;h=4d617d39b172b9f065fb08a3ef20333c8e3671fc;hb=a9576d3cabf1bfe37c4a886666696757d79e6a47;hp=3129ed499fde57b35e6829df1a0b114162f19360;hpb=d59c8610faea508d7ce2c615124fa9aa15e6fda1;p=gnulib.git diff --git a/regex.c b/regex.c index 3129ed499..4d617d39b 100644 --- a/regex.c +++ b/regex.c @@ -3,7 +3,7 @@ (Implements POSIX draft P10003.2/D11.2, except for internationalization features.) - Copyright (C) 1985, 89, 90, 91, 92 Free Software Foundation, Inc. + Copyright (C) 1993 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,11 +29,14 @@ /* We need this for `regex.h', and perhaps for the Emacs include files. */ #include +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + /* The `emacs' switch turns on certain matching commands that make sense only in Emacs. */ #ifdef emacs -#include "config.h" #include "lisp.h" #include "buffer.h" #include "syntax.h" @@ -45,11 +48,17 @@ /* We used to test for `BSTRING' here, but only GCC and Emacs define `BSTRING', as far as I know, and neither of them use this code. */ -#if USG || STDC_HEADERS +#if HAVE_STRING_H || STDC_HEADERS #include +#ifndef bcmp #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) +#endif +#ifndef bcopy #define bcopy(s, d, n) memcpy ((d), (s), (n)) +#endif +#ifndef bzero #define bzero(s, n) memset ((s), 0, (n)) +#endif #else #include #endif @@ -115,15 +124,34 @@ init_syntax_once () /* Get the interface, including the syntax bits. */ #include "regex.h" - /* isalpha etc. are used for the character classes. */ #include -#ifndef isgraph -#define isgraph(c) (isprint (c) && !isspace (c)) + +#ifndef isascii +#define isascii(c) 1 #endif -#ifndef isblank -#define isblank(c) ((c) == ' ' || (c) == '\t') + +#ifdef isblank +#define ISBLANK(c) (isascii (c) && isblank (c)) +#else +#define ISBLANK(c) ((c) == ' ' || (c) == '\t') #endif +#ifdef isgraph +#define ISGRAPH(c) (isascii (c) && isgraph (c)) +#else +#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c)) +#endif + +#define ISPRINT(c) (isascii (c) && isprint (c)) +#define ISDIGIT(c) (isascii (c) && isdigit (c)) +#define ISALNUM(c) (isascii (c) && isalnum (c)) +#define ISALPHA(c) (isascii (c) && isalpha (c)) +#define ISCNTRL(c) (isascii (c) && iscntrl (c)) +#define ISLOWER(c) (isascii (c) && islower (c)) +#define ISPUNCT(c) (isascii (c) && ispunct (c)) +#define ISSPACE(c) (isascii (c) && isspace (c)) +#define ISUPPER(c) (isascii (c) && isupper (c)) +#define ISXDIGIT(c) (isascii (c) && isxdigit (c)) #ifndef NULL #define NULL 0 @@ -136,7 +164,7 @@ init_syntax_once () #undef SIGN_EXTEND_CHAR #if __STDC__ #define SIGN_EXTEND_CHAR(c) ((signed char) (c)) -#else +#else /* not __STDC__ */ /* As in Harbison and Steele. */ #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) #endif @@ -443,6 +471,7 @@ static int debug = 0; #define DEBUG_PRINT1(x) if (debug) printf (x) #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) +#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ if (debug) print_partial_compiled_pattern (s, e) #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ @@ -756,6 +785,7 @@ print_double_string (where, string1, size1, string2, size2) #define DEBUG_PRINT1(x) #define DEBUG_PRINT2(x1, x2) #define DEBUG_PRINT3(x1, x2, x3) +#define DEBUG_PRINT4(x1, x2, x3, x4) #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) @@ -988,7 +1018,7 @@ typedef struct { if (p != pend) \ { \ PATFETCH (c); \ - while (isdigit (c)) \ + while (ISDIGIT (c)) \ { \ if (num < 0) \ num = 0; \ @@ -1021,9 +1051,9 @@ typedef struct `buffer' is the compiled pattern; `syntax' is set to SYNTAX; `used' is set to the length of the compiled pattern; - `fastmap_accurate' is set to zero; - `re_nsub' is set to the number of groups in PATTERN; - `not_bol' and `not_eol' are set to zero. + `fastmap_accurate' is zero; + `re_nsub' is the number of subexpressions in PATTERN; + `not_bol' and `not_eol' are zero; The `fastmap' and `newline_anchor' fields are neither examined nor set. */ @@ -1272,6 +1302,7 @@ regex_compile (pattern, size, syntax, bufp) the `*'. Do we have to do something analogous here for null bytes, because of RE_DOT_NOT_NULL? */ if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') + && zero_times_ok && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') && !(syntax & RE_DOT_NEWLINE)) { /* We have .*\n. */ @@ -1453,18 +1484,18 @@ regex_compile (pattern, size, syntax, bufp) for (ch = 0; ch < 1 << BYTEWIDTH; ch++) { - if ( (is_alnum && isalnum (ch)) - || (is_alpha && isalpha (ch)) - || (is_blank && isblank (ch)) - || (is_cntrl && iscntrl (ch)) - || (is_digit && isdigit (ch)) - || (is_graph && isgraph (ch)) - || (is_lower && islower (ch)) - || (is_print && isprint (ch)) - || (is_punct && ispunct (ch)) - || (is_space && isspace (ch)) - || (is_upper && isupper (ch)) - || (is_xdigit && isxdigit (ch))) + if ( (is_alnum && ISALNUM (ch)) + || (is_alpha && ISALPHA (ch)) + || (is_blank && ISBLANK (ch)) + || (is_cntrl && ISCNTRL (ch)) + || (is_digit && ISDIGIT (ch)) + || (is_graph && ISGRAPH (ch)) + || (is_lower && ISLOWER (ch)) + || (is_print && ISPRINT (ch)) + || (is_punct && ISPUNCT (ch)) + || (is_space && ISSPACE (ch)) + || (is_upper && ISUPPER (ch)) + || (is_xdigit && ISXDIGIT (ch))) SET_LIST_BIT (ch); } had_char_class = true; @@ -1582,6 +1613,10 @@ regex_compile (pattern, size, syntax, bufp) fixup_alt_jump = 0; laststart = 0; begalt = b; + /* If we've reached MAX_REGNUM groups, then this open + won't actually generate any code, so we'll have to + clear pending_exact explicitly. */ + pending_exact = 0; break; @@ -1631,6 +1666,10 @@ regex_compile (pattern, size, syntax, bufp) : 0; laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; this_group_regnum = COMPILE_STACK_TOP.regnum; + /* If we've reached MAX_REGNUM groups, then this open + won't actually generate any code, so we'll have to + clear pending_exact explicitly. */ + pending_exact = 0; /* We're at the end of the group, so now we know how many groups were inside this one. */ @@ -1672,10 +1711,10 @@ regex_compile (pattern, size, syntax, bufp) | v | v a | b | c - If we are at `b,' then fixup_alt_jump right now points to a - three-byte space after `a.' We'll put in the jump, set - fixup_alt_jump to right after `b,' and leave behind three - bytes which we'll fill in when we get to after `c.' */ + If we are at `b', then fixup_alt_jump right now points to a + three-byte space after `a'. We'll put in the jump, set + fixup_alt_jump to right after `b', and leave behind three + bytes which we'll fill in when we get to after `c'. */ if (fixup_alt_jump) STORE_JUMP (jump_past_alt, fixup_alt_jump, b); @@ -2167,18 +2206,20 @@ compile_range (p_ptr, pend, translate, syntax, b) unsigned this_char; const char *p = *p_ptr; + int range_start, range_end; - /* Even though the pattern is a signed `char *', we need to fetch into - `unsigned char's. Reason: if the high bit of the pattern character - is set, the range endpoints will be negative if we fetch into a - signed `char *'. */ - unsigned char range_end; - unsigned char range_start = p[-2]; - if (p == pend) return REG_ERANGE; - PATFETCH (range_end); + /* Even though the pattern is a signed `char *', we need to fetch + with unsigned char *'s; if the high bit of the pattern character + is set, the range endpoints will be negative if we fetch using a + signed char *. + + We also want to fetch the endpoints without translating them; the + appropriate translation is done in the bit-setting loop below. */ + range_start = ((unsigned char *) p)[-2]; + range_end = ((unsigned char *) p)[0]; /* Have to increment the pointer into the pattern string, so the caller isn't still at the ending character. */ @@ -2316,6 +2357,7 @@ typedef struct int this_reg; \ \ DEBUG_STATEMENT (failure_id++); \ + DEBUG_STATEMENT (nfailure_points_pushed++); \ DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ @@ -2469,6 +2511,8 @@ typedef struct regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ } \ + \ + DEBUG_STATEMENT (nfailure_points_popped++); \ } /* POP_FAILURE_POINT */ /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in @@ -2856,15 +2900,9 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) else if (endpos > total_size) range = total_size - startpos; - /* Update the fastmap now if not correct already. */ - if (fastmap && !bufp->fastmap_accurate) - if (re_compile_fastmap (bufp) == -2) - return -2; - /* If the search isn't to be a backwards one, don't waste time in a - long search for a pattern that says it is anchored. */ - if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf - && range > 0) + search for a pattern that must be anchored. */ + if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) { if (startpos > 0) return -1; @@ -2872,6 +2910,12 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) range = 1; } + /* Update the fastmap now if not correct already. */ + if (fastmap && !bufp->fastmap_accurate) + if (re_compile_fastmap (bufp) == -2) + return -2; + + /* Loop through the string, looking for a place to start matching. */ for (;;) { /* If a fastmap is supplied, skip quickly over characters that @@ -2895,7 +2939,8 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) inside the loop. */ if (translate) while (range > lim - && !fastmap[(unsigned char) translate[*d++]]) + && !fastmap[(unsigned char) + translate[(unsigned char) *d++]]) range--; else while (range > lim && !fastmap[(unsigned char) *d++]) @@ -2909,7 +2954,7 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) ? string2[startpos - size1] : string1[startpos]); - if (!fastmap[TRANSLATE (c)]) + if (!fastmap[(unsigned char) TRANSLATE (c)]) goto advance; } } @@ -2983,12 +3028,9 @@ typedef union #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) -/* Call this when have matched something; it sets `matched' flags for the - registers corresponding to the group of which we currently are inside. - Also records whether this group ever matched something. We only care - about this information at `stop_memory', and then only about the - previous time through the loop (if the group is starred or whatever). - So it is ok to clear all the nonactive registers here. */ +/* Call this when have matched a real character; it sets `matched' flags + for the subexpressions which we are currently inside. Also records + that those subexprs have matched. */ #define SET_REGS_MATCHED() \ do \ { \ @@ -3033,24 +3075,24 @@ typedef union /* Test if at very beginning or at very end of the virtual concatenation of `string1' and `string2'. If only one string, it's `string2'. */ -#define AT_STRINGS_BEG() (d == (size1 ? string1 : string2) || !size2) -#define AT_STRINGS_END() (d == end2) +#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) +#define AT_STRINGS_END(d) ((d) == end2) /* Test if D points to a character which is word-constituent. We have two special cases to check for: if past the end of string1, look at the first character in string2; and if before the beginning of - string2, look at the last character in string1. - - Assumes `string1' exists, so use in conjunction with AT_STRINGS_BEG (). */ -#define LETTER_P(d) \ + string2, look at the last character in string1. */ +#define WORDCHAR_P(d) \ (SYNTAX ((d) == end1 ? *string2 \ - : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == Sword) + : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ + == Sword) /* Test if the character before D and the one at D differ with respect to being word-constituent. */ #define AT_WORD_BOUNDARY(d) \ - (AT_STRINGS_BEG () || AT_STRINGS_END () || LETTER_P (d - 1) != LETTER_P (d)) + (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \ + || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) /* Free everything we malloc. */ @@ -3157,6 +3199,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) fail_stack_type fail_stack; #ifdef DEBUG static unsigned failure_id = 0; + unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; #endif /* We fill all the registers internally, independent of what we @@ -3250,8 +3293,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) else { /* We must initialize all our variables to NULL, so that - `FREE_VARIABLES' doesn't try to free them. Too bad this isn't - Lisp, so we could have a list of variables. As it is, */ + `FREE_VARIABLES' doesn't try to free them. */ regstart = regend = old_regstart = old_regend = best_regstart = best_regend = reg_dummy = NULL; reg_info = reg_info_dummy = (register_info_type *) NULL; @@ -3335,8 +3377,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) if (p == pend) { /* End of pattern means we might have succeeded. */ - DEBUG_PRINT1 ("End of pattern: "); - /* If not end of string, try backtracking. Otherwise done. */ + DEBUG_PRINT1 ("end of pattern ... "); + + /* If we haven't matched the entire string, and we want the + longest match, try backtracking. */ if (d != end_match_2) { DEBUG_PRINT1 ("backtracking.\n"); @@ -3374,6 +3418,8 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) For example, the pattern `x.*y.*z' against the strings `x-' and `y-z-', if the two strings are not consecutive in memory. */ + DEBUG_PRINT1 ("Restoring best registers.\n"); + d = match_end; dend = ((d >= string1 && d <= end1) ? end_match_1 : end_match_2); @@ -3386,7 +3432,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) } } /* d != end_match_2 */ - DEBUG_PRINT1 ("\nAccepting match.\n"); + DEBUG_PRINT1 ("Accepting match.\n"); /* If caller wants register contents data back, do it. */ if (regs && !bufp->no_sub) @@ -3452,7 +3498,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) } /* regs && !bufp->no_sub */ FREE_VARIABLES (); - DEBUG_PRINT2 ("%d registers pushed.\n", num_regs_pushed); + DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", + nfailure_points_pushed, nfailure_points_popped, + nfailure_points_pushed - nfailure_points_popped); + DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); mcnt = d - pos - (MATCHING_IN_FIRST_STRING ? string1 @@ -3654,7 +3703,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) /* If just failed to match something this time around with a group that's operated on by a repetition operator, try to - force exit from the ``loop,'' and restore the register + force exit from the ``loop'', and restore the register information for this group that we had before trying this last match. */ if ((!MATCHED_SOMETHING (reg_info[*p]) @@ -3798,7 +3847,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) case begline: DEBUG_PRINT1 ("EXECUTING begline.\n"); - if (AT_STRINGS_BEG ()) + if (AT_STRINGS_BEG (d)) { if (!bufp->not_bol) break; } @@ -3814,7 +3863,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) case endline: DEBUG_PRINT1 ("EXECUTING endline.\n"); - if (AT_STRINGS_END ()) + if (AT_STRINGS_END (d)) { if (!bufp->not_eol) break; } @@ -3831,7 +3880,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) /* Match at the very beginning of the data. */ case begbuf: DEBUG_PRINT1 ("EXECUTING begbuf.\n"); - if (AT_STRINGS_BEG ()) + if (AT_STRINGS_BEG (d)) break; goto fail; @@ -3839,7 +3888,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) /* Match at the very end of the data. */ case endbuf: DEBUG_PRINT1 ("EXECUTING endbuf.\n"); - if (AT_STRINGS_END ()) + if (AT_STRINGS_END (d)) break; goto fail; @@ -3893,7 +3942,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) the original * applied to a group), save the information for that group and all inner ones, so that if we fail back to this point, the group's information will be correct. - For example, in \(a*\)*\1, we only need the preceding group, + For example, in \(a*\)*\1, we need the preceding group, and in \(\(a*\)b*\)\2, we need the inner group. */ /* We can't use `p' to check ahead because we push @@ -3923,8 +3972,8 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) break; - /* A smart repeat ends with a maybe_pop_jump. - We change it either to a pop_failure_jump or a jump. */ + /* A smart repeat ends with `maybe_pop_jump'. + We change it to either `pop_failure_jump' or `jump'. */ case maybe_pop_jump: EXTRACT_NUMBER_AND_INCR (mcnt, p); DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); @@ -3952,7 +4001,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) /* If we're at the end of the pattern, we can change. */ if (p2 == pend) - { + { + /* Consider what happens when matching ":\(.*\)" + against ":/". I don't really understand this code + yet. */ p[-3] = (unsigned char) pop_failure_jump; DEBUG_PRINT1 (" End of pattern: change to `pop_failure_jump'.\n"); @@ -3969,7 +4021,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) to the `maybe_finalize_jump' of this case. Examine what follows. */ if ((re_opcode_t) p1[3] == exactn && p1[5] != c) - p[-3] = (unsigned char) pop_failure_jump; + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", + c, p1[5]); + } + else if ((re_opcode_t) p1[3] == charset || (re_opcode_t) p1[3] == charset_not) { @@ -3984,9 +4041,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) if (!not) { p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 - (" No match: change to `pop_failure_jump'.\n"); - + DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); } } } @@ -3995,6 +4050,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) if ((re_opcode_t) p[-1] != pop_failure_jump) { p[-1] = (unsigned char) jump; + DEBUG_PRINT1 (" Match => jump.\n"); goto unconditional_jump; } /* Note fall through. */ @@ -4056,7 +4112,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) /* At the end of an alternative, we need to push a dummy failure - point in case we are followed by a pop_failure_jump', because + point in case we are followed by a `pop_failure_jump', because we don't want the failure point for the alternative to be popped. For example, matching `(a|ab)*' against `aab' requires that we match the `ab' alternative. */ @@ -4133,14 +4189,14 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) case wordbeg: DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); - if (LETTER_P (d) && (AT_STRINGS_BEG () || !LETTER_P (d - 1))) + if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) break; goto fail; case wordend: DEBUG_PRINT1 ("EXECUTING wordend.\n"); - if (!AT_STRINGS_BEG () && LETTER_P (d - 1) - && (!LETTER_P (d) || AT_STRINGS_END ())) + if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) + && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) break; goto fail; @@ -4177,11 +4233,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) goto matchsyntax; case wordchar: - DEBUG_PRINT1 ("EXECUTING wordchar.\n"); + DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); mcnt = (int) Sword; matchsyntax: PREFETCH (); - if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail; + if (SYNTAX (*d++) != (enum syntaxcode) mcnt) + goto fail; SET_REGS_MATCHED (); break; @@ -4191,11 +4248,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) goto matchnotsyntax; case notwordchar: - DEBUG_PRINT1 ("EXECUTING notwordchar.\n"); + DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); mcnt = (int) Sword; - matchnotsyntax: /* We goto here from notsyntaxspec. */ + matchnotsyntax: PREFETCH (); - if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail; + if (SYNTAX (*d++) == (enum syntaxcode) mcnt) + goto fail; SET_REGS_MATCHED (); break; @@ -4203,17 +4261,19 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) case wordchar: DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); PREFETCH (); - if (!LETTER_P (d)) + if (!WORDCHAR_P (d)) goto fail; SET_REGS_MATCHED (); + d++; break; case notwordchar: DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); PREFETCH (); - if (LETTER_P (d)) + if (WORDCHAR_P (d)) goto fail; SET_REGS_MATCHED (); + d++; break; #endif /* not emacs */ @@ -4680,10 +4740,12 @@ regcomp (preg, pattern, cflags) { reg_errcode_t ret; unsigned syntax - = cflags & REG_EXTENDED ? RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; + = (cflags & REG_EXTENDED) ? + RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; /* regex_compile will allocate the space for the compiled pattern. */ preg->buffer = 0; + preg->allocated = 0; /* Don't bother to use a fastmap when searching. This simplifies the REG_NEWLINE case: if we used a fastmap, we'd have to put all the @@ -4701,7 +4763,7 @@ regcomp (preg, pattern, cflags) /* Map uppercase characters to corresponding lowercase ones. */ for (i = 0; i < CHAR_SET_SIZE; i++) - preg->translate[i] = isupper (i) ? tolower (i) : i; + preg->translate[i] = ISUPPER (i) ? tolower (i) : i; } else preg->translate = NULL; @@ -4808,7 +4870,7 @@ regexec (preg, string, nmatch, pmatch, eflags) /* Returns a message corresponding to an error code, ERRCODE, returned - from either regcomp or regexec. */ + from either regcomp or regexec. We don't use PREG here. */ size_t regerror (errcode, preg, errbuf, errbuf_size) @@ -4817,9 +4879,25 @@ regerror (errcode, preg, errbuf, errbuf_size) char *errbuf; size_t errbuf_size; { - const char *msg - = re_error_msg[errcode] == NULL ? "Success" : re_error_msg[errcode]; - size_t msg_size = strlen (msg) + 1; /* Includes the null. */ + const char *msg; + size_t msg_size; + + if (errcode < 0 + || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0]))) + /* Only error codes returned by the rest of the code should be passed + to this routine. If we are given anything else, or if other regex + code generates an invalid error code, then the program has a bug. + Dump core so we can fix it. */ + abort (); + + msg = re_error_msg[errcode]; + + /* POSIX doesn't require that we do anything in this case, but why + not be nice. */ + if (! msg) + msg = "Success"; + + msg_size = strlen (msg) + 1; /* Includes the null. */ if (errbuf_size != 0) {