X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fregex.c;h=f047ecc3cf27bbe3ef100ec420a2a9c2b07658c5;hb=8d2f9f58a034951c99ca4f426764e14f55155369;hp=15ca91bc54a29b81a91d415d880a8d017c08a0ef;hpb=88b5b816da2f613c5b978e2d0f7e22c33306dc0a;p=gnulib.git diff --git a/lib/regex.c b/lib/regex.c index 15ca91bc5..f047ecc3c 100644 --- a/lib/regex.c +++ b/lib/regex.c @@ -26,6 +26,17 @@ #define _GNU_SOURCE +#ifdef HAVE_CONFIG_H +#if defined (emacs) || defined (CONFIG_BROKETS) +/* We use instead of "config.h" so that a compilation + using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h + (which it would do because it found this file in $srcdir). */ +#include +#else +#include "config.h" +#endif +#endif + /* We need this for `regex.h', and perhaps for the Emacs include files. */ #include @@ -137,32 +148,34 @@ init_syntax_once () macros don't need to be guarded with references to isascii. ... Defining isascii to 1 should let any compiler worth its salt eliminate the && through constant folding." */ -#if ! defined (isascii) || defined (STDC_HEADERS) -#undef isascii -#define isascii(c) 1 + +#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) +#define ISASCII(c) 1 +#else +#define ISASCII(c) isascii(c) #endif #ifdef isblank -#define ISBLANK(c) (isascii (c) && isblank (c)) +#define ISBLANK(c) (ISASCII (c) && isblank (c)) #else #define ISBLANK(c) ((c) == ' ' || (c) == '\t') #endif #ifdef isgraph -#define ISGRAPH(c) (isascii (c) && isgraph (c)) +#define ISGRAPH(c) (ISASCII (c) && isgraph (c)) #else -#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c)) +#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) #endif -#define ISPRINT(c) (isascii (c) && isprint (c)) -#define ISDIGIT(c) (isascii (c) && isdigit (c)) -#define ISALNUM(c) (isascii (c) && isalnum (c)) -#define ISALPHA(c) (isascii (c) && isalpha (c)) -#define ISCNTRL(c) (isascii (c) && iscntrl (c)) -#define ISLOWER(c) (isascii (c) && islower (c)) -#define ISPUNCT(c) (isascii (c) && ispunct (c)) -#define ISSPACE(c) (isascii (c) && isspace (c)) -#define ISUPPER(c) (isascii (c) && isupper (c)) -#define ISXDIGIT(c) (isascii (c) && isxdigit (c)) +#define ISPRINT(c) (ISASCII (c) && isprint (c)) +#define ISDIGIT(c) (ISASCII (c) && isdigit (c)) +#define ISALNUM(c) (ISASCII (c) && isalnum (c)) +#define ISALPHA(c) (ISASCII (c) && isalpha (c)) +#define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) +#define ISLOWER(c) (ISASCII (c) && islower (c)) +#define ISPUNCT(c) (ISASCII (c) && ispunct (c)) +#define ISSPACE(c) (ISASCII (c) && isspace (c)) +#define ISUPPER(c) (ISASCII (c) && isupper (c)) +#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) #ifndef NULL #define NULL 0 @@ -243,12 +256,16 @@ char *alloca (); #define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) +#undef MAX +#undef MIN #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define MIN(a, b) ((a) < (b) ? (a) : (b)) typedef char boolean; #define false 0 #define true 1 + +static int re_match_2_internal (); /* These are the command codes that appear in compiled regular expressions. Some opcodes are followed by argument bytes. A @@ -919,7 +936,7 @@ static const char *re_error_msg[] = change it ourselves. */ int re_max_failures = 2000; -typedef const unsigned char *fail_stack_elt_t; +typedef unsigned char *fail_stack_elt_t; typedef struct { @@ -1487,7 +1504,7 @@ regex_compile (pattern, size, syntax, bufp) they can be reliably used as array indices. */ register unsigned char c, c1; - /* A random tempory spot in PATTERN. */ + /* A random temporary spot in PATTERN. */ const char *p1; /* Points to the end of the buffer, where we should append. */ @@ -2236,7 +2253,7 @@ regex_compile (pattern, size, syntax, bufp) we're all done, the pattern will look like: set_number_at set_number_at - succeed_n + succeed_n jump_n (The upper bound and `jump_n' are omitted if @@ -2480,12 +2497,7 @@ regex_compile (pattern, size, syntax, bufp) is strictly greater than re_max_failures, the largest possible stack is 2 * re_max_failures failure points. */ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); - if (fail_stack.stack) - fail_stack.stack = - (fail_stack_elt_t *) realloc (fail_stack.stack, - (fail_stack.size - * sizeof (fail_stack_elt_t))); - else + if (! fail_stack.stack) fail_stack.stack = (fail_stack_elt_t *) malloc (fail_stack.size * sizeof (fail_stack_elt_t)); @@ -2722,7 +2734,7 @@ re_compile_fastmap (bufp) register char *fastmap = bufp->fastmap; unsigned char *pattern = bufp->buffer; unsigned long size = bufp->used; - const unsigned char *p = pattern; + unsigned char *p = pattern; register unsigned char *pend = pattern + size; /* Assume that each path through the pattern can be null until @@ -3010,7 +3022,7 @@ re_set_registers (bufp, regs, num_regs, starts, ends) { bufp->regs_allocated = REGS_UNALLOCATED; regs->num_regs = 0; - regs->start = regs->end = (regoff_t) 0; + regs->start = regs->end = (regoff_t *) 0; } } @@ -3143,8 +3155,10 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) && !bufp->can_be_null) return -1; - val = re_match_2 (bufp, string1, size1, string2, size2, - startpos, regs, stop); + val = re_match_2_internal (bufp, string1, size1, string2, size2, + startpos, regs, stop); + alloca (0); + if (val >= 0) return startpos; @@ -3177,8 +3191,10 @@ static boolean alt_match_null_string_p (), /* This converts PTR, a pointer into one of the search strings `string1' and `string2' into an offset from the beginning of that string. */ -#define POINTER_TO_OFFSET(ptr) \ - (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1) +#define POINTER_TO_OFFSET(ptr) \ + (FIRST_STRING_P (ptr) \ + ? ((regoff_t) ((ptr) - string1)) \ + : ((regoff_t) ((ptr) - string2 + size1))) /* Macros for dealing with the split strings in re_match_2. */ @@ -3238,8 +3254,8 @@ static boolean alt_match_null_string_p (), FREE_VAR (reg_info_dummy); \ } while (0) #else /* not REGEX_MALLOC */ -/* Some MIPS systems (at least) want this to free alloca'd storage. */ -#define FREE_VARIABLES() alloca (0) +/* This used to do alloca (0), but now we do that in the caller. */ +#define FREE_VARIABLES() /* Nothing */ #endif /* not REGEX_MALLOC */ #else #define FREE_VARIABLES() /* Do nothing! */ @@ -3266,8 +3282,11 @@ re_match (bufp, string, size, pos, regs) const char *string; int size, pos; struct re_registers *regs; - { - return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size); +{ + int result = re_match_2_internal (bufp, NULL, 0, string, size, + pos, regs, size); + alloca (0); + return result; } #endif /* not emacs */ @@ -3294,6 +3313,23 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) struct re_registers *regs; int stop; { + int result = re_match_2_internal (bufp, string1, size1, string2, size2, + pos, regs, stop); + alloca (0); + return result; +} + +/* This is a separate function so that we can force an alloca cleanup + afterwards. */ +static int +re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + int size1, size2; + int pos; + struct re_registers *regs; + int stop; +{ /* General temporaries. */ int mcnt; unsigned char *p1; @@ -3312,6 +3348,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) unsigned char *p = bufp->buffer; register unsigned char *pend = p + bufp->used; + /* Mark the opcode just after a start_memory, so we can test for an + empty subpattern when we get to the stop_memory. */ + unsigned char *just_past_start_mem = 0; + /* We use this to map every character in the string. */ char *translate = bufp->translate; @@ -3617,8 +3657,9 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) if (regs->num_regs > 0) { regs->start[0] = pos; - regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1 - : d - string2 + size1); + regs->end[0] = (MATCHING_IN_FIRST_STRING + ? ((regoff_t) (d - string1)) + : ((regoff_t) (d - string2 + size1))); } /* Go through the first `min (num_regs, regs->num_regs)' @@ -3629,8 +3670,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) regs->start[mcnt] = regs->end[mcnt] = -1; else { - regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]); - regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]); + regs->start[mcnt] + = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); + regs->end[mcnt] + = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); } } @@ -3789,6 +3832,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) /* Move past the register number and inner group count. */ p += 2; + just_past_start_mem = p; break; @@ -3853,7 +3897,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) information for this group that we had before trying this last match. */ if ((!MATCHED_SOMETHING (reg_info[*p]) - || (re_opcode_t) p[-3] == start_memory) + || just_past_start_mem == p - 1) && (p + 2) < pend) { boolean is_a_jump_n = false; @@ -4443,8 +4487,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) mcnt = (int) Sword; matchsyntax: PREFETCH (); - if (SYNTAX (*d++) != (enum syntaxcode) mcnt) - goto fail; + /* Can't use *d++ here; SYNTAX may be an unsafe macro. */ + d++; + if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt) + goto fail; SET_REGS_MATCHED (); break; @@ -4458,8 +4504,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) mcnt = (int) Sword; matchnotsyntax: PREFETCH (); - if (SYNTAX (*d++) == (enum syntaxcode) mcnt) - goto fail; + /* Can't use *d++ here; SYNTAX may be an unsafe macro. */ + d++; + if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt) + goto fail; SET_REGS_MATCHED (); break;