X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fregex.c;h=81b06ff5fe1626f5de1709e672ff0cbaa0711b07;hb=fb2a33bb98f09325349f84f1007035e415d8175f;hp=bf3e96855d45ae30d72d0c5be769d7c4bd70e4df;hpb=6b89992dbb9ec71f4505b47447bde264e3035056;p=gnulib.git diff --git a/lib/regex.c b/lib/regex.c index bf3e96855..81b06ff5f 100644 --- a/lib/regex.c +++ b/lib/regex.c @@ -26,6 +26,17 @@ #define _GNU_SOURCE +#ifdef HAVE_CONFIG_H +#if defined (CONFIG_BROKETS) +/* We use <config.h> instead of "config.h" so that a compilation + using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h + (which it would do because it found this file in $srcdir). */ +#include <config.h> +#else +#include "config.h" +#endif +#endif + /* We need this for `regex.h', and perhaps for the Emacs include files. */ #include <sys/types.h> @@ -46,6 +57,14 @@ #else /* not emacs */ +#ifdef STDC_HEADERS +#include <stdlib.h> +#else +char *malloc (); +char *realloc (); +#endif + + /* We used to test for `BSTRING' here, but only GCC and Emacs define `BSTRING', as far as I know, and neither of them use this code. */ #if HAVE_STRING_H || STDC_HEADERS @@ -63,14 +82,6 @@ #include <strings.h> #endif -#ifdef STDC_HEADERS -#include <stdlib.h> -#else -char *malloc (); -char *realloc (); -#endif - - /* Define the syntax stuff for \<, \>, etc. */ /* This must be nonzero for the wordchar and notwordchar pattern @@ -881,13 +892,22 @@ static const char *re_error_msg[] = using the relocating allocator routines, then malloc could cause a relocation, which might (if the strings being searched are in the ralloc heap) shift the data out from underneath the regexp - routines. */ + routines. + + Here's another reason to avoid allocation: Emacs insists on + processing input from X in a signal handler; processing X input may + call malloc; if input arrives while a matching routine is calling + malloc, then we're scrod. But Emacs can't just block input while + calling matching routines; then we don't notice interrupts when + they come in. 
So, Emacs blocks input around all regexp calls + except the matching calls, which it leaves unprotected, in the + faith that they will not malloc. */ /* Normally, this is fine. */ #define MATCH_MAY_ALLOCATE /* But under some circumstances, it's not. */ -#if defined (REL_ALLOC) && defined (C_ALLOCA) +#if defined (emacs) || (defined (REL_ALLOC) && defined (C_ALLOCA)) #undef MATCH_MAY_ALLOCATE #endif @@ -910,7 +930,7 @@ static const char *re_error_msg[] = change it ourselves. */ int re_max_failures = 2000; -typedef const unsigned char *fail_stack_elt_t; +typedef unsigned char *fail_stack_elt_t; typedef struct { @@ -2713,7 +2733,7 @@ re_compile_fastmap (bufp) register char *fastmap = bufp->fastmap; unsigned char *pattern = bufp->buffer; unsigned long size = bufp->used; - const unsigned char *p = pattern; + unsigned char *p = pattern; register unsigned char *pend = pattern + size; /* Assume that each path through the pattern can be null until @@ -3168,8 +3188,10 @@ static boolean alt_match_null_string_p (), /* This converts PTR, a pointer into one of the search strings `string1' and `string2' into an offset from the beginning of that string. */ -#define POINTER_TO_OFFSET(ptr) \ - (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1) +#define POINTER_TO_OFFSET(ptr) \ + (FIRST_STRING_P (ptr) \ + ? ((regoff_t) ((ptr) - string1)) \ + : ((regoff_t) ((ptr) - string2 + size1))) /* Macros for dealing with the split strings in re_match_2. */ @@ -3608,8 +3630,9 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) if (regs->num_regs > 0) { regs->start[0] = pos; - regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1 - : d - string2 + size1); + regs->end[0] = (MATCHING_IN_FIRST_STRING + ? 
((regoff_t) (d - string1)) + : ((regoff_t) (d - string2 + size1))); } /* Go through the first `min (num_regs, regs->num_regs)' @@ -3620,8 +3643,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) regs->start[mcnt] = regs->end[mcnt] = -1; else { - regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]); - regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]); + regs->start[mcnt] + = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); + regs->end[mcnt] + = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); } } @@ -4130,11 +4155,27 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) detect that here, the alternative has put on a dummy failure point which is what we will end up popping. */ - /* Skip over open/close-group commands. */ - while (p2 + 2 < pend - && ((re_opcode_t) *p2 == stop_memory - || (re_opcode_t) *p2 == start_memory)) - p2 += 3; /* Skip over args, too. */ + /* Skip over open/close-group commands. + If what follows this loop is a ...+ construct, + look at what begins its body, since we will have to + match at least one of that. */ + while (1) + { + if (p2 + 2 < pend + && ((re_opcode_t) *p2 == stop_memory + || (re_opcode_t) *p2 == start_memory)) + p2 += 3; + else if (p2 + 6 < pend + && (re_opcode_t) *p2 == dummy_failure_jump) + p2 += 6; + else + break; + } + + p1 = p + mcnt; + /* p1[0] ... p1[2] are the `on_failure_jump' corresponding + to the `maybe_finalize_jump' of this case. Examine what + follows. */ /* If we're at the end of the pattern, we can change. */ if (p2 == pend) @@ -4152,11 +4193,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) { register unsigned char c = *p2 == (unsigned char) endline ? '\n' : p2[2]; - p1 = p + mcnt; - /* p1[0] ... p1[2] are the `on_failure_jump' corresponding - to the `maybe_finalize_jump' of this case. Examine what - follows. 
*/ if ((re_opcode_t) p1[3] == exactn && p1[5] != c) { p[-3] = (unsigned char) pop_failure_jump; @@ -4182,6 +4219,54 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) } } } + else if ((re_opcode_t) *p2 == charset) + { + register unsigned char c + = *p2 == (unsigned char) endline ? '\n' : p2[2]; + + if ((re_opcode_t) p1[3] == exactn + && ! (p2[1] * BYTEWIDTH > p1[4] + && (p2[1 + p1[4] / BYTEWIDTH] + & (1 << (p1[4] % BYTEWIDTH))))) + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", + c, p1[5]); + } + + else if ((re_opcode_t) p1[3] == charset_not) + { + int idx; + /* We win if the charset_not inside the loop + lists every character listed in the charset after. */ + for (idx = 0; idx < p2[1]; idx++) + if (! (p2[2 + idx] == 0 + || (idx < p1[4] + && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) + break; + + if (idx == p2[1]) + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); + } + } + else if ((re_opcode_t) p1[3] == charset) + { + int idx; + /* We win if the charset inside the loop + has no overlap with the one after the loop. */ + for (idx = 0; idx < p2[1] && idx < p1[4]; idx++) + if ((p2[2 + idx] & p1[5 + idx]) != 0) + break; + + if (idx == p2[1] || idx == p1[4]) + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); + } + } + } } p -= 2; /* Point at relative address again. */ if ((re_opcode_t) p[-1] != pop_failure_jump)