X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fregex.c;h=25a219e267fff2a837707320a3a1dbba46eaca43;hb=b3024d9c5ab39fcce8d7aa8069c4d5bcd139a70b;hp=eb5a6b3d478126fa6bf008e90cf1ea08a40db84c;hpb=48f06745b58de505238ca1eb2b865126b72ca24c;p=gnulib.git diff --git a/lib/regex.c b/lib/regex.c index eb5a6b3d4..25a219e26 100644 --- a/lib/regex.c +++ b/lib/regex.c @@ -89,6 +89,10 @@ /* This is for other GNU distributions with internationalized messages. */ #if HAVE_LIBINTL_H || defined _LIBC # include +# ifdef _LIBC +# undef gettext +# define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES) +# endif #else # define gettext(msgid) (msgid) #endif @@ -197,35 +201,34 @@ char *realloc (); eliminate the && through constant folding." Solaris defines some of these symbols so we must undefine them first. */ -#undef ISASCII #if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) -# define ISASCII(c) 1 +# define IN_CTYPE_DOMAIN(c) 1 #else -# define ISASCII(c) isascii(c) +# define IN_CTYPE_DOMAIN(c) isascii(c) #endif #ifdef isblank -# define ISBLANK(c) (ISASCII (c) && isblank (c)) +# define ISBLANK(c) (IN_CTYPE_DOMAIN (c) && isblank (c)) #else # define ISBLANK(c) ((c) == ' ' || (c) == '\t') #endif #ifdef isgraph -# define ISGRAPH(c) (ISASCII (c) && isgraph (c)) +# define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isgraph (c)) #else -# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) +# define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isprint (c) && !isspace (c)) #endif #undef ISPRINT -#define ISPRINT(c) (ISASCII (c) && isprint (c)) -#define ISDIGIT(c) (ISASCII (c) && isdigit (c)) -#define ISALNUM(c) (ISASCII (c) && isalnum (c)) -#define ISALPHA(c) (ISASCII (c) && isalpha (c)) -#define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) -#define ISLOWER(c) (ISASCII (c) && islower (c)) -#define ISPUNCT(c) (ISASCII (c) && ispunct (c)) -#define ISSPACE(c) (ISASCII (c) && isspace (c)) -#define ISUPPER(c) (ISASCII (c) && isupper (c)) -#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) +#define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c)) +#define ISDIGIT(c) (IN_CTYPE_DOMAIN (c) && isdigit (c)) +#define ISALNUM(c) (IN_CTYPE_DOMAIN (c) && isalnum (c)) +#define ISALPHA(c) (IN_CTYPE_DOMAIN (c) && isalpha (c)) +#define ISCNTRL(c) (IN_CTYPE_DOMAIN (c) && iscntrl (c)) +#define ISLOWER(c) (IN_CTYPE_DOMAIN (c) && islower (c)) +#define ISPUNCT(c) (IN_CTYPE_DOMAIN (c) && ispunct (c)) +#define ISSPACE(c) (IN_CTYPE_DOMAIN (c) && isspace (c)) +#define ISUPPER(c) (IN_CTYPE_DOMAIN (c) && isupper (c)) +#define ISXDIGIT(c) (IN_CTYPE_DOMAIN (c) && isxdigit (c)) #ifdef _tolower # define TOLOWER(c) _tolower(c) @@ -1234,7 +1237,7 @@ typedef struct # if defined MATCH_MAY_ALLOCATE /* 4400 was enough to cause a crash on Alpha OSF/1, whose default stack limit is 2mb. */ -int re_max_failures = 20000; +int re_max_failures = 4000; # else int re_max_failures = 2000; # endif @@ -1747,29 +1750,51 @@ static reg_errcode_t compile_range _RE_ARGS ((unsigned int range_start, reset the pointers that pointed into the old block to point to the correct places in the new one. If extending the buffer results in it being larger than MAX_BUF_SIZE, then flag memory exhausted. */ +#if __BOUNDED_POINTERS__ +# define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated) +# define MOVE_BUFFER_POINTER(P) \ + (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr) +# define ELSE_EXTEND_BUFFER_HIGH_BOUND \ + else \ + { \ + SET_HIGH_BOUND (b); \ + SET_HIGH_BOUND (begalt); \ + if (fixup_alt_jump) \ + SET_HIGH_BOUND (fixup_alt_jump); \ + if (laststart) \ + SET_HIGH_BOUND (laststart); \ + if (pending_exact) \ + SET_HIGH_BOUND (pending_exact); \ + } +#else +# define MOVE_BUFFER_POINTER(P) (P) += incr +# define ELSE_EXTEND_BUFFER_HIGH_BOUND +#endif #define EXTEND_BUFFER() \ - do { \ + do { \ unsigned char *old_buffer = bufp->buffer; \ - if (bufp->allocated == MAX_BUF_SIZE) \ + if (bufp->allocated == MAX_BUF_SIZE) \ return REG_ESIZE; \ bufp->allocated <<= 1; \ if (bufp->allocated > MAX_BUF_SIZE) \ - bufp->allocated = MAX_BUF_SIZE; \ + bufp->allocated = MAX_BUF_SIZE; \ bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\ if (bufp->buffer == NULL) \ return REG_ESPACE; \ /* If the buffer moved, move all the pointers into it. */ \ if (old_buffer != bufp->buffer) \ { \ - b = (b - old_buffer) + bufp->buffer; \ - begalt = (begalt - old_buffer) + bufp->buffer; \ - if (fixup_alt_jump) \ - fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ - if (laststart) \ - laststart = (laststart - old_buffer) + bufp->buffer; \ - if (pending_exact) \ - pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ + int incr = bufp->buffer - old_buffer; \ + MOVE_BUFFER_POINTER (b); \ + MOVE_BUFFER_POINTER (begalt); \ + if (fixup_alt_jump) \ + MOVE_BUFFER_POINTER (fixup_alt_jump); \ + if (laststart) \ + MOVE_BUFFER_POINTER (laststart); \ + if (pending_exact) \ + MOVE_BUFFER_POINTER (pending_exact); \ } \ + ELSE_EXTEND_BUFFER_HIGH_BOUND \ } while (0) @@ -2690,7 +2715,7 @@ regex_compile (pattern, size, syntax, bufp) PATFETCH (c); /* Now add the multibyte character(s) we found - to the acceptabed list. + to the accept list. XXX Note that this is not entirely correct. we would have to match multibyte sequences @@ -2703,7 +2728,10 @@ regex_compile (pattern, size, syntax, bufp) if (c1 == 1) range_start = extra[idx]; while (c1-- > 0) - SET_LIST_BIT (extra[idx++]); + { + SET_LIST_BIT (extra[idx]); + ++idx; + } } #endif had_char_class = false; @@ -3485,23 +3513,19 @@ compile_range (range_start_char, p_ptr, pend, translate, syntax, b) unsigned char *b; { unsigned this_char; - const char *p = *p_ptr; reg_errcode_t ret; - char range_start[2]; - char range_end[2]; - char ch[2]; +#if _LIBC + const unsigned char *collseq; + unsigned int start_colseq; + unsigned int end_colseq; +#else + unsigned end_char; +#endif if (p == pend) return REG_ERANGE; - /* Fetch the endpoints without translating them; the - appropriate translation is done in the bit-setting loop below. */ - range_start[0] = range_start_char; - range_start[1] = '\0'; - range_end[0] = p[0]; - range_end[1] = '\0'; - /* Have to increment the pointer into the pattern string, so the caller isn't still at the ending character. */ (*p_ptr)++; @@ -3509,19 +3533,34 @@ compile_range (range_start_char, p_ptr, pend, translate, syntax, b) /* Report an error if the range is empty and the syntax prohibits this. */ ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; - /* Here we see why `this_char' has to be larger than an `unsigned - char' -- we would otherwise go into an infinite loop, since all - characters <= 0xff. */ - ch[1] = '\0'; +#if _LIBC + collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_COLLSEQMB); + + start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)]; + end_colseq = collseq[(unsigned char) TRANSLATE (p[0])]; for (this_char = 0; this_char <= (unsigned char) -1; ++this_char) { - ch[0] = this_char; - if (strcoll (range_start, ch) <= 0 && strcoll (ch, range_end) <= 0) + unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)]; + + if (start_colseq <= this_colseq && this_colseq <= end_colseq) { SET_LIST_BIT (TRANSLATE (this_char)); ret = REG_NOERROR; } } +#else + /* Here we see why `this_char' has to be larger than an `unsigned + char' -- we would otherwise go into an infinite loop, since all + characters <= 0xff. */ + range_start_char = TRANSLATE (range_start_char); + end_char = TRANSLATE (p[0]); + for (this_char = range_start_char; this_char <= end_char; ++this_char) + { + SET_LIST_BIT (TRANSLATE (this_char)); + ret = REG_NOERROR; + } +#endif return ret; }