X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fregex.c;h=25a219e267fff2a837707320a3a1dbba46eaca43;hb=b3024d9c5ab39fcce8d7aa8069c4d5bcd139a70b;hp=8a20da1824f5f8dd12ca6c78e2f57cdbc2c2a4eb;hpb=777b414c015f9cc2341694d001a2b6d2a7155801;p=gnulib.git diff --git a/lib/regex.c b/lib/regex.c index 8a20da182..25a219e26 100644 --- a/lib/regex.c +++ b/lib/regex.c @@ -89,6 +89,10 @@ /* This is for other GNU distributions with internationalized messages. */ #if HAVE_LIBINTL_H || defined _LIBC # include <libintl.h> +# ifdef _LIBC +# undef gettext +# define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES) +# endif #else # define gettext(msgid) (msgid) #endif @@ -197,35 +201,34 @@ char *realloc (); eliminate the && through constant folding." Solaris defines some of these symbols so we must undefine them first. */ -#undef ISASCII #if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) -# define ISASCII(c) 1 +# define IN_CTYPE_DOMAIN(c) 1 #else -# define ISASCII(c) isascii(c) +# define IN_CTYPE_DOMAIN(c) isascii(c) #endif #ifdef isblank -# define ISBLANK(c) (ISASCII (c) && isblank (c)) +# define ISBLANK(c) (IN_CTYPE_DOMAIN (c) && isblank (c)) #else # define ISBLANK(c) ((c) == ' ' || (c) == '\t') #endif #ifdef isgraph -# define ISGRAPH(c) (ISASCII (c) && isgraph (c)) +# define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isgraph (c)) #else -# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) +# define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isprint (c) && !isspace (c)) #endif #undef ISPRINT -#define ISPRINT(c) (ISASCII (c) && isprint (c)) -#define ISDIGIT(c) (ISASCII (c) && isdigit (c)) -#define ISALNUM(c) (ISASCII (c) && isalnum (c)) -#define ISALPHA(c) (ISASCII (c) && isalpha (c)) -#define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) -#define ISLOWER(c) (ISASCII (c) && islower (c)) -#define ISPUNCT(c) (ISASCII (c) && ispunct (c)) -#define ISSPACE(c) (ISASCII (c) && isspace (c)) -#define ISUPPER(c) (ISASCII (c) && isupper (c)) -#define ISXDIGIT(c) (ISASCII (c) && 
isxdigit (c)) +#define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c)) +#define ISDIGIT(c) (IN_CTYPE_DOMAIN (c) && isdigit (c)) +#define ISALNUM(c) (IN_CTYPE_DOMAIN (c) && isalnum (c)) +#define ISALPHA(c) (IN_CTYPE_DOMAIN (c) && isalpha (c)) +#define ISCNTRL(c) (IN_CTYPE_DOMAIN (c) && iscntrl (c)) +#define ISLOWER(c) (IN_CTYPE_DOMAIN (c) && islower (c)) +#define ISPUNCT(c) (IN_CTYPE_DOMAIN (c) && ispunct (c)) +#define ISSPACE(c) (IN_CTYPE_DOMAIN (c) && isspace (c)) +#define ISUPPER(c) (IN_CTYPE_DOMAIN (c) && isupper (c)) +#define ISXDIGIT(c) (IN_CTYPE_DOMAIN (c) && isxdigit (c)) #ifdef _tolower # define TOLOWER(c) _tolower(c) @@ -1234,7 +1237,7 @@ typedef struct # if defined MATCH_MAY_ALLOCATE /* 4400 was enough to cause a crash on Alpha OSF/1, whose default stack limit is 2mb. */ -int re_max_failures = 20000; +int re_max_failures = 4000; # else int re_max_failures = 2000; # endif @@ -2712,7 +2715,7 @@ regex_compile (pattern, size, syntax, bufp) PATFETCH (c); /* Now add the multibyte character(s) we found - to the acceptabed list. + to the accept list. XXX Note that this is not entirely correct. we would have to match multibyte sequences @@ -2725,7 +2728,10 @@ regex_compile (pattern, size, syntax, bufp) if (c1 == 1) range_start = extra[idx]; while (c1-- > 0) - SET_LIST_BIT (extra[idx++]); + { + SET_LIST_BIT (extra[idx]); + ++idx; + } } #endif had_char_class = false; @@ -3507,23 +3513,19 @@ compile_range (range_start_char, p_ptr, pend, translate, syntax, b) unsigned char *b; { unsigned this_char; - const char *p = *p_ptr; reg_errcode_t ret; - char range_start[2]; - char range_end[2]; - char ch[2]; +#if _LIBC + const unsigned char *collseq; + unsigned int start_colseq; + unsigned int end_colseq; +#else + unsigned end_char; +#endif if (p == pend) return REG_ERANGE; - /* Fetch the endpoints without translating them; the - appropriate translation is done in the bit-setting loop below. 
*/ - range_start[0] = range_start_char; - range_start[1] = '\0'; - range_end[0] = p[0]; - range_end[1] = '\0'; - /* Have to increment the pointer into the pattern string, so the caller isn't still at the ending character. */ (*p_ptr)++; @@ -3531,19 +3533,34 @@ compile_range (range_start_char, p_ptr, pend, translate, syntax, b) /* Report an error if the range is empty and the syntax prohibits this. */ ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; - /* Here we see why `this_char' has to be larger than an `unsigned - char' -- we would otherwise go into an infinite loop, since all - characters <= 0xff. */ - ch[1] = '\0'; +#if _LIBC + collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE, + _NL_COLLATE_COLLSEQMB); + + start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)]; + end_colseq = collseq[(unsigned char) TRANSLATE (p[0])]; for (this_char = 0; this_char <= (unsigned char) -1; ++this_char) { - ch[0] = this_char; - if (strcoll (range_start, ch) <= 0 && strcoll (ch, range_end) <= 0) + unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)]; + + if (start_colseq <= this_colseq && this_colseq <= end_colseq) { SET_LIST_BIT (TRANSLATE (this_char)); ret = REG_NOERROR; } } +#else + /* Here we see why `this_char' has to be larger than an `unsigned + char' -- we would otherwise go into an infinite loop, since all + characters <= 0xff. */ + range_start_char = TRANSLATE (range_start_char); + end_char = TRANSLATE (p[0]); + for (this_char = range_start_char; this_char <= end_char; ++this_char) + { + SET_LIST_BIT (TRANSLATE (this_char)); + ret = REG_NOERROR; + } +#endif return ret; }