From 9dcc0e05407833a59b84cb6b32460242ddc6ff98 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Mon, 5 Feb 2007 02:42:27 +0000 Subject: [PATCH] New module 'mbscasestr'. Reduced goal of 'strcasestr'. --- ChangeLog | 22 ++++++++++ MODULES.html.sh | 1 + lib/mbscasestr.c | 6 +-- lib/strcasestr.c | 123 +++++++++++-------------------------------------------- lib/string_.h | 27 +++++++----- m4/strcasestr.m4 | 12 +++--- m4/string_h.m4 | 3 +- modules/string | 3 +- 8 files changed, 78 insertions(+), 119 deletions(-) diff --git a/ChangeLog b/ChangeLog index d66536bcb..0aa4e8125 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,27 @@ 2007-02-04 Bruno Haible + New module mbscasestr, reduced goal of strcasestr. + * modules/mbscasestr: New file. + * lib/mbscasestr.c: New file, copied from lib/strcasestr.c. + (mbscasestr): Renamed from strcasestr. + * lib/strcasestr.c: Don't include mbuiter.h. + (strcasestr): Remove support for multibyte locales. + * lib/string_.h (strcasestr): Don't rename. Declare only if missing. + Change the conditional link warning. + (mbscasestr): New declaration. + * m4/mbscasestr.m4: New file. + * m4/strcasestr.m4 (gl_FUNC_STRCASESTR): Enable the replacement only if + the system does not have strcasestr. Set HAVE_STRCASESTR instead of + REPLACE_STRCASESTR. + * m4/string_h.m4 (gl_HEADER_STRING_H_DEFAULTS): Initialize + HAVE_STRCASESTR instead of REPLACE_STRCASESTR. + (gl_STRING_MODULE_INDICATOR_DEFAULTS): Initialize GNULIB_MBSCASESTR. + * modules/string (string.h): Also substitute GNULIB_MBSCASESTR. + Substitute HAVE_STRCASESTR instead of REPLACE_STRCASESTR. + * MODULES.html.sh (Internationalization functions): Add mbscasestr. + +2007-02-04 Bruno Haible + Simplify handling of strncasecmp. * lib/string_.h (strncasecmp): Remove test for GNULIB_STRCASE. Change the conditional link warning.
diff --git a/MODULES.html.sh b/MODULES.html.sh index 1de85543d..715bc9dc0 100755 --- a/MODULES.html.sh +++ b/MODULES.html.sh @@ -2164,6 +2164,7 @@ func_all_modules () func_module mbsrchr func_module mbsstr func_module mbscasecmp + func_module mbscasestr func_module mbswidth func_module memcasecmp func_module memcoll diff --git a/lib/mbscasestr.c b/lib/mbscasestr.c index 0a25f86f6..cf23cbaa4 100644 --- a/lib/mbscasestr.c +++ b/lib/mbscasestr.c @@ -30,12 +30,12 @@ #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch)) -/* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive - comparison. +/* Find the first occurrence of the character string NEEDLE in the character + string HAYSTACK, using case-insensitive comparison. Note: This function may, in multibyte locales, return success even if strlen (haystack) < strlen (needle) ! */ char * -strcasestr (const char *haystack, const char *needle) +mbscasestr (const char *haystack, const char *needle) { /* Be careful not to look at the entire extent of haystack or needle until needed. This is useful because of these two cases: diff --git a/lib/strcasestr.c b/lib/strcasestr.c index 0a25f86f6..e1f5952c7 100644 --- a/lib/strcasestr.c +++ b/lib/strcasestr.c @@ -24,10 +24,6 @@ #include <ctype.h> #include <stddef.h> /* for NULL, in case a nonstandard string.h lacks it */ -#if HAVE_MBRTOWC -# include "mbuiter.h" -#endif - #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch)) /* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive @@ -37,109 +33,40 @@ char * strcasestr (const char *haystack, const char *needle) { - /* Be careful not to look at the entire extent of haystack or needle - until needed. This is useful because of these two cases: - - haystack may be very long, and a match of needle found early, - - needle may be very long, and not even a short initial segment of - needle may be found in haystack.
*/ -#if HAVE_MBRTOWC - if (MB_CUR_MAX > 1) + if (*needle != '\0') { - mbui_iterator_t iter_needle; + /* Speed up the following searches of needle by caching its first + character. */ + unsigned char b = TOLOWER ((unsigned char) *needle); - mbui_init (iter_needle, needle); - if (mbui_avail (iter_needle)) + needle++; + for (;; haystack++) { - mbchar_t b; - mbui_iterator_t iter_haystack; - - mb_copy (&b, &mbui_cur (iter_needle)); - if (b.wc_valid) - b.wc = towlower (b.wc); - - mbui_init (iter_haystack, haystack); - for (;; mbui_advance (iter_haystack)) + if (*haystack == '\0') + /* No match. */ + return NULL; + if (TOLOWER ((unsigned char) *haystack) == b) + /* The first character matches. */ { - mbchar_t c; + const char *rhaystack = haystack + 1; + const char *rneedle = needle; - if (!mbui_avail (iter_haystack)) - /* No match. */ - return NULL; - - mb_copy (&c, &mbui_cur (iter_haystack)); - if (c.wc_valid) - c.wc = towlower (c.wc); - if (mb_equal (c, b)) - /* The first character matches. */ + for (;; rhaystack++, rneedle++) { - mbui_iterator_t rhaystack; - mbui_iterator_t rneedle; - - memcpy (&rhaystack, &iter_haystack, sizeof (mbui_iterator_t)); - mbui_advance (rhaystack); - - mbui_init (rneedle, needle); - if (!mbui_avail (rneedle)) - abort (); - mbui_advance (rneedle); - - for (;; mbui_advance (rhaystack), mbui_advance (rneedle)) - { - if (!mbui_avail (rneedle)) - /* Found a match. */ - return (char *) mbui_cur_ptr (iter_haystack); - if (!mbui_avail (rhaystack)) - /* No match. */ - return NULL; - if (!mb_caseequal (mbui_cur (rhaystack), - mbui_cur (rneedle))) - /* Nothing in this round. */ - break; - } + if (*rneedle == '\0') + /* Found a match. */ + return (char *) haystack; + if (*rhaystack == '\0') + /* No match. */ + return NULL; + if (TOLOWER ((unsigned char) *rhaystack) + != TOLOWER ((unsigned char) *rneedle)) + /* Nothing in this round. 
*/ + break; } } } - else - return (char *) haystack; } else -#endif - { - if (*needle != '\0') - { - /* Speed up the following searches of needle by caching its first - character. */ - unsigned char b = TOLOWER ((unsigned char) *needle); - - needle++; - for (;; haystack++) - { - if (*haystack == '\0') - /* No match. */ - return NULL; - if (TOLOWER ((unsigned char) *haystack) == b) - /* The first character matches. */ - { - const char *rhaystack = haystack + 1; - const char *rneedle = needle; - - for (;; rhaystack++, rneedle++) - { - if (*rneedle == '\0') - /* Found a match. */ - return (char *) haystack; - if (*rhaystack == '\0') - /* No match. */ - return NULL; - if (TOLOWER ((unsigned char) *rhaystack) - != TOLOWER ((unsigned char) *rneedle)) - /* Nothing in this round. */ - break; - } - } - } - } - else - return (char *) haystack; - } + return (char *) haystack; } diff --git a/lib/string_.h b/lib/string_.h index e1c0373e9..118a2d4a2 100644 --- a/lib/string_.h +++ b/lib/string_.h @@ -257,19 +257,17 @@ extern char *strsep (char **restrict __stringp, char const *restrict __delim); #endif /* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive - comparison. - Note: This function may, in multibyte locales, return success even if - strlen (haystack) < strlen (needle) ! */ -#if @GNULIB_STRCASESTR@ -# if @REPLACE_STRCASESTR@ -# undef strcasestr -# define strcasestr rpl_strcasestr + comparison. */ +#if ! @HAVE_STRCASESTR@ extern char *strcasestr (const char *haystack, const char *needle); -# endif -#elif defined GNULIB_POSIXCHECK +#endif +#if defined GNULIB_POSIXCHECK +/* strcasestr() does not work with multibyte strings: + It is a glibc extension, and glibc implements it only for unibyte + locales. 
*/ # undef strcasestr # define strcasestr(a,b) \ - (GL_LINK_WARNING ("strcasestr is often incorrectly implemented for multibyte locales - use gnulib module 'strcasestr' for correct and portable internationalization"), \ + (GL_LINK_WARNING ("strcasestr does not work correctly on character strings in multibyte locales - use mbscasestr if you care about internationalization, or use c-strcasestr if you want a locale independent function"), \ strcasestr (a, b)) #endif @@ -345,6 +343,15 @@ extern char * mbsstr (const char *haystack, const char *needle); extern int mbscasecmp (const char *s1, const char *s2); #endif +#if @GNULIB_MBSCASESTR@ +/* Find the first occurrence of the character string NEEDLE in the character + string HAYSTACK, using case-insensitive comparison. + Note: This function may, in multibyte locales, return success even if + strlen (haystack) < strlen (needle) ! + Unlike strcasestr(), this function works correctly in multibyte locales. */ +extern char * mbscasestr (const char *haystack, const char *needle); +#endif + #ifdef __cplusplus } diff --git a/m4/strcasestr.m4 b/m4/strcasestr.m4 index 61c348a36..98992166e 100644 --- a/m4/strcasestr.m4 +++ b/m4/strcasestr.m4 @@ -1,4 +1,4 @@ -# strcasestr.m4 serial 4 +# strcasestr.m4 serial 5 dnl Copyright (C) 2005, 2007 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -7,11 +7,11 @@ dnl with or without modifications, as long as this notice is preserved. AC_DEFUN([gl_FUNC_STRCASESTR], [ AC_REQUIRE([gl_HEADER_STRING_H_DEFAULTS]) - dnl No known system has a strcasestr() function that works correctly in - dnl multibyte locales. Therefore we use our version always. - AC_LIBOBJ(strcasestr) - REPLACE_STRCASESTR=1 - gl_PREREQ_STRCASESTR + AC_REPLACE_FUNCS(strcasestr) + if test $ac_cv_func_strcasestr = no; then + HAVE_STRCASESTR=0 + gl_PREREQ_STRCASESTR + fi ]) # Prerequisites of lib/strcasestr.c.
diff --git a/m4/string_h.m4 b/m4/string_h.m4 index 31f702b55..f89046ac4 100644 --- a/m4/string_h.m4 +++ b/m4/string_h.m4 @@ -41,8 +41,8 @@ AC_DEFUN([gl_HEADER_STRING_H_DEFAULTS], HAVE_DECL_STRNLEN=1; AC_SUBST([HAVE_DECL_STRNLEN]) HAVE_STRPBRK=1; AC_SUBST([HAVE_STRPBRK]) HAVE_STRSEP=1; AC_SUBST([HAVE_STRSEP]) + HAVE_STRCASESTR=1; AC_SUBST([HAVE_STRCASESTR]) HAVE_DECL_STRTOK_R=1; AC_SUBST([HAVE_DECL_STRTOK_R]) - REPLACE_STRCASESTR=0; AC_SUBST([REPLACE_STRCASESTR]) ]) AC_DEFUN([gl_STRING_MODULE_INDICATOR], @@ -71,4 +71,5 @@ AC_DEFUN([gl_STRING_MODULE_INDICATOR_DEFAULTS], GNULIB_MBSRCHR=0; AC_SUBST([GNULIB_MBSRCHR]) GNULIB_MBSSTR=0; AC_SUBST([GNULIB_MBSSTR]) GNULIB_MBSCASECMP=0; AC_SUBST([GNULIB_MBSCASECMP]) + GNULIB_MBSCASESTR=0; AC_SUBST([GNULIB_MBSCASESTR]) ]) diff --git a/modules/string b/modules/string index a2eab98f3..573c2c177 100644 --- a/modules/string +++ b/modules/string @@ -25,6 +25,7 @@ string.h: string_.h -e 's|@''GNULIB_MBSRCHR''@|$(GNULIB_MBSRCHR)|g' \ -e 's|@''GNULIB_MBSSTR''@|$(GNULIB_MBSSTR)|g' \ -e 's|@''GNULIB_MBSCASECMP''@|$(GNULIB_MBSCASECMP)|g' \ + -e 's|@''GNULIB_MBSCASESTR''@|$(GNULIB_MBSCASESTR)|g' \ -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \ -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \ -e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \ @@ -52,8 +53,8 @@ string.h: string_.h -e 's|@''HAVE_DECL_STRNLEN''@|$(HAVE_DECL_STRNLEN)|g' \ -e 's|@''HAVE_STRPBRK''@|$(HAVE_STRPBRK)|g' \ -e 's|@''HAVE_STRSEP''@|$(HAVE_STRSEP)|g' \ + -e 's|@''HAVE_STRCASESTR''@|$(HAVE_STRCASESTR)|g' \ -e 's|@''HAVE_DECL_STRTOK_R''@|$(HAVE_DECL_STRTOK_R)|g' \ - -e 's|@''REPLACE_STRCASESTR''@|$(REPLACE_STRCASESTR)|g' \ < $(srcdir)/string_.h; \ } > $@-t mv $@-t $@ -- 2.11.0