From 103b7cd6e6cc45a5aa570ad8c4d0aa6b0188b602 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Wed, 17 Aug 2005 14:05:33 +0000 Subject: [PATCH] Make strstr() work in multibyte locales. --- ChangeLog | 5 ++ lib/ChangeLog | 5 ++ lib/strstr.c | 205 +++++++++++++++++++++++++++++---------------------------- lib/strstr.h | 11 +--- m4/ChangeLog | 5 ++ m4/strstr.m4 | 17 +++-- modules/strstr | 2 + 7 files changed, 134 insertions(+), 116 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5d561a2db..bf3805c75 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2005-08-17 Bruno Haible + * modules/strstr (Files): Add m4/mbrtowc.m4. + (Depends-on): Add mbuiter. + +2005-08-17 Bruno Haible + * modules/strcase (Depends-on): Add mbuiter. Remove strnlen1, mbchar. 2005-08-17 Bruno Haible diff --git a/lib/ChangeLog b/lib/ChangeLog index dbd242127..c29c07cd8 100644 --- a/lib/ChangeLog +++ b/lib/ChangeLog @@ -1,5 +1,10 @@ 2005-08-17 Bruno Haible + * strstr.h: Ignore HAVE_STRSTR, always declare the gnulib function. + * strstr.c: Completely rewritten, with multibyte locale support. + +2005-08-17 Bruno Haible + * strcasecmp.c: Use mbuiter.h. 2005-08-17 Bruno Haible diff --git a/lib/strstr.c b/lib/strstr.c index 8f82b4589..5bd7cb465 100644 --- a/lib/strstr.c +++ b/lib/strstr.c @@ -1,119 +1,126 @@ -/* Copyright (C) 1994, 1999, 2002-2003 Free Software Foundation, Inc. -This file is part of the GNU C Library. +/* Searching in a string. + Copyright (C) 2005 Free Software Foundation, Inc. + Written by Bruno Haible , 2005. -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. 
-This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -/* - * My personal strstr() implementation that beats most other algorithms. - * Until someone tells me otherwise, I assume that this is the - * fastest implementation of strstr() in C. - * I deliberately chose not to comment it. You should have at least - * as much fun trying to understand it, as I had to write it :-). - * - * Stephen R. van den Berg, berg@pool.informatik.rwth-aachen.de */ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #if HAVE_CONFIG_H # include <config.h> #endif -#include <string.h> - -typedef unsigned chartype; +/* Specification. */ +#include "strstr.h" -#undef strstr +#if HAVE_MBRTOWC +# include "mbuiter.h" +#endif +/* Find the first occurrence of NEEDLE in HAYSTACK; return NULL if none. */ char * -strstr (const char *phaystack, const char *pneedle) +strstr (const char *haystack, const char *needle) { - register const unsigned char *haystack, *needle; - register chartype b, c; + /* Be careful not to look at the entire extent of haystack or needle + until needed. 
This is useful because of these two cases: + - haystack may be very long, and a match of needle found early, + - needle may be very long, and not even a short initial segment of + needle may be found in haystack. */ +#if HAVE_MBRTOWC + if (MB_CUR_MAX > 1) + { + mbui_iterator_t iter_needle; - haystack = (const unsigned char *) phaystack; - needle = (const unsigned char *) pneedle; + mbui_init (iter_needle, needle); + if (mbui_avail (iter_needle)) + { + mbui_iterator_t iter_haystack; - b = *needle; - if (b != '\0') + mbui_init (iter_haystack, haystack); + for (;; mbui_advance (iter_haystack)) + { + if (!mbui_avail (iter_haystack)) + /* No match. */ + return NULL; + + if (mb_equal (mbui_cur (iter_haystack), mbui_cur (iter_needle))) + /* The first character matches. */ + { + mbui_iterator_t rhaystack; + mbui_iterator_t rneedle; + + memcpy (&rhaystack, &iter_haystack, sizeof (mbui_iterator_t)); + mbui_advance (rhaystack); + + mbui_init (rneedle, needle); + if (!mbui_avail (rneedle)) + abort (); + mbui_advance (rneedle); + + for (;; mbui_advance (rhaystack), mbui_advance (rneedle)) + { + if (!mbui_avail (rneedle)) + /* Found a match. */ + return (char *) mbui_cur_ptr (iter_haystack); + if (!mbui_avail (rhaystack)) + /* No match. */ + return NULL; + if (!mb_equal (mbui_cur (rhaystack), mbui_cur (rneedle))) + /* Nothing in this round. */ + break; + } + } + } + } + else + return (char *) haystack; + } + else +#endif { - haystack--; /* possible ANSI violation */ - do + if (*needle != '\0') { - c = *++haystack; - if (c == '\0') - goto ret0; - } - while (c != b); + /* Speed up the following searches of needle by caching its first + character. 
*/ + char b = *needle++; - c = *++needle; - if (c == '\0') - goto foundneedle; - ++needle; - goto jin; - - for (;;) - { - register chartype a; - register const unsigned char *rhaystack, *rneedle; - - do + for (;; haystack++) { - a = *++haystack; - if (a == '\0') - goto ret0; - if (a == b) - break; - a = *++haystack; - if (a == '\0') - goto ret0; -shloop:; } - while (a != b); - -jin: a = *++haystack; - if (a == '\0') - goto ret0; - - if (a != c) - goto shloop; - - rhaystack = haystack-- + 1; - rneedle = needle; - a = *rneedle; - - if (*rhaystack == a) - do - { - if (a == '\0') - goto foundneedle; - ++rhaystack; - a = *++needle; - if (*rhaystack != a) - break; - if (a == '\0') - goto foundneedle; - ++rhaystack; - a = *++needle; - } - while (*rhaystack == a); - - needle = rneedle; /* took the register-poor approach */ - - if (a == '\0') - break; - } + if (*haystack == '\0') + /* No match. */ + return NULL; + if (*haystack == b) + /* The first character matches. */ + { + const char *rhaystack = haystack + 1; + const char *rneedle = needle; + + for (;; rhaystack++, rneedle++) + { + if (*rneedle == '\0') + /* Found a match. */ + return (char *) haystack; + if (*rhaystack == '\0') + /* No match. */ + return NULL; + if (*rhaystack != *rneedle) + /* Nothing in this round. */ + break; + } + } + } + } + else + return (char *) haystack; } -foundneedle: - return (char*) haystack; -ret0: - return 0; } diff --git a/lib/strstr.h b/lib/strstr.h index 76b90b07c..ca9a82a96 100644 --- a/lib/strstr.h +++ b/lib/strstr.h @@ -1,5 +1,5 @@ /* Searching in a string. - Copyright (C) 2001-2003 Free Software Foundation, Inc. + Copyright (C) 2001-2003, 2005 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,13 +15,6 @@ along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ -#if HAVE_STRSTR - -/* Get strstr() declaration. */ -#include <string.h> - -#else - #ifdef __cplusplus extern "C" { #endif @@ -32,5 +25,3 @@ extern char *strstr (const char *haystack, const char *needle); #ifdef __cplusplus } #endif - -#endif diff --git a/m4/ChangeLog b/m4/ChangeLog index 78a74691f..878eeb1e5 100644 --- a/m4/ChangeLog +++ b/m4/ChangeLog @@ -1,3 +1,8 @@ +2005-08-17 Bruno Haible + + * strstr.m4 (gl_FUNC_STRSTR): Use the replacement function always. + (gl_PREREQ_STRSTR): Use gl_FUNC_MBRTOWC. + 2005-08-16 Paul Eggert * getopt.m4 (gl_GETOPT_CHECK_HEADERS): Do not override the results diff --git a/m4/strstr.m4 b/m4/strstr.m4 index 0fe0f73cd..2856ce75c 100644 --- a/m4/strstr.m4 +++ b/m4/strstr.m4 @@ -1,16 +1,19 @@ -# strstr.m4 serial 2 -dnl Copyright (C) 2002-2003 Free Software Foundation, Inc. +# strstr.m4 serial 3 +dnl Copyright (C) 2002-2003, 2005 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, dnl with or without modifications, as long as this notice is preserved. AC_DEFUN([gl_FUNC_STRSTR], [ - AC_REPLACE_FUNCS(strstr) - if test $ac_cv_func_strstr = no; then - gl_PREREQ_STRSTR - fi + dnl No known system has a strstr() function that works correctly in + dnl multibyte locales. Therefore we use our version always. + AC_LIBOBJ(strstr) + AC_DEFINE(strstr, rpl_strstr, [Define to rpl_strstr always.]) + gl_PREREQ_STRSTR ]) # Prerequisites of lib/strstr.c. -AC_DEFUN([gl_PREREQ_STRSTR], [:]) +AC_DEFUN([gl_PREREQ_STRSTR], [ + gl_FUNC_MBRTOWC +]) diff --git a/modules/strstr b/modules/strstr index c6891d215..08aa468f4 100644 --- a/modules/strstr +++ b/modules/strstr @@ -5,8 +5,10 @@ Files: lib/strstr.h lib/strstr.c m4/strstr.m4 +m4/mbrtowc.m4 Depends-on: +mbuiter configure.ac: gl_FUNC_STRSTR -- 2.11.0