From 2a2eb440165889566a738cd355381b4b28870206 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Mon, 5 Feb 2007 03:23:34 +0000 Subject: [PATCH] New module 'mbsspn'. --- ChangeLog | 13 ++++++++ MODULES.html.sh | 1 + lib/mbsspn.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/string_.h | 18 +++++++++++ m4/mbsspn.m4 | 16 ++++++++++ m4/string_h.m4 | 1 + modules/mbsspn | 29 +++++++++++++++++ modules/string | 1 + 8 files changed, 176 insertions(+) create mode 100644 lib/mbsspn.c create mode 100644 m4/mbsspn.m4 create mode 100644 modules/mbsspn diff --git a/ChangeLog b/ChangeLog index f9e086973..d5a390e58 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,18 @@ 2007-02-04 Bruno Haible + New module mbsspn. + * modules/mbsspn: New file. + * lib/mbsspn.c: New file. + * lib/string_.h (strspn): Add a conditional link warning. + (mbsspn): New declaration. + * m4/mbsspn.m4: New file. + * m4/string_h.m4 (gl_STRING_MODULE_INDICATOR_DEFAULTS): Initialize + GNULIB_MBSSPN. + * modules/string (string.h): Also substitute GNULIB_MBSSPN. + * MODULES.html.sh (Internationalization functions): Add mbsspn. + +2007-02-04 Bruno Haible + New module mbspbrk. * modules/mbspbrk: New file. * lib/mbspbrk.c: New file. diff --git a/MODULES.html.sh b/MODULES.html.sh index ca14d6058..aa40216c3 100755 --- a/MODULES.html.sh +++ b/MODULES.html.sh @@ -2167,6 +2167,7 @@ func_all_modules () func_module mbscasestr func_module mbscspn func_module mbspbrk + func_module mbsspn func_module mbswidth func_module memcasecmp func_module memcoll diff --git a/lib/mbsspn.c b/lib/mbsspn.c new file mode 100644 index 000000000..f25c9096c --- /dev/null +++ b/lib/mbsspn.c @@ -0,0 +1,97 @@ +/* Searching a string for a character outside a given set of characters. + Copyright (C) 1999, 2002, 2006-2007 Free Software Foundation, Inc. + Written by Bruno Haible , 2007. 
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include <string.h>
+
+#if HAVE_MBRTOWC
+# include "mbuiter.h"
+#endif
+
+/* Return the length in bytes of the initial segment of STRING that consists
+   only of characters in REJECT (despite its name, the set of accepted
+   characters, as for strspn); this is the offset of the first character not
+   in REJECT, or the length of STRING if none.  Works in multibyte locales.  */
+size_t
+mbsspn (const char *string, const char *reject)
+{
+  /* Optimize two cases: an empty REJECT and a REJECT of exactly one byte.  */
+  if (reject[0] == '\0')
+    return 0;
+  if (reject[1] == '\0')
+    {
+      unsigned char uc = (unsigned char) reject[0];
+
+#if HAVE_MBRTOWC
+      if (MB_CUR_MAX > 1)
+        {
+          mbui_iterator_t iter;
+
+          for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter))
+            if (!(mb_len (mbui_cur (iter)) == 1
+                  && (unsigned char) * mbui_cur_ptr (iter) == uc))
+              return mbui_cur_ptr (iter) - string;
+          return strlen (string);
+        }
+      else
+#endif
+        {
+          const char *ptr;
+
+          for (ptr = string; *ptr != '\0'; ptr++)
+            if ((unsigned char) *ptr != uc)
+              break;
+          return ptr - string;
+        }
+    }
+  /* General case.  */
+#if HAVE_MBRTOWC
+  if (MB_CUR_MAX > 1)
+    {
+      mbui_iterator_t iter;
+
+      for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter))
+        {
+          if (mb_len (mbui_cur (iter)) == 1)
+            {
+              if (mbschr (reject, (unsigned char) * mbui_cur_ptr (iter)) == NULL)
+                return mbui_cur_ptr (iter) - string;
+            }
+          else
+            {
+              mbui_iterator_t aiter;
+
+              /* No loop condition: the exhaustion test below must be reached
+                 when the character is absent from REJECT.  */
+              for (mbui_init (aiter, reject);; mbui_advance (aiter))
+                {
+                  if (!mbui_avail (aiter))
+                    return mbui_cur_ptr (iter) - string;
+                  if (mb_equal (mbui_cur (aiter), mbui_cur (iter)))
+                    break;
+                }
+            }
+        }
+      return strlen (string);
+    }
+  else
+#endif
+    return strspn (string, reject);
+}
diff --git a/lib/string_.h b/lib/string_.h
index f25a99d43..78b27647f 100644
--- a/lib/string_.h
+++ b/lib/string_.h
@@ -233,6 +233,15 @@ extern char *strpbrk (char const *__s, char const *__accept);
 #endif
 
 #if defined GNULIB_POSIXCHECK
+/* strspn() assumes the second argument is a list of single-byte characters.
+   Even in this simple case, it cannot work with multibyte strings.  */
+# undef strspn
+# define strspn(s,a) \
+  (GL_LINK_WARNING ("strspn cannot work correctly on character strings in multibyte locales - use mbsspn if you care about internationalization"), \
+   strspn (s, a))
+#endif
+
+#if defined GNULIB_POSIXCHECK
 /* strrchr() does not work with multibyte strings if the locale encoding is
    GB18030 and the character to be searched is a digit.  */
 # undef strrchr
@@ -391,6 +400,15 @@ extern size_t mbscspn (const char *string, const char *accept);
 extern char * mbspbrk (const char *string, const char *accept);
 #endif
 
+#if @GNULIB_MBSSPN@
+/* Find the first occurrence in the character string STRING of any character
+   not in the character string REJECT.  Return the number of bytes from the
+   beginning of the string to this occurrence, or to the end of the string
+   if none exists.  REJECT is the set of accepted characters, as for strspn().
+   Unlike strspn(), this function works correctly in multibyte locales.
*/ +extern size_t mbsspn (const char *string, const char *reject); +#endif + #ifdef __cplusplus } diff --git a/m4/mbsspn.m4 b/m4/mbsspn.m4 new file mode 100644 index 000000000..2359c101b --- /dev/null +++ b/m4/mbsspn.m4 @@ -0,0 +1,16 @@ +# mbsspn.m4 serial 1 +dnl Copyright (C) 2007 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_DEFUN([gl_FUNC_MBSSPN], +[ + gl_PREREQ_MBSSPN +]) + +# Prerequisites of lib/mbsspn.c. +AC_DEFUN([gl_PREREQ_MBSSPN], [ + AC_REQUIRE([gl_FUNC_MBRTOWC]) + : +]) diff --git a/m4/string_h.m4 b/m4/string_h.m4 index bdcf23552..bcb99e706 100644 --- a/m4/string_h.m4 +++ b/m4/string_h.m4 @@ -74,4 +74,5 @@ AC_DEFUN([gl_STRING_MODULE_INDICATOR_DEFAULTS], GNULIB_MBSCASESTR=0; AC_SUBST([GNULIB_MBSCASESTR]) GNULIB_MBSCSPN=0; AC_SUBST([GNULIB_MBSCSPN]) GNULIB_MBSPBRK=0; AC_SUBST([GNULIB_MBSPBRK]) + GNULIB_MBSSPN=0; AC_SUBST([GNULIB_MBSSPN]) ]) diff --git a/modules/mbsspn b/modules/mbsspn new file mode 100644 index 000000000..7d6ee606b --- /dev/null +++ b/modules/mbsspn @@ -0,0 +1,29 @@ +Description: +mbsspn() function: search a string for any character outside a set of characters. 
+ +Files: +lib/mbsspn.c +m4/mbsspn.m4 +m4/mbrtowc.m4 + +Depends-on: +mbuiter +string +mbschr + +configure.ac: +gl_FUNC_MBSSPN +gl_STRING_MODULE_INDICATOR([mbsspn]) + +Makefile.am: +lib_SOURCES += mbsspn.c + +Include: +<string.h> + +License: +LGPL + +Maintainer: +Bruno Haible + diff --git a/modules/string b/modules/string index fe9e61a31..b4acff429 100644 --- a/modules/string +++ b/modules/string @@ -28,6 +28,7 @@ string.h: string_.h -e 's|@''GNULIB_MBSCASESTR''@|$(GNULIB_MBSCASESTR)|g' \ -e 's|@''GNULIB_MBSCSPN''@|$(GNULIB_MBSCSPN)|g' \ -e 's|@''GNULIB_MBSPBRK''@|$(GNULIB_MBSPBRK)|g' \ + -e 's|@''GNULIB_MBSSPN''@|$(GNULIB_MBSSPN)|g' \ -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \ -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \ -e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \ -- 2.11.0