X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fstrcasecmp.c;h=ab196718f34f33a0cbd7d5d7e5f41fb204a47595;hb=d913c90f6a01f889db65cbb516683aab2f28c533;hp=70c22a1889738ddee25a08d6a31a06c66fa9924e;hpb=5516de90cec9ff3c9aead77fd8b6e54b4ec3b1c3;p=gnulib.git diff --git a/lib/strcasecmp.c b/lib/strcasecmp.c index 70c22a188..ab196718f 100644 --- a/lib/strcasecmp.c +++ b/lib/strcasecmp.c @@ -1,7 +1,5 @@ /* Case-insensitive string comparison function. - Copyright (C) 1998, 1999, 2005 Free Software Foundation, Inc. - Written by Bruno Haible , 2005, - based on earlier glibc code. + Copyright (C) 1998-1999, 2005-2007, 2009-2010 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,225 +15,49 @@ along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#ifdef HAVE_CONFIG_H -# include -#endif +#include /* Specification. */ -#include "strcase.h" +#include #include - -#if HAVE_MBRTOWC - -#include "strnlen1.h" - -/* Like mbiter.h, except it doesn't look at the entire string. */ - -#include "mbchar.h" - -#include -#include -#include -#include -#include - -struct mbiter_multi -{ - bool at_end; /* true if the end of the string has been reached */ - bool in_shift; /* true if next byte may not be interpreted as ASCII */ - mbstate_t state; /* if in_shift: current shift state */ - bool next_done; /* true if mbi_avail has already filled the following */ - struct mbchar cur; /* the current character: - const char *cur.ptr pointer to current character - The following are only valid after mbi_avail. - size_t cur.bytes number of bytes of current character - bool cur.wc_valid true if wc is a valid wide character - wchar_t cur.wc if wc_valid: the current character - */ -}; - -static inline void -mbiter_multi_next (struct mbiter_multi *iter) -{ - if (iter->next_done) - return; - if (iter->in_shift) - goto with_shift; - /* Handle most ASCII characters quickly, without calling mbrtowc(). */ - if (is_basic (*iter->cur.ptr)) - { - /* These characters are part of the basic character set. ISO C 99 - guarantees that their wide character code is identical to their - char code. */ - iter->cur.bytes = 1; - iter->cur.wc = *iter->cur.ptr; - iter->cur.wc_valid = true; - } - else - { - assert (mbsinit (&iter->state)); - iter->in_shift = true; - with_shift: - iter->cur.bytes = mbrtowc (&iter->cur.wc, iter->cur.ptr, - strnlen1 (iter->cur.ptr, MB_CUR_MAX), - &iter->state); - if (iter->cur.bytes == (size_t) -1) - { - /* An invalid multibyte sequence was encountered. */ - iter->cur.bytes = 1; - iter->cur.wc_valid = false; - /* Whether to set iter->in_shift = false and reset iter->state - or not is not very important; the string is bogus anyway. */ - } - else if (iter->cur.bytes == (size_t) -2) - { - /* An incomplete multibyte character at the end. */ - iter->cur.bytes = strlen (iter->cur.ptr) + 1; - iter->cur.wc_valid = false; - /* Whether to set iter->in_shift = false and reset iter->state - or not is not important; the string end is reached anyway. */ - } - else - { - if (iter->cur.bytes == 0) - { - /* A null wide character was encountered. */ - iter->cur.bytes = 1; - assert (*iter->cur.ptr == '\0'); - assert (iter->cur.wc == 0); - } - iter->cur.wc_valid = true; - - /* When in the initial state, we can go back treating ASCII - characters more quickly. */ - if (mbsinit (&iter->state)) - iter->in_shift = false; - } - } - iter->next_done = true; -} - -static inline void -mbiter_multi_reloc (struct mbiter_multi *iter, ptrdiff_t ptrdiff) -{ - iter->cur.ptr += ptrdiff; -} - -/* Iteration macros. */ -typedef struct mbiter_multi mbi_iterator_t; -#define mbi_init(iter, startptr) \ - ((iter).cur.ptr = (startptr), (iter).at_end = false, \ - (iter).in_shift = false, memset (&(iter).state, '\0', sizeof (mbstate_t)), \ - (iter).next_done = false) -#define mbi_avail(iter) \ - (!(iter).at_end && (mbiter_multi_next (&(iter)), true)) -#define mbi_advance(iter) \ - ((mb_isnul ((iter).cur) ? ((iter).at_end = true) : 0), \ - (iter).cur.ptr += (iter).cur.bytes, (iter).next_done = false) - -/* Access to the current character. */ -#define mbi_cur(iter) (iter).cur -#define mbi_cur_ptr(iter) (iter).cur.ptr - -#endif +#include #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch)) /* Compare strings S1 and S2, ignoring case, returning less than, equal to or greater than zero if S1 is lexicographically less than, equal to or greater than S2. - Note: This function may, in multibyte locales, return 0 for strings of - different lengths! */ + Note: This function does not work with multibyte strings! */ + int strcasecmp (const char *s1, const char *s2) { - if (s1 == s2) + const unsigned char *p1 = (const unsigned char *) s1; + const unsigned char *p2 = (const unsigned char *) s2; + unsigned char c1, c2; + + if (p1 == p2) return 0; - /* Be careful not to look at the entire extent of s1 or s2 until needed. - This is useful because when two strings differ, the difference is - most often already in the very few first characters. */ -#if HAVE_MBRTOWC - if (MB_CUR_MAX > 1) + do { - mbi_iterator_t iter1; - mbi_iterator_t iter2; - - mbi_init (iter1, s1); - mbi_init (iter2, s2); + c1 = TOLOWER (*p1); + c2 = TOLOWER (*p2); - while (mbi_avail (iter1) && mbi_avail (iter2)) - { - /* Sort invalid characters after all valid ones. */ - if (!mbi_cur (iter1).wc_valid) - { - if (!mbi_cur (iter2).wc_valid) - { - /* Compare two invalid characters. */ - int cmp; + if (c1 == '\0') + break; - if (mbi_cur (iter1).bytes > mbi_cur (iter2).bytes) - return 1; - if (mbi_cur (iter1).bytes < mbi_cur (iter2).bytes) - return -1; - cmp = memcmp (mbi_cur_ptr (iter1), mbi_cur_ptr (iter2), - mbi_cur (iter1).bytes); - if (cmp != 0) - return cmp; - } - else - /* mbi_cur (iter1) invalid, mbi_cur (iter2) valid. */ - return 1; - } - else - { - if (!mbi_cur (iter2).wc_valid) - /* mbi_cur (iter1) valid, mbi_cur (iter2) invalid. */ - return -1; - else - { - /* Compare two valid characters. */ - wchar_t c1 = towlower (mbi_cur (iter1).wc); - wchar_t c2 = towlower (mbi_cur (iter2).wc); - - if (c1 > c2) - return 1; - if (c1 < c2) - return -1; - } - } - mbi_advance (iter1); - mbi_advance (iter2); - } - if (mbi_avail (iter1)) - /* s2 terminated before s1. */ - return 1; - if (mbi_avail (iter2)) - /* s1 terminated before s2. */ - return -1; - return 0; + ++p1; + ++p2; } - else -#endif - { - const unsigned char *p1 = (const unsigned char *) s1; - const unsigned char *p2 = (const unsigned char *) s2; - unsigned char c1, c2; - - do - { - c1 = TOLOWER (*p1); - c2 = TOLOWER (*p2); + while (c1 == c2); - if (c1 == '\0') - break; - - ++p1; - ++p2; - } - while (c1 == c2); - - return c1 - c2; - } + if (UCHAR_MAX <= INT_MAX) + return c1 - c2; + else + /* On machines where 'char' and 'int' are types of the same size, the + difference of two 'unsigned char' values - including the sign bit - + doesn't fit in an 'int'. */ + return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0); }