1 /* Case-insensitive string comparison function.
2 Copyright (C) 1998, 1999, 2005 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2005,
4 based on earlier glibc code.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
33 /* Like mbiter.h, except it doesn't look at the entire string. */
45 bool at_end; /* true if the end of the string has been reached */
46 bool in_shift; /* true if next byte may not be interpreted as ASCII */
47 mbstate_t state; /* if in_shift: current shift state */
48 bool next_done; /* true if mbi_avail has already filled the following */
49 struct mbchar cur; /* the current character:
50 const char *cur.ptr pointer to current character
51 The following are only valid after mbi_avail.
52 size_t cur.bytes number of bytes of current character
53 bool cur.wc_valid true if wc is a valid wide character
54 wchar_t cur.wc if wc_valid: the current character
59 mbiter_multi_next (struct mbiter_multi *iter)
65 /* Handle most ASCII characters quickly, without calling mbrtowc(). */
66 if (is_basic (*iter->cur.ptr))
68 /* These characters are part of the basic character set. ISO C 99
69 guarantees that their wide character code is identical to their
72 iter->cur.wc = *iter->cur.ptr;
73 iter->cur.wc_valid = true;
77 assert (mbsinit (&iter->state));
78 iter->in_shift = true;
80 iter->cur.bytes = mbrtowc (&iter->cur.wc, iter->cur.ptr,
81 strnlen1 (iter->cur.ptr, MB_CUR_MAX),
83 if (iter->cur.bytes == (size_t) -1)
85 /* An invalid multibyte sequence was encountered. */
87 iter->cur.wc_valid = false;
88 /* Whether to set iter->in_shift = false and reset iter->state
89 or not is not very important; the string is bogus anyway. */
91 else if (iter->cur.bytes == (size_t) -2)
93 /* An incomplete multibyte character at the end. */
94 iter->cur.bytes = strlen (iter->cur.ptr) + 1;
95 iter->cur.wc_valid = false;
96 /* Whether to set iter->in_shift = false and reset iter->state
97 or not is not important; the string end is reached anyway. */
101 if (iter->cur.bytes == 0)
103 /* A null wide character was encountered. */
105 assert (*iter->cur.ptr == '\0');
106 assert (iter->cur.wc == 0);
108 iter->cur.wc_valid = true;
110 /* When in the initial state, we can go back treating ASCII
111 characters more quickly. */
112 if (mbsinit (&iter->state))
113 iter->in_shift = false;
116 iter->next_done = true;
119 /* Iteration macros. */
/* Iterator type operated on by the mbi_* macros.  */
typedef struct mbiter_multi mbi_iterator_t;

/* Prepare ITER for a traversal that begins at STARTPTR: record the start
   position, clear the end-of-string, shift and next_done flags, and zero the
   conversion state.  ITER is expanded several times, so it must be free of
   side effects.  The value of the whole expression is that of the final
   assignment, i.e. false.  */
#define mbi_init(iter, startptr) \
  ((iter).cur.ptr = (startptr), \
   (iter).at_end = false, \
   (iter).in_shift = false, \
   memset (&(iter).state, 0, sizeof (mbstate_t)), \
   (iter).next_done = false)
/* Nonzero while ITER has not run off the end of its string.  Unless the
   at_end flag is already set, this calls mbiter_multi_next on ITER before
   yielding 1.  ITER is expanded twice and must have no side effects.  */
#define mbi_avail(iter) \
  ((iter).at_end ? 0 : (mbiter_multi_next (&(iter)), 1))
/* Step ITER past its current character.  When mb_isnul reports that the
   current character is the null character, the at_end flag is raised first;
   the position then moves forward by cur.bytes, and next_done is cleared to
   mark the decoded fields of cur as no longer filled in.  ITER is expanded
   several times and must have no side effects.  The value of the whole
   expression is that of the final assignment, i.e. false.  */
#define mbi_advance(iter) \
  ((void) (mb_isnul ((iter).cur) && ((iter).at_end = true)), \
   (iter).cur.ptr += (iter).cur.bytes, \
   (iter).next_done = false)
131 /* Access to the current character. */
/* The current character of ITER: its cur member, usable as an lvalue.  */
#define mbi_cur(iter) ((iter).cur)
/* Pointer to the current character of ITER (also usable as an lvalue).  */
#define mbi_cur_ptr(iter) ((iter).cur.ptr)
/* Map an uppercase letter to its lowercase counterpart and leave any other
   value unchanged.  ISO C's tolower already returns its argument untouched
   when it is not an uppercase letter, so no isupper guard is needed; dropping
   the guard means CH is evaluated exactly once, making arguments with side
   effects (e.g. *p++) safe.  As with every <ctype.h> function, CH must be
   representable as an unsigned char or be equal to EOF.  */
#define TOLOWER(Ch) (tolower (Ch))
/* Compare strings S1 and S2, ignoring case, returning less than, equal to or
   greater than zero if S1 is lexicographically less than, equal to or greater
   than S2.
   Note: This function may, in multibyte locales, return 0 for strings of
   different lengths!  */
145 strcasecmp (const char *s1, const char *s2)
150 /* Be careful not to look at the entire extent of s1 or s2 until needed.
151 This is useful because when two strings differ, the difference is
152 most often already in the very few first characters. */
156 mbi_iterator_t iter1;
157 mbi_iterator_t iter2;
159 mbi_init (iter1, s1);
160 mbi_init (iter2, s2);
162 while (mbi_avail (iter1) && mbi_avail (iter2))
164 int cmp = mb_casecmp (mbi_cur (iter1), mbi_cur (iter2));
172 if (mbi_avail (iter1))
173 /* s2 terminated before s1. */
175 if (mbi_avail (iter2))
176 /* s1 terminated before s2. */
183 const unsigned char *p1 = (const unsigned char *) s1;
184 const unsigned char *p2 = (const unsigned char *) s2;
185 unsigned char c1, c2;