1 /* c-strcasestr.c -- case insensitive substring search in C locale
2 Copyright (C) 2005-2007 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2005.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include "c-strcasestr.h"
30 /* Knuth-Morris-Pratt algorithm. */
31 #define CANON_ELEMENT(c) c_tolower (c)
34 /* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive
36 Note: This function may, in multibyte locales, return success even if
37 strlen (haystack) < strlen (needle) ! */
39 c_strcasestr (const char *haystack, const char *needle)
41 /* Be careful not to look at the entire extent of haystack or needle
42 until needed. This is useful because of these two cases:
43 - haystack may be very long, and a match of needle found early,
44 - needle may be very long, and not even a short initial segment of
45 needle may be found in haystack. */
48 /* Minimizing the worst-case complexity:
49 Let n = strlen(haystack), m = strlen(needle).
50 The naïve algorithm is O(n*m) worst-case.
51 The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a
53 To achieve linear complexity and yet amortize the cost of the memory
54 allocation, we activate the Knuth-Morris-Pratt algorithm only once
55 the naïve algorithm has already run for some time; more precisely,
57 - the outer loop count is >= 10,
58 - the average number of comparisons per outer loop is >= 5,
59 - the total number of comparisons is >= m.
60 But we try it only once. If the memory allocation attempt failed,
63 size_t outer_loop_count = 0;
64 size_t comparison_count = 0;
65 size_t last_ccount = 0; /* last comparison count */
66 const char *needle_last_ccount = needle; /* = needle + last_ccount */
68 /* Speed up the following searches of needle by caching its first
70 unsigned char b = c_tolower ((unsigned char) *needle);
75 if (*haystack == '\0')
79 /* See whether it's advisable to use an asymptotically faster
82 && outer_loop_count >= 10
83 && comparison_count >= 5 * outer_loop_count)
85 /* See if needle + comparison_count now reaches the end of
87 if (needle_last_ccount != NULL)
90 strnlen (needle_last_ccount, comparison_count - last_ccount);
91 if (*needle_last_ccount == '\0')
92 needle_last_ccount = NULL;
93 last_ccount = comparison_count;
95 if (needle_last_ccount == NULL)
97 /* Try the Knuth-Morris-Pratt algorithm. */
100 knuth_morris_pratt_unibyte (haystack, needle - 1, &result);
102 return (char *) result;
109 if (c_tolower ((unsigned char) *haystack) == b)
110 /* The first character matches. */
112 const char *rhaystack = haystack + 1;
113 const char *rneedle = needle;
115 for (;; rhaystack++, rneedle++)
117 if (*rneedle == '\0')
119 return (char *) haystack;
120 if (*rhaystack == '\0')
124 if (c_tolower ((unsigned char) *rhaystack)
125 != c_tolower ((unsigned char) *rneedle))
126 /* Nothing in this round. */
133 return (char *) haystack;