1 /* Substring test for UTF-8/UTF-16/UTF-32 strings.
2 Copyright (C) 1999, 2002, 2006, 2010-2011 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2002, 2005.
5 This program is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Lesser General Public License as published
7 by the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 FUNC (const UNIT *haystack, const UNIT *needle)
21 UNIT first = needle[0];
23 /* Is needle empty? */
25 return (UNIT *) haystack;
27 /* Is needle nearly empty (only one unit)? */
29 return U_STRCHR (haystack, first);
32 /* Is needle nearly empty (only one character)? */
35 int count = U_STRMBTOUC (&first_uc, needle);
36 if (count > 0 && needle[count] == 0)
37 return U_STRCHR (haystack, first_uc);
42 return (uint8_t *) strstr ((const char *) haystack, (const char *) needle);
45 /* Minimizing the worst-case complexity:
46 Let n = U_STRLEN(haystack), m = U_STRLEN(needle).
47 The naïve algorithm is O(n*m) worst-case.
48 The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a
50 To achieve linear complexity and yet amortize the cost of the
51 memory allocation, we activate the Knuth-Morris-Pratt algorithm
52 only once the naïve algorithm has already run for some time; more
54 - the outer loop count is >= 10,
55 - the average number of comparisons per outer loop is >= 5,
56 - the total number of comparisons is >= m.
57 But we try it only once. If the memory allocation attempt failed,
60 size_t outer_loop_count = 0;
61 size_t comparison_count = 0;
62 size_t last_ccount = 0; /* last comparison count */
63 const UNIT *needle_last_ccount = needle; /* = needle + last_ccount */
65 /* Speed up the following searches of needle by caching its first
75 /* See whether it's advisable to use an asymptotically faster
78 && outer_loop_count >= 10
79 && comparison_count >= 5 * outer_loop_count)
81 /* See if needle + comparison_count now reaches the end of
83 if (needle_last_ccount != NULL)
86 U_STRNLEN (needle_last_ccount,
87 comparison_count - last_ccount);
88 if (*needle_last_ccount == 0)
89 needle_last_ccount = NULL;
90 last_ccount = comparison_count;
92 if (needle_last_ccount == NULL)
94 /* Try the Knuth-Morris-Pratt algorithm. */
97 knuth_morris_pratt (haystack,
98 needle - 1, U_STRLEN (needle - 1),
101 return (UNIT *) result;
109 /* The first character matches. */
111 const UNIT *rhaystack = haystack + 1;
112 const UNIT *rneedle = needle;
114 for (;; rhaystack++, rneedle++)
118 return (UNIT *) haystack;
123 if (*rhaystack != *rneedle)
124 /* Nothing in this round. */