X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fmbsstr.c;h=35fd02b544f4f1aae6e59a691e46372974d9f4a9;hb=4cd8485e904c1a98470eb548fe6ac0f04e343c4e;hp=f28311815638a8335cea3e4bbd2992ca635355a2;hpb=69f441a727eb38c7bff8bb1ca3cbd90dfc1cb0ae;p=gnulib.git diff --git a/lib/mbsstr.c b/lib/mbsstr.c index f28311815..35fd02b54 100644 --- a/lib/mbsstr.c +++ b/lib/mbsstr.c @@ -1,11 +1,11 @@ /* Searching in a string. - Copyright (C) 2005-2007 Free Software Foundation, Inc. + Copyright (C) 2005-2008 Free Software Foundation, Inc. Written by Bruno Haible , 2005. - This program is free software; you can redistribute it and/or modify + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -13,8 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + along with this program. If not, see . */ #include @@ -24,100 +23,21 @@ #include #include /* for NULL, in case a nonstandard string.h lacks it */ +#include "malloca.h" #if HAVE_MBRTOWC # include "mbuiter.h" #endif -/* Knuth-Morris-Pratt algorithm. - See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm - Return a boolean indicating success. */ - -static bool -knuth_morris_pratt_unibyte (const char *haystack, const char *needle, - const char **resultp) -{ - size_t m = strlen (needle); - - /* Allocate the table. */ - size_t *table = (size_t *) malloc (m * sizeof (size_t)); - if (table == NULL) - return false; - /* Fill the table. - For 0 < i < m: - 0 < table[i] <= i is defined such that - rhaystack[0..i-1] == needle[0..i-1] and rhaystack[i] != needle[i] - implies - forall 0 <= x < table[i]: rhaystack[x..x+m-1] != needle[0..m-1], - and table[i] is as large as possible with this property. - table[0] remains uninitialized. */ - { - size_t i, j; - - table[1] = 1; - j = 0; - for (i = 2; i < m; i++) - { - unsigned char b = (unsigned char) needle[i - 1]; - - for (;;) - { - if (b == (unsigned char) needle[j]) - { - table[i] = i - ++j; - break; - } - if (j == 0) - { - table[i] = i; - break; - } - j = j - table[j]; - } - } - } - - /* Search, using the table to accelerate the processing. */ - { - size_t j; - const char *rhaystack; - const char *phaystack; - - *resultp = NULL; - j = 0; - rhaystack = haystack; - phaystack = haystack; - /* Invariant: phaystack = rhaystack + j. */ - while (*phaystack != '\0') - if ((unsigned char) needle[j] == (unsigned char) *phaystack) - { - j++; - phaystack++; - if (j == m) - { - /* The entire needle has been found. */ - *resultp = rhaystack; - break; - } - } - else if (j > 0) - { - /* Found a match of needle[0..j-1], mismatch at needle[j]. */ - rhaystack += table[j]; - j -= table[j]; - } - else - { - /* Found a mismatch at needle[0] already. */ - rhaystack++; - phaystack++; - } - } - - free (table); - return true; -} +/* Knuth-Morris-Pratt algorithm. */ +#define CANON_ELEMENT(c) c +#include "str-kmp.h" #if HAVE_MBRTOWC +/* Knuth-Morris-Pratt algorithm. + See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm + Return a boolean indicating success: + Return true and set *RESULTP if the search was completed. + Return false if it was aborted because not enough memory was available. */ static bool knuth_morris_pratt_multibyte (const char *haystack, const char *needle, const char **resultp) @@ -127,7 +47,7 @@ knuth_morris_pratt_multibyte (const char *haystack, const char *needle, size_t *table; /* Allocate room for needle_mbchars and the table. */ - char *memory = (char *) malloc (m * (sizeof (mbchar_t) + sizeof (size_t))); + char *memory = (char *) nmalloca (m, sizeof (mbchar_t) + sizeof (size_t)); if (memory == NULL) return false; needle_mbchars = (mbchar_t *) memory; @@ -146,34 +66,67 @@ knuth_morris_pratt_multibyte (const char *haystack, const char *needle, /* Fill the table. For 0 < i < m: 0 < table[i] <= i is defined such that - rhaystack[0..i-1] == needle[0..i-1] and rhaystack[i] != needle[i] - implies - forall 0 <= x < table[i]: rhaystack[x..x+m-1] != needle[0..m-1], + forall 0 < x < table[i]: needle[x..i-1] != needle[0..i-1-x], and table[i] is as large as possible with this property. + This implies: + 1) For 0 < i < m: + If table[i] < i, + needle[table[i]..i-1] = needle[0..i-1-table[i]]. + 2) For 0 < i < m: + rhaystack[0..i-1] == needle[0..i-1] + and exists h, i <= h < m: rhaystack[h] != needle[h] + implies + forall 0 <= x < table[i]: rhaystack[x..x+m-1] != needle[0..m-1]. table[0] remains uninitialized. */ { size_t i, j; + /* i = 1: Nothing to verify for x = 0. */ table[1] = 1; j = 0; + for (i = 2; i < m; i++) { + /* Here: j = i-1 - table[i-1]. + The inequality needle[x..i-1] != needle[0..i-1-x] is known to hold + for x < table[i-1], by induction. + Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1]. */ mbchar_t *b = &needle_mbchars[i - 1]; for (;;) { + /* Invariants: The inequality needle[x..i-1] != needle[0..i-1-x] + is known to hold for x < i-1-j. + Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1]. */ if (mb_equal (*b, needle_mbchars[j])) { + /* Set table[i] := i-1-j. */ table[i] = i - ++j; break; } + /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds + for x = i-1-j, because + needle[i-1] != needle[j] = needle[i-1-x]. */ if (j == 0) { + /* The inequality holds for all possible x. */ table[i] = i; break; } + /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds + for i-1-j < x < i-1-j+table[j], because for these x: + needle[x..i-2] + = needle[x-(i-1-j)..j-1] + != needle[0..j-1-(x-(i-1-j))] (by definition of table[j]) + = needle[0..i-2-x], + hence needle[x..i-1] != needle[0..i-1-x]. + Furthermore + needle[i-1-j+table[j]..i-2] + = needle[table[j]..j-1] + = needle[0..j-1-table[j]] (by definition of table[j]). */ j = j - table[j]; } + /* Here: j = i - table[i]. */ } } @@ -222,7 +175,7 @@ knuth_morris_pratt_multibyte (const char *haystack, const char *needle, } } - free (memory); + freea (memory); return true; } #endif