X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;ds=sidebyside;f=lib%2Fstr-two-way.h;h=7dcb38721139e18e6ac5c22a8ba6ca958b8bb502;hb=b344de996cd51f8a2f2558a3172016b64d99c622;hp=5f150af6afb6681e7647a892911e6e653518bc4d;hpb=c823199df2cc03b6bd70d0a2fef5999af82792fe;p=gnulib.git diff --git a/lib/str-two-way.h b/lib/str-two-way.h index 5f150af6a..7dcb38721 100644 --- a/lib/str-two-way.h +++ b/lib/str-two-way.h @@ -1,5 +1,5 @@ /* Byte-wise substring search, using the Two-Way algorithm. - Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc. + Copyright (C) 2008-2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Written by Eric Blake , 2008. @@ -44,14 +44,15 @@ #include <limits.h> #include <stdint.h> -/* We use the Two-Way string matching algorithm, which guarantees - linear complexity with constant space. Additionally, for long - needles, we also use a bad character shift table similar to the - Boyer-Moore algorithm to achieve improved (potentially sub-linear) - performance. +/* We use the Two-Way string matching algorithm (also known as + Crochemore-Perrin), which guarantees linear complexity with + constant space. Additionally, for long needles, we also use a bad + character shift table similar to the Boyer-Moore algorithm to + achieve improved (potentially sub-linear) performance. - See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 - and http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm + See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260, + http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm, + http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.34.6641&rep=rep1&type=pdf */ /* Point at which computing a bad-byte shift table is likely to be @@ -108,7 +109,7 @@ static size_t critical_factorization (const unsigned char *needle, size_t needle_len, size_t *period) { - /* Index of last byte of left half. */ + /* Index of last byte of left half, or SIZE_MAX. 
*/ size_t max_suffix, max_suffix_rev; size_t j; /* Index into NEEDLE for current candidate suffix. */ size_t k; /* Offset into current period. */ @@ -124,8 +125,8 @@ critical_factorization (const unsigned char *needle, size_t needle_len, } /* Invariants: - 1 <= j < NEEDLE_LEN - 1 - 0 <= max_suffix{,_rev} < j + 0 <= j < NEEDLE_LEN - 1 + -1 <= max_suffix{,_rev} < j (treating SIZE_MAX as if it were signed) min(max_suffix, max_suffix_rev) < global period of NEEDLE 1 <= p <= global period of NEEDLE p == global period of the substring NEEDLE[max_suffix{,_rev}+1...j] @@ -133,8 +134,9 @@ critical_factorization (const unsigned char *needle, size_t needle_len, */ /* Perform lexicographic search. */ - max_suffix = 0; - j = k = p = 1; + max_suffix = SIZE_MAX; + j = 0; + k = p = 1; while (j + k < needle_len) { a = CANON_ELEMENT (needle[j + k]); @@ -167,8 +169,9 @@ critical_factorization (const unsigned char *needle, size_t needle_len, *period = p; /* Perform reverse lexicographic search. */ - max_suffix_rev = 0; - j = k = p = 1; + max_suffix_rev = SIZE_MAX; + j = 0; + k = p = 1; while (j + k < needle_len) { a = CANON_ELEMENT (needle[j + k]);