X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fstr-two-way.h;h=7dcb38721139e18e6ac5c22a8ba6ca958b8bb502;hb=0ce62ceb71b8c4c07e23f3d51e22040c5c51a466;hp=dd8097667d93d6b57ae755d8d7486a209c655713;hpb=d60f3b0c6b0f93a601acd1cfd3923f94ca05abb0;p=gnulib.git diff --git a/lib/str-two-way.h b/lib/str-two-way.h index dd8097667..7dcb38721 100644 --- a/lib/str-two-way.h +++ b/lib/str-two-way.h @@ -44,14 +44,15 @@ #include #include -/* We use the Two-Way string matching algorithm, which guarantees - linear complexity with constant space. Additionally, for long - needles, we also use a bad character shift table similar to the - Boyer-Moore algorithm to achieve improved (potentially sub-linear) - performance. - - See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 - and http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm +/* We use the Two-Way string matching algorithm (also known as + Crochemore-Perrin), which guarantees linear complexity with + constant space. Additionally, for long needles, we also use a bad + character shift table similar to the Boyer-Moore algorithm to + achieve improved (potentially sub-linear) performance. + + See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260, + http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm, + http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.34.6641&rep=rep1&type=pdf */ /* Point at which computing a bad-byte shift table is likely to be @@ -108,7 +109,7 @@ static size_t critical_factorization (const unsigned char *needle, size_t needle_len, size_t *period) { - /* Index of last byte of left half. */ + /* Index of last byte of left half, or SIZE_MAX. */ size_t max_suffix, max_suffix_rev; size_t j; /* Index into NEEDLE for current candidate suffix. */ size_t k; /* Offset into current period. 
*/ @@ -124,8 +125,8 @@ critical_factorization (const unsigned char *needle, size_t needle_len, } /* Invariants: - 1 <= j < NEEDLE_LEN - 1 - 0 <= max_suffix{,_rev} < j + 0 <= j < NEEDLE_LEN - 1 + -1 <= max_suffix{,_rev} < j (treating SIZE_MAX as if it were signed) min(max_suffix, max_suffix_rev) < global period of NEEDLE 1 <= p <= global period of NEEDLE p == global period of the substring NEEDLE[max_suffix{,_rev}+1...j] @@ -133,8 +134,9 @@ critical_factorization (const unsigned char *needle, size_t needle_len, */ /* Perform lexicographic search. */ - max_suffix = 0; - j = k = p = 1; + max_suffix = SIZE_MAX; + j = 0; + k = p = 1; while (j + k < needle_len) { a = CANON_ELEMENT (needle[j + k]); @@ -167,8 +169,9 @@ critical_factorization (const unsigned char *needle, size_t needle_len, *period = p; /* Perform reverse lexicographic search. */ - max_suffix_rev = 0; - j = k = p = 1; + max_suffix_rev = SIZE_MAX; + j = 0; + k = p = 1; while (j + k < needle_len) { a = CANON_ELEMENT (needle[j + k]);