X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fstr-two-way.h;h=3f67741bb29077bb8d98fe225cade37e259ac4b4;hb=ad8f11251eeef41965bd7a358516325238117971;hp=b0338a70a1a9e02d80a17d007fcfa4c6ff4dd02d;hpb=a37217922a7b85ddaab2d802b275905f6e9310ac;p=gnulib.git diff --git a/lib/str-two-way.h b/lib/str-two-way.h index b0338a70a..3f67741bb 100644 --- a/lib/str-two-way.h +++ b/lib/str-two-way.h @@ -1,5 +1,5 @@ /* Byte-wise substring search, using the Two-Way algorithm. - Copyright (C) 2008 Free Software Foundation, Inc. + Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. Written by Eric Blake , 2008. @@ -21,21 +21,21 @@ , and define: RESULT_TYPE A macro that expands to the return type. AVAILABLE(h, h_l, j, n_l) - A macro that returns nonzero if there are - at least N_L bytes left starting at H[J]. - H is 'unsigned char *', H_L, J, and N_L - are 'size_t'; H_L is an lvalue. For - NUL-terminated searches, H_L can be - modified each iteration to avoid having - to compute the end of H up front. + A macro that returns nonzero if there are + at least N_L bytes left starting at H[J]. + H is 'unsigned char *', H_L, J, and N_L + are 'size_t'; H_L is an lvalue. For + NUL-terminated searches, H_L can be + modified each iteration to avoid having + to compute the end of H up front. For case-insensitivity, you may optionally define: CMP_FUNC(p1, p2, l) A macro that returns 0 iff the first L - characters of P1 and P2 are equal. + characters of P1 and P2 are equal. CANON_ELEMENT(c) A macro that canonicalizes an element right after - it has been fetched from one of the two strings. - The argument is an 'unsigned char'; the result - must be an 'unsigned char' as well. + it has been fetched from one of the two strings. + The argument is an 'unsigned char'; the result + must be an 'unsigned char' as well. This file undefines the macros documented above, and defines LONG_NEEDLE_THRESHOLD. @@ -103,7 +103,7 @@ periodicity. */ static size_t critical_factorization (const unsigned char *needle, size_t needle_len, - size_t *period) + size_t *period) { /* Index of last byte of left half, or SIZE_MAX. */ size_t max_suffix, max_suffix_rev; @@ -130,29 +130,29 @@ critical_factorization (const unsigned char *needle, size_t needle_len, a = CANON_ELEMENT (needle[j + k]); b = CANON_ELEMENT (needle[max_suffix + k]); if (a < b) - { - /* Suffix is smaller, period is entire prefix so far. */ - j += k; - k = 1; - p = j - max_suffix; - } + { + /* Suffix is smaller, period is entire prefix so far. */ + j += k; + k = 1; + p = j - max_suffix; + } else if (a == b) - { - /* Advance through repetition of the current period. */ - if (k != p) - ++k; - else - { - j += p; - k = 1; - } - } + { + /* Advance through repetition of the current period. */ + if (k != p) + ++k; + else + { + j += p; + k = 1; + } + } else /* b < a */ - { - /* Suffix is larger, start over from current location. */ - max_suffix = j++; - k = p = 1; - } + { + /* Suffix is larger, start over from current location. */ + max_suffix = j++; + k = p = 1; + } } *period = p; @@ -165,29 +165,29 @@ critical_factorization (const unsigned char *needle, size_t needle_len, a = CANON_ELEMENT (needle[j + k]); b = CANON_ELEMENT (needle[max_suffix_rev + k]); if (b < a) - { - /* Suffix is smaller, period is entire prefix so far. */ - j += k; - k = 1; - p = j - max_suffix_rev; - } + { + /* Suffix is smaller, period is entire prefix so far. */ + j += k; + k = 1; + p = j - max_suffix_rev; + } else if (a == b) - { - /* Advance through repetition of the current period. */ - if (k != p) - ++k; - else - { - j += p; - k = 1; - } - } + { + /* Advance through repetition of the current period. */ + if (k != p) + ++k; + else + { + j += p; + k = 1; + } + } else /* a < b */ - { - /* Suffix is larger, start over from current location. */ - max_suffix_rev = j++; - k = p = 1; - } + { + /* Suffix is larger, start over from current location. */ + max_suffix_rev = j++; + k = p = 1; + } } /* Choose the longer suffix. Return the first byte of the right @@ -210,7 +210,7 @@ critical_factorization (const unsigned char *needle, size_t needle_len, HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching. */ static RETURN_TYPE two_way_short_needle (const unsigned char *haystack, size_t haystack_len, - const unsigned char *needle, size_t needle_len) + const unsigned char *needle, size_t needle_len) { size_t i; /* Index into current byte of NEEDLE. */ size_t j; /* Index into current window of HAYSTACK. */ @@ -227,65 +227,65 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len, if (CMP_FUNC (needle, needle + period, suffix) == 0) { /* Entire needle is periodic; a mismatch can only advance by the - period, so use memory to avoid rescanning known occurrences - of the period. */ + period, so use memory to avoid rescanning known occurrences + of the period. */ size_t memory = 0; j = 0; while (AVAILABLE (haystack, haystack_len, j, needle_len)) - { - /* Scan for matches in right half. */ - i = MAX (suffix, memory); - while (i < needle_len && (CANON_ELEMENT (needle[i]) - == CANON_ELEMENT (haystack[i + j]))) - ++i; - if (needle_len <= i) - { - /* Scan for matches in left half. */ - i = suffix - 1; - while (memory < i + 1 && (CANON_ELEMENT (needle[i]) - == CANON_ELEMENT (haystack[i + j]))) - --i; - if (i + 1 < memory + 1) - return (RETURN_TYPE) (haystack + j); - /* No match, so remember how many repetitions of period - on the right half were scanned. */ - j += period; - memory = needle_len - period; - } - else - { - j += i - suffix + 1; - memory = 0; - } - } + { + /* Scan for matches in right half. */ + i = MAX (suffix, memory); + while (i < needle_len && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + ++i; + if (needle_len <= i) + { + /* Scan for matches in left half. */ + i = suffix - 1; + while (memory < i + 1 && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + --i; + if (i + 1 < memory + 1) + return (RETURN_TYPE) (haystack + j); + /* No match, so remember how many repetitions of period + on the right half were scanned. */ + j += period; + memory = needle_len - period; + } + else + { + j += i - suffix + 1; + memory = 0; + } + } } else { /* The two halves of needle are distinct; no extra memory is - required, and any mismatch results in a maximal shift. */ + required, and any mismatch results in a maximal shift. */ period = MAX (suffix, needle_len - suffix) + 1; j = 0; while (AVAILABLE (haystack, haystack_len, j, needle_len)) - { - /* Scan for matches in right half. */ - i = suffix; - while (i < needle_len && (CANON_ELEMENT (needle[i]) - == CANON_ELEMENT (haystack[i + j]))) - ++i; - if (needle_len <= i) - { - /* Scan for matches in left half. */ - i = suffix - 1; - while (i != SIZE_MAX && (CANON_ELEMENT (needle[i]) - == CANON_ELEMENT (haystack[i + j]))) - --i; - if (i == SIZE_MAX) - return (RETURN_TYPE) (haystack + j); - j += period; - } - else - j += i - suffix + 1; - } + { + /* Scan for matches in right half. */ + i = suffix; + while (i < needle_len && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + ++i; + if (needle_len <= i) + { + /* Scan for matches in left half. */ + i = suffix - 1; + while (i != SIZE_MAX && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + --i; + if (i == SIZE_MAX) + return (RETURN_TYPE) (haystack + j); + j += period; + } + else + j += i - suffix + 1; + } } return NULL; } @@ -304,7 +304,7 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len, sublinear performance is not possible. */ static RETURN_TYPE two_way_long_needle (const unsigned char *haystack, size_t haystack_len, - const unsigned char *needle, size_t needle_len) + const unsigned char *needle, size_t needle_len) { size_t i; /* Index into current byte of NEEDLE. */ size_t j; /* Index into current window of HAYSTACK. */ @@ -331,93 +331,93 @@ two_way_long_needle (const unsigned char *haystack, size_t haystack_len, if (CMP_FUNC (needle, needle + period, suffix) == 0) { /* Entire needle is periodic; a mismatch can only advance by the - period, so use memory to avoid rescanning known occurrences - of the period. */ + period, so use memory to avoid rescanning known occurrences + of the period. */ size_t memory = 0; size_t shift; j = 0; while (AVAILABLE (haystack, haystack_len, j, needle_len)) - { - /* Check the last byte first; if it does not match, then - shift to the next possible match location. */ - shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])]; - if (0 < shift) - { - if (memory && shift < period) - { - /* Since needle is periodic, but the last period has - a byte out of place, there can be no match until - after the mismatch. */ - shift = needle_len - period; - memory = 0; - } - j += shift; - continue; - } - /* Scan for matches in right half. The last byte has - already been matched, by virtue of the shift table. */ - i = MAX (suffix, memory); - while (i < needle_len - 1 && (CANON_ELEMENT (needle[i]) - == CANON_ELEMENT (haystack[i + j]))) - ++i; - if (needle_len - 1 <= i) - { - /* Scan for matches in left half. */ - i = suffix - 1; - while (memory < i + 1 && (CANON_ELEMENT (needle[i]) - == CANON_ELEMENT (haystack[i + j]))) - --i; - if (i + 1 < memory + 1) - return (RETURN_TYPE) (haystack + j); - /* No match, so remember how many repetitions of period - on the right half were scanned. */ - j += period; - memory = needle_len - period; - } - else - { - j += i - suffix + 1; - memory = 0; - } - } + { + /* Check the last byte first; if it does not match, then + shift to the next possible match location. */ + shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])]; + if (0 < shift) + { + if (memory && shift < period) + { + /* Since needle is periodic, but the last period has + a byte out of place, there can be no match until + after the mismatch. */ + shift = needle_len - period; + memory = 0; + } + j += shift; + continue; + } + /* Scan for matches in right half. The last byte has + already been matched, by virtue of the shift table. */ + i = MAX (suffix, memory); + while (i < needle_len - 1 && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + ++i; + if (needle_len - 1 <= i) + { + /* Scan for matches in left half. */ + i = suffix - 1; + while (memory < i + 1 && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + --i; + if (i + 1 < memory + 1) + return (RETURN_TYPE) (haystack + j); + /* No match, so remember how many repetitions of period + on the right half were scanned. */ + j += period; + memory = needle_len - period; + } + else + { + j += i - suffix + 1; + memory = 0; + } + } } else { /* The two halves of needle are distinct; no extra memory is - required, and any mismatch results in a maximal shift. */ + required, and any mismatch results in a maximal shift. */ size_t shift; period = MAX (suffix, needle_len - suffix) + 1; j = 0; while (AVAILABLE (haystack, haystack_len, j, needle_len)) - { - /* Check the last byte first; if it does not match, then - shift to the next possible match location. */ - shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])]; - if (0 < shift) - { - j += shift; - continue; - } - /* Scan for matches in right half. The last byte has - already been matched, by virtue of the shift table. */ - i = suffix; - while (i < needle_len - 1 && (CANON_ELEMENT (needle[i]) - == CANON_ELEMENT (haystack[i + j]))) - ++i; - if (needle_len - 1 <= i) - { - /* Scan for matches in left half. */ - i = suffix - 1; - while (i != SIZE_MAX && (CANON_ELEMENT (needle[i]) - == CANON_ELEMENT (haystack[i + j]))) - --i; - if (i == SIZE_MAX) - return (RETURN_TYPE) (haystack + j); - j += period; - } - else - j += i - suffix + 1; - } + { + /* Check the last byte first; if it does not match, then + shift to the next possible match location. */ + shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])]; + if (0 < shift) + { + j += shift; + continue; + } + /* Scan for matches in right half. The last byte has + already been matched, by virtue of the shift table. */ + i = suffix; + while (i < needle_len - 1 && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + ++i; + if (needle_len - 1 <= i) + { + /* Scan for matches in left half. */ + i = suffix - 1; + while (i != SIZE_MAX && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + --i; + if (i == SIZE_MAX) + return (RETURN_TYPE) (haystack + j); + j += period; + } + else + j += i - suffix + 1; + } } return NULL; }