X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;ds=sidebyside;f=lib%2Fgit-merge-changelog.c;h=a199f32d5957308e95663207e3aaf37bb8555d32;hb=ade4fb6ef4588f370c1bdbdb90665501140a608d;hp=1911c08e5ca78b0932e239548651ada0e1bb09ed;hpb=ea180b80e8ba9da7bce84c12175343b964363688;p=gnulib.git diff --git a/lib/git-merge-changelog.c b/lib/git-merge-changelog.c index 1911c08e5..a199f32d5 100644 --- a/lib/git-merge-changelog.c +++ b/lib/git-merge-changelog.c @@ -1,5 +1,5 @@ /* git-merge-changelog - git "merge" driver for GNU style ChangeLog files. - Copyright (C) 2008 Bruno Haible + Copyright (C) 2008-2009 Bruno Haible This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,7 +21,7 @@ default merge driver has no clue how to deal with this. Furthermore the conflicts are presented with more <<<< ==== >>>> markers than necessary; this is because the default merge driver makes pointless - effects to look at the individual line changes inside a ChangeLog entry. + efforts to look at the individual line changes inside a ChangeLog entry. This program serves as a 'git' merge driver that avoids these problems. 1. It produces no conflict when ChangeLog entries have been inserted @@ -186,7 +186,7 @@ entry_equals (const void *elt1, const void *elt2) const struct entry *entry2 = (const struct entry *) elt2; return entry1->length == entry2->length && memcmp (entry1->string, entry2->string, entry1->length) == 0; -}; +} /* Return a hash code of the contents of a ChangeLog entry. */ static size_t @@ -211,9 +211,12 @@ entry_hashcode (const void *elt) /* Perform a fuzzy comparison of two ChangeLog entries. Return a similarity measure of the two entries, a value between 0 and 1. - 0 stands for very distinct, 1 for identical. */ + 0 stands for very distinct, 1 for identical. + If the result is < LOWER_BOUND, an arbitrary other value < LOWER_BOUND can + be returned. 
*/ static double -entry_fstrcmp (const struct entry *entry1, const struct entry *entry2) +entry_fstrcmp (const struct entry *entry1, const struct entry *entry2, + double lower_bound) { /* fstrcmp works only on NUL terminated strings. */ char *memory; @@ -233,7 +236,8 @@ entry_fstrcmp (const struct entry *entry1, const struct entry *entry2) p += entry2->length; *p++ = '\0'; } - similarity = fstrcmp (memory, memory + entry1->length + 1); + similarity = + fstrcmp_bounded (memory, memory + entry1->length + 1, lower_bound); freea (memory); return similarity; } @@ -325,18 +329,159 @@ read_changelog_file (const char *filename, struct changelog_file *result) } } +/* A mapping (correspondence) between entries of FILE1 and of FILE2. */ +struct entries_mapping +{ + struct changelog_file *file1; + struct changelog_file *file2; + /* Mapping from indices in FILE1 to indices in FILE2. + A value -1 means that the entry from FILE1 is not found in FILE2. + A value -2 means that it has not yet been computed. */ + ssize_t *index_mapping; + /* Mapping from indices in FILE2 to indices in FILE1. + A value -1 means that the entry from FILE2 is not found in FILE1. + A value -2 means that it has not yet been computed. */ + ssize_t *index_mapping_reverse; +}; + +/* Look up (or lazily compute) the mapping of an entry in FILE1. + i is the index in FILE1. + Return the index in FILE2, or -1 when the entry is not found in FILE2. */ +static ssize_t +entries_mapping_get (struct entries_mapping *mapping, ssize_t i) +{ + if (mapping->index_mapping[i] < -1) + { + struct changelog_file *file1 = mapping->file1; + struct changelog_file *file2 = mapping->file2; + size_t n1 = file1->num_entries; + size_t n2 = file2->num_entries; + struct entry *entry_i = file1->entries[i]; + ssize_t j; + + /* Search whether it approximately occurs in file2. 
*/ + ssize_t best_j = -1; + double best_j_similarity = 0.0; + for (j = n2 - 1; j >= 0; j--) + if (mapping->index_mapping_reverse[j] < 0) + { + double similarity = + entry_fstrcmp (entry_i, file2->entries[j], best_j_similarity); + if (similarity > best_j_similarity) + { + best_j = j; + best_j_similarity = similarity; + } + } + if (best_j_similarity >= FSTRCMP_THRESHOLD) + { + /* Found a similar entry in file2. */ + struct entry *entry_j = file2->entries[best_j]; + /* Search whether it approximately occurs in file1 at index i. */ + ssize_t best_i = -1; + double best_i_similarity = 0.0; + ssize_t ii; + for (ii = n1 - 1; ii >= 0; ii--) + if (mapping->index_mapping[ii] < 0) + { + double similarity = + entry_fstrcmp (file1->entries[ii], entry_j, + best_i_similarity); + if (similarity > best_i_similarity) + { + best_i = ii; + best_i_similarity = similarity; + } + } + if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i) + { + mapping->index_mapping[i] = best_j; + mapping->index_mapping_reverse[best_j] = i; + } + } + if (mapping->index_mapping[i] < -1) + /* It does not approximately occur in FILE2. + Remember it, for next time. */ + mapping->index_mapping[i] = -1; + } + return mapping->index_mapping[i]; +} + +/* Look up (or lazily compute) the mapping of an entry in FILE2. + j is the index in FILE2. + Return the index in FILE1, or -1 when the entry is not found in FILE1. */ +static ssize_t +entries_mapping_reverse_get (struct entries_mapping *mapping, ssize_t j) +{ + if (mapping->index_mapping_reverse[j] < -1) + { + struct changelog_file *file1 = mapping->file1; + struct changelog_file *file2 = mapping->file2; + size_t n1 = file1->num_entries; + size_t n2 = file2->num_entries; + struct entry *entry_j = file2->entries[j]; + ssize_t i; + + /* Search whether it approximately occurs in file1. 
*/ + ssize_t best_i = -1; + double best_i_similarity = 0.0; + for (i = n1 - 1; i >= 0; i--) + if (mapping->index_mapping[i] < 0) + { + double similarity = + entry_fstrcmp (file1->entries[i], entry_j, best_i_similarity); + if (similarity > best_i_similarity) + { + best_i = i; + best_i_similarity = similarity; + } + } + if (best_i_similarity >= FSTRCMP_THRESHOLD) + { + /* Found a similar entry in file1. */ + struct entry *entry_i = file1->entries[best_i]; + /* Search whether it approximately occurs in file2 at index j. */ + ssize_t best_j = -1; + double best_j_similarity = 0.0; + ssize_t jj; + for (jj = n2 - 1; jj >= 0; jj--) + if (mapping->index_mapping_reverse[jj] < 0) + { + double similarity = + entry_fstrcmp (entry_i, file2->entries[jj], + best_j_similarity); + if (similarity > best_j_similarity) + { + best_j = jj; + best_j_similarity = similarity; + } + } + if (best_j_similarity >= FSTRCMP_THRESHOLD && best_j == j) + { + mapping->index_mapping_reverse[j] = best_i; + mapping->index_mapping[best_i] = j; + } + } + if (mapping->index_mapping_reverse[j] < -1) + /* It does not approximately occur in FILE1. + Remember it, for next time. */ + mapping->index_mapping_reverse[j] = -1; + } + return mapping->index_mapping_reverse[j]; +} + /* Compute a mapping (correspondence) between entries of FILE1 and of FILE2. - Return a set of two arrays: - - An array mapping FILE1 indices to FILE2 indices (or -1 when the entry - from FILE1 is not found in FILE2). - - An array mapping FILE2 indices to FILE1 indices (or -1 when the entry - from FILE2 is not found in FILE1). The correspondence also takes into account small modifications; i.e. the indicated relation is not equality of entries but best-match similarity - of entries. */ + of entries. + If FULL is true, the maximum of matching is done up-front. If it is false, + it is done in a lazy way through the functions entries_mapping_get and + entries_mapping_reverse_get. + Return the result in *RESULT. 
*/ static void compute_mapping (struct changelog_file *file1, struct changelog_file *file2, - ssize_t *result[2]) + bool full, + struct entries_mapping *result) { /* Mapping from indices in file1 to indices in file2. */ ssize_t *index_mapping; @@ -348,15 +493,15 @@ compute_mapping (struct changelog_file *file1, struct changelog_file *file2, index_mapping = XNMALLOC (n1, ssize_t); for (i = 0; i < n1; i++) - index_mapping[i] = -1; + index_mapping[i] = -2; index_mapping_reverse = XNMALLOC (n2, ssize_t); for (j = 0; j < n2; j++) - index_mapping_reverse[j] = -1; + index_mapping_reverse[j] = -2; for (i = n1 - 1; i >= 0; i--) /* Take an entry from file1. */ - if (index_mapping[i] < 0) + if (index_mapping[i] < -1) { struct entry *entry = file1->entries[i]; /* Search whether it occurs in file2. */ @@ -365,87 +510,55 @@ compute_mapping (struct changelog_file *file1, struct changelog_file *file2, { j = n2 - 1 - j; /* Found an exact correspondence. */ - ASSERT (index_mapping_reverse[j] < 0); - index_mapping[i] = j; - index_mapping_reverse[j] = i; - /* Look for more occurrences of the same entry. */ - { - ssize_t curr_i = i; - ssize_t curr_j = j; - - for (;;) + /* If index_mapping_reverse[j] >= 0, we have already seen other + copies of this entry, and there were more occurrences of it in + file1 than in file2. In this case, do nothing. */ + if (index_mapping_reverse[j] < 0) + { + index_mapping[i] = j; + index_mapping_reverse[j] = i; + /* Look for more occurrences of the same entry. Match them + as long as they pair up. Unpaired occurrences of the same + entry are left without mapping. 
*/ { - ssize_t next_i; - ssize_t next_j; - - next_i = - gl_list_indexof_from (file1->entries_reversed, n1 - curr_i, - entry); - if (next_i < 0) - break; - next_j = - gl_list_indexof_from (file2->entries_reversed, n2 - curr_j, - entry); - if (next_j < 0) - break; - curr_i = n1 - 1 - next_i; - curr_j = n2 - 1 - next_j; - ASSERT (index_mapping[curr_i] < 0); - ASSERT (index_mapping_reverse[curr_j] < 0); - index_mapping[curr_i] = curr_j; - index_mapping_reverse[curr_j] = curr_i; - } - } - } - } + ssize_t curr_i = i; + ssize_t curr_j = j; - for (i = n1 - 1; i >= 0; i--) - /* Take an entry from file1. */ - if (index_mapping[i] < 0) - { - struct entry *entry_i = file1->entries[i]; - /* Search whether it approximately occurs in file2. */ - ssize_t best_j = -1; - double best_j_similarity = 0.0; - for (j = n2 - 1; j >= 0; j--) - if (index_mapping_reverse[j] < 0) - { - double similarity = entry_fstrcmp (entry_i, file2->entries[j]); - if (similarity > best_j_similarity) - { - best_j = j; - best_j_similarity = similarity; - } - } - if (best_j_similarity >= FSTRCMP_THRESHOLD) - { - /* Found a similar entry in file2. */ - struct entry *entry_j = file2->entries[best_j]; - /* Search whether it approximately occurs in file1 at index i. 
*/ - ssize_t best_i = -1; - double best_i_similarity = 0.0; - ssize_t ii; - for (ii = n1 - 1; ii >= 0; ii--) - if (index_mapping[ii] < 0) - { - double similarity = - entry_fstrcmp (file1->entries[ii], entry_j); - if (similarity > best_i_similarity) + for (;;) { - best_i = i; - best_i_similarity = similarity; + ssize_t next_i; + ssize_t next_j; + + next_i = + gl_list_indexof_from (file1->entries_reversed, + n1 - curr_i, entry); + if (next_i < 0) + break; + next_j = + gl_list_indexof_from (file2->entries_reversed, + n2 - curr_j, entry); + if (next_j < 0) + break; + curr_i = n1 - 1 - next_i; + curr_j = n2 - 1 - next_j; + ASSERT (index_mapping[curr_i] < 0); + ASSERT (index_mapping_reverse[curr_j] < 0); + index_mapping[curr_i] = curr_j; + index_mapping_reverse[curr_j] = curr_i; } } - if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i) - { - index_mapping[i] = best_j; - index_mapping_reverse[best_j] = i; } } } - result[0] = index_mapping; - result[1] = index_mapping_reverse; + result->file1 = file1; + result->file2 = file2; + result->index_mapping = index_mapping; + result->index_mapping_reverse = index_mapping_reverse; + + if (full) + for (i = n1 - 1; i >= 0; i--) + entries_mapping_get (result, i); } /* An "edit" is a textual modification performed by the user, that needs to @@ -729,7 +842,8 @@ try_split_merged_entry (const struct entry *old_entry, new_body.string = new_entry->string + split_offset; new_body.length = new_entry->length - split_offset; - similarity = entry_fstrcmp (&old_body, &new_body); + similarity = + entry_fstrcmp (&old_body, &new_body, best_similarity); if (similarity > best_similarity) { best_split_offset = split_offset; @@ -808,7 +922,7 @@ conflict_write (FILE *fp, struct conflict *c) /* Long options. 
*/ static const struct option long_options[] = -{ +{ { "help", no_argument, NULL, 'h' }, { "split-merged-entry", no_argument, NULL, CHAR_MAX + 1 }, { "version", no_argument, NULL, 'V' }, @@ -922,9 +1036,7 @@ There is NO WARRANTY, to the extent permitted by law.\n\ struct changelog_file mainstream_file; struct changelog_file modified_file; /* Mapping from indices in ancestor_file to indices in mainstream_file. */ - ssize_t *index_mapping; - /* Mapping from indices in mainstream_file to indices in ancestor_file. */ - ssize_t *index_mapping_reverse; + struct entries_mapping mapping; struct differences diffs; gl_list_node_t *result_entries_pointers; /* array of pointers into result_entries */ gl_list_t /* */ result_entries; @@ -975,7 +1087,8 @@ There is NO WARRANTY, to the extent permitted by law.\n\ How to distinguish these situation? There are several hints: - During a "git stash apply", GIT_REFLOG_ACTION is not set. During a "git pull", it is set to 'pull '. During a "git pull --rebase", - it is set to 'pull --rebase'. + it is set to 'pull --rebase'. During a "git cherry-pick", it is + set to 'cherry-pick'. - During a "git stash apply", there is an environment variable of the form GITHEAD_<40_hex_digits>='Stashed changes'. */ { @@ -1002,7 +1115,8 @@ There is NO WARRANTY, to the extent permitted by law.\n\ downstream = true; else { - /* "git stash apply", "git rebase" and similar. */ + /* "git stash apply", "git rebase", "git cherry-pick" and + similar. */ downstream = false; } } @@ -1041,12 +1155,8 @@ There is NO WARRANTY, to the extent permitted by law.\n\ /* Compute correspondence between the entries of ancestor_file and of mainstream_file. 
*/ - { - ssize_t *result[2]; - compute_mapping (&ancestor_file, &mainstream_file, result); - index_mapping = result[0]; - index_mapping_reverse = result[1]; - } + compute_mapping (&ancestor_file, &mainstream_file, false, &mapping); + (void) entries_mapping_reverse_get; /* avoid gcc "defined but not used" warning */ /* Compute differences between the entries of ancestor_file and of modified_file. */ @@ -1102,10 +1212,10 @@ There is NO WARRANTY, to the extent permitted by law.\n\ ancestor_file.entries[i_after]. See whether these two entries still exist in mainstream_file and are still consecutive. */ - k_before = index_mapping[i_before]; + k_before = entries_mapping_get (&mapping, i_before); k_after = (i_after == ancestor_file.num_entries ? mainstream_file.num_entries - : index_mapping[i_after]); + : entries_mapping_get (&mapping, i_after)); if (k_before >= 0 && k_after >= 0 && k_after == k_before + 1) { /* Yes, the entry before and after are still neighbours @@ -1155,7 +1265,7 @@ There is NO WARRANTY, to the extent permitted by law.\n\ for (i = edit->i1; i <= edit->i2; i++) { struct entry *removed_entry = ancestor_file.entries[i]; - ssize_t k = index_mapping[i]; + ssize_t k = entries_mapping_get (&mapping, i); if (k >= 0 && entry_equals (removed_entry, mainstream_file.entries[k])) @@ -1217,7 +1327,8 @@ There is NO WARRANTY, to the extent permitted by law.\n\ size_t i; for (i = edit->i1 + 1; i <= edit->i2; i++) if (entry_fstrcmp (ancestor_file.entries[i], - modified_file.entries[i + edit->j2 - edit->i2]) + modified_file.entries[i + edit->j2 - edit->i2], + FSTRCMP_THRESHOLD) < FSTRCMP_THRESHOLD) { simple_merged = false; @@ -1248,7 +1359,7 @@ There is NO WARRANTY, to the extent permitted by law.\n\ ? 
split[1] : modified_file.entries[j]); size_t i = j + edit->i2 - edit->j2; - ssize_t k = index_mapping[i]; + ssize_t k = entries_mapping_get (&mapping, i); if (k >= 0 && entry_equals (ancestor_file.entries[i], mainstream_file.entries[k])) @@ -1298,7 +1409,8 @@ There is NO WARRANTY, to the extent permitted by law.\n\ simple = true; for (i = edit->i1; i <= edit->i2; i++) if (entry_fstrcmp (ancestor_file.entries[i], - modified_file.entries[i + edit->j2 - edit->i2]) + modified_file.entries[i + edit->j2 - edit->i2], + FSTRCMP_THRESHOLD) < FSTRCMP_THRESHOLD) { simple = false; @@ -1327,7 +1439,7 @@ There is NO WARRANTY, to the extent permitted by law.\n\ { struct entry *changed_entry = modified_file.entries[j]; size_t i = j + edit->i2 - edit->j2; - ssize_t k = index_mapping[i]; + ssize_t k = entries_mapping_get (&mapping, i); if (k >= 0 && entry_equals (ancestor_file.entries[i], mainstream_file.entries[k])) @@ -1366,13 +1478,13 @@ There is NO WARRANTY, to the extent permitted by law.\n\ See whether this entry and the following num_changed entries still exist in mainstream_file and are still consecutive. 
*/ - k_before = index_mapping[i_before]; + k_before = entries_mapping_get (&mapping, i_before); linear = (k_before >= 0); if (linear) { size_t i; for (i = i_before + 1; i <= i_before + num_changed; i++) - if (index_mapping[i] != k_before + (i - i_before)) + if (entries_mapping_get (&mapping, i) != k_before + (i - i_before)) { linear = false; break; @@ -1392,7 +1504,7 @@ There is NO WARRANTY, to the extent permitted by law.\n\ { struct entry *changed_entry = modified_file.entries[j]; size_t i = j + edit->i2 - edit->j2; - ssize_t k = index_mapping[i]; + ssize_t k = entries_mapping_get (&mapping, i); ASSERT (k >= 0); if (entry_equals (ancestor_file.entries[i], mainstream_file.entries[k])) @@ -1432,7 +1544,7 @@ There is NO WARRANTY, to the extent permitted by law.\n\ ssize_t k_first; bool linear_unchanged; i_first = edit->i1; - k_first = index_mapping[i_first]; + k_first = entries_mapping_get (&mapping, i_first); linear_unchanged = (k_first >= 0 && entry_equals (ancestor_file.entries[i_first], @@ -1441,9 +1553,9 @@ There is NO WARRANTY, to the extent permitted by law.\n\ { size_t i; for (i = i_first + 1; i <= edit->i2; i++) - if (!(index_mapping[i] == k_first + (i - i_first) + if (!(entries_mapping_get (&mapping, i) == k_first + (i - i_first) && entry_equals (ancestor_file.entries[i], - mainstream_file.entries[index_mapping[i]]))) + mainstream_file.entries[entries_mapping_get (&mapping, i)]))) { linear_unchanged = false; break; @@ -1462,7 +1574,7 @@ There is NO WARRANTY, to the extent permitted by law.\n\ } for (i = edit->i1; i <= edit->i2; i++) { - ssize_t k = index_mapping[i]; + ssize_t k = entries_mapping_get (&mapping, i); ASSERT (k >= 0); ASSERT (entry_equals (ancestor_file.entries[i], mainstream_file.entries[k]));