X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=lib%2Fgit-merge-changelog.c;h=a199f32d5957308e95663207e3aaf37bb8555d32;hb=8def010e77c2bace5391a1cda45e158d9e0a3f7d;hp=4a63f94ac4d2c7b86c39499b15ab24f0c26b9ec6;hpb=177498a108d49a4cfe37027ca5df0483adcb6ef0;p=gnulib.git diff --git a/lib/git-merge-changelog.c b/lib/git-merge-changelog.c index 4a63f94ac..a199f32d5 100644 --- a/lib/git-merge-changelog.c +++ b/lib/git-merge-changelog.c @@ -1,5 +1,5 @@ /* git-merge-changelog - git "merge" driver for GNU style ChangeLog files. - Copyright (C) 2008 Bruno Haible + Copyright (C) 2008-2009 Bruno Haible This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,7 +21,7 @@ default merge driver has no clue how to deal with this. Furthermore the conflicts are presented with more <<<< ==== >>>> markers than necessary; this is because the default merge driver makes pointless - effects to look at the individual line changes inside a ChangeLog entry. + efforts to look at the individual line changes inside a ChangeLog entry. This program serves as a 'git' merge driver that avoids these problems. 1. It produces no conflict when ChangeLog entries have been inserted @@ -133,6 +133,7 @@ #include "gl_list.h" #include "gl_array_list.h" #include "gl_linkedhash_list.h" +#include "gl_rbtreehash_list.h" #include "gl_linked_list.h" #include "xalloc.h" #include "xmalloca.h" @@ -150,6 +151,7 @@ while (0) #define FSTRCMP_THRESHOLD 0.6 +#define FSTRCMP_STRICTER_THRESHOLD 0.8 /* Representation of a ChangeLog entry. The string may contain NUL bytes; therefore it is represented as a plain @@ -158,8 +160,24 @@ struct entry { char *string; size_t length; + /* Cache for the hash code. */ + bool hashcode_cached; + size_t hashcode; }; +/* Create an entry. + The memory region passed by the caller must of indefinite extent. It is + *not* copied here. */ +static struct entry * +entry_create (char *string, size_t length) +{ + struct entry *result = XMALLOC (struct entry); + result->string = string; + result->length = length; + result->hashcode_cached = false; + return result; +} + /* Compare two entries for equality. */ static bool entry_equals (const void *elt1, const void *elt2) @@ -168,29 +186,37 @@ entry_equals (const void *elt1, const void *elt2) const struct entry *entry2 = (const struct entry *) elt2; return entry1->length == entry2->length && memcmp (entry1->string, entry2->string, entry1->length) == 0; -}; +} /* Return a hash code of the contents of a ChangeLog entry. */ static size_t entry_hashcode (const void *elt) { - const struct entry *entry = (const struct entry *) elt; - /* See http://www.haible.de/bruno/hashfunc.html. */ - const char *s; - size_t n; - size_t h = 0; + struct entry *entry = (struct entry *) elt; + if (!entry->hashcode_cached) + { + /* See http://www.haible.de/bruno/hashfunc.html. */ + const char *s; + size_t n; + size_t h = 0; - for (s = entry->string, n = entry->length; n > 0; s++, n--) - h = (unsigned char) *s + ((h << 9) | (h >> (sizeof (size_t) * CHAR_BIT - 9))); + for (s = entry->string, n = entry->length; n > 0; s++, n--) + h = (unsigned char) *s + ((h << 9) | (h >> (sizeof (size_t) * CHAR_BIT - 9))); - return h; + entry->hashcode = h; + entry->hashcode_cached = true; + } + return entry->hashcode; } /* Perform a fuzzy comparison of two ChangeLog entries. Return a similarity measure of the two entries, a value between 0 and 1. - 0 stands for very distinct, 1 for identical. */ + 0 stands for very distinct, 1 for identical. + If the result is < LOWER_BOUND, an arbitrary other value < LOWER_BOUND can + be returned. */ static double -entry_fstrcmp (const struct entry *entry1, const struct entry *entry2) +entry_fstrcmp (const struct entry *entry1, const struct entry *entry2, + double lower_bound) { /* fstrcmp works only on NUL terminated strings. */ char *memory; @@ -210,7 +236,8 @@ entry_fstrcmp (const struct entry *entry1, const struct entry *entry2) p += entry2->length; *p++ = '\0'; } - similarity = fstrcmp (memory, memory + entry1->length + 1); + similarity = + fstrcmp_bounded (memory, memory + entry1->length + 1, lower_bound); freea (memory); return similarity; } @@ -247,7 +274,7 @@ read_changelog_file (const char *filename, struct changelog_file *result) gl_list_create_empty (GL_LINKEDHASH_LIST, entry_equals, entry_hashcode, NULL, true); result->entries_reversed = - gl_list_create_empty (GL_LINKEDHASH_LIST, entry_equals, entry_hashcode, + gl_list_create_empty (GL_RBTREEHASH_LIST, entry_equals, entry_hashcode, NULL, true); /* A ChangeLog file consists of ChangeLog entries. A ChangeLog entry starts at a line following a blank line and that starts with a non-whitespace @@ -280,9 +307,7 @@ read_changelog_file (const char *filename, struct changelog_file *result) } } - curr = XMALLOC (struct entry); - curr->string = start; - curr->length = ptr - start; + curr = entry_create (start, ptr - start); gl_list_add_last (result->entries_list, curr); gl_list_add_first (result->entries_reversed, curr); @@ -304,18 +329,159 @@ read_changelog_file (const char *filename, struct changelog_file *result) } } +/* A mapping (correspondence) between entries of FILE1 and of FILE2. */ +struct entries_mapping +{ + struct changelog_file *file1; + struct changelog_file *file2; + /* Mapping from indices in FILE1 to indices in FILE2. + A value -1 means that the entry from FILE1 is not found in FILE2. + A value -2 means that it has not yet been computed. */ + ssize_t *index_mapping; + /* Mapping from indices in FILE2 to indices in FILE1. + A value -1 means that the entry from FILE2 is not found in FILE1. + A value -2 means that it has not yet been computed. */ + ssize_t *index_mapping_reverse; +}; + +/* Look up (or lazily compute) the mapping of an entry in FILE1. + i is the index in FILE1. + Return the index in FILE2, or -1 when the entry is not found in FILE2. */ +static ssize_t +entries_mapping_get (struct entries_mapping *mapping, ssize_t i) +{ + if (mapping->index_mapping[i] < -1) + { + struct changelog_file *file1 = mapping->file1; + struct changelog_file *file2 = mapping->file2; + size_t n1 = file1->num_entries; + size_t n2 = file2->num_entries; + struct entry *entry_i = file1->entries[i]; + ssize_t j; + + /* Search whether it approximately occurs in file2. */ + ssize_t best_j = -1; + double best_j_similarity = 0.0; + for (j = n2 - 1; j >= 0; j--) + if (mapping->index_mapping_reverse[j] < 0) + { + double similarity = + entry_fstrcmp (entry_i, file2->entries[j], best_j_similarity); + if (similarity > best_j_similarity) + { + best_j = j; + best_j_similarity = similarity; + } + } + if (best_j_similarity >= FSTRCMP_THRESHOLD) + { + /* Found a similar entry in file2. */ + struct entry *entry_j = file2->entries[best_j]; + /* Search whether it approximately occurs in file1 at index i. */ + ssize_t best_i = -1; + double best_i_similarity = 0.0; + ssize_t ii; + for (ii = n1 - 1; ii >= 0; ii--) + if (mapping->index_mapping[ii] < 0) + { + double similarity = + entry_fstrcmp (file1->entries[ii], entry_j, + best_i_similarity); + if (similarity > best_i_similarity) + { + best_i = ii; + best_i_similarity = similarity; + } + } + if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i) + { + mapping->index_mapping[i] = best_j; + mapping->index_mapping_reverse[best_j] = i; + } + } + if (mapping->index_mapping[i] < -1) + /* It does not approximately occur in FILE2. + Remember it, for next time. */ + mapping->index_mapping[i] = -1; + } + return mapping->index_mapping[i]; +} + +/* Look up (or lazily compute) the mapping of an entry in FILE2. + j is the index in FILE2. + Return the index in FILE1, or -1 when the entry is not found in FILE1. */ +static ssize_t +entries_mapping_reverse_get (struct entries_mapping *mapping, ssize_t j) +{ + if (mapping->index_mapping_reverse[j] < -1) + { + struct changelog_file *file1 = mapping->file1; + struct changelog_file *file2 = mapping->file2; + size_t n1 = file1->num_entries; + size_t n2 = file2->num_entries; + struct entry *entry_j = file2->entries[j]; + ssize_t i; + + /* Search whether it approximately occurs in file1. */ + ssize_t best_i = -1; + double best_i_similarity = 0.0; + for (i = n1 - 1; i >= 0; i--) + if (mapping->index_mapping[i] < 0) + { + double similarity = + entry_fstrcmp (file1->entries[i], entry_j, best_i_similarity); + if (similarity > best_i_similarity) + { + best_i = i; + best_i_similarity = similarity; + } + } + if (best_i_similarity >= FSTRCMP_THRESHOLD) + { + /* Found a similar entry in file1. */ + struct entry *entry_i = file1->entries[best_i]; + /* Search whether it approximately occurs in file2 at index j. */ + ssize_t best_j = -1; + double best_j_similarity = 0.0; + ssize_t jj; + for (jj = n2 - 1; jj >= 0; jj--) + if (mapping->index_mapping_reverse[jj] < 0) + { + double similarity = + entry_fstrcmp (entry_i, file2->entries[jj], + best_j_similarity); + if (similarity > best_j_similarity) + { + best_j = jj; + best_j_similarity = similarity; + } + } + if (best_j_similarity >= FSTRCMP_THRESHOLD && best_j == j) + { + mapping->index_mapping_reverse[j] = best_i; + mapping->index_mapping[best_i] = j; + } + } + if (mapping->index_mapping_reverse[j] < -1) + /* It does not approximately occur in FILE1. + Remember it, for next time. */ + mapping->index_mapping_reverse[j] = -1; + } + return mapping->index_mapping_reverse[j]; +} + /* Compute a mapping (correspondence) between entries of FILE1 and of FILE2. - Return a set of two arrays: - - An array mapping FILE1 indices to FILE2 indices (or -1 when the entry - from FILE1 is not found in FILE2). - - An array mapping FILE2 indices to FILE1 indices (or -1 when the entry - from FILE2 is not found in FILE1). The correspondence also takes into account small modifications; i.e. the indicated relation is not equality of entries but best-match similarity - of entries. */ + of entries. + If FULL is true, the maximum of matching is done up-front. If it is false, + it is done in a lazy way through the functions entries_mapping_get and + entries_mapping_reverse_get. + Return the result in *RESULT. */ static void compute_mapping (struct changelog_file *file1, struct changelog_file *file2, - ssize_t *result[2]) + bool full, + struct entries_mapping *result) { /* Mapping from indices in file1 to indices in file2. */ ssize_t *index_mapping; @@ -327,15 +493,15 @@ compute_mapping (struct changelog_file *file1, struct changelog_file *file2, index_mapping = XNMALLOC (n1, ssize_t); for (i = 0; i < n1; i++) - index_mapping[i] = -1; + index_mapping[i] = -2; index_mapping_reverse = XNMALLOC (n2, ssize_t); for (j = 0; j < n2; j++) - index_mapping_reverse[j] = -1; + index_mapping_reverse[j] = -2; for (i = n1 - 1; i >= 0; i--) /* Take an entry from file1. */ - if (index_mapping[i] < 0) + if (index_mapping[i] < -1) { struct entry *entry = file1->entries[i]; /* Search whether it occurs in file2. */ @@ -344,87 +510,55 @@ compute_mapping (struct changelog_file *file1, struct changelog_file *file2, { j = n2 - 1 - j; /* Found an exact correspondence. */ - ASSERT (index_mapping_reverse[j] < 0); - index_mapping[i] = j; - index_mapping_reverse[j] = i; - /* Look for more occurrences of the same entry. */ - { - ssize_t curr_i = i; - ssize_t curr_j = j; - - for (;;) + /* If index_mapping_reverse[j] >= 0, we have already seen other + copies of this entry, and there were more occurrences of it in + file1 than in file2. In this case, do nothing. */ + if (index_mapping_reverse[j] < 0) + { + index_mapping[i] = j; + index_mapping_reverse[j] = i; + /* Look for more occurrences of the same entry. Match them + as long as they pair up. Unpaired occurrences of the same + entry are left without mapping. */ { - ssize_t next_i; - ssize_t next_j; - - next_i = - gl_list_indexof_from (file1->entries_reversed, n1 - curr_i, - entry); - if (next_i < 0) - break; - next_j = - gl_list_indexof_from (file2->entries_reversed, n2 - curr_j, - entry); - if (next_j < 0) - break; - curr_i = n1 - 1 - next_i; - curr_j = n2 - 1 - next_j; - ASSERT (index_mapping[curr_i] < 0); - ASSERT (index_mapping_reverse[curr_j] < 0); - index_mapping[curr_i] = curr_j; - index_mapping_reverse[curr_j] = curr_i; - } - } - } - } + ssize_t curr_i = i; + ssize_t curr_j = j; - for (i = n1 - 1; i >= 0; i--) - /* Take an entry from file1. */ - if (index_mapping[i] < 0) - { - struct entry *entry_i = file1->entries[i]; - /* Search whether it approximately occurs in file2. */ - ssize_t best_j = -1; - double best_j_similarity = 0.0; - for (j = n2 - 1; j >= 0; j--) - if (index_mapping_reverse[j] < 0) - { - double similarity = entry_fstrcmp (entry_i, file2->entries[j]); - if (similarity > best_j_similarity) - { - best_j = j; - best_j_similarity = similarity; - } - } - if (best_j_similarity >= FSTRCMP_THRESHOLD) - { - /* Found a similar entry in file2. */ - struct entry *entry_j = file2->entries[best_j]; - /* Search whether it approximately occurs in file1 at index i. */ - ssize_t best_i = -1; - double best_i_similarity = 0.0; - ssize_t ii; - for (ii = n1 - 1; ii >= 0; ii--) - if (index_mapping[ii] < 0) - { - double similarity = - entry_fstrcmp (file1->entries[ii], entry_j); - if (similarity > best_i_similarity) + for (;;) { - best_i = i; - best_i_similarity = similarity; + ssize_t next_i; + ssize_t next_j; + + next_i = + gl_list_indexof_from (file1->entries_reversed, + n1 - curr_i, entry); + if (next_i < 0) + break; + next_j = + gl_list_indexof_from (file2->entries_reversed, + n2 - curr_j, entry); + if (next_j < 0) + break; + curr_i = n1 - 1 - next_i; + curr_j = n2 - 1 - next_j; + ASSERT (index_mapping[curr_i] < 0); + ASSERT (index_mapping_reverse[curr_j] < 0); + index_mapping[curr_i] = curr_j; + index_mapping_reverse[curr_j] = curr_i; } } - if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i) - { - index_mapping[i] = best_j; - index_mapping_reverse[best_j] = i; } } } - result[0] = index_mapping; - result[1] = index_mapping_reverse; + result->file1 = file1; + result->file2 = file2; + result->index_mapping = index_mapping; + result->index_mapping_reverse = index_mapping_reverse; + + if (full) + for (i = n1 - 1; i >= 0; i--) + entries_mapping_get (result, i); } /* An "edit" is a textual modification performed by the user, that needs to @@ -638,6 +772,116 @@ compute_differences (struct changelog_file *file1, struct changelog_file *file2, /* An empty entry. */ static struct entry empty_entry = { NULL, 0 }; +/* Return the end a paragraph. + ENTRY is an entry. + OFFSET is an offset into the entry, OFFSET <= ENTRY->length. + Return the offset of the end of paragraph, as an offset <= ENTRY->length; + it is the start of a blank line or the end of the entry. */ +static size_t +find_paragraph_end (const struct entry *entry, size_t offset) +{ + const char *string = entry->string; + size_t length = entry->length; + + for (;;) + { + const char *nl = memchr (string + offset, '\n', length - offset); + if (nl == NULL) + return length; + offset = (nl - string) + 1; + if (offset < length && string[offset] == '\n') + return offset; + } +} + +/* Split a merged entry. + Given an old entry of the form + TITLE + BODY + and a new entry of the form + TITLE + BODY1 + BODY' + where the two titles are the same and BODY and BODY' are very similar, + this computes two new entries + TITLE + BODY1 + and + TITLE + BODY' + and returns true. + If the entries don't have this form, it returns false. */ +static bool +try_split_merged_entry (const struct entry *old_entry, + const struct entry *new_entry, + struct entry *new_split[2]) +{ + size_t old_title_len = find_paragraph_end (old_entry, 0); + size_t new_title_len = find_paragraph_end (new_entry, 0); + struct entry old_body; + struct entry new_body; + size_t best_split_offset; + double best_similarity; + size_t split_offset; + + /* Same title? */ + if (!(old_title_len == new_title_len + && memcmp (old_entry->string, new_entry->string, old_title_len) == 0)) + return false; + + old_body.string = old_entry->string + old_title_len; + old_body.length = old_entry->length - old_title_len; + + /* Determine where to split the new entry. + This is done by maximizing the similarity between BODY and BODY'. */ + best_split_offset = split_offset = new_title_len; + best_similarity = 0.0; + for (;;) + { + double similarity; + + new_body.string = new_entry->string + split_offset; + new_body.length = new_entry->length - split_offset; + similarity = + entry_fstrcmp (&old_body, &new_body, best_similarity); + if (similarity > best_similarity) + { + best_split_offset = split_offset; + best_similarity = similarity; + } + if (best_similarity == 1.0) + /* It cannot get better. */ + break; + + if (split_offset < new_entry->length) + split_offset = find_paragraph_end (new_entry, split_offset + 1); + else + break; + } + + /* BODY' should not be empty. */ + if (best_split_offset == new_entry->length) + return false; + ASSERT (new_entry->string[best_split_offset] == '\n'); + + /* A certain similarity between BODY and BODY' is required. */ + if (best_similarity < FSTRCMP_STRICTER_THRESHOLD) + return false; + + new_split[0] = entry_create (new_entry->string, best_split_offset + 1); + + { + size_t len1 = new_title_len; + size_t len2 = new_entry->length - best_split_offset; + char *combined = XNMALLOC (len1 + len2, char); + memcpy (combined, new_entry->string, len1); + memcpy (combined + len1, new_entry->string + best_split_offset, len2); + new_split[1] = entry_create (combined, len1 + len2); + } + + return true; +} + /* Write the contents of an entry to the output stream FP. */ static void entry_write (FILE *fp, struct entry *entry) @@ -678,8 +922,9 @@ conflict_write (FILE *fp, struct conflict *c) /* Long options. */ static const struct option long_options[] = -{ +{ { "help", no_argument, NULL, 'h' }, + { "split-merged-entry", no_argument, NULL, CHAR_MAX + 1 }, { "version", no_argument, NULL, 'V' }, { NULL, 0, NULL, 0 } }; @@ -702,6 +947,14 @@ usage (int status) printf ("B-FILE-NAME names the user-modified file.\n"); printf ("Writes the merged file into A-FILE-NAME.\n"); printf ("\n"); + printf ("Operation modifiers:\n"); + printf ("\ + --split-merged-entry Possibly split a merged entry between paragraphs.\n\ + Use this if you have the habit to merge unrelated\n\ + entries into a single one, separated only by a\n\ + newline, just because they happened on the same\n\ + date.\n"); + printf ("\n"); printf ("Informative output:\n"); printf (" -h, --help display this help and exit\n"); printf (" -V, --version output version information and exit\n"); @@ -719,6 +972,7 @@ main (int argc, char *argv[]) int optchar; bool do_help; bool do_version; + bool split_merged_entry; /* Set program name for messages. */ set_program_name (argv[0]); @@ -726,6 +980,7 @@ main (int argc, char *argv[]) /* Set default values for variables. */ do_help = false; do_version = false; + split_merged_entry = false; /* Parse command line options. */ while ((optchar = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF) @@ -739,7 +994,10 @@ main (int argc, char *argv[]) case 'V': do_version = true; break; - default: + case CHAR_MAX + 1: /* --split-merged-entry */ + split_merged_entry = true; + break; + default: usage (EXIT_FAILURE); } @@ -778,9 +1036,7 @@ There is NO WARRANTY, to the extent permitted by law.\n\ struct changelog_file mainstream_file; struct changelog_file modified_file; /* Mapping from indices in ancestor_file to indices in mainstream_file. */ - ssize_t *index_mapping; - /* Mapping from indices in mainstream_file to indices in ancestor_file. */ - ssize_t *index_mapping_reverse; + struct entries_mapping mapping; struct differences diffs; gl_list_node_t *result_entries_pointers; /* array of pointers into result_entries */ gl_list_t /* */ result_entries; @@ -831,7 +1087,8 @@ There is NO WARRANTY, to the extent permitted by law.\n\ How to distinguish these situation? There are several hints: - During a "git stash apply", GIT_REFLOG_ACTION is not set. During a "git pull", it is set to 'pull '. During a "git pull --rebase", - it is set to 'pull --rebase'. + it is set to 'pull --rebase'. During a "git cherry-pick", it is + set to 'cherry-pick'. - During a "git stash apply", there is an environment variable of the form GITHEAD_<40_hex_digits>='Stashed changes'. */ { @@ -858,7 +1115,8 @@ There is NO WARRANTY, to the extent permitted by law.\n\ downstream = true; else { - /* "git stash apply", "git rebase" and similar. */ + /* "git stash apply", "git rebase", "git cherry-pick" and + similar. */ downstream = false; } } @@ -897,12 +1155,8 @@ There is NO WARRANTY, to the extent permitted by law.\n\ /* Compute correspondence between the entries of ancestor_file and of mainstream_file. */ - { - ssize_t *result[2]; - compute_mapping (&ancestor_file, &mainstream_file, result); - index_mapping = result[0]; - index_mapping_reverse = result[1]; - } + compute_mapping (&ancestor_file, &mainstream_file, false, &mapping); + (void) entries_mapping_reverse_get; /* avoid gcc "defined but not" warning */ /* Compute differences between the entries of ancestor_file and of modified_file. */ @@ -958,10 +1212,10 @@ There is NO WARRANTY, to the extent permitted by law.\n\ ancestor_file.entries[i_after]. See whether these two entries still exist in mainstream_file and are still consecutive. */ - k_before = index_mapping[i_before]; + k_before = entries_mapping_get (&mapping, i_before); k_after = (i_after == ancestor_file.num_entries ? mainstream_file.num_entries - : index_mapping[i_after]); + : entries_mapping_get (&mapping, i_after)); if (k_before >= 0 && k_after >= 0 && k_after == k_before + 1) { /* Yes, the entry before and after are still neighbours @@ -1011,7 +1265,7 @@ There is NO WARRANTY, to the extent permitted by law.\n\ for (i = edit->i1; i <= edit->i2; i++) { struct entry *removed_entry = ancestor_file.entries[i]; - ssize_t k = index_mapping[i]; + ssize_t k = entries_mapping_get (&mapping, i); if (k >= 0 && entry_equals (removed_entry, mainstream_file.entries[k])) @@ -1040,122 +1294,82 @@ There is NO WARRANTY, to the extent permitted by law.\n\ break; case CHANGE: { - bool simple; - bool done; - /* Test whether the change is "simple", i.e. whether it - consists of small changes to the old ChangeLog entries - and additions before them: - entry_1 ... entry_n - are mapped to - added_entry ... added_entry modified_entry_1 ... modified_entry_n. */ - if (edit->i2 - edit->i1 <= edit->j2 - edit->j1) - { - size_t i; - simple = true; - for (i = edit->i1; i <= edit->i2; i++) - if (entry_fstrcmp (ancestor_file.entries[i], - modified_file.entries[i + edit->j2 - edit->i2]) - < FSTRCMP_THRESHOLD) - { - simple = false; - break; - } - } - else - simple = false; - done = false; - if (simple) + bool done = false; + /* When the user usually merges entries from the same day, + and this edit is at the top of the file: */ + if (split_merged_entry && edit->j1 == 0) { - /* Apply the additions and each of the single-entry changes - separately. */ - size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */ - size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed; - if (edit->j1 == 0) - { - /* A simple change at the top of modified_file. - Apply it to the top of mainstream_file. */ - ssize_t j; - for (j = edit->j1 + num_added - 1; j >= edit->j1; j--) - { - struct entry *added_entry = modified_file.entries[j]; - gl_list_add_first (result_entries, added_entry); - } - for (j = edit->j1 + num_added; j <= edit->j2; j++) - { - struct entry *changed_entry = modified_file.entries[j]; - size_t i = j + edit->i2 - edit->j2; - ssize_t k = index_mapping[i]; - if (k >= 0 - && entry_equals (ancestor_file.entries[i], - mainstream_file.entries[k])) - { - gl_list_node_set_value (result_entries, - result_entries_pointers[k], - changed_entry); - } - else - { - struct conflict *c = XMALLOC (struct conflict); - c->num_old_entries = 1; - c->old_entries = - XNMALLOC (c->num_old_entries, struct entry *); - c->old_entries[0] = ancestor_file.entries[i]; - c->num_modified_entries = 1; - c->modified_entries = - XNMALLOC (c->num_modified_entries, struct entry *); - c->modified_entries[0] = changed_entry; - gl_list_add_last (result_conflicts, c); - } - } - done = true; - } - else + /* Test whether the change is "simple merged", i.e. whether + it consists of additions, followed by an augmentation of + the first changed entry, followed by small changes of the + remaining entries: + entry_1 + entry_2 + ... + entry_n + are mapped to + added_entry + ... + added_entry + augmented_entry_1 + modified_entry_2 + ... + modified_entry_n. */ + if (edit->i2 - edit->i1 <= edit->j2 - edit->j1) { - ssize_t i_before; - ssize_t k_before; - bool linear; - i_before = diffs.index_mapping_reverse[edit->j1 - 1]; - ASSERT (i_before >= 0); - /* A simple change after ancestor_file.entries[i_before]. - See whether this entry and the following num_changed - entries still exist in mainstream_file and are still - consecutive. */ - k_before = index_mapping[i_before]; - linear = (k_before >= 0); - if (linear) + struct entry *split[2]; + bool simple_merged = + try_split_merged_entry (ancestor_file.entries[edit->i1], + modified_file.entries[edit->i1 + edit->j2 - edit->i2], + split); + if (simple_merged) { size_t i; - for (i = i_before + 1; i <= i_before + num_changed; i++) - if (index_mapping[i] != k_before + (i - i_before)) + for (i = edit->i1 + 1; i <= edit->i2; i++) + if (entry_fstrcmp (ancestor_file.entries[i], + modified_file.entries[i + edit->j2 - edit->i2], + FSTRCMP_THRESHOLD) + < FSTRCMP_THRESHOLD) { - linear = false; + simple_merged = false; break; } } - if (linear) + if (simple_merged) { - gl_list_node_t node_for_insert = - result_entries_pointers[k_before + 1]; + /* Apply the additions at the top of modified_file. + Apply each of the single-entry changes + separately. */ + size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */ + size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed; ssize_t j; + /* First part of the split modified_file.entries[edit->j2 - edit->i2 + edit->i1]: */ + gl_list_add_first (result_entries, split[0]); + /* The additions. */ for (j = edit->j1 + num_added - 1; j >= edit->j1; j--) { struct entry *added_entry = modified_file.entries[j]; - gl_list_add_before (result_entries, node_for_insert, added_entry); + gl_list_add_first (result_entries, added_entry); } + /* Now the single-entry changes. */ for (j = edit->j1 + num_added; j <= edit->j2; j++) { - struct entry *changed_entry = modified_file.entries[j]; + struct entry *changed_entry = + (j == edit->j1 + num_added + ? split[1] + : modified_file.entries[j]); size_t i = j + edit->i2 - edit->j2; - ssize_t k = index_mapping[i]; - ASSERT (k >= 0); - if (entry_equals (ancestor_file.entries[i], - mainstream_file.entries[k])) + ssize_t k = entries_mapping_get (&mapping, i); + if (k >= 0 + && entry_equals (ancestor_file.entries[i], + mainstream_file.entries[k])) { gl_list_node_set_value (result_entries, result_entries_pointers[k], changed_entry); } - else + else if (!entry_equals (ancestor_file.entries[i], + changed_entry)) { struct conflict *c = XMALLOC (struct conflict); c->num_old_entries = 1; @@ -1173,54 +1387,203 @@ There is NO WARRANTY, to the extent permitted by law.\n\ } } } - else + if (!done) { - /* A big change. - See whether the num_changed entries still exist unchanged - in mainstream_file and are still consecutive. */ - ssize_t i_first; - ssize_t k_first; - bool linear_unchanged; - i_first = edit->i1; - k_first = index_mapping[i_first]; - linear_unchanged = - (k_first >= 0 - && entry_equals (ancestor_file.entries[i_first], - mainstream_file.entries[k_first])); - if (linear_unchanged) + bool simple; + /* Test whether the change is "simple", i.e. whether it + consists of small changes to the old ChangeLog entries + and additions before them: + entry_1 + ... + entry_n + are mapped to + added_entry + ... + added_entry + modified_entry_1 + ... + modified_entry_n. */ + if (edit->i2 - edit->i1 <= edit->j2 - edit->j1) { size_t i; - for (i = i_first + 1; i <= edit->i2; i++) - if (!(index_mapping[i] == k_first + (i - i_first) - && entry_equals (ancestor_file.entries[i], - mainstream_file.entries[index_mapping[i]]))) + simple = true; + for (i = edit->i1; i <= edit->i2; i++) + if (entry_fstrcmp (ancestor_file.entries[i], + modified_file.entries[i + edit->j2 - edit->i2], + FSTRCMP_THRESHOLD) + < FSTRCMP_THRESHOLD) { - linear_unchanged = false; + simple = false; break; } } - if (linear_unchanged) + else + simple = false; + if (simple) { - gl_list_node_t node_for_insert = - result_entries_pointers[k_first]; - ssize_t j; - size_t i; - for (j = edit->j2; j >= edit->j1; j--) + /* Apply the additions and each of the single-entry + changes separately. */ + size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */ + size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed; + if (edit->j1 == 0) { - struct entry *new_entry = modified_file.entries[j]; - gl_list_add_before (result_entries, node_for_insert, new_entry); + /* A simple change at the top of modified_file. + Apply it to the top of mainstream_file. */ + ssize_t j; + for (j = edit->j1 + num_added - 1; j >= edit->j1; j--) + { + struct entry *added_entry = modified_file.entries[j]; + gl_list_add_first (result_entries, added_entry); + } + for (j = edit->j1 + num_added; j <= edit->j2; j++) + { + struct entry *changed_entry = modified_file.entries[j]; + size_t i = j + edit->i2 - edit->j2; + ssize_t k = entries_mapping_get (&mapping, i); + if (k >= 0 + && entry_equals (ancestor_file.entries[i], + mainstream_file.entries[k])) + { + gl_list_node_set_value (result_entries, + result_entries_pointers[k], + changed_entry); + } + else + { + struct conflict *c; + ASSERT (!entry_equals (ancestor_file.entries[i], + changed_entry)); + c = XMALLOC (struct conflict); + c->num_old_entries = 1; + c->old_entries = + XNMALLOC (c->num_old_entries, struct entry *); + c->old_entries[0] = ancestor_file.entries[i]; + c->num_modified_entries = 1; + c->modified_entries = + XNMALLOC (c->num_modified_entries, struct entry *); + c->modified_entries[0] = changed_entry; + gl_list_add_last (result_conflicts, c); + } + } + done = true; } - for (i = edit->i1; i <= edit->i2; i++) + else + { + ssize_t i_before; + ssize_t k_before; + bool linear; + i_before = diffs.index_mapping_reverse[edit->j1 - 1]; + ASSERT (i_before >= 0); + /* A simple change after ancestor_file.entries[i_before]. + See whether this entry and the following num_changed + entries still exist in mainstream_file and are still + consecutive. */ + k_before = entries_mapping_get (&mapping, i_before); + linear = (k_before >= 0); + if (linear) + { + size_t i; + for (i = i_before + 1; i <= i_before + num_changed; i++) + if (entries_mapping_get (&mapping, i) != k_before + (i - i_before)) + { + linear = false; + break; + } + } + if (linear) + { + gl_list_node_t node_for_insert = + result_entries_pointers[k_before + 1]; + ssize_t j; + for (j = edit->j1 + num_added - 1; j >= edit->j1; j--) + { + struct entry *added_entry = modified_file.entries[j]; + gl_list_add_before (result_entries, node_for_insert, added_entry); + } + for (j = edit->j1 + num_added; j <= edit->j2; j++) + { + struct entry *changed_entry = modified_file.entries[j]; + size_t i = j + edit->i2 - edit->j2; + ssize_t k = entries_mapping_get (&mapping, i); + ASSERT (k >= 0); + if (entry_equals (ancestor_file.entries[i], + mainstream_file.entries[k])) + { + gl_list_node_set_value (result_entries, + result_entries_pointers[k], + changed_entry); + } + else + { + struct conflict *c; + ASSERT (!entry_equals (ancestor_file.entries[i], + changed_entry)); + c = XMALLOC (struct conflict); + c->num_old_entries = 1; + c->old_entries = + XNMALLOC (c->num_old_entries, struct entry *); + c->old_entries[0] = ancestor_file.entries[i]; + c->num_modified_entries = 1; + c->modified_entries = + XNMALLOC (c->num_modified_entries, struct entry *); + c->modified_entries[0] = changed_entry; + gl_list_add_last (result_conflicts, c); + } + } + done = true; + } + } + } + else + { + /* A big change. + See whether the num_changed entries still exist + unchanged in mainstream_file and are still + consecutive. */ + ssize_t i_first; + ssize_t k_first; + bool linear_unchanged; + i_first = edit->i1; + k_first = entries_mapping_get (&mapping, i_first); + linear_unchanged = + (k_first >= 0 + && entry_equals (ancestor_file.entries[i_first], + mainstream_file.entries[k_first])); + if (linear_unchanged) { - ssize_t k = index_mapping[i]; - ASSERT (k >= 0); - ASSERT (entry_equals (ancestor_file.entries[i], - mainstream_file.entries[k])); - gl_list_node_set_value (result_entries, - result_entries_pointers[k], - &empty_entry); + size_t i; + for (i = i_first + 1; i <= edit->i2; i++) + if (!(entries_mapping_get (&mapping, i) == k_first + (i - i_first) + && entry_equals (ancestor_file.entries[i], + mainstream_file.entries[entries_mapping_get (&mapping, i)]))) + { + linear_unchanged = false; + break; + } + } + if (linear_unchanged) + { + gl_list_node_t node_for_insert = + result_entries_pointers[k_first]; + ssize_t j; + size_t i; + for (j = edit->j2; j >= edit->j1; j--) + { + struct entry *new_entry = modified_file.entries[j]; + gl_list_add_before (result_entries, node_for_insert, new_entry); + } + for (i = edit->i1; i <= edit->i2; i++) + { + ssize_t k = entries_mapping_get (&mapping, i); + ASSERT (k >= 0); + ASSERT (entry_equals (ancestor_file.entries[i], + mainstream_file.entries[k])); + gl_list_node_set_value (result_entries, + result_entries_pointers[k], + &empty_entry); + } + done = true; } - done = true; } } if (!done) @@ -1261,11 +1624,14 @@ There is NO WARRANTY, to the extent permitted by law.\n\ for (i = 0; i < n; i++) conflict_write (fp, (struct conflict *) gl_list_get_at (result_conflicts, i)); } + /* Output the modified and unmodified entries, in order. */ { - size_t n = gl_list_size (result_entries); - size_t i; - for (i = 0; i < n; i++) - entry_write (fp, (struct entry *) gl_list_get_at (result_entries, i)); + gl_list_iterator_t iter = gl_list_iterator (result_entries); + const void *elt; + gl_list_node_t node; + while (gl_list_iterator_next (&iter, &elt, &node)) + entry_write (fp, (struct entry *) elt); + gl_list_iterator_free (&iter); } if (fwriteerror (fp))