1 /* git-merge-changelog - git "merge" driver for GNU style ChangeLog files.
2 Copyright (C) 2008-2010 Bruno Haible <bruno@clisp.org>
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 The default merge driver of 'git' *always* produces conflicts when
19 pulling public modifications into a privately modified ChangeLog file.
20 This is because ChangeLog files are always modified at the top; the
21 default merge driver has no clue how to deal with this. Furthermore
22 the conflicts are presented with more <<<< ==== >>>> markers than
23 necessary; this is because the default merge driver makes pointless
24 efforts to look at the individual line changes inside a ChangeLog entry.
26 This program serves as a 'git' merge driver that avoids these problems.
27 1. It produces no conflict when ChangeLog entries have been inserted
28 at the top both in the public and in the private modification. It
29 puts the privately added entries above the publicly added entries.
30 2. It respects the structure of ChangeLog files: entries are not split
31 into lines but kept together.
32 3. It also handles the case of small modifications of past ChangeLog
33 entries, or of removed ChangeLog entries: they are merged as one
35 4. Conflicts are presented at the top of the file, rather than where
36 they occurred, so that the user will see them immediately. (Unlike
37 for source code written in some programming language, conflict markers
38 that are located several hundred lines from the top will not cause
39 any syntax error and therefore would be likely to remain unnoticed.)
44 $ gnulib-tool --create-testdir --dir=/tmp/testdir123 git-merge-changelog
50 Additionally, for git users:
51 - Add to .git/config of the checkout (or to your $HOME/.gitconfig) the
54 [merge "merge-changelog"]
55 name = GNU-style ChangeLog merge driver
56 driver = /usr/local/bin/git-merge-changelog %O %A %B
58 - In every directory that contains a ChangeLog file, add a file
59 '.gitattributes' with this line:
61 ChangeLog merge=merge-changelog
63 (See "man 5 gitattributes" for more info.)
65 Additionally, for bzr users:
66 - Install the 'extmerge' bzr plug-in listed at
67 <http://doc.bazaar.canonical.com/plugins/en/index.html>
68 <http://wiki.bazaar.canonical.com/BzrPlugins>
69 - Add to your $HOME/.bazaar/bazaar.conf the line
71 external_merge = git-merge-changelog %b %T %o
73 - Then, to merge a conflict in a ChangeLog file, use
75 $ bzr extmerge ChangeLog
77 Additionally, for hg users:
78 - Add to your $HOME/.hgrc the lines
81 ChangeLog = git-merge-changelog
84 git-merge-changelog.executable = /usr/local/bin/git-merge-changelog
85 git-merge-changelog.args = $base $local $other
87 See <http://www.selenic.com/mercurial/hgrc.5.html> section merge-tools
91 /* Use as an alternative to 'diff3':
92 git-merge-changelog performs the same role as "diff3 -m", just with
94 $ git-merge-changelog %O %A %B
99 /* Calling convention:
100 A merge driver is called with three filename arguments:
101 1. %O = The common ancestor of %A and %B.
102 2. %A = The file's contents from the "current branch".
103 3. %B = The file's contents from the "other branch"; this is the contents
106 In case of a "git stash apply" or of an upstream pull (e.g. from a subsystem
107 maintainer to a central maintainer) or of a downstream pull with --rebase:
108 2. %A = The file's newest pulled contents; modified by other committers.
109 3. %B = The user's newest copy of the file; modified by the user.
110 In case of a downstream pull (e.g. from a central repository to the user)
111 or of an upstream pull with --rebase:
112 2. %A = The user's newest copy of the file; modified by the user.
113 3. %B = The file's newest pulled contents; modified by other committers.
115 It should write its merged output into file %A. It can also echo some
116 remarks to stdout. It should exit with return code 0 if the merge could
117 be resolved cleanly, or with non-zero return code if there were conflicts.
121 The structure of a ChangeLog file: It consists of ChangeLog entries. A
122 ChangeLog entry starts at a line following a blank line and that starts with
123 a non-whitespace character, or at the beginning of a file.
124 The merge driver works as follows: It reads the three files into memory and
125 dissects them into ChangeLog entries. It then finds the differences between
126 %O and %B. They are classified as:
127 - removals (some consecutive entries removed),
128 - changes (some consecutive entries removed, some consecutive entries
130 - additions (some consecutive entries added).
131 The driver then attempts to apply the changes to %A.
132 To this effect, it first computes a correspondence between the entries in %O
133 and the entries in %A, using fuzzy string matching to still identify changed
135 - Removals are applied one by one. If the entry is present in %A, at any
136 position, it is removed. If not, the removal is marked as a conflict.
137 - Additions at the top of %B are applied at the top of %A.
138 - Additions between entry x and entry y (y may be the file end) in %B are
139 applied between entry x and entry y in %A (if they still exist and are
140 still consecutive in %A), otherwise the additions are marked as a
142 - Changes are categorized into "simple changes":
145 added_entry ... added_entry modified_entry1 ... modified_entryn,
146 where the correspondence between entry_i and modified_entry_i is still
147 clear; and "big changes": these are all the rest. Simple changes at the
148 top of %B are applied by putting the added entries at the top of %A. The
149 changes in simple changes are applied one by one; possibly leading to
150 single-entry conflicts. Big changes are applied en bloc, possibly
151 leading to conflicts spanning multiple entries.
152 - Conflicts are output at the top of the file and cause an exit status of
164 #include <sys/types.h>
167 #include "progname.h"
169 #include "read-file.h"
170 #include "gl_xlist.h"
171 #include "gl_array_list.h"
172 #include "gl_linkedhash_list.h"
173 #include "gl_rbtreehash_list.h"
174 #include "gl_linked_list.h"
176 #include "xmalloca.h"
179 #include "c-strstr.h"
180 #include "fwriteerror.h"
182 #define ASSERT(expr) \
/* Minimum similarity, as returned by entry_fstrcmp, that
   entries_mapping_get and entries_mapping_reverse_get require before they
   treat two entries as corresponding to each other. */
190 #define FSTRCMP_THRESHOLD 0.6
/* Higher minimum similarity that try_split_merged_entry requires between
   the old body and the best candidate body of the new entry. */
191 #define FSTRCMP_STRICTER_THRESHOLD 0.8
193 /* Representation of a ChangeLog entry.
194 The string may contain NUL bytes; therefore it is represented as a plain
195 opaque memory region.
   The region is described by a pointer (string) and a byte count (length);
   entry_create stores the pointer it is given without copying the bytes. */
200 /* Cache for the hash code: entry_hashcode sets hashcode_cached once the
   code has been computed and from then on returns the stored hashcode. */
201 bool hashcode_cached;
206 The memory region passed by the caller must be of indefinite extent. It is
207 *not* copied here. */
/* Allocate a struct entry with XMALLOC and make it describe the LENGTH
   bytes starting at STRING. The pointer is stored as is, and the hash code
   cache of the new entry starts out unset. */
208 static struct entry *
209 entry_create (char *string, size_t length)
211 struct entry *result = XMALLOC (struct entry);
212 result->string = string;
213 result->length = length;
214 result->hashcode_cached = false;
218 /* Compare two entries for equality.
   ELT1 and ELT2 point to struct entry objects. Two entries are equal when
   they have the same length and the same bytes; the bytes are compared
   with memcmp, so an embedded NUL byte is treated like any other byte. */
220 entry_equals (const void *elt1, const void *elt2)
222 const struct entry *entry1 = (const struct entry *) elt1;
223 const struct entry *entry2 = (const struct entry *) elt2;
/* The length test comes first, so memcmp only runs on equally long
   regions. */
224 return entry1->length == entry2->length
225 && memcmp (entry1->string, entry2->string, entry1->length) == 0;
228 /* Return a hash code of the contents of a ChangeLog entry.
   ELT points to a struct entry. The code is computed on the first call and
   then cached inside the entry, which is why the const qualifier of ELT is
   cast away below. */
230 entry_hashcode (const void *elt)
232 struct entry *entry = (struct entry *) elt;
233 if (!entry->hashcode_cached)
235 /* See http://www.haible.de/bruno/hashfunc.html. */
/* For every byte of the entry: rotate h left by 9 bits over a width of
   sizeof (size_t) * CHAR_BIT bits, then add the byte taken as an
   unsigned char. */
240 for (s = entry->string, n = entry->length; n > 0; s++, n--)
241 h = (unsigned char) *s + ((h << 9) | (h >> (sizeof (size_t) * CHAR_BIT - 9)));
/* Mark the cache as valid so that later calls skip the loop. */
244 entry->hashcode_cached = true;
246 return entry->hashcode;
249 /* Perform a fuzzy comparison of two ChangeLog entries.
250 Return a similarity measure of the two entries, a value between 0 and 1.
251 0 stands for very distinct, 1 for identical.
252 If the result is < LOWER_BOUND, an arbitrary other value < LOWER_BOUND can
255 entry_fstrcmp (const struct entry *entry1, const struct entry *entry2,
258 /* fstrcmp works only on NUL terminated strings. */
/* Test each entry for an embedded NUL byte, which a NUL terminated copy of
   the entry could not represent. */
262 if (memchr (entry1->string, '\0', entry1->length) != NULL)
264 if (memchr (entry2->string, '\0', entry2->length) != NULL)
/* One scratch buffer receives both entries; each copy is given one extra
   byte beyond its length (the two + 1 terms). */
266 memory = (char *) xmalloca (entry1->length + 1 + entry2->length + 1);
269 memcpy (p, entry1->string, entry1->length);
272 memcpy (p, entry2->string, entry2->length);
/* The second string starts right after the first copy and its extra
   byte. */
277 fstrcmp_bounded (memory, memory + entry1->length + 1, lower_bound);
282 /* This structure represents an entire ChangeLog file, after it was read
284 struct changelog_file
286 /* The entries, as a list. */
287 gl_list_t /* <struct entry *> */ entries_list;
288 /* The entries, as a list in opposite direction: read_changelog_file adds
   each entry at the front of this list while it appends the same entry to
   entries_list. */
289 gl_list_t /* <struct entry *> */ entries_reversed;
290 /* The entries, as an array of num_entries pointers, filled by
   read_changelog_file in the iteration order of entries_list. */
292 struct entry **entries;
295 /* Read a ChangeLog file into memory.
296 Return the contents in *RESULT.
   FILENAME is handed to read_file; when that returns NULL, a message
   naming the file is printed to stderr.
   The contents are split into entries. Each entry is appended to
   RESULT->entries_list and prepended to RESULT->entries_reversed; after
   the split, RESULT->num_entries is set to the list size and
   RESULT->entries receives the entries in list order. */
298 read_changelog_file (const char *filename, struct changelog_file *result)
300 /* Read the file in text mode, otherwise it's hard to recognize empty
303 char *contents = read_file (filename, &length);
304 if (contents == NULL)
306 fprintf (stderr, "could not read file '%s'\n", filename);
310 result->entries_list =
311 gl_list_create_empty (GL_LINKEDHASH_LIST, entry_equals, entry_hashcode,
313 result->entries_reversed =
314 gl_list_create_empty (GL_RBTREEHASH_LIST, entry_equals, entry_hashcode,
316 /* A ChangeLog file consists of ChangeLog entries. A ChangeLog entry starts
317 at a line following a blank line and that starts with a non-whitespace
318 character, or at the beginning of a file.
319 Split the file contents into entries. */
321 char *contents_end = contents + length;
322 char *start = contents;
323 while (start < contents_end)
325 /* Search the end of the current entry. */
329 while (ptr < contents_end)
331 ptr = memchr (ptr, '\n', contents_end - ptr);
/* Boundary test: at least two bytes remain, and the byte at ptr[1] is
   neither a newline, a tab nor a space. */
338 if (contents_end - ptr >= 2
340 && !(ptr[1] == '\n' || ptr[1] == '\t' || ptr[1] == ' '))
/* The entry covers the bytes from start up to, but not including, ptr.
   It points into CONTENTS; nothing is copied. */
347 curr = entry_create (start, ptr - start);
348 gl_list_add_last (result->entries_list, curr);
349 gl_list_add_first (result->entries_reversed, curr);
/* Flatten the list into an array so that entries can be addressed by
   index. */
355 result->num_entries = gl_list_size (result->entries_list);
356 result->entries = XNMALLOC (result->num_entries, struct entry *);
359 gl_list_iterator_t iter = gl_list_iterator (result->entries_list);
362 while (gl_list_iterator_next (&iter, &elt, &node))
363 result->entries[index++] = (struct entry *) elt;
364 gl_list_iterator_free (&iter);
365 ASSERT (index == result->num_entries);
369 /* A mapping (correspondence) between entries of FILE1 and of FILE2.
   compute_mapping allocates both index arrays and marks every slot as -2;
   entries_mapping_get and entries_mapping_reverse_get replace those
   markers on demand. A successful match is always recorded in both
   arrays. */
370 struct entries_mapping
372 struct changelog_file *file1;
373 struct changelog_file *file2;
374 /* Mapping from indices in FILE1 to indices in FILE2.
375 A value -1 means that the entry from FILE1 is not found in FILE2.
376 A value -2 means that it has not yet been computed. */
377 ssize_t *index_mapping;
378 /* Mapping from indices in FILE2 to indices in FILE1.
379 A value -1 means that the entry from FILE2 is not found in FILE1.
380 A value -2 means that it has not yet been computed. */
381 ssize_t *index_mapping_reverse;
384 /* Look up (or lazily compute) the mapping of an entry in FILE1.
385 i is the index in FILE1.
386 Return the index in FILE2, or -1 when the entry is not found in FILE2.
   The answer is cached in MAPPING->index_mapping[i]. When a partner is
   found, the opposite direction is stored in
   MAPPING->index_mapping_reverse as well. */
388 entries_mapping_get (struct entries_mapping *mapping, ssize_t i)
/* A value below -1 is the not yet computed marker. */
390 if (mapping->index_mapping[i] < -1)
392 struct changelog_file *file1 = mapping->file1;
393 struct changelog_file *file2 = mapping->file2;
394 size_t n1 = file1->num_entries;
395 size_t n2 = file2->num_entries;
396 struct entry *entry_i = file1->entries[i];
399 /* Search whether it approximately occurs in file2. */
401 double best_j_similarity = 0.0;
/* Scan FILE2 from the last entry to the first, looking only at entries
   that have no partner yet. The best similarity found so far is passed to
   entry_fstrcmp as the lower bound. */
402 for (j = n2 - 1; j >= 0; j--)
403 if (mapping->index_mapping_reverse[j] < 0)
406 entry_fstrcmp (entry_i, file2->entries[j], best_j_similarity);
407 if (similarity > best_j_similarity)
410 best_j_similarity = similarity;
413 if (best_j_similarity >= FSTRCMP_THRESHOLD)
415 /* Found a similar entry in file2. */
416 struct entry *entry_j = file2->entries[best_j];
417 /* Search whether it approximately occurs in file1 at index i. */
419 double best_i_similarity = 0.0;
/* Same scan in the other direction, over the FILE1 entries that have no
   partner yet. */
421 for (ii = n1 - 1; ii >= 0; ii--)
422 if (mapping->index_mapping[ii] < 0)
425 entry_fstrcmp (file1->entries[ii], entry_j,
427 if (similarity > best_i_similarity)
430 best_i_similarity = similarity;
/* Accept the pair only when the match is mutual: entry i must itself be
   the best candidate for entry best_j. */
433 if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i)
435 mapping->index_mapping[i] = best_j;
436 mapping->index_mapping_reverse[best_j] = i;
439 if (mapping->index_mapping[i] < -1)
440 /* It does not approximately occur in FILE2.
441 Remember it, for next time. */
442 mapping->index_mapping[i] = -1;
444 return mapping->index_mapping[i];
447 /* Look up (or lazily compute) the mapping of an entry in FILE2.
448 j is the index in FILE2.
449 Return the index in FILE1, or -1 when the entry is not found in FILE1.
   This is the mirror image of entries_mapping_get: the answer is cached
   in MAPPING->index_mapping_reverse[j], and a successful match is also
   stored in MAPPING->index_mapping. */
451 entries_mapping_reverse_get (struct entries_mapping *mapping, ssize_t j)
/* A value below -1 is the not yet computed marker. */
453 if (mapping->index_mapping_reverse[j] < -1)
455 struct changelog_file *file1 = mapping->file1;
456 struct changelog_file *file2 = mapping->file2;
457 size_t n1 = file1->num_entries;
458 size_t n2 = file2->num_entries;
459 struct entry *entry_j = file2->entries[j];
462 /* Search whether it approximately occurs in file1. */
464 double best_i_similarity = 0.0;
/* Scan FILE1 from the last entry to the first, looking only at entries
   that have no partner yet. The best similarity found so far is passed to
   entry_fstrcmp as the lower bound. */
465 for (i = n1 - 1; i >= 0; i--)
466 if (mapping->index_mapping[i] < 0)
469 entry_fstrcmp (file1->entries[i], entry_j, best_i_similarity);
470 if (similarity > best_i_similarity)
473 best_i_similarity = similarity;
476 if (best_i_similarity >= FSTRCMP_THRESHOLD)
478 /* Found a similar entry in file1. */
479 struct entry *entry_i = file1->entries[best_i];
480 /* Search whether it approximately occurs in file2 at index j. */
482 double best_j_similarity = 0.0;
/* Same scan in the other direction, over the FILE2 entries that have no
   partner yet. */
484 for (jj = n2 - 1; jj >= 0; jj--)
485 if (mapping->index_mapping_reverse[jj] < 0)
488 entry_fstrcmp (entry_i, file2->entries[jj],
490 if (similarity > best_j_similarity)
493 best_j_similarity = similarity;
/* Accept the pair only when the match is mutual: entry j must itself be
   the best candidate for entry best_i. */
496 if (best_j_similarity >= FSTRCMP_THRESHOLD && best_j == j)
498 mapping->index_mapping_reverse[j] = best_i;
499 mapping->index_mapping[best_i] = j;
502 if (mapping->index_mapping_reverse[j] < -1)
503 /* It does not approximately occur in FILE1.
504 Remember it, for next time. */
505 mapping->index_mapping_reverse[j] = -1;
507 return mapping->index_mapping_reverse[j];
510 /* Compute a mapping (correspondence) between entries of FILE1 and of FILE2.
511 The correspondence also takes into account small modifications; i.e. the
512 indicated relation is not equality of entries but best-match similarity
514 If FULL is true, the maximum of matching is done up-front. If it is false,
515 it is done in a lazy way through the functions entries_mapping_get and
516 entries_mapping_reverse_get.
517 Return the result in *RESULT.
   Both index arrays are allocated here with XNMALLOC and handed over to
   *RESULT. */
519 compute_mapping (struct changelog_file *file1, struct changelog_file *file2,
521 struct entries_mapping *result)
523 /* Mapping from indices in file1 to indices in file2. */
524 ssize_t *index_mapping;
525 /* Mapping from indices in file2 to indices in file1. */
526 ssize_t *index_mapping_reverse;
527 size_t n1 = file1->num_entries;
528 size_t n2 = file2->num_entries;
/* Every slot starts as -2, the not yet computed marker. */
531 index_mapping = XNMALLOC (n1, ssize_t);
532 for (i = 0; i < n1; i++)
533 index_mapping[i] = -2;
535 index_mapping_reverse = XNMALLOC (n2, ssize_t);
536 for (j = 0; j < n2; j++)
537 index_mapping_reverse[j] = -2;
/* Exact matches, walking FILE1 from the last entry to the first. The
   lookup goes through the hashed entries_reversed lists. */
539 for (i = n1 - 1; i >= 0; i--)
540 /* Take an entry from file1. */
541 if (index_mapping[i] < -1)
543 struct entry *entry = file1->entries[i];
544 /* Search whether it occurs in file2. */
545 j = gl_list_indexof (file2->entries_reversed, entry);
549 /* Found an exact correspondence. */
550 /* If index_mapping_reverse[j] >= 0, we have already seen other
551 copies of this entry, and there were more occurrences of it in
552 file1 than in file2. In this case, do nothing. */
553 if (index_mapping_reverse[j] < 0)
555 index_mapping[i] = j;
556 index_mapping_reverse[j] = i;
557 /* Look for more occurrences of the same entry. Match them
558 as long as they pair up. Unpaired occurrences of the same
559 entry are left without mapping. */
570 gl_list_indexof_from (file1->entries_reversed,
575 gl_list_indexof_from (file2->entries_reversed,
/* Positions found in the reversed lists are converted back into forward
   indices. */
579 curr_i = n1 - 1 - next_i;
580 curr_j = n2 - 1 - next_j;
581 ASSERT (index_mapping[curr_i] < 0);
582 ASSERT (index_mapping_reverse[curr_j] < 0);
583 index_mapping[curr_i] = curr_j;
584 index_mapping_reverse[curr_j] = curr_i;
/* Publish the arrays before any fuzzy lookup, because entries_mapping_get
   reads them through RESULT. */
591 result->file1 = file1;
592 result->file2 = file2;
593 result->index_mapping = index_mapping;
594 result->index_mapping_reverse = index_mapping_reverse;
/* Fuzzy lookup for every FILE1 entry, from the last to the first;
   entries_mapping_get caches each answer. */
597 for (i = n1 - 1; i >= 0; i--)
598 entries_mapping_get (result, i);
601 /* An "edit" is a textual modification performed by the user, that needs to
602 be applied to the other file. */
605 /* Some consecutive entries were added. */
607 /* Some consecutive entries were removed; some other consecutive entries
608 were added at the same position. (Not necessarily the same number of
611 /* Some consecutive entries were removed. */
615 /* This structure represents an edit: the index ranges that one addition,
   change or removal covers. Each range is stored as a first and a last
   index. */
619 /* Range of indices into the entries of FILE1. */
620 ssize_t i1, i2; /* first, last index; only used for CHANGE, REMOVAL */
621 /* Range of indices into the entries of FILE2. */
622 ssize_t j1, j2; /* first, last index; only used for ADDITION, CHANGE */
625 /* This structure represents the differences from one file, FILE1, to another
629 /* An array mapping FILE1 indices to FILE2 indices (or -1 when the entry
630 from FILE1 is not found in FILE2). */
631 ssize_t *index_mapping;
632 /* An array mapping FILE2 indices to FILE1 indices (or -1 when the entry
633 from FILE2 is not found in FILE1).
   compute_differences allocates this array and index_mapping and stores
   them here. */
634 ssize_t *index_mapping_reverse;
635 /* The edits that transform FILE1 into FILE2, kept as an array of num_edits
   pointers that compute_differences builds from its list of edits. */
640 /* Import the difference detection algorithm from GNU diff.
   The macros below parameterize that algorithm for sequences of ChangeLog
   entries; compute_differences then runs it through compareseq. */
/* The compared sequences consist of entry pointers, tested for equality
   with entry_equals and indexed with ssize_t offsets. */
641 #define ELEMENT struct entry *
642 #define EQUAL entry_equals
643 #define OFFSET ssize_t
/* Two extra members of the comparison context, in which the callbacks
   below record their findings. */
644 #define EXTRA_CONTEXT_FIELDS \
645 ssize_t *index_mapping; \
646 ssize_t *index_mapping_reverse;
/* An element deleted from the first sequence gets -1 in index_mapping. */
647 #define NOTE_DELETE(ctxt, xoff) \
648 ctxt->index_mapping[xoff] = -1
/* An element inserted into the second sequence gets -1 in
   index_mapping_reverse. */
649 #define NOTE_INSERT(ctxt, yoff) \
650 ctxt->index_mapping_reverse[yoff] = -1
653 /* Compute the differences between the entries of FILE1 and the entries of
656 compute_differences (struct changelog_file *file1, struct changelog_file *file2,
657 struct differences *result)
659 /* Unlike compute_mapping, which mostly ignores the order of the entries and
660 therefore works well when some entries are permuted, here we use the order.
661 I think this is needed in order to distinguish changes from
662 additions+removals; I don't know how to say what is a "change" if the
663 files are considered as unordered sets of entries. */
665 size_t n1 = file1->num_entries;
666 size_t n2 = file2->num_entries;
669 gl_list_t /* <struct edit *> */ edits;
/* Set up the comparison context that compareseq works on. */
671 ctxt.xvec = file1->entries;
672 ctxt.yvec = file2->entries;
/* Both index arrays start out as 0. compareseq overwrites the slots of
   removed or added entries with -1 through NOTE_DELETE and NOTE_INSERT. */
673 ctxt.index_mapping = XNMALLOC (n1, ssize_t);
674 for (i = 0; i < n1; i++)
675 ctxt.index_mapping[i] = 0;
676 ctxt.index_mapping_reverse = XNMALLOC (n2, ssize_t);
677 for (j = 0; j < n2; j++)
678 ctxt.index_mapping_reverse[j] = 0;
/* A single allocation serves both diagonal vectors: fdiag points n2 + 1
   elements into it, and bdiag starts n1 + n2 + 3 elements after fdiag. */
679 ctxt.fdiag = XNMALLOC (2 * (n1 + n2 + 3), ssize_t) + n2 + 1;
680 ctxt.bdiag = ctxt.fdiag + n1 + n2 + 3;
681 ctxt.too_expensive = n1 + n2;
683 /* Store in ctxt.index_mapping and ctxt.index_mapping_reverse a -1 for
684 each removed or added entry. */
685 compareseq (0, n1, 0, n2, 0, &ctxt);
687 /* Complete the index_mapping and index_mapping_reverse arrays. */
/* Walk both files in step: pass over the entries flagged -1, then pair
   the next remaining entry of FILE1 with the next remaining entry of
   FILE2. */
690 while (i < n1 || j < n2)
692 while (i < n1 && ctxt.index_mapping[i] < 0)
694 while (j < n2 && ctxt.index_mapping_reverse[j] < 0)
696 ASSERT ((i < n1) == (j < n2));
697 if (i == n1 && j == n2)
699 ctxt.index_mapping[i] = j;
700 ctxt.index_mapping_reverse[j] = i;
705 /* Create the edits. */
706 edits = gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
709 while (i < n1 || j < n2)
715 e = XMALLOC (struct edit);
719 gl_list_add_last (edits, e);
726 e = XMALLOC (struct edit);
730 gl_list_add_last (edits, e);
733 if (ctxt.index_mapping[i] >= 0)
735 if (ctxt.index_mapping_reverse[j] >= 0)
737 ASSERT (ctxt.index_mapping[i] == j);
738 ASSERT (ctxt.index_mapping_reverse[j] == i);
745 ASSERT (ctxt.index_mapping_reverse[j] < 0);
746 e = XMALLOC (struct edit);
751 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
753 gl_list_add_last (edits, e);
758 if (ctxt.index_mapping_reverse[j] >= 0)
761 ASSERT (ctxt.index_mapping[i] < 0);
762 e = XMALLOC (struct edit);
767 while (i < n1 && ctxt.index_mapping[i] < 0);
769 gl_list_add_last (edits, e);
774 ASSERT (ctxt.index_mapping[i] < 0);
775 ASSERT (ctxt.index_mapping_reverse[j] < 0);
776 e = XMALLOC (struct edit);
781 while (i < n1 && ctxt.index_mapping[i] < 0);
786 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
788 gl_list_add_last (edits, e);
/* Hand the index arrays over to *RESULT and copy the list of edits into a
   plain array. */
793 result->index_mapping = ctxt.index_mapping;
794 result->index_mapping_reverse = ctxt.index_mapping_reverse;
795 result->num_edits = gl_list_size (edits);
796 result->edits = XNMALLOC (result->num_edits, struct edit *);
799 gl_list_iterator_t iter = gl_list_iterator (edits);
802 while (gl_list_iterator_next (&iter, &elt, &node))
803 result->edits[index++] = (struct edit *) elt;
804 gl_list_iterator_free (&iter);
805 ASSERT (index == result->num_edits);
809 /* An empty entry. Its first two members are initialized to NULL and 0;
   the members that have no initializer are zero-initialized, as for any
   object with static storage duration. */
810 static struct entry empty_entry = { NULL, 0 };
812 /* Return the end of a paragraph.
814 OFFSET is an offset into the entry, OFFSET <= ENTRY->length.
815 Return the offset of the end of paragraph, as an offset <= ENTRY->length;
816 it is the start of a blank line or the end of the entry. */
818 find_paragraph_end (const struct entry *entry, size_t offset)
820 const char *string = entry->string;
821 size_t length = entry->length;
/* Look for the next newline at or after OFFSET, using memchr because the
   entry is a counted memory region. */
825 const char *nl = memchr (string + offset, '\n', length - offset);
/* Continue right behind that newline. A second newline at this position
   means that a blank line starts here. */
828 offset = (nl - string) + 1;
829 if (offset < length && string[offset] == '\n')
834 /* Split a merged entry.
835 Given an old entry of the form
838 and a new entry of the form
842 where the two titles are the same and BODY and BODY' are very similar,
843 this computes two new entries
850 If the entries don't have this form, it returns false.
   The two new entries are stored in NEW_SPLIT[0] and NEW_SPLIT[1]. */
852 try_split_merged_entry (const struct entry *old_entry,
853 const struct entry *new_entry,
854 struct entry *new_split[2])
/* The title of an entry is its first paragraph. */
856 size_t old_title_len = find_paragraph_end (old_entry, 0);
857 size_t new_title_len = find_paragraph_end (new_entry, 0);
858 struct entry old_body;
859 struct entry new_body;
860 size_t best_split_offset;
861 double best_similarity;
/* Both entries must start with exactly the same title bytes. */
865 if (!(old_title_len == new_title_len
866 && memcmp (old_entry->string, new_entry->string, old_title_len) == 0))
/* The old body is everything behind the title of the old entry. */
869 old_body.string = old_entry->string + old_title_len;
870 old_body.length = old_entry->length - old_title_len;
872 /* Determine where to split the new entry.
873 This is done by maximizing the similarity between BODY and BODY'. */
874 best_split_offset = split_offset = new_title_len;
875 best_similarity = 0.0;
/* Candidate body: the tail of the new entry starting at split_offset. */
880 new_body.string = new_entry->string + split_offset;
881 new_body.length = new_entry->length - split_offset;
883 entry_fstrcmp (&old_body, &new_body, best_similarity);
884 if (similarity > best_similarity)
886 best_split_offset = split_offset;
887 best_similarity = similarity;
889 if (best_similarity == 1.0)
890 /* It cannot get better. */
/* Move on to the next paragraph end of the new entry, as long as text
   remains behind the current candidate. */
893 if (split_offset < new_entry->length)
894 split_offset = find_paragraph_end (new_entry, split_offset + 1);
899 /* BODY' should not be empty. */
900 if (best_split_offset == new_entry->length)
902 ASSERT (new_entry->string[best_split_offset] == '\n');
904 /* A certain similarity between BODY and BODY' is required. */
905 if (best_similarity < FSTRCMP_STRICTER_THRESHOLD)
/* First result: the head of the new entry up to and including the newline
   at best_split_offset. It shares the storage of NEW_ENTRY. */
908 new_split[0] = entry_create (new_entry->string, best_split_offset + 1);
/* Second result: a newly allocated region holding the title followed by
   the text of the new entry from best_split_offset to its end. */
911 size_t len1 = new_title_len;
912 size_t len2 = new_entry->length - best_split_offset;
913 char *combined = XNMALLOC (len1 + len2, char);
914 memcpy (combined, new_entry->string, len1);
915 memcpy (combined + len1, new_entry->string + best_split_offset, len2);
916 new_split[1] = entry_create (combined, len1 + len2);
922 /* Write the contents of an entry to the output stream FP.
   The bytes are written with fwrite, so embedded NUL bytes are output as
   well. Nothing is written for an entry of length 0, and the result of
   fwrite is not examined here. */
924 entry_write (FILE *fp, struct entry *entry)
926 if (entry->length > 0)
927 fwrite (entry->string, 1, entry->length, fp);
930 /* This structure represents a conflict.
931 A conflict can occur for various reasons.
   It holds two arrays of entry pointers, each with its own element count;
   conflict_write prints them as the two sides of the conflict. */
934 /* Parts from the ancestor file. */
935 size_t num_old_entries;
936 struct entry **old_entries;
937 /* Parts of the modified file. */
938 size_t num_modified_entries;
939 struct entry **modified_entries;
942 /* Write a conflict to the output stream FP, including markers.
   The old entries of C are written between the <<<<<<< and ======= marker
   lines, the modified entries between the ======= and >>>>>>> marker
   lines. Each entry is output unchanged through entry_write. */
944 conflict_write (FILE *fp, struct conflict *c)
948 /* Use the same syntax as git's default merge driver.
949 Don't indent the contents of the entries (with things like ">" or "-"),
950 otherwise the user needs more textual editing to resolve the conflict. */
951 fputs ("<<<<<<<\n", fp);
952 for (i = 0; i < c->num_old_entries; i++)
953 entry_write (fp, c->old_entries[i]);
954 fputs ("=======\n", fp);
955 for (i = 0; i < c->num_modified_entries; i++)
956 entry_write (fp, c->modified_entries[i]);
957 fputs (">>>>>>>\n", fp);
/* Long command line options, passed to getopt_long by main. None of them
   takes an argument. The help and version options are reported as the
   characters h and V; the split-merged-entry option is reported as
   CHAR_MAX + 1, a value that no option character can have. */
961 static const struct option long_options[] =
963 { "help", no_argument, NULL, 'h' },
964 { "split-merged-entry", no_argument, NULL, CHAR_MAX + 1 },
965 { "version", no_argument, NULL, 'V' },
969 /* Print a usage message and exit.
   When STATUS is not EXIT_SUCCESS, a hint that points to the --help option
   is written to stderr. The help text itself is written with printf and
   describes the three file name arguments and the informative options.
   The description of --split-merged-entry is compiled out with #if 0. */
973 if (status != EXIT_SUCCESS)
974 fprintf (stderr, "Try '%s --help' for more information.\n",
978 printf ("Usage: %s [OPTION] O-FILE-NAME A-FILE-NAME B-FILE-NAME\n",
981 printf ("Merges independent modifications of a ChangeLog style file.\n");
982 printf ("O-FILE-NAME names the original file, the ancestor of the two others.\n");
983 printf ("A-FILE-NAME names the publicly modified file.\n");
984 printf ("B-FILE-NAME names the user-modified file.\n");
985 printf ("Writes the merged file into A-FILE-NAME.\n");
987 #if 0 /* --split-merged-entry is now on by default. */
988 printf ("Operation modifiers:\n");
990 --split-merged-entry Possibly split a merged entry between paragraphs.\n\
991 Use this if you have the habit to merge unrelated\n\
992 entries into a single one, separated only by a\n\
993 newline, just because they happened on the same\n\
997 printf ("Informative output:\n");
998 printf (" -h, --help display this help and exit\n");
999 printf (" -V, --version output version information and exit\n");
1001 fputs ("Report bugs to <bug-gnulib@gnu.org>.\n",
1009 main (int argc, char *argv[])
1014 bool split_merged_entry;
1016 /* Set program name for messages. */
1017 set_program_name (argv[0]);
1019 /* Set default values for variables. */
1022 split_merged_entry = true;
1024 /* Parse command line options. */
1025 while ((optchar = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
1028 case '\0': /* Long option. */
1036 case CHAR_MAX + 1: /* --split-merged-entry */
1039 usage (EXIT_FAILURE);
1044 /* Version information is requested. */
1045 printf ("%s\n", program_name);
1046 printf ("Copyright (C) %s Free Software Foundation, Inc.\n\
1047 License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n\
1048 This is free software: you are free to change and redistribute it.\n\
1049 There is NO WARRANTY, to the extent permitted by law.\n\
1052 printf ("Written by %s.\n", "Bruno Haible");
1053 exit (EXIT_SUCCESS);
1058 /* Help is requested. */
1059 usage (EXIT_SUCCESS);
1062 /* Test argument count. */
1063 if (optind + 3 != argc)
1064 error (EXIT_FAILURE, 0, "expected three arguments");
1067 const char *ancestor_file_name; /* O-FILE-NAME */
1068 const char *destination_file_name; /* A-FILE-NAME */
1070 const char *other_file_name; /* B-FILE-NAME */
1071 const char *mainstream_file_name;
1072 const char *modified_file_name;
1073 struct changelog_file ancestor_file;
1074 struct changelog_file mainstream_file;
1075 struct changelog_file modified_file;
1076 /* Mapping from indices in ancestor_file to indices in mainstream_file. */
1077 struct entries_mapping mapping;
1078 struct differences diffs;
1079 gl_list_node_t *result_entries_pointers; /* array of pointers into result_entries */
1080 gl_list_t /* <struct entry *> */ result_entries;
1081 gl_list_t /* <struct conflict *> */ result_conflicts;
1083 ancestor_file_name = argv[optind];
1084 destination_file_name = argv[optind + 1];
1085 other_file_name = argv[optind + 2];
1087 /* Heuristic to determine whether it's a pull in downstream direction
1088 (e.g. pull from a centralized server) or a pull in upstream direction
1089 (e.g. "git stash apply").
1091 For ChangeLog this distinction is important. The difference between
1092 an "upstream" and a "downstream" repository is that more people are
1093 looking at the "upstream" repository. They want to be informed about
1094 changes and expect them to be shown at the top of the ChangeLog.
1095 When a user pulls downstream, on the other hand, he has two options:
1096 a) He gets the change entries from the central repository also at the
1097 top of his ChangeLog, and his own changes come after them.
1098 b) He gets the change entries from the central repository after those
1099 he has collected for his branch. His own change entries stay at
1100 the top of the ChangeLog file.
1101 In the case a) he has to reorder the ChangeLog before he can commit.
1102 No one does that. So most people want b).
1103 In other words, the order of entries in a ChangeLog should represent
1104 the order in which they have flowed (or will flow) into the *central*
1107 But in git this is fundamentally indistinguishable, because when Linus
1108 pulls patches from akpm and akpm pulls patches from Linus, it's not
1109 clear which of the two is more "upstream". Also, when you have many
1110 branches in a repository and pull from one to another, "git" has no way
1111 to know which branch is more "upstream" than the other. The git-tag(1)
1112 manual page also says:
1113 "One important aspect of git is it is distributed, and being
1114 distributed largely means there is no inherent "upstream" or
1115 "downstream" in the system."
1116 Therefore anyone who attempts to produce a ChangeLog from the merge
1119 Here we allow the user to specify the pull direction through an
1120 environment variable (GIT_UPSTREAM or GIT_DOWNSTREAM). If these two
1121 environment variables are not set, we assume a "simple single user"
1122 usage pattern: He manages local changes through stashes and uses
1123 "git pull" only to pull downstream.
1125 How to distinguish these situations? There are several hints:
1126 - During a "git stash apply", GIT_REFLOG_ACTION is not set. During
1127 a "git pull", it is set to 'pull '. During a "git pull --rebase",
1128 it is set to 'pull --rebase'. During a "git cherry-pick", it is
1129 set to 'cherry-pick'.
1130 - During a "git stash apply", there is an environment variable of
1131 the form GITHEAD_<40_hex_digits>='Stashed changes'. */
1135 var = getenv ("GIT_DOWNSTREAM");
1136 if (var != NULL && var[0] != '\0')
1140 var = getenv ("GIT_UPSTREAM");
1141 if (var != NULL && var[0] != '\0')
1145 var = getenv ("GIT_REFLOG_ACTION");
1146 #if 0 /* Debugging code */
1147 printf ("GIT_REFLOG_ACTION=|%s|\n", var);
1150 && ((strncmp (var, "pull", 4) == 0
1151 && c_strstr (var, " --rebase") == NULL)
1152 || strncmp (var, "merge origin", 12) == 0))
1156 /* "git stash apply", "git rebase", "git cherry-pick" and
1164 #if 0 /* Debugging code */
1167 printf ("First line of %%A:\n");
1168 sprintf (buf, "head -1 %s", destination_file_name); system (buf);
1169 printf ("First line of %%B:\n");
1170 sprintf (buf, "head -1 %s", other_file_name); system (buf);
1171 printf ("Guessing calling convention: %s\n",
1173 ? "%A = modified by user, %B = upstream"
1174 : "%A = upstream, %B = modified by user");
1180 mainstream_file_name = other_file_name;
1181 modified_file_name = destination_file_name;
1185 mainstream_file_name = destination_file_name;
1186 modified_file_name = other_file_name;
1189 /* Read the three files into memory. */
1190 read_changelog_file (ancestor_file_name, &ancestor_file);
1191 read_changelog_file (mainstream_file_name, &mainstream_file);
1192 read_changelog_file (modified_file_name, &modified_file);
1194 /* Compute correspondence between the entries of ancestor_file and of
1196 compute_mapping (&ancestor_file, &mainstream_file, false, &mapping);
1197 (void) entries_mapping_reverse_get; /* avoid gcc "defined but not" warning */
1199 /* Compute differences between the entries of ancestor_file and of
1201 compute_differences (&ancestor_file, &modified_file, &diffs);
1203 /* Compute the result. */
1204 result_entries_pointers =
1205 XNMALLOC (mainstream_file.num_entries, gl_list_node_t);
1207 gl_list_create_empty (GL_LINKED_LIST, entry_equals, entry_hashcode,
1211 for (k = 0; k < mainstream_file.num_entries; k++)
1212 result_entries_pointers[k] =
1213 gl_list_add_last (result_entries, mainstream_file.entries[k]);
1216 gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
1219 for (e = 0; e < diffs.num_edits; e++)
1221 struct edit *edit = diffs.edits[e];
1227 /* An addition to the top of modified_file.
1228 Apply it to the top of mainstream_file. */
1230 for (j = edit->j2; j >= edit->j1; j--)
1232 struct entry *added_entry = modified_file.entries[j];
1233 gl_list_add_first (result_entries, added_entry);
1242 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1243 ASSERT (i_before >= 0);
1244 i_after = (edit->j2 + 1 == modified_file.num_entries
1245 ? ancestor_file.num_entries
1246 : diffs.index_mapping_reverse[edit->j2 + 1]);
1247 ASSERT (i_after >= 0);
1248 ASSERT (i_after == i_before + 1);
1249 /* An addition between ancestor_file.entries[i_before] and
1250 ancestor_file.entries[i_after]. See whether these two
1251 entries still exist in mainstream_file and are still
1253 k_before = entries_mapping_get (&mapping, i_before);
1254 k_after = (i_after == ancestor_file.num_entries
1255 ? mainstream_file.num_entries
1256 : entries_mapping_get (&mapping, i_after));
1257 if (k_before >= 0 && k_after >= 0 && k_after == k_before + 1)
1259 /* Yes, the entry before and after are still neighbours
1260 in mainstream_file. Apply the addition between
1262 if (k_after == mainstream_file.num_entries)
1265 for (j = edit->j1; j <= edit->j2; j++)
1267 struct entry *added_entry = modified_file.entries[j];
1268 gl_list_add_last (result_entries, added_entry);
1273 gl_list_node_t node_k_after = result_entries_pointers[k_after];
1275 for (j = edit->j1; j <= edit->j2; j++)
1277 struct entry *added_entry = modified_file.entries[j];
1278 gl_list_add_before (result_entries, node_k_after, added_entry);
1284 /* It's not clear where the additions should be applied.
1285 Let the user decide. */
1286 struct conflict *c = XMALLOC (struct conflict);
1288 c->num_old_entries = 0;
1289 c->old_entries = NULL;
1290 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1291 c->modified_entries =
1292 XNMALLOC (c->num_modified_entries, struct entry *);
1293 for (j = edit->j1; j <= edit->j2; j++)
1294 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1295 gl_list_add_last (result_conflicts, c);
1301 /* Apply the removals one by one. */
1303 for (i = edit->i1; i <= edit->i2; i++)
1305 struct entry *removed_entry = ancestor_file.entries[i];
1306 ssize_t k = entries_mapping_get (&mapping, i);
1308 && entry_equals (removed_entry,
1309 mainstream_file.entries[k]))
1311 /* The entry to be removed still exists in
1312 mainstream_file. Remove it. */
1313 gl_list_node_set_value (result_entries,
1314 result_entries_pointers[k],
1319 /* The entry to be removed was already removed or was
1320 modified. This is a conflict. */
1321 struct conflict *c = XMALLOC (struct conflict);
1322 c->num_old_entries = 1;
1324 XNMALLOC (c->num_old_entries, struct entry *);
1325 c->old_entries[0] = removed_entry;
1326 c->num_modified_entries = 0;
1327 c->modified_entries = NULL;
1328 gl_list_add_last (result_conflicts, c);
1336 /* When the user usually merges entries from the same day,
1337 and this edit is at the top of the file: */
1338 if (split_merged_entry && edit->j1 == 0)
1340 /* Test whether the change is "simple merged", i.e. whether
1341 it consists of additions, followed by an augmentation of
1342 the first changed entry, followed by small changes of the
1355 modified_entry_n. */
1356 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1358 struct entry *split[2];
1359 bool simple_merged =
1360 try_split_merged_entry (ancestor_file.entries[edit->i1],
1361 modified_file.entries[edit->i1 + edit->j2 - edit->i2],
1366 for (i = edit->i1 + 1; i <= edit->i2; i++)
1367 if (entry_fstrcmp (ancestor_file.entries[i],
1368 modified_file.entries[i + edit->j2 - edit->i2],
1370 < FSTRCMP_THRESHOLD)
1372 simple_merged = false;
1378 /* Apply the additions at the top of modified_file.
1379 Apply each of the single-entry changes
1381 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1382 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1384 /* First part of the split modified_file.entries[edit->j2 - edit->i2 + edit->i1]: */
1385 gl_list_add_first (result_entries, split[0]);
1386 /* The additions. */
1387 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1389 struct entry *added_entry = modified_file.entries[j];
1390 gl_list_add_first (result_entries, added_entry);
1392 /* Now the single-entry changes. */
1393 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1395 struct entry *changed_entry =
1396 (j == edit->j1 + num_added
1398 : modified_file.entries[j]);
1399 size_t i = j + edit->i2 - edit->j2;
1400 ssize_t k = entries_mapping_get (&mapping, i);
1402 && entry_equals (ancestor_file.entries[i],
1403 mainstream_file.entries[k]))
1405 gl_list_node_set_value (result_entries,
1406 result_entries_pointers[k],
1409 else if (!entry_equals (ancestor_file.entries[i],
1412 struct conflict *c = XMALLOC (struct conflict);
1413 c->num_old_entries = 1;
1415 XNMALLOC (c->num_old_entries, struct entry *);
1416 c->old_entries[0] = ancestor_file.entries[i];
1417 c->num_modified_entries = 1;
1418 c->modified_entries =
1419 XNMALLOC (c->num_modified_entries, struct entry *);
1420 c->modified_entries[0] = changed_entry;
1421 gl_list_add_last (result_conflicts, c);
1431 /* Test whether the change is "simple", i.e. whether it
1432 consists of small changes to the old ChangeLog entries
1433 and additions before them:
1443 modified_entry_n. */
1444 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1448 for (i = edit->i1; i <= edit->i2; i++)
1449 if (entry_fstrcmp (ancestor_file.entries[i],
1450 modified_file.entries[i + edit->j2 - edit->i2],
1452 < FSTRCMP_THRESHOLD)
1462 /* Apply the additions and each of the single-entry
1463 changes separately. */
1464 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1465 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1468 /* A simple change at the top of modified_file.
1469 Apply it to the top of mainstream_file. */
1471 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1473 struct entry *added_entry = modified_file.entries[j];
1474 gl_list_add_first (result_entries, added_entry);
1476 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1478 struct entry *changed_entry = modified_file.entries[j];
1479 size_t i = j + edit->i2 - edit->j2;
1480 ssize_t k = entries_mapping_get (&mapping, i);
1482 && entry_equals (ancestor_file.entries[i],
1483 mainstream_file.entries[k]))
1485 gl_list_node_set_value (result_entries,
1486 result_entries_pointers[k],
1492 ASSERT (!entry_equals (ancestor_file.entries[i],
1494 c = XMALLOC (struct conflict);
1495 c->num_old_entries = 1;
1497 XNMALLOC (c->num_old_entries, struct entry *);
1498 c->old_entries[0] = ancestor_file.entries[i];
1499 c->num_modified_entries = 1;
1500 c->modified_entries =
1501 XNMALLOC (c->num_modified_entries, struct entry *);
1502 c->modified_entries[0] = changed_entry;
1503 gl_list_add_last (result_conflicts, c);
1513 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1514 ASSERT (i_before >= 0);
1515 /* A simple change after ancestor_file.entries[i_before].
1516 See whether this entry and the following num_changed
1517 entries still exist in mainstream_file and are still
1519 k_before = entries_mapping_get (&mapping, i_before);
1520 linear = (k_before >= 0);
1524 for (i = i_before + 1; i <= i_before + num_changed; i++)
1525 if (entries_mapping_get (&mapping, i) != k_before + (i - i_before))
1533 gl_list_node_t node_for_insert =
1534 result_entries_pointers[k_before + 1];
1536 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1538 struct entry *added_entry = modified_file.entries[j];
1539 gl_list_add_before (result_entries, node_for_insert, added_entry);
1541 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1543 struct entry *changed_entry = modified_file.entries[j];
1544 size_t i = j + edit->i2 - edit->j2;
1545 ssize_t k = entries_mapping_get (&mapping, i);
1547 if (entry_equals (ancestor_file.entries[i],
1548 mainstream_file.entries[k]))
1550 gl_list_node_set_value (result_entries,
1551 result_entries_pointers[k],
1557 ASSERT (!entry_equals (ancestor_file.entries[i],
1559 c = XMALLOC (struct conflict);
1560 c->num_old_entries = 1;
1562 XNMALLOC (c->num_old_entries, struct entry *);
1563 c->old_entries[0] = ancestor_file.entries[i];
1564 c->num_modified_entries = 1;
1565 c->modified_entries =
1566 XNMALLOC (c->num_modified_entries, struct entry *);
1567 c->modified_entries[0] = changed_entry;
1568 gl_list_add_last (result_conflicts, c);
1578 See whether the num_changed entries still exist
1579 unchanged in mainstream_file and are still
1583 bool linear_unchanged;
1585 k_first = entries_mapping_get (&mapping, i_first);
1588 && entry_equals (ancestor_file.entries[i_first],
1589 mainstream_file.entries[k_first]));
1590 if (linear_unchanged)
1593 for (i = i_first + 1; i <= edit->i2; i++)
1594 if (!(entries_mapping_get (&mapping, i) == k_first + (i - i_first)
1595 && entry_equals (ancestor_file.entries[i],
1596 mainstream_file.entries[entries_mapping_get (&mapping, i)])))
1598 linear_unchanged = false;
1602 if (linear_unchanged)
1604 gl_list_node_t node_for_insert =
1605 result_entries_pointers[k_first];
1608 for (j = edit->j2; j >= edit->j1; j--)
1610 struct entry *new_entry = modified_file.entries[j];
1611 gl_list_add_before (result_entries, node_for_insert, new_entry);
1613 for (i = edit->i1; i <= edit->i2; i++)
1615 ssize_t k = entries_mapping_get (&mapping, i);
1617 ASSERT (entry_equals (ancestor_file.entries[i],
1618 mainstream_file.entries[k]));
1619 gl_list_node_set_value (result_entries,
1620 result_entries_pointers[k],
1629 struct conflict *c = XMALLOC (struct conflict);
1631 c->num_old_entries = edit->i2 - edit->i1 + 1;
1633 XNMALLOC (c->num_old_entries, struct entry *);
1634 for (i = edit->i1; i <= edit->i2; i++)
1635 c->old_entries[i - edit->i1] = ancestor_file.entries[i];
1636 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1637 c->modified_entries =
1638 XNMALLOC (c->num_modified_entries, struct entry *);
1639 for (j = edit->j1; j <= edit->j2; j++)
1640 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1641 gl_list_add_last (result_conflicts, c);
1649 /* Output the result. */
1651 FILE *fp = fopen (destination_file_name, "w");
1654 fprintf (stderr, "could not write file '%s'\n", destination_file_name);
1655 exit (EXIT_FAILURE);
1658 /* Output the conflicts at the top. */
1660 size_t n = gl_list_size (result_conflicts);
1662 for (i = 0; i < n; i++)
1663 conflict_write (fp, (struct conflict *) gl_list_get_at (result_conflicts, i));
1665 /* Output the modified and unmodified entries, in order. */
1667 gl_list_iterator_t iter = gl_list_iterator (result_entries);
1669 gl_list_node_t node;
1670 while (gl_list_iterator_next (&iter, &elt, &node))
1671 entry_write (fp, (struct entry *) elt);
1672 gl_list_iterator_free (&iter);
1675 if (fwriteerror (fp))
1677 fprintf (stderr, "error writing to file '%s'\n", destination_file_name);
1678 exit (EXIT_FAILURE);
1682 exit (gl_list_size (result_conflicts) > 0 ? EXIT_FAILURE : EXIT_SUCCESS);