1 /* git-merge-changelog - git "merge" driver for GNU style ChangeLog files.
2 Copyright (C) 2008-2010 Bruno Haible <bruno@clisp.org>
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 The default merge driver of 'git' *always* produces conflicts when
19 pulling public modifications into a privately modified ChangeLog file.
20 This is because ChangeLog files are always modified at the top; the
21 default merge driver has no clue how to deal with this. Furthermore
22 the conflicts are presented with more <<<< ==== >>>> markers than
23 necessary; this is because the default merge driver makes pointless
24 efforts to look at the individual line changes inside a ChangeLog entry.
26 This program serves as a 'git' merge driver that avoids these problems.
27 1. It produces no conflict when ChangeLog entries have been inserted
28 at the top both in the public and in the private modification. It
29 puts the privately added entries above the publicly added entries.
30 2. It respects the structure of ChangeLog files: entries are not split
31 into lines but kept together.
32 3. It also handles the case of small modifications of past ChangeLog
33 entries, or of removed ChangeLog entries: they are merged as one
35 4. Conflicts are presented at the top of the file, rather than where
36 they occurred, so that the user will see them immediately. (Unlike
37 for source code written in some programming language, conflict markers
38 that are located several hundreds lines from the top will not cause
39 any syntax error and therefore would be likely to remain unnoticed.)
44 $ gnulib-tool --create-testdir --dir=/tmp/testdir123 git-merge-changelog
50 Additionally, for git users:
51 - Add to .git/config of the checkout (or to your $HOME/.gitconfig) the
54 [merge "merge-changelog"]
55 name = GNU-style ChangeLog merge driver
56 driver = /usr/local/bin/git-merge-changelog %O %A %B
58 - In every directory that contains a ChangeLog file, add a file
59 '.gitattributes' with this line:
61 ChangeLog merge=merge-changelog
63 (See "man 5 gitattributes" for more info.)
65 Additionally, for bzr users:
66 - Install the 'extmerge' bzr plug-in listed at
67 <http://doc.bazaar.canonical.com/plugins/en/index.html>
68 <http://wiki.bazaar.canonical.com/BzrPlugins>
69 - Add to your $HOME/.bazaar/bazaar.conf the line
71 external_merge = git-merge-changelog %b %T %o
73 - Then, to merge a conflict in a ChangeLog file, use
75 $ bzr extmerge ChangeLog
77 Additionally, for hg users:
78 - Add to your $HOME/.hgrc a couple of lines in a section [merge-tools].
79 See <http://www.selenic.com/mercurial/hgrc.5.html> section merge-tools
83 /* Use as an alternative to 'diff3':
84 git-merge-changelog performs the same role as "diff3 -m", just with
86 $ git-merge-changelog %O %A %B
91 /* Calling convention:
92 A merge driver is called with three filename arguments:
93 1. %O = The common ancestor of %A and %B.
94 2. %A = The file's contents from the "current branch".
95 3. %B = The file's contents from the "other branch"; this is the contents
98 In case of a "git stash apply" or of an upstream pull (e.g. from a subsystem
99 maintainer to a central maintainer) or of a downstream pull with --rebase:
100 2. %A = The file's newest pulled contents; modified by other committers.
101 3. %B = The user's newest copy of the file; modified by the user.
102 In case of a downstream pull (e.g. from a central repository to the user)
103 or of an upstream pull with --rebase:
104 2. %A = The user's newest copy of the file; modified by the user.
105 3. %B = The file's newest pulled contents; modified by other committers.
107 It should write its merged output into file %A. It can also echo some
108 remarks to stdout. It should exit with return code 0 if the merge could
109 be resolved cleanly, or with non-zero return code if there were conflicts.
113 The structure of a ChangeLog file: It consists of ChangeLog entries. A
114 ChangeLog entry starts at a line following a blank line and that starts with
115 a non-whitespace character, or at the beginning of a file.
116 The merge driver works as follows: It reads the three files into memory and
117 dissects them into ChangeLog entries. It then finds the differences between
118 %O and %B. They are classified as:
119 - removals (some consecutive entries removed),
120 - changes (some consecutive entries removed, some consecutive entries
122 - additions (some consecutive entries added).
123 The driver then attempts to apply the changes to %A.
124 To this effect, it first computes a correspondence between the entries in %O
125 and the entries in %A, using fuzzy string matching to still identify changed
127 - Removals are applied one by one. If the entry is present in %A, at any
128 position, it is removed. If not, the removal is marked as a conflict.
129 - Additions at the top of %B are applied at the top of %A.
130 - Additions between entry x and entry y (y may be the file end) in %B are
131 applied between entry x and entry y in %A (if they still exist and are
132 still consecutive in %A), otherwise the additions are marked as a
134 - Changes are categorized into "simple changes":
137 added_entry ... added_entry modified_entry1 ... modified_entryn,
138 where the correspondence between entry_i and modified_entry_i is still
139 clear; and "big changes": these are all the rest. Simple changes at the
140 top of %B are applied by putting the added entries at the top of %A. The
141 changes in simple changes are applied one by one; possibly leading to
142 single-entry conflicts. Big changes are applied en bloc, possibly
143 leading to conflicts spanning multiple entries.
144 - Conflicts are output at the top of the file and cause an exit status of
156 #include <sys/types.h>
159 #include "progname.h"
161 #include "read-file.h"
162 #include "gl_xlist.h"
163 #include "gl_array_list.h"
164 #include "gl_linkedhash_list.h"
165 #include "gl_rbtreehash_list.h"
166 #include "gl_linked_list.h"
168 #include "xmalloca.h"
171 #include "c-strstr.h"
172 #include "fwriteerror.h"
174 #define ASSERT(expr) \
182 #define FSTRCMP_THRESHOLD 0.6
183 #define FSTRCMP_STRICTER_THRESHOLD 0.8
185 /* Representation of a ChangeLog entry.
186 The string may contain NUL bytes; therefore it is represented as a plain
187 opaque memory region. */
192 /* Cache for the hash code. */
193 bool hashcode_cached;
198 The memory region passed by the caller must of indefinite extent. It is
199 *not* copied here. */
200 static struct entry *
201 entry_create (char *string, size_t length)
203 struct entry *result = XMALLOC (struct entry);
204 result->string = string;
205 result->length = length;
206 result->hashcode_cached = false;
210 /* Compare two entries for equality. */
212 entry_equals (const void *elt1, const void *elt2)
214 const struct entry *entry1 = (const struct entry *) elt1;
215 const struct entry *entry2 = (const struct entry *) elt2;
216 return entry1->length == entry2->length
217 && memcmp (entry1->string, entry2->string, entry1->length) == 0;
220 /* Return a hash code of the contents of a ChangeLog entry. */
222 entry_hashcode (const void *elt)
224 struct entry *entry = (struct entry *) elt;
225 if (!entry->hashcode_cached)
227 /* See http://www.haible.de/bruno/hashfunc.html. */
232 for (s = entry->string, n = entry->length; n > 0; s++, n--)
233 h = (unsigned char) *s + ((h << 9) | (h >> (sizeof (size_t) * CHAR_BIT - 9)));
236 entry->hashcode_cached = true;
238 return entry->hashcode;
241 /* Perform a fuzzy comparison of two ChangeLog entries.
242 Return a similarity measure of the two entries, a value between 0 and 1.
243 0 stands for very distinct, 1 for identical.
244 If the result is < LOWER_BOUND, an arbitrary other value < LOWER_BOUND can
247 entry_fstrcmp (const struct entry *entry1, const struct entry *entry2,
250 /* fstrcmp works only on NUL terminated strings. */
254 if (memchr (entry1->string, '\0', entry1->length) != NULL)
256 if (memchr (entry2->string, '\0', entry2->length) != NULL)
258 memory = (char *) xmalloca (entry1->length + 1 + entry2->length + 1);
261 memcpy (p, entry1->string, entry1->length);
264 memcpy (p, entry2->string, entry2->length);
269 fstrcmp_bounded (memory, memory + entry1->length + 1, lower_bound);
274 /* This structure represents an entire ChangeLog file, after it was read
276 struct changelog_file
278 /* The entries, as a list. */
279 gl_list_t /* <struct entry *> */ entries_list;
280 /* The entries, as a list in opposite direction. */
281 gl_list_t /* <struct entry *> */ entries_reversed;
282 /* The entries, as an array. */
284 struct entry **entries;
287 /* Read a ChangeLog file into memory.
288 Return the contents in *RESULT. */
290 read_changelog_file (const char *filename, struct changelog_file *result)
292 /* Read the file in text mode, otherwise it's hard to recognize empty
295 char *contents = read_file (filename, &length);
296 if (contents == NULL)
298 fprintf (stderr, "could not read file '%s'\n", filename);
302 result->entries_list =
303 gl_list_create_empty (GL_LINKEDHASH_LIST, entry_equals, entry_hashcode,
305 result->entries_reversed =
306 gl_list_create_empty (GL_RBTREEHASH_LIST, entry_equals, entry_hashcode,
308 /* A ChangeLog file consists of ChangeLog entries. A ChangeLog entry starts
309 at a line following a blank line and that starts with a non-whitespace
310 character, or at the beginning of a file.
311 Split the file contents into entries. */
313 char *contents_end = contents + length;
314 char *start = contents;
315 while (start < contents_end)
317 /* Search the end of the current entry. */
321 while (ptr < contents_end)
323 ptr = memchr (ptr, '\n', contents_end - ptr);
330 if (contents_end - ptr >= 2
332 && !(ptr[1] == '\n' || ptr[1] == '\t' || ptr[1] == ' '))
339 curr = entry_create (start, ptr - start);
340 gl_list_add_last (result->entries_list, curr);
341 gl_list_add_first (result->entries_reversed, curr);
347 result->num_entries = gl_list_size (result->entries_list);
348 result->entries = XNMALLOC (result->num_entries, struct entry *);
351 gl_list_iterator_t iter = gl_list_iterator (result->entries_list);
354 while (gl_list_iterator_next (&iter, &elt, &node))
355 result->entries[index++] = (struct entry *) elt;
356 gl_list_iterator_free (&iter);
357 ASSERT (index == result->num_entries);
361 /* A mapping (correspondence) between entries of FILE1 and of FILE2. */
362 struct entries_mapping
364 struct changelog_file *file1;
365 struct changelog_file *file2;
366 /* Mapping from indices in FILE1 to indices in FILE2.
367 A value -1 means that the entry from FILE1 is not found in FILE2.
368 A value -2 means that it has not yet been computed. */
369 ssize_t *index_mapping;
370 /* Mapping from indices in FILE2 to indices in FILE1.
371 A value -1 means that the entry from FILE2 is not found in FILE1.
372 A value -2 means that it has not yet been computed. */
373 ssize_t *index_mapping_reverse;
376 /* Look up (or lazily compute) the mapping of an entry in FILE1.
377 i is the index in FILE1.
378 Return the index in FILE2, or -1 when the entry is not found in FILE2. */
380 entries_mapping_get (struct entries_mapping *mapping, ssize_t i)
382 if (mapping->index_mapping[i] < -1)
384 struct changelog_file *file1 = mapping->file1;
385 struct changelog_file *file2 = mapping->file2;
386 size_t n1 = file1->num_entries;
387 size_t n2 = file2->num_entries;
388 struct entry *entry_i = file1->entries[i];
391 /* Search whether it approximately occurs in file2. */
393 double best_j_similarity = 0.0;
394 for (j = n2 - 1; j >= 0; j--)
395 if (mapping->index_mapping_reverse[j] < 0)
398 entry_fstrcmp (entry_i, file2->entries[j], best_j_similarity);
399 if (similarity > best_j_similarity)
402 best_j_similarity = similarity;
405 if (best_j_similarity >= FSTRCMP_THRESHOLD)
407 /* Found a similar entry in file2. */
408 struct entry *entry_j = file2->entries[best_j];
409 /* Search whether it approximately occurs in file1 at index i. */
411 double best_i_similarity = 0.0;
413 for (ii = n1 - 1; ii >= 0; ii--)
414 if (mapping->index_mapping[ii] < 0)
417 entry_fstrcmp (file1->entries[ii], entry_j,
419 if (similarity > best_i_similarity)
422 best_i_similarity = similarity;
425 if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i)
427 mapping->index_mapping[i] = best_j;
428 mapping->index_mapping_reverse[best_j] = i;
431 if (mapping->index_mapping[i] < -1)
432 /* It does not approximately occur in FILE2.
433 Remember it, for next time. */
434 mapping->index_mapping[i] = -1;
436 return mapping->index_mapping[i];
439 /* Look up (or lazily compute) the mapping of an entry in FILE2.
440 j is the index in FILE2.
441 Return the index in FILE1, or -1 when the entry is not found in FILE1. */
443 entries_mapping_reverse_get (struct entries_mapping *mapping, ssize_t j)
445 if (mapping->index_mapping_reverse[j] < -1)
447 struct changelog_file *file1 = mapping->file1;
448 struct changelog_file *file2 = mapping->file2;
449 size_t n1 = file1->num_entries;
450 size_t n2 = file2->num_entries;
451 struct entry *entry_j = file2->entries[j];
454 /* Search whether it approximately occurs in file1. */
456 double best_i_similarity = 0.0;
457 for (i = n1 - 1; i >= 0; i--)
458 if (mapping->index_mapping[i] < 0)
461 entry_fstrcmp (file1->entries[i], entry_j, best_i_similarity);
462 if (similarity > best_i_similarity)
465 best_i_similarity = similarity;
468 if (best_i_similarity >= FSTRCMP_THRESHOLD)
470 /* Found a similar entry in file1. */
471 struct entry *entry_i = file1->entries[best_i];
472 /* Search whether it approximately occurs in file2 at index j. */
474 double best_j_similarity = 0.0;
476 for (jj = n2 - 1; jj >= 0; jj--)
477 if (mapping->index_mapping_reverse[jj] < 0)
480 entry_fstrcmp (entry_i, file2->entries[jj],
482 if (similarity > best_j_similarity)
485 best_j_similarity = similarity;
488 if (best_j_similarity >= FSTRCMP_THRESHOLD && best_j == j)
490 mapping->index_mapping_reverse[j] = best_i;
491 mapping->index_mapping[best_i] = j;
494 if (mapping->index_mapping_reverse[j] < -1)
495 /* It does not approximately occur in FILE1.
496 Remember it, for next time. */
497 mapping->index_mapping_reverse[j] = -1;
499 return mapping->index_mapping_reverse[j];
502 /* Compute a mapping (correspondence) between entries of FILE1 and of FILE2.
503 The correspondence also takes into account small modifications; i.e. the
504 indicated relation is not equality of entries but best-match similarity
506 If FULL is true, the maximum of matching is done up-front. If it is false,
507 it is done in a lazy way through the functions entries_mapping_get and
508 entries_mapping_reverse_get.
509 Return the result in *RESULT. */
511 compute_mapping (struct changelog_file *file1, struct changelog_file *file2,
513 struct entries_mapping *result)
515 /* Mapping from indices in file1 to indices in file2. */
516 ssize_t *index_mapping;
517 /* Mapping from indices in file2 to indices in file1. */
518 ssize_t *index_mapping_reverse;
519 size_t n1 = file1->num_entries;
520 size_t n2 = file2->num_entries;
523 index_mapping = XNMALLOC (n1, ssize_t);
524 for (i = 0; i < n1; i++)
525 index_mapping[i] = -2;
527 index_mapping_reverse = XNMALLOC (n2, ssize_t);
528 for (j = 0; j < n2; j++)
529 index_mapping_reverse[j] = -2;
531 for (i = n1 - 1; i >= 0; i--)
532 /* Take an entry from file1. */
533 if (index_mapping[i] < -1)
535 struct entry *entry = file1->entries[i];
536 /* Search whether it occurs in file2. */
537 j = gl_list_indexof (file2->entries_reversed, entry);
541 /* Found an exact correspondence. */
542 /* If index_mapping_reverse[j] >= 0, we have already seen other
543 copies of this entry, and there were more occurrences of it in
544 file1 than in file2. In this case, do nothing. */
545 if (index_mapping_reverse[j] < 0)
547 index_mapping[i] = j;
548 index_mapping_reverse[j] = i;
549 /* Look for more occurrences of the same entry. Match them
550 as long as they pair up. Unpaired occurrences of the same
551 entry are left without mapping. */
562 gl_list_indexof_from (file1->entries_reversed,
567 gl_list_indexof_from (file2->entries_reversed,
571 curr_i = n1 - 1 - next_i;
572 curr_j = n2 - 1 - next_j;
573 ASSERT (index_mapping[curr_i] < 0);
574 ASSERT (index_mapping_reverse[curr_j] < 0);
575 index_mapping[curr_i] = curr_j;
576 index_mapping_reverse[curr_j] = curr_i;
583 result->file1 = file1;
584 result->file2 = file2;
585 result->index_mapping = index_mapping;
586 result->index_mapping_reverse = index_mapping_reverse;
589 for (i = n1 - 1; i >= 0; i--)
590 entries_mapping_get (result, i);
593 /* An "edit" is a textual modification performed by the user, that needs to
594 be applied to the other file. */
597 /* Some consecutive entries were added. */
599 /* Some consecutive entries were removed; some other consecutive entries
600 were added at the same position. (Not necessarily the same number of
603 /* Some consecutive entries were removed. */
607 /* This structure represents an edit. */
611 /* Range of indices into the entries of FILE1. */
612 ssize_t i1, i2; /* first, last index; only used for CHANGE, REMOVAL */
613 /* Range of indices into the entries of FILE2. */
614 ssize_t j1, j2; /* first, last index; only used for ADDITION, CHANGE */
617 /* This structure represents the differences from one file, FILE1, to another
621 /* An array mapping FILE1 indices to FILE2 indices (or -1 when the entry
622 from FILE1 is not found in FILE2). */
623 ssize_t *index_mapping;
624 /* An array mapping FILE2 indices to FILE1 indices (or -1 when the entry
625 from FILE2 is not found in FILE1). */
626 ssize_t *index_mapping_reverse;
627 /* The edits that transform FILE1 into FILE2. */
632 /* Import the difference detection algorithm from GNU diff. */
633 #define ELEMENT struct entry *
634 #define EQUAL entry_equals
635 #define OFFSET ssize_t
636 #define EXTRA_CONTEXT_FIELDS \
637 ssize_t *index_mapping; \
638 ssize_t *index_mapping_reverse;
639 #define NOTE_DELETE(ctxt, xoff) \
640 ctxt->index_mapping[xoff] = -1
641 #define NOTE_INSERT(ctxt, yoff) \
642 ctxt->index_mapping_reverse[yoff] = -1
645 /* Compute the differences between the entries of FILE1 and the entries of
648 compute_differences (struct changelog_file *file1, struct changelog_file *file2,
649 struct differences *result)
651 /* Unlike compute_mapping, which mostly ignores the order of the entries and
652 therefore works well when some entries are permuted, here we use the order.
653 I think this is needed in order to distinguish changes from
654 additions+removals; I don't know how to say what is a "change" if the
655 files are considered as unordered sets of entries. */
657 size_t n1 = file1->num_entries;
658 size_t n2 = file2->num_entries;
661 gl_list_t /* <struct edit *> */ edits;
663 ctxt.xvec = file1->entries;
664 ctxt.yvec = file2->entries;
665 ctxt.index_mapping = XNMALLOC (n1, ssize_t);
666 for (i = 0; i < n1; i++)
667 ctxt.index_mapping[i] = 0;
668 ctxt.index_mapping_reverse = XNMALLOC (n2, ssize_t);
669 for (j = 0; j < n2; j++)
670 ctxt.index_mapping_reverse[j] = 0;
671 ctxt.fdiag = XNMALLOC (2 * (n1 + n2 + 3), ssize_t) + n2 + 1;
672 ctxt.bdiag = ctxt.fdiag + n1 + n2 + 3;
673 ctxt.too_expensive = n1 + n2;
675 /* Store in ctxt.index_mapping and ctxt.index_mapping_reverse a -1 for
676 each removed or added entry. */
677 compareseq (0, n1, 0, n2, 0, &ctxt);
679 /* Complete the index_mapping and index_mapping_reverse arrays. */
682 while (i < n1 || j < n2)
684 while (i < n1 && ctxt.index_mapping[i] < 0)
686 while (j < n2 && ctxt.index_mapping_reverse[j] < 0)
688 ASSERT ((i < n1) == (j < n2));
689 if (i == n1 && j == n2)
691 ctxt.index_mapping[i] = j;
692 ctxt.index_mapping_reverse[j] = i;
697 /* Create the edits. */
698 edits = gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
701 while (i < n1 || j < n2)
707 e = XMALLOC (struct edit);
711 gl_list_add_last (edits, e);
718 e = XMALLOC (struct edit);
722 gl_list_add_last (edits, e);
725 if (ctxt.index_mapping[i] >= 0)
727 if (ctxt.index_mapping_reverse[j] >= 0)
729 ASSERT (ctxt.index_mapping[i] == j);
730 ASSERT (ctxt.index_mapping_reverse[j] == i);
737 ASSERT (ctxt.index_mapping_reverse[j] < 0);
738 e = XMALLOC (struct edit);
743 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
745 gl_list_add_last (edits, e);
750 if (ctxt.index_mapping_reverse[j] >= 0)
753 ASSERT (ctxt.index_mapping[i] < 0);
754 e = XMALLOC (struct edit);
759 while (i < n1 && ctxt.index_mapping[i] < 0);
761 gl_list_add_last (edits, e);
766 ASSERT (ctxt.index_mapping[i] < 0);
767 ASSERT (ctxt.index_mapping_reverse[j] < 0);
768 e = XMALLOC (struct edit);
773 while (i < n1 && ctxt.index_mapping[i] < 0);
778 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
780 gl_list_add_last (edits, e);
785 result->index_mapping = ctxt.index_mapping;
786 result->index_mapping_reverse = ctxt.index_mapping_reverse;
787 result->num_edits = gl_list_size (edits);
788 result->edits = XNMALLOC (result->num_edits, struct edit *);
791 gl_list_iterator_t iter = gl_list_iterator (edits);
794 while (gl_list_iterator_next (&iter, &elt, &node))
795 result->edits[index++] = (struct edit *) elt;
796 gl_list_iterator_free (&iter);
797 ASSERT (index == result->num_edits);
801 /* An empty entry. */
802 static struct entry empty_entry = { NULL, 0 };
804 /* Return the end a paragraph.
806 OFFSET is an offset into the entry, OFFSET <= ENTRY->length.
807 Return the offset of the end of paragraph, as an offset <= ENTRY->length;
808 it is the start of a blank line or the end of the entry. */
810 find_paragraph_end (const struct entry *entry, size_t offset)
812 const char *string = entry->string;
813 size_t length = entry->length;
817 const char *nl = memchr (string + offset, '\n', length - offset);
820 offset = (nl - string) + 1;
821 if (offset < length && string[offset] == '\n')
826 /* Split a merged entry.
827 Given an old entry of the form
830 and a new entry of the form
834 where the two titles are the same and BODY and BODY' are very similar,
835 this computes two new entries
842 If the entries don't have this form, it returns false. */
844 try_split_merged_entry (const struct entry *old_entry,
845 const struct entry *new_entry,
846 struct entry *new_split[2])
848 size_t old_title_len = find_paragraph_end (old_entry, 0);
849 size_t new_title_len = find_paragraph_end (new_entry, 0);
850 struct entry old_body;
851 struct entry new_body;
852 size_t best_split_offset;
853 double best_similarity;
857 if (!(old_title_len == new_title_len
858 && memcmp (old_entry->string, new_entry->string, old_title_len) == 0))
861 old_body.string = old_entry->string + old_title_len;
862 old_body.length = old_entry->length - old_title_len;
864 /* Determine where to split the new entry.
865 This is done by maximizing the similarity between BODY and BODY'. */
866 best_split_offset = split_offset = new_title_len;
867 best_similarity = 0.0;
872 new_body.string = new_entry->string + split_offset;
873 new_body.length = new_entry->length - split_offset;
875 entry_fstrcmp (&old_body, &new_body, best_similarity);
876 if (similarity > best_similarity)
878 best_split_offset = split_offset;
879 best_similarity = similarity;
881 if (best_similarity == 1.0)
882 /* It cannot get better. */
885 if (split_offset < new_entry->length)
886 split_offset = find_paragraph_end (new_entry, split_offset + 1);
891 /* BODY' should not be empty. */
892 if (best_split_offset == new_entry->length)
894 ASSERT (new_entry->string[best_split_offset] == '\n');
896 /* A certain similarity between BODY and BODY' is required. */
897 if (best_similarity < FSTRCMP_STRICTER_THRESHOLD)
900 new_split[0] = entry_create (new_entry->string, best_split_offset + 1);
903 size_t len1 = new_title_len;
904 size_t len2 = new_entry->length - best_split_offset;
905 char *combined = XNMALLOC (len1 + len2, char);
906 memcpy (combined, new_entry->string, len1);
907 memcpy (combined + len1, new_entry->string + best_split_offset, len2);
908 new_split[1] = entry_create (combined, len1 + len2);
914 /* Write the contents of an entry to the output stream FP. */
916 entry_write (FILE *fp, struct entry *entry)
918 if (entry->length > 0)
919 fwrite (entry->string, 1, entry->length, fp);
922 /* This structure represents a conflict.
923 A conflict can occur for various reasons. */
926 /* Parts from the ancestor file. */
927 size_t num_old_entries;
928 struct entry **old_entries;
929 /* Parts of the modified file. */
930 size_t num_modified_entries;
931 struct entry **modified_entries;
934 /* Write a conflict to the output stream FP, including markers. */
936 conflict_write (FILE *fp, struct conflict *c)
940 /* Use the same syntax as git's default merge driver.
941 Don't indent the contents of the entries (with things like ">" or "-"),
942 otherwise the user needs more textual editing to resolve the conflict. */
943 fputs ("<<<<<<<\n", fp);
944 for (i = 0; i < c->num_old_entries; i++)
945 entry_write (fp, c->old_entries[i]);
946 fputs ("=======\n", fp);
947 for (i = 0; i < c->num_modified_entries; i++)
948 entry_write (fp, c->modified_entries[i]);
949 fputs (">>>>>>>\n", fp);
953 static const struct option long_options[] =
955 { "help", no_argument, NULL, 'h' },
956 { "split-merged-entry", no_argument, NULL, CHAR_MAX + 1 },
957 { "version", no_argument, NULL, 'V' },
961 /* Print a usage mesage and exit. */
965 if (status != EXIT_SUCCESS)
966 fprintf (stderr, "Try `%s --help' for more information.\n",
970 printf ("Usage: %s [OPTION] O-FILE-NAME A-FILE-NAME B-FILE-NAME\n",
973 printf ("Merges independent modifications of a ChangeLog style file.\n");
974 printf ("O-FILE-NAME names the original file, the ancestor of the two others.\n");
975 printf ("A-FILE-NAME names the publicly modified file.\n");
976 printf ("B-FILE-NAME names the user-modified file.\n");
977 printf ("Writes the merged file into A-FILE-NAME.\n");
979 #if 0 /* --split-merged-entry is now on by default. */
980 printf ("Operation modifiers:\n");
982 --split-merged-entry Possibly split a merged entry between paragraphs.\n\
983 Use this if you have the habit to merge unrelated\n\
984 entries into a single one, separated only by a\n\
985 newline, just because they happened on the same\n\
989 printf ("Informative output:\n");
990 printf (" -h, --help display this help and exit\n");
991 printf (" -V, --version output version information and exit\n");
993 fputs ("Report bugs to <bug-gnu-gettext@gnu.org>.\n",
1001 main (int argc, char *argv[])
1006 bool split_merged_entry;
1008 /* Set program name for messages. */
1009 set_program_name (argv[0]);
1011 /* Set default values for variables. */
1014 split_merged_entry = true;
1016 /* Parse command line options. */
1017 while ((optchar = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
1020 case '\0': /* Long option. */
1028 case CHAR_MAX + 1: /* --split-merged-entry */
1031 usage (EXIT_FAILURE);
1036 /* Version information is requested. */
1037 printf ("%s\n", program_name);
1038 printf ("Copyright (C) %s Free Software Foundation, Inc.\n\
1039 License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n\
1040 This is free software: you are free to change and redistribute it.\n\
1041 There is NO WARRANTY, to the extent permitted by law.\n\
1044 printf ("Written by %s.\n", "Bruno Haible");
1045 exit (EXIT_SUCCESS);
1050 /* Help is requested. */
1051 usage (EXIT_SUCCESS);
1054 /* Test argument count. */
1055 if (optind + 3 != argc)
1056 error (EXIT_FAILURE, 0, "expected three arguments");
1059 const char *ancestor_file_name; /* O-FILE-NAME */
1060 const char *destination_file_name; /* A-FILE-NAME */
1062 const char *other_file_name; /* B-FILE-NAME */
1063 const char *mainstream_file_name;
1064 const char *modified_file_name;
1065 struct changelog_file ancestor_file;
1066 struct changelog_file mainstream_file;
1067 struct changelog_file modified_file;
1068 /* Mapping from indices in ancestor_file to indices in mainstream_file. */
1069 struct entries_mapping mapping;
1070 struct differences diffs;
1071 gl_list_node_t *result_entries_pointers; /* array of pointers into result_entries */
1072 gl_list_t /* <struct entry *> */ result_entries;
1073 gl_list_t /* <struct conflict *> */ result_conflicts;
1075 ancestor_file_name = argv[optind];
1076 destination_file_name = argv[optind + 1];
1077 other_file_name = argv[optind + 2];
1079 /* Heuristic to determine whether it's a pull in downstream direction
1080 (e.g. pull from a centralized server) or a pull in upstream direction
1081 (e.g. "git stash apply").
1083 For ChangeLog this distinction is important. The difference between
1084 an "upstream" and a "downstream" repository is that more people are
1085 looking at the "upstream" repository. They want to be informed about
1086 changes and expect them to be shown at the top of the ChangeLog.
1087 When a user pulls downstream, on the other hand, he has two options:
1088 a) He gets the change entries from the central repository also at the
1089 top of his ChangeLog, and his own changes come after them.
1090 b) He gets the change entries from the central repository after those
1091 he has collected for his branch. His own change entries stay at
1092 the top of the ChangeLog file.
1093 In the case a) he has to reorder the ChangeLog before he can commit.
1094 No one does that. So most people want b).
1095 In other words, the order of entries in a ChangeLog should represent
1096 the order in which they have flown (or will flow) into the *central*
1099 But in git this is fundamentally indistinguishable, because when Linus
1100 pulls patches from akpm and akpm pulls patches from Linus, it's not
1101 clear which of the two is more "upstream". Also, when you have many
1102 branches in a repository and pull from one to another, "git" has no way
1103 to know which branch is more "upstream" than the other. The git-tag(1)
1104 manual page also says:
1105 "One important aspect of git is it is distributed, and being
1106 distributed largely means there is no inherent "upstream" or
1107 "downstream" in the system."
1108 Therefore anyone who attempts to produce a ChangeLog from the merge
1111 Here we allow the user to specify the pull direction through an
1112 environment variable (GIT_UPSTREAM or GIT_DOWNSTREAM). If these two
1113 environment variables are not set, we assume a "simple single user"
1114 usage pattern: He manages local changes through stashes and uses
1115 "git pull" only to pull downstream.
1117 How to distinguish these situation? There are several hints:
1118 - During a "git stash apply", GIT_REFLOG_ACTION is not set. During
1119 a "git pull", it is set to 'pull '. During a "git pull --rebase",
1120 it is set to 'pull --rebase'. During a "git cherry-pick", it is
1121 set to 'cherry-pick'.
1122 - During a "git stash apply", there is an environment variable of
1123 the form GITHEAD_<40_hex_digits>='Stashed changes'. */
1127 var = getenv ("GIT_DOWNSTREAM");
1128 if (var != NULL && var[0] != '\0')
1132 var = getenv ("GIT_UPSTREAM");
1133 if (var != NULL && var[0] != '\0')
1137 var = getenv ("GIT_REFLOG_ACTION");
1138 #if 0 /* Debugging code */
1139 printf ("GIT_REFLOG_ACTION=|%s|\n", var);
1142 && ((strncmp (var, "pull", 4) == 0
1143 && c_strstr (var, " --rebase") == NULL)
1144 || strncmp (var, "merge origin", 12) == 0))
1148 /* "git stash apply", "git rebase", "git cherry-pick" and
1156 #if 0 /* Debugging code */
1159 printf ("First line of %%A:\n");
1160 sprintf (buf, "head -1 %s", destination_file_name); system (buf);
1161 printf ("First line of %%B:\n");
1162 sprintf (buf, "head -1 %s", other_file_name); system (buf);
1163 printf ("Guessing calling convention: %s\n",
1165 ? "%A = modified by user, %B = upstream"
1166 : "%A = upstream, %B = modified by user");
1172 mainstream_file_name = other_file_name;
1173 modified_file_name = destination_file_name;
1177 mainstream_file_name = destination_file_name;
1178 modified_file_name = other_file_name;
1181 /* Read the three files into memory. */
1182 read_changelog_file (ancestor_file_name, &ancestor_file);
1183 read_changelog_file (mainstream_file_name, &mainstream_file);
1184 read_changelog_file (modified_file_name, &modified_file);
1186 /* Compute correspondence between the entries of ancestor_file and of
1188 compute_mapping (&ancestor_file, &mainstream_file, false, &mapping);
1189 (void) entries_mapping_reverse_get; /* avoid gcc "defined but not" warning */
1191 /* Compute differences between the entries of ancestor_file and of
1193 compute_differences (&ancestor_file, &modified_file, &diffs);
1195 /* Compute the result. */
1196 result_entries_pointers =
1197 XNMALLOC (mainstream_file.num_entries, gl_list_node_t);
1199 gl_list_create_empty (GL_LINKED_LIST, entry_equals, entry_hashcode,
1203 for (k = 0; k < mainstream_file.num_entries; k++)
1204 result_entries_pointers[k] =
1205 gl_list_add_last (result_entries, mainstream_file.entries[k]);
1208 gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
1211 for (e = 0; e < diffs.num_edits; e++)
1213 struct edit *edit = diffs.edits[e];
1219 /* An addition to the top of modified_file.
1220 Apply it to the top of mainstream_file. */
1222 for (j = edit->j2; j >= edit->j1; j--)
1224 struct entry *added_entry = modified_file.entries[j];
1225 gl_list_add_first (result_entries, added_entry);
1234 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1235 ASSERT (i_before >= 0);
1236 i_after = (edit->j2 + 1 == modified_file.num_entries
1237 ? ancestor_file.num_entries
1238 : diffs.index_mapping_reverse[edit->j2 + 1]);
1239 ASSERT (i_after >= 0);
1240 ASSERT (i_after == i_before + 1);
1241 /* An addition between ancestor_file.entries[i_before] and
1242 ancestor_file.entries[i_after]. See whether these two
1243 entries still exist in mainstream_file and are still
1245 k_before = entries_mapping_get (&mapping, i_before);
1246 k_after = (i_after == ancestor_file.num_entries
1247 ? mainstream_file.num_entries
1248 : entries_mapping_get (&mapping, i_after));
1249 if (k_before >= 0 && k_after >= 0 && k_after == k_before + 1)
1251 /* Yes, the entry before and after are still neighbours
1252 in mainstream_file. Apply the addition between
1254 if (k_after == mainstream_file.num_entries)
1257 for (j = edit->j1; j <= edit->j2; j++)
1259 struct entry *added_entry = modified_file.entries[j];
1260 gl_list_add_last (result_entries, added_entry);
1265 gl_list_node_t node_k_after = result_entries_pointers[k_after];
1267 for (j = edit->j1; j <= edit->j2; j++)
1269 struct entry *added_entry = modified_file.entries[j];
1270 gl_list_add_before (result_entries, node_k_after, added_entry);
1276 /* It's not clear where the additions should be applied.
1277 Let the user decide. */
1278 struct conflict *c = XMALLOC (struct conflict);
1280 c->num_old_entries = 0;
1281 c->old_entries = NULL;
1282 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1283 c->modified_entries =
1284 XNMALLOC (c->num_modified_entries, struct entry *);
1285 for (j = edit->j1; j <= edit->j2; j++)
1286 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1287 gl_list_add_last (result_conflicts, c);
1293 /* Apply the removals one by one. */
1295 for (i = edit->i1; i <= edit->i2; i++)
1297 struct entry *removed_entry = ancestor_file.entries[i];
1298 ssize_t k = entries_mapping_get (&mapping, i);
1300 && entry_equals (removed_entry,
1301 mainstream_file.entries[k]))
1303 /* The entry to be removed still exists in
1304 mainstream_file. Remove it. */
1305 gl_list_node_set_value (result_entries,
1306 result_entries_pointers[k],
1311 /* The entry to be removed was already removed or was
1312 modified. This is a conflict. */
1313 struct conflict *c = XMALLOC (struct conflict);
1314 c->num_old_entries = 1;
1316 XNMALLOC (c->num_old_entries, struct entry *);
1317 c->old_entries[0] = removed_entry;
1318 c->num_modified_entries = 0;
1319 c->modified_entries = NULL;
1320 gl_list_add_last (result_conflicts, c);
1328 /* When the user usually merges entries from the same day,
1329 and this edit is at the top of the file: */
1330 if (split_merged_entry && edit->j1 == 0)
1332 /* Test whether the change is "simple merged", i.e. whether
1333 it consists of additions, followed by an augmentation of
1334 the first changed entry, followed by small changes of the
1347 modified_entry_n. */
1348 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1350 struct entry *split[2];
1351 bool simple_merged =
1352 try_split_merged_entry (ancestor_file.entries[edit->i1],
1353 modified_file.entries[edit->i1 + edit->j2 - edit->i2],
1358 for (i = edit->i1 + 1; i <= edit->i2; i++)
1359 if (entry_fstrcmp (ancestor_file.entries[i],
1360 modified_file.entries[i + edit->j2 - edit->i2],
1362 < FSTRCMP_THRESHOLD)
1364 simple_merged = false;
1370 /* Apply the additions at the top of modified_file.
1371 Apply each of the single-entry changes
1373 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1374 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1376 /* First part of the split modified_file.entries[edit->j2 - edit->i2 + edit->i1]: */
1377 gl_list_add_first (result_entries, split[0]);
1378 /* The additions. */
1379 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1381 struct entry *added_entry = modified_file.entries[j];
1382 gl_list_add_first (result_entries, added_entry);
1384 /* Now the single-entry changes. */
1385 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1387 struct entry *changed_entry =
1388 (j == edit->j1 + num_added
1390 : modified_file.entries[j]);
1391 size_t i = j + edit->i2 - edit->j2;
1392 ssize_t k = entries_mapping_get (&mapping, i);
1394 && entry_equals (ancestor_file.entries[i],
1395 mainstream_file.entries[k]))
1397 gl_list_node_set_value (result_entries,
1398 result_entries_pointers[k],
1401 else if (!entry_equals (ancestor_file.entries[i],
1404 struct conflict *c = XMALLOC (struct conflict);
1405 c->num_old_entries = 1;
1407 XNMALLOC (c->num_old_entries, struct entry *);
1408 c->old_entries[0] = ancestor_file.entries[i];
1409 c->num_modified_entries = 1;
1410 c->modified_entries =
1411 XNMALLOC (c->num_modified_entries, struct entry *);
1412 c->modified_entries[0] = changed_entry;
1413 gl_list_add_last (result_conflicts, c);
1423 /* Test whether the change is "simple", i.e. whether it
1424 consists of small changes to the old ChangeLog entries
1425 and additions before them:
1435 modified_entry_n. */
1436 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1440 for (i = edit->i1; i <= edit->i2; i++)
1441 if (entry_fstrcmp (ancestor_file.entries[i],
1442 modified_file.entries[i + edit->j2 - edit->i2],
1444 < FSTRCMP_THRESHOLD)
1454 /* Apply the additions and each of the single-entry
1455 changes separately. */
1456 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1457 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1460 /* A simple change at the top of modified_file.
1461 Apply it to the top of mainstream_file. */
1463 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1465 struct entry *added_entry = modified_file.entries[j];
1466 gl_list_add_first (result_entries, added_entry);
1468 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1470 struct entry *changed_entry = modified_file.entries[j];
1471 size_t i = j + edit->i2 - edit->j2;
1472 ssize_t k = entries_mapping_get (&mapping, i);
1474 && entry_equals (ancestor_file.entries[i],
1475 mainstream_file.entries[k]))
1477 gl_list_node_set_value (result_entries,
1478 result_entries_pointers[k],
1484 ASSERT (!entry_equals (ancestor_file.entries[i],
1486 c = XMALLOC (struct conflict);
1487 c->num_old_entries = 1;
1489 XNMALLOC (c->num_old_entries, struct entry *);
1490 c->old_entries[0] = ancestor_file.entries[i];
1491 c->num_modified_entries = 1;
1492 c->modified_entries =
1493 XNMALLOC (c->num_modified_entries, struct entry *);
1494 c->modified_entries[0] = changed_entry;
1495 gl_list_add_last (result_conflicts, c);
1505 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1506 ASSERT (i_before >= 0);
1507 /* A simple change after ancestor_file.entries[i_before].
1508 See whether this entry and the following num_changed
1509 entries still exist in mainstream_file and are still
1511 k_before = entries_mapping_get (&mapping, i_before);
1512 linear = (k_before >= 0);
1516 for (i = i_before + 1; i <= i_before + num_changed; i++)
1517 if (entries_mapping_get (&mapping, i) != k_before + (i - i_before))
1525 gl_list_node_t node_for_insert =
1526 result_entries_pointers[k_before + 1];
1528 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1530 struct entry *added_entry = modified_file.entries[j];
1531 gl_list_add_before (result_entries, node_for_insert, added_entry);
1533 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1535 struct entry *changed_entry = modified_file.entries[j];
1536 size_t i = j + edit->i2 - edit->j2;
1537 ssize_t k = entries_mapping_get (&mapping, i);
1539 if (entry_equals (ancestor_file.entries[i],
1540 mainstream_file.entries[k]))
1542 gl_list_node_set_value (result_entries,
1543 result_entries_pointers[k],
1549 ASSERT (!entry_equals (ancestor_file.entries[i],
1551 c = XMALLOC (struct conflict);
1552 c->num_old_entries = 1;
1554 XNMALLOC (c->num_old_entries, struct entry *);
1555 c->old_entries[0] = ancestor_file.entries[i];
1556 c->num_modified_entries = 1;
1557 c->modified_entries =
1558 XNMALLOC (c->num_modified_entries, struct entry *);
1559 c->modified_entries[0] = changed_entry;
1560 gl_list_add_last (result_conflicts, c);
1570 See whether the num_changed entries still exist
1571 unchanged in mainstream_file and are still
1575 bool linear_unchanged;
1577 k_first = entries_mapping_get (&mapping, i_first);
1580 && entry_equals (ancestor_file.entries[i_first],
1581 mainstream_file.entries[k_first]));
1582 if (linear_unchanged)
1585 for (i = i_first + 1; i <= edit->i2; i++)
1586 if (!(entries_mapping_get (&mapping, i) == k_first + (i - i_first)
1587 && entry_equals (ancestor_file.entries[i],
1588 mainstream_file.entries[entries_mapping_get (&mapping, i)])))
1590 linear_unchanged = false;
1594 if (linear_unchanged)
1596 gl_list_node_t node_for_insert =
1597 result_entries_pointers[k_first];
1600 for (j = edit->j2; j >= edit->j1; j--)
1602 struct entry *new_entry = modified_file.entries[j];
1603 gl_list_add_before (result_entries, node_for_insert, new_entry);
1605 for (i = edit->i1; i <= edit->i2; i++)
1607 ssize_t k = entries_mapping_get (&mapping, i);
1609 ASSERT (entry_equals (ancestor_file.entries[i],
1610 mainstream_file.entries[k]));
1611 gl_list_node_set_value (result_entries,
1612 result_entries_pointers[k],
1621 struct conflict *c = XMALLOC (struct conflict);
1623 c->num_old_entries = edit->i2 - edit->i1 + 1;
1625 XNMALLOC (c->num_old_entries, struct entry *);
1626 for (i = edit->i1; i <= edit->i2; i++)
1627 c->old_entries[i - edit->i1] = ancestor_file.entries[i];
1628 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1629 c->modified_entries =
1630 XNMALLOC (c->num_modified_entries, struct entry *);
1631 for (j = edit->j1; j <= edit->j2; j++)
1632 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1633 gl_list_add_last (result_conflicts, c);
1641 /* Output the result. */
1643 FILE *fp = fopen (destination_file_name, "w");
1646 fprintf (stderr, "could not write file '%s'\n", destination_file_name);
1647 exit (EXIT_FAILURE);
1650 /* Output the conflicts at the top. */
1652 size_t n = gl_list_size (result_conflicts);
1654 for (i = 0; i < n; i++)
1655 conflict_write (fp, (struct conflict *) gl_list_get_at (result_conflicts, i));
1657 /* Output the modified and unmodified entries, in order. */
1659 gl_list_iterator_t iter = gl_list_iterator (result_entries);
1661 gl_list_node_t node;
1662 while (gl_list_iterator_next (&iter, &elt, &node))
1663 entry_write (fp, (struct entry *) elt);
1664 gl_list_iterator_free (&iter);
1667 if (fwriteerror (fp))
1669 fprintf (stderr, "error writing to file '%s'\n", destination_file_name);
1670 exit (EXIT_FAILURE);
1674 exit (gl_list_size (result_conflicts) > 0 ? EXIT_FAILURE : EXIT_SUCCESS);