1 /* git-merge-changelog - git "merge" driver for GNU style ChangeLog files.
2 Copyright (C) 2008-2010 Bruno Haible <bruno@clisp.org>
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 The default merge driver of 'git' *always* produces conflicts when
19 pulling public modifications into a privately modified ChangeLog file.
20 This is because ChangeLog files are always modified at the top; the
21 default merge driver has no clue how to deal with this. Furthermore
22 the conflicts are presented with more <<<< ==== >>>> markers than
23 necessary; this is because the default merge driver makes pointless
24 efforts to look at the individual line changes inside a ChangeLog entry.
26 This program serves as a 'git' merge driver that avoids these problems.
27 1. It produces no conflict when ChangeLog entries have been inserted
28 at the top both in the public and in the private modification. It
29 puts the privately added entries above the publicly added entries.
30 2. It respects the structure of ChangeLog files: entries are not split
31 into lines but kept together.
32 3. It also handles the case of small modifications of past ChangeLog
33 entries, or of removed ChangeLog entries: they are merged as one
35 4. Conflicts are presented at the top of the file, rather than where
36 they occurred, so that the user will see them immediately. (Unlike
37 for source code written in some programming language, conflict markers
38 that are located several hundred lines from the top will not cause
39 any syntax error and therefore would be likely to remain unnoticed.)
43 $ gnulib-tool --create-testdir --dir=/tmp/testdir123 git-merge-changelog
48 - Add to .git/config of the checkout (or to your $HOME/.gitconfig) the lines
50 [merge "merge-changelog"]
51 name = GNU-style ChangeLog merge driver
52 driver = /usr/local/bin/git-merge-changelog %O %A %B
54 - In every directory that contains a ChangeLog file, add a file
55 '.gitattributes' with this line:
57 ChangeLog merge=merge-changelog
59 (See "man 5 gitattributes" for more info.)
62 /* Calling convention:
63 A merge driver is called with three filename arguments:
64 1. %O = The common ancestor of %A and %B.
65 2. %A = The file's contents from the "current branch".
66 3. %B = The file's contents from the "other branch"; this is the contents
69 In case of a "git stash apply" or of an upstream pull (e.g. from a subsystem
70 maintainer to a central maintainer) or of a downstream pull with --rebase:
71 2. %A = The file's newest pulled contents; modified by other committers.
72 3. %B = The user's newest copy of the file; modified by the user.
73 In case of a downstream pull (e.g. from a central repository to the user)
74 or of an upstream pull with --rebase:
75 2. %A = The user's newest copy of the file; modified by the user.
76 3. %B = The file's newest pulled contents; modified by other committers.
78 It should write its merged output into file %A. It can also echo some
79 remarks to stdout. It should exit with return code 0 if the merge could
80 be resolved cleanly, or with non-zero return code if there were conflicts.
84 The structure of a ChangeLog file: It consists of ChangeLog entries. A
85 ChangeLog entry starts at a line following a blank line and that starts with
86 a non-whitespace character, or at the beginning of a file.
87 The merge driver works as follows: It reads the three files into memory and
88 dissects them into ChangeLog entries. It then finds the differences between
89 %O and %B. They are classified as:
90 - removals (some consecutive entries removed),
91 - changes (some consecutive entries removed, some consecutive entries
93 - additions (some consecutive entries added).
94 The driver then attempts to apply the changes to %A.
95 To this effect, it first computes a correspondence between the entries in %O
96 and the entries in %A, using fuzzy string matching to still identify changed
98 - Removals are applied one by one. If the entry is present in %A, at any
99 position, it is removed. If not, the removal is marked as a conflict.
100 - Additions at the top of %B are applied at the top of %A.
101 - Additions between entry x and entry y (y may be the file end) in %B are
102 applied between entry x and entry y in %A (if they still exist and are
103 still consecutive in %A), otherwise the additions are marked as a
105 - Changes are categorized into "simple changes":
108 added_entry ... added_entry modified_entry1 ... modified_entryn,
109 where the correspondence between entry_i and modified_entry_i is still
110 clear; and "big changes": these are all the rest. Simple changes at the
111 top of %B are applied by putting the added entries at the top of %A. The
112 changes in simple changes are applied one by one; possibly leading to
113 single-entry conflicts. Big changes are applied en bloc, possibly
114 leading to conflicts spanning multiple entries.
115 - Conflicts are output at the top of the file and cause an exit status of
127 #include <sys/types.h>
130 #include "progname.h"
132 #include "read-file.h"
133 #include "gl_xlist.h"
134 #include "gl_array_list.h"
135 #include "gl_linkedhash_list.h"
136 #include "gl_rbtreehash_list.h"
137 #include "gl_linked_list.h"
139 #include "xmalloca.h"
142 #include "c-strstr.h"
143 #include "fwriteerror.h"
145 #define ASSERT(expr) \
153 #define FSTRCMP_THRESHOLD 0.6
154 #define FSTRCMP_STRICTER_THRESHOLD 0.8
156 /* Representation of a ChangeLog entry.
157 The string may contain NUL bytes; therefore it is represented as a plain
158 opaque memory region. */
163 /* Cache for the hash code. */
164 bool hashcode_cached;
169 The memory region passed by the caller must be of indefinite extent. It is
170 *not* copied here. */
171 static struct entry *
172 entry_create (char *string, size_t length)
174 struct entry *result = XMALLOC (struct entry);
175 result->string = string;
176 result->length = length;
177 result->hashcode_cached = false;
181 /* Compare two entries for equality. */
183 entry_equals (const void *elt1, const void *elt2)
185 const struct entry *entry1 = (const struct entry *) elt1;
186 const struct entry *entry2 = (const struct entry *) elt2;
187 return entry1->length == entry2->length
188 && memcmp (entry1->string, entry2->string, entry1->length) == 0;
191 /* Return a hash code of the contents of a ChangeLog entry. */
193 entry_hashcode (const void *elt)
195 struct entry *entry = (struct entry *) elt;
196 if (!entry->hashcode_cached)
198 /* See http://www.haible.de/bruno/hashfunc.html. */
203 for (s = entry->string, n = entry->length; n > 0; s++, n--)
204 h = (unsigned char) *s + ((h << 9) | (h >> (sizeof (size_t) * CHAR_BIT - 9)));
207 entry->hashcode_cached = true;
209 return entry->hashcode;
212 /* Perform a fuzzy comparison of two ChangeLog entries.
213 Return a similarity measure of the two entries, a value between 0 and 1.
214 0 stands for very distinct, 1 for identical.
215 If the result is < LOWER_BOUND, an arbitrary other value < LOWER_BOUND can
218 entry_fstrcmp (const struct entry *entry1, const struct entry *entry2,
221 /* fstrcmp works only on NUL terminated strings. */
225 if (memchr (entry1->string, '\0', entry1->length) != NULL)
227 if (memchr (entry2->string, '\0', entry2->length) != NULL)
229 memory = (char *) xmalloca (entry1->length + 1 + entry2->length + 1);
232 memcpy (p, entry1->string, entry1->length);
235 memcpy (p, entry2->string, entry2->length);
240 fstrcmp_bounded (memory, memory + entry1->length + 1, lower_bound);
245 /* This structure represents an entire ChangeLog file, after it was read
247 struct changelog_file
249 /* The entries, as a list. */
250 gl_list_t /* <struct entry *> */ entries_list;
251 /* The entries, as a list in opposite direction. */
252 gl_list_t /* <struct entry *> */ entries_reversed;
253 /* The entries, as an array. */
255 struct entry **entries;
258 /* Read a ChangeLog file into memory.
259 Return the contents in *RESULT. */
261 read_changelog_file (const char *filename, struct changelog_file *result)
263 /* Read the file in text mode, otherwise it's hard to recognize empty
266 char *contents = read_file (filename, &length);
267 if (contents == NULL)
269 fprintf (stderr, "could not read file '%s'\n", filename);
273 result->entries_list =
274 gl_list_create_empty (GL_LINKEDHASH_LIST, entry_equals, entry_hashcode,
276 result->entries_reversed =
277 gl_list_create_empty (GL_RBTREEHASH_LIST, entry_equals, entry_hashcode,
279 /* A ChangeLog file consists of ChangeLog entries. A ChangeLog entry starts
280 at a line following a blank line and that starts with a non-whitespace
281 character, or at the beginning of a file.
282 Split the file contents into entries. */
284 char *contents_end = contents + length;
285 char *start = contents;
286 while (start < contents_end)
288 /* Search the end of the current entry. */
292 while (ptr < contents_end)
294 ptr = memchr (ptr, '\n', contents_end - ptr);
301 if (contents_end - ptr >= 2
303 && !(ptr[1] == '\n' || ptr[1] == '\t' || ptr[1] == ' '))
310 curr = entry_create (start, ptr - start);
311 gl_list_add_last (result->entries_list, curr);
312 gl_list_add_first (result->entries_reversed, curr);
318 result->num_entries = gl_list_size (result->entries_list);
319 result->entries = XNMALLOC (result->num_entries, struct entry *);
322 gl_list_iterator_t iter = gl_list_iterator (result->entries_list);
325 while (gl_list_iterator_next (&iter, &elt, &node))
326 result->entries[index++] = (struct entry *) elt;
327 gl_list_iterator_free (&iter);
328 ASSERT (index == result->num_entries);
332 /* A mapping (correspondence) between entries of FILE1 and of FILE2. */
333 struct entries_mapping
335 struct changelog_file *file1;
336 struct changelog_file *file2;
337 /* Mapping from indices in FILE1 to indices in FILE2.
338 A value -1 means that the entry from FILE1 is not found in FILE2.
339 A value -2 means that it has not yet been computed. */
340 ssize_t *index_mapping;
341 /* Mapping from indices in FILE2 to indices in FILE1.
342 A value -1 means that the entry from FILE2 is not found in FILE1.
343 A value -2 means that it has not yet been computed. */
344 ssize_t *index_mapping_reverse;
347 /* Look up (or lazily compute) the mapping of an entry in FILE1.
348 i is the index in FILE1.
349 Return the index in FILE2, or -1 when the entry is not found in FILE2. */
351 entries_mapping_get (struct entries_mapping *mapping, ssize_t i)
353 if (mapping->index_mapping[i] < -1)
355 struct changelog_file *file1 = mapping->file1;
356 struct changelog_file *file2 = mapping->file2;
357 size_t n1 = file1->num_entries;
358 size_t n2 = file2->num_entries;
359 struct entry *entry_i = file1->entries[i];
362 /* Search whether it approximately occurs in file2. */
364 double best_j_similarity = 0.0;
365 for (j = n2 - 1; j >= 0; j--)
366 if (mapping->index_mapping_reverse[j] < 0)
369 entry_fstrcmp (entry_i, file2->entries[j], best_j_similarity);
370 if (similarity > best_j_similarity)
373 best_j_similarity = similarity;
376 if (best_j_similarity >= FSTRCMP_THRESHOLD)
378 /* Found a similar entry in file2. */
379 struct entry *entry_j = file2->entries[best_j];
380 /* Search whether it approximately occurs in file1 at index i. */
382 double best_i_similarity = 0.0;
384 for (ii = n1 - 1; ii >= 0; ii--)
385 if (mapping->index_mapping[ii] < 0)
388 entry_fstrcmp (file1->entries[ii], entry_j,
390 if (similarity > best_i_similarity)
393 best_i_similarity = similarity;
396 if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i)
398 mapping->index_mapping[i] = best_j;
399 mapping->index_mapping_reverse[best_j] = i;
402 if (mapping->index_mapping[i] < -1)
403 /* It does not approximately occur in FILE2.
404 Remember it, for next time. */
405 mapping->index_mapping[i] = -1;
407 return mapping->index_mapping[i];
410 /* Look up (or lazily compute) the mapping of an entry in FILE2.
411 j is the index in FILE2.
412 Return the index in FILE1, or -1 when the entry is not found in FILE1. */
414 entries_mapping_reverse_get (struct entries_mapping *mapping, ssize_t j)
416 if (mapping->index_mapping_reverse[j] < -1)
418 struct changelog_file *file1 = mapping->file1;
419 struct changelog_file *file2 = mapping->file2;
420 size_t n1 = file1->num_entries;
421 size_t n2 = file2->num_entries;
422 struct entry *entry_j = file2->entries[j];
425 /* Search whether it approximately occurs in file1. */
427 double best_i_similarity = 0.0;
428 for (i = n1 - 1; i >= 0; i--)
429 if (mapping->index_mapping[i] < 0)
432 entry_fstrcmp (file1->entries[i], entry_j, best_i_similarity);
433 if (similarity > best_i_similarity)
436 best_i_similarity = similarity;
439 if (best_i_similarity >= FSTRCMP_THRESHOLD)
441 /* Found a similar entry in file1. */
442 struct entry *entry_i = file1->entries[best_i];
443 /* Search whether it approximately occurs in file2 at index j. */
445 double best_j_similarity = 0.0;
447 for (jj = n2 - 1; jj >= 0; jj--)
448 if (mapping->index_mapping_reverse[jj] < 0)
451 entry_fstrcmp (entry_i, file2->entries[jj],
453 if (similarity > best_j_similarity)
456 best_j_similarity = similarity;
459 if (best_j_similarity >= FSTRCMP_THRESHOLD && best_j == j)
461 mapping->index_mapping_reverse[j] = best_i;
462 mapping->index_mapping[best_i] = j;
465 if (mapping->index_mapping_reverse[j] < -1)
466 /* It does not approximately occur in FILE1.
467 Remember it, for next time. */
468 mapping->index_mapping_reverse[j] = -1;
470 return mapping->index_mapping_reverse[j];
473 /* Compute a mapping (correspondence) between entries of FILE1 and of FILE2.
474 The correspondence also takes into account small modifications; i.e. the
475 indicated relation is not equality of entries but best-match similarity
477 If FULL is true, the maximum of matching is done up-front. If it is false,
478 it is done in a lazy way through the functions entries_mapping_get and
479 entries_mapping_reverse_get.
480 Return the result in *RESULT. */
482 compute_mapping (struct changelog_file *file1, struct changelog_file *file2,
484 struct entries_mapping *result)
486 /* Mapping from indices in file1 to indices in file2. */
487 ssize_t *index_mapping;
488 /* Mapping from indices in file2 to indices in file1. */
489 ssize_t *index_mapping_reverse;
490 size_t n1 = file1->num_entries;
491 size_t n2 = file2->num_entries;
494 index_mapping = XNMALLOC (n1, ssize_t);
495 for (i = 0; i < n1; i++)
496 index_mapping[i] = -2;
498 index_mapping_reverse = XNMALLOC (n2, ssize_t);
499 for (j = 0; j < n2; j++)
500 index_mapping_reverse[j] = -2;
502 for (i = n1 - 1; i >= 0; i--)
503 /* Take an entry from file1. */
504 if (index_mapping[i] < -1)
506 struct entry *entry = file1->entries[i];
507 /* Search whether it occurs in file2. */
508 j = gl_list_indexof (file2->entries_reversed, entry);
512 /* Found an exact correspondence. */
513 /* If index_mapping_reverse[j] >= 0, we have already seen other
514 copies of this entry, and there were more occurrences of it in
515 file1 than in file2. In this case, do nothing. */
516 if (index_mapping_reverse[j] < 0)
518 index_mapping[i] = j;
519 index_mapping_reverse[j] = i;
520 /* Look for more occurrences of the same entry. Match them
521 as long as they pair up. Unpaired occurrences of the same
522 entry are left without mapping. */
533 gl_list_indexof_from (file1->entries_reversed,
538 gl_list_indexof_from (file2->entries_reversed,
542 curr_i = n1 - 1 - next_i;
543 curr_j = n2 - 1 - next_j;
544 ASSERT (index_mapping[curr_i] < 0);
545 ASSERT (index_mapping_reverse[curr_j] < 0);
546 index_mapping[curr_i] = curr_j;
547 index_mapping_reverse[curr_j] = curr_i;
554 result->file1 = file1;
555 result->file2 = file2;
556 result->index_mapping = index_mapping;
557 result->index_mapping_reverse = index_mapping_reverse;
560 for (i = n1 - 1; i >= 0; i--)
561 entries_mapping_get (result, i);
564 /* An "edit" is a textual modification performed by the user, that needs to
565 be applied to the other file. */
568 /* Some consecutive entries were added. */
570 /* Some consecutive entries were removed; some other consecutive entries
571 were added at the same position. (Not necessarily the same number of
574 /* Some consecutive entries were removed. */
578 /* This structure represents an edit. */
582 /* Range of indices into the entries of FILE1. */
583 ssize_t i1, i2; /* first, last index; only used for CHANGE, REMOVAL */
584 /* Range of indices into the entries of FILE2. */
585 ssize_t j1, j2; /* first, last index; only used for ADDITION, CHANGE */
588 /* This structure represents the differences from one file, FILE1, to another
592 /* An array mapping FILE1 indices to FILE2 indices (or -1 when the entry
593 from FILE1 is not found in FILE2). */
594 ssize_t *index_mapping;
595 /* An array mapping FILE2 indices to FILE1 indices (or -1 when the entry
596 from FILE2 is not found in FILE1). */
597 ssize_t *index_mapping_reverse;
598 /* The edits that transform FILE1 into FILE2. */
603 /* Import the difference detection algorithm from GNU diff. */
604 #define ELEMENT struct entry *
605 #define EQUAL entry_equals
606 #define OFFSET ssize_t
607 #define EXTRA_CONTEXT_FIELDS \
608 ssize_t *index_mapping; \
609 ssize_t *index_mapping_reverse;
610 #define NOTE_DELETE(ctxt, xoff) \
611 ctxt->index_mapping[xoff] = -1
612 #define NOTE_INSERT(ctxt, yoff) \
613 ctxt->index_mapping_reverse[yoff] = -1
616 /* Compute the differences between the entries of FILE1 and the entries of
619 compute_differences (struct changelog_file *file1, struct changelog_file *file2,
620 struct differences *result)
622 /* Unlike compute_mapping, which mostly ignores the order of the entries and
623 therefore works well when some entries are permuted, here we use the order.
624 I think this is needed in order to distinguish changes from
625 additions+removals; I don't know how to say what is a "change" if the
626 files are considered as unordered sets of entries. */
628 size_t n1 = file1->num_entries;
629 size_t n2 = file2->num_entries;
632 gl_list_t /* <struct edit *> */ edits;
634 ctxt.xvec = file1->entries;
635 ctxt.yvec = file2->entries;
636 ctxt.index_mapping = XNMALLOC (n1, ssize_t);
637 for (i = 0; i < n1; i++)
638 ctxt.index_mapping[i] = 0;
639 ctxt.index_mapping_reverse = XNMALLOC (n2, ssize_t);
640 for (j = 0; j < n2; j++)
641 ctxt.index_mapping_reverse[j] = 0;
642 ctxt.fdiag = XNMALLOC (2 * (n1 + n2 + 3), ssize_t) + n2 + 1;
643 ctxt.bdiag = ctxt.fdiag + n1 + n2 + 3;
644 ctxt.too_expensive = n1 + n2;
646 /* Store in ctxt.index_mapping and ctxt.index_mapping_reverse a -1 for
647 each removed or added entry. */
648 compareseq (0, n1, 0, n2, 0, &ctxt);
650 /* Complete the index_mapping and index_mapping_reverse arrays. */
653 while (i < n1 || j < n2)
655 while (i < n1 && ctxt.index_mapping[i] < 0)
657 while (j < n2 && ctxt.index_mapping_reverse[j] < 0)
659 ASSERT ((i < n1) == (j < n2));
660 if (i == n1 && j == n2)
662 ctxt.index_mapping[i] = j;
663 ctxt.index_mapping_reverse[j] = i;
668 /* Create the edits. */
669 edits = gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
672 while (i < n1 || j < n2)
678 e = XMALLOC (struct edit);
682 gl_list_add_last (edits, e);
689 e = XMALLOC (struct edit);
693 gl_list_add_last (edits, e);
696 if (ctxt.index_mapping[i] >= 0)
698 if (ctxt.index_mapping_reverse[j] >= 0)
700 ASSERT (ctxt.index_mapping[i] == j);
701 ASSERT (ctxt.index_mapping_reverse[j] == i);
708 ASSERT (ctxt.index_mapping_reverse[j] < 0);
709 e = XMALLOC (struct edit);
714 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
716 gl_list_add_last (edits, e);
721 if (ctxt.index_mapping_reverse[j] >= 0)
724 ASSERT (ctxt.index_mapping[i] < 0);
725 e = XMALLOC (struct edit);
730 while (i < n1 && ctxt.index_mapping[i] < 0);
732 gl_list_add_last (edits, e);
737 ASSERT (ctxt.index_mapping[i] < 0);
738 ASSERT (ctxt.index_mapping_reverse[j] < 0);
739 e = XMALLOC (struct edit);
744 while (i < n1 && ctxt.index_mapping[i] < 0);
749 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
751 gl_list_add_last (edits, e);
756 result->index_mapping = ctxt.index_mapping;
757 result->index_mapping_reverse = ctxt.index_mapping_reverse;
758 result->num_edits = gl_list_size (edits);
759 result->edits = XNMALLOC (result->num_edits, struct edit *);
762 gl_list_iterator_t iter = gl_list_iterator (edits);
765 while (gl_list_iterator_next (&iter, &elt, &node))
766 result->edits[index++] = (struct edit *) elt;
767 gl_list_iterator_free (&iter);
768 ASSERT (index == result->num_edits);
772 /* An empty entry. */
773 static struct entry empty_entry = { NULL, 0 };
775 /* Return the end of a paragraph.
777 OFFSET is an offset into the entry, OFFSET <= ENTRY->length.
778 Return the offset of the end of paragraph, as an offset <= ENTRY->length;
779 it is the start of a blank line or the end of the entry. */
781 find_paragraph_end (const struct entry *entry, size_t offset)
783 const char *string = entry->string;
784 size_t length = entry->length;
788 const char *nl = memchr (string + offset, '\n', length - offset);
791 offset = (nl - string) + 1;
792 if (offset < length && string[offset] == '\n')
797 /* Split a merged entry.
798 Given an old entry of the form
801 and a new entry of the form
805 where the two titles are the same and BODY and BODY' are very similar,
806 this computes two new entries
813 If the entries don't have this form, it returns false. */
815 try_split_merged_entry (const struct entry *old_entry,
816 const struct entry *new_entry,
817 struct entry *new_split[2])
819 size_t old_title_len = find_paragraph_end (old_entry, 0);
820 size_t new_title_len = find_paragraph_end (new_entry, 0);
821 struct entry old_body;
822 struct entry new_body;
823 size_t best_split_offset;
824 double best_similarity;
828 if (!(old_title_len == new_title_len
829 && memcmp (old_entry->string, new_entry->string, old_title_len) == 0))
832 old_body.string = old_entry->string + old_title_len;
833 old_body.length = old_entry->length - old_title_len;
835 /* Determine where to split the new entry.
836 This is done by maximizing the similarity between BODY and BODY'. */
837 best_split_offset = split_offset = new_title_len;
838 best_similarity = 0.0;
843 new_body.string = new_entry->string + split_offset;
844 new_body.length = new_entry->length - split_offset;
846 entry_fstrcmp (&old_body, &new_body, best_similarity);
847 if (similarity > best_similarity)
849 best_split_offset = split_offset;
850 best_similarity = similarity;
852 if (best_similarity == 1.0)
853 /* It cannot get better. */
856 if (split_offset < new_entry->length)
857 split_offset = find_paragraph_end (new_entry, split_offset + 1);
862 /* BODY' should not be empty. */
863 if (best_split_offset == new_entry->length)
865 ASSERT (new_entry->string[best_split_offset] == '\n');
867 /* A certain similarity between BODY and BODY' is required. */
868 if (best_similarity < FSTRCMP_STRICTER_THRESHOLD)
871 new_split[0] = entry_create (new_entry->string, best_split_offset + 1);
874 size_t len1 = new_title_len;
875 size_t len2 = new_entry->length - best_split_offset;
876 char *combined = XNMALLOC (len1 + len2, char);
877 memcpy (combined, new_entry->string, len1);
878 memcpy (combined + len1, new_entry->string + best_split_offset, len2);
879 new_split[1] = entry_create (combined, len1 + len2);
885 /* Write the contents of an entry to the output stream FP. */
887 entry_write (FILE *fp, struct entry *entry)
889 if (entry->length > 0)
890 fwrite (entry->string, 1, entry->length, fp);
893 /* This structure represents a conflict.
894 A conflict can occur for various reasons. */
897 /* Parts from the ancestor file. */
898 size_t num_old_entries;
899 struct entry **old_entries;
900 /* Parts of the modified file. */
901 size_t num_modified_entries;
902 struct entry **modified_entries;
905 /* Write a conflict to the output stream FP, including markers. */
907 conflict_write (FILE *fp, struct conflict *c)
911 /* Use the same syntax as git's default merge driver.
912 Don't indent the contents of the entries (with things like ">" or "-"),
913 otherwise the user needs more textual editing to resolve the conflict. */
914 fputs ("<<<<<<<\n", fp);
915 for (i = 0; i < c->num_old_entries; i++)
916 entry_write (fp, c->old_entries[i]);
917 fputs ("=======\n", fp);
918 for (i = 0; i < c->num_modified_entries; i++)
919 entry_write (fp, c->modified_entries[i]);
920 fputs (">>>>>>>\n", fp);
924 static const struct option long_options[] =
926 { "help", no_argument, NULL, 'h' },
927 { "split-merged-entry", no_argument, NULL, CHAR_MAX + 1 },
928 { "version", no_argument, NULL, 'V' },
932 /* Print a usage message and exit. */
936 if (status != EXIT_SUCCESS)
937 fprintf (stderr, "Try `%s --help' for more information.\n",
941 printf ("Usage: %s [OPTION] O-FILE-NAME A-FILE-NAME B-FILE-NAME\n",
944 printf ("Merges independent modifications of a ChangeLog style file.\n");
945 printf ("O-FILE-NAME names the original file, the ancestor of the two others.\n");
946 printf ("A-FILE-NAME names the publicly modified file.\n");
947 printf ("B-FILE-NAME names the user-modified file.\n");
948 printf ("Writes the merged file into A-FILE-NAME.\n");
950 #if 0 /* --split-merged-entry is now on by default. */
951 printf ("Operation modifiers:\n");
953 --split-merged-entry Possibly split a merged entry between paragraphs.\n\
954 Use this if you have the habit to merge unrelated\n\
955 entries into a single one, separated only by a\n\
956 newline, just because they happened on the same\n\
960 printf ("Informative output:\n");
961 printf (" -h, --help display this help and exit\n");
962 printf (" -V, --version output version information and exit\n");
964 fputs ("Report bugs to <bug-gnu-gettext@gnu.org>.\n",
972 main (int argc, char *argv[])
977 bool split_merged_entry;
979 /* Set program name for messages. */
980 set_program_name (argv[0]);
982 /* Set default values for variables. */
985 split_merged_entry = true;
987 /* Parse command line options. */
988 while ((optchar = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
991 case '\0': /* Long option. */
999 case CHAR_MAX + 1: /* --split-merged-entry */
1002 usage (EXIT_FAILURE);
1007 /* Version information is requested. */
1008 printf ("%s\n", program_name);
1009 printf ("Copyright (C) %s Free Software Foundation, Inc.\n\
1010 License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n\
1011 This is free software: you are free to change and redistribute it.\n\
1012 There is NO WARRANTY, to the extent permitted by law.\n\
1015 printf ("Written by %s.\n", "Bruno Haible");
1016 exit (EXIT_SUCCESS);
1021 /* Help is requested. */
1022 usage (EXIT_SUCCESS);
1025 /* Test argument count. */
1026 if (optind + 3 != argc)
1027 error (EXIT_FAILURE, 0, "expected three arguments");
1030 const char *ancestor_file_name; /* O-FILE-NAME */
1031 const char *destination_file_name; /* A-FILE-NAME */
1033 const char *other_file_name; /* B-FILE-NAME */
1034 const char *mainstream_file_name;
1035 const char *modified_file_name;
1036 struct changelog_file ancestor_file;
1037 struct changelog_file mainstream_file;
1038 struct changelog_file modified_file;
1039 /* Mapping from indices in ancestor_file to indices in mainstream_file. */
1040 struct entries_mapping mapping;
1041 struct differences diffs;
1042 gl_list_node_t *result_entries_pointers; /* array of pointers into result_entries */
1043 gl_list_t /* <struct entry *> */ result_entries;
1044 gl_list_t /* <struct conflict *> */ result_conflicts;
1046 ancestor_file_name = argv[optind];
1047 destination_file_name = argv[optind + 1];
1048 other_file_name = argv[optind + 2];
1050 /* Heuristic to determine whether it's a pull in downstream direction
1051 (e.g. pull from a centralized server) or a pull in upstream direction
1052 (e.g. "git stash apply").
1054 For ChangeLog this distinction is important. The difference between
1055 an "upstream" and a "downstream" repository is that more people are
1056 looking at the "upstream" repository. They want to be informed about
1057 changes and expect them to be shown at the top of the ChangeLog.
1058 When a user pulls downstream, on the other hand, he has two options:
1059 a) He gets the change entries from the central repository also at the
1060 top of his ChangeLog, and his own changes come after them.
1061 b) He gets the change entries from the central repository after those
1062 he has collected for his branch. His own change entries stay at
1063 the top of the ChangeLog file.
1064 In the case a) he has to reorder the ChangeLog before he can commit.
1065 No one does that. So most people want b).
1066 In other words, the order of entries in a ChangeLog should represent
1067 the order in which they have flowed (or will flow) into the *central*
1070 But in git this is fundamentally indistinguishable, because when Linus
1071 pulls patches from akpm and akpm pulls patches from Linus, it's not
1072 clear which of the two is more "upstream". Also, when you have many
1073 branches in a repository and pull from one to another, "git" has no way
1074 to know which branch is more "upstream" than the other. The git-tag(1)
1075 manual page also says:
1076 "One important aspect of git is it is distributed, and being
1077 distributed largely means there is no inherent "upstream" or
1078 "downstream" in the system."
1079 Therefore anyone who attempts to produce a ChangeLog from the merge
1082 Here we allow the user to specify the pull direction through an
1083 environment variable (GIT_UPSTREAM or GIT_DOWNSTREAM). If these two
1084 environment variables are not set, we assume a "simple single user"
1085 usage pattern: He manages local changes through stashes and uses
1086 "git pull" only to pull downstream.
1088 How to distinguish these situations? There are several hints:
1089 - During a "git stash apply", GIT_REFLOG_ACTION is not set. During
1090 a "git pull", it is set to 'pull '. During a "git pull --rebase",
1091 it is set to 'pull --rebase'. During a "git cherry-pick", it is
1092 set to 'cherry-pick'.
1093 - During a "git stash apply", there is an environment variable of
1094 the form GITHEAD_<40_hex_digits>='Stashed changes'. */
1098 var = getenv ("GIT_DOWNSTREAM");
1099 if (var != NULL && var[0] != '\0')
1103 var = getenv ("GIT_UPSTREAM");
1104 if (var != NULL && var[0] != '\0')
1108 var = getenv ("GIT_REFLOG_ACTION");
1109 #if 0 /* Debugging code */
1110 printf ("GIT_REFLOG_ACTION=|%s|\n", var);
1113 && ((strncmp (var, "pull", 4) == 0
1114 && c_strstr (var, " --rebase") == NULL)
1115 || strncmp (var, "merge origin", 12) == 0))
1119 /* "git stash apply", "git rebase", "git cherry-pick" and
1127 #if 0 /* Debugging code */
1130 printf ("First line of %%A:\n");
1131 sprintf (buf, "head -1 %s", destination_file_name); system (buf);
1132 printf ("First line of %%B:\n");
1133 sprintf (buf, "head -1 %s", other_file_name); system (buf);
1134 printf ("Guessing calling convention: %s\n",
1136 ? "%A = modified by user, %B = upstream"
1137 : "%A = upstream, %B = modified by user");
1143 mainstream_file_name = other_file_name;
1144 modified_file_name = destination_file_name;
1148 mainstream_file_name = destination_file_name;
1149 modified_file_name = other_file_name;
1152 /* Read the three files into memory. */
1153 read_changelog_file (ancestor_file_name, &ancestor_file);
1154 read_changelog_file (mainstream_file_name, &mainstream_file);
1155 read_changelog_file (modified_file_name, &modified_file);
1157 /* Compute correspondence between the entries of ancestor_file and of
1159 compute_mapping (&ancestor_file, &mainstream_file, false, &mapping);
1160 (void) entries_mapping_reverse_get; /* avoid gcc "defined but not" warning */
1162 /* Compute differences between the entries of ancestor_file and of
1164 compute_differences (&ancestor_file, &modified_file, &diffs);
1166 /* Compute the result. */
1167 result_entries_pointers =
1168 XNMALLOC (mainstream_file.num_entries, gl_list_node_t);
1170 gl_list_create_empty (GL_LINKED_LIST, entry_equals, entry_hashcode,
1174 for (k = 0; k < mainstream_file.num_entries; k++)
1175 result_entries_pointers[k] =
1176 gl_list_add_last (result_entries, mainstream_file.entries[k]);
1179 gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
1182 for (e = 0; e < diffs.num_edits; e++)
1184 struct edit *edit = diffs.edits[e];
1190 /* An addition to the top of modified_file.
1191 Apply it to the top of mainstream_file. */
1193 for (j = edit->j2; j >= edit->j1; j--)
1195 struct entry *added_entry = modified_file.entries[j];
1196 gl_list_add_first (result_entries, added_entry);
1205 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1206 ASSERT (i_before >= 0);
1207 i_after = (edit->j2 + 1 == modified_file.num_entries
1208 ? ancestor_file.num_entries
1209 : diffs.index_mapping_reverse[edit->j2 + 1]);
1210 ASSERT (i_after >= 0);
1211 ASSERT (i_after == i_before + 1);
1212 /* An addition between ancestor_file.entries[i_before] and
1213 ancestor_file.entries[i_after]. See whether these two
1214 entries still exist in mainstream_file and are still
1216 k_before = entries_mapping_get (&mapping, i_before);
1217 k_after = (i_after == ancestor_file.num_entries
1218 ? mainstream_file.num_entries
1219 : entries_mapping_get (&mapping, i_after));
1220 if (k_before >= 0 && k_after >= 0 && k_after == k_before + 1)
1222 /* Yes, the entry before and after are still neighbours
1223 in mainstream_file. Apply the addition between
1225 if (k_after == mainstream_file.num_entries)
1228 for (j = edit->j1; j <= edit->j2; j++)
1230 struct entry *added_entry = modified_file.entries[j];
1231 gl_list_add_last (result_entries, added_entry);
1236 gl_list_node_t node_k_after = result_entries_pointers[k_after];
1238 for (j = edit->j1; j <= edit->j2; j++)
1240 struct entry *added_entry = modified_file.entries[j];
1241 gl_list_add_before (result_entries, node_k_after, added_entry);
1247 /* It's not clear where the additions should be applied.
1248 Let the user decide. */
1249 struct conflict *c = XMALLOC (struct conflict);
1251 c->num_old_entries = 0;
1252 c->old_entries = NULL;
1253 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1254 c->modified_entries =
1255 XNMALLOC (c->num_modified_entries, struct entry *);
1256 for (j = edit->j1; j <= edit->j2; j++)
1257 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1258 gl_list_add_last (result_conflicts, c);
1264 /* Apply the removals one by one. */
1266 for (i = edit->i1; i <= edit->i2; i++)
1268 struct entry *removed_entry = ancestor_file.entries[i];
1269 ssize_t k = entries_mapping_get (&mapping, i);
1271 && entry_equals (removed_entry,
1272 mainstream_file.entries[k]))
1274 /* The entry to be removed still exists in
1275 mainstream_file. Remove it. */
1276 gl_list_node_set_value (result_entries,
1277 result_entries_pointers[k],
1282 /* The entry to be removed was already removed or was
1283 modified. This is a conflict. */
1284 struct conflict *c = XMALLOC (struct conflict);
1285 c->num_old_entries = 1;
1287 XNMALLOC (c->num_old_entries, struct entry *);
1288 c->old_entries[0] = removed_entry;
1289 c->num_modified_entries = 0;
1290 c->modified_entries = NULL;
1291 gl_list_add_last (result_conflicts, c);
1299 /* When the user usually merges entries from the same day,
1300 and this edit is at the top of the file: */
1301 if (split_merged_entry && edit->j1 == 0)
1303 /* Test whether the change is "simple merged", i.e. whether
1304 it consists of additions, followed by an augmentation of
1305 the first changed entry, followed by small changes of the
1318 modified_entry_n. */
1319 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1321 struct entry *split[2];
1322 bool simple_merged =
1323 try_split_merged_entry (ancestor_file.entries[edit->i1],
1324 modified_file.entries[edit->i1 + edit->j2 - edit->i2],
1329 for (i = edit->i1 + 1; i <= edit->i2; i++)
1330 if (entry_fstrcmp (ancestor_file.entries[i],
1331 modified_file.entries[i + edit->j2 - edit->i2],
1333 < FSTRCMP_THRESHOLD)
1335 simple_merged = false;
1341 /* Apply the additions at the top of modified_file.
1342 Apply each of the single-entry changes
1344 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1345 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1347 /* First part of the split modified_file.entries[edit->j2 - edit->i2 + edit->i1]: */
1348 gl_list_add_first (result_entries, split[0]);
1349 /* The additions. */
1350 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1352 struct entry *added_entry = modified_file.entries[j];
1353 gl_list_add_first (result_entries, added_entry);
1355 /* Now the single-entry changes. */
1356 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1358 struct entry *changed_entry =
1359 (j == edit->j1 + num_added
1361 : modified_file.entries[j]);
1362 size_t i = j + edit->i2 - edit->j2;
1363 ssize_t k = entries_mapping_get (&mapping, i);
1365 && entry_equals (ancestor_file.entries[i],
1366 mainstream_file.entries[k]))
1368 gl_list_node_set_value (result_entries,
1369 result_entries_pointers[k],
1372 else if (!entry_equals (ancestor_file.entries[i],
1375 struct conflict *c = XMALLOC (struct conflict);
1376 c->num_old_entries = 1;
1378 XNMALLOC (c->num_old_entries, struct entry *);
1379 c->old_entries[0] = ancestor_file.entries[i];
1380 c->num_modified_entries = 1;
1381 c->modified_entries =
1382 XNMALLOC (c->num_modified_entries, struct entry *);
1383 c->modified_entries[0] = changed_entry;
1384 gl_list_add_last (result_conflicts, c);
1394 /* Test whether the change is "simple", i.e. whether it
1395 consists of small changes to the old ChangeLog entries
1396 and additions before them:
1406 modified_entry_n. */
1407 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1411 for (i = edit->i1; i <= edit->i2; i++)
1412 if (entry_fstrcmp (ancestor_file.entries[i],
1413 modified_file.entries[i + edit->j2 - edit->i2],
1415 < FSTRCMP_THRESHOLD)
1425 /* Apply the additions and each of the single-entry
1426 changes separately. */
1427 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1428 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1431 /* A simple change at the top of modified_file.
1432 Apply it to the top of mainstream_file. */
1434 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1436 struct entry *added_entry = modified_file.entries[j];
1437 gl_list_add_first (result_entries, added_entry);
1439 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1441 struct entry *changed_entry = modified_file.entries[j];
1442 size_t i = j + edit->i2 - edit->j2;
1443 ssize_t k = entries_mapping_get (&mapping, i);
1445 && entry_equals (ancestor_file.entries[i],
1446 mainstream_file.entries[k]))
1448 gl_list_node_set_value (result_entries,
1449 result_entries_pointers[k],
1455 ASSERT (!entry_equals (ancestor_file.entries[i],
1457 c = XMALLOC (struct conflict);
1458 c->num_old_entries = 1;
1460 XNMALLOC (c->num_old_entries, struct entry *);
1461 c->old_entries[0] = ancestor_file.entries[i];
1462 c->num_modified_entries = 1;
1463 c->modified_entries =
1464 XNMALLOC (c->num_modified_entries, struct entry *);
1465 c->modified_entries[0] = changed_entry;
1466 gl_list_add_last (result_conflicts, c);
1476 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1477 ASSERT (i_before >= 0);
1478 /* A simple change after ancestor_file.entries[i_before].
1479 See whether this entry and the following num_changed
1480 entries still exist in mainstream_file and are still
1482 k_before = entries_mapping_get (&mapping, i_before);
1483 linear = (k_before >= 0);
1487 for (i = i_before + 1; i <= i_before + num_changed; i++)
1488 if (entries_mapping_get (&mapping, i) != k_before + (i - i_before))
1496 gl_list_node_t node_for_insert =
1497 result_entries_pointers[k_before + 1];
1499 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1501 struct entry *added_entry = modified_file.entries[j];
1502 gl_list_add_before (result_entries, node_for_insert, added_entry);
1504 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1506 struct entry *changed_entry = modified_file.entries[j];
1507 size_t i = j + edit->i2 - edit->j2;
1508 ssize_t k = entries_mapping_get (&mapping, i);
1510 if (entry_equals (ancestor_file.entries[i],
1511 mainstream_file.entries[k]))
1513 gl_list_node_set_value (result_entries,
1514 result_entries_pointers[k],
1520 ASSERT (!entry_equals (ancestor_file.entries[i],
1522 c = XMALLOC (struct conflict);
1523 c->num_old_entries = 1;
1525 XNMALLOC (c->num_old_entries, struct entry *);
1526 c->old_entries[0] = ancestor_file.entries[i];
1527 c->num_modified_entries = 1;
1528 c->modified_entries =
1529 XNMALLOC (c->num_modified_entries, struct entry *);
1530 c->modified_entries[0] = changed_entry;
1531 gl_list_add_last (result_conflicts, c);
1541 See whether the num_changed entries still exist
1542 unchanged in mainstream_file and are still
1546 bool linear_unchanged;
1548 k_first = entries_mapping_get (&mapping, i_first);
1551 && entry_equals (ancestor_file.entries[i_first],
1552 mainstream_file.entries[k_first]));
1553 if (linear_unchanged)
1556 for (i = i_first + 1; i <= edit->i2; i++)
1557 if (!(entries_mapping_get (&mapping, i) == k_first + (i - i_first)
1558 && entry_equals (ancestor_file.entries[i],
1559 mainstream_file.entries[entries_mapping_get (&mapping, i)])))
1561 linear_unchanged = false;
1565 if (linear_unchanged)
1567 gl_list_node_t node_for_insert =
1568 result_entries_pointers[k_first];
1571 for (j = edit->j2; j >= edit->j1; j--)
1573 struct entry *new_entry = modified_file.entries[j];
1574 gl_list_add_before (result_entries, node_for_insert, new_entry);
1576 for (i = edit->i1; i <= edit->i2; i++)
1578 ssize_t k = entries_mapping_get (&mapping, i);
1580 ASSERT (entry_equals (ancestor_file.entries[i],
1581 mainstream_file.entries[k]));
1582 gl_list_node_set_value (result_entries,
1583 result_entries_pointers[k],
1592 struct conflict *c = XMALLOC (struct conflict);
1594 c->num_old_entries = edit->i2 - edit->i1 + 1;
1596 XNMALLOC (c->num_old_entries, struct entry *);
1597 for (i = edit->i1; i <= edit->i2; i++)
1598 c->old_entries[i - edit->i1] = ancestor_file.entries[i];
1599 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1600 c->modified_entries =
1601 XNMALLOC (c->num_modified_entries, struct entry *);
1602 for (j = edit->j1; j <= edit->j2; j++)
1603 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1604 gl_list_add_last (result_conflicts, c);
1612 /* Output the result. */
1614 FILE *fp = fopen (destination_file_name, "w");
1617 fprintf (stderr, "could not write file '%s'\n", destination_file_name);
1618 exit (EXIT_FAILURE);
1621 /* Output the conflicts at the top. */
1623 size_t n = gl_list_size (result_conflicts);
1625 for (i = 0; i < n; i++)
1626 conflict_write (fp, (struct conflict *) gl_list_get_at (result_conflicts, i));
1628 /* Output the modified and unmodified entries, in order. */
1630 gl_list_iterator_t iter = gl_list_iterator (result_entries);
1632 gl_list_node_t node;
1633 while (gl_list_iterator_next (&iter, &elt, &node))
1634 entry_write (fp, (struct entry *) elt);
1635 gl_list_iterator_free (&iter);
1638 if (fwriteerror (fp))
1640 fprintf (stderr, "error writing to file '%s'\n", destination_file_name);
1641 exit (EXIT_FAILURE);
1645 exit (gl_list_size (result_conflicts) > 0 ? EXIT_FAILURE : EXIT_SUCCESS);