1 /* git-merge-changelog - git "merge" driver for GNU style ChangeLog files.
2 Copyright (C) 2008-2009 Bruno Haible <bruno@clisp.org>
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 The default merge driver of 'git' *always* produces conflicts when
19 pulling public modifications into a privately modified ChangeLog file.
20 This is because ChangeLog files are always modified at the top; the
21 default merge driver has no clue how to deal with this. Furthermore
22 the conflicts are presented with more <<<< ==== >>>> markers than
23 necessary; this is because the default merge driver makes pointless
24 efforts to look at the individual line changes inside a ChangeLog entry.
26 This program serves as a 'git' merge driver that avoids these problems.
27 1. It produces no conflict when ChangeLog entries have been inserted
28 at the top both in the public and in the private modification. It
29 puts the privately added entries above the publicly added entries.
30 2. It respects the structure of ChangeLog files: entries are not split
31 into lines but kept together.
32 3. It also handles the case of small modifications of past ChangeLog
33 entries, or of removed ChangeLog entries: they are merged as one
35 4. Conflicts are presented at the top of the file, rather than where
36 they occurred, so that the user will see them immediately. (Unlike
37 for source code written in some programming language, conflict markers
38 that are located several hundreds lines from the top will not cause
39 any syntax error and therefore would be likely to remain unnoticed.)
43 $ gnulib-tool --create-testdir --dir=/tmp/testdir123 git-merge-changelog
48 - Add to .git/config of the checkout (or to your $HOME/.gitconfig) the lines
50 [merge "merge-changelog"]
51 name = GNU-style ChangeLog merge driver
52 driver = /usr/local/bin/git-merge-changelog %O %A %B
54 - In every directory that contains a ChangeLog file, add a file
55 '.gitattributes' with this line:
57 ChangeLog merge=merge-changelog
59 (See "man 5 gitattributes" for more info.)
62 /* Calling convention:
63 A merge driver is called with three filename arguments:
64 1. %O = The common ancestor of %A and %B.
65 2. %A = The file's contents from the "current branch".
66 3. %B = The file's contents from the "other branch"; this is the contents
69 In case of a "git stash apply" or of an upstream pull (e.g. from a subsystem
70 maintainer to a central maintainer) or of a downstream pull with --rebase:
71 2. %A = The file's newest pulled contents; modified by other committers.
72 3. %B = The user's newest copy of the file; modified by the user.
73 In case of a downstream pull (e.g. from a central repository to the user)
74 or of an upstream pull with --rebase:
75 2. %A = The user's newest copy of the file; modified by the user.
76 3. %B = The file's newest pulled contents; modified by other committers.
78 It should write its merged output into file %A. It can also echo some
79 remarks to stdout. It should exit with return code 0 if the merge could
80 be resolved cleanly, or with non-zero return code if there were conflicts.
84 The structure of a ChangeLog file: It consists of ChangeLog entries. A
85 ChangeLog entry starts at a line following a blank line and that starts with
86 a non-whitespace character, or at the beginning of a file.
87 The merge driver works as follows: It reads the three files into memory and
88 dissects them into ChangeLog entries. It then finds the differences between
89 %O and %B. They are classified as:
90 - removals (some consecutive entries removed),
91 - changes (some consecutive entries removed, some consecutive entries
93 - additions (some consecutive entries added).
94 The driver then attempts to apply the changes to %A.
95 To this effect, it first computes a correspondence between the entries in %O
96 and the entries in %A, using fuzzy string matching to still identify changed
98 - Removals are applied one by one. If the entry is present in %A, at any
99 position, it is removed. If not, the removal is marked as a conflict.
100 - Additions at the top of %B are applied at the top of %A.
101 - Additions between entry x and entry y (y may be the file end) in %B are
102 applied between entry x and entry y in %A (if they still exist and are
103 still consecutive in %A), otherwise the additions are marked as a
105 - Changes are categorized into "simple changes":
108 added_entry ... added_entry modified_entry1 ... modified_entryn,
109 where the correspondence between entry_i and modified_entry_i is still
110 clear; and "big changes": these are all the rest. Simple changes at the
111 top of %B are applied by putting the added entries at the top of %A. The
112 changes in simple changes are applied one by one; possibly leading to
113 single-entry conflicts. Big changes are applied en bloc, possibly
114 leading to conflicts spanning multiple entries.
115 - Conflicts are output at the top of the file and cause an exit status of
127 #include <sys/types.h>
130 #include "progname.h"
132 #include "read-file.h"
133 #include "gl_xlist.h"
134 #include "gl_array_list.h"
135 #include "gl_linkedhash_list.h"
136 #include "gl_rbtreehash_list.h"
137 #include "gl_linked_list.h"
139 #include "xmalloca.h"
142 #include "c-strstr.h"
143 #include "fwriteerror.h"
145 #define ASSERT(expr) \
153 #define FSTRCMP_THRESHOLD 0.6
154 #define FSTRCMP_STRICTER_THRESHOLD 0.8
156 /* Representation of a ChangeLog entry.
157 The string may contain NUL bytes; therefore it is represented as a plain
158 opaque memory region. */
163 /* Cache for the hash code. */
164 bool hashcode_cached;
169 The memory region passed by the caller must of indefinite extent. It is
170 *not* copied here. */
171 static struct entry *
172 entry_create (char *string, size_t length)
174 struct entry *result = XMALLOC (struct entry);
175 result->string = string;
176 result->length = length;
177 result->hashcode_cached = false;
181 /* Compare two entries for equality. */
183 entry_equals (const void *elt1, const void *elt2)
185 const struct entry *entry1 = (const struct entry *) elt1;
186 const struct entry *entry2 = (const struct entry *) elt2;
187 return entry1->length == entry2->length
188 && memcmp (entry1->string, entry2->string, entry1->length) == 0;
191 /* Return a hash code of the contents of a ChangeLog entry. */
193 entry_hashcode (const void *elt)
195 struct entry *entry = (struct entry *) elt;
196 if (!entry->hashcode_cached)
198 /* See http://www.haible.de/bruno/hashfunc.html. */
203 for (s = entry->string, n = entry->length; n > 0; s++, n--)
204 h = (unsigned char) *s + ((h << 9) | (h >> (sizeof (size_t) * CHAR_BIT - 9)));
207 entry->hashcode_cached = true;
209 return entry->hashcode;
212 /* Perform a fuzzy comparison of two ChangeLog entries.
213 Return a similarity measure of the two entries, a value between 0 and 1.
214 0 stands for very distinct, 1 for identical.
215 If the result is < LOWER_BOUND, an arbitrary other value < LOWER_BOUND can
218 entry_fstrcmp (const struct entry *entry1, const struct entry *entry2,
221 /* fstrcmp works only on NUL terminated strings. */
225 if (memchr (entry1->string, '\0', entry1->length) != NULL)
227 if (memchr (entry2->string, '\0', entry2->length) != NULL)
229 memory = (char *) xmalloca (entry1->length + 1 + entry2->length + 1);
232 memcpy (p, entry1->string, entry1->length);
235 memcpy (p, entry2->string, entry2->length);
240 fstrcmp_bounded (memory, memory + entry1->length + 1, lower_bound);
245 /* This structure represents an entire ChangeLog file, after it was read
247 struct changelog_file
249 /* The entries, as a list. */
250 gl_list_t /* <struct entry *> */ entries_list;
251 /* The entries, as a list in opposite direction. */
252 gl_list_t /* <struct entry *> */ entries_reversed;
253 /* The entries, as an array. */
255 struct entry **entries;
258 /* Read a ChangeLog file into memory.
259 Return the contents in *RESULT. */
261 read_changelog_file (const char *filename, struct changelog_file *result)
263 /* Read the file in text mode, otherwise it's hard to recognize empty
266 char *contents = read_file (filename, &length);
267 if (contents == NULL)
269 fprintf (stderr, "could not read file '%s'\n", filename);
273 result->entries_list =
274 gl_list_create_empty (GL_LINKEDHASH_LIST, entry_equals, entry_hashcode,
276 result->entries_reversed =
277 gl_list_create_empty (GL_RBTREEHASH_LIST, entry_equals, entry_hashcode,
279 /* A ChangeLog file consists of ChangeLog entries. A ChangeLog entry starts
280 at a line following a blank line and that starts with a non-whitespace
281 character, or at the beginning of a file.
282 Split the file contents into entries. */
284 char *contents_end = contents + length;
285 char *start = contents;
286 while (start < contents_end)
288 /* Search the end of the current entry. */
292 while (ptr < contents_end)
294 ptr = memchr (ptr, '\n', contents_end - ptr);
301 if (contents_end - ptr >= 2
303 && !(ptr[1] == '\n' || ptr[1] == '\t' || ptr[1] == ' '))
310 curr = entry_create (start, ptr - start);
311 gl_list_add_last (result->entries_list, curr);
312 gl_list_add_first (result->entries_reversed, curr);
318 result->num_entries = gl_list_size (result->entries_list);
319 result->entries = XNMALLOC (result->num_entries, struct entry *);
322 gl_list_iterator_t iter = gl_list_iterator (result->entries_list);
325 while (gl_list_iterator_next (&iter, &elt, &node))
326 result->entries[index++] = (struct entry *) elt;
327 gl_list_iterator_free (&iter);
328 ASSERT (index == result->num_entries);
332 /* A mapping (correspondence) between entries of FILE1 and of FILE2. */
333 struct entries_mapping
335 struct changelog_file *file1;
336 struct changelog_file *file2;
337 /* Mapping from indices in FILE1 to indices in FILE2.
338 A value -1 means that the entry from FILE1 is not found in FILE2.
339 A value -2 means that it has not yet been computed. */
340 ssize_t *index_mapping;
341 /* Mapping from indices in FILE2 to indices in FILE1.
342 A value -1 means that the entry from FILE2 is not found in FILE1.
343 A value -2 means that it has not yet been computed. */
344 ssize_t *index_mapping_reverse;
347 /* Look up (or lazily compute) the mapping of an entry in FILE1.
348 i is the index in FILE1.
349 Return the index in FILE2, or -1 when the entry is not found in FILE2. */
351 entries_mapping_get (struct entries_mapping *mapping, ssize_t i)
353 if (mapping->index_mapping[i] < -1)
355 struct changelog_file *file1 = mapping->file1;
356 struct changelog_file *file2 = mapping->file2;
357 size_t n1 = file1->num_entries;
358 size_t n2 = file2->num_entries;
359 struct entry *entry_i = file1->entries[i];
362 /* Search whether it approximately occurs in file2. */
364 double best_j_similarity = 0.0;
365 for (j = n2 - 1; j >= 0; j--)
366 if (mapping->index_mapping_reverse[j] < 0)
369 entry_fstrcmp (entry_i, file2->entries[j], best_j_similarity);
370 if (similarity > best_j_similarity)
373 best_j_similarity = similarity;
376 if (best_j_similarity >= FSTRCMP_THRESHOLD)
378 /* Found a similar entry in file2. */
379 struct entry *entry_j = file2->entries[best_j];
380 /* Search whether it approximately occurs in file1 at index i. */
382 double best_i_similarity = 0.0;
384 for (ii = n1 - 1; ii >= 0; ii--)
385 if (mapping->index_mapping[ii] < 0)
388 entry_fstrcmp (file1->entries[ii], entry_j,
390 if (similarity > best_i_similarity)
393 best_i_similarity = similarity;
396 if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i)
398 mapping->index_mapping[i] = best_j;
399 mapping->index_mapping_reverse[best_j] = i;
402 if (mapping->index_mapping[i] < -1)
403 /* It does not approximately occur in FILE2.
404 Remember it, for next time. */
405 mapping->index_mapping[i] = -1;
407 return mapping->index_mapping[i];
410 /* Look up (or lazily compute) the mapping of an entry in FILE2.
411 j is the index in FILE2.
412 Return the index in FILE1, or -1 when the entry is not found in FILE1. */
414 entries_mapping_reverse_get (struct entries_mapping *mapping, ssize_t j)
416 if (mapping->index_mapping_reverse[j] < -1)
418 struct changelog_file *file1 = mapping->file1;
419 struct changelog_file *file2 = mapping->file2;
420 size_t n1 = file1->num_entries;
421 size_t n2 = file2->num_entries;
422 struct entry *entry_j = file2->entries[j];
425 /* Search whether it approximately occurs in file1. */
427 double best_i_similarity = 0.0;
428 for (i = n1 - 1; i >= 0; i--)
429 if (mapping->index_mapping[i] < 0)
432 entry_fstrcmp (file1->entries[i], entry_j, best_i_similarity);
433 if (similarity > best_i_similarity)
436 best_i_similarity = similarity;
439 if (best_i_similarity >= FSTRCMP_THRESHOLD)
441 /* Found a similar entry in file1. */
442 struct entry *entry_i = file1->entries[best_i];
443 /* Search whether it approximately occurs in file2 at index j. */
445 double best_j_similarity = 0.0;
447 for (jj = n2 - 1; jj >= 0; jj--)
448 if (mapping->index_mapping_reverse[jj] < 0)
451 entry_fstrcmp (entry_i, file2->entries[jj],
453 if (similarity > best_j_similarity)
456 best_j_similarity = similarity;
459 if (best_j_similarity >= FSTRCMP_THRESHOLD && best_j == j)
461 mapping->index_mapping_reverse[j] = best_i;
462 mapping->index_mapping[best_i] = j;
465 if (mapping->index_mapping_reverse[j] < -1)
466 /* It does not approximately occur in FILE1.
467 Remember it, for next time. */
468 mapping->index_mapping_reverse[j] = -1;
470 return mapping->index_mapping_reverse[j];
473 /* Compute a mapping (correspondence) between entries of FILE1 and of FILE2.
474 The correspondence also takes into account small modifications; i.e. the
475 indicated relation is not equality of entries but best-match similarity
477 If FULL is true, the maximum of matching is done up-front. If it is false,
478 it is done in a lazy way through the functions entries_mapping_get and
479 entries_mapping_reverse_get.
480 Return the result in *RESULT. */
482 compute_mapping (struct changelog_file *file1, struct changelog_file *file2,
484 struct entries_mapping *result)
486 /* Mapping from indices in file1 to indices in file2. */
487 ssize_t *index_mapping;
488 /* Mapping from indices in file2 to indices in file1. */
489 ssize_t *index_mapping_reverse;
490 size_t n1 = file1->num_entries;
491 size_t n2 = file2->num_entries;
494 index_mapping = XNMALLOC (n1, ssize_t);
495 for (i = 0; i < n1; i++)
496 index_mapping[i] = -2;
498 index_mapping_reverse = XNMALLOC (n2, ssize_t);
499 for (j = 0; j < n2; j++)
500 index_mapping_reverse[j] = -2;
502 for (i = n1 - 1; i >= 0; i--)
503 /* Take an entry from file1. */
504 if (index_mapping[i] < -1)
506 struct entry *entry = file1->entries[i];
507 /* Search whether it occurs in file2. */
508 j = gl_list_indexof (file2->entries_reversed, entry);
512 /* Found an exact correspondence. */
513 /* If index_mapping_reverse[j] >= 0, we have already seen other
514 copies of this entry, and there were more occurrences of it in
515 file1 than in file2. In this case, do nothing. */
516 if (index_mapping_reverse[j] < 0)
518 index_mapping[i] = j;
519 index_mapping_reverse[j] = i;
520 /* Look for more occurrences of the same entry. Match them
521 as long as they pair up. Unpaired occurrences of the same
522 entry are left without mapping. */
533 gl_list_indexof_from (file1->entries_reversed,
538 gl_list_indexof_from (file2->entries_reversed,
542 curr_i = n1 - 1 - next_i;
543 curr_j = n2 - 1 - next_j;
544 ASSERT (index_mapping[curr_i] < 0);
545 ASSERT (index_mapping_reverse[curr_j] < 0);
546 index_mapping[curr_i] = curr_j;
547 index_mapping_reverse[curr_j] = curr_i;
554 result->file1 = file1;
555 result->file2 = file2;
556 result->index_mapping = index_mapping;
557 result->index_mapping_reverse = index_mapping_reverse;
560 for (i = n1 - 1; i >= 0; i--)
561 entries_mapping_get (result, i);
564 /* An "edit" is a textual modification performed by the user, that needs to
565 be applied to the other file. */
568 /* Some consecutive entries were added. */
570 /* Some consecutive entries were removed; some other consecutive entries
571 were added at the same position. (Not necessarily the same number of
574 /* Some consecutive entries were removed. */
578 /* This structure represents an edit. */
582 /* Range of indices into the entries of FILE1. */
583 ssize_t i1, i2; /* first, last index; only used for CHANGE, REMOVAL */
584 /* Range of indices into the entries of FILE2. */
585 ssize_t j1, j2; /* first, last index; only used for ADDITION, CHANGE */
588 /* This structure represents the differences from one file, FILE1, to another
592 /* An array mapping FILE1 indices to FILE2 indices (or -1 when the entry
593 from FILE1 is not found in FILE2). */
594 ssize_t *index_mapping;
595 /* An array mapping FILE2 indices to FILE1 indices (or -1 when the entry
596 from FILE2 is not found in FILE1). */
597 ssize_t *index_mapping_reverse;
598 /* The edits that transform FILE1 into FILE2. */
603 /* Import the difference detection algorithm from GNU diff. */
604 #define ELEMENT struct entry *
605 #define EQUAL entry_equals
606 #define OFFSET ssize_t
607 #define EXTRA_CONTEXT_FIELDS \
608 ssize_t *index_mapping; \
609 ssize_t *index_mapping_reverse;
610 #define NOTE_DELETE(ctxt, xoff) \
611 ctxt->index_mapping[xoff] = -1
612 #define NOTE_INSERT(ctxt, yoff) \
613 ctxt->index_mapping_reverse[yoff] = -1
616 /* Compute the differences between the entries of FILE1 and the entries of
619 compute_differences (struct changelog_file *file1, struct changelog_file *file2,
620 struct differences *result)
622 /* Unlike compute_mapping, which mostly ignores the order of the entries and
623 therefore works well when some entries are permuted, here we use the order.
624 I think this is needed in order to distinguish changes from
625 additions+removals; I don't know how to say what is a "change" if the
626 files are considered as unordered sets of entries. */
628 size_t n1 = file1->num_entries;
629 size_t n2 = file2->num_entries;
632 gl_list_t /* <struct edit *> */ edits;
634 ctxt.xvec = file1->entries;
635 ctxt.yvec = file2->entries;
636 ctxt.index_mapping = XNMALLOC (n1, ssize_t);
637 for (i = 0; i < n1; i++)
638 ctxt.index_mapping[i] = 0;
639 ctxt.index_mapping_reverse = XNMALLOC (n2, ssize_t);
640 for (j = 0; j < n2; j++)
641 ctxt.index_mapping_reverse[j] = 0;
642 ctxt.fdiag = XNMALLOC (2 * (n1 + n2 + 3), ssize_t) + n2 + 1;
643 ctxt.bdiag = ctxt.fdiag + n1 + n2 + 3;
644 ctxt.too_expensive = n1 + n2;
646 /* Store in ctxt.index_mapping and ctxt.index_mapping_reverse a -1 for
647 each removed or added entry. */
648 compareseq (0, n1, 0, n2, 0, &ctxt);
650 /* Complete the index_mapping and index_mapping_reverse arrays. */
653 while (i < n1 || j < n2)
655 while (i < n1 && ctxt.index_mapping[i] < 0)
657 while (j < n2 && ctxt.index_mapping_reverse[j] < 0)
659 ASSERT ((i < n1) == (j < n2));
660 if (i == n1 && j == n2)
662 ctxt.index_mapping[i] = j;
663 ctxt.index_mapping_reverse[j] = i;
668 /* Create the edits. */
669 edits = gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
672 while (i < n1 || j < n2)
678 e = XMALLOC (struct edit);
682 gl_list_add_last (edits, e);
689 e = XMALLOC (struct edit);
693 gl_list_add_last (edits, e);
696 if (ctxt.index_mapping[i] >= 0)
698 if (ctxt.index_mapping_reverse[j] >= 0)
700 ASSERT (ctxt.index_mapping[i] == j);
701 ASSERT (ctxt.index_mapping_reverse[j] == i);
708 ASSERT (ctxt.index_mapping_reverse[j] < 0);
709 e = XMALLOC (struct edit);
714 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
716 gl_list_add_last (edits, e);
721 if (ctxt.index_mapping_reverse[j] >= 0)
724 ASSERT (ctxt.index_mapping[i] < 0);
725 e = XMALLOC (struct edit);
730 while (i < n1 && ctxt.index_mapping[i] < 0);
732 gl_list_add_last (edits, e);
737 ASSERT (ctxt.index_mapping[i] < 0);
738 ASSERT (ctxt.index_mapping_reverse[j] < 0);
739 e = XMALLOC (struct edit);
744 while (i < n1 && ctxt.index_mapping[i] < 0);
749 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
751 gl_list_add_last (edits, e);
756 result->index_mapping = ctxt.index_mapping;
757 result->index_mapping_reverse = ctxt.index_mapping_reverse;
758 result->num_edits = gl_list_size (edits);
759 result->edits = XNMALLOC (result->num_edits, struct edit *);
762 gl_list_iterator_t iter = gl_list_iterator (edits);
765 while (gl_list_iterator_next (&iter, &elt, &node))
766 result->edits[index++] = (struct edit *) elt;
767 gl_list_iterator_free (&iter);
768 ASSERT (index == result->num_edits);
772 /* An empty entry. */
773 static struct entry empty_entry = { NULL, 0 };
775 /* Return the end a paragraph.
777 OFFSET is an offset into the entry, OFFSET <= ENTRY->length.
778 Return the offset of the end of paragraph, as an offset <= ENTRY->length;
779 it is the start of a blank line or the end of the entry. */
781 find_paragraph_end (const struct entry *entry, size_t offset)
783 const char *string = entry->string;
784 size_t length = entry->length;
788 const char *nl = memchr (string + offset, '\n', length - offset);
791 offset = (nl - string) + 1;
792 if (offset < length && string[offset] == '\n')
797 /* Split a merged entry.
798 Given an old entry of the form
801 and a new entry of the form
805 where the two titles are the same and BODY and BODY' are very similar,
806 this computes two new entries
813 If the entries don't have this form, it returns false. */
815 try_split_merged_entry (const struct entry *old_entry,
816 const struct entry *new_entry,
817 struct entry *new_split[2])
819 size_t old_title_len = find_paragraph_end (old_entry, 0);
820 size_t new_title_len = find_paragraph_end (new_entry, 0);
821 struct entry old_body;
822 struct entry new_body;
823 size_t best_split_offset;
824 double best_similarity;
828 if (!(old_title_len == new_title_len
829 && memcmp (old_entry->string, new_entry->string, old_title_len) == 0))
832 old_body.string = old_entry->string + old_title_len;
833 old_body.length = old_entry->length - old_title_len;
835 /* Determine where to split the new entry.
836 This is done by maximizing the similarity between BODY and BODY'. */
837 best_split_offset = split_offset = new_title_len;
838 best_similarity = 0.0;
843 new_body.string = new_entry->string + split_offset;
844 new_body.length = new_entry->length - split_offset;
846 entry_fstrcmp (&old_body, &new_body, best_similarity);
847 if (similarity > best_similarity)
849 best_split_offset = split_offset;
850 best_similarity = similarity;
852 if (best_similarity == 1.0)
853 /* It cannot get better. */
856 if (split_offset < new_entry->length)
857 split_offset = find_paragraph_end (new_entry, split_offset + 1);
862 /* BODY' should not be empty. */
863 if (best_split_offset == new_entry->length)
865 ASSERT (new_entry->string[best_split_offset] == '\n');
867 /* A certain similarity between BODY and BODY' is required. */
868 if (best_similarity < FSTRCMP_STRICTER_THRESHOLD)
871 new_split[0] = entry_create (new_entry->string, best_split_offset + 1);
874 size_t len1 = new_title_len;
875 size_t len2 = new_entry->length - best_split_offset;
876 char *combined = XNMALLOC (len1 + len2, char);
877 memcpy (combined, new_entry->string, len1);
878 memcpy (combined + len1, new_entry->string + best_split_offset, len2);
879 new_split[1] = entry_create (combined, len1 + len2);
885 /* Write the contents of an entry to the output stream FP. */
887 entry_write (FILE *fp, struct entry *entry)
889 if (entry->length > 0)
890 fwrite (entry->string, 1, entry->length, fp);
893 /* This structure represents a conflict.
894 A conflict can occur for various reasons. */
897 /* Parts from the ancestor file. */
898 size_t num_old_entries;
899 struct entry **old_entries;
900 /* Parts of the modified file. */
901 size_t num_modified_entries;
902 struct entry **modified_entries;
905 /* Write a conflict to the output stream FP, including markers. */
907 conflict_write (FILE *fp, struct conflict *c)
911 /* Use the same syntax as git's default merge driver.
912 Don't indent the contents of the entries (with things like ">" or "-"),
913 otherwise the user needs more textual editing to resolve the conflict. */
914 fputs ("<<<<<<<\n", fp);
915 for (i = 0; i < c->num_old_entries; i++)
916 entry_write (fp, c->old_entries[i]);
917 fputs ("=======\n", fp);
918 for (i = 0; i < c->num_modified_entries; i++)
919 entry_write (fp, c->modified_entries[i]);
920 fputs (">>>>>>>\n", fp);
924 static const struct option long_options[] =
926 { "help", no_argument, NULL, 'h' },
927 { "split-merged-entry", no_argument, NULL, CHAR_MAX + 1 },
928 { "version", no_argument, NULL, 'V' },
932 /* Print a usage mesage and exit. */
936 if (status != EXIT_SUCCESS)
937 fprintf (stderr, "Try `%s --help' for more information.\n",
941 printf ("Usage: %s [OPTION] O-FILE-NAME A-FILE-NAME B-FILE-NAME\n",
944 printf ("Merges independent modifications of a ChangeLog style file.\n");
945 printf ("O-FILE-NAME names the original file, the ancestor of the two others.\n");
946 printf ("A-FILE-NAME names the publicly modified file.\n");
947 printf ("B-FILE-NAME names the user-modified file.\n");
948 printf ("Writes the merged file into A-FILE-NAME.\n");
950 printf ("Operation modifiers:\n");
952 --split-merged-entry Possibly split a merged entry between paragraphs.\n\
953 Use this if you have the habit to merge unrelated\n\
954 entries into a single one, separated only by a\n\
955 newline, just because they happened on the same\n\
958 printf ("Informative output:\n");
959 printf (" -h, --help display this help and exit\n");
960 printf (" -V, --version output version information and exit\n");
962 fputs ("Report bugs to <bug-gnu-gettext@gnu.org>.\n",
970 main (int argc, char *argv[])
975 bool split_merged_entry;
977 /* Set program name for messages. */
978 set_program_name (argv[0]);
980 /* Set default values for variables. */
983 split_merged_entry = false;
985 /* Parse command line options. */
986 while ((optchar = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
989 case '\0': /* Long option. */
997 case CHAR_MAX + 1: /* --split-merged-entry */
998 split_merged_entry = true;
1001 usage (EXIT_FAILURE);
1006 /* Version information is requested. */
1007 printf ("%s\n", program_name);
1008 printf ("Copyright (C) %s Free Software Foundation, Inc.\n\
1009 License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n\
1010 This is free software: you are free to change and redistribute it.\n\
1011 There is NO WARRANTY, to the extent permitted by law.\n\
1014 printf ("Written by %s.\n", "Bruno Haible");
1015 exit (EXIT_SUCCESS);
1020 /* Help is requested. */
1021 usage (EXIT_SUCCESS);
1024 /* Test argument count. */
1025 if (optind + 3 != argc)
1026 error (EXIT_FAILURE, 0, "expected three arguments");
1029 const char *ancestor_file_name; /* O-FILE-NAME */
1030 const char *destination_file_name; /* A-FILE-NAME */
1032 const char *other_file_name; /* B-FILE-NAME */
1033 const char *mainstream_file_name;
1034 const char *modified_file_name;
1035 struct changelog_file ancestor_file;
1036 struct changelog_file mainstream_file;
1037 struct changelog_file modified_file;
1038 /* Mapping from indices in ancestor_file to indices in mainstream_file. */
1039 struct entries_mapping mapping;
1040 struct differences diffs;
1041 gl_list_node_t *result_entries_pointers; /* array of pointers into result_entries */
1042 gl_list_t /* <struct entry *> */ result_entries;
1043 gl_list_t /* <struct conflict *> */ result_conflicts;
1045 ancestor_file_name = argv[optind];
1046 destination_file_name = argv[optind + 1];
1047 other_file_name = argv[optind + 2];
1049 /* Heuristic to determine whether it's a pull in downstream direction
1050 (e.g. pull from a centralized server) or a pull in upstream direction
1051 (e.g. "git stash apply").
1053 For ChangeLog this distinction is important. The difference between
1054 an "upstream" and a "downstream" repository is that more people are
1055 looking at the "upstream" repository. They want to be informed about
1056 changes and expect them to be shown at the top of the ChangeLog.
1057 When a user pulls downstream, on the other hand, he has two options:
1058 a) He gets the change entries from the central repository also at the
1059 top of his ChangeLog, and his own changes come after them.
1060 b) He gets the change entries from the central repository after those
1061 he has collected for his branch. His own change entries stay at
1062 the top of the ChangeLog file.
1063 In the case a) he has to reorder the ChangeLog before he can commit.
1064 No one does that. So most people want b).
1065 In other words, the order of entries in a ChangeLog should represent
1066 the order in which they have flown (or will flow) into the *central*
1069 But in git this is fundamentally indistinguishable, because when Linus
1070 pulls patches from akpm and akpm pulls patches from Linus, it's not
1071 clear which of the two is more "upstream". Also, when you have many
1072 branches in a repository and pull from one to another, "git" has no way
1073 to know which branch is more "upstream" than the other. The git-tag(1)
1074 manual page also says:
1075 "One important aspect of git is it is distributed, and being
1076 distributed largely means there is no inherent "upstream" or
1077 "downstream" in the system."
1078 Therefore anyone who attempts to produce a ChangeLog from the merge
1081 Here we allow the user to specify the pull direction through an
1082 environment variable (GIT_UPSTREAM or GIT_DOWNSTREAM). If these two
1083 environment variables are not set, we assume a "simple single user"
1084 usage pattern: He manages local changes through stashes and uses
1085 "git pull" only to pull downstream.
1087 How to distinguish these situation? There are several hints:
1088 - During a "git stash apply", GIT_REFLOG_ACTION is not set. During
1089 a "git pull", it is set to 'pull '. During a "git pull --rebase",
1090 it is set to 'pull --rebase'. During a "git cherry-pick", it is
1091 set to 'cherry-pick'.
1092 - During a "git stash apply", there is an environment variable of
1093 the form GITHEAD_<40_hex_digits>='Stashed changes'. */
1097 var = getenv ("GIT_DOWNSTREAM");
1098 if (var != NULL && var[0] != '\0')
1102 var = getenv ("GIT_UPSTREAM");
1103 if (var != NULL && var[0] != '\0')
1107 var = getenv ("GIT_REFLOG_ACTION");
1108 #if 0 /* Debugging code */
1109 printf ("GIT_REFLOG_ACTION=|%s|\n", var);
1112 && ((strncmp (var, "pull", 4) == 0
1113 && c_strstr (var, " --rebase") == NULL)
1114 || strncmp (var, "merge origin", 12) == 0))
1118 /* "git stash apply", "git rebase", "git cherry-pick" and
1126 #if 0 /* Debugging code */
1129 printf ("First line of %%A:\n");
1130 sprintf (buf, "head -1 %s", destination_file_name); system (buf);
1131 printf ("First line of %%B:\n");
1132 sprintf (buf, "head -1 %s", other_file_name); system (buf);
1133 printf ("Guessing calling convention: %s\n",
1135 ? "%A = modified by user, %B = upstream"
1136 : "%A = upstream, %B = modified by user");
1142 mainstream_file_name = other_file_name;
1143 modified_file_name = destination_file_name;
1147 mainstream_file_name = destination_file_name;
1148 modified_file_name = other_file_name;
1151 /* Read the three files into memory. */
1152 read_changelog_file (ancestor_file_name, &ancestor_file);
1153 read_changelog_file (mainstream_file_name, &mainstream_file);
1154 read_changelog_file (modified_file_name, &modified_file);
1156 /* Compute correspondence between the entries of ancestor_file and of
1158 compute_mapping (&ancestor_file, &mainstream_file, false, &mapping);
1159 (void) entries_mapping_reverse_get; /* avoid gcc "defined but not" warning */
1161 /* Compute differences between the entries of ancestor_file and of
1163 compute_differences (&ancestor_file, &modified_file, &diffs);
1165 /* Compute the result. */
1166 result_entries_pointers =
1167 XNMALLOC (mainstream_file.num_entries, gl_list_node_t);
1169 gl_list_create_empty (GL_LINKED_LIST, entry_equals, entry_hashcode,
1173 for (k = 0; k < mainstream_file.num_entries; k++)
1174 result_entries_pointers[k] =
1175 gl_list_add_last (result_entries, mainstream_file.entries[k]);
1178 gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
1181 for (e = 0; e < diffs.num_edits; e++)
1183 struct edit *edit = diffs.edits[e];
1189 /* An addition to the top of modified_file.
1190 Apply it to the top of mainstream_file. */
1192 for (j = edit->j2; j >= edit->j1; j--)
1194 struct entry *added_entry = modified_file.entries[j];
1195 gl_list_add_first (result_entries, added_entry);
1204 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1205 ASSERT (i_before >= 0);
1206 i_after = (edit->j2 + 1 == modified_file.num_entries
1207 ? ancestor_file.num_entries
1208 : diffs.index_mapping_reverse[edit->j2 + 1]);
1209 ASSERT (i_after >= 0);
1210 ASSERT (i_after == i_before + 1);
1211 /* An addition between ancestor_file.entries[i_before] and
1212 ancestor_file.entries[i_after]. See whether these two
1213 entries still exist in mainstream_file and are still
1215 k_before = entries_mapping_get (&mapping, i_before);
1216 k_after = (i_after == ancestor_file.num_entries
1217 ? mainstream_file.num_entries
1218 : entries_mapping_get (&mapping, i_after));
1219 if (k_before >= 0 && k_after >= 0 && k_after == k_before + 1)
1221 /* Yes, the entry before and after are still neighbours
1222 in mainstream_file. Apply the addition between
1224 if (k_after == mainstream_file.num_entries)
1227 for (j = edit->j1; j <= edit->j2; j++)
1229 struct entry *added_entry = modified_file.entries[j];
1230 gl_list_add_last (result_entries, added_entry);
1235 gl_list_node_t node_k_after = result_entries_pointers[k_after];
1237 for (j = edit->j1; j <= edit->j2; j++)
1239 struct entry *added_entry = modified_file.entries[j];
1240 gl_list_add_before (result_entries, node_k_after, added_entry);
1246 /* It's not clear where the additions should be applied.
1247 Let the user decide. */
1248 struct conflict *c = XMALLOC (struct conflict);
1250 c->num_old_entries = 0;
1251 c->old_entries = NULL;
1252 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1253 c->modified_entries =
1254 XNMALLOC (c->num_modified_entries, struct entry *);
1255 for (j = edit->j1; j <= edit->j2; j++)
1256 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1257 gl_list_add_last (result_conflicts, c);
1263 /* Apply the removals one by one. */
1265 for (i = edit->i1; i <= edit->i2; i++)
1267 struct entry *removed_entry = ancestor_file.entries[i];
1268 ssize_t k = entries_mapping_get (&mapping, i);
1270 && entry_equals (removed_entry,
1271 mainstream_file.entries[k]))
1273 /* The entry to be removed still exists in
1274 mainstream_file. Remove it. */
1275 gl_list_node_set_value (result_entries,
1276 result_entries_pointers[k],
1281 /* The entry to be removed was already removed or was
1282 modified. This is a conflict. */
1283 struct conflict *c = XMALLOC (struct conflict);
1284 c->num_old_entries = 1;
1286 XNMALLOC (c->num_old_entries, struct entry *);
1287 c->old_entries[0] = removed_entry;
1288 c->num_modified_entries = 0;
1289 c->modified_entries = NULL;
1290 gl_list_add_last (result_conflicts, c);
1298 /* When the user usually merges entries from the same day,
1299 and this edit is at the top of the file: */
1300 if (split_merged_entry && edit->j1 == 0)
1302 /* Test whether the change is "simple merged", i.e. whether
1303 it consists of additions, followed by an augmentation of
1304 the first changed entry, followed by small changes of the
1317 modified_entry_n. */
1318 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1320 struct entry *split[2];
1321 bool simple_merged =
1322 try_split_merged_entry (ancestor_file.entries[edit->i1],
1323 modified_file.entries[edit->i1 + edit->j2 - edit->i2],
1328 for (i = edit->i1 + 1; i <= edit->i2; i++)
1329 if (entry_fstrcmp (ancestor_file.entries[i],
1330 modified_file.entries[i + edit->j2 - edit->i2],
1332 < FSTRCMP_THRESHOLD)
1334 simple_merged = false;
1340 /* Apply the additions at the top of modified_file.
1341 Apply each of the single-entry changes
1343 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1344 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1346 /* First part of the split modified_file.entries[edit->j2 - edit->i2 + edit->i1]: */
1347 gl_list_add_first (result_entries, split[0]);
1348 /* The additions. */
1349 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1351 struct entry *added_entry = modified_file.entries[j];
1352 gl_list_add_first (result_entries, added_entry);
1354 /* Now the single-entry changes. */
1355 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1357 struct entry *changed_entry =
1358 (j == edit->j1 + num_added
1360 : modified_file.entries[j]);
1361 size_t i = j + edit->i2 - edit->j2;
1362 ssize_t k = entries_mapping_get (&mapping, i);
1364 && entry_equals (ancestor_file.entries[i],
1365 mainstream_file.entries[k]))
1367 gl_list_node_set_value (result_entries,
1368 result_entries_pointers[k],
1371 else if (!entry_equals (ancestor_file.entries[i],
1374 struct conflict *c = XMALLOC (struct conflict);
1375 c->num_old_entries = 1;
1377 XNMALLOC (c->num_old_entries, struct entry *);
1378 c->old_entries[0] = ancestor_file.entries[i];
1379 c->num_modified_entries = 1;
1380 c->modified_entries =
1381 XNMALLOC (c->num_modified_entries, struct entry *);
1382 c->modified_entries[0] = changed_entry;
1383 gl_list_add_last (result_conflicts, c);
1393 /* Test whether the change is "simple", i.e. whether it
1394 consists of small changes to the old ChangeLog entries
1395 and additions before them:
1405 modified_entry_n. */
1406 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1410 for (i = edit->i1; i <= edit->i2; i++)
1411 if (entry_fstrcmp (ancestor_file.entries[i],
1412 modified_file.entries[i + edit->j2 - edit->i2],
1414 < FSTRCMP_THRESHOLD)
1424 /* Apply the additions and each of the single-entry
1425 changes separately. */
1426 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1427 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1430 /* A simple change at the top of modified_file.
1431 Apply it to the top of mainstream_file. */
1433 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1435 struct entry *added_entry = modified_file.entries[j];
1436 gl_list_add_first (result_entries, added_entry);
1438 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1440 struct entry *changed_entry = modified_file.entries[j];
1441 size_t i = j + edit->i2 - edit->j2;
1442 ssize_t k = entries_mapping_get (&mapping, i);
1444 && entry_equals (ancestor_file.entries[i],
1445 mainstream_file.entries[k]))
1447 gl_list_node_set_value (result_entries,
1448 result_entries_pointers[k],
1454 ASSERT (!entry_equals (ancestor_file.entries[i],
1456 c = XMALLOC (struct conflict);
1457 c->num_old_entries = 1;
1459 XNMALLOC (c->num_old_entries, struct entry *);
1460 c->old_entries[0] = ancestor_file.entries[i];
1461 c->num_modified_entries = 1;
1462 c->modified_entries =
1463 XNMALLOC (c->num_modified_entries, struct entry *);
1464 c->modified_entries[0] = changed_entry;
1465 gl_list_add_last (result_conflicts, c);
1475 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1476 ASSERT (i_before >= 0);
1477 /* A simple change after ancestor_file.entries[i_before].
1478 See whether this entry and the following num_changed
1479 entries still exist in mainstream_file and are still
1481 k_before = entries_mapping_get (&mapping, i_before);
1482 linear = (k_before >= 0);
1486 for (i = i_before + 1; i <= i_before + num_changed; i++)
1487 if (entries_mapping_get (&mapping, i) != k_before + (i - i_before))
1495 gl_list_node_t node_for_insert =
1496 result_entries_pointers[k_before + 1];
1498 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1500 struct entry *added_entry = modified_file.entries[j];
1501 gl_list_add_before (result_entries, node_for_insert, added_entry);
1503 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1505 struct entry *changed_entry = modified_file.entries[j];
1506 size_t i = j + edit->i2 - edit->j2;
1507 ssize_t k = entries_mapping_get (&mapping, i);
1509 if (entry_equals (ancestor_file.entries[i],
1510 mainstream_file.entries[k]))
1512 gl_list_node_set_value (result_entries,
1513 result_entries_pointers[k],
1519 ASSERT (!entry_equals (ancestor_file.entries[i],
1521 c = XMALLOC (struct conflict);
1522 c->num_old_entries = 1;
1524 XNMALLOC (c->num_old_entries, struct entry *);
1525 c->old_entries[0] = ancestor_file.entries[i];
1526 c->num_modified_entries = 1;
1527 c->modified_entries =
1528 XNMALLOC (c->num_modified_entries, struct entry *);
1529 c->modified_entries[0] = changed_entry;
1530 gl_list_add_last (result_conflicts, c);
1540 See whether the num_changed entries still exist
1541 unchanged in mainstream_file and are still
1545 bool linear_unchanged;
1547 k_first = entries_mapping_get (&mapping, i_first);
1550 && entry_equals (ancestor_file.entries[i_first],
1551 mainstream_file.entries[k_first]));
1552 if (linear_unchanged)
1555 for (i = i_first + 1; i <= edit->i2; i++)
1556 if (!(entries_mapping_get (&mapping, i) == k_first + (i - i_first)
1557 && entry_equals (ancestor_file.entries[i],
1558 mainstream_file.entries[entries_mapping_get (&mapping, i)])))
1560 linear_unchanged = false;
1564 if (linear_unchanged)
1566 gl_list_node_t node_for_insert =
1567 result_entries_pointers[k_first];
1570 for (j = edit->j2; j >= edit->j1; j--)
1572 struct entry *new_entry = modified_file.entries[j];
1573 gl_list_add_before (result_entries, node_for_insert, new_entry);
1575 for (i = edit->i1; i <= edit->i2; i++)
1577 ssize_t k = entries_mapping_get (&mapping, i);
1579 ASSERT (entry_equals (ancestor_file.entries[i],
1580 mainstream_file.entries[k]));
1581 gl_list_node_set_value (result_entries,
1582 result_entries_pointers[k],
1591 struct conflict *c = XMALLOC (struct conflict);
1593 c->num_old_entries = edit->i2 - edit->i1 + 1;
1595 XNMALLOC (c->num_old_entries, struct entry *);
1596 for (i = edit->i1; i <= edit->i2; i++)
1597 c->old_entries[i - edit->i1] = ancestor_file.entries[i];
1598 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1599 c->modified_entries =
1600 XNMALLOC (c->num_modified_entries, struct entry *);
1601 for (j = edit->j1; j <= edit->j2; j++)
1602 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1603 gl_list_add_last (result_conflicts, c);
1611 /* Output the result. */
1613 FILE *fp = fopen (destination_file_name, "w");
1616 fprintf (stderr, "could not write file '%s'\n", destination_file_name);
1617 exit (EXIT_FAILURE);
1620 /* Output the conflicts at the top. */
1622 size_t n = gl_list_size (result_conflicts);
1624 for (i = 0; i < n; i++)
1625 conflict_write (fp, (struct conflict *) gl_list_get_at (result_conflicts, i));
1627 /* Output the modified and unmodified entries, in order. */
1629 gl_list_iterator_t iter = gl_list_iterator (result_entries);
1631 gl_list_node_t node;
1632 while (gl_list_iterator_next (&iter, &elt, &node))
1633 entry_write (fp, (struct entry *) elt);
1634 gl_list_iterator_free (&iter);
1637 if (fwriteerror (fp))
1639 fprintf (stderr, "error writing to file '%s'\n", destination_file_name);
1640 exit (EXIT_FAILURE);
1644 exit (gl_list_size (result_conflicts) > 0 ? EXIT_FAILURE : EXIT_SUCCESS);