1 /* git-merge-changelog - git "merge" driver for GNU style ChangeLog files.
2 Copyright (C) 2008-2009 Bruno Haible <bruno@clisp.org>
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 The default merge driver of 'git' *always* produces conflicts when
19 pulling public modifications into a privately modified ChangeLog file.
20 This is because ChangeLog files are always modified at the top; the
21 default merge driver has no clue how to deal with this. Furthermore
22 the conflicts are presented with more <<<< ==== >>>> markers than
23 necessary; this is because the default merge driver makes pointless
24 efforts to look at the individual line changes inside a ChangeLog entry.
26 This program serves as a 'git' merge driver that avoids these problems.
27 1. It produces no conflict when ChangeLog entries have been inserted
28 at the top both in the public and in the private modification. It
29 puts the privately added entries above the publicly added entries.
30 2. It respects the structure of ChangeLog files: entries are not split
31 into lines but kept together.
32 3. It also handles the case of small modifications of past ChangeLog
33 entries, or of removed ChangeLog entries: they are merged as one
35 4. Conflicts are presented at the top of the file, rather than where
36 they occurred, so that the user will see them immediately. (Unlike
37 for source code written in some programming language, conflict markers
38 that are located several hundreds lines from the top will not cause
39 any syntax error and therefore would be likely to remain unnoticed.)
43 $ gnulib-tool --create-testdir --dir=/tmp/testdir123 git-merge-changelog
48 - Add to .git/config of the checkout (or to your $HOME/.gitconfig) the lines
50 [merge "merge-changelog"]
51 name = GNU-style ChangeLog merge driver
52 driver = /usr/local/bin/git-merge-changelog %O %A %B
54 - In every directory that contains a ChangeLog file, add a file
55 '.gitattributes' with this line:
57 ChangeLog merge=merge-changelog
59 (See "man 5 gitattributes" for more info.)
62 /* Calling convention:
63 A merge driver is called with three filename arguments:
64 1. %O = The common ancestor of %A and %B.
65 2. %A = The file's contents from the "current branch".
66 3. %B = The file's contents from the "other branch"; this is the contents
69 In case of a "git stash apply" or of an upstream pull (e.g. from a subsystem
70 maintainer to a central maintainer) or of a downstream pull with --rebase:
71 2. %A = The file's newest pulled contents; modified by other committers.
72 3. %B = The user's newest copy of the file; modified by the user.
73 In case of a downstream pull (e.g. from a central repository to the user)
74 or of an upstream pull with --rebase:
75 2. %A = The user's newest copy of the file; modified by the user.
76 3. %B = The file's newest pulled contents; modified by other committers.
78 It should write its merged output into file %A. It can also echo some
79 remarks to stdout. It should exit with return code 0 if the merge could
80 be resolved cleanly, or with non-zero return code if there were conflicts.
84 The structure of a ChangeLog file: It consists of ChangeLog entries. A
85 ChangeLog entry starts at a line following a blank line and that starts with
86 a non-whitespace character, or at the beginning of a file.
87 The merge driver works as follows: It reads the three files into memory and
88 dissects them into ChangeLog entries. It then finds the differences between
89 %O and %B. They are classified as:
90 - removals (some consecutive entries removed),
91 - changes (some consecutive entries removed, some consecutive entries
93 - additions (some consecutive entries added).
94 The driver then attempts to apply the changes to %A.
95 To this effect, it first computes a correspondence between the entries in %O
96 and the entries in %A, using fuzzy string matching to still identify changed
98 - Removals are applied one by one. If the entry is present in %A, at any
99 position, it is removed. If not, the removal is marked as a conflict.
100 - Additions at the top of %B are applied at the top of %A.
101 - Additions between entry x and entry y (y may be the file end) in %B are
102 applied between entry x and entry y in %A (if they still exist and are
103 still consecutive in %A), otherwise the additions are marked as a
105 - Changes are categorized into "simple changes":
108 added_entry ... added_entry modified_entry1 ... modified_entryn,
109 where the correspondence between entry_i and modified_entry_i is still
110 clear; and "big changes": these are all the rest. Simple changes at the
111 top of %B are applied by putting the added entries at the top of %A. The
112 changes in simple changes are applied one by one; possibly leading to
113 single-entry conflicts. Big changes are applied en bloc, possibly
114 leading to conflicts spanning multiple entries.
115 - Conflicts are output at the top of the file and cause an exit status of
127 #include <sys/types.h>
130 #include "progname.h"
132 #include "read-file.h"
134 #include "gl_array_list.h"
135 #include "gl_linkedhash_list.h"
136 #include "gl_rbtreehash_list.h"
137 #include "gl_linked_list.h"
139 #include "xmalloca.h"
142 #include "c-strstr.h"
143 #include "fwriteerror.h"
145 #define ASSERT(expr) \
153 #define FSTRCMP_THRESHOLD 0.6
154 #define FSTRCMP_STRICTER_THRESHOLD 0.8
156 /* Representation of a ChangeLog entry.
157 The string may contain NUL bytes; therefore it is represented as a plain
158 opaque memory region. */
163 /* Cache for the hash code. */
164 bool hashcode_cached;
169 The memory region passed by the caller must of indefinite extent. It is
170 *not* copied here. */
171 static struct entry *
172 entry_create (char *string, size_t length)
174 struct entry *result = XMALLOC (struct entry);
175 result->string = string;
176 result->length = length;
177 result->hashcode_cached = false;
181 /* Compare two entries for equality. */
183 entry_equals (const void *elt1, const void *elt2)
185 const struct entry *entry1 = (const struct entry *) elt1;
186 const struct entry *entry2 = (const struct entry *) elt2;
187 return entry1->length == entry2->length
188 && memcmp (entry1->string, entry2->string, entry1->length) == 0;
191 /* Return a hash code of the contents of a ChangeLog entry. */
193 entry_hashcode (const void *elt)
195 struct entry *entry = (struct entry *) elt;
196 if (!entry->hashcode_cached)
198 /* See http://www.haible.de/bruno/hashfunc.html. */
203 for (s = entry->string, n = entry->length; n > 0; s++, n--)
204 h = (unsigned char) *s + ((h << 9) | (h >> (sizeof (size_t) * CHAR_BIT - 9)));
207 entry->hashcode_cached = true;
209 return entry->hashcode;
212 /* Perform a fuzzy comparison of two ChangeLog entries.
213 Return a similarity measure of the two entries, a value between 0 and 1.
214 0 stands for very distinct, 1 for identical.
215 If the result is < LOWER_BOUND, an arbitrary other value < LOWER_BOUND can
218 entry_fstrcmp (const struct entry *entry1, const struct entry *entry2,
221 /* fstrcmp works only on NUL terminated strings. */
225 if (memchr (entry1->string, '\0', entry1->length) != NULL)
227 if (memchr (entry2->string, '\0', entry2->length) != NULL)
229 memory = (char *) xmalloca (entry1->length + 1 + entry2->length + 1);
232 memcpy (p, entry1->string, entry1->length);
235 memcpy (p, entry2->string, entry2->length);
240 fstrcmp_bounded (memory, memory + entry1->length + 1, lower_bound);
245 /* This structure represents an entire ChangeLog file, after it was read
247 struct changelog_file
249 /* The entries, as a list. */
250 gl_list_t /* <struct entry *> */ entries_list;
251 /* The entries, as a list in opposite direction. */
252 gl_list_t /* <struct entry *> */ entries_reversed;
253 /* The entries, as an array. */
255 struct entry **entries;
258 /* Read a ChangeLog file into memory.
259 Return the contents in *RESULT. */
261 read_changelog_file (const char *filename, struct changelog_file *result)
263 /* Read the file in text mode, otherwise it's hard to recognize empty
266 char *contents = read_file (filename, &length);
267 if (contents == NULL)
269 fprintf (stderr, "could not read file '%s'\n", filename);
273 result->entries_list =
274 gl_list_create_empty (GL_LINKEDHASH_LIST, entry_equals, entry_hashcode,
276 result->entries_reversed =
277 gl_list_create_empty (GL_RBTREEHASH_LIST, entry_equals, entry_hashcode,
279 /* A ChangeLog file consists of ChangeLog entries. A ChangeLog entry starts
280 at a line following a blank line and that starts with a non-whitespace
281 character, or at the beginning of a file.
282 Split the file contents into entries. */
284 char *contents_end = contents + length;
285 char *start = contents;
286 while (start < contents_end)
288 /* Search the end of the current entry. */
292 while (ptr < contents_end)
294 ptr = memchr (ptr, '\n', contents_end - ptr);
301 if (contents_end - ptr >= 2
303 && !(ptr[1] == '\n' || ptr[1] == '\t' || ptr[1] == ' '))
310 curr = entry_create (start, ptr - start);
311 gl_list_add_last (result->entries_list, curr);
312 gl_list_add_first (result->entries_reversed, curr);
318 result->num_entries = gl_list_size (result->entries_list);
319 result->entries = XNMALLOC (result->num_entries, struct entry *);
322 gl_list_iterator_t iter = gl_list_iterator (result->entries_list);
325 while (gl_list_iterator_next (&iter, &elt, &node))
326 result->entries[index++] = (struct entry *) elt;
327 gl_list_iterator_free (&iter);
328 ASSERT (index == result->num_entries);
332 /* A mapping (correspondence) between entries of FILE1 and of FILE2. */
333 struct entries_mapping
335 struct changelog_file *file1;
336 struct changelog_file *file2;
337 /* Mapping from indices in FILE1 to indices in FILE2.
338 A value -1 means that the entry from FILE1 is not found in FILE2.
339 A value -2 means that it has not yet been computed. */
340 ssize_t *index_mapping;
341 /* Mapping from indices in FILE2 to indices in FILE1.
342 A value -1 means that the entry from FILE2 is not found in FILE1.
343 A value -2 means that it has not yet been computed. */
344 ssize_t *index_mapping_reverse;
347 /* Look up (or lazily compute) the mapping of an entry in FILE1.
348 i is the index in FILE1.
349 Return the index in FILE2, or -1 when the entry is not found in FILE2. */
351 entries_mapping_get (struct entries_mapping *mapping, ssize_t i)
353 if (mapping->index_mapping[i] < -1)
355 struct changelog_file *file1 = mapping->file1;
356 struct changelog_file *file2 = mapping->file2;
357 size_t n1 = file1->num_entries;
358 size_t n2 = file2->num_entries;
359 struct entry *entry_i = file1->entries[i];
362 /* Search whether it approximately occurs in file2. */
364 double best_j_similarity = 0.0;
365 for (j = n2 - 1; j >= 0; j--)
366 if (mapping->index_mapping_reverse[j] < 0)
369 entry_fstrcmp (entry_i, file2->entries[j], best_j_similarity);
370 if (similarity > best_j_similarity)
373 best_j_similarity = similarity;
376 if (best_j_similarity >= FSTRCMP_THRESHOLD)
378 /* Found a similar entry in file2. */
379 struct entry *entry_j = file2->entries[best_j];
380 /* Search whether it approximately occurs in file1 at index i. */
382 double best_i_similarity = 0.0;
384 for (ii = n1 - 1; ii >= 0; ii--)
385 if (mapping->index_mapping[ii] < 0)
388 entry_fstrcmp (file1->entries[ii], entry_j,
390 if (similarity > best_i_similarity)
393 best_i_similarity = similarity;
396 if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i)
398 mapping->index_mapping[i] = best_j;
399 mapping->index_mapping_reverse[best_j] = i;
402 if (mapping->index_mapping[i] < -1)
403 /* It does not approximately occur in FILE2.
404 Remember it, for next time. */
405 mapping->index_mapping[i] = -1;
407 return mapping->index_mapping[i];
410 /* Look up (or lazily compute) the mapping of an entry in FILE2.
411 j is the index in FILE2.
412 Return the index in FILE1, or -1 when the entry is not found in FILE1. */
414 entries_mapping_reverse_get (struct entries_mapping *mapping, ssize_t j)
416 if (mapping->index_mapping_reverse[j] < -1)
418 struct changelog_file *file1 = mapping->file1;
419 struct changelog_file *file2 = mapping->file2;
420 size_t n1 = file1->num_entries;
421 size_t n2 = file2->num_entries;
422 struct entry *entry_j = file2->entries[j];
425 /* Search whether it approximately occurs in file1. */
427 double best_i_similarity = 0.0;
428 for (i = n1 - 1; i >= 0; i--)
429 if (mapping->index_mapping[i] < 0)
432 entry_fstrcmp (file1->entries[i], entry_j, best_i_similarity);
433 if (similarity > best_i_similarity)
436 best_i_similarity = similarity;
439 if (best_i_similarity >= FSTRCMP_THRESHOLD)
441 /* Found a similar entry in file1. */
442 struct entry *entry_i = file1->entries[best_i];
443 /* Search whether it approximately occurs in file2 at index j. */
445 double best_j_similarity = 0.0;
447 for (jj = n2 - 1; jj >= 0; jj--)
448 if (mapping->index_mapping_reverse[jj] < 0)
451 entry_fstrcmp (entry_i, file2->entries[jj],
453 if (similarity > best_j_similarity)
456 best_j_similarity = similarity;
459 if (best_j_similarity >= FSTRCMP_THRESHOLD && best_j == j)
461 mapping->index_mapping_reverse[j] = best_i;
462 mapping->index_mapping[best_i] = j;
465 if (mapping->index_mapping_reverse[j] < -1)
466 /* It does not approximately occur in FILE1.
467 Remember it, for next time. */
468 mapping->index_mapping_reverse[j] = -1;
470 return mapping->index_mapping_reverse[j];
473 /* Compute a mapping (correspondence) between entries of FILE1 and of FILE2.
474 The correspondence also takes into account small modifications; i.e. the
475 indicated relation is not equality of entries but best-match similarity
477 If FULL is true, the maximum of matching is done up-front. If it is false,
478 it is done in a lazy way through the functions entries_mapping_get and
479 entries_mapping_reverse_get.
480 Return the result in *RESULT. */
482 compute_mapping (struct changelog_file *file1, struct changelog_file *file2,
484 struct entries_mapping *result)
486 /* Mapping from indices in file1 to indices in file2. */
487 ssize_t *index_mapping;
488 /* Mapping from indices in file2 to indices in file1. */
489 ssize_t *index_mapping_reverse;
490 size_t n1 = file1->num_entries;
491 size_t n2 = file2->num_entries;
494 index_mapping = XNMALLOC (n1, ssize_t);
495 for (i = 0; i < n1; i++)
496 index_mapping[i] = -2;
498 index_mapping_reverse = XNMALLOC (n2, ssize_t);
499 for (j = 0; j < n2; j++)
500 index_mapping_reverse[j] = -2;
502 for (i = n1 - 1; i >= 0; i--)
503 /* Take an entry from file1. */
504 if (index_mapping[i] < -1)
506 struct entry *entry = file1->entries[i];
507 /* Search whether it occurs in file2. */
508 j = gl_list_indexof (file2->entries_reversed, entry);
512 /* Found an exact correspondence. */
513 ASSERT (index_mapping_reverse[j] < 0);
514 index_mapping[i] = j;
515 index_mapping_reverse[j] = i;
516 /* Look for more occurrences of the same entry. */
527 gl_list_indexof_from (file1->entries_reversed, n1 - curr_i,
532 gl_list_indexof_from (file2->entries_reversed, n2 - curr_j,
536 curr_i = n1 - 1 - next_i;
537 curr_j = n2 - 1 - next_j;
538 ASSERT (index_mapping[curr_i] < 0);
539 ASSERT (index_mapping_reverse[curr_j] < 0);
540 index_mapping[curr_i] = curr_j;
541 index_mapping_reverse[curr_j] = curr_i;
547 result->file1 = file1;
548 result->file2 = file2;
549 result->index_mapping = index_mapping;
550 result->index_mapping_reverse = index_mapping_reverse;
553 for (i = n1 - 1; i >= 0; i--)
554 entries_mapping_get (result, i);
557 /* An "edit" is a textual modification performed by the user, that needs to
558 be applied to the other file. */
561 /* Some consecutive entries were added. */
563 /* Some consecutive entries were removed; some other consecutive entries
564 were added at the same position. (Not necessarily the same number of
567 /* Some consecutive entries were removed. */
571 /* This structure represents an edit. */
575 /* Range of indices into the entries of FILE1. */
576 ssize_t i1, i2; /* first, last index; only used for CHANGE, REMOVAL */
577 /* Range of indices into the entries of FILE2. */
578 ssize_t j1, j2; /* first, last index; only used for ADDITION, CHANGE */
581 /* This structure represents the differences from one file, FILE1, to another
585 /* An array mapping FILE1 indices to FILE2 indices (or -1 when the entry
586 from FILE1 is not found in FILE2). */
587 ssize_t *index_mapping;
588 /* An array mapping FILE2 indices to FILE1 indices (or -1 when the entry
589 from FILE2 is not found in FILE1). */
590 ssize_t *index_mapping_reverse;
591 /* The edits that transform FILE1 into FILE2. */
596 /* Import the difference detection algorithm from GNU diff. */
597 #define ELEMENT struct entry *
598 #define EQUAL entry_equals
599 #define OFFSET ssize_t
600 #define EXTRA_CONTEXT_FIELDS \
601 ssize_t *index_mapping; \
602 ssize_t *index_mapping_reverse;
603 #define NOTE_DELETE(ctxt, xoff) \
604 ctxt->index_mapping[xoff] = -1
605 #define NOTE_INSERT(ctxt, yoff) \
606 ctxt->index_mapping_reverse[yoff] = -1
609 /* Compute the differences between the entries of FILE1 and the entries of
612 compute_differences (struct changelog_file *file1, struct changelog_file *file2,
613 struct differences *result)
615 /* Unlike compute_mapping, which mostly ignores the order of the entries and
616 therefore works well when some entries are permuted, here we use the order.
617 I think this is needed in order to distinguish changes from
618 additions+removals; I don't know how to say what is a "change" if the
619 files are considered as unordered sets of entries. */
621 size_t n1 = file1->num_entries;
622 size_t n2 = file2->num_entries;
625 gl_list_t /* <struct edit *> */ edits;
627 ctxt.xvec = file1->entries;
628 ctxt.yvec = file2->entries;
629 ctxt.index_mapping = XNMALLOC (n1, ssize_t);
630 for (i = 0; i < n1; i++)
631 ctxt.index_mapping[i] = 0;
632 ctxt.index_mapping_reverse = XNMALLOC (n2, ssize_t);
633 for (j = 0; j < n2; j++)
634 ctxt.index_mapping_reverse[j] = 0;
635 ctxt.fdiag = XNMALLOC (2 * (n1 + n2 + 3), ssize_t) + n2 + 1;
636 ctxt.bdiag = ctxt.fdiag + n1 + n2 + 3;
637 ctxt.too_expensive = n1 + n2;
639 /* Store in ctxt.index_mapping and ctxt.index_mapping_reverse a -1 for
640 each removed or added entry. */
641 compareseq (0, n1, 0, n2, 0, &ctxt);
643 /* Complete the index_mapping and index_mapping_reverse arrays. */
646 while (i < n1 || j < n2)
648 while (i < n1 && ctxt.index_mapping[i] < 0)
650 while (j < n2 && ctxt.index_mapping_reverse[j] < 0)
652 ASSERT ((i < n1) == (j < n2));
653 if (i == n1 && j == n2)
655 ctxt.index_mapping[i] = j;
656 ctxt.index_mapping_reverse[j] = i;
661 /* Create the edits. */
662 edits = gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
665 while (i < n1 || j < n2)
671 e = XMALLOC (struct edit);
675 gl_list_add_last (edits, e);
682 e = XMALLOC (struct edit);
686 gl_list_add_last (edits, e);
689 if (ctxt.index_mapping[i] >= 0)
691 if (ctxt.index_mapping_reverse[j] >= 0)
693 ASSERT (ctxt.index_mapping[i] == j);
694 ASSERT (ctxt.index_mapping_reverse[j] == i);
701 ASSERT (ctxt.index_mapping_reverse[j] < 0);
702 e = XMALLOC (struct edit);
707 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
709 gl_list_add_last (edits, e);
714 if (ctxt.index_mapping_reverse[j] >= 0)
717 ASSERT (ctxt.index_mapping[i] < 0);
718 e = XMALLOC (struct edit);
723 while (i < n1 && ctxt.index_mapping[i] < 0);
725 gl_list_add_last (edits, e);
730 ASSERT (ctxt.index_mapping[i] < 0);
731 ASSERT (ctxt.index_mapping_reverse[j] < 0);
732 e = XMALLOC (struct edit);
737 while (i < n1 && ctxt.index_mapping[i] < 0);
742 while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
744 gl_list_add_last (edits, e);
749 result->index_mapping = ctxt.index_mapping;
750 result->index_mapping_reverse = ctxt.index_mapping_reverse;
751 result->num_edits = gl_list_size (edits);
752 result->edits = XNMALLOC (result->num_edits, struct edit *);
755 gl_list_iterator_t iter = gl_list_iterator (edits);
758 while (gl_list_iterator_next (&iter, &elt, &node))
759 result->edits[index++] = (struct edit *) elt;
760 gl_list_iterator_free (&iter);
761 ASSERT (index == result->num_edits);
765 /* An empty entry. */
766 static struct entry empty_entry = { NULL, 0 };
768 /* Return the end a paragraph.
770 OFFSET is an offset into the entry, OFFSET <= ENTRY->length.
771 Return the offset of the end of paragraph, as an offset <= ENTRY->length;
772 it is the start of a blank line or the end of the entry. */
774 find_paragraph_end (const struct entry *entry, size_t offset)
776 const char *string = entry->string;
777 size_t length = entry->length;
781 const char *nl = memchr (string + offset, '\n', length - offset);
784 offset = (nl - string) + 1;
785 if (offset < length && string[offset] == '\n')
790 /* Split a merged entry.
791 Given an old entry of the form
794 and a new entry of the form
798 where the two titles are the same and BODY and BODY' are very similar,
799 this computes two new entries
806 If the entries don't have this form, it returns false. */
808 try_split_merged_entry (const struct entry *old_entry,
809 const struct entry *new_entry,
810 struct entry *new_split[2])
812 size_t old_title_len = find_paragraph_end (old_entry, 0);
813 size_t new_title_len = find_paragraph_end (new_entry, 0);
814 struct entry old_body;
815 struct entry new_body;
816 size_t best_split_offset;
817 double best_similarity;
821 if (!(old_title_len == new_title_len
822 && memcmp (old_entry->string, new_entry->string, old_title_len) == 0))
825 old_body.string = old_entry->string + old_title_len;
826 old_body.length = old_entry->length - old_title_len;
828 /* Determine where to split the new entry.
829 This is done by maximizing the similarity between BODY and BODY'. */
830 best_split_offset = split_offset = new_title_len;
831 best_similarity = 0.0;
836 new_body.string = new_entry->string + split_offset;
837 new_body.length = new_entry->length - split_offset;
839 entry_fstrcmp (&old_body, &new_body, best_similarity);
840 if (similarity > best_similarity)
842 best_split_offset = split_offset;
843 best_similarity = similarity;
845 if (best_similarity == 1.0)
846 /* It cannot get better. */
849 if (split_offset < new_entry->length)
850 split_offset = find_paragraph_end (new_entry, split_offset + 1);
855 /* BODY' should not be empty. */
856 if (best_split_offset == new_entry->length)
858 ASSERT (new_entry->string[best_split_offset] == '\n');
860 /* A certain similarity between BODY and BODY' is required. */
861 if (best_similarity < FSTRCMP_STRICTER_THRESHOLD)
864 new_split[0] = entry_create (new_entry->string, best_split_offset + 1);
867 size_t len1 = new_title_len;
868 size_t len2 = new_entry->length - best_split_offset;
869 char *combined = XNMALLOC (len1 + len2, char);
870 memcpy (combined, new_entry->string, len1);
871 memcpy (combined + len1, new_entry->string + best_split_offset, len2);
872 new_split[1] = entry_create (combined, len1 + len2);
878 /* Write the contents of an entry to the output stream FP. */
880 entry_write (FILE *fp, struct entry *entry)
882 if (entry->length > 0)
883 fwrite (entry->string, 1, entry->length, fp);
886 /* This structure represents a conflict.
887 A conflict can occur for various reasons. */
890 /* Parts from the ancestor file. */
891 size_t num_old_entries;
892 struct entry **old_entries;
893 /* Parts of the modified file. */
894 size_t num_modified_entries;
895 struct entry **modified_entries;
898 /* Write a conflict to the output stream FP, including markers. */
900 conflict_write (FILE *fp, struct conflict *c)
904 /* Use the same syntax as git's default merge driver.
905 Don't indent the contents of the entries (with things like ">" or "-"),
906 otherwise the user needs more textual editing to resolve the conflict. */
907 fputs ("<<<<<<<\n", fp);
908 for (i = 0; i < c->num_old_entries; i++)
909 entry_write (fp, c->old_entries[i]);
910 fputs ("=======\n", fp);
911 for (i = 0; i < c->num_modified_entries; i++)
912 entry_write (fp, c->modified_entries[i]);
913 fputs (">>>>>>>\n", fp);
917 static const struct option long_options[] =
919 { "help", no_argument, NULL, 'h' },
920 { "split-merged-entry", no_argument, NULL, CHAR_MAX + 1 },
921 { "version", no_argument, NULL, 'V' },
925 /* Print a usage mesage and exit. */
929 if (status != EXIT_SUCCESS)
930 fprintf (stderr, "Try `%s --help' for more information.\n",
934 printf ("Usage: %s [OPTION] O-FILE-NAME A-FILE-NAME B-FILE-NAME\n",
937 printf ("Merges independent modifications of a ChangeLog style file.\n");
938 printf ("O-FILE-NAME names the original file, the ancestor of the two others.\n");
939 printf ("A-FILE-NAME names the publicly modified file.\n");
940 printf ("B-FILE-NAME names the user-modified file.\n");
941 printf ("Writes the merged file into A-FILE-NAME.\n");
943 printf ("Operation modifiers:\n");
945 --split-merged-entry Possibly split a merged entry between paragraphs.\n\
946 Use this if you have the habit to merge unrelated\n\
947 entries into a single one, separated only by a\n\
948 newline, just because they happened on the same\n\
951 printf ("Informative output:\n");
952 printf (" -h, --help display this help and exit\n");
953 printf (" -V, --version output version information and exit\n");
955 fputs ("Report bugs to <bug-gnu-gettext@gnu.org>.\n",
963 main (int argc, char *argv[])
968 bool split_merged_entry;
970 /* Set program name for messages. */
971 set_program_name (argv[0]);
973 /* Set default values for variables. */
976 split_merged_entry = false;
978 /* Parse command line options. */
979 while ((optchar = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
982 case '\0': /* Long option. */
990 case CHAR_MAX + 1: /* --split-merged-entry */
991 split_merged_entry = true;
994 usage (EXIT_FAILURE);
999 /* Version information is requested. */
1000 printf ("%s\n", program_name);
1001 printf ("Copyright (C) %s Free Software Foundation, Inc.\n\
1002 License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n\
1003 This is free software: you are free to change and redistribute it.\n\
1004 There is NO WARRANTY, to the extent permitted by law.\n\
1007 printf ("Written by %s.\n", "Bruno Haible");
1008 exit (EXIT_SUCCESS);
1013 /* Help is requested. */
1014 usage (EXIT_SUCCESS);
1017 /* Test argument count. */
1018 if (optind + 3 != argc)
1019 error (EXIT_FAILURE, 0, "expected three arguments");
1022 const char *ancestor_file_name; /* O-FILE-NAME */
1023 const char *destination_file_name; /* A-FILE-NAME */
1025 const char *other_file_name; /* B-FILE-NAME */
1026 const char *mainstream_file_name;
1027 const char *modified_file_name;
1028 struct changelog_file ancestor_file;
1029 struct changelog_file mainstream_file;
1030 struct changelog_file modified_file;
1031 /* Mapping from indices in ancestor_file to indices in mainstream_file. */
1032 struct entries_mapping mapping;
1033 struct differences diffs;
1034 gl_list_node_t *result_entries_pointers; /* array of pointers into result_entries */
1035 gl_list_t /* <struct entry *> */ result_entries;
1036 gl_list_t /* <struct conflict *> */ result_conflicts;
1038 ancestor_file_name = argv[optind];
1039 destination_file_name = argv[optind + 1];
1040 other_file_name = argv[optind + 2];
1042 /* Heuristic to determine whether it's a pull in downstream direction
1043 (e.g. pull from a centralized server) or a pull in upstream direction
1044 (e.g. "git stash apply").
1046 For ChangeLog this distinction is important. The difference between
1047 an "upstream" and a "downstream" repository is that more people are
1048 looking at the "upstream" repository. They want to be informed about
1049 changes and expect them to be shown at the top of the ChangeLog.
1050 When a user pulls downstream, on the other hand, he has two options:
1051 a) He gets the change entries from the central repository also at the
1052 top of his ChangeLog, and his own changes come after them.
1053 b) He gets the change entries from the central repository after those
1054 he has collected for his branch. His own change entries stay at
1055 the top of the ChangeLog file.
1056 In the case a) he has to reorder the ChangeLog before he can commit.
1057 No one does that. So most people want b).
1058 In other words, the order of entries in a ChangeLog should represent
1059 the order in which they have flown (or will flow) into the *central*
1062 But in git this is fundamentally indistinguishable, because when Linus
1063 pulls patches from akpm and akpm pulls patches from Linus, it's not
1064 clear which of the two is more "upstream". Also, when you have many
1065 branches in a repository and pull from one to another, "git" has no way
1066 to know which branch is more "upstream" than the other. The git-tag(1)
1067 manual page also says:
1068 "One important aspect of git is it is distributed, and being
1069 distributed largely means there is no inherent "upstream" or
1070 "downstream" in the system."
1071 Therefore anyone who attempts to produce a ChangeLog from the merge
1074 Here we allow the user to specify the pull direction through an
1075 environment variable (GIT_UPSTREAM or GIT_DOWNSTREAM). If these two
1076 environment variables are not set, we assume a "simple single user"
1077 usage pattern: He manages local changes through stashes and uses
1078 "git pull" only to pull downstream.
1080 How to distinguish these situation? There are several hints:
1081 - During a "git stash apply", GIT_REFLOG_ACTION is not set. During
1082 a "git pull", it is set to 'pull '. During a "git pull --rebase",
1083 it is set to 'pull --rebase'. During a "git cherry-pick", it is
1084 set to 'cherry-pick'.
1085 - During a "git stash apply", there is an environment variable of
1086 the form GITHEAD_<40_hex_digits>='Stashed changes'. */
1090 var = getenv ("GIT_DOWNSTREAM");
1091 if (var != NULL && var[0] != '\0')
1095 var = getenv ("GIT_UPSTREAM");
1096 if (var != NULL && var[0] != '\0')
1100 var = getenv ("GIT_REFLOG_ACTION");
1101 #if 0 /* Debugging code */
1102 printf ("GIT_REFLOG_ACTION=|%s|\n", var);
1105 && ((strncmp (var, "pull", 4) == 0
1106 && c_strstr (var, " --rebase") == NULL)
1107 || strncmp (var, "merge origin", 12) == 0))
1111 /* "git stash apply", "git rebase", "git cherry-pick" and
1119 #if 0 /* Debugging code */
1122 printf ("First line of %%A:\n");
1123 sprintf (buf, "head -1 %s", destination_file_name); system (buf);
1124 printf ("First line of %%B:\n");
1125 sprintf (buf, "head -1 %s", other_file_name); system (buf);
1126 printf ("Guessing calling convention: %s\n",
1128 ? "%A = modified by user, %B = upstream"
1129 : "%A = upstream, %B = modified by user");
1135 mainstream_file_name = other_file_name;
1136 modified_file_name = destination_file_name;
1140 mainstream_file_name = destination_file_name;
1141 modified_file_name = other_file_name;
1144 /* Read the three files into memory. */
1145 read_changelog_file (ancestor_file_name, &ancestor_file);
1146 read_changelog_file (mainstream_file_name, &mainstream_file);
1147 read_changelog_file (modified_file_name, &modified_file);
1149 /* Compute correspondence between the entries of ancestor_file and of
1151 compute_mapping (&ancestor_file, &mainstream_file, false, &mapping);
1152 (void) entries_mapping_reverse_get; /* avoid gcc "defined but not" warning */
1154 /* Compute differences between the entries of ancestor_file and of
1156 compute_differences (&ancestor_file, &modified_file, &diffs);
1158 /* Compute the result. */
1159 result_entries_pointers =
1160 XNMALLOC (mainstream_file.num_entries, gl_list_node_t);
1162 gl_list_create_empty (GL_LINKED_LIST, entry_equals, entry_hashcode,
1166 for (k = 0; k < mainstream_file.num_entries; k++)
1167 result_entries_pointers[k] =
1168 gl_list_add_last (result_entries, mainstream_file.entries[k]);
1171 gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
1174 for (e = 0; e < diffs.num_edits; e++)
1176 struct edit *edit = diffs.edits[e];
1182 /* An addition to the top of modified_file.
1183 Apply it to the top of mainstream_file. */
1185 for (j = edit->j2; j >= edit->j1; j--)
1187 struct entry *added_entry = modified_file.entries[j];
1188 gl_list_add_first (result_entries, added_entry);
1197 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1198 ASSERT (i_before >= 0);
1199 i_after = (edit->j2 + 1 == modified_file.num_entries
1200 ? ancestor_file.num_entries
1201 : diffs.index_mapping_reverse[edit->j2 + 1]);
1202 ASSERT (i_after >= 0);
1203 ASSERT (i_after == i_before + 1);
1204 /* An addition between ancestor_file.entries[i_before] and
1205 ancestor_file.entries[i_after]. See whether these two
1206 entries still exist in mainstream_file and are still
1208 k_before = entries_mapping_get (&mapping, i_before);
1209 k_after = (i_after == ancestor_file.num_entries
1210 ? mainstream_file.num_entries
1211 : entries_mapping_get (&mapping, i_after));
1212 if (k_before >= 0 && k_after >= 0 && k_after == k_before + 1)
1214 /* Yes, the entry before and after are still neighbours
1215 in mainstream_file. Apply the addition between
1217 if (k_after == mainstream_file.num_entries)
1220 for (j = edit->j1; j <= edit->j2; j++)
1222 struct entry *added_entry = modified_file.entries[j];
1223 gl_list_add_last (result_entries, added_entry);
1228 gl_list_node_t node_k_after = result_entries_pointers[k_after];
1230 for (j = edit->j1; j <= edit->j2; j++)
1232 struct entry *added_entry = modified_file.entries[j];
1233 gl_list_add_before (result_entries, node_k_after, added_entry);
1239 /* It's not clear where the additions should be applied.
1240 Let the user decide. */
1241 struct conflict *c = XMALLOC (struct conflict);
1243 c->num_old_entries = 0;
1244 c->old_entries = NULL;
1245 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1246 c->modified_entries =
1247 XNMALLOC (c->num_modified_entries, struct entry *);
1248 for (j = edit->j1; j <= edit->j2; j++)
1249 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1250 gl_list_add_last (result_conflicts, c);
1256 /* Apply the removals one by one. */
1258 for (i = edit->i1; i <= edit->i2; i++)
1260 struct entry *removed_entry = ancestor_file.entries[i];
1261 ssize_t k = entries_mapping_get (&mapping, i);
1263 && entry_equals (removed_entry,
1264 mainstream_file.entries[k]))
1266 /* The entry to be removed still exists in
1267 mainstream_file. Remove it. */
1268 gl_list_node_set_value (result_entries,
1269 result_entries_pointers[k],
1274 /* The entry to be removed was already removed or was
1275 modified. This is a conflict. */
1276 struct conflict *c = XMALLOC (struct conflict);
1277 c->num_old_entries = 1;
1279 XNMALLOC (c->num_old_entries, struct entry *);
1280 c->old_entries[0] = removed_entry;
1281 c->num_modified_entries = 0;
1282 c->modified_entries = NULL;
1283 gl_list_add_last (result_conflicts, c);
1291 /* When the user usually merges entries from the same day,
1292 and this edit is at the top of the file: */
1293 if (split_merged_entry && edit->j1 == 0)
1295 /* Test whether the change is "simple merged", i.e. whether
1296 it consists of additions, followed by an augmentation of
1297 the first changed entry, followed by small changes of the
1310 modified_entry_n. */
1311 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1313 struct entry *split[2];
1314 bool simple_merged =
1315 try_split_merged_entry (ancestor_file.entries[edit->i1],
1316 modified_file.entries[edit->i1 + edit->j2 - edit->i2],
1321 for (i = edit->i1 + 1; i <= edit->i2; i++)
1322 if (entry_fstrcmp (ancestor_file.entries[i],
1323 modified_file.entries[i + edit->j2 - edit->i2],
1325 < FSTRCMP_THRESHOLD)
1327 simple_merged = false;
1333 /* Apply the additions at the top of modified_file.
1334 Apply each of the single-entry changes
1336 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1337 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1339 /* First part of the split modified_file.entries[edit->j2 - edit->i2 + edit->i1]: */
1340 gl_list_add_first (result_entries, split[0]);
1341 /* The additions. */
1342 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1344 struct entry *added_entry = modified_file.entries[j];
1345 gl_list_add_first (result_entries, added_entry);
1347 /* Now the single-entry changes. */
1348 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1350 struct entry *changed_entry =
1351 (j == edit->j1 + num_added
1353 : modified_file.entries[j]);
1354 size_t i = j + edit->i2 - edit->j2;
1355 ssize_t k = entries_mapping_get (&mapping, i);
1357 && entry_equals (ancestor_file.entries[i],
1358 mainstream_file.entries[k]))
1360 gl_list_node_set_value (result_entries,
1361 result_entries_pointers[k],
1364 else if (!entry_equals (ancestor_file.entries[i],
1367 struct conflict *c = XMALLOC (struct conflict);
1368 c->num_old_entries = 1;
1370 XNMALLOC (c->num_old_entries, struct entry *);
1371 c->old_entries[0] = ancestor_file.entries[i];
1372 c->num_modified_entries = 1;
1373 c->modified_entries =
1374 XNMALLOC (c->num_modified_entries, struct entry *);
1375 c->modified_entries[0] = changed_entry;
1376 gl_list_add_last (result_conflicts, c);
1386 /* Test whether the change is "simple", i.e. whether it
1387 consists of small changes to the old ChangeLog entries
1388 and additions before them:
1398 modified_entry_n. */
1399 if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1403 for (i = edit->i1; i <= edit->i2; i++)
1404 if (entry_fstrcmp (ancestor_file.entries[i],
1405 modified_file.entries[i + edit->j2 - edit->i2],
1407 < FSTRCMP_THRESHOLD)
1417 /* Apply the additions and each of the single-entry
1418 changes separately. */
1419 size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1420 size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1423 /* A simple change at the top of modified_file.
1424 Apply it to the top of mainstream_file. */
1426 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1428 struct entry *added_entry = modified_file.entries[j];
1429 gl_list_add_first (result_entries, added_entry);
1431 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1433 struct entry *changed_entry = modified_file.entries[j];
1434 size_t i = j + edit->i2 - edit->j2;
1435 ssize_t k = entries_mapping_get (&mapping, i);
1437 && entry_equals (ancestor_file.entries[i],
1438 mainstream_file.entries[k]))
1440 gl_list_node_set_value (result_entries,
1441 result_entries_pointers[k],
1447 ASSERT (!entry_equals (ancestor_file.entries[i],
1449 c = XMALLOC (struct conflict);
1450 c->num_old_entries = 1;
1452 XNMALLOC (c->num_old_entries, struct entry *);
1453 c->old_entries[0] = ancestor_file.entries[i];
1454 c->num_modified_entries = 1;
1455 c->modified_entries =
1456 XNMALLOC (c->num_modified_entries, struct entry *);
1457 c->modified_entries[0] = changed_entry;
1458 gl_list_add_last (result_conflicts, c);
1468 i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1469 ASSERT (i_before >= 0);
1470 /* A simple change after ancestor_file.entries[i_before].
1471 See whether this entry and the following num_changed
1472 entries still exist in mainstream_file and are still
1474 k_before = entries_mapping_get (&mapping, i_before);
1475 linear = (k_before >= 0);
1479 for (i = i_before + 1; i <= i_before + num_changed; i++)
1480 if (entries_mapping_get (&mapping, i) != k_before + (i - i_before))
1488 gl_list_node_t node_for_insert =
1489 result_entries_pointers[k_before + 1];
1491 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1493 struct entry *added_entry = modified_file.entries[j];
1494 gl_list_add_before (result_entries, node_for_insert, added_entry);
1496 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1498 struct entry *changed_entry = modified_file.entries[j];
1499 size_t i = j + edit->i2 - edit->j2;
1500 ssize_t k = entries_mapping_get (&mapping, i);
1502 if (entry_equals (ancestor_file.entries[i],
1503 mainstream_file.entries[k]))
1505 gl_list_node_set_value (result_entries,
1506 result_entries_pointers[k],
1512 ASSERT (!entry_equals (ancestor_file.entries[i],
1514 c = XMALLOC (struct conflict);
1515 c->num_old_entries = 1;
1517 XNMALLOC (c->num_old_entries, struct entry *);
1518 c->old_entries[0] = ancestor_file.entries[i];
1519 c->num_modified_entries = 1;
1520 c->modified_entries =
1521 XNMALLOC (c->num_modified_entries, struct entry *);
1522 c->modified_entries[0] = changed_entry;
1523 gl_list_add_last (result_conflicts, c);
1533 See whether the num_changed entries still exist
1534 unchanged in mainstream_file and are still
1538 bool linear_unchanged;
1540 k_first = entries_mapping_get (&mapping, i_first);
1543 && entry_equals (ancestor_file.entries[i_first],
1544 mainstream_file.entries[k_first]));
1545 if (linear_unchanged)
1548 for (i = i_first + 1; i <= edit->i2; i++)
1549 if (!(entries_mapping_get (&mapping, i) == k_first + (i - i_first)
1550 && entry_equals (ancestor_file.entries[i],
1551 mainstream_file.entries[entries_mapping_get (&mapping, i)])))
1553 linear_unchanged = false;
1557 if (linear_unchanged)
1559 gl_list_node_t node_for_insert =
1560 result_entries_pointers[k_first];
1563 for (j = edit->j2; j >= edit->j1; j--)
1565 struct entry *new_entry = modified_file.entries[j];
1566 gl_list_add_before (result_entries, node_for_insert, new_entry);
1568 for (i = edit->i1; i <= edit->i2; i++)
1570 ssize_t k = entries_mapping_get (&mapping, i);
1572 ASSERT (entry_equals (ancestor_file.entries[i],
1573 mainstream_file.entries[k]));
1574 gl_list_node_set_value (result_entries,
1575 result_entries_pointers[k],
1584 struct conflict *c = XMALLOC (struct conflict);
1586 c->num_old_entries = edit->i2 - edit->i1 + 1;
1588 XNMALLOC (c->num_old_entries, struct entry *);
1589 for (i = edit->i1; i <= edit->i2; i++)
1590 c->old_entries[i - edit->i1] = ancestor_file.entries[i];
1591 c->num_modified_entries = edit->j2 - edit->j1 + 1;
1592 c->modified_entries =
1593 XNMALLOC (c->num_modified_entries, struct entry *);
1594 for (j = edit->j1; j <= edit->j2; j++)
1595 c->modified_entries[j - edit->j1] = modified_file.entries[j];
1596 gl_list_add_last (result_conflicts, c);
1604 /* Output the result. */
1606 FILE *fp = fopen (destination_file_name, "w");
1609 fprintf (stderr, "could not write file '%s'\n", destination_file_name);
1610 exit (EXIT_FAILURE);
1613 /* Output the conflicts at the top. */
1615 size_t n = gl_list_size (result_conflicts);
1617 for (i = 0; i < n; i++)
1618 conflict_write (fp, (struct conflict *) gl_list_get_at (result_conflicts, i));
1620 /* Output the modified and unmodified entries, in order. */
1622 gl_list_iterator_t iter = gl_list_iterator (result_entries);
1624 gl_list_node_t node;
1625 while (gl_list_iterator_next (&iter, &elt, &node))
1626 entry_write (fp, (struct entry *) elt);
1627 gl_list_iterator_free (&iter);
1630 if (fwriteerror (fp))
1632 fprintf (stderr, "error writing to file '%s'\n", destination_file_name);
1633 exit (EXIT_FAILURE);
1637 exit (gl_list_size (result_conflicts) > 0 ? EXIT_FAILURE : EXIT_SUCCESS);