lib/git-merge-changelog.c

   1 /* git-merge-changelog - git "merge" driver for GNU style ChangeLog files.
   2    Copyright (C) 2008-2010 Bruno Haible <bruno@clisp.org>
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 2 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  16
  17 /* README:
  18    The default merge driver of 'git' *always* produces conflicts when
  19    pulling public modifications into a privately modified ChangeLog file.
  20    This is because ChangeLog files are always modified at the top; the
  21    default merge driver has no clue how to deal with this. Furthermore
  22    the conflicts are presented with more <<<< ==== >>>> markers than
  23    necessary; this is because the default merge driver makes pointless
  24    efforts to look at the individual line changes inside a ChangeLog entry.
  25
  26    This program serves as a 'git' merge driver that avoids these problems.
  27    1. It produces no conflict when ChangeLog entries have been inserted
  28       at the top both in the public and in the private modification. It
  29       puts the privately added entries above the publicly added entries.
  30    2. It respects the structure of ChangeLog files: entries are not split
  31       into lines but kept together.
  32    3. It also handles the case of small modifications of past ChangeLog
  33       entries, or of removed ChangeLog entries: they are merged as one
  34       would expect it.
  35    4. Conflicts are presented at the top of the file, rather than where
  36       they occurred, so that the user will see them immediately. (Unlike
  37       for source code written in some programming language, conflict markers
  38       that are located several hundreds lines from the top will not cause
  39       any syntax error and therefore would be likely to remain unnoticed.)
  40  */
  41
  42 /* Installation:
  43    $ gnulib-tool --create-testdir --dir=/tmp/testdir123 git-merge-changelog
  44    $ cd /tmp/testdir123
  45    $ ./configure
  46    $ make
  47    $ make install
  48    - Add to .git/config of the checkout (or to your $HOME/.gitconfig) the lines
  49
  50         [merge "merge-changelog"]
  51                 name = GNU-style ChangeLog merge driver
  52                 driver = /usr/local/bin/git-merge-changelog %O %A %B
  53
  54    - In every directory that contains a ChangeLog file, add a file
  55      '.gitattributes' with this line:
  56
  57         ChangeLog    merge=merge-changelog
  58
  59      (See "man 5 gitattributes" for more info.)
  60  */
  61
  62 /* Calling convention:
  63    A merge driver is called with three filename arguments:
  64      1. %O = The common ancestor of %A and %B.
  65      2. %A = The file's contents from the "current branch".
  66      3. %B = The file's contents from the "other branch"; this is the contents
  67         being merged in.
  68
  69    In case of a "git stash apply" or of an upstream pull (e.g. from a subsystem
  70    maintainer to a central maintainer) or of a downstream pull with --rebase:
  71      2. %A = The file's newest pulled contents; modified by other committers.
  72      3. %B = The user's newest copy of the file; modified by the user.
  73    In case of a downstream pull (e.g. from a central repository to the user)
  74    or of an upstream pull with --rebase:
  75      2. %A = The user's newest copy of the file; modified by the user.
  76      3. %B = The file's newest pulled contents; modified by other committers.
  77
  78    It should write its merged output into file %A. It can also echo some
  79    remarks to stdout.  It should exit with return code 0 if the merge could
  80    be resolved cleanly, or with non-zero return code if there were conflicts.
  81  */
  82
  83 /* How it works:
  84    The structure of a ChangeLog file: It consists of ChangeLog entries. A
  85    ChangeLog entry starts at a line following a blank line and that starts with
  86    a non-whitespace character, or at the beginning of a file.
  87    The merge driver works as follows: It reads the three files into memory and
  88    dissects them into ChangeLog entries. It then finds the differences between
  89    %O and %B. They are classified as:
  90      - removals (some consecutive entries removed),
  91      - changes (some consecutive entries removed, some consecutive entries
  92        added),
  93      - additions (some consecutive entries added).
  94    The driver then attempts to apply the changes to %A.
  95    To this effect, it first computes a correspondence between the entries in %O
  96    and the entries in %A, using fuzzy string matching to still identify changed
  97    entries.
  98      - Removals are applied one by one. If the entry is present in %A, at any
  99        position, it is removed. If not, the removal is marked as a conflict.
 100      - Additions at the top of %B are applied at the top of %A.
 101      - Additions between entry x and entry y (y may be the file end) in %B are
 102        applied between entry x and entry y in %A (if they still exist and are
 103        still consecutive in %A), otherwise the additions are marked as a
 104        conflict.
 105      - Changes are categorized into "simple changes":
 106          entry1 ... entryn
 107        are mapped to
 108          added_entry ... added_entry modified_entry1 ... modified_entryn,
 109        where the correspondence between entry_i and modified_entry_i is still
 110        clear; and "big changes": these are all the rest. Simple changes at the
 111        top of %B are applied by putting the added entries at the top of %A. The
 112        changes in simple changes are applied one by one; possibly leading to
 113        single-entry conflicts. Big changes are applied en bloc, possibly
 114        leading to conflicts spanning multiple entries.
 115      - Conflicts are output at the top of the file and cause an exit status of
 116        1.
 117  */
 118
 119 #include <config.h>
 120
 121 #include <getopt.h>
 122 #include <limits.h>
 123 #include <stdbool.h>
 124 #include <stdio.h>
 125 #include <stdlib.h>
 126 #include <string.h>
 127 #include <sys/types.h>
 128 #include <unistd.h>
 129
 130 #include "progname.h"
 131 #include "error.h"
 132 #include "read-file.h"
 133 #include "gl_xlist.h"
 134 #include "gl_array_list.h"
 135 #include "gl_linkedhash_list.h"
 136 #include "gl_rbtreehash_list.h"
 137 #include "gl_linked_list.h"
 138 #include "xalloc.h"
 139 #include "xmalloca.h"
 140 #include "fstrcmp.h"
 141 #include "minmax.h"
 142 #include "c-strstr.h"
 143 #include "fwriteerror.h"
 144
 145 #define ASSERT(expr) \
 146   do                                                                         \
 147     {                                                                        \
 148       if (!(expr))                                                           \
 149         abort ();                                                            \
 150     }                                                                        \
 151   while (0)
 152
 153 #define FSTRCMP_THRESHOLD 0.6
 154 #define FSTRCMP_STRICTER_THRESHOLD 0.8
 155
 156 /* Representation of a ChangeLog entry.
 157    The string may contain NUL bytes; therefore it is represented as a plain
 158    opaque memory region.  */
 159 struct entry
 160 {
 161   char *string;
 162   size_t length;
 163   /* Cache for the hash code.  */
 164   bool hashcode_cached;
 165   size_t hashcode;
 166 };
 167
 168 /* Create an entry.
 169    The memory region passed by the caller must of indefinite extent.  It is
 170    *not* copied here.  */
 171 static struct entry *
 172 entry_create (char *string, size_t length)
 173 {
 174   struct entry *result = XMALLOC (struct entry);
 175   result->string = string;
 176   result->length = length;
 177   result->hashcode_cached = false;
 178   return result;
 179 }
 180
 181 /* Compare two entries for equality.  */
 182 static bool
 183 entry_equals (const void *elt1, const void *elt2)
 184 {
 185   const struct entry *entry1 = (const struct entry *) elt1;
 186   const struct entry *entry2 = (const struct entry *) elt2;
 187   return entry1->length == entry2->length
 188          && memcmp (entry1->string, entry2->string, entry1->length) == 0;
 189 }
 190
 191 /* Return a hash code of the contents of a ChangeLog entry.  */
 192 static size_t
 193 entry_hashcode (const void *elt)
 194 {
 195   struct entry *entry = (struct entry *) elt;
 196   if (!entry->hashcode_cached)
 197     {
 198       /* See http://www.haible.de/bruno/hashfunc.html.  */
 199       const char *s;
 200       size_t n;
 201       size_t h = 0;
 202
 203       for (s = entry->string, n = entry->length; n > 0; s++, n--)
 204         h = (unsigned char) *s + ((h << 9) | (h >> (sizeof (size_t) * CHAR_BIT - 9)));
 205
 206       entry->hashcode = h;
 207       entry->hashcode_cached = true;
 208     }
 209   return entry->hashcode;
 210 }
 211
 212 /* Perform a fuzzy comparison of two ChangeLog entries.
 213    Return a similarity measure of the two entries, a value between 0 and 1.
 214    0 stands for very distinct, 1 for identical.
 215    If the result is < LOWER_BOUND, an arbitrary other value < LOWER_BOUND can
 216    be returned.  */
 217 static double
 218 entry_fstrcmp (const struct entry *entry1, const struct entry *entry2,
 219                double lower_bound)
 220 {
 221   /* fstrcmp works only on NUL terminated strings.  */
 222   char *memory;
 223   double similarity;
 224
 225   if (memchr (entry1->string, '\0', entry1->length) != NULL)
 226     return 0.0;
 227   if (memchr (entry2->string, '\0', entry2->length) != NULL)
 228     return 0.0;
 229   memory = (char *) xmalloca (entry1->length + 1 + entry2->length + 1);
 230   {
 231     char *p = memory;
 232     memcpy (p, entry1->string, entry1->length);
 233     p += entry1->length;
 234     *p++ = '\0';
 235     memcpy (p, entry2->string, entry2->length);
 236     p += entry2->length;
 237     *p++ = '\0';
 238   }
 239   similarity =
 240     fstrcmp_bounded (memory, memory + entry1->length + 1, lower_bound);
 241   freea (memory);
 242   return similarity;
 243 }
 244
 245 /* This structure represents an entire ChangeLog file, after it was read
 246    into memory.  */
 247 struct changelog_file
 248 {
 249   /* The entries, as a list.  */
 250   gl_list_t /* <struct entry *> */ entries_list;
 251   /* The entries, as a list in opposite direction.  */
 252   gl_list_t /* <struct entry *> */ entries_reversed;
 253   /* The entries, as an array.  */
 254   size_t num_entries;
 255   struct entry **entries;
 256 };
 257
 258 /* Read a ChangeLog file into memory.
 259    Return the contents in *RESULT.  */
 260 static void
 261 read_changelog_file (const char *filename, struct changelog_file *result)
 262 {
 263   /* Read the file in text mode, otherwise it's hard to recognize empty
 264      lines.  */
 265   size_t length;
 266   char *contents = read_file (filename, &length);
 267   if (contents == NULL)
 268     {
 269       fprintf (stderr, "could not read file '%s'\n", filename);
 270       exit (EXIT_FAILURE);
 271     }
 272
 273   result->entries_list =
 274     gl_list_create_empty (GL_LINKEDHASH_LIST, entry_equals, entry_hashcode,
 275                           NULL, true);
 276   result->entries_reversed =
 277     gl_list_create_empty (GL_RBTREEHASH_LIST, entry_equals, entry_hashcode,
 278                           NULL, true);
 279   /* A ChangeLog file consists of ChangeLog entries.  A ChangeLog entry starts
 280      at a line following a blank line and that starts with a non-whitespace
 281      character, or at the beginning of a file.
 282      Split the file contents into entries.  */
 283   {
 284     char *contents_end = contents + length;
 285     char *start = contents;
 286     while (start < contents_end)
 287       {
 288         /* Search the end of the current entry.  */
 289         char *ptr = start;
 290         struct entry *curr;
 291
 292         while (ptr < contents_end)
 293           {
 294             ptr = memchr (ptr, '\n', contents_end - ptr);
 295             if (ptr == NULL)
 296               {
 297                 ptr = contents_end;
 298                 break;
 299               }
 300             ptr++;
 301             if (contents_end - ptr >= 2
 302                 && ptr[0] == '\n'
 303                 && !(ptr[1] == '\n' || ptr[1] == '\t' || ptr[1] == ' '))
 304               {
 305                 ptr++;
 306                 break;
 307               }
 308           }
 309
 310         curr = entry_create (start, ptr - start);
 311         gl_list_add_last (result->entries_list, curr);
 312         gl_list_add_first (result->entries_reversed, curr);
 313
 314         start = ptr;
 315       }
 316   }
 317
 318   result->num_entries = gl_list_size (result->entries_list);
 319   result->entries = XNMALLOC (result->num_entries, struct entry *);
 320   {
 321     size_t index = 0;
 322     gl_list_iterator_t iter = gl_list_iterator (result->entries_list);
 323     const void *elt;
 324     gl_list_node_t node;
 325     while (gl_list_iterator_next (&iter, &elt, &node))
 326       result->entries[index++] = (struct entry *) elt;
 327     gl_list_iterator_free (&iter);
 328     ASSERT (index == result->num_entries);
 329   }
 330 }
 331
 332 /* A mapping (correspondence) between entries of FILE1 and of FILE2.  */
 333 struct entries_mapping
 334 {
 335   struct changelog_file *file1;
 336   struct changelog_file *file2;
 337   /* Mapping from indices in FILE1 to indices in FILE2.
 338      A value -1 means that the entry from FILE1 is not found in FILE2.
 339      A value -2 means that it has not yet been computed.  */
 340   ssize_t *index_mapping;
 341   /* Mapping from indices in FILE2 to indices in FILE1.
 342      A value -1 means that the entry from FILE2 is not found in FILE1.
 343      A value -2 means that it has not yet been computed.  */
 344   ssize_t *index_mapping_reverse;
 345 };
 346
 347 /* Look up (or lazily compute) the mapping of an entry in FILE1.
 348    i is the index in FILE1.
 349    Return the index in FILE2, or -1 when the entry is not found in FILE2.  */
 350 static ssize_t
 351 entries_mapping_get (struct entries_mapping *mapping, ssize_t i)
 352 {
 353   if (mapping->index_mapping[i] < -1)
 354     {
 355       struct changelog_file *file1 = mapping->file1;
 356       struct changelog_file *file2 = mapping->file2;
 357       size_t n1 = file1->num_entries;
 358       size_t n2 = file2->num_entries;
 359       struct entry *entry_i = file1->entries[i];
 360       ssize_t j;
 361
 362       /* Search whether it approximately occurs in file2.  */
 363       ssize_t best_j = -1;
 364       double best_j_similarity = 0.0;
 365       for (j = n2 - 1; j >= 0; j--)
 366         if (mapping->index_mapping_reverse[j] < 0)
 367           {
 368             double similarity =
 369               entry_fstrcmp (entry_i, file2->entries[j], best_j_similarity);
 370             if (similarity > best_j_similarity)
 371               {
 372                 best_j = j;
 373                 best_j_similarity = similarity;
 374               }
 375           }
 376       if (best_j_similarity >= FSTRCMP_THRESHOLD)
 377         {
 378           /* Found a similar entry in file2.  */
 379           struct entry *entry_j = file2->entries[best_j];
 380           /* Search whether it approximately occurs in file1 at index i.  */
 381           ssize_t best_i = -1;
 382           double best_i_similarity = 0.0;
 383           ssize_t ii;
 384           for (ii = n1 - 1; ii >= 0; ii--)
 385             if (mapping->index_mapping[ii] < 0)
 386               {
 387                 double similarity =
 388                   entry_fstrcmp (file1->entries[ii], entry_j,
 389                                  best_i_similarity);
 390                 if (similarity > best_i_similarity)
 391                   {
 392                     best_i = ii;
 393                     best_i_similarity = similarity;
 394                   }
 395               }
 396           if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i)
 397             {
 398               mapping->index_mapping[i] = best_j;
 399               mapping->index_mapping_reverse[best_j] = i;
 400             }
 401         }
 402       if (mapping->index_mapping[i] < -1)
 403         /* It does not approximately occur in FILE2.
 404            Remember it, for next time.  */
 405         mapping->index_mapping[i] = -1;
 406     }
 407   return mapping->index_mapping[i];
 408 }
 409
 410 /* Look up (or lazily compute) the mapping of an entry in FILE2.
 411    j is the index in FILE2.
 412    Return the index in FILE1, or -1 when the entry is not found in FILE1.  */
 413 static ssize_t
 414 entries_mapping_reverse_get (struct entries_mapping *mapping, ssize_t j)
 415 {
 416   if (mapping->index_mapping_reverse[j] < -1)
 417     {
 418       struct changelog_file *file1 = mapping->file1;
 419       struct changelog_file *file2 = mapping->file2;
 420       size_t n1 = file1->num_entries;
 421       size_t n2 = file2->num_entries;
 422       struct entry *entry_j = file2->entries[j];
 423       ssize_t i;
 424
 425       /* Search whether it approximately occurs in file1.  */
 426       ssize_t best_i = -1;
 427       double best_i_similarity = 0.0;
 428       for (i = n1 - 1; i >= 0; i--)
 429         if (mapping->index_mapping[i] < 0)
 430           {
 431             double similarity =
 432               entry_fstrcmp (file1->entries[i], entry_j, best_i_similarity);
 433             if (similarity > best_i_similarity)
 434               {
 435                 best_i = i;
 436                 best_i_similarity = similarity;
 437               }
 438           }
 439       if (best_i_similarity >= FSTRCMP_THRESHOLD)
 440         {
 441           /* Found a similar entry in file1.  */
 442           struct entry *entry_i = file1->entries[best_i];
 443           /* Search whether it approximately occurs in file2 at index j.  */
 444           ssize_t best_j = -1;
 445           double best_j_similarity = 0.0;
 446           ssize_t jj;
 447           for (jj = n2 - 1; jj >= 0; jj--)
 448             if (mapping->index_mapping_reverse[jj] < 0)
 449               {
 450                 double similarity =
 451                   entry_fstrcmp (entry_i, file2->entries[jj],
 452                                  best_j_similarity);
 453                 if (similarity > best_j_similarity)
 454                   {
 455                     best_j = jj;
 456                     best_j_similarity = similarity;
 457                   }
 458               }
 459           if (best_j_similarity >= FSTRCMP_THRESHOLD && best_j == j)
 460             {
 461               mapping->index_mapping_reverse[j] = best_i;
 462               mapping->index_mapping[best_i] = j;
 463             }
 464         }
 465       if (mapping->index_mapping_reverse[j] < -1)
 466         /* It does not approximately occur in FILE1.
 467            Remember it, for next time.  */
 468         mapping->index_mapping_reverse[j] = -1;
 469     }
 470   return mapping->index_mapping_reverse[j];
 471 }
 472
 473 /* Compute a mapping (correspondence) between entries of FILE1 and of FILE2.
 474    The correspondence also takes into account small modifications; i.e. the
 475    indicated relation is not equality of entries but best-match similarity
 476    of entries.
 477    If FULL is true, the maximum of matching is done up-front.  If it is false,
 478    it is done in a lazy way through the functions entries_mapping_get and
 479    entries_mapping_reverse_get.
 480    Return the result in *RESULT.  */
 481 static void
 482 compute_mapping (struct changelog_file *file1, struct changelog_file *file2,
 483                  bool full,
 484                  struct entries_mapping *result)
 485 {
 486   /* Mapping from indices in file1 to indices in file2.  */
 487   ssize_t *index_mapping;
 488   /* Mapping from indices in file2 to indices in file1.  */
 489   ssize_t *index_mapping_reverse;
 490   size_t n1 = file1->num_entries;
 491   size_t n2 = file2->num_entries;
 492   ssize_t i, j;
 493
 494   index_mapping = XNMALLOC (n1, ssize_t);
 495   for (i = 0; i < n1; i++)
 496     index_mapping[i] = -2;
 497
 498   index_mapping_reverse = XNMALLOC (n2, ssize_t);
 499   for (j = 0; j < n2; j++)
 500     index_mapping_reverse[j] = -2;
 501
 502   for (i = n1 - 1; i >= 0; i--)
 503     /* Take an entry from file1.  */
 504     if (index_mapping[i] < -1)
 505       {
 506         struct entry *entry = file1->entries[i];
 507         /* Search whether it occurs in file2.  */
 508         j = gl_list_indexof (file2->entries_reversed, entry);
 509         if (j >= 0)
 510           {
 511             j = n2 - 1 - j;
 512             /* Found an exact correspondence.  */
 513             /* If index_mapping_reverse[j] >= 0, we have already seen other
 514                copies of this entry, and there were more occurrences of it in
 515                file1 than in file2.  In this case, do nothing.  */
 516             if (index_mapping_reverse[j] < 0)
 517               {
 518                 index_mapping[i] = j;
 519                 index_mapping_reverse[j] = i;
 520                 /* Look for more occurrences of the same entry.  Match them
 521                    as long as they pair up.  Unpaired occurrences of the same
 522                    entry are left without mapping.  */
 523                 {
 524                   ssize_t curr_i = i;
 525                   ssize_t curr_j = j;
 526
 527                   for (;;)
 528                     {
 529                       ssize_t next_i;
 530                       ssize_t next_j;
 531
 532                       next_i =
 533                         gl_list_indexof_from (file1->entries_reversed,
 534                                               n1 - curr_i, entry);
 535                       if (next_i < 0)
 536                         break;
 537                       next_j =
 538                         gl_list_indexof_from (file2->entries_reversed,
 539                                               n2 - curr_j, entry);
 540                       if (next_j < 0)
 541                         break;
 542                       curr_i = n1 - 1 - next_i;
 543                       curr_j = n2 - 1 - next_j;
 544                       ASSERT (index_mapping[curr_i] < 0);
 545                       ASSERT (index_mapping_reverse[curr_j] < 0);
 546                       index_mapping[curr_i] = curr_j;
 547                       index_mapping_reverse[curr_j] = curr_i;
 548                     }
 549                 }
 550               }
 551           }
 552       }
 553
 554   result->file1 = file1;
 555   result->file2 = file2;
 556   result->index_mapping = index_mapping;
 557   result->index_mapping_reverse = index_mapping_reverse;
 558
 559   if (full)
 560     for (i = n1 - 1; i >= 0; i--)
 561       entries_mapping_get (result, i);
 562 }
 563
 564 /* An "edit" is a textual modification performed by the user, that needs to
 565    be applied to the other file.  */
 566 enum edit_type
 567 {
 568   /* Some consecutive entries were added.  */
 569   ADDITION,
 570   /* Some consecutive entries were removed; some other consecutive entries
 571      were added at the same position.  (Not necessarily the same number of
 572      entries.)  */
 573   CHANGE,
 574   /* Some consecutive entries were removed.  */
 575   REMOVAL
 576 };
 577
 578 /* This structure represents an edit.  */
 579 struct edit
 580 {
 581   enum edit_type type;
 582   /* Range of indices into the entries of FILE1.  */
 583   ssize_t i1, i2;       /* first, last index; only used for CHANGE, REMOVAL */
 584   /* Range of indices into the entries of FILE2.  */
 585   ssize_t j1, j2;       /* first, last index; only used for ADDITION, CHANGE */
 586 };
 587
 588 /* This structure represents the differences from one file, FILE1, to another
 589    file, FILE2.  */
 590 struct differences
 591 {
 592   /* An array mapping FILE1 indices to FILE2 indices (or -1 when the entry
 593      from FILE1 is not found in FILE2).  */
 594   ssize_t *index_mapping;
 595   /* An array mapping FILE2 indices to FILE1 indices (or -1 when the entry
 596      from FILE2 is not found in FILE1).  */
 597   ssize_t *index_mapping_reverse;
 598   /* The edits that transform FILE1 into FILE2.  */
 599   size_t num_edits;
 600   struct edit **edits;
 601 };
 602
 603 /* Import the difference detection algorithm from GNU diff.  */
 604 #define ELEMENT struct entry *
 605 #define EQUAL entry_equals
 606 #define OFFSET ssize_t
 607 #define EXTRA_CONTEXT_FIELDS \
 608   ssize_t *index_mapping; \
 609   ssize_t *index_mapping_reverse;
 610 #define NOTE_DELETE(ctxt, xoff) \
 611   ctxt->index_mapping[xoff] = -1
 612 #define NOTE_INSERT(ctxt, yoff) \
 613   ctxt->index_mapping_reverse[yoff] = -1
 614 #include "diffseq.h"
 615
 616 /* Compute the differences between the entries of FILE1 and the entries of
 617    FILE2.  */
 618 static void
 619 compute_differences (struct changelog_file *file1, struct changelog_file *file2,
 620                      struct differences *result)
 621 {
 622   /* Unlike compute_mapping, which mostly ignores the order of the entries and
 623      therefore works well when some entries are permuted, here we use the order.
 624      I think this is needed in order to distinguish changes from
 625      additions+removals; I don't know how to say what is a "change" if the
 626      files are considered as unordered sets of entries.  */
 627   struct context ctxt;
 628   size_t n1 = file1->num_entries;
 629   size_t n2 = file2->num_entries;
 630   ssize_t i;
 631   ssize_t j;
 632   gl_list_t /* <struct edit *> */ edits;
 633
 634   ctxt.xvec = file1->entries;
 635   ctxt.yvec = file2->entries;
 636   ctxt.index_mapping = XNMALLOC (n1, ssize_t);
 637   for (i = 0; i < n1; i++)
 638     ctxt.index_mapping[i] = 0;
 639   ctxt.index_mapping_reverse = XNMALLOC (n2, ssize_t);
 640   for (j = 0; j < n2; j++)
 641     ctxt.index_mapping_reverse[j] = 0;
 642   ctxt.fdiag = XNMALLOC (2 * (n1 + n2 + 3), ssize_t) + n2 + 1;
 643   ctxt.bdiag = ctxt.fdiag + n1 + n2 + 3;
 644   ctxt.too_expensive = n1 + n2;
 645
 646   /* Store in ctxt.index_mapping and ctxt.index_mapping_reverse a -1 for
 647      each removed or added entry.  */
 648   compareseq (0, n1, 0, n2, 0, &ctxt);
 649
 650   /* Complete the index_mapping and index_mapping_reverse arrays.  */
 651   i = 0;
 652   j = 0;
 653   while (i < n1 || j < n2)
 654     {
 655       while (i < n1 && ctxt.index_mapping[i] < 0)
 656         i++;
 657       while (j < n2 && ctxt.index_mapping_reverse[j] < 0)
 658         j++;
 659       ASSERT ((i < n1) == (j < n2));
 660       if (i == n1 && j == n2)
 661         break;
 662       ctxt.index_mapping[i] = j;
 663       ctxt.index_mapping_reverse[j] = i;
 664       i++;
 665       j++;
 666     }
 667
 668   /* Create the edits.  */
 669   edits = gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
 670   i = 0;
 671   j = 0;
 672   while (i < n1 || j < n2)
 673     {
 674       if (i == n1)
 675         {
 676           struct edit *e;
 677           ASSERT (j < n2);
 678           e = XMALLOC (struct edit);
 679           e->type = ADDITION;
 680           e->j1 = j;
 681           e->j2 = n2 - 1;
 682           gl_list_add_last (edits, e);
 683           break;
 684         }
 685       if (j == n2)
 686         {
 687           struct edit *e;
 688           ASSERT (i < n1);
 689           e = XMALLOC (struct edit);
 690           e->type = REMOVAL;
 691           e->i1 = i;
 692           e->i2 = n1 - 1;
 693           gl_list_add_last (edits, e);
 694           break;
 695         }
 696       if (ctxt.index_mapping[i] >= 0)
 697         {
 698           if (ctxt.index_mapping_reverse[j] >= 0)
 699             {
 700               ASSERT (ctxt.index_mapping[i] == j);
 701               ASSERT (ctxt.index_mapping_reverse[j] == i);
 702               i++;
 703               j++;
 704             }
 705           else
 706             {
 707               struct edit *e;
 708               ASSERT (ctxt.index_mapping_reverse[j] < 0);
 709               e = XMALLOC (struct edit);
 710               e->type = ADDITION;
 711               e->j1 = j;
 712               do
 713                 j++;
 714               while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
 715               e->j2 = j - 1;
 716               gl_list_add_last (edits, e);
 717             }
 718         }
 719       else
 720         {
 721           if (ctxt.index_mapping_reverse[j] >= 0)
 722             {
 723               struct edit *e;
 724               ASSERT (ctxt.index_mapping[i] < 0);
 725               e = XMALLOC (struct edit);
 726               e->type = REMOVAL;
 727               e->i1 = i;
 728               do
 729                 i++;
 730               while (i < n1 && ctxt.index_mapping[i] < 0);
 731               e->i2 = i - 1;
 732               gl_list_add_last (edits, e);
 733             }
 734           else
 735             {
 736               struct edit *e;
 737               ASSERT (ctxt.index_mapping[i] < 0);
 738               ASSERT (ctxt.index_mapping_reverse[j] < 0);
 739               e = XMALLOC (struct edit);
 740               e->type = CHANGE;
 741               e->i1 = i;
 742               do
 743                 i++;
 744               while (i < n1 && ctxt.index_mapping[i] < 0);
 745               e->i2 = i - 1;
 746               e->j1 = j;
 747               do
 748                 j++;
 749               while (j < n2 && ctxt.index_mapping_reverse[j] < 0);
 750               e->j2 = j - 1;
 751               gl_list_add_last (edits, e);
 752             }
 753         }
 754     }
 755
 756   result->index_mapping = ctxt.index_mapping;
 757   result->index_mapping_reverse = ctxt.index_mapping_reverse;
 758   result->num_edits = gl_list_size (edits);
 759   result->edits = XNMALLOC (result->num_edits, struct edit *);
 760   {
 761     size_t index = 0;
 762     gl_list_iterator_t iter = gl_list_iterator (edits);
 763     const void *elt;
 764     gl_list_node_t node;
 765     while (gl_list_iterator_next (&iter, &elt, &node))
 766       result->edits[index++] = (struct edit *) elt;
 767     gl_list_iterator_free (&iter);
 768     ASSERT (index == result->num_edits);
 769   }
 770 }
 771
 772 /* An empty entry.  */
 773 static struct entry empty_entry = { NULL, 0 };
 774
 775 /* Return the end a paragraph.
 776    ENTRY is an entry.
 777    OFFSET is an offset into the entry, OFFSET <= ENTRY->length.
 778    Return the offset of the end of paragraph, as an offset <= ENTRY->length;
 779    it is the start of a blank line or the end of the entry.  */
 780 static size_t
 781 find_paragraph_end (const struct entry *entry, size_t offset)
 782 {
 783   const char *string = entry->string;
 784   size_t length = entry->length;
 785
 786   for (;;)
 787     {
 788       const char *nl = memchr (string + offset, '\n', length - offset);
 789       if (nl == NULL)
 790         return length;
 791       offset = (nl - string) + 1;
 792       if (offset < length && string[offset] == '\n')
 793         return offset;
 794     }
 795 }
 796
 797 /* Split a merged entry.
 798    Given an old entry of the form
 799        TITLE
 800        BODY
 801    and a new entry of the form
 802        TITLE
 803        BODY1
 804        BODY'
 805    where the two titles are the same and BODY and BODY' are very similar,
 806    this computes two new entries
 807        TITLE
 808        BODY1
 809    and
 810        TITLE
 811        BODY'
 812    and returns true.
 813    If the entries don't have this form, it returns false.  */
 814 static bool
 815 try_split_merged_entry (const struct entry *old_entry,
 816                         const struct entry *new_entry,
 817                         struct entry *new_split[2])
 818 {
 819   size_t old_title_len = find_paragraph_end (old_entry, 0);
 820   size_t new_title_len = find_paragraph_end (new_entry, 0);
 821   struct entry old_body;
 822   struct entry new_body;
 823   size_t best_split_offset;
 824   double best_similarity;
 825   size_t split_offset;
 826
 827   /* Same title? */
 828   if (!(old_title_len == new_title_len
 829         && memcmp (old_entry->string, new_entry->string, old_title_len) == 0))
 830     return false;
 831
 832   old_body.string = old_entry->string + old_title_len;
 833   old_body.length = old_entry->length - old_title_len;
 834
 835   /* Determine where to split the new entry.
 836      This is done by maximizing the similarity between BODY and BODY'.  */
 837   best_split_offset = split_offset = new_title_len;
 838   best_similarity = 0.0;
 839   for (;;)
 840     {
 841       double similarity;
 842
 843       new_body.string = new_entry->string + split_offset;
 844       new_body.length = new_entry->length - split_offset;
 845       similarity =
 846         entry_fstrcmp (&old_body, &new_body, best_similarity);
 847       if (similarity > best_similarity)
 848         {
 849           best_split_offset = split_offset;
 850           best_similarity = similarity;
 851         }
 852       if (best_similarity == 1.0)
 853         /* It cannot get better.  */
 854         break;
 855
 856       if (split_offset < new_entry->length)
 857         split_offset = find_paragraph_end (new_entry, split_offset + 1);
 858       else
 859         break;
 860     }
 861
 862   /* BODY' should not be empty.  */
 863   if (best_split_offset == new_entry->length)
 864     return false;
 865   ASSERT (new_entry->string[best_split_offset] == '\n');
 866
 867   /* A certain similarity between BODY and BODY' is required.  */
 868   if (best_similarity < FSTRCMP_STRICTER_THRESHOLD)
 869     return false;
 870
 871   new_split[0] = entry_create (new_entry->string, best_split_offset + 1);
 872
 873   {
 874     size_t len1 = new_title_len;
 875     size_t len2 = new_entry->length - best_split_offset;
 876     char *combined = XNMALLOC (len1 + len2, char);
 877     memcpy (combined, new_entry->string, len1);
 878     memcpy (combined + len1, new_entry->string + best_split_offset, len2);
 879     new_split[1] = entry_create (combined, len1 + len2);
 880   }
 881
 882   return true;
 883 }
 884
 885 /* Write the contents of an entry to the output stream FP.  */
 886 static void
 887 entry_write (FILE *fp, struct entry *entry)
 888 {
 889   if (entry->length > 0)
 890     fwrite (entry->string, 1, entry->length, fp);
 891 }
 892
 893 /* This structure represents a conflict.
 894    A conflict can occur for various reasons.  */
 895 struct conflict
 896 {
 897   /* Parts from the ancestor file.  */
 898   size_t num_old_entries;
 899   struct entry **old_entries;
 900   /* Parts of the modified file.  */
 901   size_t num_modified_entries;
 902   struct entry **modified_entries;
 903 };
 904
 905 /* Write a conflict to the output stream FP, including markers.  */
 906 static void
 907 conflict_write (FILE *fp, struct conflict *c)
 908 {
 909   size_t i;
 910
 911   /* Use the same syntax as git's default merge driver.
 912      Don't indent the contents of the entries (with things like ">" or "-"),
 913      otherwise the user needs more textual editing to resolve the conflict.  */
 914   fputs ("<<<<<<<\n", fp);
 915   for (i = 0; i < c->num_old_entries; i++)
 916     entry_write (fp, c->old_entries[i]);
 917   fputs ("=======\n", fp);
 918   for (i = 0; i < c->num_modified_entries; i++)
 919     entry_write (fp, c->modified_entries[i]);
 920   fputs (">>>>>>>\n", fp);
 921 }
 922
 923 /* Long options.  */
 924 static const struct option long_options[] =
 925 {
 926   { "help", no_argument, NULL, 'h' },
 927   { "split-merged-entry", no_argument, NULL, CHAR_MAX + 1 },
 928   { "version", no_argument, NULL, 'V' },
 929   { NULL, 0, NULL, 0 }
 930 };
 931
 932 /* Print a usage mesage and exit.  */
 933 static void
 934 usage (int status)
 935 {
 936   if (status != EXIT_SUCCESS)
 937     fprintf (stderr, "Try `%s --help' for more information.\n",
 938              program_name);
 939   else
 940     {
 941       printf ("Usage: %s [OPTION] O-FILE-NAME A-FILE-NAME B-FILE-NAME\n",
 942               program_name);
 943       printf ("\n");
 944       printf ("Merges independent modifications of a ChangeLog style file.\n");
 945       printf ("O-FILE-NAME names the original file, the ancestor of the two others.\n");
 946       printf ("A-FILE-NAME names the publicly modified file.\n");
 947       printf ("B-FILE-NAME names the user-modified file.\n");
 948       printf ("Writes the merged file into A-FILE-NAME.\n");
 949       printf ("\n");
 950       #if 0 /* --split-merged-entry is now on by default.  */
 951       printf ("Operation modifiers:\n");
 952       printf ("\
 953       --split-merged-entry    Possibly split a merged entry between paragraphs.\n\
 954                               Use this if you have the habit to merge unrelated\n\
 955                               entries into a single one, separated only by a\n\
 956                               newline, just because they happened on the same\n\
 957                               date.\n");
 958       printf ("\n");
 959       #endif
 960       printf ("Informative output:\n");
 961       printf ("  -h, --help                  display this help and exit\n");
 962       printf ("  -V, --version               output version information and exit\n");
 963       printf ("\n");
 964       fputs ("Report bugs to <bug-gnu-gettext@gnu.org>.\n",
 965              stdout);
 966     }
 967
 968   exit (status);
 969 }
 970
 971 int
 972 main (int argc, char *argv[])
 973 {
 974   int optchar;
 975   bool do_help;
 976   bool do_version;
 977   bool split_merged_entry;
 978
 979   /* Set program name for messages.  */
 980   set_program_name (argv[0]);
 981
 982   /* Set default values for variables.  */
 983   do_help = false;
 984   do_version = false;
 985   split_merged_entry = true;
 986
 987   /* Parse command line options.  */
 988   while ((optchar = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
 989     switch (optchar)
 990     {
 991     case '\0':          /* Long option.  */
 992       break;
 993     case 'h':
 994       do_help = true;
 995       break;
 996     case 'V':
 997       do_version = true;
 998       break;
 999     case CHAR_MAX + 1:  /* --split-merged-entry */
1000       break;
1001     default:
1002       usage (EXIT_FAILURE);
1003     }
1004
1005   if (do_version)
1006     {
1007       /* Version information is requested.  */
1008       printf ("%s\n", program_name);
1009       printf ("Copyright (C) %s Free Software Foundation, Inc.\n\
1010 License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n\
1011 This is free software: you are free to change and redistribute it.\n\
1012 There is NO WARRANTY, to the extent permitted by law.\n\
1013 ",
1014               "2008");
1015       printf ("Written by %s.\n", "Bruno Haible");
1016       exit (EXIT_SUCCESS);
1017     }
1018
1019   if (do_help)
1020     {
1021       /* Help is requested.  */
1022       usage (EXIT_SUCCESS);
1023     }
1024
1025   /* Test argument count.  */
1026   if (optind + 3 != argc)
1027     error (EXIT_FAILURE, 0, "expected three arguments");
1028
1029   {
1030     const char *ancestor_file_name; /* O-FILE-NAME */
1031     const char *destination_file_name; /* A-FILE-NAME */
1032     bool downstream;
1033     const char *other_file_name; /* B-FILE-NAME */
1034     const char *mainstream_file_name;
1035     const char *modified_file_name;
1036     struct changelog_file ancestor_file;
1037     struct changelog_file mainstream_file;
1038     struct changelog_file modified_file;
1039     /* Mapping from indices in ancestor_file to indices in mainstream_file.  */
1040     struct entries_mapping mapping;
1041     struct differences diffs;
1042     gl_list_node_t *result_entries_pointers; /* array of pointers into result_entries */
1043     gl_list_t /* <struct entry *> */ result_entries;
1044     gl_list_t /* <struct conflict *> */ result_conflicts;
1045
1046     ancestor_file_name = argv[optind];
1047     destination_file_name = argv[optind + 1];
1048     other_file_name = argv[optind + 2];
1049
1050     /* Heuristic to determine whether it's a pull in downstream direction
1051        (e.g. pull from a centralized server) or a pull in upstream direction
1052        (e.g. "git stash apply").
1053
1054        For ChangeLog this distinction is important. The difference between
1055        an "upstream" and a "downstream" repository is that more people are
1056        looking at the "upstream" repository.  They want to be informed about
1057        changes and expect them to be shown at the top of the ChangeLog.
1058        When a user pulls downstream, on the other hand, he has two options:
1059          a) He gets the change entries from the central repository also at the
1060             top of his ChangeLog, and his own changes come after them.
1061          b) He gets the change entries from the central repository after those
1062             he has collected for his branch.  His own change entries stay at
1063             the top of the ChangeLog file.
1064        In the case a) he has to reorder the ChangeLog before he can commit.
1065        No one does that.  So most people want b).
1066        In other words, the order of entries in a ChangeLog should represent
1067        the order in which they have flown (or will flow) into the *central*
1068        repository.
1069
1070        But in git this is fundamentally indistinguishable, because when Linus
1071        pulls patches from akpm and akpm pulls patches from Linus, it's not
1072        clear which of the two is more "upstream".  Also, when you have many
1073        branches in a repository and pull from one to another, "git" has no way
1074        to know which branch is more "upstream" than the other.  The git-tag(1)
1075        manual page also says:
1076          "One important aspect of git is it is distributed, and being
1077           distributed largely means there is no inherent "upstream" or
1078           "downstream" in the system."
1079        Therefore anyone who attempts to produce a ChangeLog from the merge
1080        history will fail.
1081
1082        Here we allow the user to specify the pull direction through an
1083        environment variable (GIT_UPSTREAM or GIT_DOWNSTREAM).  If these two
1084        environment variables are not set, we assume a "simple single user"
1085        usage pattern: He manages local changes through stashes and uses
1086        "git pull" only to pull downstream.
1087
1088        How to distinguish these situation? There are several hints:
1089          - During a "git stash apply", GIT_REFLOG_ACTION is not set.  During
1090            a "git pull", it is set to 'pull '. During a "git pull --rebase",
1091            it is set to 'pull --rebase'.  During a "git cherry-pick", it is
1092            set to 'cherry-pick'.
1093          - During a "git stash apply", there is an environment variable of
1094            the form GITHEAD_<40_hex_digits>='Stashed changes'.  */
1095     {
1096       const char *var;
1097
1098       var = getenv ("GIT_DOWNSTREAM");
1099       if (var != NULL && var[0] != '\0')
1100         downstream = true;
1101       else
1102         {
1103           var = getenv ("GIT_UPSTREAM");
1104           if (var != NULL && var[0] != '\0')
1105             downstream = false;
1106           else
1107             {
1108               var = getenv ("GIT_REFLOG_ACTION");
1109               #if 0 /* Debugging code */
1110               printf ("GIT_REFLOG_ACTION=|%s|\n", var);
1111               #endif
1112               if (var != NULL
1113                   && ((strncmp (var, "pull", 4) == 0
1114                        && c_strstr (var, " --rebase") == NULL)
1115                       || strncmp (var, "merge origin", 12) == 0))
1116                 downstream = true;
1117               else
1118                 {
1119                   /* "git stash apply", "git rebase", "git cherry-pick" and
1120                      similar.  */
1121                   downstream = false;
1122                 }
1123             }
1124         }
1125     }
1126
1127     #if 0 /* Debugging code */
1128     {
1129       char buf[1000];
1130       printf ("First line of %%A:\n");
1131       sprintf (buf, "head -1 %s", destination_file_name); system (buf);
1132       printf ("First line of %%B:\n");
1133       sprintf (buf, "head -1 %s", other_file_name); system (buf);
1134       printf ("Guessing calling convention: %s\n",
1135               downstream
1136               ? "%A = modified by user, %B = upstream"
1137               : "%A = upstream, %B = modified by user");
1138     }
1139     #endif
1140
1141     if (downstream)
1142       {
1143         mainstream_file_name = other_file_name;
1144         modified_file_name = destination_file_name;
1145       }
1146     else
1147       {
1148         mainstream_file_name = destination_file_name;
1149         modified_file_name = other_file_name;
1150       }
1151
1152     /* Read the three files into memory.  */
1153     read_changelog_file (ancestor_file_name, &ancestor_file);
1154     read_changelog_file (mainstream_file_name, &mainstream_file);
1155     read_changelog_file (modified_file_name, &modified_file);
1156
1157     /* Compute correspondence between the entries of ancestor_file and of
1158        mainstream_file.  */
1159     compute_mapping (&ancestor_file, &mainstream_file, false, &mapping);
1160     (void) entries_mapping_reverse_get; /* avoid gcc "defined but not" warning */
1161
1162     /* Compute differences between the entries of ancestor_file and of
1163        modified_file.  */
1164     compute_differences (&ancestor_file, &modified_file, &diffs);
1165
1166     /* Compute the result.  */
1167     result_entries_pointers =
1168       XNMALLOC (mainstream_file.num_entries, gl_list_node_t);
1169     result_entries =
1170       gl_list_create_empty (GL_LINKED_LIST, entry_equals, entry_hashcode,
1171                             NULL, true);
1172     {
1173       size_t k;
1174       for (k = 0; k < mainstream_file.num_entries; k++)
1175         result_entries_pointers[k] =
1176           gl_list_add_last (result_entries, mainstream_file.entries[k]);
1177     }
1178     result_conflicts =
1179       gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL, true);
1180     {
1181       size_t e;
1182       for (e = 0; e < diffs.num_edits; e++)
1183         {
1184           struct edit *edit = diffs.edits[e];
1185           switch (edit->type)
1186             {
1187             case ADDITION:
1188               if (edit->j1 == 0)
1189                 {
1190                   /* An addition to the top of modified_file.
1191                      Apply it to the top of mainstream_file.  */
1192                   ssize_t j;
1193                   for (j = edit->j2; j >= edit->j1; j--)
1194                     {
1195                       struct entry *added_entry = modified_file.entries[j];
1196                       gl_list_add_first (result_entries, added_entry);
1197                     }
1198                 }
1199               else
1200                 {
1201                   ssize_t i_before;
1202                   ssize_t i_after;
1203                   ssize_t k_before;
1204                   ssize_t k_after;
1205                   i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1206                   ASSERT (i_before >= 0);
1207                   i_after = (edit->j2 + 1 == modified_file.num_entries
1208                              ? ancestor_file.num_entries
1209                              : diffs.index_mapping_reverse[edit->j2 + 1]);
1210                   ASSERT (i_after >= 0);
1211                   ASSERT (i_after == i_before + 1);
1212                   /* An addition between ancestor_file.entries[i_before] and
1213                      ancestor_file.entries[i_after].  See whether these two
1214                      entries still exist in mainstream_file and are still
1215                      consecutive.  */
1216                   k_before = entries_mapping_get (&mapping, i_before);
1217                   k_after = (i_after == ancestor_file.num_entries
1218                              ? mainstream_file.num_entries
1219                              : entries_mapping_get (&mapping, i_after));
1220                   if (k_before >= 0 && k_after >= 0 && k_after == k_before + 1)
1221                     {
1222                       /* Yes, the entry before and after are still neighbours
1223                          in mainstream_file.  Apply the addition between
1224                          them.  */
1225                       if (k_after == mainstream_file.num_entries)
1226                         {
1227                           size_t j;
1228                           for (j = edit->j1; j <= edit->j2; j++)
1229                             {
1230                               struct entry *added_entry = modified_file.entries[j];
1231                               gl_list_add_last (result_entries, added_entry);
1232                             }
1233                         }
1234                       else
1235                         {
1236                           gl_list_node_t node_k_after = result_entries_pointers[k_after];
1237                           size_t j;
1238                           for (j = edit->j1; j <= edit->j2; j++)
1239                             {
1240                               struct entry *added_entry = modified_file.entries[j];
1241                               gl_list_add_before (result_entries, node_k_after, added_entry);
1242                             }
1243                         }
1244                     }
1245                   else
1246                     {
1247                       /* It's not clear where the additions should be applied.
1248                          Let the user decide.  */
1249                       struct conflict *c = XMALLOC (struct conflict);
1250                       size_t j;
1251                       c->num_old_entries = 0;
1252                       c->old_entries = NULL;
1253                       c->num_modified_entries = edit->j2 - edit->j1 + 1;
1254                       c->modified_entries =
1255                         XNMALLOC (c->num_modified_entries, struct entry *);
1256                       for (j = edit->j1; j <= edit->j2; j++)
1257                         c->modified_entries[j - edit->j1] = modified_file.entries[j];
1258                       gl_list_add_last (result_conflicts, c);
1259                     }
1260                 }
1261               break;
1262             case REMOVAL:
1263               {
1264                 /* Apply the removals one by one.  */
1265                 size_t i;
1266                 for (i = edit->i1; i <= edit->i2; i++)
1267                   {
1268                     struct entry *removed_entry = ancestor_file.entries[i];
1269                     ssize_t k = entries_mapping_get (&mapping, i);
1270                     if (k >= 0
1271                         && entry_equals (removed_entry,
1272                                          mainstream_file.entries[k]))
1273                       {
1274                         /* The entry to be removed still exists in
1275                            mainstream_file.  Remove it.  */
1276                         gl_list_node_set_value (result_entries,
1277                                                 result_entries_pointers[k],
1278                                                 &empty_entry);
1279                       }
1280                     else
1281                       {
1282                         /* The entry to be removed was already removed or was
1283                            modified.  This is a conflict.  */
1284                         struct conflict *c = XMALLOC (struct conflict);
1285                         c->num_old_entries = 1;
1286                         c->old_entries =
1287                           XNMALLOC (c->num_old_entries, struct entry *);
1288                         c->old_entries[0] = removed_entry;
1289                         c->num_modified_entries = 0;
1290                         c->modified_entries = NULL;
1291                         gl_list_add_last (result_conflicts, c);
1292                       }
1293                   }
1294               }
1295               break;
1296             case CHANGE:
1297               {
1298                 bool done = false;
1299                 /* When the user usually merges entries from the same day,
1300                    and this edit is at the top of the file:  */
1301                 if (split_merged_entry && edit->j1 == 0)
1302                   {
1303                     /* Test whether the change is "simple merged", i.e. whether
1304                        it consists of additions, followed by an augmentation of
1305                        the first changed entry, followed by small changes of the
1306                        remaining entries:
1307                          entry_1
1308                          entry_2
1309                          ...
1310                          entry_n
1311                        are mapped to
1312                          added_entry
1313                          ...
1314                          added_entry
1315                          augmented_entry_1
1316                          modified_entry_2
1317                          ...
1318                          modified_entry_n.  */
1319                     if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1320                       {
1321                         struct entry *split[2];
1322                         bool simple_merged =
1323                           try_split_merged_entry (ancestor_file.entries[edit->i1],
1324                                                   modified_file.entries[edit->i1 + edit->j2 - edit->i2],
1325                                                   split);
1326                         if (simple_merged)
1327                           {
1328                             size_t i;
1329                             for (i = edit->i1 + 1; i <= edit->i2; i++)
1330                               if (entry_fstrcmp (ancestor_file.entries[i],
1331                                                  modified_file.entries[i + edit->j2 - edit->i2],
1332                                                  FSTRCMP_THRESHOLD)
1333                                   < FSTRCMP_THRESHOLD)
1334                                 {
1335                                   simple_merged = false;
1336                                   break;
1337                                 }
1338                           }
1339                         if (simple_merged)
1340                           {
1341                             /* Apply the additions at the top of modified_file.
1342                                Apply each of the single-entry changes
1343                                separately.  */
1344                             size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1345                             size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1346                             ssize_t j;
1347                             /* First part of the split modified_file.entries[edit->j2 - edit->i2 + edit->i1]:  */
1348                             gl_list_add_first (result_entries, split[0]);
1349                             /* The additions.  */
1350                             for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1351                               {
1352                                 struct entry *added_entry = modified_file.entries[j];
1353                                 gl_list_add_first (result_entries, added_entry);
1354                               }
1355                             /* Now the single-entry changes.  */
1356                             for (j = edit->j1 + num_added; j <= edit->j2; j++)
1357                               {
1358                                 struct entry *changed_entry =
1359                                   (j == edit->j1 + num_added
1360                                    ? split[1]
1361                                    : modified_file.entries[j]);
1362                                 size_t i = j + edit->i2 - edit->j2;
1363                                 ssize_t k = entries_mapping_get (&mapping, i);
1364                                 if (k >= 0
1365                                     && entry_equals (ancestor_file.entries[i],
1366                                                      mainstream_file.entries[k]))
1367                                   {
1368                                     gl_list_node_set_value (result_entries,
1369                                                             result_entries_pointers[k],
1370                                                             changed_entry);
1371                                   }
1372                                 else if (!entry_equals (ancestor_file.entries[i],
1373                                                         changed_entry))
1374                                   {
1375                                     struct conflict *c = XMALLOC (struct conflict);
1376                                     c->num_old_entries = 1;
1377                                     c->old_entries =
1378                                       XNMALLOC (c->num_old_entries, struct entry *);
1379                                     c->old_entries[0] = ancestor_file.entries[i];
1380                                     c->num_modified_entries = 1;
1381                                     c->modified_entries =
1382                                       XNMALLOC (c->num_modified_entries, struct entry *);
1383                                     c->modified_entries[0] = changed_entry;
1384                                     gl_list_add_last (result_conflicts, c);
1385                                   }
1386                               }
1387                             done = true;
1388                           }
1389                       }
1390                   }
1391                 if (!done)
1392                   {
1393                     bool simple;
1394                     /* Test whether the change is "simple", i.e. whether it
1395                        consists of small changes to the old ChangeLog entries
1396                        and additions before them:
1397                          entry_1
1398                          ...
1399                          entry_n
1400                        are mapped to
1401                          added_entry
1402                          ...
1403                          added_entry
1404                          modified_entry_1
1405                          ...
1406                          modified_entry_n.  */
1407                     if (edit->i2 - edit->i1 <= edit->j2 - edit->j1)
1408                       {
1409                         size_t i;
1410                         simple = true;
1411                         for (i = edit->i1; i <= edit->i2; i++)
1412                           if (entry_fstrcmp (ancestor_file.entries[i],
1413                                              modified_file.entries[i + edit->j2 - edit->i2],
1414                                              FSTRCMP_THRESHOLD)
1415                               < FSTRCMP_THRESHOLD)
1416                             {
1417                               simple = false;
1418                               break;
1419                             }
1420                       }
1421                     else
1422                       simple = false;
1423                     if (simple)
1424                       {
1425                         /* Apply the additions and each of the single-entry
1426                            changes separately.  */
1427                         size_t num_changed = edit->i2 - edit->i1 + 1; /* > 0 */
1428                         size_t num_added = (edit->j2 - edit->j1 + 1) - num_changed;
1429                         if (edit->j1 == 0)
1430                           {
1431                             /* A simple change at the top of modified_file.
1432                                Apply it to the top of mainstream_file.  */
1433                             ssize_t j;
1434                             for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1435                               {
1436                                 struct entry *added_entry = modified_file.entries[j];
1437                                 gl_list_add_first (result_entries, added_entry);
1438                               }
1439                             for (j = edit->j1 + num_added; j <= edit->j2; j++)
1440                               {
1441                                 struct entry *changed_entry = modified_file.entries[j];
1442                                 size_t i = j + edit->i2 - edit->j2;
1443                                 ssize_t k = entries_mapping_get (&mapping, i);
1444                                 if (k >= 0
1445                                     && entry_equals (ancestor_file.entries[i],
1446                                                      mainstream_file.entries[k]))
1447                                   {
1448                                     gl_list_node_set_value (result_entries,
1449                                                             result_entries_pointers[k],
1450                                                             changed_entry);
1451                                   }
1452                                 else
1453                                   {
1454                                     struct conflict *c;
1455                                     ASSERT (!entry_equals (ancestor_file.entries[i],
1456                                                            changed_entry));
1457                                     c = XMALLOC (struct conflict);
1458                                     c->num_old_entries = 1;
1459                                     c->old_entries =
1460                                       XNMALLOC (c->num_old_entries, struct entry *);
1461                                     c->old_entries[0] = ancestor_file.entries[i];
1462                                     c->num_modified_entries = 1;
1463                                     c->modified_entries =
1464                                       XNMALLOC (c->num_modified_entries, struct entry *);
1465                                     c->modified_entries[0] = changed_entry;
1466                                     gl_list_add_last (result_conflicts, c);
1467                                   }
1468                               }
1469                             done = true;
1470                           }
1471                         else
1472                           {
1473                             ssize_t i_before;
1474                             ssize_t k_before;
1475                             bool linear;
1476                             i_before = diffs.index_mapping_reverse[edit->j1 - 1];
1477                             ASSERT (i_before >= 0);
1478                             /* A simple change after ancestor_file.entries[i_before].
1479                                See whether this entry and the following num_changed
1480                                entries still exist in mainstream_file and are still
1481                                consecutive.  */
1482                             k_before = entries_mapping_get (&mapping, i_before);
1483                             linear = (k_before >= 0);
1484                             if (linear)
1485                               {
1486                                 size_t i;
1487                                 for (i = i_before + 1; i <= i_before + num_changed; i++)
1488                                   if (entries_mapping_get (&mapping, i) != k_before + (i - i_before))
1489                                     {
1490                                       linear = false;
1491                                       break;
1492                                     }
1493                               }
1494                             if (linear)
1495                               {
1496                                 gl_list_node_t node_for_insert =
1497                                   result_entries_pointers[k_before + 1];
1498                                 ssize_t j;
1499                                 for (j = edit->j1 + num_added - 1; j >= edit->j1; j--)
1500                                   {
1501                                     struct entry *added_entry = modified_file.entries[j];
1502                                     gl_list_add_before (result_entries, node_for_insert, added_entry);
1503                                   }
1504                                 for (j = edit->j1 + num_added; j <= edit->j2; j++)
1505                                   {
1506                                     struct entry *changed_entry = modified_file.entries[j];
1507                                     size_t i = j + edit->i2 - edit->j2;
1508                                     ssize_t k = entries_mapping_get (&mapping, i);
1509                                     ASSERT (k >= 0);
1510                                     if (entry_equals (ancestor_file.entries[i],
1511                                                       mainstream_file.entries[k]))
1512                                       {
1513                                         gl_list_node_set_value (result_entries,
1514                                                                 result_entries_pointers[k],
1515                                                                 changed_entry);
1516                                       }
1517                                     else
1518                                       {
1519                                         struct conflict *c;
1520                                         ASSERT (!entry_equals (ancestor_file.entries[i],
1521                                                                changed_entry));
1522                                         c = XMALLOC (struct conflict);
1523                                         c->num_old_entries = 1;
1524                                         c->old_entries =
1525                                           XNMALLOC (c->num_old_entries, struct entry *);
1526                                         c->old_entries[0] = ancestor_file.entries[i];
1527                                         c->num_modified_entries = 1;
1528                                         c->modified_entries =
1529                                           XNMALLOC (c->num_modified_entries, struct entry *);
1530                                         c->modified_entries[0] = changed_entry;
1531                                         gl_list_add_last (result_conflicts, c);
1532                                       }
1533                                   }
1534                                 done = true;
1535                               }
1536                           }
1537                       }
1538                     else
1539                       {
1540                         /* A big change.
1541                            See whether the num_changed entries still exist
1542                            unchanged in mainstream_file and are still
1543                            consecutive.  */
1544                         ssize_t i_first;
1545                         ssize_t k_first;
1546                         bool linear_unchanged;
1547                         i_first = edit->i1;
1548                         k_first = entries_mapping_get (&mapping, i_first);
1549                         linear_unchanged =
1550                           (k_first >= 0
1551                            && entry_equals (ancestor_file.entries[i_first],
1552                                             mainstream_file.entries[k_first]));
1553                         if (linear_unchanged)
1554                           {
1555                             size_t i;
1556                             for (i = i_first + 1; i <= edit->i2; i++)
1557                               if (!(entries_mapping_get (&mapping, i) == k_first + (i - i_first)
1558                                     && entry_equals (ancestor_file.entries[i],
1559                                                      mainstream_file.entries[entries_mapping_get (&mapping, i)])))
1560                                 {
1561                                   linear_unchanged = false;
1562                                   break;
1563                                 }
1564                           }
1565                         if (linear_unchanged)
1566                           {
1567                             gl_list_node_t node_for_insert =
1568                               result_entries_pointers[k_first];
1569                             ssize_t j;
1570                             size_t i;
1571                             for (j = edit->j2; j >= edit->j1; j--)
1572                               {
1573                                 struct entry *new_entry = modified_file.entries[j];
1574                                 gl_list_add_before (result_entries, node_for_insert, new_entry);
1575                               }
1576                             for (i = edit->i1; i <= edit->i2; i++)
1577                               {
1578                                 ssize_t k = entries_mapping_get (&mapping, i);
1579                                 ASSERT (k >= 0);
1580                                 ASSERT (entry_equals (ancestor_file.entries[i],
1581                                                       mainstream_file.entries[k]));
1582                                 gl_list_node_set_value (result_entries,
1583                                                         result_entries_pointers[k],
1584                                                         &empty_entry);
1585                               }
1586                             done = true;
1587                           }
1588                       }
1589                   }
1590                 if (!done)
1591                   {
1592                     struct conflict *c = XMALLOC (struct conflict);
1593                     size_t i, j;
1594                     c->num_old_entries = edit->i2 - edit->i1 + 1;
1595                     c->old_entries =
1596                       XNMALLOC (c->num_old_entries, struct entry *);
1597                     for (i = edit->i1; i <= edit->i2; i++)
1598                       c->old_entries[i - edit->i1] = ancestor_file.entries[i];
1599                     c->num_modified_entries = edit->j2 - edit->j1 + 1;
1600                     c->modified_entries =
1601                       XNMALLOC (c->num_modified_entries, struct entry *);
1602                     for (j = edit->j1; j <= edit->j2; j++)
1603                       c->modified_entries[j - edit->j1] = modified_file.entries[j];
1604                     gl_list_add_last (result_conflicts, c);
1605                   }
1606               }
1607               break;
1608             }
1609         }
1610     }
1611
1612     /* Output the result.  */
1613     {
1614       FILE *fp = fopen (destination_file_name, "w");
1615       if (fp == NULL)
1616         {
1617           fprintf (stderr, "could not write file '%s'\n", destination_file_name);
1618           exit (EXIT_FAILURE);
1619         }
1620
1621       /* Output the conflicts at the top.  */
1622       {
1623         size_t n = gl_list_size (result_conflicts);
1624         size_t i;
1625         for (i = 0; i < n; i++)
1626           conflict_write (fp, (struct conflict *) gl_list_get_at (result_conflicts, i));
1627       }
1628       /* Output the modified and unmodified entries, in order.  */
1629       {
1630         gl_list_iterator_t iter = gl_list_iterator (result_entries);
1631         const void *elt;
1632         gl_list_node_t node;
1633         while (gl_list_iterator_next (&iter, &elt, &node))
1634           entry_write (fp, (struct entry *) elt);
1635         gl_list_iterator_free (&iter);
1636       }
1637
1638       if (fwriteerror (fp))
1639         {
1640           fprintf (stderr, "error writing to file '%s'\n", destination_file_name);
1641           exit (EXIT_FAILURE);
1642         }
1643     }
1644
1645     exit (gl_list_size (result_conflicts) > 0 ? EXIT_FAILURE : EXIT_SUCCESS);
1646   }
1647 }