-m(dup2): Texinfo typo
[gnulib.git] / lib / git-merge-changelog.c
index 1911c08..a199f32 100644 (file)
@@ -1,5 +1,5 @@
 /* git-merge-changelog - git "merge" driver for GNU style ChangeLog files.
-   Copyright (C) 2008 Bruno Haible <bruno@clisp.org>
+   Copyright (C) 2008-2009 Bruno Haible <bruno@clisp.org>
 
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -21,7 +21,7 @@
    default merge driver has no clue how to deal with this. Furthermore
    the conflicts are presented with more <<<< ==== >>>> markers than
    necessary; this is because the default merge driver makes pointless
-   effects to look at the individual line changes inside a ChangeLog entry.
+   efforts to look at the individual line changes inside a ChangeLog entry.
 
    This program serves as a 'git' merge driver that avoids these problems.
    1. It produces no conflict when ChangeLog entries have been inserted
@@ -186,7 +186,7 @@ entry_equals (const void *elt1, const void *elt2)
   const struct entry *entry2 = (const struct entry *) elt2;
   return entry1->length == entry2->length
         && memcmp (entry1->string, entry2->string, entry1->length) == 0;
-};
+}
 
 /* Return a hash code of the contents of a ChangeLog entry.  */
 static size_t
@@ -211,9 +211,12 @@ entry_hashcode (const void *elt)
 
 /* Perform a fuzzy comparison of two ChangeLog entries.
    Return a similarity measure of the two entries, a value between 0 and 1.
-   0 stands for very distinct, 1 for identical.  */
+   0 stands for very distinct, 1 for identical.
+   If the result is < LOWER_BOUND, an arbitrary other value < LOWER_BOUND can
+   be returned.  */
 static double
-entry_fstrcmp (const struct entry *entry1, const struct entry *entry2)
+entry_fstrcmp (const struct entry *entry1, const struct entry *entry2,
+              double lower_bound)
 {
   /* fstrcmp works only on NUL terminated strings.  */
   char *memory;
@@ -233,7 +236,8 @@ entry_fstrcmp (const struct entry *entry1, const struct entry *entry2)
     p += entry2->length;
     *p++ = '\0';
   }
-  similarity = fstrcmp (memory, memory + entry1->length + 1);
+  similarity =
+    fstrcmp_bounded (memory, memory + entry1->length + 1, lower_bound);
   freea (memory);
   return similarity;
 }
@@ -325,18 +329,159 @@ read_changelog_file (const char *filename, struct changelog_file *result)
   }
 }
 
+/* A mapping (correspondence) between entries of FILE1 and of FILE2.  */
+struct entries_mapping
+{
+  struct changelog_file *file1;
+  struct changelog_file *file2;
+  /* Mapping from indices in FILE1 to indices in FILE2.
+     A value -1 means that the entry from FILE1 is not found in FILE2.
+     A value -2 means that it has not yet been computed.  */
+  ssize_t *index_mapping;
+  /* Mapping from indices in FILE2 to indices in FILE1.
+     A value -1 means that the entry from FILE2 is not found in FILE1.
+     A value -2 means that it has not yet been computed.  */
+  ssize_t *index_mapping_reverse;
+};
+
+/* Look up (or lazily compute) the mapping of an entry in FILE1.
+   i is the index in FILE1.
+   Return the index in FILE2, or -1 when the entry is not found in FILE2.  */
+static ssize_t
+entries_mapping_get (struct entries_mapping *mapping, ssize_t i)
+{
+  if (mapping->index_mapping[i] < -1)
+    {
+      struct changelog_file *file1 = mapping->file1;
+      struct changelog_file *file2 = mapping->file2;
+      size_t n1 = file1->num_entries;
+      size_t n2 = file2->num_entries;
+      struct entry *entry_i = file1->entries[i];
+      ssize_t j;
+
+      /* Search whether it approximately occurs in file2.  */
+      ssize_t best_j = -1;
+      double best_j_similarity = 0.0;
+      for (j = n2 - 1; j >= 0; j--)
+       if (mapping->index_mapping_reverse[j] < 0)
+         {
+           double similarity =
+             entry_fstrcmp (entry_i, file2->entries[j], best_j_similarity);
+           if (similarity > best_j_similarity)
+             {
+               best_j = j;
+               best_j_similarity = similarity;
+             }
+         }
+      if (best_j_similarity >= FSTRCMP_THRESHOLD)
+       {
+         /* Found a similar entry in file2.  */
+         struct entry *entry_j = file2->entries[best_j];
+         /* Search whether it approximately occurs in file1 at index i.  */
+         ssize_t best_i = -1;
+         double best_i_similarity = 0.0;
+         ssize_t ii;
+         for (ii = n1 - 1; ii >= 0; ii--)
+           if (mapping->index_mapping[ii] < 0)
+             {
+               double similarity =
+                 entry_fstrcmp (file1->entries[ii], entry_j,
+                                best_i_similarity);
+               if (similarity > best_i_similarity)
+                 {
+                   best_i = ii;
+                   best_i_similarity = similarity;
+                 }
+             }
+         if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i)
+           {
+             mapping->index_mapping[i] = best_j;
+             mapping->index_mapping_reverse[best_j] = i;
+           }
+       }
+      if (mapping->index_mapping[i] < -1)
+       /* It does not approximately occur in FILE2.
+          Remember it, for next time.  */
+       mapping->index_mapping[i] = -1;
+    }
+  return mapping->index_mapping[i];
+}
+
+/* Look up (or lazily compute) the mapping of an entry in FILE2.
+   j is the index in FILE2.
+   Return the index in FILE1, or -1 when the entry is not found in FILE1.  */
+static ssize_t
+entries_mapping_reverse_get (struct entries_mapping *mapping, ssize_t j)
+{
+  if (mapping->index_mapping_reverse[j] < -1)
+    {
+      struct changelog_file *file1 = mapping->file1;
+      struct changelog_file *file2 = mapping->file2;
+      size_t n1 = file1->num_entries;
+      size_t n2 = file2->num_entries;
+      struct entry *entry_j = file2->entries[j];
+      ssize_t i;
+
+      /* Search whether it approximately occurs in file1.  */
+      ssize_t best_i = -1;
+      double best_i_similarity = 0.0;
+      for (i = n1 - 1; i >= 0; i--)
+       if (mapping->index_mapping[i] < 0)
+         {
+           double similarity =
+             entry_fstrcmp (file1->entries[i], entry_j, best_i_similarity);
+           if (similarity > best_i_similarity)
+             {
+               best_i = i;
+               best_i_similarity = similarity;
+             }
+         }
+      if (best_i_similarity >= FSTRCMP_THRESHOLD)
+       {
+         /* Found a similar entry in file1.  */
+         struct entry *entry_i = file1->entries[best_i];
+         /* Search whether it approximately occurs in file2 at index j.  */
+         ssize_t best_j = -1;
+         double best_j_similarity = 0.0;
+         ssize_t jj;
+         for (jj = n2 - 1; jj >= 0; jj--)
+           if (mapping->index_mapping_reverse[jj] < 0)
+             {
+               double similarity =
+                 entry_fstrcmp (entry_i, file2->entries[jj],
+                                best_j_similarity);
+               if (similarity > best_j_similarity)
+                 {
+                   best_j = jj;
+                   best_j_similarity = similarity;
+                 }
+             }
+         if (best_j_similarity >= FSTRCMP_THRESHOLD && best_j == j)
+           {
+             mapping->index_mapping_reverse[j] = best_i;
+             mapping->index_mapping[best_i] = j;
+           }
+       }
+      if (mapping->index_mapping_reverse[j] < -1)
+       /* It does not approximately occur in FILE1.
+          Remember it, for next time.  */
+       mapping->index_mapping_reverse[j] = -1;
+    }
+  return mapping->index_mapping_reverse[j];
+}
+
 /* Compute a mapping (correspondence) between entries of FILE1 and of FILE2.
-   Return a set of two arrays:
-     - An array mapping FILE1 indices to FILE2 indices (or -1 when the entry
-       from FILE1 is not found in FILE2).
-     - An array mapping FILE2 indices to FILE1 indices (or -1 when the entry
-       from FILE2 is not found in FILE1).
    The correspondence also takes into account small modifications; i.e. the
    indicated relation is not equality of entries but best-match similarity
-   of entries.  */
+   of entries.
+   If FULL is true, the fuzzy matching of all FILE1 entries is done up-front.
+   If it is false, it is done lazily, on demand, through the functions
+   entries_mapping_get and entries_mapping_reverse_get.
+   Return the result in *RESULT.  */
 static void
 compute_mapping (struct changelog_file *file1, struct changelog_file *file2,
-                ssize_t *result[2])
+                bool full,
+                struct entries_mapping *result)
 {
   /* Mapping from indices in file1 to indices in file2.  */
   ssize_t *index_mapping;
@@ -348,15 +493,15 @@ compute_mapping (struct changelog_file *file1, struct changelog_file *file2,
 
   index_mapping = XNMALLOC (n1, ssize_t);
   for (i = 0; i < n1; i++)
-    index_mapping[i] = -1;
+    index_mapping[i] = -2;
 
   index_mapping_reverse = XNMALLOC (n2, ssize_t);
   for (j = 0; j < n2; j++)
-    index_mapping_reverse[j] = -1;
+    index_mapping_reverse[j] = -2;
 
   for (i = n1 - 1; i >= 0; i--)
     /* Take an entry from file1.  */
-    if (index_mapping[i] < 0)
+    if (index_mapping[i] < -1)
       {
        struct entry *entry = file1->entries[i];
        /* Search whether it occurs in file2.  */
@@ -365,87 +510,55 @@ compute_mapping (struct changelog_file *file1, struct changelog_file *file2,
          {
            j = n2 - 1 - j;
            /* Found an exact correspondence.  */
-           ASSERT (index_mapping_reverse[j] < 0);
-           index_mapping[i] = j;
-           index_mapping_reverse[j] = i;
-           /* Look for more occurrences of the same entry.  */
-           {
-             ssize_t curr_i = i;
-             ssize_t curr_j = j;
-
-             for (;;)
+           /* If index_mapping_reverse[j] >= 0, we have already seen other
+              copies of this entry, and there were more occurrences of it in
+              file1 than in file2.  In this case, do nothing.  */
+           if (index_mapping_reverse[j] < 0)
+             {
+               index_mapping[i] = j;
+               index_mapping_reverse[j] = i;
+               /* Look for more occurrences of the same entry.  Match them
+                  as long as they pair up.  Unpaired occurrences of the same
+                  entry are left without mapping.  */
                {
-                 ssize_t next_i;
-                 ssize_t next_j;
-
-                 next_i =
-                   gl_list_indexof_from (file1->entries_reversed, n1 - curr_i,
-                                         entry);
-                 if (next_i < 0)
-                   break;
-                 next_j =
-                   gl_list_indexof_from (file2->entries_reversed, n2 - curr_j,
-                                         entry);
-                 if (next_j < 0)
-                   break;
-                 curr_i = n1 - 1 - next_i;
-                 curr_j = n2 - 1 - next_j;
-                 ASSERT (index_mapping[curr_i] < 0);
-                 ASSERT (index_mapping_reverse[curr_j] < 0);
-                 index_mapping[curr_i] = curr_j;
-                 index_mapping_reverse[curr_j] = curr_i;
-               }
-           }
-         }
-      }
+                 ssize_t curr_i = i;
+                 ssize_t curr_j = j;
 
-  for (i = n1 - 1; i >= 0; i--)
-    /* Take an entry from file1.  */
-    if (index_mapping[i] < 0)
-      {
-       struct entry *entry_i = file1->entries[i];
-       /* Search whether it approximately occurs in file2.  */
-       ssize_t best_j = -1;
-       double best_j_similarity = 0.0;
-       for (j = n2 - 1; j >= 0; j--)
-         if (index_mapping_reverse[j] < 0)
-           {
-             double similarity = entry_fstrcmp (entry_i, file2->entries[j]);
-             if (similarity > best_j_similarity)
-               {
-                 best_j = j;
-                 best_j_similarity = similarity;
-               }
-           }
-       if (best_j_similarity >= FSTRCMP_THRESHOLD)
-         {
-           /* Found a similar entry in file2.  */
-           struct entry *entry_j = file2->entries[best_j];
-           /* Search whether it approximately occurs in file1 at index i.  */
-           ssize_t best_i = -1;
-           double best_i_similarity = 0.0;
-           ssize_t ii;
-           for (ii = n1 - 1; ii >= 0; ii--)
-             if (index_mapping[ii] < 0)
-               {
-                 double similarity =
-                   entry_fstrcmp (file1->entries[ii], entry_j);
-                 if (similarity > best_i_similarity)
+                 for (;;)
                    {
-                     best_i = i;
-                     best_i_similarity = similarity;
+                     ssize_t next_i;
+                     ssize_t next_j;
+
+                     next_i =
+                       gl_list_indexof_from (file1->entries_reversed,
+                                             n1 - curr_i, entry);
+                     if (next_i < 0)
+                       break;
+                     next_j =
+                       gl_list_indexof_from (file2->entries_reversed,
+                                             n2 - curr_j, entry);
+                     if (next_j < 0)
+                       break;
+                     curr_i = n1 - 1 - next_i;
+                     curr_j = n2 - 1 - next_j;
+                     ASSERT (index_mapping[curr_i] < 0);
+                     ASSERT (index_mapping_reverse[curr_j] < 0);
+                     index_mapping[curr_i] = curr_j;
+                     index_mapping_reverse[curr_j] = curr_i;
                    }
                }
-           if (best_i_similarity >= FSTRCMP_THRESHOLD && best_i == i)
-             {
-               index_mapping[i] = best_j;
-               index_mapping_reverse[best_j] = i;
              }
          }
       }
 
-  result[0] = index_mapping;
-  result[1] = index_mapping_reverse;
+  result->file1 = file1;
+  result->file2 = file2;
+  result->index_mapping = index_mapping;
+  result->index_mapping_reverse = index_mapping_reverse;
+
+  if (full)
+    for (i = n1 - 1; i >= 0; i--)
+      entries_mapping_get (result, i);
 }
 
 /* An "edit" is a textual modification performed by the user, that needs to
@@ -729,7 +842,8 @@ try_split_merged_entry (const struct entry *old_entry,
 
       new_body.string = new_entry->string + split_offset;
       new_body.length = new_entry->length - split_offset;
-      similarity = entry_fstrcmp (&old_body, &new_body);
+      similarity =
+       entry_fstrcmp (&old_body, &new_body, best_similarity);
       if (similarity > best_similarity)
        {
          best_split_offset = split_offset;
@@ -808,7 +922,7 @@ conflict_write (FILE *fp, struct conflict *c)
 
 /* Long options.  */
 static const struct option long_options[] =
-{ 
+{
   { "help", no_argument, NULL, 'h' },
   { "split-merged-entry", no_argument, NULL, CHAR_MAX + 1 },
   { "version", no_argument, NULL, 'V' },
@@ -922,9 +1036,7 @@ There is NO WARRANTY, to the extent permitted by law.\n\
     struct changelog_file mainstream_file;
     struct changelog_file modified_file;
     /* Mapping from indices in ancestor_file to indices in mainstream_file.  */
-    ssize_t *index_mapping;
-    /* Mapping from indices in mainstream_file to indices in ancestor_file.  */
-    ssize_t *index_mapping_reverse;
+    struct entries_mapping mapping;
     struct differences diffs;
     gl_list_node_t *result_entries_pointers; /* array of pointers into result_entries */
     gl_list_t /* <struct entry *> */ result_entries;
@@ -975,7 +1087,8 @@ There is NO WARRANTY, to the extent permitted by law.\n\
        How to distinguish these situation? There are several hints:
         - During a "git stash apply", GIT_REFLOG_ACTION is not set.  During
           a "git pull", it is set to 'pull '. During a "git pull --rebase",
-          it is set to 'pull --rebase'.
+          it is set to 'pull --rebase'.  During a "git cherry-pick", it is
+          set to 'cherry-pick'.
         - During a "git stash apply", there is an environment variable of
           the form GITHEAD_<40_hex_digits>='Stashed changes'.  */
     {
@@ -1002,7 +1115,8 @@ There is NO WARRANTY, to the extent permitted by law.\n\
                downstream = true;
              else
                {
-                 /* "git stash apply", "git rebase" and similar.  */
+                 /* "git stash apply", "git rebase", "git cherry-pick" and
+                    similar.  */
                  downstream = false;
                }
            }
@@ -1041,12 +1155,8 @@ There is NO WARRANTY, to the extent permitted by law.\n\
 
     /* Compute correspondence between the entries of ancestor_file and of
        mainstream_file.  */
-    {
-      ssize_t *result[2];
-      compute_mapping (&ancestor_file, &mainstream_file, result);
-      index_mapping = result[0];
-      index_mapping_reverse = result[1];
-    }
+    compute_mapping (&ancestor_file, &mainstream_file, false, &mapping);
+    (void) entries_mapping_reverse_get; /* avoid gcc "defined but not used" warning */
 
     /* Compute differences between the entries of ancestor_file and of
        modified_file.  */
@@ -1102,10 +1212,10 @@ There is NO WARRANTY, to the extent permitted by law.\n\
                     ancestor_file.entries[i_after].  See whether these two
                     entries still exist in mainstream_file and are still
                     consecutive.  */
-                 k_before = index_mapping[i_before];
+                 k_before = entries_mapping_get (&mapping, i_before);
                  k_after = (i_after == ancestor_file.num_entries
                             ? mainstream_file.num_entries
-                            : index_mapping[i_after]);
+                            : entries_mapping_get (&mapping, i_after));
                  if (k_before >= 0 && k_after >= 0 && k_after == k_before + 1)
                    {
                      /* Yes, the entry before and after are still neighbours
@@ -1155,7 +1265,7 @@ There is NO WARRANTY, to the extent permitted by law.\n\
                for (i = edit->i1; i <= edit->i2; i++)
                  {
                    struct entry *removed_entry = ancestor_file.entries[i];
-                   ssize_t k = index_mapping[i];
+                   ssize_t k = entries_mapping_get (&mapping, i);
                    if (k >= 0
                        && entry_equals (removed_entry,
                                         mainstream_file.entries[k]))
@@ -1217,7 +1327,8 @@ There is NO WARRANTY, to the extent permitted by law.\n\
                            size_t i;
                            for (i = edit->i1 + 1; i <= edit->i2; i++)
                              if (entry_fstrcmp (ancestor_file.entries[i],
-                                                modified_file.entries[i + edit->j2 - edit->i2])
+                                                modified_file.entries[i + edit->j2 - edit->i2],
+                                                FSTRCMP_THRESHOLD)
                                  < FSTRCMP_THRESHOLD)
                                {
                                  simple_merged = false;
@@ -1248,7 +1359,7 @@ There is NO WARRANTY, to the extent permitted by law.\n\
                                   ? split[1]
                                   : modified_file.entries[j]);
                                size_t i = j + edit->i2 - edit->j2;
-                               ssize_t k = index_mapping[i];
+                               ssize_t k = entries_mapping_get (&mapping, i);
                                if (k >= 0
                                    && entry_equals (ancestor_file.entries[i],
                                                     mainstream_file.entries[k]))
@@ -1298,7 +1409,8 @@ There is NO WARRANTY, to the extent permitted by law.\n\
                        simple = true;
                        for (i = edit->i1; i <= edit->i2; i++)
                          if (entry_fstrcmp (ancestor_file.entries[i],
-                                            modified_file.entries[i + edit->j2 - edit->i2])
+                                            modified_file.entries[i + edit->j2 - edit->i2],
+                                            FSTRCMP_THRESHOLD)
                              < FSTRCMP_THRESHOLD)
                            {
                              simple = false;
@@ -1327,7 +1439,7 @@ There is NO WARRANTY, to the extent permitted by law.\n\
                              {
                                struct entry *changed_entry = modified_file.entries[j];
                                size_t i = j + edit->i2 - edit->j2;
-                               ssize_t k = index_mapping[i];
+                               ssize_t k = entries_mapping_get (&mapping, i);
                                if (k >= 0
                                    && entry_equals (ancestor_file.entries[i],
                                                     mainstream_file.entries[k]))
@@ -1366,13 +1478,13 @@ There is NO WARRANTY, to the extent permitted by law.\n\
                               See whether this entry and the following num_changed
                               entries still exist in mainstream_file and are still
                               consecutive.  */
-                           k_before = index_mapping[i_before];
+                           k_before = entries_mapping_get (&mapping, i_before);
                            linear = (k_before >= 0);
                            if (linear)
                              {
                                size_t i;
                                for (i = i_before + 1; i <= i_before + num_changed; i++)
-                                 if (index_mapping[i] != k_before + (i - i_before))
+                                 if (entries_mapping_get (&mapping, i) != k_before + (i - i_before))
                                    {
                                      linear = false;
                                      break;
@@ -1392,7 +1504,7 @@ There is NO WARRANTY, to the extent permitted by law.\n\
                                  {
                                    struct entry *changed_entry = modified_file.entries[j];
                                    size_t i = j + edit->i2 - edit->j2;
-                                   ssize_t k = index_mapping[i];
+                                   ssize_t k = entries_mapping_get (&mapping, i);
                                    ASSERT (k >= 0);
                                    if (entry_equals (ancestor_file.entries[i],
                                                      mainstream_file.entries[k]))
@@ -1432,7 +1544,7 @@ There is NO WARRANTY, to the extent permitted by law.\n\
                        ssize_t k_first;
                        bool linear_unchanged;
                        i_first = edit->i1;
-                       k_first = index_mapping[i_first];
+                       k_first = entries_mapping_get (&mapping, i_first);
                        linear_unchanged =
                          (k_first >= 0
                           && entry_equals (ancestor_file.entries[i_first],
@@ -1441,9 +1553,9 @@ There is NO WARRANTY, to the extent permitted by law.\n\
                          {
                            size_t i;
                            for (i = i_first + 1; i <= edit->i2; i++)
-                             if (!(index_mapping[i] == k_first + (i - i_first)
+                             if (!(entries_mapping_get (&mapping, i) == k_first + (i - i_first)
                                    && entry_equals (ancestor_file.entries[i],
-                                                    mainstream_file.entries[index_mapping[i]])))
+                                                    mainstream_file.entries[entries_mapping_get (&mapping, i)])))
                                {
                                  linear_unchanged = false;
                                  break;
@@ -1462,7 +1574,7 @@ There is NO WARRANTY, to the extent permitted by law.\n\
                              }
                            for (i = edit->i1; i <= edit->i2; i++)
                              {
-                               ssize_t k = index_mapping[i];
+                               ssize_t k = entries_mapping_get (&mapping, i);
                                ASSERT (k >= 0);
                                ASSERT (entry_equals (ancestor_file.entries[i],
                                                      mainstream_file.entries[k]));