+/* Return the end a paragraph.
+ ENTRY is an entry.
+ OFFSET is an offset into the entry, OFFSET <= ENTRY->length.
+ Return the offset of the end of paragraph, as an offset <= ENTRY->length;
+ it is the start of a blank line or the end of the entry. */
+static size_t
+find_paragraph_end (const struct entry *entry, size_t offset)
+{
+ const char *string = entry->string;
+ size_t length = entry->length;
+
+ for (;;)
+ {
+ const char *nl = memchr (string + offset, '\n', length - offset);
+ if (nl == NULL)
+ return length;
+ offset = (nl - string) + 1;
+ if (offset < length && string[offset] == '\n')
+ return offset;
+ }
+}
+
+/* Split a merged entry.
+ Given an old entry of the form
+ TITLE
+ BODY
+ and a new entry of the form
+ TITLE
+ BODY1
+ BODY'
+ where the two titles are the same and BODY and BODY' are very similar,
+ this computes two new entries
+ TITLE
+ BODY1
+ and
+ TITLE
+ BODY'
+ and returns true.
+ If the entries don't have this form, it returns false. */
+static bool
+try_split_merged_entry (const struct entry *old_entry,
+ const struct entry *new_entry,
+ struct entry *new_split[2])
+{
+ size_t old_title_len = find_paragraph_end (old_entry, 0);
+ size_t new_title_len = find_paragraph_end (new_entry, 0);
+ struct entry old_body;
+ struct entry new_body;
+ size_t best_split_offset;
+ double best_similarity;
+ size_t split_offset;
+
+ /* Same title? */
+ if (!(old_title_len == new_title_len
+ && memcmp (old_entry->string, new_entry->string, old_title_len) == 0))
+ return false;
+
+ old_body.string = old_entry->string + old_title_len;
+ old_body.length = old_entry->length - old_title_len;
+
+ /* Determine where to split the new entry.
+ This is done by maximizing the similarity between BODY and BODY'. */
+ best_split_offset = split_offset = new_title_len;
+ best_similarity = 0.0;
+ for (;;)
+ {
+ double similarity;
+
+ new_body.string = new_entry->string + split_offset;
+ new_body.length = new_entry->length - split_offset;
+ similarity = entry_fstrcmp (&old_body, &new_body);
+ if (similarity > best_similarity)
+ {
+ best_split_offset = split_offset;
+ best_similarity = similarity;
+ }
+ if (best_similarity == 1.0)
+ /* It cannot get better. */
+ break;
+
+ if (split_offset < new_entry->length)
+ split_offset = find_paragraph_end (new_entry, split_offset + 1);
+ else
+ break;
+ }
+
+ /* BODY' should not be empty. */
+ if (best_split_offset == new_entry->length)
+ return false;
+ ASSERT (new_entry->string[best_split_offset] == '\n');
+
+ /* A certain similarity between BODY and BODY' is required. */
+ if (best_similarity < FSTRCMP_STRICTER_THRESHOLD)
+ return false;
+
+ new_split[0] = entry_create (new_entry->string, best_split_offset + 1);
+
+ {
+ size_t len1 = new_title_len;
+ size_t len2 = new_entry->length - best_split_offset;
+ char *combined = XNMALLOC (len1 + len2, char);
+ memcpy (combined, new_entry->string, len1);
+ memcpy (combined + len1, new_entry->string + best_split_offset, len2);
+ new_split[1] = entry_create (combined, len1 + len2);
+ }
+
+ return true;
+}
+