Commit 97ad85b88d7a2f6d1c31a07de3c3c0b4b504d4ee

Russell Belfer 2013-12-02T13:30:05

Add GIT_DIFF_FIND_DELETE_UNMODIFIED flag

When doing copy detection, it is often necessary to include UNMODIFIED records in the git_diff so they are available as source records for GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED. Yet you will often not want these UNMODIFIED records in the final diff. This adds a flag which marks these UNMODIFIED records for deletion from the diff list so they will be removed once the rename detection phase is over.

diff --git a/include/git2/diff.h b/include/git2/diff.h
index 61d2883..51ad349 100644
--- a/include/git2/diff.h
+++ b/include/git2/diff.h
@@ -468,41 +468,71 @@ typedef int (*git_diff_line_cb)(
  * Flags to control the behavior of diff rename/copy detection.
  */
 typedef enum {
-	/** look for renames? (`--find-renames`) */
+	/** Look for renames? (`--find-renames`) */
 	GIT_DIFF_FIND_RENAMES = (1u << 0),
-	/** consider old side of modified for renames? (`--break-rewrites=N`) */
+
+	/** Consider old side of MODIFIED for renames? (`--break-rewrites=N`) */
 	GIT_DIFF_FIND_RENAMES_FROM_REWRITES = (1u << 1),
 
-	/** look for copies? (a la `--find-copies`) */
+	/** Look for copies? (a la `--find-copies`) */
 	GIT_DIFF_FIND_COPIES = (1u << 2),
-	/** consider unmodified as copy sources? (`--find-copies-harder`) */
+
+	/** Consider UNMODIFIED as copy sources? (`--find-copies-harder`).
+	 *
+	 * For this to work correctly, use GIT_DIFF_INCLUDE_UNMODIFIED when
+	 * the initial `git_diff` is being generated.
+	 */
 	GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED = (1u << 3),
 
-	/** mark large rewrites for split (`--break-rewrites=/M`) */
+	/** Mark significant rewrites for split (`--break-rewrites=/M`) */
 	GIT_DIFF_FIND_REWRITES = (1u << 4),
-	/** actually split large rewrites into delete/add pairs */
+	/** Actually split large rewrites into delete/add pairs */
 	GIT_DIFF_BREAK_REWRITES = (1u << 5),
-	/** mark rewrites for split and break into delete/add pairs */
+	/** Mark rewrites for split and break into delete/add pairs */
 	GIT_DIFF_FIND_AND_BREAK_REWRITES =
 		(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES),
 
-	/** find renames/copies for untracked items in working directory */
+	/** Find renames/copies for UNTRACKED items in working directory.
+	 *
+	 * For this to work correctly, use GIT_DIFF_INCLUDE_UNTRACKED when the
+	 * initial `git_diff` is being generated (and obviously the diff must
+	 * be against the working directory for this to make sense).
+	 */
 	GIT_DIFF_FIND_FOR_UNTRACKED = (1u << 6),
 
-	/** turn on all finding features */
+	/** Turn on all finding features (mask 0x0ff; flags at bit 12 and above are not included). */
 	GIT_DIFF_FIND_ALL = (0x0ff),
 
-	/** measure similarity ignoring leading whitespace (default) */
+	/** Measure similarity ignoring leading whitespace (default) */
 	GIT_DIFF_FIND_IGNORE_LEADING_WHITESPACE = 0,
-	/** measure similarity ignoring all whitespace */
+	/** Measure similarity ignoring all whitespace */
 	GIT_DIFF_FIND_IGNORE_WHITESPACE = (1u << 12),
-	/** measure similarity including all data */
+	/** Measure similarity including all data */
 	GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE = (1u << 13),
-	/** measure similarity only by comparing SHAs (fast and cheap) */
+	/** Measure similarity only by comparing SHAs (fast and cheap) */
 	GIT_DIFF_FIND_EXACT_MATCH_ONLY = (1u << 14),
 
-	/** do not break rewrites unless they contribute to a rename */
+	/** Do not break rewrites unless they contribute to a rename.
+	 *
+	 * Normally, GIT_DIFF_FIND_AND_BREAK_REWRITES will measure the self-
+	 * similarity of modified files and split the ones that have changed a
+	 * lot into a DELETE / ADD pair.  Then the sides of that pair will be
+	 * considered candidates for rename and copy detection.
+	 *
+	 * If you add this flag in and the split pair is *not* used for an
+	 * actual rename or copy, then the modified record will be restored to
+	 * a regular MODIFIED record instead of being split.
+	 */
 	GIT_DIFF_BREAK_REWRITES_FOR_RENAMES_ONLY  = (1u << 15),
+
+	/** Delete UNMODIFIED records after find_similar is done.
+	 *
+	 * GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED (`--find-copies-harder`) needs
+	 * a diff built with GIT_DIFF_INCLUDE_UNMODIFIED.  Pass this flag to
+	 * drop those UNMODIFIED records from the final result.  It only takes
+	 * effect together with GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED.
+	 */
+	GIT_DIFF_FIND_DELETE_UNMODIFIED = (1u << 16),
 } git_diff_find_t;
 
 /**
diff --git a/src/diff_tform.c b/src/diff_tform.c
index 3b5575d..bff821d 100644
--- a/src/diff_tform.c
+++ b/src/diff_tform.c
@@ -746,6 +746,8 @@ static bool is_rename_source(
 	case GIT_DELTA_UNMODIFIED:
 		if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED))
 			return false;
+		if (FLAG_SET(opts, GIT_DIFF_FIND_DELETE_UNMODIFIED))
+			delta->flags |= GIT_DIFF_FLAG__TO_DELETE;
 		break;
 
 	default: /* MODIFIED, RENAMED, COPIED */
diff --git a/tests/diff/rename.c b/tests/diff/rename.c
index 7f03393..e081d61 100644
--- a/tests/diff/rename.c
+++ b/tests/diff/rename.c
@@ -1333,3 +1333,51 @@ void test_diff_rename__can_find_copy_to_split(void)
 
 	git_buf_free(&c1);
 }
+
+void test_diff_rename__can_delete_unmodified_deltas(void)
+{
+	git_buf c1 = GIT_BUF_INIT;
+	git_index *index;
+	git_tree *tree;
+	git_diff *diff;
+	git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
+	git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
+	diff_expects exp;
+	/* Check that GIT_DIFF_FIND_DELETE_UNMODIFIED removes UNMODIFIED deltas. */
+	cl_git_pass(git_futils_readbuffer(&c1, "renames/songof7cities.txt"));
+	cl_git_pass(git_futils_writebuffer(&c1, "renames/untimely.txt", 0, 0));
+	/* Load the HEAD tree into the index, then stage untimely.txt. */
+	cl_git_pass(
+		git_revparse_single((git_object **)&tree, g_repo, "HEAD^{tree}"));
+
+	cl_git_pass(git_repository_index(&index, g_repo));
+	cl_git_pass(git_index_read_tree(index, tree));
+	cl_git_pass(git_index_add_bypath(index, "untimely.txt"));
+	/* Keep UNMODIFIED deltas in the diff (3 of them are asserted below). */
+	diffopts.flags = GIT_DIFF_INCLUDE_UNMODIFIED;
+
+	cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
+	/* Before find_similar: 1 MODIFIED plus 3 UNMODIFIED deltas. */
+	memset(&exp, 0, sizeof(exp));
+	cl_git_pass(git_diff_foreach(
+		diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
+	cl_assert_equal_i(4, exp.files);
+	cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
+	cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNMODIFIED]);
+
+	opts.flags = GIT_DIFF_FIND_ALL | GIT_DIFF_FIND_DELETE_UNMODIFIED;
+	cl_git_pass(git_diff_find_similar(diff, &opts));
+	/* After find_similar: only a DELETED and a COPIED delta remain. */
+	memset(&exp, 0, sizeof(exp));
+	cl_git_pass(git_diff_foreach(
+		diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
+	cl_assert_equal_i(2, exp.files);
+	cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
+	cl_assert_equal_i(1, exp.file_status[GIT_DELTA_COPIED]);
+
+	git_diff_free(diff);
+	git_tree_free(tree);
+	git_index_free(index);
+
+	git_buf_free(&c1);
+}