Commit aa408cbfc4940e05cb7843383264623e45737bb9

Edward Thomson 2013-03-11T11:18:00

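handle small files in similarity metrics

Treat GIT_EBUFS (input too small to compute a hash signature) from the
hashsig callbacks as "no signature" instead of an error, and skip the
similarity comparison when either file has no cached signature.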

diff --git a/src/diff_tform.c b/src/diff_tform.c
index 958d2bf..e9969d9 100644
--- a/src/diff_tform.c
+++ b/src/diff_tform.c
@@ -174,16 +174,34 @@ static int find_similar__hashsig_for_file(
 	void **out, const git_diff_file *f, const char *path, void *p)
 {
 	git_hashsig_option_t opt = (git_hashsig_option_t)p;
+	int error = 0;
+
 	GIT_UNUSED(f);
-	return git_hashsig_create_fromfile((git_hashsig **)out, path, opt);
+	error = git_hashsig_create_fromfile((git_hashsig **)out, path, opt);
+	
+	if (error == GIT_EBUFS) {
+		error = 0;
+		giterr_clear();
+	}
+
+	return error;
 }
 
 static int find_similar__hashsig_for_buf(
 	void **out, const git_diff_file *f, const char *buf, size_t len, void *p)
 {
 	git_hashsig_option_t opt = (git_hashsig_option_t)p;
+	int error = 0;
+	
 	GIT_UNUSED(f);
-	return git_hashsig_create((git_hashsig **)out, buf, len, opt);
+	error = git_hashsig_create((git_hashsig **)out, buf, len, opt);
+	
+	if (error == GIT_EBUFS) {
+		error = 0;
+		giterr_clear();
+	}
+
+	return error;
 }
 
 static void find_similar__hashsig_free(void *sig, void *payload)
@@ -414,6 +432,10 @@ static int similarity_measure(
 		return -1;
 	if (!cache[b_idx] && similarity_calc(diff, opts, b_idx, cache) < 0)
 		return -1;
+	
+	/* some metrics may not wish to process this file (too big / too small) */
+	if (!cache[a_idx] || !cache[b_idx])
+		return 0;
 
 	/* compare signatures */
 	if (opts->metric->similarity(
diff --git a/tests-clar/diff/rename.c b/tests-clar/diff/rename.c
index ae76640..5a8af93 100644
--- a/tests-clar/diff/rename.c
+++ b/tests-clar/diff/rename.c
@@ -352,6 +352,39 @@ void test_diff_rename__not_exact_match(void)
 	git_tree_free(new_tree);
 }
 
+void test_diff_rename__handles_small_files(void)
+{
+	const char *tree_sha = "2bc7f351d20b53f1c72c16c4b036e491c478c49a";
+	git_index *index;
+	git_tree *tree;
+	git_diff_list *diff;
+	git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
+	git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
+
+	cl_git_pass(git_repository_index(&index, g_repo));
+
+	tree = resolve_commit_oid_to_tree(g_repo, tree_sha);
+		
+	cl_git_rewritefile("renames/songof7cities.txt", "single line\n");
+	cl_git_pass(git_index_add_bypath(index, "songof7cities.txt"));
+
+	cl_git_rewritefile("renames/untimely.txt", "untimely\n");
+	cl_git_pass(git_index_add_bypath(index, "untimely.txt"));
+
+	/* Tests that we can invoke find_similar on small files
+	 * and that the GIT_EBUFS (too small) error code is not
+	 * propagated to the caller.
+	 */
+	cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
+
+	opts.flags = GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES | GIT_DIFF_FIND_AND_BREAK_REWRITES;
+	cl_git_pass(git_diff_find_similar(diff, &opts));
+
+	git_diff_list_free(diff);
+	git_tree_free(tree);
+	git_index_free(index);
+}
+
 void test_diff_rename__working_directory_changes(void)
 {
 	/* let's rewrite some files in the working directory on demand */