handle small files in similarity metrics
diff --git a/src/diff_tform.c b/src/diff_tform.c
index 958d2bf..e9969d9 100644
--- a/src/diff_tform.c
+++ b/src/diff_tform.c
@@ -174,16 +174,34 @@ static int find_similar__hashsig_for_file(
void **out, const git_diff_file *f, const char *path, void *p)
{
git_hashsig_option_t opt = (git_hashsig_option_t)p;
+ int error = 0;
+
GIT_UNUSED(f);
- return git_hashsig_create_fromfile((git_hashsig **)out, path, opt);
+ error = git_hashsig_create_fromfile((git_hashsig **)out, path, opt);
+
+ if (error == GIT_EBUFS) {
+ error = 0;
+ giterr_clear();
+ }
+
+ return error;
}
static int find_similar__hashsig_for_buf(
void **out, const git_diff_file *f, const char *buf, size_t len, void *p)
{
git_hashsig_option_t opt = (git_hashsig_option_t)p;
+ int error = 0;
+
GIT_UNUSED(f);
- return git_hashsig_create((git_hashsig **)out, buf, len, opt);
+ error = git_hashsig_create((git_hashsig **)out, buf, len, opt);
+
+ if (error == GIT_EBUFS) {
+ error = 0;
+ giterr_clear();
+ }
+
+ return error;
}
static void find_similar__hashsig_free(void *sig, void *payload)
@@ -414,6 +432,10 @@ static int similarity_measure(
return -1;
if (!cache[b_idx] && similarity_calc(diff, opts, b_idx, cache) < 0)
return -1;
+
+ /* some metrics may not wish to process this file (too big / too small) */
+ if (!cache[a_idx] || !cache[b_idx])
+ return 0;
/* compare signatures */
if (opts->metric->similarity(
diff --git a/tests-clar/diff/rename.c b/tests-clar/diff/rename.c
index ae76640..5a8af93 100644
--- a/tests-clar/diff/rename.c
+++ b/tests-clar/diff/rename.c
@@ -352,6 +352,39 @@ void test_diff_rename__not_exact_match(void)
git_tree_free(new_tree);
}
+void test_diff_rename__handles_small_files(void)
+{
+ const char *tree_sha = "2bc7f351d20b53f1c72c16c4b036e491c478c49a";
+ git_index *index;
+ git_tree *tree;
+ git_diff_list *diff;
+ git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
+ git_diff_find_options opts = GIT_DIFF_FIND_OPTIONS_INIT;
+
+ cl_git_pass(git_repository_index(&index, g_repo));
+
+ tree = resolve_commit_oid_to_tree(g_repo, tree_sha);
+
+ cl_git_rewritefile("renames/songof7cities.txt", "single line\n");
+ cl_git_pass(git_index_add_bypath(index, "songof7cities.txt"));
+
+ cl_git_rewritefile("renames/untimely.txt", "untimely\n");
+ cl_git_pass(git_index_add_bypath(index, "untimely.txt"));
+
+ /* Tests that we can invoke find_similar on small files
+ * and that the GIT_EBUFS (too small) error code is not
+ * propagated to the caller.
+ */
+ cl_git_pass(git_diff_tree_to_index(&diff, g_repo, tree, index, &diffopts));
+
+ opts.flags = GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES | GIT_DIFF_FIND_AND_BREAK_REWRITES;
+ cl_git_pass(git_diff_find_similar(diff, &opts));
+
+ git_diff_list_free(diff);
+ git_tree_free(tree);
+ git_index_free(index);
+}
+
void test_diff_rename__working_directory_changes(void)
{
/* let's rewrite some files in the working directory on demand */