Improve robustness of diff rename detection

Under some strange circumstances, diffs can end up listing files that we can't actually open successfully. Rather than aborting git_diff_find_similar, this change makes it so that those files are simply not considered as valid rename/copy targets.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
diff --git a/src/diff_tform.c b/src/diff_tform.c
index 201a0e8..de2d5f6 100644
--- a/src/diff_tform.c
+++ b/src/diff_tform.c
@@ -386,8 +386,12 @@ static int similarity_calc(
/* TODO: apply wd-to-odb filters to file data if necessary */
- if (!(error = git_buf_joinpath(
- &path, git_repository_workdir(diff->repo), file->path)))
+ if ((error = git_buf_joinpath(
+ &path, git_repository_workdir(diff->repo), file->path)) < 0)
+ return error;
+
+ /* if path is not a regular file, just skip this item */
+ if (git_path_isfile(path.ptr))
error = opts->metric->file_signature(
&cache[file_idx], file, path.ptr, opts->metric->payload);
@@ -398,8 +402,11 @@ static int similarity_calc(
/* TODO: add max size threshold a la diff? */
- if ((error = git_blob_lookup(&blob, diff->repo, &file->oid)) < 0)
- return error;
+ if (git_blob_lookup(&blob, diff->repo, &file->oid) < 0) {
+ /* if lookup fails, just skip this item in similarity calc */
+ giterr_clear();
+ return 0;
+ }
blobsize = git_blob_rawsize(blob);
if (!git__is_sizet(blobsize)) /* ? what to do ? */
@@ -437,7 +444,7 @@ static int similarity_measure(
return -1;
if (!cache[b_idx] && similarity_calc(diff, opts, b_idx, cache) < 0)
return -1;
-
+
/* some metrics may not wish to process this file (too big / too small) */
if (!cache[a_idx] || !cache[b_idx])
return 0;