Commit 8ef4e11a76599111b98682d235e7a4df921b2597

Russell Belfer 2014-04-28T14:16:26

Skip diff oid calc when size definitely changed. When the stat cache in the index seems valid and the size or mode of a file has definitely changed, don't bother recalculating the OID of the workdir bits to confirm that the file is modified - just accept that it is modified. This can result in files that show as modified with no actual diff, but that behavior appears to match Git on the command line. This commit also includes a minor optimization: no submodule lookup is performed on the ".git" directory itself.

diff --git a/src/diff.c b/src/diff.c
index aa88065..4c028ca 100644
--- a/src/diff.c
+++ b/src/diff.c
@@ -664,6 +664,7 @@ static int maybe_modified(
 	unsigned int omode = oitem->mode;
 	unsigned int nmode = nitem->mode;
 	bool new_is_workdir = (info->new_iter->type == GIT_ITERATOR_TYPE_WORKDIR);
+	bool modified_uncertain = false;
 	const char *matched_pathspec;
 	int error = 0;
 
@@ -731,15 +732,21 @@ static int maybe_modified(
 		/* if the stat data looks different, then mark modified - this just
 		 * means that the OID will be recalculated below to confirm change
 		 */
-		else if (omode != nmode ||
-			oitem->file_size != nitem->file_size ||
-			!diff_time_eq(&oitem->mtime, &nitem->mtime, use_nanos) ||
+		else if (omode != nmode || oitem->file_size != nitem->file_size) {
+			status = GIT_DELTA_MODIFIED;
+			modified_uncertain =
+				(oitem->file_size <= 0 && nitem->file_size > 0);
+		}
+		else if (!diff_time_eq(&oitem->mtime, &nitem->mtime, use_nanos) ||
 			(use_ctime &&
 			 !diff_time_eq(&oitem->ctime, &nitem->ctime, use_nanos)) ||
 			oitem->ino != nitem->ino ||
 			oitem->uid != nitem->uid ||
 			oitem->gid != nitem->gid)
+		{
 			status = GIT_DELTA_MODIFIED;
+			modified_uncertain = true;
+		}
 	}
 
 	/* if mode is GITLINK and submodules are ignored, then skip */
@@ -750,7 +757,7 @@ static int maybe_modified(
 	/* if we got here and decided that the files are modified, but we
 	 * haven't calculated the OID of the new item, then calculate it now
 	 */
-	if (status == GIT_DELTA_MODIFIED && git_oid_iszero(&nitem->id)) {
+	if (modified_uncertain && git_oid_iszero(&nitem->id)) {
 		if (git_oid_iszero(&noid)) {
 			if ((error = git_diff__oid_for_file(&noid,
 					diff, nitem->path, nitem->mode, nitem->file_size)) < 0)
diff --git a/src/iterator.c b/src/iterator.c
index 5e668b5..03058b9 100644
--- a/src/iterator.c
+++ b/src/iterator.c
@@ -1306,7 +1306,7 @@ static int workdir_iterator__enter_dir(fs_iterator *fi)
 
 	/* convert submodules to GITLINK and remove trailing slashes */
 	git_vector_foreach(&ff->entries, pos, entry) {
-		if (!S_ISDIR(entry->st.st_mode))
+		if (!S_ISDIR(entry->st.st_mode) || !strcmp(GIT_DIR, entry->path))
 			continue;
 
 		GIT_PERF_INC(fi->base.submodule_lookups);
diff --git a/tests/diff/workdir.c b/tests/diff/workdir.c
index 03a3ff4..84c8866 100644
--- a/tests/diff/workdir.c
+++ b/tests/diff/workdir.c
@@ -67,8 +67,8 @@ void test_diff_workdir__to_index(void)
 #ifdef GIT_PERF
 		cl_assert_equal_sz(
 			13 /* in root */ + 3 /* in subdir */, diff->stat_calls);
-		cl_assert_equal_sz(9, diff->oid_calculations);
-		cl_assert_equal_sz(2, diff->submodule_lookups);
+		cl_assert_equal_sz(5, diff->oid_calculations);
+		cl_assert_equal_sz(1, diff->submodule_lookups);
 #endif
 	}
 
diff --git a/tests/status/worktree.c b/tests/status/worktree.c
index def3d60..f51c618 100644
--- a/tests/status/worktree.c
+++ b/tests/status/worktree.c
@@ -578,7 +578,11 @@ void test_status_worktree__line_endings_dont_count_as_changes_with_autocrlf(void
 
 	cl_git_pass(git_status_file(&status, repo, "current_file"));
 
-	cl_assert_equal_i(GIT_STATUS_CURRENT, status);
+	/* stat data on file should no longer match stat cache, even though
+	 * file diff will be empty because of line-ending conversion - matches
+	 * the Git command-line behavior here.
+	 */
+	cl_assert_equal_i(GIT_STATUS_WT_MODIFIED, status);
 }
 
 void test_status_worktree__line_endings_dont_count_as_changes_with_autocrlf_issue_1397(void)