Commit 416f7cb34762a75ae2ca14959caa59d94bc14919

Edward Thomson 2022-02-12T10:38:08

Merge pull request #6208 from jorio/fix-stale-filesize-crash diff_file: fix crash if size of diffed file changes in workdir

diff --git a/include/git2/diff.h b/include/git2/diff.h
index 9424ffd..3839f00 100644
--- a/include/git2/diff.h
+++ b/include/git2/diff.h
@@ -207,7 +207,8 @@ typedef enum {
 	GIT_DIFF_FLAG_BINARY     = (1u << 0), /**< file(s) treated as binary data */
 	GIT_DIFF_FLAG_NOT_BINARY = (1u << 1), /**< file(s) treated as text data */
 	GIT_DIFF_FLAG_VALID_ID   = (1u << 2), /**< `id` value is known correct */
-	GIT_DIFF_FLAG_EXISTS     = (1u << 3)  /**< file exists at this side of the delta */
+	GIT_DIFF_FLAG_EXISTS     = (1u << 3), /**< file exists at this side of the delta */
+	GIT_DIFF_FLAG_VALID_SIZE = (1u << 4)  /**< file size value is known correct */
 } git_diff_flag_t;
 
 /**
diff --git a/src/diff_file.c b/src/diff_file.c
index 78e332f..c7e9fbe 100644
--- a/src/diff_file.c
+++ b/src/diff_file.c
@@ -328,16 +328,25 @@ static int diff_file_content_load_workdir_file(
 	git_filter_list *fl = NULL;
 	git_file fd = git_futils_open_ro(git_str_cstr(path));
 	git_str raw = GIT_STR_INIT;
+	git_object_size_t new_file_size = 0;
 
 	if (fd < 0)
 		return fd;
 
-	if (!fc->file->size)
-	    error = git_futils_filesize(&fc->file->size, fd);
+	error = git_futils_filesize(&new_file_size, fd);
 
-	if (error < 0 || !fc->file->size)
+	if (error < 0)
 		goto cleanup;
 
+	if (!(fc->file->flags & GIT_DIFF_FLAG_VALID_SIZE)) {
+		fc->file->size = new_file_size;
+		fc->file->flags |= GIT_DIFF_FLAG_VALID_SIZE;
+	} else if (fc->file->size != new_file_size) {
+		git_error_set(GIT_ERROR_FILESYSTEM, "file changed before we could read it");
+		error = -1;
+		goto cleanup;
+	}
+
 	if ((diff_opts->flags & GIT_DIFF_SHOW_BINARY) == 0 &&
 		diff_file_content_binary_by_size(fc))
 		goto cleanup;
diff --git a/src/diff_generate.c b/src/diff_generate.c
index dca16d5..cfaefba 100644
--- a/src/diff_generate.c
+++ b/src/diff_generate.c
@@ -117,6 +117,26 @@ static bool diff_pathspec_match(
 		matched_pathspec, NULL);
 }
 
+static void diff_delta__flag_known_size(git_diff_file *file)
+{
+	/*
+	 * If we don't know the ID, that can only come from the workdir
+	 * iterator, which means we *do* know the file size.  This is a
+	 * leaky abstraction, but alas.  Otherwise, we test against the
+	 * empty blob id.
+	 */
+	if (file->size ||
+	    !(file->flags & GIT_DIFF_FLAG_VALID_ID) ||
+	    git_oid_equal(&file->id, &git_oid__empty_blob_sha1))
+		file->flags |= GIT_DIFF_FLAG_VALID_SIZE;
+}
+
+static void diff_delta__flag_known_sizes(git_diff_delta *delta)
+{
+	diff_delta__flag_known_size(&delta->old_file);
+	diff_delta__flag_known_size(&delta->new_file);
+}
+
 static int diff_delta__from_one(
 	git_diff_generated *diff,
 	git_delta_t status,
@@ -182,6 +202,8 @@ static int diff_delta__from_one(
 	if (has_old || !git_oid_is_zero(&delta->new_file.id))
 		delta->new_file.flags |= GIT_DIFF_FLAG_VALID_ID;
 
+	diff_delta__flag_known_sizes(delta);
+
 	return diff_insert_delta(diff, delta, matched_pathspec);
 }
 
@@ -244,6 +266,8 @@ static int diff_delta__from_two(
 			delta->new_file.flags |= GIT_DIFF_FLAG_VALID_ID;
 	}
 
+	diff_delta__flag_known_sizes(delta);
+
 	return diff_insert_delta(diff, delta, matched_pathspec);
 }
 
diff --git a/src/diff_generate.h b/src/diff_generate.h
index f39bf6b..b782f29 100644
--- a/src/diff_generate.h
+++ b/src/diff_generate.h
@@ -119,8 +119,10 @@ GIT_INLINE(int) git_diff_file__resolve_zero_size(
 
 	git_odb_free(odb);
 
-	if (!error)
+	if (!error) {
 		file->size = (git_object_size_t)len;
+		file->flags |= GIT_DIFF_FLAG_VALID_SIZE;
+	}
 
 	return error;
 }
diff --git a/src/diff_tform.c b/src/diff_tform.c
index f9836d2..913d649 100644
--- a/src/diff_tform.c
+++ b/src/diff_tform.c
@@ -460,7 +460,8 @@ static int similarity_init(
 	info->blob = NULL;
 	git_str_init(&info->data, 0);
 
-	if (info->file->size > 0 || info->src == GIT_ITERATOR_WORKDIR)
+	if ((info->file->flags & GIT_DIFF_FLAG_VALID_SIZE) ||
+	    info->src == GIT_ITERATOR_WORKDIR)
 		return 0;
 
 	return git_diff_file__resolve_zero_size(
diff --git a/src/odb.c b/src/odb.c
index 3abeae2..14eff53 100644
--- a/src/odb.c
+++ b/src/odb.c
@@ -16,6 +16,7 @@
 #include "filter.h"
 #include "repository.h"
 #include "blob.h"
+#include "oid.h"
 
 #include "git2/odb_backend.h"
 #include "git2/oid.h"
@@ -58,10 +59,7 @@ static int error_null_oid(int error, const char *message);
 
 static git_object_t odb_hardcoded_type(const git_oid *id)
 {
-	static git_oid empty_tree = {{ 0x4b, 0x82, 0x5d, 0xc6, 0x42, 0xcb, 0x6e, 0xb9, 0xa0, 0x60,
-					   0xe5, 0x4b, 0xf8, 0xd6, 0x92, 0x88, 0xfb, 0xee, 0x49, 0x04 }};
-
-	if (!git_oid_cmp(id, &empty_tree))
+	if (!git_oid_cmp(id, &git_oid__empty_tree_sha1))
 		return GIT_OBJECT_TREE;
 
 	return GIT_OBJECT_INVALID;
diff --git a/src/oid.c b/src/oid.c
index dbc5a5a..19061e8 100644
--- a/src/oid.c
+++ b/src/oid.c
@@ -13,6 +13,13 @@
 #include <string.h>
 #include <limits.h>
 
+const git_oid git_oid__empty_blob_sha1 =
+	{{ 0xe6, 0x9d, 0xe2, 0x9b, 0xb2, 0xd1, 0xd6, 0x43, 0x4b, 0x8b,
+	   0x29, 0xae, 0x77, 0x5a, 0xd8, 0xc2, 0xe4, 0x8c, 0x53, 0x91 }};
+const git_oid git_oid__empty_tree_sha1 =
+	{{ 0x4b, 0x82, 0x5d, 0xc6, 0x42, 0xcb, 0x6e, 0xb9, 0xa0, 0x60,
+	   0xe5, 0x4b, 0xf8, 0xd6, 0x92, 0x88, 0xfb, 0xee, 0x49, 0x04 }};
+
 static char to_hex[] = "0123456789abcdef";
 
 static int oid_error_invalid(const char *msg)
diff --git a/src/oid.h b/src/oid.h
index 7a89d21..5baec33 100644
--- a/src/oid.h
+++ b/src/oid.h
@@ -11,6 +11,9 @@
 
 #include "git2/oid.h"
 
+extern const git_oid git_oid__empty_blob_sha1;
+extern const git_oid git_oid__empty_tree_sha1;
+
 /**
  * Format a git_oid into a newly allocated c-string.
  *
diff --git a/tests/diff/externalmodifications.c b/tests/diff/externalmodifications.c
new file mode 100644
index 0000000..df62c33
--- /dev/null
+++ b/tests/diff/externalmodifications.c
@@ -0,0 +1,133 @@
+#include "clar_libgit2.h"
+#include "../checkout/checkout_helpers.h"
+
+#include "index.h"
+#include "repository.h"
+
+static git_repository *g_repo;
+
+void test_diff_externalmodifications__initialize(void)
+{
+	g_repo = cl_git_sandbox_init("testrepo2");
+}
+
+void test_diff_externalmodifications__cleanup(void)
+{
+	cl_git_sandbox_cleanup();
+	g_repo = NULL;
+}
+
+void test_diff_externalmodifications__file_becomes_smaller(void)
+{
+	git_index *index;
+	git_diff *diff;
+	git_patch* patch;
+	git_str path = GIT_STR_INIT;
+	char big_string[500001];
+
+	cl_git_pass(git_str_joinpath(&path, git_repository_workdir(g_repo), "README"));
+
+	/* Modify the file with a large string */
+	memset(big_string, '\n', sizeof(big_string) - 1);
+	big_string[sizeof(big_string) - 1] = '\0';
+	cl_git_mkfile(path.ptr, big_string);
+
+	/* Get a diff */
+	cl_git_pass(git_repository_index(&index, g_repo));
+	cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL));
+	cl_assert_equal_i(1, git_diff_num_deltas(diff));
+	cl_assert_equal_i(500000, git_diff_get_delta(diff, 0)->new_file.size);
+
+	/* Simulate file modification after we've gotten the diff.
+	 * Write a shorter string to ensure that we don't mmap 500KB from
+	 * the previous revision, which would most likely crash. */
+	cl_git_mkfile(path.ptr, "hello");
+
+	/* Attempt to get a patch */
+	cl_git_fail(git_patch_from_diff(&patch, diff, 0));
+
+	git_index_free(index);
+	git_diff_free(diff);
+	git_str_dispose(&path);
+}
+
+void test_diff_externalmodifications__file_becomes_empty(void)
+{
+	git_index *index;
+	git_diff *diff;
+	git_patch* patch;
+	git_str path = GIT_STR_INIT;
+
+	cl_git_pass(git_str_joinpath(&path, git_repository_workdir(g_repo), "README"));
+
+	/* Modify the file */
+	cl_git_mkfile(path.ptr, "hello");
+
+	/* Get a diff */
+	cl_git_pass(git_repository_index(&index, g_repo));
+	cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL));
+	cl_assert_equal_i(1, git_diff_num_deltas(diff));
+	cl_assert_equal_i(5, git_diff_get_delta(diff, 0)->new_file.size);
+
+	/* Empty out the file after we've gotten the diff */
+	cl_git_mkfile(path.ptr, "");
+
+	/* Attempt to get a patch */
+	cl_git_fail(git_patch_from_diff(&patch, diff, 0));
+
+	git_index_free(index);
+	git_diff_free(diff);
+	git_str_dispose(&path);
+}
+
+void test_diff_externalmodifications__file_deleted(void)
+{
+	git_index *index;
+	git_diff *diff;
+	git_patch* patch;
+	git_str path = GIT_STR_INIT;
+
+	cl_git_pass(git_str_joinpath(&path, git_repository_workdir(g_repo), "README"));
+
+	/* Get a diff */
+	cl_git_pass(git_repository_index(&index, g_repo));
+	cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL));
+	cl_assert_equal_i(0, git_diff_num_deltas(diff));
+
+	/* Delete the file */
+	cl_git_rmfile(path.ptr);
+
+	/* Attempt to get a patch */
+	cl_git_fail(git_patch_from_diff(&patch, diff, 0));
+
+	git_index_free(index);
+	git_diff_free(diff);
+	git_str_dispose(&path);
+}
+
+void test_diff_externalmodifications__empty_file_becomes_non_empty(void)
+{
+	git_index *index;
+	git_diff *diff;
+	git_patch* patch;
+	git_str path = GIT_STR_INIT;
+
+	cl_git_pass(git_str_joinpath(&path, git_repository_workdir(g_repo), "README"));
+
+	/* Empty out the file */
+	cl_git_mkfile(path.ptr, "");
+
+	/* Get a diff */
+	cl_git_pass(git_repository_index(&index, g_repo));
+	cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, index, NULL));
+	cl_assert_equal_i(1, git_diff_num_deltas(diff));
+	cl_assert_equal_i(0, git_diff_get_delta(diff, 0)->new_file.size);
+
+	/* Simulate file modification after we've gotten the diff */
+	cl_git_mkfile(path.ptr, "hello");
+	cl_git_fail(git_patch_from_diff(&patch, diff, 0));
+
+	git_index_free(index);
+	git_diff_free(diff);
+	git_str_dispose(&path);
+}