Commit 359240b674ad809c4986596bfe8a6837ee386e13

Edward Thomson 2022-02-11T17:56:05

diff: indicate when the file size is "valid"

When we know the file size (because we're producing it from a working directory iterator, or an index with an up-to-date cache), set a flag to indicate that. This removes the ambiguity about a 0 file size, which could indicate that a file exists and is 0 bytes, or that we haven't read it yet.

diff --git a/include/git2/diff.h b/include/git2/diff.h
index 9424ffd..3839f00 100644
--- a/include/git2/diff.h
+++ b/include/git2/diff.h
@@ -207,7 +207,8 @@ typedef enum {
 	GIT_DIFF_FLAG_BINARY     = (1u << 0), /**< file(s) treated as binary data */
 	GIT_DIFF_FLAG_NOT_BINARY = (1u << 1), /**< file(s) treated as text data */
 	GIT_DIFF_FLAG_VALID_ID   = (1u << 2), /**< `id` value is known correct */
-	GIT_DIFF_FLAG_EXISTS     = (1u << 3)  /**< file exists at this side of the delta */
+	GIT_DIFF_FLAG_EXISTS     = (1u << 3), /**< file exists at this side of the delta */
+	GIT_DIFF_FLAG_VALID_SIZE = (1u << 4)  /**< file size value is known correct */
 } git_diff_flag_t;
 
 /**
diff --git a/src/diff_generate.c b/src/diff_generate.c
index dca16d5..cfaefba 100644
--- a/src/diff_generate.c
+++ b/src/diff_generate.c
@@ -117,6 +117,26 @@ static bool diff_pathspec_match(
 		matched_pathspec, NULL);
 }
 
+static void diff_delta__flag_known_size(git_diff_file *file)
+{
+	/*
+	 * Non-zero sizes are known.  For zero: an ID not marked valid only
+	 * comes from the workdir iterator, which *does* know the size (a
+	 * leaky abstraction, alas); else the ID must be the empty blob's.
+	 */
+	if (file->size ||
+	    !(file->flags & GIT_DIFF_FLAG_VALID_ID) ||
+	    git_oid_equal(&file->id, &git_oid__empty_blob_sha1))
+		file->flags |= GIT_DIFF_FLAG_VALID_SIZE;
+}
+
+/* Apply the known-size check to both sides of the delta. */
+static void diff_delta__flag_known_sizes(git_diff_delta *delta)
+{
+	diff_delta__flag_known_size(&delta->old_file);
+	diff_delta__flag_known_size(&delta->new_file);
+}
+
 static int diff_delta__from_one(
 	git_diff_generated *diff,
 	git_delta_t status,
@@ -182,6 +202,8 @@ static int diff_delta__from_one(
 	if (has_old || !git_oid_is_zero(&delta->new_file.id))
 		delta->new_file.flags |= GIT_DIFF_FLAG_VALID_ID;
 
+	diff_delta__flag_known_sizes(delta);
+
 	return diff_insert_delta(diff, delta, matched_pathspec);
 }
 
@@ -244,6 +266,8 @@ static int diff_delta__from_two(
 			delta->new_file.flags |= GIT_DIFF_FLAG_VALID_ID;
 	}
 
+	diff_delta__flag_known_sizes(delta);
+
 	return diff_insert_delta(diff, delta, matched_pathspec);
 }
 
diff --git a/src/diff_generate.h b/src/diff_generate.h
index f39bf6b..b782f29 100644
--- a/src/diff_generate.h
+++ b/src/diff_generate.h
@@ -119,8 +119,10 @@ GIT_INLINE(int) git_diff_file__resolve_zero_size(
 
 	git_odb_free(odb);
 
-	if (!error)
+	if (!error) {
 		file->size = (git_object_size_t)len;
+		file->flags |= GIT_DIFF_FLAG_VALID_SIZE;
+	}
 
 	return error;
 }
diff --git a/src/diff_tform.c b/src/diff_tform.c
index f9836d2..913d649 100644
--- a/src/diff_tform.c
+++ b/src/diff_tform.c
@@ -460,7 +460,8 @@ static int similarity_init(
 	info->blob = NULL;
 	git_str_init(&info->data, 0);
 
-	if (info->file->size > 0 || info->src == GIT_ITERATOR_WORKDIR)
+	if ((info->file->flags & GIT_DIFF_FLAG_VALID_SIZE) ||
+	    info->src == GIT_ITERATOR_WORKDIR)
 		return 0;
 
 	return git_diff_file__resolve_zero_size(
diff --git a/src/odb.c b/src/odb.c
index 3abeae2..14eff53 100644
--- a/src/odb.c
+++ b/src/odb.c
@@ -16,6 +16,7 @@
 #include "filter.h"
 #include "repository.h"
 #include "blob.h"
+#include "oid.h"
 
 #include "git2/odb_backend.h"
 #include "git2/oid.h"
@@ -58,10 +59,7 @@ static int error_null_oid(int error, const char *message);
 
 static git_object_t odb_hardcoded_type(const git_oid *id)
 {
-	static git_oid empty_tree = {{ 0x4b, 0x82, 0x5d, 0xc6, 0x42, 0xcb, 0x6e, 0xb9, 0xa0, 0x60,
-					   0xe5, 0x4b, 0xf8, 0xd6, 0x92, 0x88, 0xfb, 0xee, 0x49, 0x04 }};
-
-	if (!git_oid_cmp(id, &empty_tree))
+	if (!git_oid_cmp(id, &git_oid__empty_tree_sha1))
 		return GIT_OBJECT_TREE;
 
 	return GIT_OBJECT_INVALID;
diff --git a/src/oid.c b/src/oid.c
index 1a50d3d..19061e8 100644
--- a/src/oid.c
+++ b/src/oid.c
@@ -13,6 +13,9 @@
 #include <string.h>
 #include <limits.h>
 
+const git_oid git_oid__empty_blob_sha1 = /* SHA-1 id of the empty blob */
+	{{ 0xe6, 0x9d, 0xe2, 0x9b, 0xb2, 0xd1, 0xd6, 0x43, 0x4b, 0x8b,
+	   0x29, 0xae, 0x77, 0x5a, 0xd8, 0xc2, 0xe4, 0x8c, 0x53, 0x91 }};
 const git_oid git_oid__empty_tree_sha1 =
 	{{ 0x4b, 0x82, 0x5d, 0xc6, 0x42, 0xcb, 0x6e, 0xb9, 0xa0, 0x60,
 	   0xe5, 0x4b, 0xf8, 0xd6, 0x92, 0x88, 0xfb, 0xee, 0x49, 0x04 }};
diff --git a/src/oid.h b/src/oid.h
index 5c9b68f..5baec33 100644
--- a/src/oid.h
+++ b/src/oid.h
@@ -11,6 +11,7 @@
 
 #include "git2/oid.h"
 
+extern const git_oid git_oid__empty_blob_sha1;
 extern const git_oid git_oid__empty_tree_sha1;
 
 /**