Commit c6cac733c147ff800f78e7dff81f90d93369ea68

Edward Thomson 2019-01-20T22:40:38

blob: validate that blob sizes fit in a size_t. Our blob size is a `git_off_t`, which is a signed 64-bit int. The value may be erroneously negative or larger than `SIZE_MAX`. Ensure that the blob size fits into a `size_t` before casting.

diff --git a/src/attr_file.c b/src/attr_file.c
index bd69c63..40c72ea 100644
--- a/src/attr_file.c
+++ b/src/attr_file.c
@@ -12,6 +12,7 @@
 #include "attrcache.h"
 #include "git2/blob.h"
 #include "git2/tree.h"
+#include "blob.h"
 #include "index.h"
 #include <ctype.h>
 
@@ -119,6 +120,7 @@ int git_attr_file__load(
 		break;
 	case GIT_ATTR_FILE__FROM_INDEX: {
 		git_oid id;
+		git_off_t blobsize;
 
 		if ((error = attr_file_oid_from_index(&id, repo, entry->path)) < 0 ||
 			(error = git_blob_lookup(&blob, repo, &id)) < 0)
@@ -126,7 +128,10 @@ int git_attr_file__load(
 
 		/* Do not assume that data straight from the ODB is NULL-terminated;
 		 * copy the contents of a file to a buffer to work on */
-		git_buf_put(&content, git_blob_rawcontent(blob), git_blob_rawsize(blob));
+		blobsize = git_blob_rawsize(blob);
+
+		GIT_ERROR_CHECK_BLOBSIZE(blobsize);
+		git_buf_put(&content, git_blob_rawcontent(blob), (size_t)blobsize);
 		break;
 	}
 	case GIT_ATTR_FILE__FROM_FILE: {
diff --git a/src/blob.c b/src/blob.c
index 79748af..566d24b 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -36,10 +36,10 @@ git_off_t git_blob_rawsize(const git_blob *blob)
 
 int git_blob__getbuf(git_buf *buffer, git_blob *blob)
 {
-	return git_buf_set(
-		buffer,
-		git_blob_rawcontent(blob),
-		git_blob_rawsize(blob));
+	git_off_t size = git_blob_rawsize(blob);
+
+	GIT_ERROR_CHECK_BLOBSIZE(size);
+	return git_buf_set(buffer, git_blob_rawcontent(blob), (size_t)size);
 }
 
 void git_blob__free(void *_blob)
@@ -389,12 +389,14 @@ cleanup:
 int git_blob_is_binary(const git_blob *blob)
 {
 	git_buf content = GIT_BUF_INIT;
+	git_off_t size;
 
 	assert(blob);
 
+	size = git_blob_rawsize(blob);
+
 	git_buf_attach_notowned(&content, git_blob_rawcontent(blob),
-		min(git_blob_rawsize(blob),
-		GIT_FILTER_BYTES_TO_CHECK_NUL));
+		(size_t)min(size, GIT_FILTER_BYTES_TO_CHECK_NUL));
 	return git_buf_text_is_binary(&content);
 }
 
diff --git a/src/blob.h b/src/blob.h
index f644ec5..b9aa330 100644
--- a/src/blob.h
+++ b/src/blob.h
@@ -27,6 +27,14 @@ struct git_blob {
 	unsigned int raw:1;
 };
 
+#define GIT_ERROR_CHECK_BLOBSIZE(n) \
+	do { \
+		if (!git__is_sizet(n)) { \
+			git_error_set(GIT_ERROR_NOMEMORY, "blob contents too large to fit in memory"); \
+			return -1; \
+		} \
+	} while(0)
+
 void git_blob__free(void *blob);
 int git_blob__parse(void *blob, git_odb_object *obj);
 int git_blob__parse_raw(void *blob, const char *data, size_t size);
diff --git a/src/diff_generate.c b/src/diff_generate.c
index acc6c34..065cc72 100644
--- a/src/diff_generate.c
+++ b/src/diff_generate.c
@@ -564,9 +564,14 @@ int git_diff__oid_for_file(
 {
 	git_index_entry entry;
 
+	if (!git__is_sizet(size)) {
+		git_error_set(GIT_ERROR_NOMEMORY, "file size overflow (for 32-bits) on '%s'", path);
+		return -1;
+	}
+
 	memset(&entry, 0, sizeof(entry));
 	entry.mode = mode;
-	entry.file_size = size;
+	entry.file_size = (size_t)size;
 	entry.path = (char *)path;
 
 	return git_diff__oid_for_entry(out, diff, &entry, mode, NULL);
@@ -628,7 +633,7 @@ int git_diff__oid_for_entry(
 		error = git_odb__hashlink(out, full_path.ptr);
 		diff->base.perf.oid_calculations++;
 	} else if (!git__is_sizet(entry.file_size)) {
-		git_error_set(GIT_ERROR_OS, "file size overflow (for 32-bits) on '%s'",
+		git_error_set(GIT_ERROR_NOMEMORY, "file size overflow (for 32-bits) on '%s'",
 			entry.path);
 		error = -1;
 	} else if (!(error = git_filter_list_load(&fl,
diff --git a/src/diff_stats.c b/src/diff_stats.c
index 9cd8006..a068171 100644
--- a/src/diff_stats.c
+++ b/src/diff_stats.c
@@ -54,7 +54,8 @@ int git_diff_file_stats__full_to_buf(
 	size_t width)
 {
 	const char *old_path = NULL, *new_path = NULL;
-	size_t padding, old_size, new_size;
+	size_t padding;
+	git_off_t old_size, new_size;
 
 	old_path = delta->old_file.path;
 	new_path = delta->new_file.path;
@@ -96,7 +97,7 @@ int git_diff_file_stats__full_to_buf(
 
 	if (delta->flags & GIT_DIFF_FLAG_BINARY) {
 		if (git_buf_printf(out,
-				"Bin %" PRIuZ " -> %" PRIuZ " bytes", old_size, new_size) < 0)
+				"Bin %" PRId64 " -> %" PRId64 " bytes", old_size, new_size) < 0)
 			goto on_error;
 	}
 	else {
diff --git a/src/notes.c b/src/notes.c
index 2931353..8e622c6 100644
--- a/src/notes.c
+++ b/src/notes.c
@@ -12,6 +12,7 @@
 #include "config.h"
 #include "iterator.h"
 #include "signature.h"
+#include "blob.h"
 
 static int note_error_notfound(void)
 {
@@ -319,6 +320,7 @@ static int note_new(
 	git_blob *blob)
 {
 	git_note *note = NULL;
+	git_off_t blobsize;
 
 	note = git__malloc(sizeof(git_note));
 	GIT_ERROR_CHECK_ALLOC(note);
@@ -329,7 +331,10 @@ static int note_new(
 		git_signature_dup(&note->committer, git_commit_committer(commit)) < 0)
 		return -1;
 
-	note->message = git__strndup(git_blob_rawcontent(blob), git_blob_rawsize(blob));
+	blobsize = git_blob_rawsize(blob);
+	GIT_ERROR_CHECK_BLOBSIZE(blobsize);
+
+	note->message = git__strndup(git_blob_rawcontent(blob), (size_t)blobsize);
 	GIT_ERROR_CHECK_ALLOC(note->message);
 
 	*out = note;
diff --git a/src/odb.c b/src/odb.c
index b74d42f..498652c 100644
--- a/src/odb.c
+++ b/src/odb.c
@@ -15,6 +15,7 @@
 #include "delta.h"
 #include "filter.h"
 #include "repository.h"
+#include "blob.h"
 
 #include "git2/odb_backend.h"
 #include "git2/oid.h"
@@ -387,18 +388,17 @@ static void fake_wstream__free(git_odb_stream *_stream)
 static int init_fake_wstream(git_odb_stream **stream_p, git_odb_backend *backend, git_off_t size, git_object_t type)
 {
 	fake_wstream *stream;
+	size_t blobsize;
 
-	if (!git__is_ssizet(size)) {
-		git_error_set(GIT_ERROR_ODB, "object size too large to keep in memory");
-		return -1;
-	}
+	GIT_ERROR_CHECK_BLOBSIZE(size);
+	blobsize = (size_t)size;
 
 	stream = git__calloc(1, sizeof(fake_wstream));
 	GIT_ERROR_CHECK_ALLOC(stream);
 
-	stream->size = size;
+	stream->size = blobsize;
 	stream->type = type;
-	stream->buffer = git__malloc(size);
+	stream->buffer = git__malloc(blobsize);
 	if (stream->buffer == NULL) {
 		git__free(stream);
 		return -1;
diff --git a/src/reader.c b/src/reader.c
index c374c46..1a48446 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -32,11 +32,17 @@ static int tree_reader_read(
 	tree_reader *reader = (tree_reader *)_reader;
 	git_tree_entry *tree_entry = NULL;
 	git_blob *blob = NULL;
+	git_off_t blobsize;
 	int error;
 
 	if ((error = git_tree_entry_bypath(&tree_entry, reader->tree, filename)) < 0 ||
-	    (error = git_blob_lookup(&blob, git_tree_owner(reader->tree), git_tree_entry_id(tree_entry))) < 0 ||
-	    (error = git_buf_set(out, git_blob_rawcontent(blob), git_blob_rawsize(blob))) < 0)
+	    (error = git_blob_lookup(&blob, git_tree_owner(reader->tree), git_tree_entry_id(tree_entry))) < 0)
+		goto done;
+
+	blobsize = git_blob_rawsize(blob);
+	GIT_ERROR_CHECK_BLOBSIZE(blobsize);
+
+	if ((error = git_buf_set(out, git_blob_rawcontent(blob), (size_t)blobsize)) < 0)
 		goto done;
 
 	if (out_id)