Commit fa7aba70d8c1bc68cd2572d808c66059df6da989

Patrick Steinhardt 2018-11-07T12:23:14

Merge pull request #4871 from pks-t/pks/tree-parsing-fixes Tree parsing fixes

diff --git a/src/tree.c b/src/tree.c
index d628aeb..d9b5939 100644
--- a/src/tree.c
+++ b/src/tree.c
@@ -356,21 +356,21 @@ static int tree_error(const char *str, const char *path)
 	return -1;
 }
 
-static int parse_mode(unsigned int *modep, const char *buffer, const char **buffer_out)
+static int parse_mode(uint16_t *mode_out, const char *buffer, size_t buffer_len, const char **buffer_out)
 {
-	unsigned char c;
-	unsigned int mode = 0;
+	int32_t mode;
+	int error;
 
-	if (*buffer == ' ')
+	if (!buffer_len || git__isspace(*buffer))
 		return -1;
 
-	while ((c = *buffer++) != ' ') {
-		if (c < '0' || c > '7')
-			return -1;
-		mode = (mode << 3) + (c - '0');
-	}
-	*modep = mode;
-	*buffer_out = buffer;
+	if ((error = git__strntol32(&mode, buffer, buffer_len, buffer_out, 8)) < 0)
+		return error;
+
+	if (mode < 0 || mode > UINT16_MAX)
+		return -1;
+
+	*mode_out = mode;
 
 	return 0;
 }
@@ -392,11 +392,14 @@ int git_tree__parse_raw(void *_tree, const char *data, size_t size)
 		git_tree_entry *entry;
 		size_t filename_len;
 		const char *nul;
-		unsigned int attr;
+		uint16_t attr;
 
-		if (parse_mode(&attr, buffer, &buffer) < 0 || !buffer)
+		if (parse_mode(&attr, buffer, buffer_end - buffer, &buffer) < 0 || !buffer)
 			return tree_error("failed to parse tree: can't parse filemode", NULL);
 
+		if (buffer >= buffer_end || (*buffer++) != ' ')
+			return tree_error("failed to parse tree: missing space after filemode", NULL);
+
 		if ((nul = memchr(buffer, 0, buffer_end - buffer)) == NULL)
 			return tree_error("failed to parse tree: object is corrupted", NULL);
 
diff --git a/src/util.c b/src/util.c
index 52495f7..735f0b5 100644
--- a/src/util.c
+++ b/src/util.c
@@ -83,8 +83,11 @@ int git__strntol64(int64_t *result, const char *nptr, size_t nptr_len, const cha
 	/*
 	 * White space
 	 */
-	while (git__isspace(*p))
-		p++;
+	while (nptr_len && git__isspace(*p))
+		p++, nptr_len--;
+
+	if (!nptr_len)
+		goto Return;
 
 	/*
 	 * Sign
@@ -94,25 +97,36 @@ int git__strntol64(int64_t *result, const char *nptr, size_t nptr_len, const cha
 			neg = 1;
 
 	/*
-	 * Base
+	 * Automatically detect the base if none was given to us.
+	 * Right now, we assume that a number starting with '0x'
+	 * is hexadecimal and a number starting with '0' is
+	 * octal.
 	 */
 	if (base == 0) {
 		if (*p != '0')
 			base = 10;
-		else {
+		else if (nptr_len > 2 && (p[1] == 'x' || p[1] == 'X'))
+			base = 16;
+		else
 			base = 8;
-			if (p[1] == 'x' || p[1] == 'X') {
-				p += 2;
-				base = 16;
-			}
-		}
-	} else if (base == 16 && *p == '0') {
-		if (p[1] == 'x' || p[1] == 'X')
-			p += 2;
-	} else if (base < 0 || 36 < base)
+	}
+
+	if (base < 0 || 36 < base)
 		goto Return;
 
 	/*
+	 * Skip prefix of '0x'-prefixed hexadecimal numbers. There is no
+	 * need to do the same for '0'-prefixed octal numbers as a
+	 * leading '0' does not have any impact. Also, if we skip a
+	 * leading '0' in such a string, then we may end up with no
+	 * digits left and produce an error later on which isn't one.
+	 */
+	if (base == 16 && nptr_len > 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
+		p += 2;
+		nptr_len -= 2;
+	}
+
+	/*
 	 * Non-empty sequence of digits
 	 */
 	for (; nptr_len > 0; p++,ndig++,nptr_len--) {
diff --git a/tests/core/strtol.c b/tests/core/strtol.c
index ba79fba..71af332 100644
--- a/tests/core/strtol.c
+++ b/tests/core/strtol.c
@@ -64,6 +64,28 @@ void test_core_strtol__int64(void)
 	assert_l64_fails("-0x8000000000000001", 16);
 }
 
+void test_core_strtol__base_autodetection(void)
+{
+	assert_l64_parses("0", 0, 0);
+	assert_l64_parses("00", 0, 0);
+	assert_l64_parses("0x", 0, 0);
+	assert_l64_parses("0foobar", 0, 0);
+	assert_l64_parses("07", 7, 0);
+	assert_l64_parses("017", 15, 0);
+	assert_l64_parses("0x8", 8, 0);
+	assert_l64_parses("0x18", 24, 0);
+}
+
+void test_core_strtol__buffer_length_with_autodetection_truncates(void)
+{
+	int64_t i64;
+
+	cl_git_pass(git__strntol64(&i64, "011", 2, NULL, 0));
+	cl_assert_equal_i(i64, 1);
+	cl_git_pass(git__strntol64(&i64, "0x11", 3, NULL, 0));
+	cl_assert_equal_i(i64, 1);
+}
+
 void test_core_strtol__buffer_length_truncates(void)
 {
 	int32_t i32;
@@ -76,6 +98,16 @@ void test_core_strtol__buffer_length_truncates(void)
 	cl_assert_equal_i(i64, 1);
 }
 
+void test_core_strtol__buffer_length_with_leading_ws_truncates(void)
+{
+	int64_t i64;
+
+	cl_git_fail(git__strntol64(&i64, " 1", 1, NULL, 10));
+
+	cl_git_pass(git__strntol64(&i64, " 11", 2, NULL, 10));
+	cl_assert_equal_i(i64, 1);
+}
+
 void test_core_strtol__error_message_cuts_off(void)
 {
 	assert_l32_fails("2147483657foobar", 10);
diff --git a/tests/object/tree/parse.c b/tests/object/tree/parse.c
new file mode 100644
index 0000000..83d2219
--- /dev/null
+++ b/tests/object/tree/parse.c
@@ -0,0 +1,164 @@
+#include "clar_libgit2.h"
+#include "tree.h"
+#include "object.h"
+
+#define OID1_HEX \
+	"\xae\x90\xf1\x2e\xea\x69\x97\x29\xed\x24" \
+	"\x55\x5e\x40\xb9\xfd\x66\x9d\xa1\x2a\x12"
+#define OID1_STR "ae90f12eea699729ed24555e40b9fd669da12a12"
+
+#define OID2_HEX \
+	"\xe8\xbf\xe5\xaf\x39\x57\x9a\x7e\x48\x98" \
+	"\xbb\x23\xf3\xa7\x6a\x72\xc3\x68\xce\xe6"
+#define OID2_STR "e8bfe5af39579a7e4898bb23f3a76a72c368cee6"
+
+typedef struct {
+	const char *filename;
+	uint16_t attr;
+	const char *oid;
+} expected_entry;
+
+static void assert_tree_parses(const char *data, size_t datalen,
+	expected_entry *expected_entries, size_t expected_nentries)
+{
+	git_tree *tree;
+	size_t n;
+
+	if (!datalen)
+		datalen = strlen(data);
+	cl_git_pass(git_object__from_raw((git_object **) &tree, data, datalen, GIT_OBJ_TREE));
+
+	cl_assert_equal_i(git_tree_entrycount(tree), expected_nentries);
+
+	for (n = 0; n < expected_nentries; n++) {
+		expected_entry *expected = expected_entries + n;
+		const git_tree_entry *entry;
+		git_oid oid;
+
+		cl_git_pass(git_oid_fromstr(&oid, expected->oid));
+
+		cl_assert(entry = git_tree_entry_byname(tree, expected->filename));
+		cl_assert_equal_s(expected->filename, entry->filename);
+		cl_assert_equal_i(expected->attr, entry->attr);
+		cl_assert_equal_oid(&oid, entry->oid);
+	}
+
+	git_object_free(&tree->object);
+}
+
+static void assert_tree_fails(const char *data, size_t datalen)
+{
+	git_object *object;
+	if (!datalen)
+		datalen = strlen(data);
+	cl_git_fail(git_object__from_raw(&object, data, datalen, GIT_OBJ_TREE));
+}
+
+void test_object_tree_parse__single_blob_parses(void)
+{
+	expected_entry entries[] = {
+		{ "foo", 0100644, OID1_STR },
+	};
+	const char data[] = "100644 foo\x00" OID1_HEX;
+
+	assert_tree_parses(data, ARRAY_SIZE(data) - 1, entries, ARRAY_SIZE(entries));
+}
+
+void test_object_tree_parse__single_tree_parses(void)
+{
+	expected_entry entries[] = {
+		{ "foo", 040000, OID1_STR },
+	};
+	const char data[] = "040000 foo\x00" OID1_HEX;
+
+	assert_tree_parses(data, ARRAY_SIZE(data) - 1, entries, ARRAY_SIZE(entries));
+}
+
+void test_object_tree_parse__leading_filename_spaces_parse(void)
+{
+	expected_entry entries[] = {
+		{ "       bar", 0100644, OID1_STR },
+	};
+	const char data[] = "100644        bar\x00" OID1_HEX;
+
+	assert_tree_parses(data, ARRAY_SIZE(data) - 1, entries, ARRAY_SIZE(entries));
+}
+
+void test_object_tree_parse__multiple_entries_parse(void)
+{
+	expected_entry entries[] = {
+		{ "bar", 0100644, OID1_STR },
+		{ "foo", 040000,  OID2_STR },
+	};
+	const char data[] =
+	    "100644 bar\x00" OID1_HEX
+	    "040000 foo\x00" OID2_HEX;
+
+	assert_tree_parses(data, ARRAY_SIZE(data) - 1, entries, ARRAY_SIZE(entries));
+}
+
+void test_object_tree_parse__invalid_mode_fails(void)
+{
+	const char data[] = "10x644 bar\x00" OID1_HEX;
+	assert_tree_fails(data, ARRAY_SIZE(data) - 1);
+}
+
+void test_object_tree_parse__missing_mode_fails(void)
+{
+	const char data[] = " bar\x00" OID1_HEX;
+	assert_tree_fails(data, ARRAY_SIZE(data) - 1);
+}
+
+void test_object_tree_parse__mode_doesnt_cause_oob_read(void)
+{
+	const char data[] = "100644 bar\x00" OID1_HEX;
+	assert_tree_fails(data, 2);
+	/*
+	 * An oob-read would correctly parse the filename and
+	 * later fail to parse the OID with a different error
+	 * message
+	 */
+	cl_assert_equal_s(giterr_last()->message, "failed to parse tree: missing space after filemode");
+}
+
+void test_object_tree_parse__unreasonably_large_mode_fails(void)
+{
+	const char data[] = "10000000000000000000000000 bar\x00" OID1_HEX;
+	assert_tree_fails(data, ARRAY_SIZE(data) - 1);
+}
+
+void test_object_tree_parse__missing_filename_separator_fails(void)
+{
+	const char data[] = "100644bar\x00" OID1_HEX;
+	assert_tree_fails(data, ARRAY_SIZE(data) - 1);
+}
+
+void test_object_tree_parse__missing_filename_terminator_fails(void)
+{
+	const char data[] = "100644 bar" OID1_HEX;
+	assert_tree_fails(data, ARRAY_SIZE(data) - 1);
+}
+
+void test_object_tree_parse__empty_filename_fails(void)
+{
+	const char data[] = "100644 \x00" OID1_HEX;
+	assert_tree_fails(data, ARRAY_SIZE(data) - 1);
+}
+
+void test_object_tree_parse__trailing_garbage_fails(void)
+{
+	const char data[] = "100644 bar\x00" OID1_HEX "x";
+	assert_tree_fails(data, ARRAY_SIZE(data) - 1);
+}
+
+void test_object_tree_parse__leading_space_fails(void)
+{
+	const char data[] = " 100644 bar\x00" OID1_HEX;
+	assert_tree_fails(data, ARRAY_SIZE(data) - 1);
+}
+
+void test_object_tree_parse__truncated_oid_fails(void)
+{
+	const char data[] = " 100644 bar\x00" OID1_HEX;
+	assert_tree_fails(data, ARRAY_SIZE(data) - 2);
+}