Commit 5b78dbdbf30d863760936ee6755dfd3db951c1e3

Edward Thomson 2015-07-09T13:04:10

git_buf: decode base85 inputs

diff --git a/src/buffer.c b/src/buffer.c
index 5fafe69..c2a54a5 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -273,42 +273,51 @@ int git_buf_encode_base64(git_buf *buf, const char *data, size_t len)
 	return 0;
 }
 
-/* The inverse of base64_encode, offset by '+' == 43. */
+/* The inverse of base64_encode */
 static const int8_t base64_decode[] = {
-	62,
-	-1, -1, -1,
-	63,
-	52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
-	-1, -1, -1, 0, -1, -1, -1,
-	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
-	13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
-	-1, -1, -1, -1, -1, -1,
-	26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
-	39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
+	52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1,  0, -1, -1,
+	-1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
+	15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
+	-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+	41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
 };
 
-#define BASE64_DECODE_VALUE(c) (((c) < 43 || (c) > 122) ? -1 : base64_decode[c - 43])
-
 int git_buf_decode_base64(git_buf *buf, const char *base64, size_t len)
 {
 	size_t i;
 	int8_t a, b, c, d;
 	size_t orig_size = buf->size, new_size;
 
+	if (len % 4) {
+		giterr_set(GITERR_INVALID, "invalid base64 input");
+		return -1;
+	}
+
 	assert(len % 4 == 0);
 	GITERR_CHECK_ALLOC_ADD(&new_size, (len / 4 * 3), buf->size);
 	GITERR_CHECK_ALLOC_ADD(&new_size, new_size, 1);
 	ENSURE_SIZE(buf, new_size);
 
 	for (i = 0; i < len; i += 4) {
-		if ((a = BASE64_DECODE_VALUE(base64[i])) < 0 ||
-			(b = BASE64_DECODE_VALUE(base64[i+1])) < 0 ||
-			(c = BASE64_DECODE_VALUE(base64[i+2])) < 0 ||
-			(d = BASE64_DECODE_VALUE(base64[i+3])) < 0) {
+		if ((a = base64_decode[(unsigned char)base64[i]]) < 0 ||
+			(b = base64_decode[(unsigned char)base64[i+1]]) < 0 ||
+			(c = base64_decode[(unsigned char)base64[i+2]]) < 0 ||
+			(d = base64_decode[(unsigned char)base64[i+3]]) < 0) {
 			buf->size = orig_size;
 			buf->ptr[buf->size] = '\0';
 
-			giterr_set(GITERR_INVALID, "Invalid base64 input");
+			giterr_set(GITERR_INVALID, "invalid base64 input");
 			return -1;
 		}
 
@@ -321,7 +330,7 @@ int git_buf_decode_base64(git_buf *buf, const char *base64, size_t len)
 	return 0;
 }
 
-static const char b85str[] =
+static const char base85_encode[] =
 	"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";
 
 int git_buf_encode_base85(git_buf *buf, const char *data, size_t len)
@@ -351,7 +360,7 @@ int git_buf_encode_base85(git_buf *buf, const char *data, size_t len)
 			int val = acc % 85;
 			acc /= 85;
 
-			b85[i] = b85str[val];
+			b85[i] = base85_encode[val];
 		}
 
 		for (i = 0; i < 5; i++)
@@ -363,6 +372,119 @@ int git_buf_encode_base85(git_buf *buf, const char *data, size_t len)
 	return 0;
 }
 
+/*
+ * The inverse of base85_encode, indexed by unsigned byte value.
+ *
+ * Valid digits are stored biased by one (1..85) and every other byte maps
+ * to -1; the decoder subtracts one after the lookup and rejects any
+ * negative result.
+ */
+static const int8_t base85_decode[] = {
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, 63, -1, 64, 65, 66, 67, -1, 68, 69, 70, 71, -1, 72, -1, -1,
+	 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, -1, 73, 74, 75, 76, 77,
+	78, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+	26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, -1, -1, -1, 79, 80,
+	81, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
+	52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 82, 83, 84, 85, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+};
+
+/*
+ * Decode base85 text and append the result to `buf`.
+ *
+ * `base85` holds `base85_len` encoded characters, which must be a multiple
+ * of five; each group of five decodes to one 32-bit value.  Exactly
+ * `output_len` bytes are appended (at most four per group, fewer from the
+ * last group consumed) and the buffer is NUL-terminated.  Input groups
+ * beyond those needed to produce `output_len` bytes are not examined.
+ *
+ * Returns 0 on success.  On malformed input (bad length, a character
+ * outside the base85 alphabet, or a group that overflows 32 bits) a
+ * GITERR_INVALID error is set, `buf` is left at its original size and -1
+ * is returned.
+ */
+int git_buf_decode_base85(
+	git_buf *buf,
+	const char *base85,
+	size_t base85_len,
+	size_t output_len)
+{
+	size_t orig_size = buf->size, new_size;
+
+	/*
+	 * Divide before multiplying: base85_len is already known to be a
+	 * multiple of five when the bound is evaluated, so the result is
+	 * exact and, unlike `base85_len * 4 / 5`, cannot wrap for huge lengths.
+	 */
+	if (base85_len % 5 ||
+		output_len > base85_len / 5 * 4) {
+		giterr_set(GITERR_INVALID, "invalid base85 input");
+		return -1;
+	}
+
+	GITERR_CHECK_ALLOC_ADD(&new_size, output_len, buf->size);
+	GITERR_CHECK_ALLOC_ADD(&new_size, new_size, 1);
+	ENSURE_SIZE(buf, new_size);
+
+	while (output_len) {
+		uint32_t acc = 0;
+		int de, cnt = 4;
+		unsigned char ch;
+
+		/* The first four digits of a group always fit in 32 bits. */
+		do {
+			ch = *base85++;
+			de = base85_decode[ch];
+			if (--de < 0)
+				goto on_error;
+
+			acc = acc * 85 + de;
+		} while (--cnt);
+
+		/* The fifth digit is handled apart: it may overflow 32 bits. */
+		ch = *base85++;
+		de = base85_decode[ch];
+		if (--de < 0)
+			goto on_error;
+
+		/* Detect overflow. */
+		if (0xffffffff / 85 < acc ||
+			0xffffffff - de < (acc *= 85))
+			goto on_error;
+
+		acc += de;
+
+		/* Emit most-significant byte first; the last group may be short. */
+		cnt = (output_len < 4) ? output_len : 4;
+		output_len -= cnt;
+		do {
+			/* Rotate left by eight so the next byte sits in the low bits. */
+			acc = (acc << 8) | (acc >> 24);
+			buf->ptr[buf->size++] = acc;
+		} while (--cnt);
+	}
+
+	buf->ptr[buf->size] = 0;
+
+	return 0;
+
+on_error:
+	buf->size = orig_size;
+	buf->ptr[buf->size] = '\0';
+
+	giterr_set(GITERR_INVALID, "invalid base85 input");
+	return -1;
+}
+
 int git_buf_vprintf(git_buf *buf, const char *format, va_list ap)
 {
 	size_t expected_size, new_size;
diff --git a/src/buffer.h b/src/buffer.h
index d446e04..2be299b 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -185,6 +185,8 @@ int git_buf_decode_base64(git_buf *buf, const char *base64, size_t len);
 
 /* Write data as "base85" encoded in buffer */
 int git_buf_encode_base85(git_buf *buf, const char *data, size_t len);
+/* Decode the given "base85" text and append output_len decoded bytes to the buffer */
+int git_buf_decode_base85(git_buf *buf, const char *base85, size_t base85_len, size_t output_len);
 
 /*
  * Insert, remove or replace a portion of the buffer.
diff --git a/tests/core/buffer.c b/tests/core/buffer.c
index 9872af7..1cf2342 100644
--- a/tests/core/buffer.c
+++ b/tests/core/buffer.c
@@ -813,6 +813,60 @@ void test_core_buffer__encode_base85(void)
 	git_buf_free(&buf);
 }
 
+/*
+ * Well-formed base85 input decodes to the expected bytes, including a
+ * final group that yields fewer than four bytes.
+ */
+void test_core_buffer__decode_base85(void)
+{
+	git_buf buf = GIT_BUF_INIT;
+
+	cl_git_pass(git_buf_decode_base85(&buf, "bZBXF", 5, 4));
+	cl_assert_equal_sz(4, buf.size);
+	cl_assert_equal_s("this", buf.ptr);
+	git_buf_clear(&buf);
+
+	cl_git_pass(git_buf_decode_base85(&buf, "ba!tca&BaE", 10, 8));
+	cl_assert_equal_sz(8, buf.size);
+	cl_assert_equal_s("two rnds", buf.ptr);
+	git_buf_clear(&buf);
+
+	cl_git_pass(git_buf_decode_base85(&buf, "bZBXFAZc?TVqtS-AUHK3Wo~0{WMyOk", 30, 23));
+	cl_assert_equal_sz(23, buf.size);
+	cl_assert_equal_s("this is base 85 encoded", buf.ptr);
+	git_buf_clear(&buf);
+
+	git_buf_free(&buf);
+}
+
+/*
+ * Malformed base85 input is rejected and leaves the buffer untouched.
+ *
+ * The requested output lengths stay within four bytes per five input
+ * characters so that each case gets past the up-front length validation
+ * and reaches the check it is meant to exercise.
+ */
+void test_core_buffer__decode_base85_fails_gracefully(void)
+{
+	git_buf buf = GIT_BUF_INIT;
+
+	git_buf_puts(&buf, "foobar");
+
+	/* invalid character in the middle of the second group */
+	cl_git_fail(git_buf_decode_base85(&buf, "invalid charsZZ", 15, 12));
+	/* invalid character as the fifth digit of the last group */
+	cl_git_fail(git_buf_decode_base85(&buf, "invalidchars__ ", 15, 12));
+	/* "~~~~~" does not fit in 32 bits */
+	cl_git_fail(git_buf_decode_base85(&buf, "overflowZZ~~~~~", 15, 12));
+	/* input length is not a multiple of five */
+	cl_git_fail(git_buf_decode_base85(&buf, "truncated", 9, 7));
+
+	cl_assert_equal_sz(6, buf.size);
+	cl_assert_equal_s("foobar", buf.ptr);
+
+	git_buf_free(&buf);
+}
+
 void test_core_buffer__classify_with_utf8(void)
 {
 	char *data0 = "Simple text\n";