Commit 8606f33beadf5df48b36a64359c99d50aeb0f496

Russell Belfer 2014-01-30T09:59:15

Expand zstream tests and fix off-by-one error

diff --git a/src/zstream.c b/src/zstream.c
index c836022..82ae5e6 100644
--- a/src/zstream.c
+++ b/src/zstream.c
@@ -98,6 +98,7 @@ int git_zstream_get_output(void *out, size_t *out_len, git_zstream *zstream)
 
 		/* compress next chunk */
 		zstream->zerr = deflate(&zstream->z, zflush);
+
 		if (zstream->zerr == Z_STREAM_ERROR)
 			return zstream_seterr(zstream);
 
@@ -133,7 +134,7 @@ int git_zstream_deflatebuf(git_buf *out, const void *in, size_t in_len)
 	while (!git_zstream_done(&zs)) {
 		size_t step = git_zstream_suggest_output_len(&zs), written;
 
-		if ((error = git_buf_grow(out, out->asize + step)) < 0)
+		if ((error = git_buf_grow(out, out->asize + step + 1)) < 0)
 			goto done;
 
 		written = out->asize - out->size;
diff --git a/tests/core/zstream.c b/tests/core/zstream.c
index 63ff8c9..7ba9424 100644
--- a/tests/core/zstream.c
+++ b/tests/core/zstream.c
@@ -68,31 +68,76 @@ void test_core_zstream__buffer(void)
 
 #define BIG_STRING_PART "Big Data IS Big - Long Data IS Long - We need a buffer larger than 1024 x 1024 to make sure we trigger chunked compression - Big Big Data IS Bigger than Big - Long Long Data IS Longer than Long"
 
-void test_core_zstream__big_data(void)
+static void compress_input_various_ways(git_buf *input)
 {
-	git_buf in = GIT_BUF_INIT;
-	git_buf out = GIT_BUF_INIT;
-	size_t scan;
+	git_buf out1 = GIT_BUF_INIT, out2 = GIT_BUF_INIT;
+	size_t i, fixed_size = max(input->size / 2, 256);
+	char *fixed = git__malloc(fixed_size);
+	cl_assert(fixed);
 
-	/* make a big string that's easy to compress */
-	while (in.size < 1024 * 1024)
-		cl_git_pass(git_buf_put(&in, BIG_STRING_PART, strlen(BIG_STRING_PART)));
+	/* compress with deflatebuf */
 
-	cl_git_pass(git_zstream_deflatebuf(&out, in.ptr, in.size));
-	assert_zlib_equal(in.ptr, in.size, out.ptr, out.size);
+	cl_git_pass(git_zstream_deflatebuf(&out1, input->ptr, input->size));
+	assert_zlib_equal(input->ptr, input->size, out1.ptr, out1.size);
 
-	git_buf_free(&out);
+	/* compress with various fixed-size buffers (accumulating the output) */
 
-	/* make a big string that's hard to compress */
+	for (i = 0; i < 3; ++i) {
+		git_zstream zs = GIT_ZSTREAM_INIT;
+		size_t use_fixed_size;
 
-	srand(0xabad1dea);
-	for (scan = 0; scan < in.size; ++scan)
-		in.ptr[scan] = (char)rand();
+		switch (i) {
+		case 0: use_fixed_size = 256; break;
+		case 1: use_fixed_size = fixed_size / 2; break;
+		case 2: use_fixed_size = fixed_size; break;
+		}
+		cl_assert(use_fixed_size <= fixed_size);
 
-	cl_git_pass(git_zstream_deflatebuf(&out, in.ptr, in.size));
-	assert_zlib_equal(in.ptr, in.size, out.ptr, out.size);
+		cl_git_pass(git_zstream_init(&zs));
+		cl_git_pass(git_zstream_set_input(&zs, input->ptr, input->size));
 
-	git_buf_free(&out);
+		while (!git_zstream_done(&zs)) {
+			size_t written = use_fixed_size;
+			cl_git_pass(git_zstream_get_output(fixed, &written, &zs));
+			cl_git_pass(git_buf_put(&out2, fixed, written));
+		}
+
+		git_zstream_free(&zs);
+		assert_zlib_equal(input->ptr, input->size, out2.ptr, out2.size);
+
+		/* did both approaches give the same data? */
+		cl_assert_equal_sz(out1.size, out2.size);
+		cl_assert(!memcmp(out1.ptr, out2.ptr, out1.size));
+
+		git_buf_free(&out2);
+	}
+
+	git_buf_free(&out1);
+	git__free(fixed);
+}
+
+void test_core_zstream__big_data(void)
+{
+	git_buf in = GIT_BUF_INIT;
+	size_t scan, target;
+
+	for (target = 1024; target <= 1024 * 1024 * 4; target *= 8) {
+
+		/* make a big string that's easy to compress */
+		git_buf_clear(&in);
+		while (in.size < target)
+			cl_git_pass(
+				git_buf_put(&in, BIG_STRING_PART, strlen(BIG_STRING_PART)));
+
+		compress_input_various_ways(&in);
+
+		/* make a big string that's hard to compress */
+		srand(0xabad1dea);
+		for (scan = 0; scan < in.size; ++scan)
+			in.ptr[scan] = (char)rand();
+
+		compress_input_various_ways(&in);
+	}
 
 	git_buf_free(&in);
 }