Commit 8fe335382a27296ea7fb575cc0644a18b6e81c37

Patrick Steinhardt 2017-05-19T12:45:48

tests: index::version: verify we write compressed index entries While we do have a test which checks whether a written index of version 4 has the correct version set, we do not check whether this actually enables path compression for index entries. This commit adds a new test by adding a number of index entries with equal path prefixes to the index and subsequently flushing that to disk. With suffix compression enabled by index version 4, only the last few bytes of these paths will actually have to be written to the index, saving a lot of disk space. For the test, differences are about an order of magnitude, allowing us to easily verify without taking a deeper look at actual on-disk contents.

diff --git a/tests/index/version.c b/tests/index/version.c
index c3cb0cd..fc139f6 100644
--- a/tests/index/version.c
+++ b/tests/index/version.c
@@ -62,3 +62,63 @@ void test_index_version__can_write_v4(void)
 
 	git_index_free(index);
 }
+
+/*
+ * Add many entries sharing a long common path prefix to the index of the
+ * "indexv4" sandbox, write it out and check that the file is small enough.
+ */
+void test_index_version__v4_uses_path_compression(void)
+{
+	git_index_entry entry;
+	git_index *index;
+	char path[250], buf[1];
+	struct stat st;
+	char i, j;
+
+	memset(path, 'a', sizeof(path));
+	memset(buf, 'a', sizeof(buf));
+
+	memset(&entry, 0, sizeof(entry));
+	entry.path = path;
+	entry.mode = GIT_FILEMODE_BLOB;
+
+	g_repo = cl_git_sandbox_init("indexv4");
+	cl_git_pass(git_repository_index(&index, g_repo));
+
+	/* write 26 * 26 = 676 paths of 249 characters (250 bytes with NUL) */
+	for (i = 'a'; i <= 'z'; i++) {
+		for (j = 'a'; j <= 'z'; j++) {
+			path[ARRAY_SIZE(path) - 3] = i;
+			path[ARRAY_SIZE(path) - 2] = j;
+			path[ARRAY_SIZE(path) - 1] = '\0';
+			cl_git_pass(git_index_add_frombuffer(index, &entry, buf, sizeof(buf)));
+		}
+	}
+
+	cl_git_pass(git_index_write(index));
+	cl_git_pass(p_stat(git_index_path(index), &st));
+
+	/*
+	 * Without path compression, the written paths alone
+	 * would take at least
+	 *
+	 *    (entries * pathlen) = len
+	 *    (676 * 250) = 169000
+	 *
+	 * bytes. As index v4 uses suffix-compression and our
+	 * written paths only differ in the last two characters,
+	 * this number will be much smaller, e.g.
+	 *
+	 *    (1 * pathlen) + (675 * 2) = len
+	 *    250 + 1350 = 1600
+	 *
+	 * bytes. These calculations do not include additional
+	 * metadata of the index, e.g. OIDs or index extensions.
+	 * Including those we get an index of approx. 200kB
+	 * without compression and 40kB with compression, so a
+	 * threshold in between verifies that compression is used.
+	 */
+	cl_assert_(st.st_size < 75000, "path compression not enabled");
+
+	git_index_free(index);
+}