Commit 5625d86b994fd81f1b0d887890e8168d7b5f46cc

David Turner 2016-05-17T15:40:32

index: support index v4 Support reading and writing index v4. Index v4 uses a very simple compression scheme for pathnames, but is otherwise similar to index v3. Signed-off-by: David Turner <dturner@twitter.com>

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 92bc0c1..e4fd68d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,8 @@ v0.24 + 1
 * Do not fail when deleting remotes in the presence of broken
   global configs which contain branches.
 
+* Support for reading and writing git index v4 files
+
 ### API additions
 
 * You can now get the user-agent used by libgit2 using the
@@ -49,6 +51,9 @@ v0.24 + 1
 * `git_diff_from_buffer` can create a `git_diff` object from the contents
   of a git-style patch file.
 
+* `git_index_version()` and `git_index_set_version()` to get and set
+  the index version
+
 ### API removals
 
 * `git_blob_create_fromchunks()` has been removed in favour of
diff --git a/include/git2/index.h b/include/git2/index.h
index 466765b..e58b328 100644
--- a/include/git2/index.h
+++ b/include/git2/index.h
@@ -252,6 +252,31 @@ GIT_EXTERN(int) git_index_caps(const git_index *index);
 GIT_EXTERN(int) git_index_set_caps(git_index *index, int caps);
 
 /**
+ * Get index on-disk version.
+ *
+ * Valid return values are 2, 3, or 4.  If 2 or 3 is returned,
+ * git_index_write may write either of those two versions: version 3
+ * when the index holds extended data, version 2 when it does not.
+ *
+ * @param index An existing index object
+ * @return the index version
+ */
+GIT_EXTERN(unsigned int) git_index_version(git_index *index);
+
+/**
+ * Set index on-disk version.
+ *
+ * Valid values are 2, 3, or 4.  If 2 or 3 is given, git_index_write
+ * chooses between those two versions itself, writing version 3 only
+ * if it is necessary to accurately represent the index.
+ *
+ * @param index An existing index object
+ * @param version The new version number
+ * @return 0 on success, -1 if version is not 2, 3, or 4
+ */
+GIT_EXTERN(int) git_index_set_version(git_index *index, unsigned int version);
+
+/**
  * Update the contents of an existing index object in memory by reading
  * from the hard disk.
  *
diff --git a/src/index.c b/src/index.c
index 9908ba6..bc15959 100644
--- a/src/index.c
+++ b/src/index.c
@@ -19,6 +19,7 @@
 #include "blob.h"
 #include "idxmap.h"
 #include "diff.h"
+#include "varint.h"
 
 #include "git2/odb.h"
 #include "git2/oid.h"
@@ -65,8 +66,11 @@ static int index_apply_to_wd_diff(git_index *index, int action, const git_strarr
 static const size_t INDEX_FOOTER_SIZE = GIT_OID_RAWSZ;
 static const size_t INDEX_HEADER_SIZE = 12;
 
-static const unsigned int INDEX_VERSION_NUMBER = 2;
+static const unsigned int INDEX_VERSION_NUMBER_DEFAULT = 2; /* assigned in git_index_open */
+static const unsigned int INDEX_VERSION_NUMBER_LB = 2; /* lowest version accepted */
 static const unsigned int INDEX_VERSION_NUMBER_EXT = 3;
+static const unsigned int INDEX_VERSION_NUMBER_COMP = 4; /* entry paths are compressed from this version on */
+static const unsigned int INDEX_VERSION_NUMBER_UB = 4; /* highest version accepted */
 
 static const unsigned int INDEX_HEADER_SIG = 0x44495243;
 static const char INDEX_EXT_TREECACHE_SIG[] = {'T', 'R', 'E', 'E'};
@@ -434,6 +438,7 @@ int git_index_open(git_index **index_out, const char *index_path)
 	index->entries_search = git_index_entry_srch;
 	index->entries_search_path = index_entry_srch_path;
 	index->reuc_search = reuc_srch;
+	index->version = INDEX_VERSION_NUMBER_DEFAULT;
 
 	if (index_path != NULL && (error = git_index_read(index, true)) < 0)
 		goto fail;
@@ -747,6 +752,28 @@ done:
 	return 0;
 }
 
+unsigned git_index_version(git_index *index)
+{
+	assert(index);
+	/* The field is set on open, when an index is parsed, or by git_index_set_version. */
+	return index->version;
+}
+
+int git_index_set_version(git_index *index, unsigned int version)
+{
+	assert(index);
+
+	/* Accept only versions inside the supported [LB, UB] range. */
+	if (version >= INDEX_VERSION_NUMBER_LB &&
+	    version <= INDEX_VERSION_NUMBER_UB) {
+		index->version = version;
+		return 0;
+	}
+
+	giterr_set(GITERR_INDEX, "Invalid version number");
+	return -1;
+}
+
 int git_index_write(git_index *index)
 {
 	git_indexwriter writer = GIT_INDEXWRITER_INIT;
@@ -2262,12 +2289,15 @@ static size_t read_entry(
 	git_index_entry **out,
 	git_index *index,
 	const void *buffer,
-	size_t buffer_size)
+	size_t buffer_size,
+	const char **last)
 {
 	size_t path_length, entry_size;
 	const char *path_ptr;
 	struct entry_short source;
 	git_index_entry entry = {{0}};
+	bool compressed = index->version >= INDEX_VERSION_NUMBER_COMP;
+	char *tmp_path = NULL;
 
 	if (INDEX_FOOTER_SIZE + minimal_entry_size > buffer_size)
 		return 0;
@@ -2302,33 +2332,56 @@ static size_t read_entry(
 	} else
 		path_ptr = (const char *) buffer + offsetof(struct entry_short, path);
 
-	path_length = entry.flags & GIT_IDXENTRY_NAMEMASK;
-
-	/* if this is a very long string, we must find its
-	 * real length without overflowing */
-	if (path_length == 0xFFF) {
-		const char *path_end;
+	if (!compressed) {
+		path_length = entry.flags & GIT_IDXENTRY_NAMEMASK;
 
-		path_end = memchr(path_ptr, '\0', buffer_size);
-		if (path_end == NULL)
-			return 0;
+		/* if this is a very long string, we must find its
+		 * real length without overflowing */
+		if (path_length == 0xFFF) {
+			const char *path_end;
 
-		path_length = path_end - path_ptr;
-	}
+			path_end = memchr(path_ptr, '\0', buffer_size);
+			if (path_end == NULL)
+				return 0;
 
-	if (entry.flags & GIT_IDXENTRY_EXTENDED)
-		entry_size = long_entry_size(path_length);
-	else
-		entry_size = short_entry_size(path_length);
+			path_length = path_end - path_ptr;
+		}
 
-	if (INDEX_FOOTER_SIZE + entry_size > buffer_size)
-		return 0;
+		if (entry.flags & GIT_IDXENTRY_EXTENDED)
+			entry_size = long_entry_size(path_length);
+		else
+			entry_size = short_entry_size(path_length);
 
-	entry.path = (char *)path_ptr;
+		if (INDEX_FOOTER_SIZE + entry_size > buffer_size)
+			return 0;
 
-	if (index_entry_dup(out, index, &entry) < 0)
+		entry.path = (char *)path_ptr;
+	} else {
+		size_t varint_len;
+		size_t shared = git_decode_varint((const unsigned char *)path_ptr, 
+						  &varint_len);
+		size_t len = strlen(path_ptr + varint_len);
+		size_t last_len = strlen(*last);
+		size_t tmp_path_len;
+
+		if (varint_len == 0)
+			return index_error_invalid("incorrect prefix length");
+
+		GITERR_CHECK_ALLOC_ADD(&tmp_path_len, shared, len + 1);
+		tmp_path = git__malloc(tmp_path_len);
+		GITERR_CHECK_ALLOC(tmp_path);
+		memcpy(tmp_path, last, last_len);
+		memcpy(tmp_path + last_len, path_ptr + varint_len, len);
+		entry_size = long_entry_size(shared + len);
+		entry.path = tmp_path;
+	}
+
+	if (index_entry_dup(out, index, &entry) < 0) {
+		git__free(tmp_path);
 		return 0;
+	}
 
+	git__free(tmp_path);
 	return entry_size;
 }
 
@@ -2341,8 +2394,8 @@ static int read_header(struct index_header *dest, const void *buffer)
 		return index_error_invalid("incorrect header signature");
 
 	dest->version = ntohl(source->version);
-	if (dest->version != INDEX_VERSION_NUMBER_EXT &&
-		dest->version != INDEX_VERSION_NUMBER)
+	if (dest->version < INDEX_VERSION_NUMBER_LB ||
+		dest->version > INDEX_VERSION_NUMBER_UB)
 		return index_error_invalid("incorrect header version");
 
 	dest->entry_count = ntohl(source->entry_count);
@@ -2395,6 +2448,8 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size)
 	unsigned int i;
 	struct index_header header = { 0 };
 	git_oid checksum_calculated, checksum_expected;
+	const char **last = NULL;
+	const char *empty = "";
 
 #define seek_forward(_increase) { \
 	if (_increase >= buffer_size) { \
@@ -2415,6 +2470,10 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size)
 	if ((error = read_header(&header, buffer)) < 0)
 		return error;
 
+	index->version = header.version;
+	if (index->version >= INDEX_VERSION_NUMBER_COMP)
+		last = &empty;
+
 	seek_forward(INDEX_HEADER_SIZE);
 
 	assert(!index->entries.length);
@@ -2427,7 +2486,7 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size)
 	/* Parse all the entries */
 	for (i = 0; i < header.entry_count && buffer_size > INDEX_FOOTER_SIZE; ++i) {
 		git_index_entry *entry;
-		size_t entry_size = read_entry(&entry, index, buffer, buffer_size);
+		size_t entry_size = read_entry(&entry, index, buffer, buffer_size, last);
 
 		/* 0 bytes read means an object corruption */
 		if (entry_size == 0) {
@@ -2518,15 +2577,31 @@ static bool is_index_extended(git_index *index)
 	return (extended > 0);
 }
 
-static int write_disk_entry(git_filebuf *file, git_index_entry *entry)
+static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const char **last)
 {
 	void *mem = NULL;
 	struct entry_short *ondisk;
 	size_t path_len, disk_size;
 	char *path;
+	const char *path_start = entry->path;
+	size_t same_len = 0;
 
 	path_len = ((struct entry_internal *)entry)->pathlen;
 
+	if (last) {
+		const char *last_c = *last;
+
+		while (*path_start == *last_c) {
+			if (!*path_start || !*last_c)
+				break;
+			++path_start;
+			++last_c;
+			++same_len;
+		}
+		path_len -= same_len;
+		*last = entry->path;
+	}
+
 	if (entry->flags & GIT_IDXENTRY_EXTENDED)
 		disk_size = long_entry_size(path_len);
 	else
@@ -2574,7 +2649,12 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry)
 	else
 		path = ondisk->path;
 
-	memcpy(path, entry->path, path_len);
+	if (last) {
+		path += git_encode_varint((unsigned char *) path,
+					  disk_size,
+					  path_len - same_len);
+	}
+	memcpy(path, path_start, path_len);
 
 	return 0;
 }
@@ -2585,6 +2665,8 @@ static int write_entries(git_index *index, git_filebuf *file)
 	size_t i;
 	git_vector case_sorted, *entries;
 	git_index_entry *entry;
+	const char **last = NULL;
+	const char *empty = "";
 
 	/* If index->entries is sorted case-insensitively, then we need
 	 * to re-sort it case-sensitively before writing */
@@ -2596,8 +2678,11 @@ static int write_entries(git_index *index, git_filebuf *file)
 		entries = &index->entries;
 	}
 
+	if (index->version >= INDEX_VERSION_NUMBER_COMP)
+		last = &empty;
+
 	git_vector_foreach(entries, i, entry)
-		if ((error = write_disk_entry(file, entry)) < 0)
+		if ((error = write_disk_entry(file, entry, last)) < 0)
 			break;
 
 	if (index->ignore_case)
@@ -2762,8 +2847,12 @@ static int write_index(git_oid *checksum, git_index *index, git_filebuf *file)
 
 	assert(index && file);
 
-	is_extended = is_index_extended(index);
-	index_version_number = is_extended ? INDEX_VERSION_NUMBER_EXT : INDEX_VERSION_NUMBER;
+	if (index->version <= INDEX_VERSION_NUMBER_EXT)  {
+		is_extended = is_index_extended(index);
+		index_version_number = is_extended ? INDEX_VERSION_NUMBER_EXT : INDEX_VERSION_NUMBER_LB;
+	} else {
+		index_version_number = index->version;
+	}
 
 	header.signature = htonl(INDEX_HEADER_SIG);
 	header.version = htonl(index_version_number);
diff --git a/src/index.h b/src/index.h
index 8b9b494..9918f14 100644
--- a/src/index.h
+++ b/src/index.h
@@ -46,6 +46,8 @@ struct git_index {
 	git_vector_cmp entries_search;
 	git_vector_cmp entries_search_path;
 	git_vector_cmp reuc_search;
+
+	unsigned int version;
 };
 
 struct git_index_conflict_iterator {
diff --git a/tests/index/version.c b/tests/index/version.c
new file mode 100644
index 0000000..3fd240d
--- /dev/null
+++ b/tests/index/version.c
@@ -0,0 +1,41 @@
+#include "clar_libgit2.h"
+#include "index.h"
+
+static git_repository *g_repo = NULL;
+
+void test_index_version__can_write_v4(void)
+{
+	git_index *index;
+	const git_index_entry *entry;
+
+	g_repo = cl_git_sandbox_init("filemodes");
+	cl_git_pass(git_repository_index(&index, g_repo));
+	/* The "filemodes" fixture is expected to carry a version 2 index with 6 entries. */
+	cl_assert(index->on_disk);
+	cl_assert(git_index_version(index) == 2);
+
+	cl_assert(git_index_entrycount(index) == 6);
+	/* Request version 4 and write the index out. */
+	cl_git_pass(git_index_set_version(index, 4));
+
+	cl_git_pass(git_index_write(index));
+	git_index_free(index);
+	/* Fetch the repository index again; it must now report version 4. */
+	cl_git_pass(git_repository_index(&index, g_repo));
+	cl_assert(git_index_version(index) == 4);
+	/* Spot-check that entries can still be looked up by path. */
+	entry = git_index_get_bypath(index, "exec_off", 0);
+	cl_assert(entry);
+	entry = git_index_get_bypath(index, "exec_off2on_staged", 0);
+	cl_assert(entry);
+	entry = git_index_get_bypath(index, "exec_on", 0);
+	cl_assert(entry);
+
+	git_index_free(index);
+}
+
+void test_index_version__cleanup(void)
+{
+	cl_git_sandbox_cleanup(); /* pairs with cl_git_sandbox_init() in the test */
+	g_repo = NULL;
+}