Commit af1d5239a16976bd1b8d0a9358497f043bdfed14

Carlos Martín Nieto 2015-08-14T21:10:12

index: keep a hash table as well as a vector of entries. The hash table allows quick lookup of specific paths, while we use the vector for enumeration.

diff --git a/src/index.c b/src/index.c
index e424698..e904ffc 100644
--- a/src/index.c
+++ b/src/index.c
@@ -17,6 +17,7 @@
 #include "pathspec.h"
 #include "ignore.h"
 #include "blob.h"
+#include "idxmap.h"
 
 #include "git2/odb.h"
 #include "git2/oid.h"
@@ -24,6 +25,9 @@
 #include "git2/config.h"
 #include "git2/sys/index.h"
 
+GIT__USE_IDXMAP
+GIT__USE_IDXMAP_ICASE
+
 static int index_apply_to_wd_diff(git_index *index, int action, const git_strarray *paths,
 				  unsigned int flags,
 				  git_index_matched_path_cb cb, void *payload);
@@ -425,6 +429,7 @@ int git_index_open(git_index **index_out, const char *index_path)
 	}
 
 	if (git_vector_init(&index->entries, 32, git_index_entry_cmp) < 0 ||
+		git_idxmap_alloc(&index->entries_map) < 0 ||
 		git_vector_init(&index->names, 8, conflict_name_cmp) < 0 ||
 		git_vector_init(&index->reuc, 8, reuc_cmp) < 0 ||
 		git_vector_init(&index->deleted, 8, git_index_entry_cmp) < 0)
@@ -462,6 +467,7 @@ static void index_free(git_index *index)
 	assert(!git_atomic_get(&index->readers));
 
 	git_index_clear(index);
+	git_idxmap_free(index->entries_map);
 	git_vector_free(&index->entries);
 	git_vector_free(&index->names);
 	git_vector_free(&index->reuc);
@@ -508,6 +514,11 @@ static int index_remove_entry(git_index *index, size_t pos)
 	if (entry != NULL)
 		git_tree_cache_invalidate_path(index->tree, entry->path);
 
+	if (index->ignore_case)
+		git_idxmap_icase_delete((khash_t(idxicase) *) index->entries_map, entry);
+	else
+		git_idxmap_delete(index->entries_map, entry);
+
 	error = git_vector_remove(&index->entries, pos);
 
 	if (!error) {
@@ -535,6 +546,7 @@ int git_index_clear(git_index *index)
 		return -1;
 	}
 
+	git_idxmap_clear(index->entries_map);
 	while (!error && index->entries.length > 0)
 		error = index_remove_entry(index, index->entries.length - 1);
 	index_free_deleted(index);
@@ -804,16 +816,24 @@ const git_index_entry *git_index_get_byindex(
 const git_index_entry *git_index_get_bypath(
 	git_index *index, const char *path, int stage)
 {
-	size_t pos;
+	khiter_t pos;
+	git_index_entry key = {{ 0 }};
 
 	assert(index);
 
-	if (index_find(&pos, index, path, 0, stage, true) < 0) {
-		giterr_set(GITERR_INDEX, "Index does not contain %s", path);
-		return NULL;
-	}
+	key.path = path;
+	GIT_IDXENTRY_STAGE_SET(&key, stage);
+
+	if (index->ignore_case)
+		pos = git_idxmap_icase_lookup_index((khash_t(idxicase) *) index->entries_map, &key);
+	else
+		pos = git_idxmap_lookup_index(index->entries_map, &key);
+
+	if (git_idxmap_valid_index(index->entries_map, pos))
+		return git_idxmap_value_at(index->entries_map, pos);
 
-	return git_index_get_byindex(index, pos);
+	giterr_set(GITERR_INDEX, "Index does not contain %s", path);
+	return NULL;
 }
 
 void git_index_entry__init_from_stat(
@@ -1139,6 +1159,13 @@ static int index_insert(
 		 * check for dups, this is actually cheaper in the long run.)
 		 */
 		error = git_vector_insert_sorted(&index->entries, entry, index_no_dups);
+
+		if (error == 0) {
+			if (index->ignore_case)
+				git_idxmap_icase_insert((khash_t(idxicase) *) index->entries_map, entry, entry, error);			else
+				git_idxmap_insert(index->entries_map, entry, entry, error);
+
+		}
 	}
 
 	if (error < 0) {
@@ -1364,12 +1391,20 @@ int git_index_remove(git_index *index, const char *path, int stage)
 {
 	int error;
 	size_t position;
+	git_index_entry remove_key = {{ 0 }};
 
 	if (git_mutex_lock(&index->lock) < 0) {
 		giterr_set(GITERR_OS, "Failed to lock index");
 		return -1;
 	}
 
+	remove_key.path = path;
+	GIT_IDXENTRY_STAGE_SET(&remove_key, stage);
+	if (index->ignore_case)
+		git_idxmap_icase_delete((khash_t(idxicase) *) index->entries_map, &remove_key);
+	else
+		git_idxmap_delete(index->entries_map, &remove_key);
+
 	if (index_find(&position, index, path, 0, stage, false) < 0) {
 		giterr_set(
 			GITERR_INDEX, "Index does not contain %s at stage %d", path, stage);
@@ -2180,6 +2215,11 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size)
 
 	assert(!index->entries.length);
 
+	if (index->ignore_case)
+		kh_resize(idxicase, (khash_t(idxicase) *) index->entries_map, header.entry_count);
+	else
+		kh_resize(idx, index->entries_map, header.entry_count);
+
 	/* Parse all the entries */
 	for (i = 0; i < header.entry_count && buffer_size > INDEX_FOOTER_SIZE; ++i) {
 		git_index_entry *entry;
@@ -2196,6 +2236,16 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size)
 			goto done;
 		}
 
+		if (index->ignore_case)
+			git_idxmap_icase_insert((khash_t(idxicase) *) index->entries_map, entry, entry, error);
+		else
+			git_idxmap_insert(index->entries_map, entry, entry, error);
+
+		if (error < 0) {
+			index_entry_free(entry);
+			goto done;
+		}
+
 		seek_forward(entry_size);
 	}
 
@@ -2610,7 +2660,13 @@ int git_index_read_tree(git_index *index, const git_tree *tree)
 {
 	int error = 0;
 	git_vector entries = GIT_VECTOR_INIT;
+	git_idxmap *entries_map;
 	read_tree_data data;
+	size_t i;
+	git_index_entry *e;
+
+	if (git_idxmap_alloc(&entries_map) < 0)
+		return -1;
 
 	git_vector_set_cmp(&entries, index->entries._cmp); /* match sort */
 
@@ -2625,23 +2681,44 @@ int git_index_read_tree(git_index *index, const git_tree *tree)
 	if (index_sort_if_needed(index, true) < 0)
 		return -1;
 
-	error = git_tree_walk(tree, GIT_TREEWALK_POST, read_tree_cb, &data);
+	if ((error = git_tree_walk(tree, GIT_TREEWALK_POST, read_tree_cb, &data)) < 0)
+		goto cleanup;
 
-	if (!error) {
-		git_vector_sort(&entries);
+	if (index->ignore_case)
+		kh_resize(idxicase, (khash_t(idxicase) *) entries_map, entries.length);
+	else
+		kh_resize(idx, entries_map, entries.length);
 
-		if ((error = git_index_clear(index)) < 0)
-			/* well, this isn't good */;
-		else if (git_mutex_lock(&index->lock) < 0) {
-			giterr_set(GITERR_OS, "Unable to acquire index lock");
-			error = -1;
-		} else {
-			git_vector_swap(&entries, &index->entries);
-			git_mutex_unlock(&index->lock);
+	git_vector_foreach(&entries, i, e) {
+		if (index->ignore_case)
+			git_idxmap_icase_insert((git_idxmap_icase *) entries_map, e, e, error);
+		else
+			git_idxmap_insert(entries_map, e, e, error);
+
+		if (error < 0) {
+			giterr_set(GITERR_INDEX, "failed to insert entry into map");
+			return error;
 		}
 	}
 
+	error = 0;
+
+	git_vector_sort(&entries);
+
+	if ((error = git_index_clear(index)) < 0)
+		/* well, this isn't good */;
+	else if (git_mutex_lock(&index->lock) < 0) {
+		giterr_set(GITERR_OS, "Unable to acquire index lock");
+		error = -1;
+	} else {
+		git_vector_swap(&entries, &index->entries);
+		entries_map = git__swap(index->entries_map, entries_map);
+		git_mutex_unlock(&index->lock);
+	}
+
+cleanup:
 	git_vector_free(&entries);
+	git_idxmap_free(entries_map);
 	if (error < 0)
 		return error;
 
diff --git a/src/index.h b/src/index.h
index 9c60b01..546e677 100644
--- a/src/index.h
+++ b/src/index.h
@@ -10,6 +10,7 @@
 #include "fileops.h"
 #include "filebuf.h"
 #include "vector.h"
+#include "idxmap.h"
 #include "tree-cache.h"
 #include "git2/odb.h"
 #include "git2/index.h"
@@ -25,6 +26,7 @@ struct git_index {
 	git_oid checksum;   /* checksum at the end of the file */
 
 	git_vector entries;
+	git_idxmap *entries_map;
 
 	git_mutex  lock;    /* lock held while entries is being changed */
 	git_vector deleted; /* deleted entries if readers > 0 */