Commit acc573cba3212c637d02520fb520326b36dcdd39

Edward Thomson 2015-05-19T14:12:40

Merge pull request #3109 from libgit2/cmn/index-use-diff Use a diff for iteration in index_update_all and index_add_all

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 69f02bb..810538c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -42,6 +42,9 @@ support for HTTPS connections insead of OpenSSL.
   remote functions now take these options or the callbacks instead of
   setting them beforehand.
 
+* The index now uses diffs for `add_all()` and `update_all()` which
+  gives it a speed boost and closer semantics to git.
+
 
 ### API additions
 
diff --git a/src/index.c b/src/index.c
index 7c0c310..8f0976d 100644
--- a/src/index.c
+++ b/src/index.c
@@ -24,6 +24,10 @@
 #include "git2/config.h"
 #include "git2/sys/index.h"
 
+static int index_apply_to_wd_diff(git_index *index, int action, const git_strarray *paths,
+				  unsigned int flags,
+				  git_index_matched_path_cb cb, void *payload);
+
 #define entry_size(type,len) ((offsetof(type, path) + (len) + 8) & ~7)
 #define short_entry_size(len) entry_size(struct entry_short, len)
 #define long_entry_size(len) entry_size(struct entry_long, len)
@@ -2554,6 +2558,13 @@ git_repository *git_index_owner(const git_index *index)
 	return INDEX_OWNER(index);
 }
 
+enum {
+	INDEX_ACTION_NONE = 0,
+	INDEX_ACTION_UPDATE = 1,
+	INDEX_ACTION_REMOVE = 2,
+	INDEX_ACTION_ADDALL = 3,
+};
+
 int git_index_add_all(
 	git_index *index,
 	const git_strarray *paths,
@@ -2564,29 +2575,15 @@ int git_index_add_all(
 	int error;
 	git_repository *repo;
 	git_iterator *wditer = NULL;
-	const git_index_entry *wd = NULL;
-	git_index_entry *entry;
 	git_pathspec ps;
-	const char *match;
-	size_t existing;
 	bool no_fnmatch = (flags & GIT_INDEX_ADD_DISABLE_PATHSPEC_MATCH) != 0;
-	int ignorecase;
-	git_oid blobid;
 
 	assert(index);
 
-	if (INDEX_OWNER(index) == NULL)
-		return create_index_error(-1,
-			"Could not add paths to index. "
-			"Index is not backed up by an existing repository.");
-
 	repo = INDEX_OWNER(index);
 	if ((error = git_repository__ensure_not_bare(repo, "index add all")) < 0)
 		return error;
 
-	if (git_repository__cvar(&ignorecase, repo, GIT_CVAR_IGNORECASE) < 0)
-		return -1;
-
 	if ((error = git_pathspec__init(&ps, paths)) < 0)
 		return error;
 
@@ -2597,77 +2594,115 @@ int git_index_add_all(
 			repo, &ps.pathspec, no_fnmatch)) < 0)
 		goto cleanup;
 
-	if ((error = git_iterator_for_workdir(
-			&wditer, repo, NULL, NULL, 0, ps.prefix, ps.prefix)) < 0)
-		goto cleanup;
+	error = index_apply_to_wd_diff(index, INDEX_ACTION_ADDALL, paths, flags, cb, payload);
 
-	while (!(error = git_iterator_advance(&wd, wditer))) {
+	if (error)
+		giterr_set_after_callback(error);
 
-		/* check if path actually matches */
-		if (!git_pathspec__match(
-				&ps.pathspec, wd->path, no_fnmatch, (bool)ignorecase, &match, NULL))
-			continue;
+cleanup:
+	git_iterator_free(wditer);
+	git_pathspec__clear(&ps);
 
-		/* skip ignored items that are not already in the index */
-		if ((flags & GIT_INDEX_ADD_FORCE) == 0 &&
-			git_iterator_current_is_ignored(wditer) &&
-			index_find(&existing, index, wd->path, 0, 0, true) < 0)
-			continue;
+	return error;
+}
 
-		/* issue notification callback if requested */
-		if (cb && (error = cb(wd->path, match, payload)) != 0) {
-			if (error > 0) /* return > 0 means skip this one */
-				continue;
-			if (error < 0) { /* return < 0 means abort */
-				giterr_set_after_callback(error);
-				break;
-			}
-		}
+struct foreach_diff_data {
+	git_index *index;
+	const git_pathspec *pathspec;
+	unsigned int flags;
+	git_index_matched_path_cb cb;
+	void *payload;
+};
 
-		/* TODO: Should we check if the file on disk is already an exact
-		 * match to the file in the index and skip this work if it is?
-		 */
+static int apply_each_file(const git_diff_delta *delta, float progress, void *payload)
+{
+	struct foreach_diff_data *data = payload;
+	const char *match, *path;
+	int error = 0;
 
-		/* write the blob to disk and get the oid */
-		if ((error = git_blob_create_fromworkdir(&blobid, repo, wd->path)) < 0)
-			break;
+	GIT_UNUSED(progress);
 
-		/* make the new entry to insert */
-		if ((error = index_entry_dup(&entry, INDEX_OWNER(index), wd)) < 0)
-			break;
+	path = delta->old_file.path;
 
-		entry->id = blobid;
+	/* We only want those which match the pathspecs */
+	if (!git_pathspec__match(
+		    &data->pathspec->pathspec, path, false, (bool)data->index->ignore_case,
+		    &match, NULL))
+		return 0;
 
-		/* add working directory item to index */
-		if ((error = index_insert(index, &entry, 1, false)) < 0)
-			break;
+	if (data->cb)
+		error = data->cb(path, match, data->payload);
 
-		git_tree_cache_invalidate_path(index->tree, wd->path);
+	if (error > 0) /* skip this entry */
+		return 0;
+	if (error < 0) /* actual error */
+		return error;
 
-		/* add implies conflict resolved, move conflict entries to REUC */
-		if ((error = index_conflict_to_reuc(index, wd->path)) < 0) {
-			if (error != GIT_ENOTFOUND)
-				break;
-			giterr_clear();
-		}
+	if (delta->status == GIT_DELTA_DELETED)
+		error = git_index_remove_bypath(data->index, path);
+	else
+		error = git_index_add_bypath(data->index, delta->new_file.path);
+
+	return error;
+}
+
+static int index_apply_to_wd_diff(git_index *index, int action, const git_strarray *paths,
+				  unsigned int flags,
+				  git_index_matched_path_cb cb, void *payload)
+{
+	int error;
+	git_diff *diff;
+	git_pathspec ps;
+	git_repository *repo;
+	git_diff_options opts = GIT_DIFF_OPTIONS_INIT;
+	struct foreach_diff_data data = {
+		index,
+		NULL,
+		flags,
+		cb,
+		payload,
+	};
+
+	assert(index);
+	assert(action == INDEX_ACTION_UPDATE || action == INDEX_ACTION_ADDALL);
+
+	repo = INDEX_OWNER(index);
+
+	if (!repo) {
+		return create_index_error(-1,
+			"cannot run update; the index is not backed up by a repository.");
+	}
+
+	/*
+	 * We do the matching ourselves intead of passing the list to
+	 * diff because we want to tell the callback which one
+	 * matched, which we do not know if we ask diff to filter for us.
+	 */
+	if ((error = git_pathspec__init(&ps, paths)) < 0)
+		return error;
+
+	opts.flags = GIT_DIFF_INCLUDE_TYPECHANGE;
+	if (action == INDEX_ACTION_ADDALL) {
+		opts.flags |= GIT_DIFF_INCLUDE_UNTRACKED | GIT_DIFF_RECURSE_IGNORED_DIRS;
+		if (flags == GIT_INDEX_ADD_FORCE)
+			opts.flags |= GIT_DIFF_INCLUDE_IGNORED;
 	}
 
-	if (error == GIT_ITEROVER)
-		error = 0;
+	if ((error = git_diff_index_to_workdir(&diff, repo, index, &opts)) < 0)
+		goto cleanup;
+
+	data.pathspec = &ps;
+	error = git_diff_foreach(diff, apply_each_file, NULL, NULL, &data);
+	git_diff_free(diff);
+
+	if (error) /* make sure error is set if callback stopped iteration */
+		giterr_set_after_callback(error);
 
 cleanup:
-	git_iterator_free(wditer);
 	git_pathspec__clear(&ps);
-
 	return error;
 }
 
-enum {
-	INDEX_ACTION_NONE = 0,
-	INDEX_ACTION_UPDATE = 1,
-	INDEX_ACTION_REMOVE = 2,
-};
-
 static int index_apply_to_all(
 	git_index *index,
 	int action,
@@ -2764,9 +2799,7 @@ int git_index_update_all(
 	git_index_matched_path_cb cb,
 	void *payload)
 {
-	int error = index_apply_to_all(
-		index, INDEX_ACTION_UPDATE, pathspec, cb, payload);
-
+	int error = index_apply_to_wd_diff(index, INDEX_ACTION_UPDATE, pathspec, 0, cb, payload);
 	if (error) /* make sure error is set if callback stopped iteration */
 		giterr_set_after_callback(error);
 
diff --git a/tests/index/addall.c b/tests/index/addall.c
index a7e2583..f13b768 100644
--- a/tests/index/addall.c
+++ b/tests/index/addall.c
@@ -193,6 +193,19 @@ void test_index_addall__repo_lifecycle(void)
 	cl_repo_commit_from_index(NULL, g_repo, NULL, 0, "first commit");
 	check_status(g_repo, 0, 0, 0, 3, 0, 0, 1);
 
+	if (cl_repo_get_bool(g_repo, "core.filemode")) {
+		cl_git_pass(git_index_update_all(index, NULL, NULL, NULL));
+		cl_must_pass(p_chmod(TEST_DIR "/file.zzz", 0777));
+		cl_git_pass(git_index_update_all(index, NULL, NULL, NULL));
+		check_status(g_repo, 0, 0, 1, 3, 0, 0, 1);
+
+		/* go back to what we had before */
+		cl_must_pass(p_chmod(TEST_DIR "/file.zzz", 0666));
+		cl_git_pass(git_index_update_all(index, NULL, NULL, NULL));
+		check_status(g_repo, 0, 0, 0, 3, 0, 0, 1);
+	}
+
+
 	/* attempt to add an ignored file - does nothing */
 	strs[0] = "file.foo";
 	cl_git_pass(git_index_add_all(index, &paths, 0, NULL, NULL));