Commit 2a6d0956f0fb6e4c0b9ff2455b4449701bd8e6a2

Carlos Martín Nieto 2018-04-01T14:01:52

revwalk: avoid walking the entire history when output is unsorted As part of reducing our divergence from git, its code for revwalk was ported into our codebase. A detail about when to limit the list was lost and we ended up always calling that code. Limiting the list means performing the walk and creating the final list of commits to be output during the preparation stage. This is unavoidable when sorting and when there are negative refs. We did this even when asked for unsorted output with no negative refs, which you might do to retrieve something like the "last 10 commits on HEAD" for a nominally unsorted meaning of "last". This commit adds and sets a flag indicating when we do need to limit the list, letting us avoid doing so when we can. The previously mentioned query thus no longer loads the entire history of the project during the prepare stage, but loads it iteratively during the walk.

diff --git a/src/revwalk.c b/src/revwalk.c
index b1aa8f9..d913987 100644
--- a/src/revwalk.c
+++ b/src/revwalk.c
@@ -15,6 +15,8 @@
 #include "merge.h"
 #include "vector.h"
 
+static int get_revision(git_commit_list_node **out, git_revwalk *walk, git_commit_list **list);
+
 git_commit_list_node *git_revwalk__commit_lookup(
 	git_revwalk *walk, const git_oid *oid)
 {
@@ -76,10 +78,12 @@ static int push_commit(git_revwalk *walk, const git_oid *oid, int uninteresting,
 	if (commit->uninteresting)
 		return 0;
 
-	if (uninteresting)
+	if (uninteresting) {
+		walk->limited = 1;
 		walk->did_hide = 1;
-	else
+	} else {
 		walk->did_push = 1;
+	}
 
 	commit->uninteresting = uninteresting;
 	list = walk->user_input;
@@ -245,9 +249,10 @@ static int revwalk_next_timesort(git_commit_list_node **object_out, git_revwalk 
 
 static int revwalk_next_unsorted(git_commit_list_node **object_out, git_revwalk *walk)
 {
+	int error;
 	git_commit_list_node *next;
 
-	while ((next = git_commit_list_pop(&walk->iterator_rand)) != NULL) {
+	while (!(error = get_revision(&next, walk, &walk->iterator_rand))) {
 		/* Some commits might become uninteresting after being added to the list */
 		if (!next->uninteresting) {
 			*object_out = next;
@@ -255,15 +260,15 @@ static int revwalk_next_unsorted(git_commit_list_node **object_out, git_revwalk 
 		}
 	}
 
-	giterr_clear();
-	return GIT_ITEROVER;
+	return error;
 }
 
 static int revwalk_next_toposort(git_commit_list_node **object_out, git_revwalk *walk)
 {
+	int error;
 	git_commit_list_node *next;
 
-	while ((next = git_commit_list_pop(&walk->iterator_topo)) != NULL) {
+	while (!(error = get_revision(&next, walk, &walk->iterator_topo))) {
 		/* Some commits might become uninteresting after being added to the list */
 		if (!next->uninteresting) {
 			*object_out = next;
@@ -271,8 +276,7 @@ static int revwalk_next_toposort(git_commit_list_node **object_out, git_revwalk 
 		}
 	}
 
-	giterr_clear();
-	return GIT_ITEROVER;
+	return error;
 }
 
 static int revwalk_next_reverse(git_commit_list_node **object_out, git_revwalk *walk)
@@ -449,6 +453,45 @@ static int limit_list(git_commit_list **out, git_revwalk *walk, git_commit_list 
 	return 0;
 }
 
+static int get_one_revision(git_commit_list_node **out, git_revwalk *walk, git_commit_list **list)
+{
+	int error;
+	git_commit_list_node *commit;
+
+	while(true) {
+		commit = git_commit_list_pop(list);
+		if (!commit) {
+			giterr_clear();
+			return GIT_ITEROVER;
+		}
+
+		/*
+		 * If we did not run limit_list and we must add parents to the
+		 * list ourselves.
+		 */
+		if (!walk->limited) {
+			if ((error = add_parents_to_list(walk, commit, list)) < 0)
+				return error;
+		}
+
+		*out = commit;
+		return 0;
+	}
+}
+
+static int get_revision(git_commit_list_node **out, git_revwalk *walk, git_commit_list **list)
+{
+	int error;
+	git_commit_list_node *commit;
+
+	if ((error = get_one_revision(&commit, walk, list)) < 0)
+		return error;
+
+	/* Here is where we would handle boundary commits if we implement that */
+	*out = commit;
+	return 0;
+}
+
 static int sort_in_topological_order(git_commit_list **out, git_revwalk *walk, git_commit_list *list)
 {
 	git_commit_list *ll = NULL, *newlist, **pptr;
@@ -561,7 +604,7 @@ static int prepare_walk(git_revwalk *walk)
 		}
 	}
 
-	if ((error = limit_list(&commits, walk, commits)) < 0)
+	if (walk->limited && (error = limit_list(&commits, walk, commits)) < 0)
 		return error;
 
 	if (walk->sorting & GIT_SORT_TOPOLOGICAL) {
@@ -664,6 +707,9 @@ void git_revwalk_sorting(git_revwalk *walk, unsigned int sort_mode)
 		walk->get_next = &revwalk_next_unsorted;
 		walk->enqueue = &revwalk_enqueue_unsorted;
 	}
+
+	if (sort_mode != GIT_SORT_NONE)
+		walk->limited = 1;
 }
 
 void git_revwalk_simplify_first_parent(git_revwalk *walk)
@@ -719,6 +765,7 @@ void git_revwalk_reset(git_revwalk *walk)
 	git_commit_list_free(&walk->user_input);
 	walk->first_parent = 0;
 	walk->walking = 0;
+	walk->limited = 0;
 	walk->did_push = walk->did_hide = 0;
 }
 
@@ -740,6 +787,7 @@ int git_revwalk_add_hide_cb(
 
 	walk->hide_cb = hide_cb;
 	walk->hide_cb_payload = payload;
+	walk->limited = 1;
 
 	return 0;
 }
diff --git a/src/revwalk.h b/src/revwalk.h
index 578328b..923a2bc 100644
--- a/src/revwalk.h
+++ b/src/revwalk.h
@@ -36,7 +36,8 @@ struct git_revwalk {
 	unsigned walking:1,
 		first_parent: 1,
 		did_hide: 1,
-		did_push: 1;
+		did_push: 1,
+		limited: 1;
 	unsigned int sorting;
 
 	/* the pushes and hides */