revwalk: avoid walking the entire history when output is unsorted As part of reducing our divergence from git, its code for revwalk was ported into our codebase. A detail about when to limit the list was lost and we ended up always calling that code. Limiting the list means performing the walk and creating the final list of commits to be output during the preparation stage. This is unavoidable when sorting and when there are negative refs. We did this even when asked for unsorted output with no negative refs, which you might do to retrieve something like the "last 10 commits on HEAD" for a nominally unsorted meaning of "last". This commit adds and sets a flag indicating when we do need to limit the list, letting us avoid doing so when we can. The previously mentioned query thus no longer loads the entire history of the project during the prepare stage, but loads it iteratively during the walk.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
diff --git a/src/revwalk.c b/src/revwalk.c
index b1aa8f9..d913987 100644
--- a/src/revwalk.c
+++ b/src/revwalk.c
@@ -15,6 +15,8 @@
#include "merge.h"
#include "vector.h"
+static int get_revision(git_commit_list_node **out, git_revwalk *walk, git_commit_list **list);
+
git_commit_list_node *git_revwalk__commit_lookup(
git_revwalk *walk, const git_oid *oid)
{
@@ -76,10 +78,12 @@ static int push_commit(git_revwalk *walk, const git_oid *oid, int uninteresting,
if (commit->uninteresting)
return 0;
- if (uninteresting)
+ if (uninteresting) {
+ walk->limited = 1;
walk->did_hide = 1;
- else
+ } else {
walk->did_push = 1;
+ }
commit->uninteresting = uninteresting;
list = walk->user_input;
@@ -245,9 +249,10 @@ static int revwalk_next_timesort(git_commit_list_node **object_out, git_revwalk
static int revwalk_next_unsorted(git_commit_list_node **object_out, git_revwalk *walk)
{
+ int error;
git_commit_list_node *next;
- while ((next = git_commit_list_pop(&walk->iterator_rand)) != NULL) {
+ while (!(error = get_revision(&next, walk, &walk->iterator_rand))) {
/* Some commits might become uninteresting after being added to the list */
if (!next->uninteresting) {
*object_out = next;
@@ -255,15 +260,15 @@ static int revwalk_next_unsorted(git_commit_list_node **object_out, git_revwalk
}
}
- giterr_clear();
- return GIT_ITEROVER;
+ return error;
}
static int revwalk_next_toposort(git_commit_list_node **object_out, git_revwalk *walk)
{
+ int error;
git_commit_list_node *next;
- while ((next = git_commit_list_pop(&walk->iterator_topo)) != NULL) {
+ while (!(error = get_revision(&next, walk, &walk->iterator_topo))) {
/* Some commits might become uninteresting after being added to the list */
if (!next->uninteresting) {
*object_out = next;
@@ -271,8 +276,7 @@ static int revwalk_next_toposort(git_commit_list_node **object_out, git_revwalk
}
}
- giterr_clear();
- return GIT_ITEROVER;
+ return error;
}
static int revwalk_next_reverse(git_commit_list_node **object_out, git_revwalk *walk)
@@ -449,6 +453,45 @@ static int limit_list(git_commit_list **out, git_revwalk *walk, git_commit_list
return 0;
}
+static int get_one_revision(git_commit_list_node **out, git_revwalk *walk, git_commit_list **list)
+{
+ int error;
+ git_commit_list_node *commit;
+
+ while(true) {
+ commit = git_commit_list_pop(list);
+ if (!commit) {
+ giterr_clear();
+ return GIT_ITEROVER;
+ }
+
+ /*
+ * If we did not run limit_list and we must add parents to the
+ * list ourselves.
+ */
+ if (!walk->limited) {
+ if ((error = add_parents_to_list(walk, commit, list)) < 0)
+ return error;
+ }
+
+ *out = commit;
+ return 0;
+ }
+}
+
+static int get_revision(git_commit_list_node **out, git_revwalk *walk, git_commit_list **list)
+{
+ int error;
+ git_commit_list_node *commit;
+
+ if ((error = get_one_revision(&commit, walk, list)) < 0)
+ return error;
+
+ /* Here is where we would handle boundary commits if we implement that */
+ *out = commit;
+ return 0;
+}
+
static int sort_in_topological_order(git_commit_list **out, git_revwalk *walk, git_commit_list *list)
{
git_commit_list *ll = NULL, *newlist, **pptr;
@@ -561,7 +604,7 @@ static int prepare_walk(git_revwalk *walk)
}
}
- if ((error = limit_list(&commits, walk, commits)) < 0)
+ if (walk->limited && (error = limit_list(&commits, walk, commits)) < 0)
return error;
if (walk->sorting & GIT_SORT_TOPOLOGICAL) {
@@ -664,6 +707,9 @@ void git_revwalk_sorting(git_revwalk *walk, unsigned int sort_mode)
walk->get_next = &revwalk_next_unsorted;
walk->enqueue = &revwalk_enqueue_unsorted;
}
+
+ if (sort_mode != GIT_SORT_NONE)
+ walk->limited = 1;
}
void git_revwalk_simplify_first_parent(git_revwalk *walk)
@@ -719,6 +765,7 @@ void git_revwalk_reset(git_revwalk *walk)
git_commit_list_free(&walk->user_input);
walk->first_parent = 0;
walk->walking = 0;
+ walk->limited = 0;
walk->did_push = walk->did_hide = 0;
}
@@ -740,6 +787,7 @@ int git_revwalk_add_hide_cb(
walk->hide_cb = hide_cb;
walk->hide_cb_payload = payload;
+ walk->limited = 1;
return 0;
}
diff --git a/src/revwalk.h b/src/revwalk.h
index 578328b..923a2bc 100644
--- a/src/revwalk.h
+++ b/src/revwalk.h
@@ -36,7 +36,8 @@ struct git_revwalk {
unsigned walking:1,
first_parent: 1,
did_hide: 1,
- did_push: 1;
+ did_push: 1,
+ limited: 1;
unsigned int sorting;
/* the pushes and hides */