speed up commits during rebase by caching a list of merged paths
diff --git a/got/got.c b/got/got.c
index 0558151..e1ed4ae 100644
--- a/got/got.c
+++ b/got/got.c
@@ -3315,7 +3315,8 @@ rebase_complete(struct got_worktree *worktree, struct got_reference *branch,
}
static const struct got_error *
-rebase_commit(struct got_worktree *worktree, struct got_reference *tmp_branch,
+rebase_commit(struct got_pathlist_head *merged_paths,
+ struct got_worktree *worktree, struct got_reference *tmp_branch,
struct got_object_id *commit_id, struct got_repository *repo)
{
const struct got_error *error;
@@ -3326,8 +3327,8 @@ rebase_commit(struct got_worktree *worktree, struct got_reference *tmp_branch,
if (error)
return error;
- error = got_worktree_rebase_commit(&new_commit_id, worktree,
- tmp_branch, commit, commit_id, repo);
+ error = got_worktree_rebase_commit(&new_commit_id, merged_paths,
+ worktree, tmp_branch, commit, commit_id, repo);
if (error) {
if (error->code != GOT_ERR_COMMIT_NO_CHANGES)
goto done;
@@ -3425,10 +3426,12 @@ cmd_rebase(int argc, char *argv[])
int ch, rebase_in_progress = 0, abort_rebase = 0, continue_rebase = 0;
unsigned char rebase_status = GOT_STATUS_NO_CHANGE;
struct got_object_id_queue commits;
+ struct got_pathlist_head merged_paths;
const struct got_object_id_queue *parent_ids;
struct got_object_qid *qid, *pid;
SIMPLEQ_INIT(&commits);
+ TAILQ_INIT(&merged_paths);
while ((ch = getopt(argc, argv, "ac")) != -1) {
switch (ch) {
@@ -3502,8 +3505,8 @@ cmd_rebase(int argc, char *argv[])
if (error)
goto done;
- error = rebase_commit(worktree, tmp_branch, resume_commit_id,
- repo);
+ error = rebase_commit(NULL, worktree, tmp_branch,
+ resume_commit_id, repo);
if (error)
goto done;
@@ -3612,16 +3615,20 @@ cmd_rebase(int argc, char *argv[])
parent_id = pid ? pid->id : yca_id;
pid = qid;
- error = got_worktree_rebase_merge_files(worktree, parent_id,
- commit_id, repo, rebase_progress, &rebase_status,
- check_cancelled, NULL);
+ error = got_worktree_rebase_merge_files(&merged_paths,
+ worktree, parent_id, commit_id, repo, rebase_progress,
+ &rebase_status, check_cancelled, NULL);
if (error)
goto done;
- if (rebase_status == GOT_STATUS_CONFLICT)
+ if (rebase_status == GOT_STATUS_CONFLICT) {
+ got_worktree_rebase_pathlist_free(&merged_paths);
break;
+ }
- error = rebase_commit(worktree, tmp_branch, commit_id, repo);
+ error = rebase_commit(&merged_paths, worktree, tmp_branch,
+ commit_id, repo);
+ got_worktree_rebase_pathlist_free(&merged_paths);
if (error)
goto done;
}
diff --git a/include/got_worktree.h b/include/got_worktree.h
index b0aaf62..4581e98 100644
--- a/include/got_worktree.h
+++ b/include/got_worktree.h
@@ -240,21 +240,30 @@ const struct got_error *got_worktree_rebase_in_progress(int *,
/*
* Merge changes from the commit currently being rebased into the work tree.
* Report affected files, including merge conflicts, via the specified
- * progress callback.
+ * progress callback. Also populate a list of affected paths which should
+ * be passed to got_worktree_rebase_commit() after a conflict-free merge.
+ * This list must be initialized with TAILQ_INIT() and disposed of with
+ * got_worktree_rebase_pathlist_free().
*/
const struct got_error *got_worktree_rebase_merge_files(
- struct got_worktree *, struct got_object_id *, struct got_object_id *,
- struct got_repository *, got_worktree_checkout_cb, void *,
- got_worktree_cancel_cb, void *);
+ struct got_pathlist_head *, struct got_worktree *,
+ struct got_object_id *, struct got_object_id *, struct got_repository *,
+ got_worktree_checkout_cb, void *, got_worktree_cancel_cb, void *);
/*
- * Commit merged rebased changes to a temporary branch and return the
- * ID of the newly created commit.
+ * Commit changes merged by got_worktree_rebase_merge_files() to a temporary
+ * branch and return the ID of the newly created commit. The list of merged
+ * paths is optional; if it is NULL, this function performs a status crawl
+ * across the entire work tree to find paths to commit.
*/
const struct got_error *got_worktree_rebase_commit(struct got_object_id **,
- struct got_worktree *, struct got_reference *, struct got_commit_object *,
+ struct got_pathlist_head *, struct got_worktree *,
+ struct got_reference *, struct got_commit_object *,
struct got_object_id *, struct got_repository *);
+/* Free a list of merged paths from got_worktree_rebase_merge_files(). */
+void got_worktree_rebase_pathlist_free(struct got_pathlist_head *);
+
/* Postpone the rebase operation. Should be called after a merge conflict. */
const struct got_error *got_worktree_rebase_postpone(struct got_worktree *);
diff --git a/lib/worktree.c b/lib/worktree.c
index 373a13e..00d0c3d 100644
--- a/lib/worktree.c
+++ b/lib/worktree.c
@@ -3742,16 +3742,63 @@ rebase_status(void *arg, unsigned char status, const char *path,
return NULL;
}
+struct collect_merged_paths_arg {
+ got_worktree_checkout_cb progress_cb;
+ void *progress_arg;
+ struct got_pathlist_head *merged_paths;
+};
+
+static const struct got_error *
+collect_merged_paths(void *arg, unsigned char status, const char *path)
+{
+ const struct got_error *err;
+ struct collect_merged_paths_arg *a = arg;
+ char *p;
+ struct got_pathlist_entry *new;
+
+ err = (*a->progress_cb)(a->progress_arg, status, path);
+ if (err)
+ return err;
+
+ if (status != GOT_STATUS_MERGE &&
+ status != GOT_STATUS_ADD &&
+ status != GOT_STATUS_DELETE &&
+ status != GOT_STATUS_CONFLICT)
+ return NULL;
+
+ p = strdup(path);
+ if (p == NULL)
+ return got_error_from_errno("strdup");
+
+ err = got_pathlist_insert(&new, a->merged_paths, p, NULL);
+ if (err || new == NULL)
+ free(p);
+ return err;
+}
+
+void
+got_worktree_rebase_pathlist_free(struct got_pathlist_head *merged_paths)
+{
+ struct got_pathlist_entry *pe;
+
+ TAILQ_FOREACH(pe, merged_paths, entry)
+ free((char *)pe->path);
+
+ got_pathlist_free(merged_paths);
+}
+
const struct got_error *
-got_worktree_rebase_merge_files(struct got_worktree *worktree,
- struct got_object_id *parent_commit_id, struct got_object_id *commit_id,
- struct got_repository *repo, got_worktree_checkout_cb progress_cb,
- void *progress_arg, got_worktree_cancel_cb cancel_cb, void *cancel_arg)
+got_worktree_rebase_merge_files(struct got_pathlist_head *merged_paths,
+ struct got_worktree *worktree, struct got_object_id *parent_commit_id,
+ struct got_object_id *commit_id, struct got_repository *repo,
+ got_worktree_checkout_cb progress_cb, void *progress_arg,
+ got_worktree_cancel_cb cancel_cb, void *cancel_arg)
{
const struct got_error *err;
struct got_fileindex *fileindex;
char *fileindex_path, *commit_ref_name = NULL;
struct got_reference *commit_ref = NULL;
+ struct collect_merged_paths_arg cmp_arg;
/* Work tree is locked/unlocked during rebase preparation/teardown. */
@@ -3787,9 +3834,12 @@ got_worktree_rebase_merge_files(struct got_worktree *worktree,
}
}
+ cmp_arg.progress_cb = progress_cb;
+ cmp_arg.progress_arg = progress_arg;
+ cmp_arg.merged_paths = merged_paths;
err = merge_files(worktree, fileindex, fileindex_path,
- parent_commit_id, commit_id, repo, progress_cb, progress_arg,
- cancel_cb, cancel_arg);
+ parent_commit_id, commit_id, repo, collect_merged_paths,
+ &cmp_arg, cancel_cb, cancel_arg);
done:
got_fileindex_free(fileindex);
free(fileindex_path);
@@ -3800,8 +3850,8 @@ done:
const struct got_error *
got_worktree_rebase_commit(struct got_object_id **new_commit_id,
- struct got_worktree *worktree, struct got_reference *tmp_branch,
- struct got_commit_object *orig_commit,
+ struct got_pathlist_head *merged_paths, struct got_worktree *worktree,
+ struct got_reference *tmp_branch, struct got_commit_object *orig_commit,
struct got_object_id *orig_commit_id, struct got_repository *repo)
{
const struct got_error *err, *sync_err;
@@ -3840,10 +3890,26 @@ got_worktree_rebase_commit(struct got_object_id **new_commit_id,
cc_arg.commitable_paths = &commitable_paths;
cc_arg.worktree = worktree;
cc_arg.repo = repo;
- err = worktree_status(worktree, "", fileindex, repo,
- collect_commitables, &cc_arg, NULL, NULL);
- if (err)
- goto done;
+ /*
+ * If possible, get the status of individual files directly to
+ * avoid crawling the entire work tree once per rebased commit.
+ * TODO: Ideally, merged_paths would contain a list of commitables
+ * which we could use directly, skipping worktree_status() entirely.
+ */
+ if (merged_paths) {
+ struct got_pathlist_entry *pe;
+ TAILQ_FOREACH(pe, merged_paths, entry) {
+ err = worktree_status(worktree, pe->path, fileindex,
+ repo, collect_commitables, &cc_arg, NULL, NULL);
+ if (err)
+ goto done;
+ }
+ } else {
+ err = worktree_status(worktree, "", fileindex, repo,
+ collect_commitables, &cc_arg, NULL, NULL);
+ if (err)
+ goto done;
+ }
if (TAILQ_EMPTY(&commitable_paths)) {
/* No-op change; commit will be elided. */