Commit 01757395d9acd01c9c7796b4e1f5c0b75943b810

Stefan Sperling 2019-07-12T19:40:57

speed up commits during rebase by caching a list of merged paths

diff --git a/got/got.c b/got/got.c
index 0558151..e1ed4ae 100644
--- a/got/got.c
+++ b/got/got.c
@@ -3315,7 +3315,8 @@ rebase_complete(struct got_worktree *worktree, struct got_reference *branch,
 }
 
 static const struct got_error *
-rebase_commit(struct got_worktree *worktree, struct got_reference *tmp_branch,
+rebase_commit(struct got_pathlist_head *merged_paths,
+    struct got_worktree *worktree, struct got_reference *tmp_branch,
    struct got_object_id *commit_id, struct got_repository *repo)
 {
 	const struct got_error *error;
@@ -3326,8 +3327,8 @@ rebase_commit(struct got_worktree *worktree, struct got_reference *tmp_branch,
 	if (error)
 		return error;
 
-	error = got_worktree_rebase_commit(&new_commit_id, worktree,
-	    tmp_branch, commit, commit_id, repo);
+	error = got_worktree_rebase_commit(&new_commit_id, merged_paths,
+	    worktree, tmp_branch, commit, commit_id, repo);
 	if (error) {
 		if (error->code != GOT_ERR_COMMIT_NO_CHANGES)
 			goto done;
@@ -3425,10 +3426,12 @@ cmd_rebase(int argc, char *argv[])
 	int ch, rebase_in_progress = 0, abort_rebase = 0, continue_rebase = 0;
 	unsigned char rebase_status = GOT_STATUS_NO_CHANGE;
 	struct got_object_id_queue commits;
+	struct got_pathlist_head merged_paths;
 	const struct got_object_id_queue *parent_ids;
 	struct got_object_qid *qid, *pid;
 
 	SIMPLEQ_INIT(&commits);
+	TAILQ_INIT(&merged_paths);
 
 	while ((ch = getopt(argc, argv, "ac")) != -1) {
 		switch (ch) {
@@ -3502,8 +3505,8 @@ cmd_rebase(int argc, char *argv[])
 		if (error)
 			goto done;
 
-		error = rebase_commit(worktree, tmp_branch, resume_commit_id,
-		    repo);
+		error = rebase_commit(NULL, worktree, tmp_branch,
+		    resume_commit_id, repo);
 		if (error)
 			goto done;
 
@@ -3612,16 +3615,20 @@ cmd_rebase(int argc, char *argv[])
 		parent_id = pid ? pid->id : yca_id;
 		pid = qid;
 
-		error = got_worktree_rebase_merge_files(worktree, parent_id,
-		    commit_id, repo, rebase_progress, &rebase_status,
-		    check_cancelled, NULL);
+		error = got_worktree_rebase_merge_files(&merged_paths,
+		    worktree, parent_id, commit_id, repo, rebase_progress,
+		    &rebase_status, check_cancelled, NULL);
 		if (error)
 			goto done;
 
-		if (rebase_status == GOT_STATUS_CONFLICT)
+		if (rebase_status == GOT_STATUS_CONFLICT) {
+			got_worktree_rebase_pathlist_free(&merged_paths);
 			break;
+		}
 
-		error = rebase_commit(worktree, tmp_branch, commit_id, repo);
+		error = rebase_commit(&merged_paths, worktree, tmp_branch,
+		    commit_id, repo);
+		got_worktree_rebase_pathlist_free(&merged_paths);
 		if (error)
 			goto done;
 	}
diff --git a/include/got_worktree.h b/include/got_worktree.h
index b0aaf62..4581e98 100644
--- a/include/got_worktree.h
+++ b/include/got_worktree.h
@@ -240,21 +240,30 @@ const struct got_error *got_worktree_rebase_in_progress(int *,
 /*
  * Merge changes from the commit currently being rebased into the work tree.
  * Report affected files, including merge conflicts, via the specified
- * progress callback.
+ * progress callback. Also populate a list of affected paths which should
+ * be passed to got_worktree_rebase_commit() after a conflict-free merge.
+ * This list must be initialized with TAILQ_INIT() and disposed of with
+ * got_worktree_rebase_pathlist_free().
  */
 const struct got_error *got_worktree_rebase_merge_files(
-    struct got_worktree *, struct got_object_id *, struct got_object_id *,
-    struct got_repository *, got_worktree_checkout_cb, void *,
-    got_worktree_cancel_cb, void *);
+    struct got_pathlist_head *, struct got_worktree *,
+    struct got_object_id *, struct got_object_id *, struct got_repository *,
+    got_worktree_checkout_cb, void *, got_worktree_cancel_cb, void *);
 
 /*
- * Commit merged rebased changes to a temporary branch and return the
- * ID of the newly created commit.
+ * Commit changes merged by got_worktree_rebase_merge_files() to a temporary
+ * branch and return the ID of the newly created commit. An optional list of
+ * merged paths can be provided; otherwise this function will perform a status
+ * crawl across the entire work tree to find paths to commit.
  */
 const struct got_error *got_worktree_rebase_commit(struct got_object_id **,
-    struct got_worktree *, struct got_reference *, struct got_commit_object *,
+    struct got_pathlist_head *, struct got_worktree *,
+    struct got_reference *, struct got_commit_object *,
     struct got_object_id *, struct got_repository *);
 
+/* Free a list of merged paths from got_worktree_rebase_merge_files(). */
+void got_worktree_rebase_pathlist_free(struct got_pathlist_head *);
+
 /* Postpone the rebase operation. Should be called after a merge conflict. */
 const struct got_error *got_worktree_rebase_postpone(struct got_worktree *);
 
diff --git a/lib/worktree.c b/lib/worktree.c
index 373a13e..00d0c3d 100644
--- a/lib/worktree.c
+++ b/lib/worktree.c
@@ -3742,16 +3742,63 @@ rebase_status(void *arg, unsigned char status, const char *path,
 	return NULL;
 }
 
+struct collect_merged_paths_arg {
+	got_worktree_checkout_cb progress_cb;
+	void *progress_arg;
+	struct got_pathlist_head *merged_paths;
+};
+
+static const struct got_error *
+collect_merged_paths(void *arg, unsigned char status, const char *path)
+{
+	const struct got_error *err;
+	struct collect_merged_paths_arg *a = arg;
+	char *p;
+	struct got_pathlist_entry *new;
+
+	err = (*a->progress_cb)(a->progress_arg, status, path);
+	if (err)
+		return err;
+
+	if (status != GOT_STATUS_MERGE &&
+	    status != GOT_STATUS_ADD &&
+	    status != GOT_STATUS_DELETE &&
+	    status != GOT_STATUS_CONFLICT)
+		return NULL;
+
+	p = strdup(path);
+	if (p == NULL)
+		return got_error_from_errno("strdup");
+
+	err = got_pathlist_insert(&new, a->merged_paths, p, NULL);
+	if (err || new == NULL)
+		free(p);
+	return err;
+}
+
+void
+got_worktree_rebase_pathlist_free(struct got_pathlist_head *merged_paths)
+{
+	struct got_pathlist_entry *pe;
+
+	TAILQ_FOREACH(pe, merged_paths, entry)
+		free((char *)pe->path);
+
+	got_pathlist_free(merged_paths);
+}
+
 const struct got_error *
-got_worktree_rebase_merge_files(struct got_worktree *worktree,
-    struct got_object_id *parent_commit_id, struct got_object_id *commit_id,
-    struct got_repository *repo, got_worktree_checkout_cb progress_cb,
-    void *progress_arg, got_worktree_cancel_cb cancel_cb, void *cancel_arg)
+got_worktree_rebase_merge_files(struct got_pathlist_head *merged_paths,
+    struct got_worktree *worktree, struct got_object_id *parent_commit_id,
+    struct got_object_id *commit_id, struct got_repository *repo,
+    got_worktree_checkout_cb progress_cb, void *progress_arg,
+    got_worktree_cancel_cb cancel_cb, void *cancel_arg)
 {
 	const struct got_error *err;
 	struct got_fileindex *fileindex;
 	char *fileindex_path, *commit_ref_name = NULL;
 	struct got_reference *commit_ref = NULL;
+	struct collect_merged_paths_arg cmp_arg;
 
 	/* Work tree is locked/unlocked during rebase preparation/teardown. */
 
@@ -3787,9 +3834,12 @@ got_worktree_rebase_merge_files(struct got_worktree *worktree,
 		}
 	}
 
+	cmp_arg.progress_cb = progress_cb;
+	cmp_arg.progress_arg = progress_arg;
+	cmp_arg.merged_paths = merged_paths;
 	err = merge_files(worktree, fileindex, fileindex_path,
-	    parent_commit_id, commit_id, repo, progress_cb, progress_arg,
-	    cancel_cb, cancel_arg);
+	    parent_commit_id, commit_id, repo, collect_merged_paths,
+	    &cmp_arg, cancel_cb, cancel_arg);
 done:
 	got_fileindex_free(fileindex);
 	free(fileindex_path);
@@ -3800,8 +3850,8 @@ done:
 
 const struct got_error *
 got_worktree_rebase_commit(struct got_object_id **new_commit_id,
-    struct got_worktree *worktree, struct got_reference *tmp_branch,
-    struct got_commit_object *orig_commit,
+    struct got_pathlist_head *merged_paths, struct got_worktree *worktree,
+    struct got_reference *tmp_branch, struct got_commit_object *orig_commit,
     struct got_object_id *orig_commit_id, struct got_repository *repo)
 {
 	const struct got_error *err, *sync_err;
@@ -3840,10 +3890,26 @@ got_worktree_rebase_commit(struct got_object_id **new_commit_id,
 	cc_arg.commitable_paths = &commitable_paths;
 	cc_arg.worktree = worktree;
 	cc_arg.repo = repo;
-	err = worktree_status(worktree, "", fileindex, repo,
-	    collect_commitables, &cc_arg, NULL, NULL);
-	if (err)
-		goto done;
+	/*
+	 * If possible get the status of individual files directly to
+	 * avoid crawling the entire work tree once per rebased commit.
+	 * TODO: Ideally, merged_paths would contain a list of commitables
+	 * we could use so we could skip worktree_status() entirely.
+	 */
+	if (merged_paths) {
+		struct got_pathlist_entry *pe;
+		TAILQ_FOREACH(pe, merged_paths, entry) {
+			err = worktree_status(worktree, pe->path, fileindex,
+			    repo, collect_commitables, &cc_arg, NULL, NULL);
+			if (err)
+				goto done;
+		}
+	} else {
+		err = worktree_status(worktree, "", fileindex, repo,
+		    collect_commitables, &cc_arg, NULL, NULL);
+		if (err)
+			goto done;
+	}
 
 	if (TAILQ_EMPTY(&commitable_paths)) {
 		/* No-op change; commit will be elided. */