Commit a48ea31d69a76d6b398d3a1e522a1c7363a9b92a

Russell Belfer 2012-03-21T12:33:09

Reimplement git_status_foreach using git diff. This is an initial reimplementation of status using diff, a la the way that core git does it.

diff --git a/include/git2/status.h b/include/git2/status.h
index 5c45dae..4dc80b9 100644
--- a/include/git2/status.h
+++ b/include/git2/status.h
@@ -31,7 +31,8 @@ GIT_BEGIN_DECL
 #define GIT_STATUS_WT_MODIFIED		(1 << 4)
 #define GIT_STATUS_WT_DELETED		(1 << 5)
 
-#define GIT_STATUS_IGNORED		(1 << 6)
+#define GIT_STATUS_IGNORED			(1 << 6)
+#define GIT_STATUS_WT_UNTRACKED		(1 << 7)
 
 /**
  * Gather file statuses and run a callback for each one.
@@ -47,6 +48,71 @@ GIT_BEGIN_DECL
 GIT_EXTERN(int) git_status_foreach(git_repository *repo, int (*callback)(const char *, unsigned int, void *), void *payload);
 
 /**
+ * Select the files on which to report status.
+ *
+ * - GIT_STATUS_SHOW_INDEX_AND_WORKDIR is the default.  This is the
+ *   rough equivalent of `git status --porcelain` where each file
+ *   will receive a callback indicating its status in the index and
+ *   in the workdir.
+ * - GIT_STATUS_SHOW_INDEX_ONLY will only make callbacks for the index
+ *   side of status.  The status of the index contents relative to
+ *   the HEAD will be given.
+ * - GIT_STATUS_SHOW_WORKDIR_ONLY will only make callbacks for the
+ *   workdir side of status, reporting the status of workdir content
+ *   relative to the index.
+ * - GIT_STATUS_SHOW_INDEX_THEN_WORKDIR behaves like index-only
+ *   followed by workdir-only, causing two callbacks to be issued
+ *   per file (first index then workdir).  This is slightly more
+ *   efficient than making separate calls.  This makes it easier to
+ *   emulate the output of a plain `git status`.
+ */
+typedef enum {
+	GIT_STATUS_SHOW_INDEX_AND_WORKDIR = 0,	/* default: index and workdir status together */
+	GIT_STATUS_SHOW_INDEX_ONLY = 1,		/* index relative to HEAD only */
+	GIT_STATUS_SHOW_WORKDIR_ONLY = 2,	/* workdir relative to index only */
+	GIT_STATUS_SHOW_INDEX_THEN_WORKDIR = 3	/* index pass, then workdir pass; no trailing comma (C89) */
+} git_status_show_t;
+
+/**
+ * Flags to control status callbacks
+ *
+ * - GIT_STATUS_OPT_INCLUDE_UNTRACKED says that callbacks should
+ *   be made on untracked files.  These will only be made if the
+ *   workdir files are included in the status "show" option.
+ * - GIT_STATUS_OPT_INCLUDE_IGNORED says that ignored files should
+ *   get callbacks.  Again, these callbacks will only be made if
+ *   the workdir files are included in the status "show" option.
+ *   Right now, there is no option to include all files in
+ *   directories that are ignored completely.
+ * - GIT_STATUS_OPT_EXCLUDE_UNMODIFIED indicates that callbacks
+ *   do not need to be made on unmodified files.
+ * - GIT_STATUS_OPT_EXCLUDE_SUBMODULES indicates that directories
+ *   which appear to be submodules should just be skipped over.
+ */
+#define GIT_STATUS_OPT_INCLUDE_UNTRACKED  (1 << 0)
+#define GIT_STATUS_OPT_INCLUDE_IGNORED    (1 << 1)
+#define GIT_STATUS_OPT_EXCLUDE_UNMODIFIED (1 << 2)
+#define GIT_STATUS_OPT_EXCLUDE_SUBMODULES (1 << 3)
+
+/**
+ * Options to control which callbacks will be made by
+ * `git_status_foreach_ext()`
+ */
+typedef struct {
+	git_status_show_t show;
+	unsigned int flags;
+} git_status_options;
+
+/**
+ * Gather file status information and run callbacks as requested.
+ */
+GIT_EXTERN(int) git_status_foreach_ext(
+	git_repository *repo,
+	git_status_options *opts,
+	int (*callback)(const char *, unsigned int, void *),
+	void *payload);
+
+/**
  * Get file status for a single file
  *
  * @param status_flags the status value
diff --git a/src/diff.c b/src/diff.c
index 69c944c..469a6c0 100644
--- a/src/diff.c
+++ b/src/diff.c
@@ -538,33 +538,22 @@ int git_diff_merge(
 	const git_diff_list *from)
 {
 	int error = 0;
-	unsigned int i = 0, j = 0;
 	git_vector onto_new;
-	git_diff_delta *delta;
+	git_diff_delta *delta, *o;
+	const git_diff_delta *f;
+	unsigned int i;
 
 	if (git_vector_init(&onto_new, onto->deltas.length, diff_delta__cmp) < 0)
 		return -1;
 
-	while (!error && (i < onto->deltas.length || j < from->deltas.length)) {
-		git_diff_delta       *o = git_vector_get(&onto->deltas, i);
-		const git_diff_delta *f = git_vector_get_const(&from->deltas, j);
-		const char *opath = !o ? NULL : o->old.path ? o->old.path : o->new.path;
-		const char *fpath = !f ? NULL : f->old.path ? f->old.path : f->new.path;
-
-		if (opath && (!fpath || strcmp(opath, fpath) < 0)) {
-			delta = diff_delta__dup(o);
-			i++;
-		} else if (fpath && (!opath || strcmp(opath, fpath) > 0)) {
-			delta = diff_delta__dup(f);
-			j++;
-		} else {
-			delta = diff_delta__merge_like_cgit(o, f);
-			i++;
-			j++;
-		}
-
-		error = !delta ? -1 : git_vector_insert(&onto_new, delta);
-	}
+	GIT_DIFF_COITERATE(
+		onto, from, o, f,
+		delta = diff_delta__dup(o),
+		delta = diff_delta__dup(f),
+		delta = diff_delta__merge_like_cgit(o, f),
+		if ((error = !delta ? -1 : git_vector_insert(&onto_new, delta)) < 0)
+			break;
+		);
 
 	if (error == 0) {
 		git_vector_swap(&onto->deltas, &onto_new);
@@ -577,3 +566,4 @@ int git_diff_merge(
 
 	return error;
 }
+
diff --git a/src/diff.h b/src/diff.h
index 7d69199..058a1f5 100644
--- a/src/diff.h
+++ b/src/diff.h
@@ -21,5 +21,19 @@ struct git_diff_list {
 	git_iterator_type_t new_src;
 };
 
+/* Walk diff lists A and B (either may be NULL) together, comparing deltas by strcmp() of old.path; presumably both lists are already sorted that way. */
+/* AD/BD receive the current deltas (NULL once a list is exhausted); LEFT, RIGHT or BOTH runs, then AFTER, which may `break`. Arguments are evaluated repeatedly. */
+#define GIT_DIFF_COITERATE(A,B,AD,BD,LEFT,RIGHT,BOTH,AFTER) do { \
+	unsigned int _i = 0, _j = 0; int _cmp; \
+	while (((A) && _i < (A)->deltas.length) || ((B) && _j < (B)->deltas.length)) { \
+		(AD) = (A) ? GIT_VECTOR_GET(&(A)->deltas,_i) : NULL; \
+		(BD) = (B) ? GIT_VECTOR_GET(&(B)->deltas,_j) : NULL; \
+		_cmp = !(BD) ? -1 : !(AD) ? 1 : strcmp((AD)->old.path,(BD)->old.path); \
+		if (_cmp < 0) { LEFT; _i++; } \
+		else if (_cmp > 0) { RIGHT; _j++; } \
+		else { BOTH; _i++; _j++; } \
+		AFTER; \
+	} } while (0)
+
 #endif
 
diff --git a/src/status.c b/src/status.c
index 2221db3..eab7c88 100644
--- a/src/status.c
+++ b/src/status.c
@@ -15,6 +15,183 @@
 #include "repository.h"
 #include "ignore.h"
 
+#include "git2/diff.h"
+#include "diff.h"
+/* Look up HEAD and hand back its tree in *tree. Returns 0, GIT_ENOTFOUND when HEAD cannot be resolved, or -1 on any other failure. */
+static int resolve_head_to_tree(git_tree **tree, git_repository *repo)
+{
+	git_reference *head = NULL;
+	git_object *obj = NULL;
+
+	if (git_reference_lookup(&head, repo, GIT_HEAD_FILE) < 0)
+		return -1;
+	/* no oid on the reference itself: presumably HEAD is symbolic, so resolve it */
+	if (git_reference_oid(head) == NULL) {
+		git_reference *resolved;
+
+		if (git_reference_resolve(&resolved, head) < 0) {
+			/* cannot resolve HEAD - probably brand new repo */
+			giterr_clear();
+			git_reference_free(head);
+			return GIT_ENOTFOUND;
+		}
+
+		git_reference_free(head);
+		head = resolved;
+	}
+
+	if (git_object_lookup(&obj, repo, git_reference_oid(head), GIT_OBJ_ANY) < 0)
+		goto fail;
+
+	switch (git_object_type(obj)) {
+	case GIT_OBJ_TREE:
+		*tree = (git_tree *)obj;
+		break;
+	case GIT_OBJ_COMMIT:
+		if (git_commit_tree(tree, (git_commit *)obj) < 0)
+			goto fail;
+		git_object_free(obj);
+		break;
+	default:
+		goto fail;
+	}
+	git_reference_free(head); /* the reference is no longer needed on success either; previously leaked */
+	return 0;
+
+fail:
+	git_object_free(obj);
+	git_reference_free(head);
+	return -1;
+}
+/* Map the delta type of an index diff entry to a GIT_STATUS_INDEX_* value; any type not listed maps to GIT_STATUS_CURRENT. */
+static unsigned int index_delta2status(git_delta_t index_status)
+{
+	unsigned int st = GIT_STATUS_CURRENT;
+
+	switch (index_status) {
+	case GIT_DELTA_ADDED:
+	case GIT_DELTA_COPIED:
+	case GIT_DELTA_RENAMED:
+		st = GIT_STATUS_INDEX_NEW;
+		break;
+	case GIT_DELTA_DELETED:
+		st = GIT_STATUS_INDEX_DELETED;
+		break;
+	case GIT_DELTA_MODIFIED:
+		st = GIT_STATUS_INDEX_MODIFIED;
+		break;
+	default:
+		break;
+	}
+
+	return st;
+}
+/* Map the delta type of a workdir diff entry to a status value; untracked is reported as GIT_STATUS_WT_NEW, unlisted types as GIT_STATUS_CURRENT. */
+static unsigned int workdir_delta2status(git_delta_t workdir_status)
+{
+	unsigned int st = GIT_STATUS_CURRENT;
+
+	switch (workdir_status) {
+	case GIT_DELTA_ADDED:
+	case GIT_DELTA_COPIED:
+	case GIT_DELTA_RENAMED:
+	case GIT_DELTA_UNTRACKED:
+		st = GIT_STATUS_WT_NEW;
+		break;
+	case GIT_DELTA_DELETED:
+		st = GIT_STATUS_WT_DELETED;
+		break;
+	case GIT_DELTA_MODIFIED:
+		st = GIT_STATUS_WT_MODIFIED;
+		break;
+	case GIT_DELTA_IGNORED:
+		st = GIT_STATUS_IGNORED;
+		break;
+	default:
+		break;
+	}
+
+	return st;
+}
+/* Run cb for each delta of the diffs selected by opts->show (NULL opts: index and workdir); a negative cb result stops the walk and is returned, as is a diff failure. */
+int git_status_foreach_ext(
+	git_repository *repo,
+	git_status_options *opts,
+	int (*cb)(const char *, unsigned int, void *),
+	void *cbdata)
+{
+	int err = 0;
+	git_diff_options diffopt;
+	git_diff_list *idx2head = NULL, *wd2idx = NULL;
+	git_tree *head = NULL;
+	git_status_show_t show =
+		opts ? opts->show : GIT_STATUS_SHOW_INDEX_AND_WORKDIR;
+	git_diff_delta *i2h, *w2i;
+
+	assert(show <= GIT_STATUS_SHOW_INDEX_THEN_WORKDIR);
+	/* unresolvable HEAD: report nothing and return 0; any other HEAD lookup failure: -1 */
+	switch (resolve_head_to_tree(&head, repo)) {
+	case 0: break;
+	case GIT_ENOTFOUND: return 0;
+	default: return -1;
+	}
+	/* NOTE: opts->flags is not consulted here; ignored and untracked entries are always requested from diff */
+	memset(&diffopt, 0, sizeof(diffopt));
+	diffopt.flags = GIT_DIFF_INCLUDE_IGNORED | GIT_DIFF_INCLUDE_UNTRACKED;
+
+	if (show != GIT_STATUS_SHOW_WORKDIR_ONLY &&
+		(err = git_diff_index_to_tree(repo, &diffopt, head, &idx2head)) < 0)
+		goto cleanup;
+
+	if (show != GIT_STATUS_SHOW_INDEX_ONLY &&
+		(err = git_diff_workdir_to_index(repo, &diffopt, &wd2idx)) < 0)
+		goto cleanup;
+
+	if (show == GIT_STATUS_SHOW_INDEX_THEN_WORKDIR) {
+		git_diff_list *empty = NULL;
+		GIT_DIFF_COITERATE(
+			idx2head, empty, i2h, w2i,
+			err = cb(i2h->old.path, index_delta2status(i2h->status), cbdata),
+			/* nothing */, /* nothing */, if (err < 0) break);
+		if (err < 0) goto cleanup; /* callback aborted the index pass: do not run the workdir pass or overwrite err */
+		git_diff_list_free(idx2head);
+		idx2head = NULL;
+	}
+	/* walk both lists together; after an index-then-workdir first pass idx2head is NULL, so only workdir entries fire */
+	GIT_DIFF_COITERATE(
+		idx2head, wd2idx, i2h, w2i,
+		err = cb(i2h->old.path, index_delta2status(i2h->status), cbdata),
+		err = cb(w2i->old.path, workdir_delta2status(w2i->status), cbdata),
+		err = cb(i2h->old.path, index_delta2status(i2h->status) |
+			workdir_delta2status(w2i->status), cbdata),
+		if (err < 0) break);
+
+cleanup:
+	git_tree_free(head);
+	git_diff_list_free(idx2head);
+	git_diff_list_free(wd2idx);
+	return err;
+}
+/* Convenience wrapper: calls git_status_foreach_ext() with show = index-and-workdir and the include-ignored / exclude-submodules flags. */
+int git_status_foreach(
+	git_repository *repo,
+	int (*callback)(const char *, unsigned int, void *),
+	void *payload)
+{
+	git_status_options opts;
+
+	opts.show = GIT_STATUS_SHOW_INDEX_AND_WORKDIR;
+	opts.flags = GIT_STATUS_OPT_INCLUDE_IGNORED |
+		GIT_STATUS_OPT_EXCLUDE_SUBMODULES;
+
+	return git_status_foreach_ext(repo, &opts, callback, payload);
+}
+
+
+/*
+ * the old stuff
+ */
+
 struct status_entry {
 	git_index_time mtime;
 
@@ -461,7 +638,7 @@ static int status_cmp(const void *a, const void *b)
 
 #define DEFAULT_SIZE 16
 
-int git_status_foreach(
+int git_status_foreach_old(
 	git_repository *repo,
 	int (*callback)(const char *, unsigned int, void *),
 	void *payload)
diff --git a/src/vector.h b/src/vector.h
index 180edbf..5bc2791 100644
--- a/src/vector.h
+++ b/src/vector.h
@@ -44,6 +44,8 @@ GIT_INLINE(const void *) git_vector_get_const(const git_vector *v, unsigned int 
 	return (position < v->length) ? v->contents[position] : NULL;
 }
 
+#define GIT_VECTOR_GET(V,I) ((I) < (V)->length ? (V)->contents[(I)] : NULL) /* bounds-checked get, NULL when out of range; V and I are evaluated more than once */
+
 GIT_INLINE(void *) git_vector_last(git_vector *v)
 {
 	return (v->length > 0) ? git_vector_get(v, v->length - 1) : NULL;