Commit b36effa22e015871948daeea250b4996c663e11a

Russell Belfer 2012-09-10T09:59:14

Replace git_diff_iterator_num_files with progress The `git_diff_iterator_num_files` API was problematic, since we don't actually know the exact number of files to be iterated over until we load those files into memory. This replaces it with a new `git_diff_iterator_progress` API that goes from 0 to 1, and moves and renamed the old API for the internal places that can tolerate a max value instead of an exact value.

diff --git a/include/git2/diff.h b/include/git2/diff.h
index 2898f3b..7a86d24 100644
--- a/include/git2/diff.h
+++ b/include/git2/diff.h
@@ -384,26 +384,16 @@ GIT_EXTERN(int) git_diff_iterator_new(
 GIT_EXTERN(void) git_diff_iterator_free(git_diff_iterator *iterator);
 
 /**
- * Return the number of files in the diff.
+ * Return progress value for traversing the diff.
  *
- * NOTE: This number has to be treated as an upper bound on the number of
- * files that have changed if the diff is with the working directory.
+ * This returns a value between 0.0 and 1.0 that represents the progress
+ * through the diff iterator.  The value is monotonically increasing and
+ * will advance gradually as you progress through the iteration.
  *
- * Why?! For efficiency, we defer loading the file contents as long as
- * possible, so if a file has been "touched" in the working directory and
- * then reverted to the original content, it may get stored in the diff list
- * as MODIFIED along with a flag that the status should be reconfirmed when
- * it is actually loaded into memory.  When that load happens, it could get
- * flipped to UNMODIFIED. If unmodified files are being skipped, then the
- * iterator will skip that file and this number may be too high.
- *
- * This behavior is true of `git_diff_foreach` as well, but the only
- * implication there is that the `progress` value would not advance evenly.
- *
- * @param iterator The iterator object
- * @return The maximum number of files to be iterated over
+ * @param iterator The diff iterator
+ * @return Value between 0.0 and 1.0
  */
-GIT_EXTERN(int) git_diff_iterator_num_files(git_diff_iterator *iterator);
+GIT_EXTERN(float) git_diff_iterator_progress(git_diff_iterator *iterator);
 
 /**
  * Return the number of hunks in the current file
diff --git a/src/diff.h b/src/diff.h
index def7463..ea38a67 100644
--- a/src/diff.h
+++ b/src/diff.h
@@ -42,5 +42,27 @@ struct git_diff_list {
 extern void git_diff__cleanup_modes(
 	uint32_t diffcaps, uint32_t *omode, uint32_t *nmode);
 
+/**
+ * Return the maximum possible number of files in the diff.
+ *
+ * NOTE: This number has to be treated as an upper bound on the number of
+ * files that have changed if the diff is with the working directory.
+ *
+ * Why?! For efficiency, we defer loading the file contents as long as
+ * possible, so if a file has been "touched" in the working directory and
+ * then reverted to the original content, it may get stored in the diff list
+ * as MODIFIED along with a flag that the status should be reconfirmed when
+ * it is actually loaded into memory.  When that load happens, it could get
+ * flipped to UNMODIFIED. If unmodified files are being skipped, then the
+ * iterator will skip that file and this number may be too high.
+ *
+ * This behavior is true of `git_diff_foreach` as well, but the only
+ * implication there is that the `progress` value would not advance evenly.
+ *
+ * @param iterator The iterator object
+ * @return The maximum number of files to be iterated over
+ */
+int git_diff_iterator__max_files(git_diff_iterator *iterator);
+
 #endif
 
diff --git a/src/diff_output.c b/src/diff_output.c
index 6ff880e..f65d005 100644
--- a/src/diff_output.c
+++ b/src/diff_output.c
@@ -1115,7 +1115,6 @@ struct git_diff_iterator {
 	diff_delta_context ctxt;
 	size_t file_index;
 	size_t next_index;
-	size_t file_count;
 	git_pool hunks;
 	size_t   hunk_count;
 	diffiter_hunk *hunk_head;
@@ -1239,8 +1238,6 @@ int git_diff_iterator_new(
 	git_diff_iterator **iterator_ptr,
 	git_diff_list *diff)
 {
-	size_t i;
-	git_diff_delta *delta;
 	git_diff_iterator *iter;
 
 	assert(diff && iterator_ptr);
@@ -1261,12 +1258,6 @@ int git_diff_iterator_new(
 		git_pool_init(&iter->lines, sizeof(diffiter_line), 0) < 0)
 		goto fail;
 
-	git_vector_foreach(&diff->deltas, i, delta) {
-		if (diff_delta_should_skip(iter->ctxt.opts, delta))
-			continue;
-		iter->file_count++;
-	}
-
 	*iterator_ptr = iter;
 
 	return 0;
@@ -1284,9 +1275,14 @@ void git_diff_iterator_free(git_diff_iterator *iter)
 	git__free(iter);
 }
 
-int git_diff_iterator_num_files(git_diff_iterator *iter)
+float git_diff_iterator_progress(git_diff_iterator *iter)
+{
+	return (float)iter->next_index / (float)iter->diff->deltas.length;
+}
+
+int git_diff_iterator__max_files(git_diff_iterator *iter)
 {
-	return (int)iter->file_count;
+	return (int)iter->diff->deltas.length;
 }
 
 int git_diff_iterator_num_hunks_in_file(git_diff_iterator *iter)
diff --git a/tests-clar/diff/diff_helpers.c b/tests-clar/diff/diff_helpers.c
index 59e0180..ef59b68 100644
--- a/tests-clar/diff/diff_helpers.c
+++ b/tests-clar/diff/diff_helpers.c
@@ -111,23 +111,21 @@ int diff_foreach_via_iterator(
 	git_diff_hunk_fn hunk_cb,
 	git_diff_data_fn line_cb)
 {
-	int error, curr, total;
+	int error;
 	git_diff_iterator *iter;
 	git_diff_delta *delta;
 
 	if ((error = git_diff_iterator_new(&iter, diff)) < 0)
 		return error;
 
-	curr  = 0;
-	total = git_diff_iterator_num_files(iter);
-
 	while (!(error = git_diff_iterator_next_file(&delta, iter))) {
 		git_diff_range *range;
 		const char *hdr;
 		size_t hdr_len;
+		float progress = git_diff_iterator_progress(iter);
 
 		/* call file_cb for this file */
-		if (file_cb != NULL && file_cb(data, delta, (float)curr / total) != 0)
+		if (file_cb != NULL && file_cb(data, delta, progress) != 0)
 			goto abort;
 
 		if (!hunk_cb && !line_cb)