Commit 2b12dcf6d6971a622abe9730472dc373c0856d98

Edward Thomson 2018-03-19T19:45:11

iterator: optionally hash filesystem iterators Optionally hash the contents of files encountered in the filesystem or working directory iterators. This is not expected to be used in production code paths, but may allow us to simplify some test contexts. For working directory iterators, apply filters as appropriate, since we have the repository context needed to do so.

diff --git a/src/iterator.c b/src/iterator.c
index 6e7300a..0bd67c7 100644
--- a/src/iterator.c
+++ b/src/iterator.c
@@ -1015,6 +1015,7 @@ typedef struct {
 	struct stat st;
 	size_t path_len;
 	iterator_pathlist_search_t match;
+	git_oid id;
 	char path[GIT_FLEX_ARRAY];
 } filesystem_iterator_entry;
 
@@ -1265,7 +1266,32 @@ GIT_INLINE(bool) filesystem_iterator_is_dot_git(
 	return (len == 4 || path[len - 5] == '/');
 }
 
-static filesystem_iterator_entry *filesystem_iterator_entry_init(
+static int filesystem_iterator_entry_hash(
+	filesystem_iterator *iter,
+	filesystem_iterator_entry *entry)
+{ /* Fills entry->id for one iterator entry; returns 0 for directories, otherwise the result of the hashing call. */
+	git_buf fullpath = GIT_BUF_INIT;
+	int error;
+
+	if (S_ISDIR(entry->st.st_mode)) { /* directories are not hashed: they get an all-zero id */
+		memset(&entry->id, 0, GIT_OID_RAWSZ);
+		return 0;
+	}
+
+	if (iter->base.type == GIT_ITERATOR_TYPE_WORKDIR) /* workdir iterators hash through the repository, using the path as stored on the entry */
+		return git_repository_hashfile(&entry->id,
+			iter->base.repo, entry->path, GIT_OBJ_BLOB, NULL);
+
+	if (!(error = git_buf_joinpath(&fullpath, iter->root, entry->path))) /* other iterators: build root + entry path, then hash that file as a blob */
+		error = git_odb_hashfile(&entry->id, fullpath.ptr, GIT_OBJ_BLOB);
+
+	git_buf_dispose(&fullpath); /* released whether or not the join/hash succeeded */
+	return error;
+}
+
+static int filesystem_iterator_entry_init(
+	filesystem_iterator_entry **out,
+	filesystem_iterator *iter,
 	filesystem_iterator_frame *frame,
 	const char *path,
 	size_t path_len,
@@ -1274,15 +1300,19 @@ static filesystem_iterator_entry *filesystem_iterator_entry_init(
 {
 	filesystem_iterator_entry *entry;
 	size_t entry_size;
+	int error = 0;
+
+	*out = NULL;
 
 	/* Make sure to append two bytes, one for the path's null
 	 * termination, one for a possible trailing '/' for folders.
 	 */
-	if (GIT_ADD_SIZET_OVERFLOW(&entry_size,
-			sizeof(filesystem_iterator_entry), path_len) ||
-		GIT_ADD_SIZET_OVERFLOW(&entry_size, entry_size, 2) ||
-		(entry = git_pool_malloc(&frame->entry_pool, entry_size)) == NULL)
-		return NULL;
+	GITERR_CHECK_ALLOC_ADD(&entry_size,
+		sizeof(filesystem_iterator_entry), path_len);
+	GITERR_CHECK_ALLOC_ADD(&entry_size, entry_size, 2);
+
+	entry = git_pool_malloc(&frame->entry_pool, entry_size);
+	GITERR_CHECK_ALLOC(entry);
 
 	entry->path_len = path_len;
 	entry->match = pathlist_match;
@@ -1295,7 +1325,13 @@ static filesystem_iterator_entry *filesystem_iterator_entry_init(
 
 	entry->path[entry->path_len] = '\0';
 
-	return entry;
+	if (iter->base.flags & GIT_ITERATOR_INCLUDE_HASH)
+		error = filesystem_iterator_entry_hash(iter, entry);
+
+	if (!error)
+		*out = entry;
+
+	return error;
 }
 
 static int filesystem_iterator_frame_push(
@@ -1418,9 +1454,9 @@ static int filesystem_iterator_frame_push(
 		else if (dir_expected)
 			continue;
 
-		entry = filesystem_iterator_entry_init(new_frame,
-			path, path_len, &statbuf, pathlist_match);
-		GITERR_CHECK_ALLOC(entry);
+		if ((error = filesystem_iterator_entry_init(&entry,
+			iter, new_frame, path, path_len, &statbuf, pathlist_match)) < 0)
+			goto done;
 
 		git_vector_insert(&new_frame->entries, entry);
 	}
@@ -1460,7 +1496,7 @@ static void filesystem_iterator_set_current(
 	iter->entry.ctime.seconds = entry->st.st_ctime;
 	iter->entry.mtime.seconds = entry->st.st_mtime;
 
-#if defined(GIT_USE_NSEC)	
+#if defined(GIT_USE_NSEC)
 	iter->entry.ctime.nanoseconds = entry->st.st_ctime_nsec;
 	iter->entry.mtime.nanoseconds = entry->st.st_mtime_nsec;
 #else
@@ -1475,6 +1511,9 @@ static void filesystem_iterator_set_current(
 	iter->entry.gid = entry->st.st_gid;
 	iter->entry.file_size = entry->st.st_size;
 
+	if (iter->base.flags & GIT_ITERATOR_INCLUDE_HASH)
+		git_oid_cpy(&iter->entry.id, &entry->id);
+
 	iter->entry.path = entry->path;
 
 	iter->current_is_ignored = GIT_IGNORE_UNCHECKED;
diff --git a/src/iterator.h b/src/iterator.h
index a6497d8..fe358f1 100644
--- a/src/iterator.h
+++ b/src/iterator.h
@@ -41,6 +41,8 @@ typedef enum {
 	GIT_ITERATOR_INCLUDE_CONFLICTS = (1u << 6),
 	/** descend into symlinked directories */
 	GIT_ITERATOR_DESCEND_SYMLINKS = (1u << 7),
+	/** hash files in workdir or filesystem iterators */
+	GIT_ITERATOR_INCLUDE_HASH = (1u << 8),
 } git_iterator_flag_t;
 
 typedef enum {
diff --git a/tests/iterator/workdir.c b/tests/iterator/workdir.c
index a16acd7..889fcd6 100644
--- a/tests/iterator/workdir.c
+++ b/tests/iterator/workdir.c
@@ -3,6 +3,7 @@
 #include "repository.h"
 #include "fileops.h"
 #include "../submodule/submodule_helpers.h"
+#include "../merge/merge_helpers.h"
 #include "iterator_helpers.h"
 #include <stdarg.h>
 
@@ -1474,3 +1475,48 @@ void test_iterator_workdir__pathlist_with_directory_include_trees(void)
 	git_vector_free(&filelist);
 }
 
+void test_iterator_workdir__hash_when_requested(void)
+{ /* Verifies that workdir iterator entries carry a zero id by default and the expected id once GIT_ITERATOR_INCLUDE_HASH is set. */
+	git_iterator *iter;
+	const git_index_entry *entry;
+	git_iterator_options iter_opts = GIT_ITERATOR_OPTIONS_INIT;
+	git_oid expected_id = {{0}}; /* stays all-zero for the first pass; overwritten per entry in the second */
+	size_t i;
+
+	struct merge_index_entry expected[] = { /* entries the iterator is expected to yield, in this order */
+		{ 0100644, "ffb36e513f5fdf8a6ba850a20142676a2ac4807d", 0, "asparagus.txt" },
+		{ 0100644, "68f6182f4c85d39e1309d97c7e456156dc9c0096", 0, "beef.txt" },
+		{ 0100644, "4b7c5650008b2e747fe1809eeb5a1dde0e80850a", 0, "bouilli.txt" },
+		{ 0100644, "c4e6cca3ec6ae0148ed231f97257df8c311e015f", 0, "gravy.txt" },
+		{ 0100644, "7c7e08f9559d9e1551b91e1cf68f1d0066109add", 0, "oyster.txt" },
+		{ 0100644, "898d12687fb35be271c27c795a6b32c8b51da79e", 0, "veal.txt" },
+	};
+
+	g_repo = cl_git_sandbox_init("merge-recursive");
+
+	/* do the iteration normally, ensure there are no hashes */
+	cl_git_pass(git_iterator_for_workdir(&iter, g_repo, NULL, NULL, &iter_opts));
+
+	for (i = 0; i < sizeof(expected) / sizeof(struct merge_index_entry); i++) {
+		cl_git_pass(git_iterator_advance(&entry, iter));
+
+		cl_assert_equal_oid(&expected_id, &entry->id); /* expected_id is still zero here */
+		cl_assert_equal_s(expected[i].path, entry->path);
+	}
+	cl_assert_equal_i(GIT_ITEROVER, git_iterator_advance(&entry, iter)); /* nothing beyond the expected entries */
+	git_iterator_free(iter);
+
+	/* do the iteration requesting hashes */
+	iter_opts.flags |= GIT_ITERATOR_INCLUDE_HASH;
+	cl_git_pass(git_iterator_for_workdir(&iter, g_repo, NULL, NULL, &iter_opts));
+
+	for (i = 0; i < sizeof(expected) / sizeof(struct merge_index_entry); i++) {
+		cl_git_pass(git_iterator_advance(&entry, iter));
+
+		cl_git_pass(git_oid_fromstr(&expected_id, expected[i].oid_str)); /* parse this entry's expected id from its hex string */
+		cl_assert_equal_oid(&expected_id, &entry->id);
+		cl_assert_equal_s(expected[i].path, entry->path);
+	}
+	cl_assert_equal_i(GIT_ITEROVER, git_iterator_advance(&entry, iter)); /* nothing beyond the expected entries */
+	git_iterator_free(iter);
+}