Commit 47bfa0be6d509b60eda92705b57d3f7ba89c1c6b

Russell Belfer 2012-09-07T13:27:49

Add git_repository_hashfile to hash with filters

The existing `git_odb_hashfile` does not apply text filtering rules because it doesn't have a repository context to evaluate the correct rules to apply. This adds a new hashfile function that will apply repository-specific filters (based on config, attributes, and filename) before calculating the hash.

diff --git a/include/git2/repository.h b/include/git2/repository.h
index f520d54..ebea3b0 100644
--- a/include/git2/repository.h
+++ b/include/git2/repository.h
@@ -481,6 +481,31 @@ GIT_EXTERN(int) git_repository_message(char *buffer, size_t len, git_repository 
  */
 GIT_EXTERN(int) git_repository_message_remove(git_repository *repo);
 
+/**
+ * Calculate hash of file using repository filtering rules.
+ *
+ * To hash a file on disk with no filters, just use `git_odb_hashfile()`.
+ * To hash a file in the repository with its filtering rules (e.g. crlf
+ * filters) applied before generating the SHA, use this function.
+ *
+ * @param out Output value of calculated SHA
+ * @param repo Repository pointer.  NULL is allowed to just use global and
+ *             system attributes for choosing filters.
+ * @param path Path to file on disk whose contents should be hashed. If the
+ *             repository is not NULL, this can be relative to its workdir.
+ * @param type The object type to hash as (e.g. GIT_OBJ_BLOB)
+ * @param as_path The path to use to look up filtering rules. If NULL, the
+ *             `path` parameter is used instead. If this is the empty
+ *             string, no filters are applied when calculating the hash.
+ * @return < 0 if filters cannot be loaded or the file cannot be opened or
+ *         sized; success is presumably 0 (TODO confirm against the odb hash)
+ */
+GIT_EXTERN(int) git_repository_hashfile(
+    git_oid *out,
+    git_repository *repo,
+    const char *path,
+    git_otype type,
+    const char *as_path);
 
 /** @} */
 GIT_END_DECL
diff --git a/src/crlf.c b/src/crlf.c
index 1b6898b..5e86b4e 100644
--- a/src/crlf.c
+++ b/src/crlf.c
@@ -263,8 +263,9 @@ static int crlf_apply_to_workdir(git_filter *self, git_buf *dest, const git_buf 
 	return convert_line_endings(dest, source, workdir_ending);
 }
 
-static int find_and_add_filter(git_vector *filters, git_repository *repo, const char *path,
-										 int (*apply)(struct git_filter *self, git_buf *dest, const git_buf *source))
+static int find_and_add_filter(
+	git_vector *filters, git_repository *repo, const char *path,
+	int (*apply)(struct git_filter *self, git_buf *dest, const git_buf *source))
 {
 	struct crlf_attrs ca;
 	struct crlf_filter *filter;
diff --git a/src/repository.c b/src/repository.c
index b9d180d..ab139a7 100644
--- a/src/repository.c
+++ b/src/repository.c
@@ -17,6 +17,8 @@
 #include "fileops.h"
 #include "config.h"
 #include "refs.h"
+#include "filter.h"
+#include "odb.h"
 
 #define GIT_FILE_CONTENT_PREFIX "gitdir:"
 
@@ -1372,3 +1374,66 @@ int git_repository_message_remove(git_repository *repo)
 
 	return error;
 }
+
+/* Hash the file at `path` as an object of `type`, first applying the to-ODB
+ * filters chosen by `as_path` (`path` if NULL; none if it is the empty
+ * string).  Returns < 0 on failure, otherwise whatever
+ * git_odb__hashfd_filtered() returns.  Every exit goes through `cleanup`. */
+int git_repository_hashfile(
+    git_oid *out,
+    git_repository *repo,
+    const char *path,
+    git_otype type,
+    const char *as_path)
+{
+	int error;
+	git_vector filters = GIT_VECTOR_INIT;
+	git_file fd = -1;
+	git_off_t len;
+	git_buf full_path = GIT_BUF_INIT;
+
+	assert(out && path); /* repo and as_path can be NULL */
+
+	error = git_path_join_unrooted(
+		&full_path, path, repo ? git_repository_workdir(repo) : NULL, NULL);
+	if (error < 0)
+		goto cleanup;
+
+	if (!as_path)
+		as_path = path;
+
+	/* passing empty string for "as_path" indicates --no-filters */
+	if (strlen(as_path) > 0) {
+		error = git_filters_load(&filters, repo, as_path, GIT_FILTER_TO_ODB);
+		if (error < 0)
+			goto cleanup; /* full_path (and any filters) must be released */
+	}
+
+	fd = git_futils_open_ro(full_path.ptr);
+	if (fd < 0) {
+		error = fd;
+		goto cleanup;
+	}
+
+	len = git_futils_filesize(fd);
+	if (len < 0) {
+		error = (int)len;
+		goto cleanup;
+	}
+	if (!git__is_sizet(len)) {
+		giterr_set(GITERR_OS, "File size overflow for 32-bit systems");
+		error = -1;
+		goto cleanup;
+	}
+
+	error = git_odb__hashfd_filtered(out, fd, (size_t)len, type, &filters);
+
+cleanup:
+	/* fd is still negative when we bail out before, or while, opening it */
+	if (fd >= 0)
+		p_close(fd);
+	git_filters_free(&filters);
+	git_buf_free(&full_path);
+	return error;
+}
+
diff --git a/tests-clar/repo/hashfile.c b/tests-clar/repo/hashfile.c
new file mode 100644
index 0000000..9fa0d9b
--- /dev/null
+++ b/tests-clar/repo/hashfile.c
@@ -0,0 +1,55 @@
+#include "clar_libgit2.h"
+#include "buffer.h"
+
+static git_repository *_repo;
+
+void test_repo_hashfile__initialize(void)
+{
+	_repo = cl_git_sandbox_init("status"); /* handle used by every test in this file */
+}
+
+void test_repo_hashfile__cleanup(void)
+{
+	cl_git_sandbox_cleanup();
+	_repo = NULL; /* drop the handle; presumably released by the sandbox cleanup above */
+}
+
+void test_repo_hashfile__simple(void)
+{
+	git_oid raw_oid, repo_oid;
+	git_buf abs_path = GIT_BUF_INIT;
+
+	/* a path given relative to the workdir must match the plain odb hash */
+	cl_git_pass(git_odb_hashfile(&raw_oid, "status/current_file", GIT_OBJ_BLOB));
+	cl_git_pass(git_repository_hashfile(&repo_oid, _repo, "current_file", GIT_OBJ_BLOB, NULL));
+	cl_assert(git_oid_equal(&raw_oid, &repo_oid));
+
+	/* the same must hold when the workdir-joined path is passed in */
+	cl_git_pass(git_buf_joinpath(&abs_path, git_repository_workdir(_repo), "current_file"));
+	cl_git_pass(git_odb_hashfile(&raw_oid, abs_path.ptr, GIT_OBJ_BLOB));
+	cl_git_pass(git_repository_hashfile(&repo_oid, _repo, abs_path.ptr, GIT_OBJ_BLOB, NULL));
+	cl_assert(git_oid_equal(&raw_oid, &repo_oid));
+	git_buf_free(&abs_path);
+}
+
+void test_repo_hashfile__filtered(void)
+{
+	git_oid raw_oid, filtered_oid;
+	git_config *cfg;
+
+	/* enable core.autocrlf and register text/binary attribute rules */
+	cl_git_pass(git_repository_config(&cfg, _repo));
+	cl_git_pass(git_config_set_bool(cfg, "core.autocrlf", true));
+	git_config_free(cfg);
+	cl_git_append2file("status/.gitattributes", "*.txt text\n*.bin binary\n\n");
+
+	cl_git_mkfile("status/testfile.txt", "content\r\n"); /* Content with CRLF */
+
+	cl_git_pass(git_odb_hashfile(&raw_oid, "status/testfile.txt", GIT_OBJ_BLOB));
+	cl_git_pass(git_repository_hashfile(&filtered_oid, _repo, "testfile.txt", GIT_OBJ_BLOB, NULL));
+	cl_assert(git_oid_cmp(&raw_oid, &filtered_oid) != 0); /* hashes must differ */
+
+	cl_git_pass(git_odb_hashfile(&raw_oid, "status/testfile.txt", GIT_OBJ_BLOB));
+	cl_git_pass(git_repository_hashfile(&filtered_oid, _repo, "testfile.txt", GIT_OBJ_BLOB, "testfile.bin"));
+	cl_assert(git_oid_equal(&raw_oid, &filtered_oid)); /* equal when 'binary' 'as_path' name is used */
+}