Commit 9be2261eaae74552aaa9d568e663292f4382e141

Vicent Martí 2012-09-13T09:24:12

Merge pull request #927 from arrbee/hashfile-with-filters Add git_repository_hashfile to hash with filters

diff --git a/include/git2/repository.h b/include/git2/repository.h
index f520d54..32ec58d 100644
--- a/include/git2/repository.h
+++ b/include/git2/repository.h
@@ -481,6 +481,30 @@ GIT_EXTERN(int) git_repository_message(char *buffer, size_t len, git_repository 
  */
 GIT_EXTERN(int) git_repository_message_remove(git_repository *repo);
 
+/**
+ * Calculate hash of file using repository filtering rules.
+ *
+ * If you simply want to calculate the hash of a file on disk with no filters,
+ * you can just use the `git_odb_hashfile()` API.  However, if you want to
+ * hash a file in the repository and you want to apply filtering rules (e.g.
+ * crlf filters) before generating the SHA, then use this function.
+ *
+ * @param out Output value of calculated SHA
+ * @param repo Repository pointer; must not be NULL
+ * @param path Path to file on disk whose contents should be hashed. A
+ *             relative path is resolved against the repository workdir.
+ * @param type The object type to hash as (e.g. GIT_OBJ_BLOB)
+ * @param as_path The path to use to look up filtering rules. If NULL,
+ *             `path` is used instead. If it is the empty string, no
+ *             filters are applied when calculating the hash.
+ * @return 0 on success, or a negative error code on failure
+ */
+GIT_EXTERN(int) git_repository_hashfile(
+    git_oid *out,
+    git_repository *repo,
+    const char *path,
+    git_otype type,
+    const char *as_path);
 
 /** @} */
 GIT_END_DECL
diff --git a/src/crlf.c b/src/crlf.c
index 1b6898b..5e86b4e 100644
--- a/src/crlf.c
+++ b/src/crlf.c
@@ -263,8 +263,9 @@ static int crlf_apply_to_workdir(git_filter *self, git_buf *dest, const git_buf 
 	return convert_line_endings(dest, source, workdir_ending);
 }
 
-static int find_and_add_filter(git_vector *filters, git_repository *repo, const char *path,
-										 int (*apply)(struct git_filter *self, git_buf *dest, const git_buf *source))
+static int find_and_add_filter(
+	git_vector *filters, git_repository *repo, const char *path,
+	int (*apply)(struct git_filter *self, git_buf *dest, const git_buf *source))
 {
 	struct crlf_attrs ca;
 	struct crlf_filter *filter;
diff --git a/src/odb.c b/src/odb.c
index c027c12..29c56a5 100644
--- a/src/odb.c
+++ b/src/odb.c
@@ -117,6 +117,11 @@ int git_odb__hashfd(git_oid *out, git_file fd, size_t size, git_otype type)
 	git_hash_ctx *ctx;
 	ssize_t read_len = -1;
 
+	if (!git_object_typeisloose(type)) {
+		giterr_set(GITERR_INVALID, "Invalid object type for hash");
+		return -1;
+	}
+
 	hdr_len = format_object_header(hdr, sizeof(hdr), size, type);
 
 	ctx = git_hash_new_ctx();
diff --git a/src/repository.c b/src/repository.c
index b9d180d..bcc6b15 100644
--- a/src/repository.c
+++ b/src/repository.c
@@ -17,6 +17,8 @@
 #include "fileops.h"
 #include "config.h"
 #include "refs.h"
+#include "filter.h"
+#include "odb.h"
 
 #define GIT_FILE_CONTENT_PREFIX "gitdir:"
 
@@ -1372,3 +1374,71 @@ int git_repository_message_remove(git_repository *repo)
 
 	return error;
 }
+
+/* Hash the file at `path` as `type`, applying the ODB filters for `as_path` */
+int git_repository_hashfile(
+    git_oid *out,
+    git_repository *repo,
+    const char *path,
+    git_otype type,
+    const char *as_path)
+{
+	int error;
+	git_vector filters = GIT_VECTOR_INIT;
+	git_file fd = -1; /* stays negative until the file is opened */
+	git_off_t len;
+	git_buf full_path = GIT_BUF_INIT;
+
+	assert(out && path && repo); /* as_path can be NULL */
+
+	/* At some point, it would be nice if repo could be NULL to just
+	 * apply filter rules defined in system and global files, but for
+	 * now that is not possible because git_filters_load() needs it.
+	 */
+
+	/* a relative `path` is joined onto the repository workdir */
+	error = git_path_join_unrooted(
+		&full_path, path, repo ? git_repository_workdir(repo) : NULL, NULL);
+	if (error < 0)
+		goto cleanup;
+
+	if (!as_path)
+		as_path = path;
+
+	/* An empty "as_path" means --no-filters. Only a negative result from
+	 * git_filters_load() matters; its success value is not used here. */
+	if (strlen(as_path) > 0) {
+		error = git_filters_load(&filters, repo, as_path, GIT_FILTER_TO_ODB);
+		if (error < 0)
+			goto cleanup; /* full_path must still be released */
+	}
+
+	fd = git_futils_open_ro(full_path.ptr);
+	if (fd < 0) {
+		error = fd;
+		goto cleanup;
+	}
+
+	len = git_futils_filesize(fd);
+	if (len < 0) {
+		error = (int)len;
+		goto cleanup;
+	}
+
+	if (!git__is_sizet(len)) {
+		giterr_set(GITERR_OS, "File size overflow for 32-bit systems");
+		error = -1;
+		goto cleanup;
+	}
+
+	error = git_odb__hashfd_filtered(out, fd, len, type, &filters);
+
+cleanup:
+	if (fd >= 0) /* never close a descriptor that was not opened */
+		p_close(fd);
+	git_filters_free(&filters);
+	git_buf_free(&full_path);
+
+	return error;
+}
+
diff --git a/tests-clar/repo/hashfile.c b/tests-clar/repo/hashfile.c
new file mode 100644
index 0000000..129e5d3
--- /dev/null
+++ b/tests-clar/repo/hashfile.c
@@ -0,0 +1,88 @@
+#include "clar_libgit2.h"
+#include "buffer.h"
+
+static git_repository *_repo;
+
+void test_repo_hashfile__initialize(void)
+{
+	_repo = cl_git_sandbox_init("status"); /* sandbox repo used by the tests below */
+}
+
+void test_repo_hashfile__cleanup(void)
+{
+	cl_git_sandbox_cleanup();
+	_repo = NULL; /* clear the handle so it is not reused after sandbox cleanup */
+}
+
+void test_repo_hashfile__simple(void)
+{
+	git_buf abs_path = GIT_BUF_INIT;
+	git_oid odb_id, repo_id;
+
+	/* a workdir-relative path hashes like the plain on-disk file */
+	cl_git_pass(git_odb_hashfile(&odb_id, "status/current_file", GIT_OBJ_BLOB));
+	cl_git_pass(git_repository_hashfile(&repo_id, _repo, "current_file", GIT_OBJ_BLOB, NULL));
+	cl_assert(git_oid_equal(&odb_id, &repo_id));
+
+	cl_git_pass(git_buf_joinpath(&abs_path, git_repository_workdir(_repo), "current_file"));
+
+	/* the same holds when the path is joined onto the workdir */
+	cl_git_pass(git_odb_hashfile(&odb_id, abs_path.ptr, GIT_OBJ_BLOB));
+	cl_git_pass(git_repository_hashfile(&repo_id, _repo, abs_path.ptr, GIT_OBJ_BLOB, NULL));
+	cl_assert(git_oid_equal(&odb_id, &repo_id));
+
+	/* object types that cannot be hashed are rejected by both APIs */
+	cl_git_fail(git_odb_hashfile(&odb_id, abs_path.ptr, GIT_OBJ_ANY));
+	cl_git_fail(git_repository_hashfile(&repo_id, _repo, abs_path.ptr, GIT_OBJ_OFS_DELTA, NULL));
+
+	git_buf_free(&abs_path);
+}
+
+void test_repo_hashfile__filtered(void)
+{
+	git_oid raw_id, repo_id;
+	git_config *cfg;
+
+	cl_git_pass(git_repository_config(&cfg, _repo));
+	cl_git_pass(git_config_set_bool(cfg, "core.autocrlf", true));
+	git_config_free(cfg);
+
+	cl_git_append2file("status/.gitattributes", "*.txt text\n*.bin binary\n\n");
+
+	/* write sample files whose contents use CRLF line endings */
+	cl_git_mkfile("status/testfile.txt", "content\r\n");
+	cl_git_mkfile("status/testfile.bin", "other\r\nstuff\r\n");
+
+	/* hashes differ: the text file is filtered before hashing */
+	cl_git_pass(git_odb_hashfile(&raw_id, "status/testfile.txt", GIT_OBJ_BLOB));
+	cl_git_pass(git_repository_hashfile(&repo_id, _repo, "testfile.txt", GIT_OBJ_BLOB, NULL));
+	cl_assert(git_oid_cmp(&raw_id, &repo_id));
+
+	/* hashes match: the file is marked binary in .gitattributes */
+	cl_git_pass(git_odb_hashfile(&raw_id, "status/testfile.bin", GIT_OBJ_BLOB));
+	cl_git_pass(git_repository_hashfile(&repo_id, _repo, "testfile.bin", GIT_OBJ_BLOB, NULL));
+	cl_assert(git_oid_equal(&raw_id, &repo_id));
+
+	/* hashes match when 'as_path' selects the binary rules */
+	cl_git_pass(git_odb_hashfile(&raw_id, "status/testfile.txt", GIT_OBJ_BLOB));
+	cl_git_pass(git_repository_hashfile(&repo_id, _repo, "testfile.txt", GIT_OBJ_BLOB, "foo.bin"));
+	cl_assert(git_oid_equal(&raw_id, &repo_id));
+
+	/* hashes differ when 'as_path' selects the text rules */
+	cl_git_pass(git_odb_hashfile(&raw_id, "status/testfile.bin", GIT_OBJ_BLOB));
+	cl_git_pass(git_repository_hashfile(&repo_id, _repo, "testfile.bin", GIT_OBJ_BLOB, "foo.txt"));
+	cl_assert(git_oid_cmp(&raw_id, &repo_id));
+
+	/* hashes match when an empty 'as_path' turns filtering off */
+	cl_git_pass(git_odb_hashfile(&raw_id, "status/testfile.txt", GIT_OBJ_BLOB));
+	cl_git_pass(git_repository_hashfile(&repo_id, _repo, "testfile.txt", GIT_OBJ_BLOB, ""));
+	cl_assert(git_oid_equal(&raw_id, &repo_id));
+
+	cl_git_pass(git_odb_hashfile(&raw_id, "status/testfile.bin", GIT_OBJ_BLOB));
+	cl_git_pass(git_repository_hashfile(&repo_id, _repo, "testfile.bin", GIT_OBJ_BLOB, ""));
+	cl_assert(git_oid_equal(&raw_id, &repo_id));
+
+	/* invalid object types must make both APIs fail */
+	cl_git_fail(git_odb_hashfile(&raw_id, "status/testfile.txt", 0));
+	cl_git_fail(git_repository_hashfile(&repo_id, _repo, "testfile.txt", GIT_OBJ_ANY, NULL));
+}