Commit 0f4256b8d78cf602192e0546d185953ee1b30fd5

Edward Thomson 2021-09-24T15:23:34

repository: improve `hashfile` for absolute paths

When `git_repository_hashfile` is handed an absolute path, it now determines whether the path is within the repository's working directory or not. This is necessary when there is no `as_path` specified. If the path is within the working directory, then the given path should be used for attribute lookups (it is the effective `as_path`). If it is not within the working directory, then it is _not_ eligible for attribute lookups. Importantly, now we will _never_ pass an absolute path down to attribute lookup functions.

diff --git a/include/git2/repository.h b/include/git2/repository.h
index e699016..8d1cffc 100644
--- a/include/git2/repository.h
+++ b/include/git2/repository.h
@@ -762,13 +762,15 @@ GIT_EXTERN(int) git_repository_mergehead_foreach(
  *
  * @param out Output value of calculated SHA
  * @param repo Repository pointer
- * @param path Path to file on disk whose contents should be hashed. If the
- *             repository is not NULL, this can be a relative path.
+ * @param path Path to file on disk whose contents should be hashed. This
+ *             may be an absolute path or a relative path; a relative path
+ *             is treated as a path within the working directory.
  * @param type The object type to hash as (e.g. GIT_OBJECT_BLOB)
  * @param as_path The path to use to look up filtering rules. If this is
- *             NULL, then the `path` parameter will be used instead. If
- *             this is passed as the empty string, then no filters will be
- *             applied when calculating the hash.
+ *             an empty string then no filters will be applied when
+ *             calculating the hash. If this is `NULL` and `path` is
+ *             within the repository's working directory, `path` is
+ *             used; if `path` is outside it, no filters are applied.
  * @return 0 on success, or an error code
  */
 GIT_EXTERN(int) git_repository_hashfile(
diff --git a/src/repository.c b/src/repository.c
index 8f0f477..9b3e9c9 100644
--- a/src/repository.c
+++ b/src/repository.c
@@ -2840,34 +2840,36 @@ int git_repository_hashfile(
 	git_file fd = -1;
 	uint64_t len;
 	git_buf full_path = GIT_BUF_INIT;
+	const char *workdir = git_repository_workdir(repo);
 
 	 /* as_path can be NULL */
 	GIT_ASSERT_ARG(out);
 	GIT_ASSERT_ARG(path);
 	GIT_ASSERT_ARG(repo);
 
-	/* At some point, it would be nice if repo could be NULL to just
-	 * apply filter rules defined in system and global files, but for
-	 * now that is not possible because git_filters_load() needs it.
-	 */
-
-	if ((error = git_path_join_unrooted(
-		&full_path, path, git_repository_workdir(repo), NULL)) < 0 ||
+	if ((error = git_path_join_unrooted(&full_path, path, workdir, NULL)) < 0 ||
 	    (error = git_path_validate_workdir_buf(repo, &full_path)) < 0)
 		return error;
 
-	if (!as_path)
-		as_path = path;
+	/*
+	 * NULL as_path means that we should derive it from the
+	 * given path.
+	 */
+	if (!as_path) {
+		if (workdir && !git__prefixcmp(full_path.ptr, workdir))
+			as_path = full_path.ptr + strlen(workdir);
+		else
+			as_path = "";
+	}
 
 	/* passing empty string for "as_path" indicated --no-filters */
 	if (strlen(as_path) > 0) {
 		error = git_filter_list_load(
 			&fl, repo, NULL, as_path,
 			GIT_FILTER_TO_ODB, GIT_FILTER_DEFAULT);
+
 		if (error < 0)
 			return error;
-	} else {
-		error = 0;
 	}
 
 	/* at this point, error is a count of the number of loaded filters */
diff --git a/tests/repo/hashfile.c b/tests/repo/hashfile.c
index 0fb4e67..bffb51b 100644
--- a/tests/repo/hashfile.c
+++ b/tests/repo/hashfile.c
@@ -10,6 +10,7 @@ void test_repo_hashfile__initialize(void)
 
 void test_repo_hashfile__cleanup(void)
 {
+	cl_fixture_cleanup("absolute");
 	cl_git_sandbox_cleanup();
 	_repo = NULL;
 }
@@ -38,10 +39,18 @@ void test_repo_hashfile__simple(void)
 	git_buf_dispose(&full);
 }
 
-void test_repo_hashfile__filtered(void)
+void test_repo_hashfile__filtered_in_workdir(void)
 {
+	git_buf root = GIT_BUF_INIT, txt = GIT_BUF_INIT, bin = GIT_BUF_INIT;
+	char cwd[GIT_PATH_MAX];
 	git_oid a, b;
 
+	cl_must_pass(p_getcwd(cwd, GIT_PATH_MAX));
+	cl_must_pass(p_mkdir("absolute", 0777));
+	cl_git_pass(git_buf_joinpath(&root, cwd, "status"));
+	cl_git_pass(git_buf_joinpath(&txt, root.ptr, "testfile.txt"));
+	cl_git_pass(git_buf_joinpath(&bin, root.ptr, "testfile.bin"));
+
 	cl_repo_set_bool(_repo, "core.autocrlf", true);
 
 	cl_git_append2file("status/.gitattributes", "*.txt text\n*.bin binary\n\n");
@@ -55,21 +64,41 @@ void test_repo_hashfile__filtered(void)
 	cl_git_pass(git_repository_hashfile(&b, _repo, "testfile.txt", GIT_OBJECT_BLOB, NULL));
 	cl_assert(git_oid_cmp(&a, &b));
 
+	/* not equal hashes because of filtering when specified by absolute path */
+	cl_git_pass(git_odb_hashfile(&a, "status/testfile.txt", GIT_OBJECT_BLOB));
+	cl_git_pass(git_repository_hashfile(&b, _repo, txt.ptr, GIT_OBJECT_BLOB, NULL));
+	cl_assert(git_oid_cmp(&a, &b));
+
 	/* equal hashes because filter is binary */
 	cl_git_pass(git_odb_hashfile(&a, "status/testfile.bin", GIT_OBJECT_BLOB));
 	cl_git_pass(git_repository_hashfile(&b, _repo, "testfile.bin", GIT_OBJECT_BLOB, NULL));
 	cl_assert_equal_oid(&a, &b);
 
+	/* equal hashes because filter is binary when specified by absolute path */
+	cl_git_pass(git_odb_hashfile(&a, "status/testfile.bin", GIT_OBJECT_BLOB));
+	cl_git_pass(git_repository_hashfile(&b, _repo, bin.ptr, GIT_OBJECT_BLOB, NULL));
+	cl_assert_equal_oid(&a, &b);
+
 	/* equal hashes when 'as_file' points to binary filtering */
 	cl_git_pass(git_odb_hashfile(&a, "status/testfile.txt", GIT_OBJECT_BLOB));
 	cl_git_pass(git_repository_hashfile(&b, _repo, "testfile.txt", GIT_OBJECT_BLOB, "foo.bin"));
 	cl_assert_equal_oid(&a, &b);
 
+	/* equal hashes when 'as_path' points to binary filtering (absolute path) */
+	cl_git_pass(git_odb_hashfile(&a, "status/testfile.txt", GIT_OBJECT_BLOB));
+	cl_git_pass(git_repository_hashfile(&b, _repo, txt.ptr, GIT_OBJECT_BLOB, "foo.bin"));
+	cl_assert_equal_oid(&a, &b);
+
 	/* not equal hashes when 'as_file' points to text filtering */
 	cl_git_pass(git_odb_hashfile(&a, "status/testfile.bin", GIT_OBJECT_BLOB));
 	cl_git_pass(git_repository_hashfile(&b, _repo, "testfile.bin", GIT_OBJECT_BLOB, "foo.txt"));
 	cl_assert(git_oid_cmp(&a, &b));
 
+	/* not equal hashes when 'as_path' points to text filtering (absolute path) */
+	cl_git_pass(git_odb_hashfile(&a, "status/testfile.bin", GIT_OBJECT_BLOB));
+	cl_git_pass(git_repository_hashfile(&b, _repo, bin.ptr, GIT_OBJECT_BLOB, "foo.txt"));
+	cl_assert(git_oid_cmp(&a, &b));
+
 	/* equal hashes when 'as_file' is empty and turns off filtering */
 	cl_git_pass(git_odb_hashfile(&a, "status/testfile.txt", GIT_OBJECT_BLOB));
 	cl_git_pass(git_repository_hashfile(&b, _repo, "testfile.txt", GIT_OBJECT_BLOB, ""));
@@ -79,7 +108,65 @@ void test_repo_hashfile__filtered(void)
 	cl_git_pass(git_repository_hashfile(&b, _repo, "testfile.bin", GIT_OBJECT_BLOB, ""));
 	cl_assert_equal_oid(&a, &b);
 
+	cl_git_pass(git_odb_hashfile(&a, "status/testfile.txt", GIT_OBJECT_BLOB));
+	cl_git_pass(git_repository_hashfile(&b, _repo, txt.ptr, GIT_OBJECT_BLOB, ""));
+	cl_assert_equal_oid(&a, &b);
+
+	cl_git_pass(git_odb_hashfile(&a, "status/testfile.bin", GIT_OBJECT_BLOB));
+	cl_git_pass(git_repository_hashfile(&b, _repo, bin.ptr, GIT_OBJECT_BLOB, ""));
+	cl_assert_equal_oid(&a, &b);
+
 	/* some hash type failures */
 	cl_git_fail(git_odb_hashfile(&a, "status/testfile.txt", 0));
 	cl_git_fail(git_repository_hashfile(&b, _repo, "testfile.txt", GIT_OBJECT_ANY, NULL));
+
+	git_buf_dispose(&txt);
+	git_buf_dispose(&bin);
+	git_buf_dispose(&root);
+}
+
+void test_repo_hashfile__filtered_outside_workdir(void)
+{
+	git_buf root = GIT_BUF_INIT, txt = GIT_BUF_INIT, bin = GIT_BUF_INIT;
+	char cwd[GIT_PATH_MAX];
+	git_oid a, b;
+
+	cl_must_pass(p_getcwd(cwd, GIT_PATH_MAX));
+	cl_must_pass(p_mkdir("absolute", 0777));
+	cl_git_pass(git_buf_joinpath(&root, cwd, "absolute"));
+	cl_git_pass(git_buf_joinpath(&txt, root.ptr, "testfile.txt"));
+	cl_git_pass(git_buf_joinpath(&bin, root.ptr, "testfile.bin"));
+
+	cl_repo_set_bool(_repo, "core.autocrlf", true);
+	cl_git_append2file("status/.gitattributes", "*.txt text\n*.bin binary\n\n");
+
+	/* create sample files containing CRLF in the "absolute" directory */
+	cl_git_mkfile("absolute/testfile.txt", "content\r\n");
+	cl_git_mkfile("absolute/testfile.bin", "other\r\nstuff\r\n");
+
+	/* not equal hashes because the explicit as_path selects text filtering */
+	cl_git_pass(git_odb_hashfile(&a, "absolute/testfile.txt", GIT_OBJECT_BLOB));
+	cl_git_pass(git_repository_hashfile(&b, _repo, txt.ptr, GIT_OBJECT_BLOB, "testfile.txt"));
+	cl_assert(git_oid_cmp(&a, &b));
+
+	/* equal hashes because the explicit as_path matches the binary attribute */
+	cl_git_pass(git_odb_hashfile(&a, "absolute/testfile.bin", GIT_OBJECT_BLOB));
+	cl_git_pass(git_repository_hashfile(&b, _repo, bin.ptr, GIT_OBJECT_BLOB, "testfile.bin"));
+	cl_assert_equal_oid(&a, &b);
+
+	/*
+	 * equal hashes because a NULL as_path applies no filters to an absolute
+	 * path outside the working directory
+	 */
+	cl_git_pass(git_odb_hashfile(&a, "absolute/testfile.txt", GIT_OBJECT_BLOB));
+	cl_git_pass(git_repository_hashfile(&b, _repo, txt.ptr, GIT_OBJECT_BLOB, NULL));
+	cl_assert_equal_oid(&a, &b);
+
+	cl_git_pass(git_odb_hashfile(&a, "absolute/testfile.bin", GIT_OBJECT_BLOB));
+	cl_git_pass(git_repository_hashfile(&b, _repo, bin.ptr, GIT_OBJECT_BLOB, NULL));
+	cl_assert_equal_oid(&a, &b);
+
+	git_buf_dispose(&txt);
+	git_buf_dispose(&bin);
+	git_buf_dispose(&root);
 }