Commit b99b5a81167c35471343da9050ae7d8692ba4139

Edward Thomson 2021-03-04T09:38:10

Merge pull request #5763 from lhchavez/cgraph-lookup commit-graph: Support lookups of entries in a commit-graph

diff --git a/fuzzers/commit_graph_fuzzer.c b/fuzzers/commit_graph_fuzzer.c
index f5b9c89..eb2c382 100644
--- a/fuzzers/commit_graph_fuzzer.c
+++ b/fuzzers/commit_graph_fuzzer.c
@@ -32,6 +32,7 @@ int LLVMFuzzerInitialize(int *argc, char ***argv)
 int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
 {
 	git_commit_graph_file cgraph = {{0}};
+	git_commit_graph_entry e;
 	git_buf commit_graph_buf = GIT_BUF_INIT;
 	git_oid oid = {{0}};
 	bool append_hash = false;
@@ -68,6 +69,10 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
 	    < 0)
 		goto cleanup;
 
+	/* Search for any oid, just to exercise that codepath. */
+	if (git_commit_graph_entry_find(&e, &cgraph, &oid, GIT_OID_HEXSZ) < 0)
+		goto cleanup;
+
 cleanup:
 	git_commit_graph_close(&cgraph);
 	git_buf_dispose(&commit_graph_buf);
diff --git a/src/commit_graph.c b/src/commit_graph.c
index 71a56e3..9740418 100644
--- a/src/commit_graph.c
+++ b/src/commit_graph.c
@@ -9,6 +9,7 @@
 
 #include "futils.h"
 #include "hash.h"
+#include "pack.h"
 
 #define GIT_COMMIT_GRAPH_MISSING_PARENT 0x70000000
 
@@ -278,6 +279,142 @@ int git_commit_graph_open(git_commit_graph_file **cgraph_out, const char *path)
 	return 0;
 }
 
+static int git_commit_graph_entry_get_byindex(
+		git_commit_graph_entry *e,
+		const git_commit_graph_file *cgraph,
+		size_t pos)
+{
+	const unsigned char *commit_data;
+
+	GIT_ASSERT_ARG(e);
+	GIT_ASSERT_ARG(cgraph);
+
+	if (pos >= cgraph->num_commits) {
+		git_error_set(GIT_ERROR_INVALID, "commit index %zu does not exist", pos);
+		return GIT_ENOTFOUND;
+	}
+
+	commit_data = cgraph->commit_data + pos * (GIT_OID_RAWSZ + 4 * sizeof(uint32_t));
+	git_oid_cpy(&e->tree_oid, (const git_oid *)commit_data);
+	e->parent_indices[0] = ntohl(*((uint32_t *)(commit_data + GIT_OID_RAWSZ)));
+	e->parent_indices[1]
+			= ntohl(*((uint32_t *)(commit_data + GIT_OID_RAWSZ + sizeof(uint32_t))));
+	e->parent_count = (e->parent_indices[0] != GIT_COMMIT_GRAPH_MISSING_PARENT)
+			+ (e->parent_indices[1] != GIT_COMMIT_GRAPH_MISSING_PARENT);
+	e->generation = ntohl(*((uint32_t *)(commit_data + GIT_OID_RAWSZ + 2 * sizeof(uint32_t))));
+	e->commit_time = ntohl(*((uint32_t *)(commit_data + GIT_OID_RAWSZ + 3 * sizeof(uint32_t))));
+
+	e->commit_time |= (e->generation & 0x3ull) << 32ull;
+	e->generation >>= 2u;
+	if (e->parent_indices[1] & 0x80000000u) {
+		uint32_t extra_edge_list_pos = e->parent_indices[1] & 0x7fffffff;
+
+		/* Make sure we're not being sent out of bounds */
+		if (extra_edge_list_pos >= cgraph->num_extra_edge_list) {
+			git_error_set(GIT_ERROR_INVALID,
+				      "commit %u does not exist",
+				      extra_edge_list_pos);
+			return GIT_ENOTFOUND;
+		}
+
+		e->extra_parents_index = extra_edge_list_pos;
+		while (extra_edge_list_pos < cgraph->num_extra_edge_list
+		       && (ntohl(*(
+					   (uint32_t *)(cgraph->extra_edge_list
+							+ extra_edge_list_pos * sizeof(uint32_t))))
+			   & 0x80000000u)
+				       == 0) {
+			extra_edge_list_pos++;
+			e->parent_count++;
+		}
+
+	}
+	git_oid_cpy(&e->sha1, &cgraph->oid_lookup[pos]);
+	return 0;
+}
+
+int git_commit_graph_entry_find(
+		git_commit_graph_entry *e,
+		const git_commit_graph_file *cgraph,
+		const git_oid *short_oid,
+		size_t len)
+{
+	int pos, found = 0;
+	uint32_t hi, lo;
+	const git_oid *current = NULL;
+
+	GIT_ASSERT_ARG(e);
+	GIT_ASSERT_ARG(cgraph);
+	GIT_ASSERT_ARG(short_oid);
+
+	hi = ntohl(cgraph->oid_fanout[(int)short_oid->id[0]]);
+	lo = ((short_oid->id[0] == 0x0) ? 0 : ntohl(cgraph->oid_fanout[(int)short_oid->id[0] - 1]));
+
+	pos = git_pack__lookup_sha1(cgraph->oid_lookup, GIT_OID_RAWSZ, lo, hi, short_oid->id);
+
+	if (pos >= 0) {
+		/* An object matching exactly the oid was found */
+		found = 1;
+		current = cgraph->oid_lookup + pos;
+	} else {
+		/* No object was found */
+		/* pos refers to the object with the "closest" oid to short_oid */
+		pos = -1 - pos;
+		if (pos < (int)cgraph->num_commits) {
+			current = cgraph->oid_lookup + pos;
+
+			if (!git_oid_ncmp(short_oid, current, len))
+				found = 1;
+		}
+	}
+
+	if (found && len != GIT_OID_HEXSZ && pos + 1 < (int)cgraph->num_commits) {
+		/* Check for ambiguousity */
+		const git_oid *next = current + 1;
+
+		if (!git_oid_ncmp(short_oid, next, len)) {
+			found = 2;
+		}
+	}
+
+	if (!found)
+		return git_odb__error_notfound(
+				"failed to find offset for multi-pack index entry", short_oid, len);
+	if (found > 1)
+		return git_odb__error_ambiguous(
+				"found multiple offsets for multi-pack index entry");
+
+	return git_commit_graph_entry_get_byindex(e, cgraph, pos);
+}
+
+int git_commit_graph_entry_parent(
+		git_commit_graph_entry *parent,
+		const git_commit_graph_file *cgraph,
+		const git_commit_graph_entry *entry,
+		size_t n)
+{
+	GIT_ASSERT_ARG(parent);
+	GIT_ASSERT_ARG(cgraph);
+
+	if (n >= entry->parent_count) {
+		git_error_set(GIT_ERROR_INVALID, "parent index %zu does not exist", n);
+		return GIT_ENOTFOUND;
+	}
+
+	if (n == 0 || (n == 1 && entry->parent_count == 2))
+		return git_commit_graph_entry_get_byindex(parent, cgraph, entry->parent_indices[n]);
+
+	return git_commit_graph_entry_get_byindex(
+			parent,
+			cgraph,
+			ntohl(
+					*(uint32_t *)(cgraph->extra_edge_list
+						      + (entry->extra_parents_index + n - 1)
+								      * sizeof(uint32_t)))
+					& 0x7fffffff);
+}
+
+
 int git_commit_graph_close(git_commit_graph_file *cgraph)
 {
 	GIT_ASSERT_ARG(cgraph);
diff --git a/src/commit_graph.h b/src/commit_graph.h
index 01512d7..f3a4317 100644
--- a/src/commit_graph.h
+++ b/src/commit_graph.h
@@ -57,7 +57,48 @@ typedef struct git_commit_graph_file {
 	git_buf filename;
 } git_commit_graph_file;
 
+/**
+ * An entry in the commit-graph file. Provides a subset of the information that
+ * can be obtained from the commit header.
+ */
+typedef struct git_commit_graph_entry {
+	/* The generation number of the commit within the graph */
+	size_t generation;
+
+	/* Time in seconds from UNIX epoch. */
+	git_time_t commit_time;
+
+	/* The number of parents of the commit. */
+	size_t parent_count;
+
+	/*
+	 * The indices of the parent commits within the Commit Data table. The value
+	 * of `GIT_COMMIT_GRAPH_MISSING_PARENT` indicates that no parent is in that
+	 * position.
+	 */
+	size_t parent_indices[2];
+
+	/* The index within the Extra Edge List of any parent after the first two. */
+	size_t extra_parents_index;
+
+	/* The SHA-1 hash of the root tree of the commit. */
+	git_oid tree_oid;
+
+	/* The SHA-1 hash of the requested commit. */
+	git_oid sha1;
+} git_commit_graph_entry;
+
 int git_commit_graph_open(git_commit_graph_file **cgraph_out, const char *path);
+int git_commit_graph_entry_find(
+		git_commit_graph_entry *e,
+		const git_commit_graph_file *cgraph,
+		const git_oid *short_oid,
+		size_t len);
+int git_commit_graph_entry_parent(
+		git_commit_graph_entry *parent,
+		const git_commit_graph_file *cgraph,
+		const git_commit_graph_entry *entry,
+		size_t n);
 int git_commit_graph_close(git_commit_graph_file *cgraph);
 void git_commit_graph_free(git_commit_graph_file *cgraph);
 
diff --git a/tests/graph/commit_graph.c b/tests/graph/commit_graph.c
index 329aa5b..43f5667 100644
--- a/tests/graph/commit_graph.c
+++ b/tests/graph/commit_graph.c
@@ -8,12 +8,81 @@ void test_graph_commit_graph__parse(void)
 {
 	git_repository *repo;
 	struct git_commit_graph_file *cgraph;
+	struct git_commit_graph_entry e, parent;
+	git_oid id;
 	git_buf commit_graph_path = GIT_BUF_INIT;
 
 	cl_git_pass(git_repository_open(&repo, cl_fixture("testrepo.git")));
 	cl_git_pass(git_buf_joinpath(&commit_graph_path, git_repository_path(repo), "objects/info/commit-graph"));
 	cl_git_pass(git_commit_graph_open(&cgraph, git_buf_cstr(&commit_graph_path)));
 
+	cl_git_pass(git_oid_fromstr(&id, "5001298e0c09ad9c34e4249bc5801c75e9754fa5"));
+	cl_git_pass(git_commit_graph_entry_find(&e, cgraph, &id, GIT_OID_HEXSZ));
+	cl_assert_equal_oid(&e.sha1, &id);
+	cl_git_pass(git_oid_fromstr(&id, "418382dff1ffb8bdfba833f4d8bbcde58b1e7f47"));
+	cl_assert_equal_oid(&e.tree_oid, &id);
+	cl_assert_equal_i(e.generation, 1);
+	cl_assert_equal_i(e.commit_time, 1273610423ull);
+	cl_assert_equal_i(e.parent_count, 0);
+
+	cl_git_pass(git_oid_fromstr(&id, "be3563ae3f795b2b4353bcce3a527ad0a4f7f644"));
+	cl_git_pass(git_commit_graph_entry_find(&e, cgraph, &id, GIT_OID_HEXSZ));
+	cl_assert_equal_oid(&e.sha1, &id);
+	cl_assert_equal_i(e.generation, 5);
+	cl_assert_equal_i(e.commit_time, 1274813907ull);
+	cl_assert_equal_i(e.parent_count, 2);
+
+	cl_git_pass(git_oid_fromstr(&id, "9fd738e8f7967c078dceed8190330fc8648ee56a"));
+	cl_git_pass(git_commit_graph_entry_parent(&parent, cgraph, &e, 0));
+	cl_assert_equal_oid(&parent.sha1, &id);
+	cl_assert_equal_i(parent.generation, 4);
+
+	cl_git_pass(git_oid_fromstr(&id, "c47800c7266a2be04c571c04d5a6614691ea99bd"));
+	cl_git_pass(git_commit_graph_entry_parent(&parent, cgraph, &e, 1));
+	cl_assert_equal_oid(&parent.sha1, &id);
+	cl_assert_equal_i(parent.generation, 3);
+
+	git_commit_graph_free(cgraph);
+	git_repository_free(repo);
+	git_buf_dispose(&commit_graph_path);
+}
+
+void test_graph_commit_graph__parse_octopus_merge(void)
+{
+	git_repository *repo;
+	struct git_commit_graph_file *cgraph;
+	struct git_commit_graph_entry e, parent;
+	git_oid id;
+	git_buf commit_graph_path = GIT_BUF_INIT;
+
+	cl_git_pass(git_repository_open(&repo, cl_fixture("merge-recursive/.gitted")));
+	cl_git_pass(git_buf_joinpath(&commit_graph_path, git_repository_path(repo), "objects/info/commit-graph"));
+	cl_git_pass(git_commit_graph_open(&cgraph, git_buf_cstr(&commit_graph_path)));
+
+	cl_git_pass(git_oid_fromstr(&id, "d71c24b3b113fd1d1909998c5bfe33b86a65ee03"));
+	cl_git_pass(git_commit_graph_entry_find(&e, cgraph, &id, GIT_OID_HEXSZ));
+	cl_assert_equal_oid(&e.sha1, &id);
+	cl_git_pass(git_oid_fromstr(&id, "348f16ffaeb73f319a75cec5b16a0a47d2d5e27c"));
+	cl_assert_equal_oid(&e.tree_oid, &id);
+	cl_assert_equal_i(e.generation, 7);
+	cl_assert_equal_i(e.commit_time, 1447083009ull);
+	cl_assert_equal_i(e.parent_count, 3);
+
+	cl_git_pass(git_oid_fromstr(&id, "ad2ace9e15f66b3d1138922e6ffdc3ea3f967fa6"));
+	cl_git_pass(git_commit_graph_entry_parent(&parent, cgraph, &e, 0));
+	cl_assert_equal_oid(&parent.sha1, &id);
+	cl_assert_equal_i(parent.generation, 6);
+
+	cl_git_pass(git_oid_fromstr(&id, "483065df53c0f4a02cdc6b2910b05d388fc17ffb"));
+	cl_git_pass(git_commit_graph_entry_parent(&parent, cgraph, &e, 1));
+	cl_assert_equal_oid(&parent.sha1, &id);
+	cl_assert_equal_i(parent.generation, 2);
+
+	cl_git_pass(git_oid_fromstr(&id, "815b5a1c80ca749d705c7aa0cb294a00cbedd340"));
+	cl_git_pass(git_commit_graph_entry_parent(&parent, cgraph, &e, 2));
+	cl_assert_equal_oid(&parent.sha1, &id);
+	cl_assert_equal_i(parent.generation, 6);
+
 	git_commit_graph_free(cgraph);
 	git_repository_free(repo);
 	git_buf_dispose(&commit_graph_path);
diff --git a/tests/resources/merge-recursive/.gitted/objects/info/commit-graph b/tests/resources/merge-recursive/.gitted/objects/info/commit-graph
new file mode 100644
index 0000000..da055f1
Binary files /dev/null and b/tests/resources/merge-recursive/.gitted/objects/info/commit-graph differ