Commit f04a58b00c1a350e2cd90bddcdaa7865183c9d2f

Patrick Steinhardt 2019-10-03T12:55:48

Merge pull request #4445 from tiennou/shallow/dry-commit-parsing DRY commit parsing

diff --git a/src/commit.c b/src/commit.c
index d5f19df..20e0b8a 100644
--- a/src/commit.c
+++ b/src/commit.c
@@ -20,6 +20,7 @@
 #include "message.h"
 #include "refs.h"
 #include "object.h"
+#include "array.h"
 #include "oidarray.h"
 
 void git_commit__free(void *_commit)
@@ -383,15 +384,16 @@ int git_commit_amend(
 	return error;
 }
 
-int git_commit__parse_raw(void *_commit, const char *data, size_t size)
+static int commit_parse(git_commit *commit, const char *data, size_t size, unsigned int flags)
 {
-	git_commit *commit = _commit;
 	const char *buffer_start = data, *buffer;
 	const char *buffer_end = buffer_start + size;
 	git_oid parent_id;
 	size_t header_len;
 	git_signature dummy_sig;
 
+	assert(commit && data);
+
 	buffer = buffer_start;
 
 	/* Allocate for one, which will allow not to realloc 90% of the time  */
@@ -399,8 +401,15 @@ int git_commit__parse_raw(void *_commit, const char *data, size_t size)
 	GIT_ERROR_CHECK_ARRAY(commit->parent_ids);
 
 	/* The tree is always the first field */
-	if (git_oid__parse(&commit->tree_id, &buffer, buffer_end, "tree ") < 0)
-		goto bad_buffer;
+	if (!(flags & GIT_COMMIT_PARSE_QUICK)) {
+	    if (git_oid__parse(&commit->tree_id, &buffer, buffer_end, "tree ") < 0)
+			goto bad_buffer;
+	} else {
+		size_t tree_len = strlen("tree ") + GIT_OID_HEXSZ + 1;
+		if (buffer + tree_len > buffer_end)
+			goto bad_buffer;
+		buffer += tree_len;
+	}
 
 	/*
 	 * TODO: commit grafts!
@@ -413,11 +422,13 @@ int git_commit__parse_raw(void *_commit, const char *data, size_t size)
 		git_oid_cpy(new_id, &parent_id);
 	}
 
-	commit->author = git__malloc(sizeof(git_signature));
-	GIT_ERROR_CHECK_ALLOC(commit->author);
+	if (!(flags & GIT_COMMIT_PARSE_QUICK)) {
+		commit->author = git__malloc(sizeof(git_signature));
+		GIT_ERROR_CHECK_ALLOC(commit->author);
 
-	if (git_signature__parse(commit->author, &buffer, buffer_end, "author ", '\n') < 0)
-		return -1;
+		if (git_signature__parse(commit->author, &buffer, buffer_end, "author ", '\n') < 0)
+			return -1;
+	}
 
 	/* Some tools create multiple author fields, ignore the extra ones */
 	while (!git__prefixncmp(buffer, buffer_end - buffer, "author ")) {
@@ -435,6 +446,9 @@ int git_commit__parse_raw(void *_commit, const char *data, size_t size)
 	if (git_signature__parse(commit->committer, &buffer, buffer_end, "committer ", '\n') < 0)
 		return -1;
 
+	if (flags & GIT_COMMIT_PARSE_QUICK)
+		return 0;
+
 	/* Parse add'l header entries */
 	while (buffer < buffer_end) {
 		const char *eoln = buffer;
@@ -477,11 +491,19 @@ bad_buffer:
 	return -1;
 }
 
+int git_commit__parse_raw(void *commit, const char *data, size_t size)
+{
+	return commit_parse(commit, data, size, 0);
+}
+
+int git_commit__parse_ext(git_commit *commit, git_odb_object *odb_obj, unsigned int flags)
+{
+	return commit_parse(commit, git_odb_object_data(odb_obj), git_odb_object_size(odb_obj), flags);
+}
+
 int git_commit__parse(void *_commit, git_odb_object *odb_obj)
 {
-	return git_commit__parse_raw(_commit,
-		git_odb_object_data(odb_obj),
-		git_odb_object_size(odb_obj));
+	return git_commit__parse_ext(_commit, odb_obj, 0);
 }
 
 #define GIT_COMMIT_GETTER(_rvalue, _name, _return) \
diff --git a/src/commit.h b/src/commit.h
index 9137a8f..318ce5c 100644
--- a/src/commit.h
+++ b/src/commit.h
@@ -37,4 +37,10 @@ void git_commit__free(void *commit);
 int git_commit__parse(void *commit, git_odb_object *obj);
 int git_commit__parse_raw(void *commit, const char *data, size_t size);
 
+typedef enum {
+	GIT_COMMIT_PARSE_QUICK = (1 << 0), /**< Only parse parents and committer info */
+} git_commit__parse_flags;
+
+int git_commit__parse_ext(git_commit *commit, git_odb_object *odb_obj, unsigned int flags);
+
 #endif
diff --git a/src/commit_list.c b/src/commit_list.c
index 78b1f05..2e103ec 100644
--- a/src/commit_list.c
+++ b/src/commit_list.c
@@ -10,6 +10,7 @@
 #include "revwalk.h"
 #include "pool.h"
 #include "odb.h"
+#include "commit.h"
 
 int git_commit_list_time_cmp(const void *a, const void *b)
 {
@@ -55,17 +56,6 @@ git_commit_list_node *git_commit_list_alloc_node(git_revwalk *walk)
 	return (git_commit_list_node *)git_pool_mallocz(&walk->commit_pool, 1);
 }
 
-static int commit_error(git_commit_list_node *commit, const char *msg)
-{
-	char commit_oid[GIT_OID_HEXSZ + 1];
-	git_oid_fmt(commit_oid, &commit->oid);
-	commit_oid[GIT_OID_HEXSZ] = '\0';
-
-	git_error_set(GIT_ERROR_ODB, "failed to parse commit %s - %s", commit_oid, msg);
-
-	return -1;
-}
-
 static git_commit_list_node **alloc_parents(
 	git_revwalk *walk, git_commit_list_node *commit, size_t n_parents)
 {
@@ -111,77 +101,42 @@ git_commit_list_node *git_commit_list_pop(git_commit_list **stack)
 
 static int commit_quick_parse(
 	git_revwalk *walk,
-	git_commit_list_node *commit,
-	const uint8_t *buffer,
-	size_t buffer_len)
+	git_commit_list_node *node,
+	git_odb_object *obj)
 {
-	const size_t parent_len = strlen("parent ") + GIT_OID_HEXSZ + 1;
-	const uint8_t *buffer_end = buffer + buffer_len;
-	const uint8_t *parents_start, *committer_start;
-	int i, parents = 0;
-	int64_t commit_time;
-
-	buffer += strlen("tree ") + GIT_OID_HEXSZ + 1;
-
-	parents_start = buffer;
-	while (buffer + parent_len < buffer_end && memcmp(buffer, "parent ", strlen("parent ")) == 0) {
-		parents++;
-		buffer += parent_len;
-	}
-
-	commit->parents = alloc_parents(walk, commit, parents);
-	GIT_ERROR_CHECK_ALLOC(commit->parents);
-
-	buffer = parents_start;
-	for (i = 0; i < parents; ++i) {
-		git_oid oid;
-
-		if (git_oid_fromstr(&oid, (const char *)buffer + strlen("parent ")) < 0)
-			return -1;
+	git_oid *parent_oid;
+	git_commit *commit;
+	int error;
+	size_t i;
 
-		commit->parents[i] = git_revwalk__commit_lookup(walk, &oid);
-		if (commit->parents[i] == NULL)
-			return -1;
+	commit = git__calloc(1, sizeof(*commit));
+	GIT_ERROR_CHECK_ALLOC(commit);
+	commit->object.repo = walk->repo;
 
-		buffer += parent_len;
+	if ((error = git_commit__parse_ext(commit, obj, GIT_COMMIT_PARSE_QUICK)) < 0) {
+		git__free(commit);
+		return error;
 	}
 
-	commit->out_degree = (unsigned short)parents;
-
-	if ((committer_start = buffer = memchr(buffer, '\n', buffer_end - buffer)) == NULL)
-		return commit_error(commit, "object is corrupted");
-
-	buffer++;
-
-	if ((buffer = memchr(buffer, '\n', buffer_end - buffer)) == NULL)
-		return commit_error(commit, "object is corrupted");
-
-	/* Skip trailing spaces */
-	while (buffer > committer_start && git__isspace(*buffer))
-		buffer--;
-
-	/* Seek for the beginning of the pack of digits */
-	while (buffer > committer_start && git__isdigit(*buffer))
-		buffer--;
-
-	/* Skip potential timezone offset */
-	if ((buffer > committer_start) && (*buffer == '+' || *buffer == '-')) {
-		buffer--;
+	if (!git__is_uint16(git_array_size(commit->parent_ids))) {
+		git__free(commit);
+		git_error_set(GIT_ERROR_INVALID, "commit has more than 2^16 parents");
+		return -1;
+	}
 
-		while (buffer > committer_start && git__isspace(*buffer))
-			buffer--;
+	node->time = commit->committer->when.time;
+	node->out_degree = (uint16_t) git_array_size(commit->parent_ids);
+	node->parents = alloc_parents(walk, node, node->out_degree);
+	GIT_ERROR_CHECK_ALLOC(node->parents);
 
-		while (buffer > committer_start && git__isdigit(*buffer))
-			buffer--;
+	git_array_foreach(commit->parent_ids, i, parent_oid) {
+		node->parents[i] = git_revwalk__commit_lookup(walk, parent_oid);
 	}
 
-	if ((buffer == committer_start) ||
-	    (git__strntol64(&commit_time, (char *)(buffer + 1),
-			    buffer_end - buffer + 1, NULL, 10) < 0))
-		return commit_error(commit, "cannot parse commit time");
+	git_commit__free(commit);
+
+	node->parsed = 1;
 
-	commit->time = commit_time;
-	commit->parsed = 1;
 	return 0;
 }
 
@@ -200,10 +155,7 @@ int git_commit_list_parse(git_revwalk *walk, git_commit_list_node *commit)
 		git_error_set(GIT_ERROR_INVALID, "object is no commit object");
 		error = -1;
 	} else
-		error = commit_quick_parse(
-			walk, commit,
-			(const uint8_t *)git_odb_object_data(obj),
-			git_odb_object_size(obj));
+		error = commit_quick_parse(walk, commit, obj);
 
 	git_odb_object_free(obj);
 	return error;
diff --git a/src/commit_list.h b/src/commit_list.h
index a7551a2..6a65f8a 100644
--- a/src/commit_list.h
+++ b/src/commit_list.h
@@ -33,8 +33,8 @@ typedef struct git_commit_list_node {
 			 added:1,
 			 flags : FLAG_BITS;
 
-	unsigned short in_degree;
-	unsigned short out_degree;
+	uint16_t in_degree;
+	uint16_t out_degree;
 
 	struct git_commit_list_node **parents;
 } git_commit_list_node;
diff --git a/src/integer.h b/src/integer.h
index 10b1097..4738e9e 100644
--- a/src/integer.h
+++ b/src/integer.h
@@ -21,6 +21,13 @@ GIT_INLINE(int) git__is_ssizet(size_t p)
 	return p == (size_t)r;
 }
 
+/** @return true if p fits into the range of a uint16_t */
+GIT_INLINE(int) git__is_uint16(size_t p)
+{
+	uint16_t r = (uint16_t)p;
+	return p == (size_t)r;
+}
+
 /** @return true if p fits into the range of a uint32_t */
 GIT_INLINE(int) git__is_uint32(size_t p)
 {