Commit daeac29add198f9c732e381affc272687d8bc5d2

Vicent Marti 2014-02-05T16:24:09

Merge pull request #2103 from libgit2/cmn/parse-commit-faster

commit: faster parsing

diff --git a/src/commit.c b/src/commit.c
index da7c499..730fa64 100644
--- a/src/commit.c
+++ b/src/commit.c
@@ -164,33 +164,15 @@ int git_commit__parse(void *_commit, git_odb_object *odb_obj)
 	const char *buffer_start = git_odb_object_data(odb_obj), *buffer;
 	const char *buffer_end = buffer_start + git_odb_object_size(odb_obj);
 	git_oid parent_id;
-	uint32_t parent_count = 0;
 	size_t header_len;
 
-	/* find end-of-header (counting parents as we go) */
-	for (buffer = buffer_start; buffer < buffer_end; ++buffer) {
-		if (!strncmp("\n\n", buffer, 2)) {
-			++buffer;
-			break;
-		}
-		if (!strncmp("\nparent ", buffer, strlen("\nparent ")))
-			++parent_count;
-	}
-
-	header_len = buffer - buffer_start;
-	commit->raw_header = git__strndup(buffer_start, header_len);
-	GITERR_CHECK_ALLOC(commit->raw_header);
+	buffer = buffer_start;
 
-	/* point "buffer" to header data */
-	buffer = commit->raw_header;
-	buffer_end = commit->raw_header + header_len;
-
-	if (parent_count < 1)
-		parent_count = 1;
-
-	git_array_init_to_size(commit->parent_ids, parent_count);
+	/* Allocate room for one parent up front; this avoids a realloc about 90% of the time */
+	git_array_init_to_size(commit->parent_ids, 1);
 	GITERR_CHECK_ARRAY(commit->parent_ids);
 
+	/* The tree is always the first field */
 	if (git_oid__parse(&commit->tree_id, &buffer, buffer_end, "tree ") < 0)
 		goto bad_buffer;
 
@@ -221,6 +203,9 @@ int git_commit__parse(void *_commit, git_odb_object *odb_obj)
 	/* Parse add'l header entries */
 	while (buffer < buffer_end) {
 		const char *eoln = buffer;
+		if (buffer[-1] == '\n' && buffer[0] == '\n')
+			break;
+
 		while (eoln < buffer_end && *eoln != '\n')
 			++eoln;
 
@@ -236,13 +221,12 @@ int git_commit__parse(void *_commit, git_odb_object *odb_obj)
 		buffer = eoln;
 	}
 
-	/* point "buffer" to data after header */
-	buffer = git_odb_object_data(odb_obj);
-	buffer_end = buffer + git_odb_object_size(odb_obj);
+	header_len = buffer - buffer_start;
+	commit->raw_header = git__strndup(buffer_start, header_len);
+	GITERR_CHECK_ALLOC(commit->raw_header);
 
-	buffer += header_len;
-	if (*buffer == '\n')
-		++buffer;
+	/* point "buffer" to data after header, +1 for the final LF */
+	buffer = buffer_start + header_len + 1;
 
 	/* extract commit message */
 	if (buffer <= buffer_end) {
diff --git a/src/posix.h b/src/posix.h
index 0d9be49..6d3a84e 100644
--- a/src/posix.h
+++ b/src/posix.h
@@ -89,18 +89,7 @@ extern struct tm * p_gmtime_r (const time_t *timer, struct tm *result);
 #	include "unix/posix.h"
 #endif
 
-#if defined(__MINGW32__) || defined(__sun) || defined(__APPLE__)
-#   define NO_STRNLEN
-#endif
-
-#ifdef NO_STRNLEN
-GIT_INLINE(size_t) p_strnlen(const char *s, size_t maxlen) {
-	const char *end = memchr(s, 0, maxlen);
-	return end ? (size_t)(end - s) : maxlen;
-}
-#else
-#   define p_strnlen strnlen
-#endif
+#include "strnlen.h"
 
 #ifdef NO_READDIR_R
 #	include <dirent.h>
diff --git a/src/strnlen.h b/src/strnlen.h
new file mode 100644
index 0000000..007da2e
--- /dev/null
+++ b/src/strnlen.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#ifndef INCLUDE_strnlen_h__
+#define INCLUDE_strnlen_h__
+
+/*
+ * Platforms listed here are treated as lacking strnlen(); they get the
+ * memchr-based fallback below instead.
+ */
+#if defined(__MINGW32__) || defined(__sun) || defined(__APPLE__)
+#   define NO_STRNLEN
+#endif
+
+#ifdef NO_STRNLEN
+/*
+ * Fallback for strnlen(): length of `s`, looking at no more than `maxlen`
+ * bytes. Returns the offset of the first NUL, or `maxlen` if none is found
+ * within that range.
+ */
+GIT_INLINE(size_t) p_strnlen(const char *s, size_t maxlen) {
+	const char *end = memchr(s, 0, maxlen);
+	return end ? (size_t)(end - s) : maxlen;
+}
+#else
+#   define p_strnlen strnlen
+#endif
+
+#endif /* INCLUDE_strnlen_h__ */
diff --git a/src/util.h b/src/util.h
index f9de909..e378786 100644
--- a/src/util.h
+++ b/src/util.h
@@ -8,6 +8,7 @@
 #define INCLUDE_util_h__
 
 #include "common.h"
+#include "strnlen.h"
 
 #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
 #define bitsizeof(x) (CHAR_BIT * sizeof(x))
@@ -50,8 +51,7 @@ GIT_INLINE(char *) git__strndup(const char *str, size_t n)
 	size_t length = 0;
 	char *ptr;
 
-	while (length < n && str[length])
-		++length;
+	length = p_strnlen(str, n);
 
 	ptr = (char*)git__malloc(length + 1);