Commit c51880eeaf80840e922309f3b5804d98ec647cc8

Vicent Marti 2013-02-20T17:03:18

Simplify signature parsing

diff --git a/src/signature.c b/src/signature.c
index 3380097..ab5882b 100644
--- a/src/signature.c
+++ b/src/signature.c
@@ -22,62 +22,28 @@ void git_signature_free(git_signature *sig)
 	git__free(sig);
 }
 
-static const char *skip_leading_spaces(const char *buffer, const char *buffer_end)
-{
-	while (*buffer == ' ' && buffer < buffer_end)
-		buffer++;
-
-	return buffer;
-}
-
-static const char *skip_trailing_spaces(const char *buffer_start, const char *buffer_end)
-{
-	while (*buffer_end == ' ' && buffer_end > buffer_start)
-		buffer_end--;
-
-	return buffer_end;
-}
-
 static int signature_error(const char *msg)
 {
-	giterr_set(GITERR_INVALID, "Failed to process signature - %s", msg);
+	giterr_set(GITERR_INVALID, "Failed to parse signature - %s", msg);
 	return -1;
 }
 
-static int process_trimming(const char *input, char **storage, const char *input_end, int fail_when_empty)
+static bool contains_angle_brackets(const char *input)
 {
-	const char *left, *right;
-	size_t trimmed_input_length;
-
-	assert(storage);
-
-	left = skip_leading_spaces(input, input_end);
-	right = skip_trailing_spaces(input, input_end - 1);
-
-	if (right < left) {
-		if (fail_when_empty)
-			return signature_error("input is either empty of contains only spaces");
-
-		right = left - 1;
-	}
-
-	trimmed_input_length = right - left + 1;
-
-	*storage = git__malloc(trimmed_input_length + 1);
-	GITERR_CHECK_ALLOC(*storage);
-
-	memcpy(*storage, left, trimmed_input_length);
-	(*storage)[trimmed_input_length] = 0;
-
-	return 0;
+	return strchr(input, '<') != NULL || strchr(input, '>') != NULL;
 }
 
-static bool contains_angle_brackets(const char *input)
+static char *extract_trimmed(const char *ptr, size_t len)
 {
-	if (strchr(input, '<') != NULL)
-		return true;
+	while (len && ptr[0] == ' ') {
+		ptr++; len--;
+	}
 
-	return strchr(input, '>') != NULL;
+	while (len && ptr[len - 1] == ' ') {
+		len--;
+	}
+
+	return git__substrdup(ptr, len);
 }
 
 int git_signature_new(git_signature **sig_out, const char *name, const char *email, git_time_t time, int offset)
@@ -88,28 +54,28 @@ int git_signature_new(git_signature **sig_out, const char *name, const char *ema
 
 	*sig_out = NULL;
 
+	if (contains_angle_brackets(name) ||
+		contains_angle_brackets(email)) {
+		return signature_error(
+			"Neither `name` nor `email` should contain angle brackets chars.");
+	}
+
 	p = git__calloc(1, sizeof(git_signature));
 	GITERR_CHECK_ALLOC(p);
 
-	if (process_trimming(name, &p->name, name + strlen(name), 1) < 0 ||
-		process_trimming(email, &p->email, email + strlen(email), 1) < 0)
-	{
+	p->name = extract_trimmed(name, strlen(name));
+	p->email = extract_trimmed(email, strlen(email));
+
+	if (p->name == NULL || p->email == NULL ||
+		p->name[0] == '\0' || p->email[0] == '\0') {
 		git_signature_free(p);
 		return -1;
 	}
 		
-	if (contains_angle_brackets(p->email) ||
-		contains_angle_brackets(p->name))
-	{
-		git_signature_free(p);
-		return signature_error("Neither `name` nor `email` should contain angle brackets chars.");
-	}
-
 	p->when.time = time;
 	p->when.offset = offset;
 
 	*sig_out = p;
-
 	return 0;
 }
 
@@ -153,169 +119,75 @@ int git_signature_now(git_signature **sig_out, const char *name, const char *ema
 	return 0;
 }
 
-static int timezone_error(const char *msg)
-{
-	giterr_set(GITERR_INVALID, "Failed to parse TZ offset - %s", msg);
-	return -1;
-}
-
-static int parse_timezone_offset(const char *buffer, int *offset_out)
-{
-	int dec_offset;
-	int mins, hours, offset;
-
-	const char *offset_start;
-	const char *offset_end;
-
-	offset_start = buffer;
-
-	if (*offset_start == '\n') {
-		*offset_out = 0;
-		return 0;
-	}
-
-	if (offset_start[0] != '-' && offset_start[0] != '+')
-		return timezone_error("does not start with '+' or '-'");
-
-	if (offset_start[1] < '0' || offset_start[1] > '9')
-		return timezone_error("expected initial digit");
-
-	if (git__strtol32(&dec_offset, offset_start + 1, &offset_end, 10) < 0)
-		return timezone_error("not a valid number");
-
-	if (offset_end - offset_start != 5)
-		return timezone_error("invalid length");
-
-	if (dec_offset > 1400)
-		return timezone_error("value too large");
-
-	hours = dec_offset / 100;
-	mins = dec_offset % 100;
-
-	if (hours > 14)	// see http://www.worldtimezone.com/faq.html
-		return timezone_error("hour value too large");
-
-	if (mins > 59)
-		return timezone_error("minutes value too large");
-
-	offset = (hours * 60) + mins;
-
-	if (offset_start[0] == '-')
-		offset *= -1;
-
-	*offset_out = offset;
-
-	return 0;
-}
-
-static int process_next_token(const char **buffer_out, char **storage,
-	const char *token_end, const char *right_boundary)
-{
-	int error = process_trimming(*buffer_out, storage, token_end, 0);
-	if (error < 0)
-		return error;
-
-	*buffer_out = token_end + 1;
-
-	if (*buffer_out > right_boundary)
-		return signature_error("signature is too short");
-
-	return 0;
-}
-
-static const char *scan_for_previous_token(const char *buffer, const char *left_boundary)
-{
-	const char *start;
-
-	if (buffer <= left_boundary)
-		return NULL;
-
-	start = skip_trailing_spaces(left_boundary, buffer);
-
-	/* Search for previous occurence of space */
-	while (start[-1] != ' ' && start > left_boundary)
-		start--;
-
-	return start;
-}
-
-static int parse_time(git_time_t *time_out, const char *buffer)
-{
-	int error;
-	int64_t time;
-
-	if (*buffer == '+' || *buffer == '-') {
-		giterr_set(GITERR_INVALID, "Failed while parsing time. '%s' actually looks like a timezone offset.", buffer);
-		return -1;
-	}
-
-	error = git__strtol64(&time, buffer, &buffer, 10);
-
-	if (!error)
-		*time_out = (git_time_t)time;
-
-	return error;
-}
-
 int git_signature__parse(git_signature *sig, const char **buffer_out,
 		const char *buffer_end, const char *header, char ender)
 {
 	const char *buffer = *buffer_out;
-	const char *line_end, *name_end, *email_end, *tz_start, *time_start;
-	int error = 0;
+	const char *email_start, *email_end;
 
 	memset(sig, 0, sizeof(git_signature));
 
-	if ((line_end = memchr(buffer, ender, buffer_end - buffer)) == NULL)
+	if ((buffer_end = memchr(buffer, ender, buffer_end - buffer)) == NULL)
 		return signature_error("no newline given");
 
 	if (header) {
 		const size_t header_len = strlen(header);
 
-		if (memcmp(buffer, header, header_len) != 0)
+		if (buffer_end - buffer < header_len || memcmp(buffer, header, header_len) != 0)
 			return signature_error("expected prefix doesn't match actual");
 
 		buffer += header_len;
 	}
 
-	if (buffer > line_end)
+	if (buffer >= buffer_end)
 		return signature_error("signature too short");
 
-	if ((name_end = strchr(buffer, '<')) == NULL)
-		return signature_error("character '<' not allowed in signature");
+	email_start = memrchr(buffer, '<', buffer_end - buffer);
+	email_end = memrchr(buffer, '>', buffer_end - buffer);
 
-	if ((email_end = strchr(name_end, '>')) == NULL)
-		return signature_error("character '>' not allowed in signature");
-
-	if (email_end < name_end)
+	if (!email_start || !email_end || email_end <= email_start)
 		return signature_error("malformed e-mail");
 
-	error = process_next_token(&buffer, &sig->name, name_end, line_end);
-	if (error < 0)
-		return error;
+	sig->name = extract_trimmed(buffer, email_start - buffer);
 
-	error = process_next_token(&buffer, &sig->email, email_end, line_end);
-	if (error < 0)
-		return error;
+	email_start += 1;
+	sig->email = extract_trimmed(email_start, email_end - email_start);
 
-	tz_start = scan_for_previous_token(line_end - 1, buffer);
+	/* Do we even have a time at the end of the signature? */
+	if (email_end + 2 < buffer_end) {
+		const char *time_start = email_end + 2;
+		const char *time_end;
 
-	if (tz_start == NULL)
-		goto clean_exit;	/* No timezone nor date */
+		if (git__strtol64(&sig->when.time, time_start, &time_end, 10) < 0)
+			return signature_error("invalid Unix timestamp");
 
-	time_start = scan_for_previous_token(tz_start - 1, buffer);
-	if (time_start == NULL || parse_time(&sig->when.time, time_start) < 0) {
-		/* The tz_start might point at the time */
-		parse_time(&sig->when.time, tz_start);
-		goto clean_exit;
-	}
+		/* no timezone at all */
+		if (time_end + 1 < buffer_end) {
+			int offset, hours, mins;
+			const char *tz_start, *tz_end;
+
+			tz_start = time_end + 1;
+
+			if ((tz_start[0] != '-' && tz_start[0] != '+') || 
+				git__strtol32(&offset, tz_start + 1, &tz_end, 10) < 0)
+				return signature_error("malformed timezone");
+
+			hours = offset / 100;
+			mins = offset % 100;
 
-	if (parse_timezone_offset(tz_start, &sig->when.offset) < 0) {
-		sig->when.time = 0; /* Bogus timezone, we reset the time */
+			/*
+			 * only store timezone if it's not overflowing;
+			 * see http://www.worldtimezone.com/faq.html
+			 */
+			if (hours < 14 && mins < 59) {
+				sig->when.offset = (hours * 60) + mins;
+				if (tz_start[0] == '-')
+					sig->when.offset = -sig->when.offset;
+			}
+		}
 	}
 
-clean_exit:
-	*buffer_out = line_end + 1;
+	*buffer_out = buffer_end + 1;
 	return 0;
 }
 
diff --git a/tests-clar/commit/parse.c b/tests-clar/commit/parse.c
index 9d291bb..37f27b0 100644
--- a/tests-clar/commit/parse.c
+++ b/tests-clar/commit/parse.c
@@ -108,19 +108,12 @@ passing_signature_test_case passing_signature_cases[] = {
 	// Parse an obviously invalid signature
 	{"committer foo<@bar> 123456 -0100 \n", "committer ", "foo", "@bar", 123456, -60},
 	// Parse an obviously invalid signature
-	{"committer    foo<@bar>123456 -0100 \n", "committer ", "foo", "@bar", 123456, -60},
+	{"committer    foo<@bar> 123456 -0100 \n", "committer ", "foo", "@bar", 123456, -60},
 	// Parse an obviously invalid signature
 	{"committer <>\n", "committer ", "", "", 0, 0},
-	{"committer Vicent Marti <tanoku@gmail.com> 123456 -1500 \n", "committer ", "Vicent Marti", "tanoku@gmail.com", 0, 0},
-	{"committer Vicent Marti <tanoku@gmail.com> 123456 +0163 \n", "committer ", "Vicent Marti", "tanoku@gmail.com", 0, 0},
-	{"author Vicent Marti <tanoku@gmail.com> notime \n", "author ", "Vicent Marti", "tanoku@gmail.com", 0, 0},
-	{"author Vicent Marti <tanoku@gmail.com> 123456 notimezone \n", "author ", "Vicent Marti", "tanoku@gmail.com", 0, 0},
-	{"author Vicent Marti <tanoku@gmail.com> notime +0100\n", "author ", "Vicent Marti", "tanoku@gmail.com", 0, 0},
+	{"committer Vicent Marti <tanoku@gmail.com> 123456 -1500 \n", "committer ", "Vicent Marti", "tanoku@gmail.com", 123456, 0},
+	{"committer Vicent Marti <tanoku@gmail.com> 123456 +0163 \n", "committer ", "Vicent Marti", "tanoku@gmail.com", 123456, 0},
 	{"author Vicent Marti <tanoku@gmail.com>\n", "author ", "Vicent Marti", "tanoku@gmail.com", 0, 0},
-	{"author A U Thor <author@example.com>,  C O. Miter <comiter@example.com> 1234567890 -0700\n", "author ", "A U Thor", "author@example.com", 1234567890, -420},
-	{"author A U Thor <author@example.com> and others 1234567890 -0700\n", "author ", "A U Thor", "author@example.com", 1234567890, -420},
-	{"author A U Thor <author@example.com> and others 1234567890\n", "author ", "A U Thor", "author@example.com", 1234567890, 0},
-	{"author A U Thor> <author@example.com> and others 1234567890\n", "author ", "A U Thor>", "author@example.com", 1234567890, 0},
 	/* a variety of dates */
 	{"author Vicent Marti <tanoku@gmail.com> 0 \n", "author ", "Vicent Marti", "tanoku@gmail.com", 0, 0},
 	{"author Vicent Marti <tanoku@gmail.com> 1234567890 \n", "author ", "Vicent Marti", "tanoku@gmail.com", 1234567890, 0},
@@ -145,7 +138,7 @@ failing_signature_test_case failing_signature_cases[] = {
 	{"author Vicent Marti <broken@email 12345 \n", "author "},
 	{"committer Vicent Marti ><\n", "committer "},
 	{"author ", "author "},
-   {NULL,NULL,}
+	{NULL, NULL,}
 };
 
 void test_commit_parse__signature(void)
@@ -158,11 +151,12 @@ void test_commit_parse__signature(void)
       const char *str = passcase->string;
       size_t len = strlen(passcase->string);
       struct git_signature person = {0};
+
       cl_git_pass(git_signature__parse(&person, &str, str + len, passcase->header, '\n'));
       cl_assert_equal_s(passcase->name, person.name);
       cl_assert_equal_s(passcase->email, person.email);
-      cl_assert(passcase->time == person.when.time);
-      cl_assert(passcase->offset == person.when.offset);
+      cl_assert_equal_i(passcase->time, person.when.time);
+      cl_assert_equal_i(passcase->offset, person.when.offset);
       git__free(person.name); git__free(person.email);
    }
 
diff --git a/tests-clar/revwalk/signatureparsing.c b/tests-clar/revwalk/signatureparsing.c
index 4f29dd1..5c7d881 100644
--- a/tests-clar/revwalk/signatureparsing.c
+++ b/tests-clar/revwalk/signatureparsing.c
@@ -37,8 +37,8 @@ void test_revwalk_signatureparsing__do_not_choke_when_name_contains_angle_bracke
 	cl_git_pass(git_commit_lookup(&commit, _repo, git_reference_target(ref)));
 
 	signature = git_commit_committer(commit);
-	cl_assert_equal_s("Yu V. Bin Haacked", signature->email);
-	cl_assert_equal_s("", signature->name);
+	cl_assert_equal_s("foo@example.com", signature->email);
+	cl_assert_equal_s("<Yu V. Bin Haacked>", signature->name);
 	cl_assert_equal_i(1323847743, (int)signature->when.time);
 	cl_assert_equal_i(60, signature->when.offset);