Commit 5585e358b2a240ca8ed65a00008dbc865a1381c1

Edward Thomson 2018-03-20T00:59:21

Merge pull request #4563 from libgit2/ethomson/ssh-unescape Refactor `gitno_extract_url_parts`

diff --git a/src/buffer.c b/src/buffer.c
index 6dfcbfb..8a58d1a 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -212,7 +212,7 @@ int git_buf_put(git_buf *buf, const char *data, size_t len)
 		size_t new_size;
 
 		assert(data);
-		
+
 		GITERR_CHECK_ALLOC_ADD(&new_size, buf->size, len);
 		GITERR_CHECK_ALLOC_ADD(&new_size, new_size, 1);
 		ENSURE_SIZE(buf, new_size);
@@ -455,6 +455,36 @@ on_error:
 	return -1;
 }
 
+#define HEX_DECODE(c) ((c | 32) % 39 - 9)
+
+int git_buf_decode_percent(
+	git_buf *buf,
+	const char *str,
+	size_t str_len)
+{
+	size_t str_pos, new_size;
+
+	GITERR_CHECK_ALLOC_ADD(&new_size, buf->size, str_len);
+	GITERR_CHECK_ALLOC_ADD(&new_size, new_size, 1);
+	ENSURE_SIZE(buf, new_size);
+
+	for (str_pos = 0; str_pos < str_len; buf->size++, str_pos++) {
+		if (str[str_pos] == '%' &&
+			str_len > str_pos + 2 &&
+			isxdigit(str[str_pos + 1]) &&
+			isxdigit(str[str_pos + 2])) {
+			buf->ptr[buf->size] = (HEX_DECODE(str[str_pos + 1]) << 4) +
+				HEX_DECODE(str[str_pos + 2]);
+			str_pos += 2;
+		} else {
+			buf->ptr[buf->size] = str[str_pos];
+		}
+	}
+
+	buf->ptr[buf->size] = '\0';
+	return 0;
+}
+
 int git_buf_vprintf(git_buf *buf, const char *format, va_list ap)
 {
 	size_t expected_size, new_size;
diff --git a/src/buffer.h b/src/buffer.h
index b0aece4..cc77fc0 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -190,6 +190,9 @@ int git_buf_encode_base85(git_buf *buf, const char *data, size_t len);
 /* Decode the given "base85" and write the result to the buffer */
 int git_buf_decode_base85(git_buf *buf, const char *base64, size_t len, size_t output_len);
 
+/* Decode the given percent-encoded string and write the result to the buffer */
+int git_buf_decode_percent(git_buf *buf, const char *str, size_t len);
+
 /*
  * Insert, remove or replace a portion of the buffer.
  *
diff --git a/src/netops.c b/src/netops.c
index 68f404d..fa20cba 100644
--- a/src/netops.c
+++ b/src/netops.c
@@ -206,83 +206,96 @@ void gitno_connection_data_free_ptrs(gitno_connection_data *d)
 	git__free(d->pass); d->pass = NULL;
 }
 
-#define hex2c(c) ((c | 32) % 39 - 9)
-static char* unescape(char *str)
-{
-	int x, y;
-	int len = (int)strlen(str);
-
-	for (x=y=0; str[y]; ++x, ++y) {
-		if ((str[x] = str[y]) == '%') {
-			if (y < len-2 && isxdigit(str[y+1]) && isxdigit(str[y+2])) {
-				str[x] = (hex2c(str[y+1]) << 4) + hex2c(str[y+2]);
-				y += 2;
-			}
-		}
-	}
-	str[x] = '\0';
-	return str;
-}
-
 int gitno_extract_url_parts(
-		char **host,
-		char **port,
-		char **path,
-		char **username,
-		char **password,
-		const char *url,
-		const char *default_port)
+	char **host_out,
+	char **port_out,
+	char **path_out,
+	char **username_out,
+	char **password_out,
+	const char *url,
+	const char *default_port)
 {
 	struct http_parser_url u = {0};
-	const char *_host, *_port, *_path, *_userinfo;
+	bool has_host, has_port, has_path, has_userinfo;
+	git_buf host = GIT_BUF_INIT,
+		port = GIT_BUF_INIT,
+		path = GIT_BUF_INIT,
+		username = GIT_BUF_INIT,
+		password = GIT_BUF_INIT;
+	int error = 0;
 
 	if (http_parser_parse_url(url, strlen(url), false, &u)) {
 		giterr_set(GITERR_NET, "malformed URL '%s'", url);
-		return GIT_EINVALIDSPEC;
+		error = GIT_EINVALIDSPEC;
+		goto done;
 	}
 
-	_host = url+u.field_data[UF_HOST].off;
-	_port = url+u.field_data[UF_PORT].off;
-	_path = url+u.field_data[UF_PATH].off;
-	_userinfo = url+u.field_data[UF_USERINFO].off;
+	has_host = !!(u.field_set & (1 << UF_HOST));
+	has_port = !!(u.field_set & (1 << UF_PORT));
+	has_path = !!(u.field_set & (1 << UF_PATH));
+	has_userinfo = !!(u.field_set & (1 << UF_USERINFO));
 
-	if (u.field_set & (1 << UF_HOST)) {
-		*host = git__substrdup(_host, u.field_data[UF_HOST].len);
-		GITERR_CHECK_ALLOC(*host);
+	if (has_host) {
+		const char *url_host = url + u.field_data[UF_HOST].off;
+		size_t url_host_len = u.field_data[UF_HOST].len;
+		git_buf_decode_percent(&host, url_host, url_host_len);
 	}
 
-	if (u.field_set & (1 << UF_PORT))
-		*port = git__substrdup(_port, u.field_data[UF_PORT].len);
-	else
-		*port = git__strdup(default_port);
-	GITERR_CHECK_ALLOC(*port);
+	if (has_port) {
+		const char *url_port = url + u.field_data[UF_PORT].off;
+		size_t url_port_len = u.field_data[UF_PORT].len;
+		git_buf_put(&port, url_port, url_port_len);
+	} else {
+		git_buf_puts(&port, default_port);
+	}
 
-	if (path) {
-		if (u.field_set & (1 << UF_PATH)) {
-			*path = git__substrdup(_path, u.field_data[UF_PATH].len);
-			GITERR_CHECK_ALLOC(*path);
-		} else {
-			git__free(*port);
-			*port = NULL;
-			git__free(*host);
-			*host = NULL;
-			giterr_set(GITERR_NET, "invalid url, missing path");
-			return GIT_EINVALIDSPEC;
-		}
+	if (has_path && path_out) {
+		const char *url_path = url + u.field_data[UF_PATH].off;
+		size_t url_path_len = u.field_data[UF_PATH].len;
+		git_buf_decode_percent(&path, url_path, url_path_len);
+	} else if (path_out) {
+		giterr_set(GITERR_NET, "invalid url, missing path");
+		error = GIT_EINVALIDSPEC;
+		goto done;
 	}
 
-	if (u.field_set & (1 << UF_USERINFO)) {
-		const char *colon = memchr(_userinfo, ':', u.field_data[UF_USERINFO].len);
+	if (has_userinfo) {
+		const char *url_userinfo = url + u.field_data[UF_USERINFO].off;
+		size_t url_userinfo_len = u.field_data[UF_USERINFO].len;
+		const char *colon = memchr(url_userinfo, ':', url_userinfo_len);
+
 		if (colon) {
-			*username = unescape(git__substrdup(_userinfo, colon - _userinfo));
-			*password = unescape(git__substrdup(colon+1, u.field_data[UF_USERINFO].len - (colon+1-_userinfo)));
-			GITERR_CHECK_ALLOC(*password);
+			const char *url_username = url_userinfo;
+			size_t url_username_len = colon - url_userinfo;
+			const char *url_password = colon + 1;
+			size_t url_password_len = url_userinfo_len - (url_username_len + 1);
+
+			git_buf_decode_percent(&username, url_username, url_username_len);
+			git_buf_decode_percent(&password, url_password, url_password_len);
 		} else {
-			*username = git__substrdup(_userinfo, u.field_data[UF_USERINFO].len);
+			git_buf_decode_percent(&username, url_userinfo, url_userinfo_len);
 		}
-		GITERR_CHECK_ALLOC(*username);
-
 	}
 
-	return 0;
+	if (git_buf_oom(&host) ||
+		git_buf_oom(&port) ||
+		git_buf_oom(&path) ||
+		git_buf_oom(&username) ||
+		git_buf_oom(&password))
+		return -1;
+
+	*host_out = git_buf_detach(&host);
+	*port_out = git_buf_detach(&port);
+	if (path_out)
+		*path_out = git_buf_detach(&path);
+	*username_out = git_buf_detach(&username);
+	*password_out = git_buf_detach(&password);
+
+done:
+	git_buf_free(&host);
+	git_buf_free(&port);
+	git_buf_free(&path);
+	git_buf_free(&username);
+	git_buf_free(&password);
+	return error;
 }
diff --git a/src/transports/ssh.c b/src/transports/ssh.c
index 2ba91b7..dcd9b5e 100644
--- a/src/transports/ssh.c
+++ b/src/transports/ssh.c
@@ -64,7 +64,7 @@ static void ssh_error(LIBSSH2_SESSION *session, const char *errmsg)
  */
 static int gen_proto(git_buf *request, const char *cmd, const char *url)
 {
-	char *repo;
+	const char *repo;
 	int len;
 	size_t i;
 
@@ -92,8 +92,10 @@ done:
 	len = strlen(cmd) + 1 /* Space */ + 1 /* Quote */ + strlen(repo) + 1 /* Quote */ + 1;
 
 	git_buf_grow(request, len);
-	git_buf_printf(request, "%s '%s'", cmd, repo);
-	git_buf_putc(request, '\0');
+	git_buf_puts(request, cmd);
+	git_buf_puts(request, " '");
+	git_buf_decode_percent(request, repo, strlen(repo));
+	git_buf_puts(request, "'");
 
 	if (git_buf_oom(request))
 		return -1;
diff --git a/tests/buf/percent.c b/tests/buf/percent.c
new file mode 100644
index 0000000..60534a0
--- /dev/null
+++ b/tests/buf/percent.c
@@ -0,0 +1,49 @@
+#include "clar_libgit2.h"
+#include "buffer.h"
+
+static void expect_decode_pass(const char *expected, const char *encoded)
+{
+	git_buf in = GIT_BUF_INIT, out = GIT_BUF_INIT;
+
+	/*
+	 * ensure that we only read the given length of the input buffer
+	 * by putting garbage at the end.  this will ensure that we do
+	 * not, eg, rely on nul-termination or walk off the end of the buf.
+	 */
+	cl_git_pass(git_buf_puts(&in, encoded));
+	cl_git_pass(git_buf_PUTS(&in, "TRAILER"));
+
+	cl_git_pass(git_buf_decode_percent(&out, in.ptr, strlen(encoded)));
+
+	cl_assert_equal_s(expected, git_buf_cstr(&out));
+	cl_assert_equal_i(strlen(expected), git_buf_len(&out));
+
+	git_buf_free(&in);
+	git_buf_free(&out);
+}
+
+void test_buf_percent__decode_succeeds(void)
+{
+	expect_decode_pass("", "");
+	expect_decode_pass(" ", "%20");
+	expect_decode_pass("a", "a");
+	expect_decode_pass(" a", "%20a");
+	expect_decode_pass("a ", "a%20");
+	expect_decode_pass("github.com", "github.com");
+	expect_decode_pass("github.com", "githu%62.com");
+	expect_decode_pass("github.com", "github%2ecom");
+	expect_decode_pass("foo bar baz", "foo%20bar%20baz");
+	expect_decode_pass("foo bar baz", "foo%20bar%20baz");
+	expect_decode_pass("foo bar ", "foo%20bar%20");
+}
+
+void test_buf_percent__ignores_invalid(void)
+{
+	expect_decode_pass("githu%%.com", "githu%%.com");
+	expect_decode_pass("github.co%2", "github.co%2");
+	expect_decode_pass("github%2.com", "github%2.com");
+	expect_decode_pass("githu%2z.com", "githu%2z.com");
+	expect_decode_pass("github.co%9z", "github.co%9z");
+	expect_decode_pass("github.co%2", "github.co%2");
+	expect_decode_pass("github.co%", "github.co%");
+}
diff --git a/tests/transport/register.c b/tests/transport/register.c
index 97aae6b..88ba247 100644
--- a/tests/transport/register.c
+++ b/tests/transport/register.c
@@ -45,6 +45,8 @@ void test_transport_register__custom_transport_ssh(void)
 		"ssh+git://somehost:somepath",
 		"git+ssh://somehost:somepath",
 		"git@somehost:somepath",
+		"ssh://somehost:somepath%20with%20%spaces",
+		"ssh://somehost:somepath with spaces"
 	};
 	git_transport *transport;
 	unsigned i;