Commit e5ba0a3c37a5a1a24f1904d56ebf8d30df9df75f

Edward Thomson 2021-08-31T20:41:45

url: introduce `git_net_url_matches_pattern`. Provide a method to determine if a given URL matches a host:port pattern like the ones found in `NO_PROXY` environment variables.

diff --git a/src/net.c b/src/net.c
index d4a9f8a..f3cca5d 100644
--- a/src/net.c
+++ b/src/net.c
@@ -404,6 +404,61 @@ int git_net_url_fmt_path(git_buf *buf, git_net_url *url)
 	return git_buf_oom(buf) ? -1 : 0;
 }
 
+/*
+ * Returns true if the URL matches one pattern of `pattern_len` bytes:
+ * "*" matches anything, "host[:port]" needs an exact host match, and
+ * ".domain[:port]" / "*.domain[:port]" match a domain and its subdomains.
+ */
+static bool matches_pattern(
+	git_net_url *url,
+	const char *pattern,
+	size_t pattern_len)
+{
+	const char *domain, *port = NULL, *colon;
+	size_t host_len, domain_len, port_len = 0, wildcard = 0;
+
+	if (!pattern_len)
+		return false;
+	else if (pattern_len == 1 && pattern[0] == '*')
+		return true;
+	else if (pattern_len > 1 && pattern[0] == '*' && pattern[1] == '.')
+		wildcard = 2; /* skip the leading "*." */
+	else if (pattern[0] == '.')
+		wildcard = 1; /* skip the leading "." */
+
+	domain = pattern + wildcard;
+	domain_len = pattern_len - wildcard;
+
+	if ((colon = memchr(domain, ':', domain_len)) != NULL) {
+		domain_len = colon - domain;
+		port = colon + 1;
+		port_len = pattern_len - wildcard - domain_len - 1;
+	}
+
+	/* A pattern's port *must* match if it's specified */
+	if (port_len && git__strlcmp(url->port, port, port_len) != 0)
+		return false;
+
+	/* No wildcard?  Host must match exactly. */
+	if (!wildcard)
+		return !git__strlcmp(url->host, domain, domain_len);
+
+	/* Wildcard: ensure there's (at least) a suffix match */
+	if ((host_len = strlen(url->host)) < domain_len ||
+	    memcmp(url->host + (host_len - domain_len), domain, domain_len))
+		return false;
+
+	/* The host is either the bare domain or ends in ".domain" */
+	return host_len == domain_len ||
+	       url->host[host_len - domain_len - 1] == '.';
+}
+
+/* Match the URL against a single NUL-terminated pattern. */
+bool git_net_url_matches_pattern(git_net_url *url, const char *pattern)
+{
+	return matches_pattern(url, pattern, strlen(pattern));
+}
+
 void git_net_url_dispose(git_net_url *url)
 {
 	if (url->username)
diff --git a/src/net.h b/src/net.h
index 391b99a..4d4c7c7 100644
--- a/src/net.h
+++ b/src/net.h
@@ -54,6 +54,11 @@ extern int git_net_url_fmt(git_buf *out, git_net_url *url);
 /** Place the path and query string into the given buffer. */
 extern int git_net_url_fmt_path(git_buf *buf, git_net_url *url);
 
+/** Determines if the url matches the given host[:port] pattern */
+extern bool git_net_url_matches_pattern(
+	git_net_url *url,
+	const char *pattern);
+
 /** Disposes the contents of the structure. */
 extern void git_net_url_dispose(git_net_url *url);
 
diff --git a/tests/network/url/pattern.c b/tests/network/url/pattern.c
new file mode 100644
index 0000000..fbe1f9e
--- /dev/null
+++ b/tests/network/url/pattern.c
@@ -0,0 +1,54 @@
+#include "clar_libgit2.h"
+#include "net.h"
+
+struct url_pattern {
+	const char *url; /* URL handed to git_net_url_parse */
+	const char *pattern; /* pattern the parsed URL is tested against */
+	bool matches; /* expected git_net_url_matches_pattern result */
+};
+
+/*
+ * Table-driven check of git_net_url_matches_pattern with single patterns.
+ */
+void test_network_url_pattern__single(void)
+{
+	git_net_url url;
+	size_t i;
+
+	struct url_pattern url_patterns[] = {
+		/* An empty pattern never matches; a lone "*" always does */
+		{ "https://example.com/", "", false },
+		{ "https://example.com/", "*", true },
+
+		/* Literal and wildcard matches */
+		{ "https://example.com/", "example.com", true },
+		{ "https://example.com/", ".example.com", true },
+		{ "https://example.com/", "*.example.com", true },
+		{ "https://www.example.com/", "www.example.com", true },
+		{ "https://www.example.com/", ".example.com", true },
+		{ "https://www.example.com/", "*.example.com", true },
+
+		/* Literal and wildcard failures */
+		{ "https://example.com/", "example.org", false },
+		{ "https://example.com/", ".example.org", false },
+		{ "https://example.com/", "*.example.org", false },
+		{ "https://foo.example.com/", "www.example.com", false },
+
+		/* A pattern's port is optional; without one, *all* ports match */
+		{ "https://example.com/", "example.com:443", true },
+		{ "https://example.com/", "example.com:80", false },
+		{ "https://example.com:1443/", "example.com", true },
+
+		/* Failures with similar prefix/suffix */
+		{ "https://texample.com/", "example.com", false },
+		{ "https://example.com/", "mexample.com", false },
+		{ "https://example.com:44/", "example.com:443", false },
+		{ "https://example.com:443/", "example.com:44", false },
+	};
+
+	for (i = 0; i < ARRAY_SIZE(url_patterns); i++) {
+		cl_git_pass(git_net_url_parse(&url, url_patterns[i].url));
+		cl_assert_(git_net_url_matches_pattern(&url, url_patterns[i].pattern) == url_patterns[i].matches, url_patterns[i].pattern);
+		git_net_url_dispose(&url);
+	}
+}