Commit e08cc72dc07ea915bb95484818f3be5847d6e556

Stefan Sperling 2019-02-05T14:12:38

add a pathlist API

diff --git a/lib/got_lib_path.h b/lib/got_lib_path.h
index ff7d21e..a3451ce 100644
--- a/lib/got_lib_path.h
+++ b/lib/got_lib_path.h
@@ -60,3 +60,26 @@ int got_path_is_child(const char *, const char *, size_t);
  * their parents.
  */
 int got_path_cmp(const char *, const char *);
+
+/*
+ * Path lists allow for predictable concurrent iteration over multiple lists
+ * of paths obtained from disparate sources which don't all provide the same
+ * ordering guarantees (e.g. git trees, file index, and on-disk directories).
+ */
+struct got_pathlist_entry {
+	TAILQ_ENTRY(got_pathlist_entry) entry;
+	const char *path;
+};
+TAILQ_HEAD(got_pathlist_head, got_pathlist_entry);
+
+/*
+ * Insert a path into the list of paths in a predictable order.
+ * The caller should already have initialized the list head. This list stores
+ * the pointer to the path as-is, i.e. the path is not copied internally and
+ * must remain available until the list is freed with got_pathlist_free().
+ */
+const struct got_error *got_pathlist_insert(struct got_pathlist_head *,
+    const char *);
+
+/* Free resources allocated for a path list. */
+void got_pathlist_free(struct got_pathlist_head *);
diff --git a/lib/path.c b/lib/path.c
index 4878d3b..cc15eae 100644
--- a/lib/path.c
+++ b/lib/path.c
@@ -15,6 +15,8 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
+#include <sys/queue.h>
+
 #include <limits.h>
 #include <stdlib.h>
 #include <unistd.h>
@@ -166,24 +168,91 @@ got_path_cmp(const char *path1, const char *path2)
 	size_t min_len = MIN(len1, len2);
 	size_t i = 0;
 
+	/* Leading directory separators are insignificant. */
+	while (path1[0] == '/')
+		path1++;
+	while (path2[0] == '/')
+		path2++;
+
+	len1 = strlen(path1);
+	len2 = strlen(path2);
+	min_len = MIN(len1, len2);
+
 	/* Skip over common prefix. */
 	while (i < min_len && path1[i] == path2[i])
 		i++;
 
-	/* Are the paths exactly equal? */
+	/* Are the paths exactly equal (besides path separators)? */
 	if (len1 == len2 && i >= min_len)
 		return 0;
 
+	/* Skip over redundant trailing path seperators. */
+	while (path1[i] == '/' && path1[i + 1] == '/')
+		path1++;
+	while (path2[i] == '/' && path2[i + 1] == '/')
+		path2++;
+
+	/* Trailing path separators are insignificant. */
+	if (path1[i] == '/' && path1[i + 1] == '\0' && path2[i] == '\0')
+		return 0;
+	if (path2[i] == '/' && path2[i + 1] == '\0' && path1[i] == '\0')
+		return 0;
+
 	/* Order children in subdirectories directly after their parents. */
 	if (path1[i] == '/' && path2[i] == '\0')
 		return 1;
 	if (path2[i] == '/' && path1[i] == '\0')
 		return -1;
-	if (path1[i] == '/')
+	if (path1[i] == '/' && path2[i] != '\0')
 		return -1;
-	if (path2[i] == '/')
+	if (path2[i] == '/' && path1[i] != '\0')
 		return 1;
 
 	/* Next character following the common prefix determines order. */
 	return (unsigned char)path1[i] < (unsigned char)path2[i] ? -1 : 1;
 }
+
+const struct got_error *
+got_pathlist_insert(struct got_pathlist_head *pathlist, const char *path)
+{
+	struct got_pathlist_entry *new, *pe;
+
+	new = malloc(sizeof(*new));
+	if (new == NULL)
+		return got_error_from_errno();
+	new->path = path;
+
+	/*
+	 * Many callers will provide paths in a somewhat sorted order while
+	 * constructing a path list from inputs such as tree objects or
+	 * dirents. Iterating backwards from the tail of the list should
+	 * be more efficient than traversing through the entire list each
+	 * time an element is inserted.
+	 */
+	pe = TAILQ_LAST(pathlist, got_pathlist_head);
+	while (pe) {
+		int cmp = got_path_cmp(pe->path, path);
+		if (cmp == 0) {
+			free(new); /* duplicate */
+			return NULL;
+		} else if (cmp < 0) {
+			TAILQ_INSERT_AFTER(pathlist, pe, new, entry);
+			return NULL;
+		}
+		pe = TAILQ_PREV(pe, got_pathlist_head, entry);
+	}
+
+	TAILQ_INSERT_HEAD(pathlist, new, entry);
+	return NULL;
+}
+
+void
+got_pathlist_free(struct got_pathlist_head *pathlist)
+{
+	struct got_pathlist_entry *pe;
+
+	while ((pe = TAILQ_FIRST(pathlist)) != NULL) {
+		TAILQ_REMOVE(pathlist, pe, entry);
+		free(pe);
+	}
+}
diff --git a/regress/path/path_test.c b/regress/path/path_test.c
index e8ede9f..24bfb41 100644
--- a/regress/path/path_test.c
+++ b/regress/path/path_test.c
@@ -14,6 +14,9 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
+#include <sys/queue.h>
+
+#include <string.h>
 #include <stdlib.h>
 #include <stdarg.h>
 #include <stdio.h>
@@ -56,15 +59,21 @@ path_cmp(void)
 		{ "/a", "/b", -1 },
 		{ "x/a", "x.a", -1 },
 		{ "x.a", "x/a", 1 },
-		{ "//foo", "/bar", -1 },
+		{ "//foo", "/bar", 1 },
 		{ "/foo", "/bar", 1 },
+		{ "foo", "bar", 1 },
 		{ "/foo/sub", "/bar", 1 },
 		{ "/foo", "/bar/sub", 1 },
 		{ "/foo/", "/bar", 1 },
 		{ "/foo", "/bar/", 1 },
 		{ "/foo/", "/bar/", 1 },
 		{ "/bar/", "/bar/", 0 },
+		{ "/bar/", "/bar", 0 },
+		{ "//bar//", "/bar/", 0 },
+		{ "//bar//", "/bar////", 0 },
+		{ "/bar/sub", "/bar.", -1 },
 		{ "/bar/sub", "/bar/", 1 },
+		{ "/bar/sub/", "/bar///", 1 },
 		{ "/bar/sub/sub2", "/bar/", 1 },
 		{ "/bar/sub/sub2", "/bar", 1 },
 		{ "/bar.sub.sub2", "/bar", 1 },
@@ -88,6 +97,112 @@ path_cmp(void)
 	return 1;
 }
 
+const char *path_list_input[] = {
+	"", "/", "a", "/b", "/bar", "bar/sub", "/bar/sub", "/bar/",
+	"/bar.c", "/bar/sub/sub2", "/bar.sub.sub2", "/foo",
+	"/foo/sub", "/foo/", "/foo/", "x/a",
+};
+const char *path_list_expected[] = {
+	"",
+	"a",
+	"/b",
+	"/bar",
+	"bar/sub",
+	"/bar/sub/sub2",
+	"/bar.c",
+	"/bar.sub.sub2",
+	"/foo",
+	"/foo/sub",
+	"x/a",
+};
+
+/* If inserting pathlist_input in reverse the result is slightly different. */
+const char *path_list_expected_reverse[] = {
+	"/",
+	"a",
+	"/b",
+	"/bar/",
+	"/bar/sub",
+	"/bar/sub/sub2",
+	"/bar.c",
+	"/bar.sub.sub2",
+	"/foo/",
+	"/foo/sub",
+	"x/a",
+};
+
+
+static int
+path_list(void)
+{
+	const struct got_error *err = NULL;
+	struct got_pathlist_head paths;
+	struct got_pathlist_entry *pe;
+	int i;
+
+	TAILQ_INIT(&paths);
+	for (i = 0; i < nitems(path_list_input); i++) {
+		err = got_pathlist_insert(&paths, path_list_input[i]);
+		if (err) {
+			test_printf("%s\n", __func__, err->msg);
+			return 0;
+		}
+	}
+
+	i = 0;
+	TAILQ_FOREACH(pe, &paths, entry) {
+		test_printf("'%s' -- '%s'\n", pe->path, path_list_expected[i]);
+		if (i >= nitems(path_list_expected)) {
+			test_printf("too many elements on list\n");
+			return 0;
+		}
+		if (strcmp(pe->path, path_list_expected[i]) != 0) {
+			test_printf("unordered elements on list\n");
+			return 0;
+		}
+		i++;
+	}
+
+	got_pathlist_free(&paths);
+	return 1;
+}
+
+static int
+path_list_reverse_input(void)
+{
+	const struct got_error *err = NULL;
+	struct got_pathlist_head paths;
+	struct got_pathlist_entry *pe;
+	int i;
+
+	TAILQ_INIT(&paths);
+	for (i = nitems(path_list_input) - 1; i >= 0; i--) {
+		err = got_pathlist_insert(&paths, path_list_input[i]);
+		if (err) {
+			test_printf("%s\n", __func__, err->msg);
+			return 0;
+		}
+	}
+
+	i = 0;
+	TAILQ_FOREACH(pe, &paths, entry) {
+		test_printf("'%s' -- '%s'\n", pe->path,
+		    path_list_expected_reverse[i]);
+		if (i >= nitems(path_list_expected_reverse)) {
+			test_printf("too many elements on list\n");
+			return 0;
+		}
+		if (strcmp(pe->path, path_list_expected_reverse[i]) != 0) {
+			test_printf("unordered elements on list\n");
+			return 0;
+		}
+		i++;
+	}
+
+	got_pathlist_free(&paths);
+	return 1;
+}
+
 #define RUN_TEST(expr, name) \
 	{ test_ok = (expr);  \
 	printf("test_%s %s\n", (name), test_ok ? "ok" : "failed"); \
@@ -124,6 +239,8 @@ main(int argc, char *argv[])
 	argv += optind;
 
 	RUN_TEST(path_cmp(), "path_cmp");
+	RUN_TEST(path_list(), "path_list");
+	RUN_TEST(path_list_reverse_input(), "path_list_reverse_input");
 
 	return failure ? 1 : 0;
 }