Commit 8cbef12d45db531933e7795820f116a07a21f48b

Patrick Steinhardt 2019-08-08T11:52:54

util: do not perform allocations in insertsort Our hand-rolled fallback sorting function `git__insertsort_r` does an in-place sort of the given array. As elements may not necessarily be pointers, it needs a way of swapping two values of arbitrary size, which is currently implemented by allocating a temporary buffer of the element's size. This is problematic, though, as the emulated `qsort` interface doesn't provide any return values and thus cannot signal an error if allocation of that temporary buffer has failed. Convert the function to swap via a temporary buffer allocated on the stack. Like this, it can `memcpy` contents of both elements in small batches without requiring a heap allocation. The buffer size has been chosen such that in most cases, a single iteration of copying will suffice. Most importantly, it can fully contain `git_oid` structures and pointers. Add a bunch of tests for the `git__qsort_r` interface to verify nothing breaks. Furthermore, this removes the declaration of `git__insertsort_r` and makes it static as it is not used anywhere else.

diff --git a/src/util.c b/src/util.c
index 316a289..fdd8e9a 100644
--- a/src/util.c
+++ b/src/util.c
@@ -719,6 +719,32 @@ static int GIT_STDLIB_CALL git__qsort_r_glue_cmp(
 }
 #endif
 
+static void swap(uint8_t *a, uint8_t *b, size_t elsize)
+{
+	char tmp[256];
+
+	while (elsize) {
+		size_t n = elsize < sizeof(tmp) ? elsize : sizeof(tmp);
+		memcpy(tmp, a + elsize - n, n);
+		memcpy(a + elsize - n, b + elsize - n, n);
+		memcpy(b + elsize - n, tmp, n);
+		elsize -= n;
+	}
+}
+
+static void insertsort(
+	void *els, size_t nel, size_t elsize,
+	git__sort_r_cmp cmp, void *payload)
+{
+	uint8_t *base = els;
+	uint8_t *end = base + nel * elsize;
+	uint8_t *i, *j;
+
+	for (i = base + elsize; i < end; i += elsize)
+		for (j = i; j > base && cmp(j, j - elsize, payload) < 0; j -= elsize)
+			swap(j, j - elsize, elsize);
+}
+
 void git__qsort_r(
 	void *els, size_t nel, size_t elsize, git__sort_r_cmp cmp, void *payload)
 {
@@ -731,33 +757,10 @@ void git__qsort_r(
 	git__qsort_r_glue glue = { cmp, payload };
 	qsort_s(els, nel, elsize, git__qsort_r_glue_cmp, &glue);
 #else
-	git__insertsort_r(els, nel, elsize, NULL, cmp, payload);
+	insertsort(els, nel, elsize, cmp, payload);
 #endif
 }
 
-void git__insertsort_r(
-	void *els, size_t nel, size_t elsize, void *swapel,
-	git__sort_r_cmp cmp, void *payload)
-{
-	uint8_t *base = els;
-	uint8_t *end = base + nel * elsize;
-	uint8_t *i, *j;
-	bool freeswap = !swapel;
-
-	if (freeswap)
-		swapel = git__malloc(elsize);
-
-	for (i = base + elsize; i < end; i += elsize)
-		for (j = i; j > base && cmp(j, j - elsize, payload) < 0; j -= elsize) {
-			memcpy(swapel, j, elsize);
-			memcpy(j, j - elsize, elsize);
-			memcpy(j - elsize, swapel, elsize);
-		}
-
-	if (freeswap)
-		git__free(swapel);
-}
-
 /*
  * git__utf8_iterate is taken from the utf8proc project,
  * http://www.public-software-group.org/utf8proc
diff --git a/src/util.h b/src/util.h
index b9ab652..792eba0 100644
--- a/src/util.h
+++ b/src/util.h
@@ -137,10 +137,6 @@ extern void git__tsort_r(
 extern void git__qsort_r(
 	void *els, size_t nel, size_t elsize, git__sort_r_cmp cmp, void *payload);
 
-extern void git__insertsort_r(
-	void *els, size_t nel, size_t elsize, void *swapel,
-	git__sort_r_cmp cmp, void *payload);
-
 /**
  * @param position If non-NULL, this will be set to the position where the
  * 		element is or would be inserted if not found.
diff --git a/tests/core/qsort.c b/tests/core/qsort.c
new file mode 100644
index 0000000..efb79e6
--- /dev/null
+++ b/tests/core/qsort.c
@@ -0,0 +1,90 @@
+#include "clar_libgit2.h"
+
+#define assert_sorted(a, cmp) \
+	_assert_sorted(a, ARRAY_SIZE(a), sizeof(*a), cmp)
+
+struct big_entries {
+	char c[311];
+};
+
+static void _assert_sorted(void *els, size_t n, size_t elsize, git__sort_r_cmp cmp)
+{
+	int8_t *p = els;
+
+	git__qsort_r(p, n, elsize, cmp, NULL);
+	while (n-- > 1) {
+		cl_assert(cmp(p, p + elsize, NULL) <= 0);
+		p += elsize;
+	}
+}
+
+static int cmp_big(const void *_a, const void *_b, void *payload)
+{
+	const struct big_entries *a = (const struct big_entries *)_a, *b = (const struct big_entries *)_b;
+	GIT_UNUSED(payload);
+	return (a->c[0] < b->c[0]) ? -1 : (a->c[0] > b->c[0]) ? +1 : 0;
+}
+
+static int cmp_int(const void *_a, const void *_b, void *payload)
+{
+	int a = *(const int *)_a, b = *(const int *)_b;
+	GIT_UNUSED(payload);
+	return (a < b) ? -1 : (a > b) ? +1 : 0;
+}
+
+static int cmp_str(const void *_a, const void *_b, void *payload)
+{
+	GIT_UNUSED(payload);
+	return strcmp((const char *) _a, (const char *) _b);
+}
+
+void test_core_qsort__array_with_single_entry(void)
+{
+	int a[] = { 10 };
+	assert_sorted(a, cmp_int);
+}
+
+void test_core_qsort__array_with_equal_entries(void)
+{
+	int a[] = { 4, 4, 4, 4 };
+	assert_sorted(a, cmp_int);
+}
+
+void test_core_qsort__sorted_array(void)
+{
+	int a[] = { 1, 10 };
+	assert_sorted(a, cmp_int);
+}
+
+void test_core_qsort__unsorted_array(void)
+{
+	int a[] = { 123, 9, 412938, 10, 234, 89 };
+	assert_sorted(a, cmp_int);
+}
+
+void test_core_qsort__sorting_strings(void)
+{
+	char *a[] = { "foo", "bar", "baz" };
+	assert_sorted(a, cmp_str);
+}
+
+void test_core_qsort__sorting_big_entries(void)
+{
+	struct big_entries a[5];
+
+	memset(&a, 0, sizeof(a));
+
+	memset(a[0].c, 'w', sizeof(a[0].c) - 1);
+	memset(a[1].c, 'c', sizeof(a[1].c) - 1);
+	memset(a[2].c, 'w', sizeof(a[2].c) - 1);
+	memset(a[3].c, 'h', sizeof(a[3].c) - 1);
+	memset(a[4].c, 'a', sizeof(a[4].c) - 1);
+
+	assert_sorted(a, cmp_big);
+
+	cl_assert_equal_i(strspn(a[0].c, "a"), sizeof(a[0].c) - 1);
+	cl_assert_equal_i(strspn(a[1].c, "c"), sizeof(a[1].c) - 1);
+	cl_assert_equal_i(strspn(a[2].c, "h"), sizeof(a[2].c) - 1);
+	cl_assert_equal_i(strspn(a[3].c, "w"), sizeof(a[3].c) - 1);
+	cl_assert_equal_i(strspn(a[4].c, "w"), sizeof(a[4].c) - 1);
+}