Commit 984e8a45c4baf9764b411c634efe60ccb173097c

Stefan Sperling 2018-11-05T21:18:58

implement object idset with a red-black tree

diff --git a/include/got_object.h b/include/got_object.h
index 6a7f144..6ba2563 100644
--- a/include/got_object.h
+++ b/include/got_object.h
@@ -80,7 +80,8 @@ const struct got_error *got_object_id_str(char **, struct got_object_id *);
 /*
  * Compare two object IDs. Return value behaves like memcmp(3).
  */
-int got_object_id_cmp(struct got_object_id *, struct got_object_id *);
+int got_object_id_cmp(const struct got_object_id *,
+    const struct got_object_id *);
 
 /*
  * Created a newly allocated copy of an object ID.
diff --git a/lib/object.c b/lib/object.c
index 6c29e98..fef5ccd 100644
--- a/lib/object.c
+++ b/lib/object.c
@@ -57,7 +57,8 @@
 #endif
 
 int
-got_object_id_cmp(struct got_object_id *id1, struct got_object_id *id2)
+got_object_id_cmp(const struct got_object_id *id1,
+    const struct got_object_id *id2)
 {
 	return memcmp(id1->sha1, id2->sha1, SHA1_DIGEST_LENGTH);
 }
diff --git a/lib/object_idset.c b/lib/object_idset.c
index bcfd309..09fe0cf 100644
--- a/lib/object_idset.c
+++ b/lib/object_idset.c
@@ -14,7 +14,9 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
+#include <stddef.h>
 #include <sys/queue.h>
+#include <sys/tree.h>
 
 #include <stdlib.h>
 #include <string.h>
@@ -37,19 +39,25 @@
 #endif
 
 struct got_object_idset_element {
-	TAILQ_ENTRY(got_object_idset_element) entry;
+	RBT_ENTRY(got_object_idset_element)	entry;
 	struct got_object_id id;
 	void *data;	/* API user data */
 };
 
+RBT_HEAD(got_object_idset_tree, got_object_idset_element);
+
+static int
+cmp_elements(const struct got_object_idset_element *e1,
+    const struct got_object_idset_element *e2)
+{
+	return got_object_id_cmp(&e1->id, &e2->id);
+}
+
+RBT_PROTOTYPE(got_object_idset_tree, got_object_idset_element, entry,
+    cmp_elements);
+
 struct got_object_idset {
-	/*
-	 * A set is implemented as a collection of 256 lists.
-	 * The value of the first byte of an object ID determines
-	 * which of these lists an object ID is stored in.
-	 */
-	TAILQ_HEAD(, got_object_idset_element) entries[0xff + 1];
-	int nelem[0xff + 1];
+	struct got_object_idset_tree entries;
 	int totelem;
 #define GOT_OBJECT_IDSET_MAX_ELEM INT_MAX
 };
@@ -58,14 +66,13 @@ struct got_object_idset *
 got_object_idset_alloc(void)
 {
 	struct got_object_idset *set;
-	int i;
 
-	set = calloc(1, sizeof(*set));
+	set = malloc(sizeof(*set));
 	if (set == NULL)
 		return NULL;
 
-	for (i = 0; i < nitems(set->entries); i++)
-		TAILQ_INIT(&set->entries[i]);
+	RBT_INIT(got_object_idset_tree, &set->entries);
+	set->totelem = 0;
 
 	return set;
 }
@@ -74,16 +81,13 @@ void
 got_object_idset_free(struct got_object_idset *set)
 {
 	struct got_object_idset_element *entry;
-	int i;
-
-	for (i = 0; i < nitems(set->entries); i++) {
-		while (!TAILQ_EMPTY(&set->entries[i])) {
-			entry = TAILQ_FIRST(&set->entries[i]);
-			TAILQ_REMOVE(&set->entries[i], entry, entry);
-			/* User data should be freed by caller. */
-			free(entry);
-		}
+
+	while ((entry = RBT_MIN(got_object_idset_tree, &set->entries))) {
+		RBT_REMOVE(got_object_idset_tree, &set->entries, entry);
+		/* User data should be freed by caller. */
+		free(entry);
 	}
+
 	free(set);
 }
 
@@ -92,7 +96,6 @@ got_object_idset_add(struct got_object_idset *set, struct got_object_id *id,
     void *data)
 {
 	struct got_object_idset_element *new;
-	uint8_t i = id->sha1[0];
 
 	if (set->totelem >= GOT_OBJECT_IDSET_MAX_ELEM)
 		return got_error(GOT_ERR_NO_SPACE);
@@ -104,79 +107,76 @@ got_object_idset_add(struct got_object_idset *set, struct got_object_id *id,
 	memcpy(&new->id, id, sizeof(new->id));
 	new->data = data;
 
-	TAILQ_INSERT_HEAD(&set->entries[i], new, entry);
-	set->nelem[i]++;
+	RBT_INSERT(got_object_idset_tree, &set->entries, new);
 	set->totelem++;
 	return NULL;
 }
 
-void *
-got_object_idset_get(struct got_object_idset *set, struct got_object_id *id)
+static struct got_object_idset_element *
+find_element(struct got_object_idset *set, struct got_object_id *id)
 {
 	struct got_object_idset_element *entry;
-	uint8_t i = id->sha1[0];
 
-	TAILQ_FOREACH(entry, &set->entries[i], entry) {
-		if (got_object_id_cmp(&entry->id, id) == 0)
-			return entry->data;
+	entry = RBT_ROOT(got_object_idset_tree, &set->entries);
+	while (entry) {
+		int cmp = got_object_id_cmp(id, &entry->id);
+		if (cmp < 0)
+			entry = RBT_LEFT(got_object_idset_tree, entry);
+		else if (cmp > 0)
+			entry = RBT_RIGHT(got_object_idset_tree, entry);
+		else
+			break;
 	}
 
-	return NULL;
+	return entry;
+}
+
+void *
+got_object_idset_get(struct got_object_idset *set, struct got_object_id *id)
+{
+	struct got_object_idset_element *entry = find_element(set, id);
+	return entry ? entry->data : NULL;
 }
 
 const struct got_error *
 got_object_idset_remove(void **data, struct got_object_idset *set,
     struct got_object_id *id)
 {
-	struct got_object_idset_element *entry, *tmp;
-	uint8_t i = id->sha1[0];
+	struct got_object_idset_element *entry;
 
 	if (data)
 		*data = NULL;
 
-	if (set->nelem[i] == 0)
+	if (set->totelem == 0)
 		return got_error(GOT_ERR_NO_OBJ);
 
-	TAILQ_FOREACH_SAFE(entry, &set->entries[i], entry, tmp) {
-		if (got_object_id_cmp(&entry->id, id) == 0) {
-			TAILQ_REMOVE(&set->entries[i], entry, entry);
-			if (data)
-				*data = entry->data;
-			free(entry);
-			set->nelem[i]--;
-			set->totelem--;
-			return NULL;
-		}
-	}
+	entry = find_element(set, id);
+	if (entry == NULL)
+		return got_error(GOT_ERR_NO_OBJ);
 
-	return got_error(GOT_ERR_NO_OBJ);
+	RBT_REMOVE(got_object_idset_tree, &set->entries, entry);
+	if (data)
+		*data = entry->data;
+	free(entry);
+	set->totelem--;
+	return NULL;
 }
 
 int
 got_object_idset_contains(struct got_object_idset *set,
     struct got_object_id *id)
 {
-	struct got_object_idset_element *entry;
-	uint8_t i = id->sha1[0];
-
-	TAILQ_FOREACH(entry, &set->entries[i], entry) {
-		if (got_object_id_cmp(&entry->id, id) == 0)
-			return 1;
-	}
-
-	return 0;
+	struct got_object_idset_element *entry = find_element(set, id);
+	return entry ? 1 : 0;
 }
 
 void got_object_idset_for_each(struct got_object_idset *set,
     void (*cb)(struct got_object_id *, void *, void *), void *arg)
 {
 	struct got_object_idset_element *entry;
-	int i;
 
-	for (i = 0; i < nitems(set->entries); i++) {
-		TAILQ_FOREACH(entry, &set->entries[i], entry)
-			cb(&entry->id, entry->data, arg);
-	}
+	RBT_FOREACH(entry, got_object_idset_tree, &set->entries)
+		(*cb)(&entry->id, entry->data, arg);
 }
 
 int
@@ -184,3 +184,6 @@ got_object_idset_num_elements(struct got_object_idset *set)
 {
 	return set->totelem;
 }
+
+RBT_GENERATE(got_object_idset_tree, got_object_idset_element, entry,
+    cmp_elements);