atom: rewrite as a hash table While the previous 1987-style[0] scheme was fun (and I reasonably optimized it for a fair comparison), this task is more suited to a hash table. Even a simple implementation beats the old one. [0] Seems to have first appeared in X11R1, released September 1987. See server/dix/atom.c here: https://www.x.org/releases/X11R1/X.V11R1.tar.gz Signed-off-by: Ran Benita <ran@unusedvar.com>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
diff --git a/src/atom.c b/src/atom.c
index 180b25f..d43ac38 100644
--- a/src/atom.c
+++ b/src/atom.c
@@ -72,8 +72,14 @@
#include "config.h"
-#include "utils.h"
+#include <assert.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <string.h>
+
#include "atom.h"
+#include "darray.h"
+#include "utils.h"
/* FNV-1a (http://www.isthe.com/chongo/tech/comp/fnv/). */
static inline uint32_t
@@ -90,31 +96,14 @@ hash_buf(const char *string, size_t len)
}
/*
- * The atom table is a insert-only unbalanced binary search tree
- * mapping strings to atoms.
- *
- * The tree nodes are kept contiguously in the `table` array.
- *
- * The atom value is the index of the tree node in the array.
- *
- * As an optimization, strings are not compared by value directly,
- * s1 < s2
- * instead, they are compared by fingerprint (hash) and the value
- * is only used to resolve collisions:
- * (fingerprint(s1), s1) < (fingerprint(s2), s2)
- * Fingerprint are pre-calculated and saved in the tree nodes.
- *
- * Why is this not just a hash table? Who knows!
+ * The atom table is an insert-only linear probing hash table
+ * mapping strings to atoms. Another array maps the atoms to
+ * strings. The atom value is the position in the strings array.
*/
-struct atom_node {
- xkb_atom_t left, right;
- uint32_t fingerprint;
- char *string;
-};
-
struct atom_table {
- xkb_atom_t root;
- darray(struct atom_node) table;
+ xkb_atom_t *index;
+ size_t index_size;
+ darray(char *) strings;
};
struct atom_table *
@@ -124,9 +113,10 @@ atom_table_new(void)
if (!table)
return NULL;
- darray_init(table->table);
- /* The original throw-away root is here, at the illegal atom 0. */
- darray_resize0(table->table, 1);
+ darray_init(table->strings);
+ darray_append(table->strings, NULL);
+ table->index_size = 4;
+ table->index = calloc(table->index_size, sizeof(*table->index));
return table;
}
@@ -137,61 +127,67 @@ atom_table_free(struct atom_table *table)
if (!table)
return;
- struct atom_node *node;
- darray_foreach(node, table->table)
- free(node->string);
- darray_free(table->table);
+ char **string;
+ darray_foreach(string, table->strings)
+ free(*string);
+ darray_free(table->strings);
+ free(table->index);
free(table);
}
const char *
atom_text(struct atom_table *table, xkb_atom_t atom)
{
- assert(atom < darray_size(table->table));
- return darray_item(table->table, atom).string;
+ assert(atom < darray_size(table->strings));
+ return darray_item(table->strings, atom);
}
xkb_atom_t
atom_intern(struct atom_table *table, const char *string, size_t len, bool add)
{
- uint32_t fingerprint = hash_buf(string, len);
-
- xkb_atom_t *atomp = &table->root;
- while (*atomp != XKB_ATOM_NONE) {
- struct atom_node *node = &darray_item(table->table, *atomp);
-
- if (fingerprint > node->fingerprint) {
- atomp = &node->right;
- }
- else if (fingerprint < node->fingerprint) {
- atomp = &node->left;
- }
- else {
- /* Now start testing the strings. */
- const int cmp = strncmp(string, node->string, len);
- if (likely(cmp == 0 && node->string[len] == '\0')) {
- return *atomp;
- }
- else if (cmp > 0) {
- atomp = &node->right;
+ if (darray_size(table->strings) > 0.80 * table->index_size) {
+ table->index_size *= 2;
+ table->index = realloc(table->index, table->index_size * sizeof(*table->index));
+ memset(table->index, 0, table->index_size * sizeof(*table->index));
+ for (size_t j = 1; j < darray_size(table->strings); j++) {
+ const char *s = darray_item(table->strings, j);
+ uint32_t hash = hash_buf(s, strlen(s));
+ for (size_t i = 0; i < table->index_size; i++) {
+ size_t index_pos = (hash + i) & (table->index_size - 1);
+ if (index_pos == 0)
+ continue;
+
+ xkb_atom_t atom = table->index[index_pos];
+ if (atom == XKB_ATOM_NONE) {
+ table->index[index_pos] = j;
+ break;
+ }
}
- else {
- atomp = &node->left;
+ }
+ }
+
+ uint32_t hash = hash_buf(string, len);
+ for (size_t i = 0; i < table->index_size; i++) {
+ size_t index_pos = (hash + i) & (table->index_size - 1);
+ if (index_pos == 0)
+ continue;
+
+ xkb_atom_t existing_atom = table->index[index_pos];
+ if (existing_atom == XKB_ATOM_NONE) {
+ if (add) {
+ xkb_atom_t new_atom = darray_size(table->strings);
+ darray_append(table->strings, strndup(string, len));
+ table->index[index_pos] = new_atom;
+ return new_atom;
+ } else {
+ return XKB_ATOM_NONE;
}
}
+
+ const char *existing_value = darray_item(table->strings, existing_atom);
+ if (strncmp(existing_value, string, len) == 0 && existing_value[len] == '\0')
+ return existing_atom;
}
- if (!add)
- return XKB_ATOM_NONE;
-
- struct atom_node node;
- node.string = strndup(string, len);
- assert(node.string != NULL);
- node.left = node.right = XKB_ATOM_NONE;
- node.fingerprint = fingerprint;
- xkb_atom_t atom = darray_size(table->table);
- /* Do this before the append, as it may realloc and change the offsets. */
- *atomp = atom;
- darray_append(table->table, node);
- return atom;
+ assert(!"couldn't find an empty slot during probing");
}