khash: update to klib f28c067
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
diff --git a/src/khash.h b/src/khash.h
index 2422044..e5789c4 100644
--- a/src/khash.h
+++ b/src/khash.h
@@ -46,6 +46,19 @@ int main() {
*/
/*
+ 2013-05-02 (0.2.8):
+
+ * Use quadratic probing. When the capacity is power of 2, stepping function
+ i*(i+1)/2 guarantees to traverse each bucket. It is better than double
+ hashing on cache performance and is more robust than linear probing.
+
+ In theory, double hashing should be more robust than quadratic probing.
+ However, my implementation is probably not for large hash tables, because
+ the second hash function is closely tied to the first hash function,
+ which reduce the effectiveness of double hashing.
+
+ Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php
+
2011-12-29 (0.2.7):
* Minor code clean up; no actual effect.
@@ -110,13 +123,13 @@ int main() {
Generic hash table library.
*/
-#define AC_VERSION_KHASH_H "0.2.6"
+#define AC_VERSION_KHASH_H "0.2.8"
#include <stdlib.h>
#include <string.h>
#include <limits.h>
-/* compipler specific configuration */
+/* compiler specific configuration */
#if UINT_MAX == 0xffffffffu
typedef unsigned int khint32_t;
@@ -130,11 +143,13 @@ typedef unsigned long khint64_t;
typedef unsigned long long khint64_t;
#endif
+#ifndef kh_inline
#ifdef _MSC_VER
#define kh_inline __inline
#else
#define kh_inline inline
#endif
+#endif /* kh_inline */
typedef khint32_t khint_t;
typedef khint_t khiter_t;
@@ -147,12 +162,6 @@ typedef khint_t khiter_t;
#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
-#ifdef KHASH_LINEAR
-#define __ac_inc(k, m) 1
-#else
-#define __ac_inc(k, m) (((k)>>3 ^ (k)<<3) | 1) & (m)
-#endif
-
#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)
#ifndef kroundup32
@@ -175,7 +184,7 @@ typedef khint_t khiter_t;
static const double __ac_HASH_UPPER = 0.77;
#define __KHASH_TYPE(name, khkey_t, khval_t) \
- typedef struct { \
+ typedef struct kh_##name##_s { \
khint_t n_buckets, size, n_occupied, upper_bound; \
khint32_t *flags; \
khkey_t *keys; \
@@ -213,19 +222,19 @@ static const double __ac_HASH_UPPER = 0.77;
SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
{ \
if (h->n_buckets) { \
- khint_t inc, k, i, last, mask; \
+ khint_t k, i, last, mask, step = 0; \
mask = h->n_buckets - 1; \
k = __hash_func(key); i = k & mask; \
- inc = __ac_inc(k, mask); last = i; /* inc==1 for linear probing */ \
+ last = i; \
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
- i = (i + inc) & mask; \
+ i = (i + (++step)) & mask; \
if (i == last) return h->n_buckets; \
} \
return __ac_iseither(h->flags, i)? h->n_buckets : i; \
} else return 0; \
} \
SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
- { /* This function uses 0.25*n_bucktes bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
+ { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
khint32_t *new_flags = 0; \
khint_t j = 1; \
{ \
@@ -238,11 +247,11 @@ static const double __ac_HASH_UPPER = 0.77;
memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
if (h->n_buckets < new_n_buckets) { /* expand */ \
khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
- if (!new_keys) return -1; \
+ if (!new_keys) { kfree(new_flags); return -1; } \
h->keys = new_keys; \
if (kh_is_map) { \
khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
- if (!new_vals) return -1; \
+ if (!new_vals) { kfree(new_flags); return -1; } \
h->vals = new_vals; \
} \
} /* otherwise shrink */ \
@@ -258,11 +267,10 @@ static const double __ac_HASH_UPPER = 0.77;
if (kh_is_map) val = h->vals[j]; \
__ac_set_isdel_true(h->flags, j); \
while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
- khint_t inc, k, i; \
+ khint_t k, i, step = 0; \
k = __hash_func(key); \
i = k & new_mask; \
- inc = __ac_inc(k, new_mask); \
- while (!__ac_isempty(new_flags, i)) i = (i + inc) & new_mask; \
+ while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \
__ac_set_isempty_false(new_flags, i); \
if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
@@ -301,14 +309,14 @@ static const double __ac_HASH_UPPER = 0.77;
} \
} /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
{ \
- khint_t inc, k, i, site, last, mask = h->n_buckets - 1; \
+ khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \
x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \
else { \
- inc = __ac_inc(k, mask); last = i; \
+ last = i; \
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
if (__ac_isdel(h->flags, i)) site = i; \
- i = (i + inc) & mask; \
+ i = (i + (++step)) & mask; \
if (i == last) { x = site; break; } \
} \
if (x == h->n_buckets) { \
@@ -449,7 +457,8 @@ static kh_inline khint_t __ac_Wang_hash(khint_t key)
@param name Name of the hash table [symbol]
@param h Pointer to the hash table [khash_t(name)*]
@param k Key [type of keys]
- @param r Extra return code: 0 if the key is present in the hash table;
+ @param r Extra return code: -1 if the operation failed;
+ 0 if the key is present in the hash table;
1 if the bucket is empty (never used); 2 if the element in
the bucket has been deleted [int*]
@return Iterator to the inserted element [khint_t]
@@ -461,7 +470,7 @@ static kh_inline khint_t __ac_Wang_hash(khint_t key)
@param name Name of the hash table [symbol]
@param h Pointer to the hash table [khash_t(name)*]
@param k Key [type of keys]
- @return Iterator to the found element, or kh_end(h) is the element is absent [khint_t]
+ @return Iterator to the found element, or kh_end(h) if the element is absent [khint_t]
*/
#define kh_get(name, h, k) kh_get_##name(h, k)
@@ -607,4 +616,4 @@ typedef const char *kh_cstr_t;
#define KHASH_MAP_INIT_STR(name, khval_t) \
KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
-#endif /* __AC_KHASH_H */
+#endif /* __AC_KHASH_H */
\ No newline at end of file