Add generic hash function to util.c It's MurmurHash3 slightly edited to make it cross-platform. Fast and neat. Use this for hashing strings on hash tables instead of a full SHA1 hash. It's very fast and well distributed. Obviously not crypto-secure. Signed-off-by: Vicent Marti <tanoku@gmail.com>
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
diff --git a/src/util.c b/src/util.c
index a3b62ef..ef97f68 100644
--- a/src/util.c
+++ b/src/util.c
@@ -141,3 +141,99 @@ void git__hexdump(const char *buffer, size_t len)
printf("\n");
}
+
+#ifdef GIT_LEGACY_HASH
+uint32_t git__hash(const void *key, int len, unsigned int seed)
+{
+ const uint32_t m = 0x5bd1e995;
+ const int r = 24;
+ uint32_t h = seed ^ len;
+
+ const unsigned char *data = (const unsigned char *)key;
+
+ while(len >= 4) {
+ uint32_t k = *(uint32_t *)data;
+
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+
+ h *= m;
+ h ^= k;
+
+ data += 4;
+ len -= 4;
+ }
+
+ switch(len) {
+ case 3: h ^= data[2] << 16;
+ case 2: h ^= data[1] << 8;
+ case 1: h ^= data[0];
+ h *= m;
+ };
+
+ h ^= h >> 13;
+ h *= m;
+ h ^= h >> 15;
+
+ return h;
+}
+#else
+/*
+ Cross-platform version of Murmurhash3
+ http://code.google.com/p/smhasher/wiki/MurmurHash3
+ by Austin Appleby (aappleby@gmail.com)
+
+ This code is on the public domain.
+*/
+uint32_t git__hash(const void *key, int len, uint32_t seed)
+{
+
+#define MURMUR_BLOCK() {\
+ k1 *= c1; \
+ k1 = git__rotl(k1,11);\
+ k1 *= c2;\
+ h1 ^= k1;\
+ h1 = h1*3 + 0x52dce729;\
+ c1 = c1*5 + 0x7b7d159c;\
+ c2 = c2*5 + 0x6bce6396;\
+}
+
+ const uint8_t *data = (const uint8_t*)key;
+ const int nblocks = len / 4;
+
+ const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4);
+ const uint8_t *tail = (const uint8_t *)(data + nblocks * 4);
+
+ uint32_t h1 = 0x971e137b ^ seed;
+ uint32_t k1;
+
+ uint32_t c1 = 0x95543787;
+ uint32_t c2 = 0x2ad7eb25;
+
+ int i;
+
+ for (i = -nblocks; i; i++) {
+ k1 = blocks[i];
+ MURMUR_BLOCK();
+ }
+
+ k1 = 0;
+
+ switch(len & 3) {
+ case 3: k1 ^= tail[2] << 16;
+ case 2: k1 ^= tail[1] << 8;
+ case 1: k1 ^= tail[0];
+ MURMUR_BLOCK();
+ }
+
+ h1 ^= len;
+ h1 ^= h1 >> 16;
+ h1 *= 0x85ebca6b;
+ h1 ^= h1 >> 13;
+ h1 *= 0xc2b2ae35;
+ h1 ^= h1 >> 16;
+
+ return h1;
+}
+#endif
diff --git a/src/util.h b/src/util.h
index 808b20d..5e6c7d2 100644
--- a/src/util.h
+++ b/src/util.h
@@ -23,6 +23,8 @@ extern int git__dirname(char *dir, size_t n, char *path);
extern int git__basename(char *base, size_t n, char *path);
extern void git__hexdump(const char *buffer, size_t n);
+extern uint32_t git__hash(const void *key, int len, uint32_t seed);
+
/** @return true if p fits into the range of a size_t */
GIT_INLINE(int) git__is_sizet(off_t p)
@@ -31,6 +33,13 @@ GIT_INLINE(int) git__is_sizet(off_t p)
return p == (off_t)r;
}
+/* 32-bit cross-platform rotl */
+#ifdef _MSC_VER /* use built-in method in MSVC */
+# define git__rotl(v, s) (uint32_t)_rotl(v, s)
+#else /* use bitops in GCC; with o2 this gets optimized to a rotl instruction */
+# define git__rotl(v, s) (uint32_t)(((uint32_t)(v) << (s)) | ((uint32_t)(v) >> (32 - (s))))
+#endif
+
/*
* Realloc the buffer pointed at by variable 'x' so that it can hold
* at least 'nr' entries; the number of entries currently allocated