Commit e0646b38c7acfcfc1802c987ceaa2c275b4ae226

Vicent Marti 2010-12-30T00:31:58

Add generic hash function to util.c It's MurmurHash3 slightly edited to make it cross-platform. Fast and neat. Use this for hashing strings on hash tables instead of a full SHA1 hash. It's very fast and well distributed. Obviously not crypto-secure. Signed-off-by: Vicent Marti <tanoku@gmail.com>

diff --git a/src/util.c b/src/util.c
index a3b62ef..ef97f68 100644
--- a/src/util.c
+++ b/src/util.c
@@ -141,3 +141,99 @@ void git__hexdump(const char *buffer, size_t len)
 
 	printf("\n");
 }
+
+#ifdef GIT_LEGACY_HASH
+uint32_t git__hash(const void *key, int len, unsigned int seed)
+{
+	const uint32_t m = 0x5bd1e995;
+	const int r = 24;
+	uint32_t h = seed ^ len;
+
+	const unsigned char *data = (const unsigned char *)key;
+
+	while(len >= 4) {
+		uint32_t k = *(uint32_t *)data;
+
+		k *= m; 
+		k ^= k >> r; 
+		k *= m; 
+		
+		h *= m; 
+		h ^= k;
+
+		data += 4;
+		len -= 4;
+	}
+	
+	switch(len) {
+	case 3: h ^= data[2] << 16;
+	case 2: h ^= data[1] << 8;
+	case 1: h ^= data[0];
+	        h *= m;
+	};
+
+	h ^= h >> 13;
+	h *= m;
+	h ^= h >> 15;
+
+	return h;
+} 
+#else
+/*
+	Cross-platform version of Murmurhash3
+	http://code.google.com/p/smhasher/wiki/MurmurHash3
+	by Austin Appleby (aappleby@gmail.com)
+
+	This code is on the public domain.
+*/
+uint32_t git__hash(const void *key, int len, uint32_t seed)
+{
+
+#define MURMUR_BLOCK() {\
+    k1 *= c1; \
+    k1  = git__rotl(k1,11);\
+    k1 *= c2;\
+    h1 ^= k1;\
+    h1 = h1*3 + 0x52dce729;\
+    c1 = c1*5 + 0x7b7d159c;\
+    c2 = c2*5 + 0x6bce6396;\
+}
+
+	const uint8_t *data = (const uint8_t*)key;
+	const int nblocks = len / 4;
+
+	const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4);
+	const uint8_t *tail = (const uint8_t *)(data + nblocks * 4);
+
+	uint32_t h1 = 0x971e137b ^ seed;
+	uint32_t k1;
+
+	uint32_t c1 = 0x95543787;
+	uint32_t c2 = 0x2ad7eb25;
+
+	int i;
+
+	for (i = -nblocks; i; i++) {
+		k1 = blocks[i];
+		MURMUR_BLOCK();
+	}
+
+	k1 = 0;
+
+	switch(len & 3) {
+	case 3: k1 ^= tail[2] << 16;
+	case 2: k1 ^= tail[1] << 8;
+	case 1: k1 ^= tail[0];
+			MURMUR_BLOCK();
+	}
+
+	h1 ^= len;
+	h1 ^= h1 >> 16;
+	h1 *= 0x85ebca6b;
+	h1 ^= h1 >> 13;
+	h1 *= 0xc2b2ae35;
+	h1 ^= h1 >> 16;
+
+	return h1;
+} 
+#endif
diff --git a/src/util.h b/src/util.h
index 808b20d..5e6c7d2 100644
--- a/src/util.h
+++ b/src/util.h
@@ -23,6 +23,8 @@ extern int git__dirname(char *dir, size_t n, char *path);
 extern int git__basename(char *base, size_t n, char *path);
 
 extern void git__hexdump(const char *buffer, size_t n);
+extern uint32_t git__hash(const void *key, int len, uint32_t seed);
+
 
 /** @return true if p fits into the range of a size_t */
 GIT_INLINE(int) git__is_sizet(off_t p)
@@ -31,6 +33,13 @@ GIT_INLINE(int) git__is_sizet(off_t p)
 	return p == (off_t)r;
 }
 
+/* 32-bit cross-platform rotl */
+#ifdef _MSC_VER /* use built-in method in MSVC */
+#	define git__rotl(v, s) (uint32_t)_rotl(v, s)
+#else /* use bitops in GCC; with o2 this gets optimized to a rotl instruction */
+#	define git__rotl(v, s) (uint32_t)(((uint32_t)(v) << (s)) | ((uint32_t)(v) >> (32 - (s))))
+#endif
+
 /*
  * Realloc the buffer pointed at by variable 'x' so that it can hold
  * at least 'nr' entries; the number of entries currently allocated