diff --git a/doc/userguide.txt b/doc/userguide.txt index 1232b8c3..4d8ce8f0 100644 --- a/doc/userguide.txt +++ b/doc/userguide.txt @@ -1354,17 +1354,8 @@ below. E.g., |OAT | One-at-a-time |FNV | Fowler/Noll/Vo |SFH | Paul Hsieh -|MUR | MurmurHash v3 (see note) |=============================================================================== -[NOTE] -.MurmurHash -================================================================================ -A special symbol must be defined if you intend to use MurmurHash. To use it, add -`-DHASH_USING_NO_STRICT_ALIASING` to your `CFLAGS`. And, if you are using -the gcc compiler with optimization, add `-fno-strict-aliasing` to your `CFLAGS`. -================================================================================ - Which hash function is best? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ You can easily determine the best hash function for your key domain. To do so, diff --git a/src/uthash.h b/src/uthash.h index 7532a460..22563222 100644 --- a/src/uthash.h +++ b/src/uthash.h @@ -754,87 +754,6 @@ do { hashv += hashv >> 6; \ } while (0) -#ifdef HASH_USING_NO_STRICT_ALIASING -/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads. - * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. - * MurmurHash uses the faster approach only on CPU's where we know it's safe. - * - * Note the preprocessor built-in defines can be emitted using: - * - * gcc -m64 -dM -E - < /dev/null (on gcc) - * cc -## a.c (where a.c is a simple test file) (Sun Studio) - */ -#if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)) -#define MUR_GETBLOCK(p,i) p[i] -#else /* non intel */ -#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 3UL) == 0UL) -#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 3UL) == 1UL) -#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 3UL) == 2UL) -#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 3UL) == 3UL) -#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL)) -#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__)) -#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24)) -#define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16)) -#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8)) -#else /* assume little endian non-intel */ -#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24)) -#define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16)) -#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8)) -#endif -#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \ - (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \ - (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : \ - MUR_ONE_THREE(p)))) -#endif -#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) -#define MUR_FMIX(_h) \ -do { \ - _h ^= _h >> 16; \ - _h *= 0x85ebca6bu; \ - _h ^= _h >> 13; \ - _h *= 0xc2b2ae35u; \ - _h ^= _h >> 16; \ -} while (0) - -#define HASH_MUR(key,keylen,hashv) \ -do { \ - const uint8_t *_mur_data = (const uint8_t*)(key); \ - const int _mur_nblocks = (int)(keylen) / 4; \ - uint32_t _mur_h1 = 0xf88D5353u; \ - uint32_t _mur_c1 = 0xcc9e2d51u; \ - uint32_t _mur_c2 = 0x1b873593u; \ - uint32_t _mur_k1 = 0; \ - const uint8_t *_mur_tail; \ - const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+(_mur_nblocks*4)); \ - int _mur_i; \ - for (_mur_i = -_mur_nblocks; _mur_i != 0; _mur_i++) { \ - _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \ - _mur_k1 *= _mur_c1; \ - _mur_k1 = MUR_ROTL32(_mur_k1,15); \ - _mur_k1 *= _mur_c2; \ - \ - _mur_h1 ^= _mur_k1; \ - _mur_h1 = MUR_ROTL32(_mur_h1,13); \ - _mur_h1 = (_mur_h1*5U) + 0xe6546b64u; \ - } \ - _mur_tail = (const uint8_t*)(_mur_data + (_mur_nblocks*4)); \ - _mur_k1=0; \ - switch ((keylen) & 3U) { \ - case 0: break; \ - case 3: _mur_k1 ^= (uint32_t)_mur_tail[2] << 16; /* FALLTHROUGH */ \ - case 2: _mur_k1 ^= (uint32_t)_mur_tail[1] << 8; /* FALLTHROUGH */ \ - case 1: _mur_k1 ^= (uint32_t)_mur_tail[0]; \ - _mur_k1 *= _mur_c1; \ - _mur_k1 = MUR_ROTL32(_mur_k1,15); \ - _mur_k1 *= _mur_c2; \ - _mur_h1 ^= _mur_k1; \ - } \ - _mur_h1 ^= (uint32_t)(keylen); \ - MUR_FMIX(_mur_h1); \ - hashv = _mur_h1; \ -} while (0) -#endif /* HASH_USING_NO_STRICT_ALIASING */ - /* iterate over items in a known bucket to find desired item */ #define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,hashval,out) \ do { \ diff --git a/tests/Makefile b/tests/Makefile index 2b5fef07..5ba97bf0 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -34,9 +34,6 @@ endif TEST_TARGET=run_tests TESTS=./do_tests -# These flags are required in order to build with -DHASH_FUNCTION=HASH_MUR. -MUR_CFLAGS = -DHASH_USING_NO_STRICT_ALIASING -fno-strict-aliasing - # detect Cygwin ifneq ($(strip $(shell $(CC) -v 2>&1 |grep "cygwin")),) TESTS=./do_tests.cygwin @@ -80,11 +77,9 @@ thorough: $(MAKE) clean && $(MAKE) all EXTRA_CFLAGS='-pedantic' $(MAKE) clean && $(MAKE) all EXTRA_CFLAGS='-pedantic -DHASH_BLOOM=16' $(MAKE) clean && $(MAKE) tests_only EXTRA_CFLAGS='-pedantic -DHASH_BLOOM=16 -DHASH_DEBUG -DNO_DECLTYPE' - $(MAKE) clean && $(MAKE) tests_only EXTRA_CFLAGS='-pedantic -DHASH_USING_NO_STRICT_ALIASING -fno-strict-aliasing -DHASH_FUNCTION=HASH_MUR' $(MAKE) clean && CC=$(CXX) $(MAKE) all EXTRA_CFLAGS='-pedantic' $(MAKE) clean && CC=$(CXX) $(MAKE) all EXTRA_CFLAGS='-pedantic -DHASH_BLOOM=16' $(MAKE) clean && CC=$(CXX) $(MAKE) tests_only EXTRA_CFLAGS='-pedantic -DHASH_BLOOM=16 -DHASH_DEBUG -DNO_DECLTYPE' - $(MAKE) clean && CC=$(CXX) $(MAKE) tests_only EXTRA_CFLAGS='-pedantic -DHASH_USING_NO_STRICT_ALIASING -fno-strict-aliasing -DHASH_FUNCTION=HASH_MUR' example: example.c $(HASHDIR)/uthash.h $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -o $@ $(@).c @@ -94,7 +89,7 @@ $(PROGS) $(UTILS) : $(HASHDIR)/uthash.h @$(MKGITIGN) hashscan : $(HASHDIR)/uthash.h - $(CC) $(CPPFLAGS) $(CFLAGS) $(MUR_CFLAGS) $(LDFLAGS) -o $@ $(@).c + $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -o $@ $(@).c @$(MKGITIGN) sleep_test : $(HASHDIR)/uthash.h @@ -108,7 +103,6 @@ keystat : $(HASHDIR)/uthash.h $(CC) $(CPPFLAGS) $(CFLAGS) -DHASH_FUNCTION=HASH_OAT $(LDFLAGS) -o keystat.OAT keystat.c $(CC) $(CPPFLAGS) $(CFLAGS) -DHASH_FUNCTION=HASH_SAX $(LDFLAGS) -o keystat.SAX keystat.c $(CC) $(CPPFLAGS) $(CFLAGS) -DHASH_FUNCTION=HASH_SFH $(LDFLAGS) -o keystat.SFH keystat.c - $(CC) $(CPPFLAGS) $(CFLAGS) $(MUR_CFLAGS) -DHASH_FUNCTION=HASH_MUR $(LDFLAGS) -o keystat.MUR keystat.c run_tests: $(PROGS) perl $(TESTS) diff --git a/tests/all_funcs b/tests/all_funcs index 94a21595..f61a410c 100755 --- a/tests/all_funcs +++ b/tests/all_funcs @@ -11,4 +11,3 @@ make clean tests_only EXTRA_CFLAGS='-DHASH_FUNCTION=HASH_JEN'; proceed make clean tests_only EXTRA_CFLAGS='-DHASH_FUNCTION=HASH_OAT'; proceed make clean tests_only EXTRA_CFLAGS='-DHASH_FUNCTION=HASH_SAX'; proceed make clean tests_only EXTRA_CFLAGS='-DHASH_FUNCTION=HASH_SFH'; proceed -make clean tests_only EXTRA_CFLAGS='-DHASH_FUNCTION=HASH_MUR -DHASH_USING_NO_STRICT_ALIASING -fno-strict-aliasing' diff --git a/tests/hashscan.c b/tests/hashscan.c index b5bd1b00..c0d0dce9 100644 --- a/tests/hashscan.c +++ b/tests/hashscan.c @@ -76,9 +76,8 @@ int getkeys=0; #define SAX 4 #define FNV 5 #define OAT 6 -#define MUR 7 -#define NUM_HASH_FUNCS 8 /* includes id 0, the non-function */ -const char *hash_fcns[] = {"???","JEN","BER","SFH","SAX","FNV","OAT","MUR"}; +#define NUM_HASH_FUNCS 7 /* includes id 0, the non-function */ +const char *hash_fcns[] = {"???","JEN","BER","SFH","SAX","FNV","OAT"}; /* given a peer key/len/hashv, reverse engineer its hash function */ static int infer_hash_function(char *key, size_t keylen, uint32_t hashv) @@ -109,10 +108,6 @@ static int infer_hash_function(char *key, size_t keylen, uint32_t hashv) if (ohashv == hashv) { return OAT; } - HASH_MUR(key,keylen,ohashv); - if (ohashv == hashv) { - return MUR; - } return 0; } diff --git a/tests/test62.ans b/tests/test62.ans index 1b71740f..e69de29b 100644 --- a/tests/test62.ans +++ b/tests/test62.ans @@ -1,20 +0,0 @@ -al aligned (y): y -u1 aligned (n): n -u2 aligned (n): n -u3 aligned (n): n - -al plus1 (n): n -u1 plus1 (y): y -u2 plus1 (n): n -u3 plus1 (n): n - -al plus2 (n): n -u1 plus2 (n): n -u2 plus2 (y): y -u3 plus2 (n): n - -al plus3 (n): n -u1 plus3 (n): n -u2 plus3 (n): n -u3 plus3 (y): y - diff --git a/tests/test62.c b/tests/test62.c index 5944691f..ad7991f5 100644 --- a/tests/test62.c +++ b/tests/test62.c @@ -1,90 +1,80 @@ -#include + +#include #include -#include -#include "uthash.h" -#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 3UL) == 0UL) -#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 3UL) == 1UL) -#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 3UL) == 2UL) -#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 3UL) == 3UL) +#define HASH_FUNCTION(s, len, hashv) (hashv) = TrivialHash((const char *)s, len) +#include "uthash.h" -#define yn(rc) ((rc!=0U)?"y":"n") -int main(int argc,char*argv[]) +unsigned int TrivialHash(const char *s, size_t len) { - unsigned rc; - char *c = (char *)malloc(8UL); - if (c == NULL) { - exit(-1); + unsigned int h = 0; + int i; + for (i=0; i < len; ++i) { + h += (unsigned char)s[i]; } - *(c+0) = 0x00; - unsigned *al = (unsigned*)(c+0); - *(c+1) = 0x01; - unsigned *u1 = (unsigned*)(c+1); - *(c+2) = 0x02; - unsigned *u2 = (unsigned*)(c+2); - *(c+3) = 0x03; - unsigned *u3 = (unsigned*)(c+3); - *(c+4) = 0x04; - *(c+5) = 0x05; - *(c+6) = 0x06; - *(c+7) = 0x07; + return h; +} + +struct test_t { + int a; + int b; + UT_hash_handle hh; +}; + +struct test_t *make_test(int value) +{ + struct test_t *test = (struct test_t *)malloc(sizeof *test); + assert(test != NULL); + test->a = value; + return test; +} + +int main() +{ + struct test_t *tests = NULL; + struct test_t *test = NULL; + int x; + unsigned int h; + + x = 0x0042; + HASH_VALUE(&x, sizeof x, h); + assert(h == 0x42); + + x = 0x4002; + HASH_VALUE(&x, sizeof x, h); + assert(h == 0x42); - /* ---------------------------------------- */ - /* test whether alignment is detected properly */ + test = make_test(0x0042); + HASH_ADD_INT(tests, a, test); + test = make_test(0x4002); + HASH_ADD_INT(tests, a, test); - rc = MUR_PLUS0_ALIGNED(al); - printf("al aligned (y): %s\n", yn(rc)); - rc = MUR_PLUS0_ALIGNED(u1); - printf("u1 aligned (n): %s\n", yn(rc)); - rc = MUR_PLUS0_ALIGNED(u2); - printf("u2 aligned (n): %s\n", yn(rc)); - rc = MUR_PLUS0_ALIGNED(u3); - printf("u3 aligned (n): %s\n", yn(rc)); - printf("\n"); + x = 0x4002; + test = NULL; + HASH_FIND_BYHASHVALUE(hh, tests, &x, sizeof x, 0x42, test); + assert(test != NULL); + assert(test->a == 0x4002); - rc = MUR_PLUS1_ALIGNED(al); - printf("al plus1 (n): %s\n", yn(rc)); - rc = MUR_PLUS1_ALIGNED(u1); - printf("u1 plus1 (y): %s\n", yn(rc)); - rc = MUR_PLUS1_ALIGNED(u2); - printf("u2 plus1 (n): %s\n", yn(rc)); - rc = MUR_PLUS1_ALIGNED(u3); - printf("u3 plus1 (n): %s\n", yn(rc)); - printf("\n"); + x = 0x0042; + test = NULL; + HASH_FIND_BYHASHVALUE(hh, tests, &x, sizeof x, 0x42, test); + assert(test != NULL); + assert(test->a == 0x0042); - rc = MUR_PLUS2_ALIGNED(al); - printf("al plus2 (n): %s\n", yn(rc)); - rc = MUR_PLUS2_ALIGNED(u1); - printf("u1 plus2 (n): %s\n", yn(rc)); - rc = MUR_PLUS2_ALIGNED(u2); - printf("u2 plus2 (y): %s\n", yn(rc)); - rc = MUR_PLUS2_ALIGNED(u3); - printf("u3 plus2 (n): %s\n", yn(rc)); - printf("\n"); + x = 0x4002; + test = NULL; + HASH_FIND_BYHASHVALUE(hh, tests, &x, sizeof x, 0x43, test); + assert(test == NULL); - rc = MUR_PLUS3_ALIGNED(al); - printf("al plus3 (n): %s\n", yn(rc)); - rc = MUR_PLUS3_ALIGNED(u1); - printf("u1 plus3 (n): %s\n", yn(rc)); - rc = MUR_PLUS3_ALIGNED(u2); - printf("u2 plus3 (n): %s\n", yn(rc)); - rc = MUR_PLUS3_ALIGNED(u3); - printf("u3 plus3 (y): %s\n", yn(rc)); - printf("\n"); + x = 0x0042; + test = NULL; + HASH_FIND_BYHASHVALUE(hh, tests, &x, sizeof x, 0x43, test); + assert(test == NULL); - /* ---------------------------------------- */ - /* test careful reassembly of an unaligned integer */ -#if 0 /* commented out since result is endian dependent */ - rc = MUR_GETBLOCK(al,0); - printf("%x\n", rc); - rc = MUR_GETBLOCK(u1,0); - printf("%x\n", rc); - rc = MUR_GETBLOCK(u2,0); - printf("%x\n", rc); - rc = MUR_GETBLOCK(u3,0); - printf("%x\n", rc); -#endif + x = 0x4003; + test = NULL; + HASH_FIND_BYHASHVALUE(hh, tests, &x, sizeof x, 0x42, test); + assert(test == NULL); - free(c); - return 0; + HASH_CLEAR(hh, tests); }