Skip to content

Commit

Permalink
Remove all support for MurmurHash (HASH_MUR, HASH_USING_NO_STRICT_ALI…
Browse files Browse the repository at this point in the history
…ASING).

This is related to troydhanson#186: `HASH_MUR` generates a lot of uthash's `-Wcast-align`
noise. There's a lot of crufty technical debt around `HASH_MUR`;
I don't think it's pulling its weight, and will continue to think so
unless somebody steps up to complain about this pull request.

I think that if someone wants to use MurmurHash with uthash, the
right way to do it is to define a function (perhaps an `inline`
function) that implements MurmurHash, and then define `HASH_FUNCTION`
to the name of that function. I don't think we need to continue
supporting MurmurHash out of the box, with all the processor-specific
`#ifdef`s and compiler-specific `-fno-strict-aliasing` cruft that
this old implementation entails.

I've replaced the old test62.c with a new test that shows how to use
`HASH_FUNCTION` and how to use it with `HASH_VALUE` and
`HASH_FIND_BYHASHVALUE`.
  • Loading branch information
Quuxplusone committed Nov 27, 2019
1 parent e06180a commit aa8d42d
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 202 deletions.
9 changes: 0 additions & 9 deletions doc/userguide.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1354,17 +1354,8 @@ below. E.g.,
|OAT | One-at-a-time
|FNV | Fowler/Noll/Vo
|SFH | Paul Hsieh
|MUR | MurmurHash v3 (see note)
|===============================================================================

[NOTE]
.MurmurHash
================================================================================
A special symbol must be defined if you intend to use MurmurHash. To use it, add
`-DHASH_USING_NO_STRICT_ALIASING` to your `CFLAGS`. And, if you are using
the gcc compiler with optimization, add `-fno-strict-aliasing` to your `CFLAGS`.
================================================================================

Which hash function is best?
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
You can easily determine the best hash function for your key domain. To do so,
Expand Down
81 changes: 0 additions & 81 deletions src/uthash.h
Original file line number Diff line number Diff line change
Expand Up @@ -754,87 +754,6 @@ do {
hashv += hashv >> 6; \
} while (0)

#ifdef HASH_USING_NO_STRICT_ALIASING
/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads.
* For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error.
* MurmurHash uses the faster approach only on CPU's where we know it's safe.
*
* Note the preprocessor built-in defines can be emitted using:
*
* gcc -m64 -dM -E - < /dev/null (on gcc)
* cc -## a.c (where a.c is a simple test file) (Sun Studio)
*/
#if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86))
#define MUR_GETBLOCK(p,i) p[i]
#else /* non intel */
#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 3UL) == 0UL)
#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 3UL) == 1UL)
#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 3UL) == 2UL)
#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 3UL) == 3UL)
#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL))
#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__))
#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24))
#define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16))
#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8))
#else /* assume little endian non-intel */
#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24))
#define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16))
#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8))
#endif
#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \
(MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \
(MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : \
MUR_ONE_THREE(p))))
#endif
#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
#define MUR_FMIX(_h) \
do { \
_h ^= _h >> 16; \
_h *= 0x85ebca6bu; \
_h ^= _h >> 13; \
_h *= 0xc2b2ae35u; \
_h ^= _h >> 16; \
} while (0)

#define HASH_MUR(key,keylen,hashv) \
do { \
const uint8_t *_mur_data = (const uint8_t*)(key); \
const int _mur_nblocks = (int)(keylen) / 4; \
uint32_t _mur_h1 = 0xf88D5353u; \
uint32_t _mur_c1 = 0xcc9e2d51u; \
uint32_t _mur_c2 = 0x1b873593u; \
uint32_t _mur_k1 = 0; \
const uint8_t *_mur_tail; \
const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+(_mur_nblocks*4)); \
int _mur_i; \
for (_mur_i = -_mur_nblocks; _mur_i != 0; _mur_i++) { \
_mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \
_mur_k1 *= _mur_c1; \
_mur_k1 = MUR_ROTL32(_mur_k1,15); \
_mur_k1 *= _mur_c2; \
\
_mur_h1 ^= _mur_k1; \
_mur_h1 = MUR_ROTL32(_mur_h1,13); \
_mur_h1 = (_mur_h1*5U) + 0xe6546b64u; \
} \
_mur_tail = (const uint8_t*)(_mur_data + (_mur_nblocks*4)); \
_mur_k1=0; \
switch ((keylen) & 3U) { \
case 0: break; \
case 3: _mur_k1 ^= (uint32_t)_mur_tail[2] << 16; /* FALLTHROUGH */ \
case 2: _mur_k1 ^= (uint32_t)_mur_tail[1] << 8; /* FALLTHROUGH */ \
case 1: _mur_k1 ^= (uint32_t)_mur_tail[0]; \
_mur_k1 *= _mur_c1; \
_mur_k1 = MUR_ROTL32(_mur_k1,15); \
_mur_k1 *= _mur_c2; \
_mur_h1 ^= _mur_k1; \
} \
_mur_h1 ^= (uint32_t)(keylen); \
MUR_FMIX(_mur_h1); \
hashv = _mur_h1; \
} while (0)
#endif /* HASH_USING_NO_STRICT_ALIASING */

/* iterate over items in a known bucket to find desired item */
#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,hashval,out) \
do { \
Expand Down
8 changes: 1 addition & 7 deletions tests/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,6 @@ endif
TEST_TARGET=run_tests
TESTS=./do_tests

# These flags are required in order to build with -DHASH_FUNCTION=HASH_MUR.
MUR_CFLAGS = -DHASH_USING_NO_STRICT_ALIASING -fno-strict-aliasing

# detect Cygwin
ifneq ($(strip $(shell $(CC) -v 2>&1 |grep "cygwin")),)
TESTS=./do_tests.cygwin
Expand Down Expand Up @@ -80,11 +77,9 @@ thorough:
$(MAKE) clean && $(MAKE) all EXTRA_CFLAGS='-pedantic'
$(MAKE) clean && $(MAKE) all EXTRA_CFLAGS='-pedantic -DHASH_BLOOM=16'
$(MAKE) clean && $(MAKE) tests_only EXTRA_CFLAGS='-pedantic -DHASH_BLOOM=16 -DHASH_DEBUG -DNO_DECLTYPE'
$(MAKE) clean && $(MAKE) tests_only EXTRA_CFLAGS='-pedantic -DHASH_USING_NO_STRICT_ALIASING -fno-strict-aliasing -DHASH_FUNCTION=HASH_MUR'
$(MAKE) clean && CC=$(CXX) $(MAKE) all EXTRA_CFLAGS='-pedantic'
$(MAKE) clean && CC=$(CXX) $(MAKE) all EXTRA_CFLAGS='-pedantic -DHASH_BLOOM=16'
$(MAKE) clean && CC=$(CXX) $(MAKE) tests_only EXTRA_CFLAGS='-pedantic -DHASH_BLOOM=16 -DHASH_DEBUG -DNO_DECLTYPE'
$(MAKE) clean && CC=$(CXX) $(MAKE) tests_only EXTRA_CFLAGS='-pedantic -DHASH_USING_NO_STRICT_ALIASING -fno-strict-aliasing -DHASH_FUNCTION=HASH_MUR'

example: example.c $(HASHDIR)/uthash.h
$(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -o $@ $(@).c
Expand All @@ -94,7 +89,7 @@ $(PROGS) $(UTILS) : $(HASHDIR)/uthash.h
@$(MKGITIGN)

hashscan : $(HASHDIR)/uthash.h
$(CC) $(CPPFLAGS) $(CFLAGS) $(MUR_CFLAGS) $(LDFLAGS) -o $@ $(@).c
$(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -o $@ $(@).c
@$(MKGITIGN)

sleep_test : $(HASHDIR)/uthash.h
Expand All @@ -108,7 +103,6 @@ keystat : $(HASHDIR)/uthash.h
$(CC) $(CPPFLAGS) $(CFLAGS) -DHASH_FUNCTION=HASH_OAT $(LDFLAGS) -o keystat.OAT keystat.c
$(CC) $(CPPFLAGS) $(CFLAGS) -DHASH_FUNCTION=HASH_SAX $(LDFLAGS) -o keystat.SAX keystat.c
$(CC) $(CPPFLAGS) $(CFLAGS) -DHASH_FUNCTION=HASH_SFH $(LDFLAGS) -o keystat.SFH keystat.c
$(CC) $(CPPFLAGS) $(CFLAGS) $(MUR_CFLAGS) -DHASH_FUNCTION=HASH_MUR $(LDFLAGS) -o keystat.MUR keystat.c

run_tests: $(PROGS)
perl $(TESTS)
Expand Down
1 change: 0 additions & 1 deletion tests/all_funcs
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,3 @@ make clean tests_only EXTRA_CFLAGS='-DHASH_FUNCTION=HASH_JEN'; proceed
make clean tests_only EXTRA_CFLAGS='-DHASH_FUNCTION=HASH_OAT'; proceed
make clean tests_only EXTRA_CFLAGS='-DHASH_FUNCTION=HASH_SAX'; proceed
make clean tests_only EXTRA_CFLAGS='-DHASH_FUNCTION=HASH_SFH'; proceed
make clean tests_only EXTRA_CFLAGS='-DHASH_FUNCTION=HASH_MUR -DHASH_USING_NO_STRICT_ALIASING -fno-strict-aliasing'
9 changes: 2 additions & 7 deletions tests/hashscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,8 @@ int getkeys=0;
#define SAX 4
#define FNV 5
#define OAT 6
#define MUR 7
#define NUM_HASH_FUNCS 8 /* includes id 0, the non-function */
const char *hash_fcns[] = {"???","JEN","BER","SFH","SAX","FNV","OAT","MUR"};
#define NUM_HASH_FUNCS 7 /* includes id 0, the non-function */
const char *hash_fcns[] = {"???","JEN","BER","SFH","SAX","FNV","OAT"};

/* given a peer key/len/hashv, reverse engineer its hash function */
static int infer_hash_function(char *key, size_t keylen, uint32_t hashv)
Expand Down Expand Up @@ -109,10 +108,6 @@ static int infer_hash_function(char *key, size_t keylen, uint32_t hashv)
if (ohashv == hashv) {
return OAT;
}
HASH_MUR(key,keylen,ohashv);
if (ohashv == hashv) {
return MUR;
}
return 0;
}

Expand Down
20 changes: 0 additions & 20 deletions tests/test62.ans
Original file line number Diff line number Diff line change
@@ -1,20 +0,0 @@
al aligned (y): y
u1 aligned (n): n
u2 aligned (n): n
u3 aligned (n): n

al plus1 (n): n
u1 plus1 (y): y
u2 plus1 (n): n
u3 plus1 (n): n

al plus2 (n): n
u1 plus2 (n): n
u2 plus2 (y): y
u3 plus2 (n): n

al plus3 (n): n
u1 plus3 (n): n
u2 plus3 (n): n
u3 plus3 (y): y

144 changes: 67 additions & 77 deletions tests/test62.c
Original file line number Diff line number Diff line change
@@ -1,90 +1,80 @@
#include <stdio.h>

#include <assert.h>
#include <stdlib.h>
#include <inttypes.h>
#include "uthash.h"

#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 3UL) == 0UL)
#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 3UL) == 1UL)
#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 3UL) == 2UL)
#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 3UL) == 3UL)
#define HASH_FUNCTION(s, len, hashv) (hashv) = TrivialHash((const char *)s, len)
#include "uthash.h"

#define yn(rc) ((rc!=0U)?"y":"n")
int main(int argc,char*argv[])
unsigned int TrivialHash(const char *s, size_t len)
{
unsigned rc;
char *c = (char *)malloc(8UL);
if (c == NULL) {
exit(-1);
unsigned int h = 0;
int i;
for (i=0; i < len; ++i) {
h += (unsigned char)s[i];
}
*(c+0) = 0x00;
unsigned *al = (unsigned*)(c+0);
*(c+1) = 0x01;
unsigned *u1 = (unsigned*)(c+1);
*(c+2) = 0x02;
unsigned *u2 = (unsigned*)(c+2);
*(c+3) = 0x03;
unsigned *u3 = (unsigned*)(c+3);
*(c+4) = 0x04;
*(c+5) = 0x05;
*(c+6) = 0x06;
*(c+7) = 0x07;
return h;
}

struct test_t {
int a;
int b;
UT_hash_handle hh;
};

struct test_t *make_test(int value)
{
struct test_t *test = (struct test_t *)malloc(sizeof *test);
assert(test != NULL);
test->a = value;
return test;
}

int main()
{
struct test_t *tests = NULL;
struct test_t *test = NULL;
int x;
unsigned int h;

x = 0x0042;
HASH_VALUE(&x, sizeof x, h);
assert(h == 0x42);

x = 0x4002;
HASH_VALUE(&x, sizeof x, h);
assert(h == 0x42);

/* ---------------------------------------- */
/* test whether alignment is detected properly */
test = make_test(0x0042);
HASH_ADD_INT(tests, a, test);
test = make_test(0x4002);
HASH_ADD_INT(tests, a, test);

rc = MUR_PLUS0_ALIGNED(al);
printf("al aligned (y): %s\n", yn(rc));
rc = MUR_PLUS0_ALIGNED(u1);
printf("u1 aligned (n): %s\n", yn(rc));
rc = MUR_PLUS0_ALIGNED(u2);
printf("u2 aligned (n): %s\n", yn(rc));
rc = MUR_PLUS0_ALIGNED(u3);
printf("u3 aligned (n): %s\n", yn(rc));
printf("\n");
x = 0x4002;
test = NULL;
HASH_FIND_BYHASHVALUE(hh, tests, &x, sizeof x, 0x42, test);
assert(test != NULL);
assert(test->a == 0x4002);

rc = MUR_PLUS1_ALIGNED(al);
printf("al plus1 (n): %s\n", yn(rc));
rc = MUR_PLUS1_ALIGNED(u1);
printf("u1 plus1 (y): %s\n", yn(rc));
rc = MUR_PLUS1_ALIGNED(u2);
printf("u2 plus1 (n): %s\n", yn(rc));
rc = MUR_PLUS1_ALIGNED(u3);
printf("u3 plus1 (n): %s\n", yn(rc));
printf("\n");
x = 0x0042;
test = NULL;
HASH_FIND_BYHASHVALUE(hh, tests, &x, sizeof x, 0x42, test);
assert(test != NULL);
assert(test->a == 0x0042);

rc = MUR_PLUS2_ALIGNED(al);
printf("al plus2 (n): %s\n", yn(rc));
rc = MUR_PLUS2_ALIGNED(u1);
printf("u1 plus2 (n): %s\n", yn(rc));
rc = MUR_PLUS2_ALIGNED(u2);
printf("u2 plus2 (y): %s\n", yn(rc));
rc = MUR_PLUS2_ALIGNED(u3);
printf("u3 plus2 (n): %s\n", yn(rc));
printf("\n");
x = 0x4002;
test = NULL;
HASH_FIND_BYHASHVALUE(hh, tests, &x, sizeof x, 0x43, test);
assert(test == NULL);

rc = MUR_PLUS3_ALIGNED(al);
printf("al plus3 (n): %s\n", yn(rc));
rc = MUR_PLUS3_ALIGNED(u1);
printf("u1 plus3 (n): %s\n", yn(rc));
rc = MUR_PLUS3_ALIGNED(u2);
printf("u2 plus3 (n): %s\n", yn(rc));
rc = MUR_PLUS3_ALIGNED(u3);
printf("u3 plus3 (y): %s\n", yn(rc));
printf("\n");
x = 0x0042;
test = NULL;
HASH_FIND_BYHASHVALUE(hh, tests, &x, sizeof x, 0x43, test);
assert(test == NULL);

/* ---------------------------------------- */
/* test careful reassembly of an unaligned integer */
#if 0 /* commented out since result is endian dependent */
rc = MUR_GETBLOCK(al,0);
printf("%x\n", rc);
rc = MUR_GETBLOCK(u1,0);
printf("%x\n", rc);
rc = MUR_GETBLOCK(u2,0);
printf("%x\n", rc);
rc = MUR_GETBLOCK(u3,0);
printf("%x\n", rc);
#endif
x = 0x4003;
test = NULL;
HASH_FIND_BYHASHVALUE(hh, tests, &x, sizeof x, 0x42, test);
assert(test == NULL);

free(c);
return 0;
HASH_CLEAR(hh, tests);
}

0 comments on commit aa8d42d

Please sign in to comment.