#include "gb-include.h"

// . table of pseudo-random 64-bit values indexed as [position][byte value]
// . filled in by hashinit(); every hash function below XORs entries from it
unsigned long long g_hashtab[256][256] ;

// . used for computing zobrist hash of a string up to 256 chars long
// . first array component is the max length, 256, of the string
// . fills g_hashtab from a fixed-seed rand() sequence
// . returns true if the table was already initialized, or if it was just
//   filled and g_hashtab[0][0] matches the expected constant
// . returns false (and stays uninitialized) if that check fails
bool hashinit () {
	static bool s_initialized = false;
	// bail if we already called this
	if ( s_initialized ) return true;
	// the "top bit" fix-ups below are written against a RAND_MAX of
	// 0x7fffffff (original note: "it's 0x7fffffff")
	// seed with same value so we get same rand sequence for all
	srand ( 1945687 );
	// NOTE: the order of the four rand() calls per entry determines the
	// table contents, so do not reorder them
	for ( long i = 0 ; i < 256 ; i++ )
		for ( long j = 0 ; j < 256 ; j++ ) {
			// high 32 bits
			g_hashtab [i][j] = (unsigned long long)rand();
			// the top bit never gets set, so fix
			if ( rand() > (0x7fffffff / 2) )
				g_hashtab[i][j] |= 0x80000000;
			g_hashtab [i][j] <<= 32;
			// low 32 bits
			g_hashtab [i][j] |= (unsigned long long)rand();
			// the top bit never gets set, so fix
			if ( rand() > (0x7fffffff / 2) )
				g_hashtab[i][j] |= 0x80000000;
		}
	// sanity check the first entry against the expected value
	if ( g_hashtab[0][0] != 6720717044602784129LL ) return false;
	s_initialized = true;
	return true;
}

// . 8-bit hash of the "len" bytes at "s"
// . XORs together the low 8 bits of one g_hashtab entry per input byte
// . the position index is cast to unsigned char, so it wraps every 256 bytes
// TODO: ensure this wraps over properly
unsigned char hash8 ( char *s , long len ) {
	unsigned char h = 0;
	long i = 0;
	while ( i < len ) {
		h ^= (unsigned char) g_hashtab [(unsigned char)i]
			[(unsigned char)s[i]];
		i++;
	}
	return h;
}

// . 16-bit hash of the "len" bytes at "s"
// . same scheme as hash8() but keeps the low 16 bits of each table entry
unsigned short hash16 ( char *s , long len ) {
	unsigned short h = 0;
	long i = 0;
	while ( i < len ) {
		h ^= (unsigned short) g_hashtab [(unsigned char)i]
			[(unsigned char)s[i]];
		i++;
	}
	return h;
}

// . hash of the NUL-terminated string "s"
// . NOTE(review): the accumulator and return type are "unsigned long", which
//   is wider than 32 bits on LP64 targets despite the "32" in the name --
//   confirm what callers expect
unsigned long hash32n ( char *s ) {
	unsigned long h = 0;
	long i = 0;
	while ( s[i] ) {
		h ^= (unsigned long) g_hashtab [(unsigned char)i]
			[(unsigned char)s[i]];
		i++;
	}
	return h;
}

// . 64-bit hash of the NUL-terminated string "s"
// . "startHash" is the initial accumulator value, so an empty string
//   returns startHash unchanged
uint64_t hash64n ( char *s, unsigned long long startHash ) {
	unsigned long long h = startHash;
	for ( long i = 0 ; s[i] ; i++ )
		h ^= g_hashtab [(unsigned char)i] [(unsigned char)s[i]];
	return h;
}

uint64_t hash64n_nospaces ( char *s, long len ) {
	unsigned long long h = 0LL;
	long k = 0;
	// NOTE(review): SOURCE IS CORRUPT FROM THIS LOOP HEADER ONWARD.
	// The text between "i" and "> 32" is missing (presumably an
	// extraction step dropped everything between a '<' and a later '>').
	// The rest of hash64n_nospaces(), whatever definitions followed it,
	// and the head of the function that owns the sprintf() lines below
	// are gone. The surviving fragment is kept verbatim rather than
	// guessed at; restore the missing span from version control before
	// building.
	for ( long i = 0 ; i> 32) );
	sprintf(buf+10, "%08lx", (unsigned long)h );
}

// . only utf8 allowed now
// . hashes the bytes in [p,pend) with hash64d() and truncates to 32 bits
uint32_t hash32d ( char *p, char *pend ) {
	return (uint32_t)hash64d ( p , pend - p);
}

// . only utf8 allowed now
// . stole this from hash.h hash64LowerE()
// . hashes the "plen" bytes at "p" after passing each character through
//   getClean_a() (single-byte chars) or getClean_utf8() (multi-byte chars)
// . the position counter "i" is a uint8_t, so it wraps every 256 hashed bytes
unsigned long long hash64d ( char *p, long plen ) {
	char *pend = p + plen;
	uint64_t h = 0;
	uint8_t i = 0;
	char cs = 0;
	// NOTE(review): the loop advances by cs, so it relies on
	// getUtf8CharSize() always returning a value > 0 -- confirm
	for ( ; p < pend ; p += cs ) {
		// get the size
		cs = getUtf8CharSize ( p );
		// deal with one ascii char quickly
		if ( cs == 1 ) {
			// clean it up here
			uint8_t c = getClean_a ( *p );
			h ^= g_hashtab [i++] [c];
			continue;
		}
		// filter it
		UChar32 x = getClean_utf8 ( p );
		// back to utf8. zero-filled so that every byte hashed below has
		// a defined value even if utf8Encode() writes fewer bytes than
		// the size it reports
		uint8_t tmp[4] = { 0 , 0 , 0 , 0 };
		char ncs = utf8Encode ( x , (char *)tmp );
		// sanity check: deliberately crash on an impossible size
		if ( ncs > 4 ) { char *xx=NULL;*xx=0; }
		// hash it up, one table lookup per encoded byte
		h ^= g_hashtab [i++][tmp[0]];
		if ( ncs == 1 ) continue;
		h ^= g_hashtab [i++][tmp[1]];
		if ( ncs == 2 ) continue;
		h ^= g_hashtab [i++][tmp[2]];
		if ( ncs == 3 ) continue;
		h ^= g_hashtab [i++][tmp[3]];
	}
	return h;
}

// . normalizes one single-byte character before hashing
// . alnum chars are lower-cased
// . '\n' becomes '\0'
// . '-', '\'' and '\0' pass through unchanged
// . everything else becomes a space
uint8_t getClean_a ( char c ) {
	if ( is_alnum_a ( c ) ) return to_lower_a(c);
	if ( c == '\n' ) return '\0';
	if ( c == '-'  ) return c;
	if ( c == '\'' ) return c;
	if ( c == '\0' ) return c;
	return ' ';
}

// . normalizes the utf8 character starting at "src" before hashing
// . returns the cleaned code point
UChar32 getClean_utf8 ( char *src ) {
	// do ascii fast
	if ( is_ascii ( *src ) ) return (UChar32)getClean_a(*src);
	// otherwise, decode the full code point
	UChar32 x = utf8Decode(src);
	// convert to lower case
	x = ucToLower (x);
	// return if alnum
	if ( ucIsAlnum ( x ) ) return x;
	// everything else is converted to space
	return (UChar32)' ';
}

//
// was inlined in hash.h below here
//