mosesdecoder/jam-files/engine/hash.c

/*
 * Copyright 1993, 1995 Christopher Seiwald.
 *
 * This file is part of Jam - see jam.c for Copyright information.
 */

# include "jam.h"
# include "hash.h"
# include "compile.h"
# include <assert.h>

/*
 * hash.c - simple in-memory hashing routines
 *
 * External routines:
 *
 *     hashinit() - initialize a hash table, returning a handle
 *     hashitem() - find a record in the table, and optionally enter a new one
 *     hashdone() - free a hash table, given its handle
 *
 * Internal routines:
 *
 *     hashrehash() - resize and rebuild hp->tab, the hash table
 *
 * 4/29/93 - ensure ITEM's are aligned
 */

/* */
#define HASH_DEBUG_PROFILE 1
/* */

char    *hashsccssid="@(#)hash.c    1.14  ()  6/20/88";

/* Header attached to all data items entered into a hash table. */

struct hashhdr
{
    struct item  * next;
    unsigned int   keyval;  /* for quick comparisons */
};

/* This structure overlays the one handed to hashenter(). Its actual size is
 * given to hashinit().
 */

struct hashdata
{
    char * key;
    /* rest of user data */
};

typedef struct item
{
    struct hashhdr  hdr;
    struct hashdata data;
} ITEM ;

# define MAX_LISTS 32

struct hash
{
    /*
     * the hash table, just an array of item pointers
     */
    struct {
        int nel;
        ITEM **base;
    } tab;

    int bloat;  /* tab.nel / items.nel */
    int inel;   /* initial number of elements */

    /*
     * the array of records, maintained by these routines
     * essentially a microallocator
     */
    struct {
        int more;   /* how many more ITEMs fit in lists[ list ] */
        ITEM *free; /* free list of items */
        char *next; /* where to put more ITEMs in lists[ list ] */
        int datalen;    /* length of records in this hash table */
        int size;   /* sizeof( ITEM ) + aligned datalen */
        int nel;    /* total ITEMs held by all lists[] */
        int list;   /* index into lists[] */

        struct {
            int nel;    /* total ITEMs held by this list */
            char *base; /* base of ITEMs array */
        } lists[ MAX_LISTS ];
    } items;

    char * name; /* just for hashstats() */
};

static void hashrehash( struct hash *hp );
static void hashstat( struct hash *hp );
static void * hash_mem_alloc(size_t datalen, size_t size);
static void hash_mem_free(size_t datalen, void * data);
#ifdef OPT_BOEHM_GC
static void hash_mem_finalizer(char * key, struct hash * hp);
#endif

static unsigned int jenkins_one_at_a_time_hash(const unsigned char *key)
{
    unsigned int hash = 0;

    while ( *key )
    {
        hash += *key++;
        hash += (hash << 10);
        hash ^= (hash >> 6);
    }
    hash += (hash << 3);
    hash ^= (hash >> 11);
    hash += (hash << 15);

    return hash;
}

/*
static unsigned int knuth_hash(const unsigned char *key)
{
    unsigned int keyval = *key;
    while ( *key )
        keyval = keyval * 2147059363 + *key++;
    return keyval;
}
*/

static unsigned int hash_keyval( const char * key_ )
{
    /*
	return knuth_hash((const unsigned char *)key_);
	*/
    return jenkins_one_at_a_time_hash((const unsigned char *)key_);
}

#define hash_bucket(hp,keyval) ((hp)->tab.base + ( (keyval) % (hp)->tab.nel ))

/*  Find the hash item for the given data. Returns pointer to the
    item and if given a pointer to the item before the found item.
    If it's the first item in a bucket, there is no previous item,
    and zero is returned for the previous item instead.
*/
static ITEM * hash_search(
    struct hash *hp,
    unsigned int keyval,
    const char * keydata,
    ITEM * * previous )
{
    ITEM * i = *hash_bucket(hp,keyval);
    ITEM * p = 0;

    for ( ; i; i = i->hdr.next )
    {
        if ( ( keyval == i->hdr.keyval ) &&
            !strcmp( i->data.key, keydata ) )
        {
            if (previous)
            {
                *previous = p;
            }
            return i;
        }
        p = i;
    }

    return 0;
}

/*
 * hash_free() - remove the given item from the table if it's there.
 * Returns 1 if found, 0 otherwise.
 *
 * NOTE: 2nd argument is HASHDATA*, not HASHDATA** as elsewhere.
 */
int
hash_free(
    register struct hash *hp,
    HASHDATA *data)
{
    ITEM * i = 0;
    ITEM * prev = 0;
    unsigned int keyval = hash_keyval(data->key);

    i = hash_search( hp, keyval, data->key, &prev );
    if (i)
    {
        /* mark it free so we skip it during enumeration */
        i->data.key = 0;
        /* unlink the record from the hash chain */
        if (prev) prev->hdr.next = i->hdr.next;
        else *hash_bucket(hp,keyval) = i->hdr.next;
        /* link it into the freelist */
        i->hdr.next = hp->items.free;
        hp->items.free = i;
        /* we have another item */
        hp->items.more++;

        return 1;
    }
    return 0;
}

/*
 * hashitem() - find a record in the table, and optionally enter a new one
 */

int
hashitem(
    register struct hash *hp,
    HASHDATA **data,
    int enter )
{
    register ITEM *i;
    char *b = (*data)->key;
    unsigned int keyval = hash_keyval(b);

    #ifdef HASH_DEBUG_PROFILE
    profile_frame prof[1];
    if ( DEBUG_PROFILE )
        profile_enter( 0, prof );
    #endif

    if ( enter && !hp->items.more )
        hashrehash( hp );

    if ( !enter && !hp->items.nel )
    {
        #ifdef HASH_DEBUG_PROFILE
        if ( DEBUG_PROFILE )
            profile_exit( prof );
        #endif
        return 0;
    }

    i = hash_search( hp, keyval, (*data)->key, 0 );
    if (i)
    {
        *data = &i->data;
        #ifdef HASH_DEBUG_PROFILE
        if ( DEBUG_PROFILE ) profile_exit( prof );
        #endif
        return !0;
    }

    if ( enter )
    {
        ITEM * * base = hash_bucket(hp,keyval);

        /* try to grab one from the free list */
        if ( hp->items.free )
        {
            i = hp->items.free;
            hp->items.free = i->hdr.next;
            assert( i->data.key == 0 );
        }
        else
        {
            i = (ITEM *)hp->items.next;
            hp->items.next += hp->items.size;
        }
        hp->items.more--;
        memcpy( (char *)&i->data, (char *)*data, hp->items.datalen );
        i->hdr.keyval = keyval;
        i->hdr.next = *base;
        *base = i;
        *data = &i->data;
        #ifdef OPT_BOEHM_GC
        if (sizeof(HASHDATA) == hp->items.datalen)
        {
            GC_REGISTER_FINALIZER(i->data.key,&hash_mem_finalizer,hp,0,0);
        }
        #endif
    }

    #ifdef HASH_DEBUG_PROFILE
    if ( DEBUG_PROFILE )
        profile_exit( prof );
    #endif
    return 0;
}

/*
 * hashrehash() - resize and rebuild hp->tab, the hash table
 */

static void hashrehash( register struct hash *hp )
{
    int i = ++hp->items.list;
    hp->items.more = i ? 2 * hp->items.nel : hp->inel;
    hp->items.next = (char *)hash_mem_alloc( hp->items.datalen, hp->items.more * hp->items.size );
    hp->items.free = 0;

    hp->items.lists[i].nel = hp->items.more;
    hp->items.lists[i].base = hp->items.next;
    hp->items.nel += hp->items.more;

    if ( hp->tab.base )
        hash_mem_free( hp->items.datalen, (char *)hp->tab.base );

    hp->tab.nel = hp->items.nel * hp->bloat;
    hp->tab.base = (ITEM **)hash_mem_alloc( hp->items.datalen, hp->tab.nel * sizeof(ITEM **) );

    memset( (char *)hp->tab.base, '\0', hp->tab.nel * sizeof( ITEM * ) );

    for ( i = 0; i < hp->items.list; ++i )
    {
        int nel = hp->items.lists[i].nel;
        char *next = hp->items.lists[i].base;

        for ( ; nel--; next += hp->items.size )
        {
            register ITEM *i = (ITEM *)next;
            ITEM **ip = hp->tab.base + i->hdr.keyval % hp->tab.nel;
            /* code currently assumes rehashing only when there are no free items */
            assert( i->data.key != 0 );

            i->hdr.next = *ip;
            *ip = i;
        }
    }
}

void hashenumerate( struct hash * hp, void (* f)( void *, void * ), void * data )
{
    int i;
    for ( i = 0; i <= hp->items.list; ++i )
    {
        char * next = hp->items.lists[i].base;
        int nel = hp->items.lists[i].nel;
        if ( i == hp->items.list )
            nel -= hp->items.more;

        for ( ; nel--; next += hp->items.size )
        {
            ITEM * i = (ITEM *)next;
            if ( i->data.key != 0 )  /* DO not enumerate freed items. */
                f( &i->data, data );
        }
    }
}

/* --- */

# define ALIGNED(x) ( ( x + sizeof( ITEM ) - 1 ) & ~( sizeof( ITEM ) - 1 ) )

/*
 * hashinit() - initialize a hash table, returning a handle
 */

struct hash *
hashinit(
    int datalen,
    char *name )
{
    struct hash *hp = (struct hash *)hash_mem_alloc( datalen, sizeof( *hp ) );

    hp->bloat = 3;
    hp->tab.nel = 0;
    hp->tab.base = (ITEM **)0;
    hp->items.more = 0;
    hp->items.free = 0;
    hp->items.datalen = datalen;
    hp->items.size = sizeof( struct hashhdr ) + ALIGNED( datalen );
    hp->items.list = -1;
    hp->items.nel = 0;
    hp->inel = 11 /* 47 */;
    hp->name = name;

    return hp;
}

/*
 * hashdone() - free a hash table, given its handle
 */

void
hashdone( struct hash *hp )
{
    int i;

    if ( !hp )
        return;

    if ( DEBUG_MEM || DEBUG_PROFILE )
        hashstat( hp );

    if ( hp->tab.base )
        hash_mem_free( hp->items.datalen, (char *)hp->tab.base );
    for ( i = 0; i <= hp->items.list; ++i )
        hash_mem_free( hp->items.datalen, hp->items.lists[i].base );
    hash_mem_free( hp->items.datalen, (char *)hp );
}

static void * hash_mem_alloc(size_t datalen, size_t size)
{
    if (sizeof(HASHDATA) == datalen)
    {
        return BJAM_MALLOC_RAW(size);
    }
    else
    {
        return BJAM_MALLOC(size);
    }
}

static void hash_mem_free(size_t datalen, void * data)
{
    if (sizeof(HASHDATA) == datalen)
    {
        BJAM_FREE_RAW(data);
    }
    else
    {
        BJAM_FREE(data);
    }
}

#ifdef OPT_BOEHM_GC
static void hash_mem_finalizer(char * key, struct hash * hp)
{
    HASHDATA d;
    d.key = key;
    hash_free(hp,&d);
}
#endif


/* ---- */

static void hashstat( struct hash * hp )
{
    ITEM * * tab = hp->tab.base;
    int nel = hp->tab.nel;
    int count = 0;
    int sets = 0;
    int run = ( tab[ nel - 1 ] != (ITEM *)0 );
    int i;
    int here;

    for ( i = nel; i > 0; --i )
    {
        if ( ( here = ( *tab++ != (ITEM *)0 ) ) )
            count++;
        if ( here && !run )
            sets++;
        run = here;
    }

    printf( "%s table: %d+%d+%d (%dK+%luK) items+table+hash, %f density\n",
        hp->name,
        count,
        hp->items.nel,
        hp->tab.nel,
        hp->items.nel * hp->items.size / 1024,
        (long unsigned)hp->tab.nel * sizeof( ITEM ** ) / 1024,
        (float)count / (float)sets );
}