/* Mirror of https://github.com/moses-smt/mosesdecoder.git (synced 2024-12-30 15:34:01 +03:00); 395 lines, 8.8 KiB, C. */
/*
 * Copyright 1993, 1995 Christopher Seiwald.
 * Copyright 2011 Steven Watanabe
 *
 * This file is part of Jam - see jam.c for Copyright information.
 */
|
|
|
|
/*
 * object.c - object manipulation routines
 *
 * External functions:
 *    object_new()       - create an object from a string
 *    object_new_range() - create an object from a string of given length
 *    object_copy()      - return a copy of an object
 *    object_free()      - free an object
 *    object_str()       - get the string value of an object
 *    object_equal()     - compare two objects
 *    object_hash()      - get the hash value of an object
 *    object_done()      - free string tables
 *
 * This implementation builds a hash table of all strings, so that multiple
 * calls of object_new() on the same string allocate memory for the string once.
 * Individual strings are not freed by object_free() (unless BJAM_NO_MEM_CACHE
 * is defined); their storage is released by object_done().
 */
|
|
|
|
#include "jam.h"
|
|
#include "object.h"
|
|
|
|
#include <assert.h>
|
|
#include <stddef.h>
|
|
#include <stdlib.h>
|
|
|
|
|
|
/* Tag written into every item header in debug builds; object_validate()
 * compares against it to catch pointers that are not objects. */
#define OBJECT_MAGIC 0xa762e0e3u

#if !defined( object_copy )

/*
 * hash_header - bookkeeping that precedes the characters of each string.
 *
 * Only defined here when object_copy is not already a macro.
 * NOTE(review): presumably object.h supplies the struct itself when it defines
 * object_copy as a macro - confirm against the header.
 */
struct hash_header
{
#if !defined( NDEBUG )
    unsigned magic;             /* OBJECT_MAGIC; debug builds only */
#endif
    unsigned hash;              /* hash_keyval() of the string */
    struct hash_item * next;    /* next item in the same bucket chain */
};

#endif
|
|
|
|
struct hash_item
|
|
{
|
|
struct hash_header header;
|
|
char data[ 1 ];
|
|
};
|
|
|
|
#define ALLOC_ALIGNMENT (sizeof(struct hash_item) - sizeof(struct hash_header))
|
|
|
|
/*
 * string_set - chained hash table holding every stored string.
 */
struct string_set
{
    unsigned num;                 /* number of buckets in data */
    unsigned size;                /* number of items currently stored */
    struct hash_item * * data;    /* bucket array; each entry heads a chain */
};
typedef struct string_set string_set;
|
|
|
|
/* The one table shared by all objects; object_new_range() sets it up on first
 * use, while its data pointer is still null. */
static string_set strhash;

/* Statistics reported by object_done() when DEBUG_MEM is set. Static storage
 * starts out zeroed, so no explicit initializers are needed. */
static int strtotal;        /* bytes of string data stored, NULs included */
static int strcount_in;     /* objects handed out */
static int strcount_out;    /* objects passed to object_free() */
|
|
|
|
|
|
/*
 * Immortal string allocator implementation speeds string allocation and cuts
 * down on internal fragmentation.
 */
|
|
|
|
/* Default payload size, in bytes, of one storage block. */
#define STRING_BLOCK 4096

/*
 * strblock - one chunk of string storage, linked into strblock_chain.
 * allocate() may create a block whose payload is larger than STRING_BLOCK
 * when a single request does not fit.
 */
struct strblock
{
    struct strblock * next;
    char data[ STRING_BLOCK ];
};
typedef struct strblock strblock;

/* Every block allocated so far, most recent first; freed by object_done(). */
static strblock * strblock_chain;

/* Unused storage in the block currently being carved up:
 * [ storage_start, storage_finish ). Both start out null. */
static char * storage_start;
static char * storage_finish;
|
|
|
|
|
|
/*
|
|
* allocate() - Allocate n bytes of immortal string storage.
|
|
*/
|
|
|
|
static char * allocate( size_t n )
|
|
{
|
|
#ifdef BJAM_NEWSTR_NO_ALLOCATE
|
|
return (char *)BJAM_MALLOC( n );
|
|
#else
|
|
/* See if we can grab storage from an existing block. */
|
|
size_t remaining = storage_finish - storage_start;
|
|
n = ( ( n + ALLOC_ALIGNMENT - 1 ) / ALLOC_ALIGNMENT ) * ALLOC_ALIGNMENT;
|
|
if ( remaining >= n )
|
|
{
|
|
char * result = storage_start;
|
|
storage_start += n;
|
|
return result;
|
|
}
|
|
else /* Must allocate a new block. */
|
|
{
|
|
strblock * new_block;
|
|
size_t nalloc = n;
|
|
if ( nalloc < STRING_BLOCK )
|
|
nalloc = STRING_BLOCK;
|
|
|
|
/* Allocate a new block and link into the chain. */
|
|
new_block = (strblock *)BJAM_MALLOC( offsetof( strblock, data[ 0 ] ) +
|
|
nalloc * sizeof( new_block->data[ 0 ] ) );
|
|
if ( new_block == 0 )
|
|
return 0;
|
|
new_block->next = strblock_chain;
|
|
strblock_chain = new_block;
|
|
|
|
/* Take future allocations out of the larger remaining space. */
|
|
if ( remaining < nalloc - n )
|
|
{
|
|
storage_start = new_block->data + n;
|
|
storage_finish = new_block->data + nalloc;
|
|
}
|
|
return new_block->data;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
|
|
/*
 * hash_keyval() - hash the first size bytes of key.
 *
 * The bytes are consumed in unsigned-int sized words (copied with memcpy, so
 * key needs no particular alignment). A final, zero-padded partial word is
 * always folded in, even when no bytes are left over. The result depends on
 * the platform's byte order and on sizeof( unsigned int ).
 */
static unsigned int hash_keyval( char const * key, int const size )
{
    unsigned int const multiplier = 2147059363;
    size_t const word = sizeof( unsigned int );
    size_t const whole_words = size / word;
    size_t const tail_bytes = size % word;
    char const * const tail = key + whole_words * word;
    unsigned int acc = 0;
    unsigned int chunk;

    /* Full words. */
    for ( ; key != tail; key += word )
    {
        memcpy( &chunk, key, word );
        acc = acc * multiplier + chunk;
    }

    /* Remaining 0 .. word-1 bytes, padded with zeros. */
    chunk = 0;
    memcpy( &chunk, tail, tail_bytes );
    acc = acc * multiplier + chunk;

    /* Mix some of the high bits into the low ones. */
    return acc + ( acc >> 17 );
}
|
|
|
|
|
|
/*
 * string_set_init() - set up an empty table with four zeroed buckets.
 * The BJAM_MALLOC result is used without a null check.
 */
static void string_set_init( string_set * set )
{
    unsigned const initial_buckets = 4;
    size_t const bytes = initial_buckets * sizeof( struct hash_item * );
    struct hash_item * * const table = (struct hash_item * *)BJAM_MALLOC( bytes );

    memset( table, 0, bytes );

    set->data = table;
    set->num = initial_buckets;
    set->size = 0;
}
|
|
|
|
|
|
/*
 * string_set_done() - release the bucket array of a table.
 * The items on the chains are not freed here, and the fields of *set are left
 * as they were.
 */
static void string_set_done( string_set * set )
{
    struct hash_item * * const table = set->data;
    BJAM_FREE( table );
}
|
|
|
|
|
|
/*
 * string_set_resize() - double the number of buckets and rehash every item.
 *
 * Items are moved, not copied: each one is unlinked from its old chain and
 * pushed onto the head of its new chain, using the hash stored in its header.
 * The item count (set->size) is unchanged. The BJAM_MALLOC result is used
 * without a null check.
 */
static void string_set_resize( string_set * set )
{
    unsigned const old_count = set->num;
    unsigned const new_count = old_count * 2;
    size_t const bytes = sizeof( struct hash_item * ) * new_count;
    struct hash_item * * const old_table = set->data;
    struct hash_item * * const new_table = (struct hash_item * *)BJAM_MALLOC( bytes );
    unsigned bucket;

    memset( new_table, 0, bytes );

    for ( bucket = 0; bucket < old_count; ++bucket )
    {
        struct hash_item * item = old_table[ bucket ];
        while ( item )
        {
            /* Remember the successor before relinking the item. */
            struct hash_item * const following = item->header.next;
            unsigned const slot = item->header.hash % new_count;
            item->header.next = new_table[ slot ];
            new_table[ slot ] = item;
            item = following;
        }
    }

    BJAM_FREE( old_table );
    set->data = new_table;
    set->num = new_count;
}
|
|
|
|
|
|
/*
 * string_set_insert() - find or add the string made of the first size bytes
 * of string.
 *
 * Returns a pointer to the stored, NUL-terminated characters. If an equal
 * string is already in the table that one is returned; otherwise a new item is
 * allocated, filled in and pushed onto the head of its bucket. The table is
 * doubled first when it already holds at least as many items as buckets.
 * The result of allocate() is used without a null check.
 */
static char const * string_set_insert( string_set * set, char const * string,
    int const size )
{
    unsigned const hash = hash_keyval( string, size );
    unsigned bucket = hash % set->num;
    struct hash_item * item = set->data[ bucket ];

    /* Look for an existing item: same leading bytes and the same length. */
    while ( item )
    {
        if ( strncmp( item->data, string, size ) == 0 &&
            item->data[ size ] == '\0' )
            return item->data;
        item = item->header.next;
    }

    /* Growing changes the bucket count, so the bucket must be recomputed. */
    if ( set->size >= set->num )
    {
        string_set_resize( set );
        bucket = hash % set->num;
    }

    /* Header plus characters plus terminating NUL. */
    item = (struct hash_item *)allocate( sizeof( struct hash_header ) + size +
        1 );
    memcpy( item->data, string, size );
    item->data[ size ] = '\0';
    item->header.hash = hash;
#ifndef NDEBUG
    item->header.magic = OBJECT_MAGIC;
#endif
    assert( hash_keyval( item->data, size ) == item->header.hash );

    /* Link the new item at the head of its chain and update the counters. */
    item->header.next = set->data[ bucket ];
    set->data[ bucket ] = item;
    ++set->size;
    strtotal += size + 1;

    return item->data;
}
|
|
|
|
|
|
/*
 * object_get_item() - map an OBJECT * back to the hash_item that contains it.
 * An object pointer addresses the data member of its item, so the item starts
 * offsetof( struct hash_item, data ) bytes earlier.
 */
static struct hash_item * object_get_item( OBJECT * obj )
{
    char * const chars = (char *)obj;
    char * const base = chars - offsetof( struct hash_item, data );
    return (struct hash_item *)base;
}
|
|
|
|
|
|
/*
 * object_validate() - debug check that obj is non-null and carries the
 * OBJECT_MAGIC tag in its header. Both checks are asserts, so the function
 * does nothing when NDEBUG is defined.
 */
static void object_validate( OBJECT * obj )
{
    assert( obj != 0 );
    assert( OBJECT_MAGIC == object_get_item( obj )->header.magic );
}
|
|
|
|
|
|
/*
|
|
* object_new_range() - create an object from a string of given length
|
|
*/
|
|
|
|
OBJECT * object_new_range( char const * const string, int const size )
|
|
{
|
|
++strcount_in;
|
|
|
|
#ifdef BJAM_NO_MEM_CACHE
|
|
{
|
|
struct hash_item * const m = (struct hash_item *)BJAM_MALLOC( sizeof(
|
|
struct hash_header ) + size + 1 );
|
|
strtotal += size + 1;
|
|
memcpy( m->data, string, size );
|
|
m->data[ size ] = '\0';
|
|
m->header.magic = OBJECT_MAGIC;
|
|
return (OBJECT *)m->data;
|
|
}
|
|
#else
|
|
if ( !strhash.data )
|
|
string_set_init( &strhash );
|
|
return (OBJECT *)string_set_insert( &strhash, string, size );
|
|
#endif
|
|
}
|
|
|
|
|
|
/*
|
|
* object_new() - create an object from a string
|
|
*/
|
|
|
|
OBJECT * object_new( char const * const string )
|
|
{
|
|
return object_new_range( string, strlen( string ) );
|
|
}
|
|
|
|
|
|
#ifndef object_copy
|
|
|
|
/*
|
|
* object_copy() - return a copy of an object
|
|
*/
|
|
|
|
OBJECT * object_copy( OBJECT * obj )
|
|
{
|
|
object_validate( obj );
|
|
#ifdef BJAM_NO_MEM_CACHE
|
|
return object_new( object_str( obj ) );
|
|
#else
|
|
++strcount_in;
|
|
return obj;
|
|
#endif
|
|
}
|
|
|
|
|
|
/*
|
|
* object_free() - free an object
|
|
*/
|
|
|
|
void object_free( OBJECT * obj )
|
|
{
|
|
object_validate( obj );
|
|
#ifdef BJAM_NO_MEM_CACHE
|
|
BJAM_FREE( object_get_item( obj ) );
|
|
#endif
|
|
++strcount_out;
|
|
}
|
|
|
|
|
|
/*
|
|
* object_str() - return the OBJECT's internal C string
|
|
*/
|
|
|
|
char const * object_str( OBJECT * obj )
|
|
{
|
|
object_validate( obj );
|
|
return (char const *)obj;
|
|
}
|
|
|
|
|
|
/*
|
|
* object_equal() - compare two objects
|
|
*/
|
|
|
|
int object_equal( OBJECT * lhs, OBJECT * rhs )
|
|
{
|
|
object_validate( lhs );
|
|
object_validate( rhs );
|
|
#ifdef BJAM_NO_MEM_CACHE
|
|
return !strcmp( object_str( lhs ), object_str( rhs ) );
|
|
#else
|
|
assert( ( lhs == rhs ) == !strcmp( object_str( lhs ), object_str( rhs ) ) );
|
|
return lhs == rhs;
|
|
#endif
|
|
}
|
|
|
|
|
|
/*
|
|
* object_hash() - returns the hash value of an object
|
|
*/
|
|
|
|
unsigned int object_hash( OBJECT * obj )
|
|
{
|
|
object_validate( obj );
|
|
#ifdef BJAM_NO_MEM_CACHE
|
|
return hash_keyval( object_str( obj ), strlen( object_str( obj ) ) );
|
|
#else
|
|
return object_get_item( obj )->header.hash;
|
|
#endif
|
|
}
|
|
|
|
#endif
|
|
|
|
/*
|
|
* object_done() - free string tables.
|
|
*/
|
|
|
|
void object_done()
|
|
{
|
|
#ifdef BJAM_NEWSTR_NO_ALLOCATE
|
|
unsigned i;
|
|
for ( i = 0; i < strhash.num; ++i )
|
|
{
|
|
while ( strhash.data[ i ] )
|
|
{
|
|
struct hash_item * item = strhash.data[ i ];
|
|
strhash.data[ i ] = item->header.next;
|
|
BJAM_FREE( item );
|
|
}
|
|
}
|
|
#else
|
|
/* Reclaim string blocks. */
|
|
while ( strblock_chain )
|
|
{
|
|
strblock * const n = strblock_chain->next;
|
|
BJAM_FREE( strblock_chain );
|
|
strblock_chain = n;
|
|
}
|
|
#endif
|
|
|
|
string_set_done( &strhash );
|
|
|
|
if ( DEBUG_MEM )
|
|
{
|
|
printf( "%dK in strings\n", strtotal / 1024 );
|
|
if ( strcount_in != strcount_out )
|
|
printf( "--- %d strings of %d dangling\n", strcount_in -
|
|
strcount_out, strcount_in );
|
|
}
|
|
}
|