Correction of jamfiles etc.

This commit is contained in:
Marcin Junczys-Dowmunt 2012-08-04 15:39:30 +02:00
parent 88a860d776
commit a245e9ea55
16 changed files with 166 additions and 126 deletions

View File

@ -75,8 +75,7 @@ if [ option.get "with-tcmalloc" : : "yes" ] {
requirements += [ option.get "notrace" : <define>TRACE_ENABLE=1 ] ;
requirements += [ option.get "enable-boost-pool" : : <define>USE_BOOST_POOL ] ;
local with-cmph = [ option.get "with-cmph" ] ;
if $(with-cmph) {
if [ option.get "with-cmph" ] {
requirements += <define>HAVE_CMPH ;
}

View File

@ -8,8 +8,8 @@ exe queryLexicalTable : queryLexicalTable.cpp ../moses/src//moses ;
local with-cmph = [ option.get "with-cmph" ] ;
if $(with-cmph) {
exe processPhraseTableMin : processPhraseTableMin.cpp ../moses/src//moses : <include>$(with-cmph)/include ;
exe processLexicalTableMin : processLexicalTableMin.cpp ../moses/src//moses : <include>$(with-cmph)/include ;
exe processPhraseTableMin : processPhraseTableMin.cpp ../moses/src//moses ;
exe processLexicalTableMin : processLexicalTableMin.cpp ../moses/src//moses ;
alias programsMin : processPhraseTableMin processLexicalTableMin ;
}

View File

@ -25,9 +25,9 @@ void printHelp(char **argv)
"\t-quantize int -- maximum number of scores per score component\n"
"\n"
" For more information see: http://www.statmt.org/moses/...\n"
" For more information see: http://www.statmt.org/moses/?n=Moses.AdvancedFeatures\n"
" and\n\n"
" @article { junczys_mtm_2012,\n"
" @article { junczys_pbml98_2012,\n"
" author = { Marcin Junczys-Dowmunt },\n"
" title = { Phrasal Rank-Encoding: Exploiting Phrase Redundancy and\n"
" Translational Relations for Phrase Table Compression },\n"

View File

@ -24,9 +24,9 @@ void printHelp(char **argv) {
"\t-quantize int -- maximum number of scores per score component\n"
"\n"
" For more information see: http://www.statmt.org/moses/...\n"
" For more information see: http://www.statmt.org/moses/?n=Moses.AdvancedFeatures\n"
" and\n\n"
" @article { junczys_mtm_2012,\n"
" @article { junczys_pbml98_2012,\n"
" author = { Marcin Junczys-Dowmunt },\n"
" title = { Phrasal Rank-Encoding: Exploiting Phrase Redundancy and\n"
" Translational Relations for Phrase Table Compression },\n"

View File

@ -20,6 +20,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "BlockHashIndex.h"
#include "CmphStringVectorAdapter.h"
#ifdef HAVE_CMPH
#include <cmph.h>
#endif
namespace Moses
{
@ -27,39 +32,39 @@ namespace Moses
BlockHashIndex::BlockHashIndex(size_t orderBits, size_t fingerPrintBits,
size_t threadsNum)
: m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits),
m_fileHandle(0), m_fileHandleStart(0), m_algo(CMPH_CHD), m_size(0),
m_fileHandle(0), m_fileHandleStart(0), m_size(0),
m_lastSaved(-1), m_lastDropped(-1), m_numLoadedRanges(0),
m_threadPool(threadsNum) {}
BlockHashIndex::BlockHashIndex(size_t orderBits, size_t fingerPrintBits,
CMPH_ALGO algo, size_t threadsNum)
: m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits),
m_fileHandle(0), m_fileHandleStart(0), m_algo(algo), m_size(0),
m_lastSaved(-1), m_lastDropped(-1), m_numLoadedRanges(0),
m_threadPool(threadsNum) {}
m_threadPool(threadsNum) {
#ifndef HAVE_CMPH
std::cerr << "minphr: CMPH support not compiled in." << std::endl;
exit(1);
#endif
}
#else
BlockHashIndex::BlockHashIndex(size_t orderBits, size_t fingerPrintBits)
: m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits),
m_fileHandle(0), m_fileHandleStart(0), m_algo(CMPH_CHD), m_size(0),
m_lastSaved(-1), m_lastDropped(-1), m_numLoadedRanges(0) {}
BlockHashIndex::BlockHashIndex(size_t orderBits, size_t fingerPrintBits, CMPH_ALGO algo)
: m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits),
m_fileHandle(0), m_fileHandleStart(0), m_algo(algo), m_size(0),
m_lastSaved(-1), m_lastDropped(-1), m_numLoadedRanges(0) {}
m_fileHandle(0), m_fileHandleStart(0), m_size(0),
m_lastSaved(-1), m_lastDropped(-1), m_numLoadedRanges(0) {
#ifndef HAVE_CMPH
std::cerr << "minphr: CMPH support not compiled in." << std::endl;
exit(1);
#endif
}
#endif
BlockHashIndex::~BlockHashIndex()
{
for(std::vector<cmph_t*>::iterator it = m_hashes.begin();
#ifdef HAVE_CMPH
for(std::vector<void*>::iterator it = m_hashes.begin();
it != m_hashes.end(); it++)
if(*it != 0)
cmph_destroy(*it);
cmph_destroy((cmph_t*)*it);
for(std::vector<PairedPackedArray<>*>::iterator it = m_arrays.begin();
it != m_arrays.end(); it++)
if(*it != 0)
delete *it;
#endif
}
size_t BlockHashIndex::GetHash(const char* key)
@ -91,9 +96,13 @@ size_t BlockHashIndex::GetHash(size_t i, const char* key)
{
if(m_hashes[i] == 0)
LoadRange(i);
size_t idx = cmph_search(m_hashes[i], key, (cmph_uint32) strlen(key));
#ifdef HAVE_CMPH
size_t idx = cmph_search((cmph_t*)m_hashes[i], key, (cmph_uint32) strlen(key));
#else
size_t idx = 0;
#endif
std::pair<size_t, size_t> orderPrint = m_arrays[i]->Get(idx, m_orderBits, m_fingerPrintBits);
m_clocks[i] = clock();
@ -140,11 +149,13 @@ void BlockHashIndex::BeginSave(std::FILE * mphf)
void BlockHashIndex::SaveRange(size_t i)
{
#ifdef HAVE_CMPH
if(m_seekIndex.size() <= i)
m_seekIndex.resize(i+1);
m_seekIndex[i] = std::ftell(m_fileHandle) - m_fileHandleStart;
cmph_dump(m_hashes[i], m_fileHandle);
m_arrays[i]->Save(m_fileHandle);
cmph_dump((cmph_t*)m_hashes[i], m_fileHandle);
m_arrays[i]->Save(m_fileHandle);
#endif
}
void BlockHashIndex::SaveLastRange()
@ -164,9 +175,10 @@ void BlockHashIndex::SaveLastRange()
void BlockHashIndex::DropRange(size_t i)
{
#ifdef HAVE_CMPH
if(m_hashes[i] != 0)
{
cmph_destroy(m_hashes[i]);
cmph_destroy((cmph_t*)m_hashes[i]);
m_hashes[i] = 0;
}
if(m_arrays[i] != 0)
@ -176,6 +188,7 @@ void BlockHashIndex::DropRange(size_t i)
m_clocks[i] = 0;
}
m_numLoadedRanges--;
#endif
}
void BlockHashIndex::DropLastRange()
@ -265,6 +278,7 @@ size_t BlockHashIndex::LoadIndex(std::FILE* mphf)
void BlockHashIndex::LoadRange(size_t i)
{
#ifdef HAVE_CMPH
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_mutex);
#endif
@ -274,10 +288,11 @@ void BlockHashIndex::LoadRange(size_t i)
m_fingerPrintBits);
m_arrays[i]->Load(m_fileHandle);
m_hashes[i] = hash;
m_hashes[i] = (void*)hash;
m_clocks[i] = clock();
m_numLoadedRanges++;
#endif
}
size_t BlockHashIndex::Load(std::string filename)
@ -326,4 +341,71 @@ void BlockHashIndex::KeepNLastRanges(float ratio, float tolerance)
}
}
// Builds the minimal perfect hash (CHD algorithm) for the key range `current`
// from a type-erased CMPH key source and registers the result in the index.
//
// current      index of the range whose hash and fingerprint array are built
// source_void  a cmph_io_adapter_t* passed as void* so that callers do not
//              need the CMPH headers
//
// When HAVE_CMPH is not defined this function does nothing.
void BlockHashIndex::CalcHash(size_t current, void* source_void)
{
#ifdef HAVE_CMPH
  cmph_io_adapter_t* source = static_cast<cmph_io_adapter_t*>(source_void);

  cmph_config_t* config = cmph_config_new(source);
  cmph_config_set_algo(config, CMPH_CHD);
  cmph_t* hash = cmph_new(config);

  PairedPackedArray<>* pv =
    new PairedPackedArray<>(source->nkeys, m_orderBits, m_fingerPrintBits);

  // Walk all keys once more, in order, to record for every hash slot the
  // original position of the key and its fingerprint.
  source->rewind(source->data);
  for(size_t i = 0; i < source->nkeys; ++i)
  {
    cmph_uint32 keylen;
    char* key;
    source->read(source->data, &key, &keylen);
    std::string temp(key, keylen);
    // The buffer handed out by read() has to be given back to the adapter;
    // `temp` holds its own copy, so the key is not needed past this point.
    // Without this call one key buffer is leaked per iteration.
    source->dispose(source->data, key, keylen);

    size_t fprint = GetFprint(temp.c_str());
    size_t idx = cmph_search(hash, temp.c_str(),
                             (cmph_uint32) temp.size());
    pv->Set(idx, i, fprint, m_orderBits, m_fingerPrintBits);
  }
  cmph_config_destroy(config);
  // NOTE(review): the adapter `source` itself is not released here —
  // confirm whether the caller owns it or whether it is leaked.

#ifdef WITH_THREADS
  // The bookkeeping containers below are guarded by m_mutex.
  boost::mutex::scoped_lock lock(m_mutex);
#endif

  if(m_hashes.size() <= current)
  {
    m_hashes.resize(current + 1, 0);
    m_arrays.resize(current + 1, 0);
    m_clocks.resize(current + 1, 0);
  }

  m_hashes[current] = static_cast<void*>(hash);
  m_arrays[current] = pv;
  m_clocks[current] = clock();
  // The index is pushed negated; presumably so that the queue yields the
  // lowest range index first — verify against the declaration of m_queue.
  m_queue.push(-current);
#endif
}
#ifdef HAVE_CMPH
// The overloads below wrap a key container in a CMPH I/O adapter and return
// it type-erased as void*, so that the class interface does not have to
// mention any CMPH type.

// Adapter over a plain vector of strings.
void* BlockHashIndex::vectorAdapter(std::vector<std::string>& v)
{
  cmph_io_adapter_t* adapter = CmphVectorAdapter(v);
  return static_cast<void*>(adapter);
}

// Adapter over a StringVector using std::allocator.
void* BlockHashIndex::vectorAdapter(StringVector<unsigned, size_t, std::allocator>& sv)
{
  cmph_io_adapter_t* adapter = CmphStringVectorAdapter(sv);
  return static_cast<void*>(adapter);
}

// Adapter over a StringVector using MmapAllocator.
void* BlockHashIndex::vectorAdapter(StringVector<unsigned, size_t, MmapAllocator>& sv)
{
  cmph_io_adapter_t* adapter = CmphStringVectorAdapter(sv);
  return static_cast<void*>(adapter);
}
#endif
}

View File

@ -29,10 +29,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <cstring>
#include <cstdio>
#include "cmph.h"
#include "MurmurHash3.h"
#include "StringVector.h"
#include "CmphStringVectorAdapter.h"
#include "PackedArray.h"
#ifdef WITH_THREADS
@ -52,12 +50,10 @@ class BlockHashIndex
std::FILE* m_fileHandle;
size_t m_fileHandleStart;
CMPH_ALGO m_algo;
StringVector<unsigned char, unsigned long> m_landmarks;
std::vector<cmph_t*> m_hashes;
std::vector<void*> m_hashes;
std::vector<clock_t> m_clocks;
std::vector<PairedPackedArray<>*> m_arrays;
@ -103,11 +99,8 @@ class BlockHashIndex
#ifdef WITH_THREADS
BlockHashIndex(size_t orderBits, size_t fingerPrintBits,
size_t threadsNum = 2);
BlockHashIndex(size_t orderBits, size_t fingerPrintBits, CMPH_ALGO algo,
size_t threadsNum = 2);
#else
BlockHashIndex(size_t orderBits, size_t fingerPrintBits);
BlockHashIndex(size_t orderBits, size_t fingerPrintBits, CMPH_ALGO algo);
#endif
~BlockHashIndex();
@ -161,57 +154,19 @@ class BlockHashIndex
template <typename Keys>
void CalcHash(size_t current, Keys &keys)
{
cmph_io_adapter_t *source = VectorAdapter(keys);
cmph_config_t *config = cmph_config_new(source);
cmph_config_set_algo(config, m_algo);
cmph_t* hash = cmph_new(config);
cmph_config_destroy(config);
PairedPackedArray<> *pv =
new PairedPackedArray<>(keys.size(), m_orderBits, m_fingerPrintBits);
size_t i = 0;
for(typename Keys::iterator it = keys.begin(); it != keys.end(); it++)
{
std::string temp = *it;
size_t fprint = GetFprint(temp.c_str());
size_t idx = cmph_search(hash, temp.c_str(),
(cmph_uint32) temp.size());
pv->Set(idx, i, fprint, m_orderBits, m_fingerPrintBits);
i++;
}
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_mutex);
#ifdef HAVE_CMPH
void* source = vectorAdapter(keys);
CalcHash(current, source);
#endif
if(m_hashes.size() <= current)
{
m_hashes.resize(current + 1, 0);
m_arrays.resize(current + 1, 0);
m_clocks.resize(current + 1, 0);
}
m_hashes[current] = hash;
m_arrays[current] = pv;
m_clocks[current] = clock();
m_queue.push(-current);
}
cmph_io_adapter_t* VectorAdapter(std::vector<std::string>& v)
{
return CmphVectorAdapter(v);
}
template <typename ValueT, typename PosT, template <typename> class Allocator>
cmph_io_adapter_t* VectorAdapter(StringVector<ValueT, PosT, Allocator>& sv)
{
return CmphStringVectorAdapter(sv);
}
void CalcHash(size_t current, void* source);
#ifdef HAVE_CMPH
void* vectorAdapter(std::vector<std::string>& v);
void* vectorAdapter(StringVector<unsigned, size_t, std::allocator>& sv);
void* vectorAdapter(StringVector<unsigned, size_t, MmapAllocator>& sv);
#endif
};
}

View File

@ -19,6 +19,8 @@ License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifdef HAVE_CMPH
#include "CmphStringVectorAdapter.h"
namespace Moses
@ -88,3 +90,6 @@ namespace Moses
}
}
#endif

View File

@ -25,11 +25,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <cassert>
#include <cstring>
#include "cmph.h"
#ifdef HAVE_CMPH
#include <cmph.h>
#include "StringVector.h"
namespace Moses
{
typedef struct
{
void *vector;
@ -98,5 +101,6 @@ namespace Moses
}
#endif
#endif

View File

@ -1,3 +1,23 @@
local current = ;
local with-cmph = [ option.get "with-cmph" ] ;
lib cmph : : <search>$(with-cmph)/lib ;
lib CompactPT : [ glob *.cpp ] ..//moses_internal cmph : <include>$(with-cmph)/include ;
if $(with-cmph) {
lib cmph : : <search>$(with-cmph)/lib ;
obj BlockHashIndex.o : BlockHashIndex.cpp ..//moses_internal cmph : <include>$(with-cmph)/include ;
obj CmphStringVectorAdapter.o : CmphStringVectorAdapter.cpp ..//moses_internal cmph : <include>$(with-cmph)/include ;
current += "--with-cmph=$(with-cmph)" ;
}
else {
obj BlockHashIndex.o : BlockHashIndex.cpp ..//moses_internal ;
obj CmphStringVectorAdapter.o : CmphStringVectorAdapter.cpp ..//moses_internal ;
alias cmph ;
}
alias sources : LexicalReorderingTableCompact.cpp LexicalReorderingTableCreator.cpp
MurmurHash3.cpp PhraseDecoder.cpp PhraseDictionaryCompact.cpp PhraseTableCreator.cpp ;
lib CompactPT : BlockHashIndex.o CmphStringVectorAdapter.o sources ..//moses_internal cmph ;
path-constant PT-LOG : bin/pt.log ;
current = $(current:J=" ") ;
current ?= "" ;
always-if-changed $(PT-LOG) $(current) : BlockHashIndex.o CmphStringVectorAdapter.o ;

View File

@ -120,7 +120,8 @@ namespace Moses
if(!m_fixed)
{
ftruncate(m_file_desc, m_map_size);
size_t read = 0;
read += ftruncate(m_file_desc, m_map_size);
m_data_ptr = (char*)mmap(0, m_map_size, PROT_READ|PROT_WRITE, MAP_SHARED,
m_file_desc, 0);
return (pointer)m_data_ptr;

View File

@ -1,13 +1,4 @@
local with-cmph = [ option.get "with-cmph" ] ;
if $(with-cmph) {
alias headers : ../../util//kenutil : : : <include>. <include>$(with-cmph)/include ;
alias compactpt : CompactPT//CompactPT ;
}
else {
alias headers : ../../util//kenutil : : : <include>. ;
alias compactpt ;
}
alias headers : ../../util//kenutil : : : <include>. ;
alias ThreadPool : ThreadPool.cpp ;
@ -26,6 +17,6 @@ lib moses_internal :
[ glob *.cpp DynSAInclude/*.cpp : PhraseDictionary.cpp ThreadPool.cpp SyntacticLanguageModel.cpp ]
synlm ThreadPool headers ;
lib moses : PhraseDictionary.cpp moses_internal CYKPlusParser//CYKPlusParser LM//LM RuleTable//RuleTable Scope3Parser//Scope3Parser fuzzy-match//fuzzy-match headers compactpt ../..//z ../../OnDiskPt//OnDiskPt ../..//boost_filesystem ;
lib moses : PhraseDictionary.cpp moses_internal CYKPlusParser//CYKPlusParser CompactPT//CompactPT LM//LM RuleTable//RuleTable Scope3Parser//Scope3Parser fuzzy-match//fuzzy-match headers ../..//z ../../OnDiskPt//OnDiskPt ../..//boost_filesystem ;
alias headers-to-install : [ glob-tree *.h ] ;

View File

@ -8,11 +8,9 @@
#include "TargetPhrase.h"
#include "TargetPhraseCollection.h"
#ifdef HAVE_CMPH
#ifndef WIN32
#include "CompactPT/LexicalReorderingTableCompact.h"
#endif
#endif
namespace Moses
{
@ -53,14 +51,14 @@ void auxAppend(IPhrase& head, const IPhrase& tail)
LexicalReorderingTable* LexicalReorderingTable::LoadAvailable(const std::string& filePath, const FactorList& f_factors, const FactorList& e_factors, const FactorList& c_factors)
{
//decide use Compact or Tree or Memory table
#ifdef HAVE_CMPH
//decide use Compact or Tree or Memory table
#ifndef WIN32
if(FileExists(filePath+".minlexr")) {
//there exists a compact binary version use that
VERBOSE(2,"Using compact lexical reordering table" << std::endl);
return new LexicalReorderingTableCompact(filePath+".minlexr", f_factors, e_factors, c_factors);
}
#endif
#endif
if(FileExists(filePath+".binlexr.idx")) {
//there exists a binary version use that
return new LexicalReorderingTableTree(filePath, f_factors, e_factors, c_factors);

View File

@ -147,11 +147,9 @@ Parameter::Parameter()
AddParam("sort-word-alignment", "Sort word alignments for more consistent display. 0=no sort (default), 1=target order");
AddParam("start-translation-id", "Id of 1st input. Default = 0");
#ifdef HAVE_CMPH
// Compact phrase table and reordering table.
AddParam("minlexr-memory", "Load lexical reordering table in minlexr format into memory");
AddParam("minphr-memory", "Load phrase table in minphr format into memory");
#endif
}
Parameter::~Parameter()

View File

@ -28,10 +28,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#ifndef WIN32
#include "PhraseDictionaryDynSuffixArray.h"
#ifdef HAVE_CMPH
#include "CompactPT/PhraseDictionaryCompact.h"
#endif
#endif
#include "RuleTable/UTrie.h"
#include "StaticData.h"
@ -209,7 +207,6 @@ PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable(const TranslationSyst
return dict;
} else if (m_implementation == Compact) {
#ifdef HAVE_CMPH
#ifndef WIN32
VERBOSE(2,"Using compact phrase table" << std::endl);
@ -224,10 +221,6 @@ PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable(const TranslationSyst
return pd;
#else
CHECK(false);
#endif
#else
std::cerr << "CMPH library not compile in. Cannot use compact phrase table." << m_implementation << endl;
CHECK(false);
#endif
}
else {

View File

@ -406,12 +406,10 @@ bool StaticData::LoadData(Parameter *parameter)
exit(1);
}
if (m_useConsensusDecoding) m_mbr=true;
#ifdef HAVE_CMPH
// Compact phrase table and reordering model
SetBooleanParameter( &m_minphrMemory, "minphr-memory", false );
SetBooleanParameter( &m_minlexrMemory, "minlexr-memory", false );
#endif
m_timeout_threshold = (m_parameter->GetParam("time-out").size() > 0) ?
Scan<size_t>(m_parameter->GetParam("time-out")[0]) : -1;

View File

@ -201,11 +201,9 @@ protected:
bool m_cubePruningLazyScoring;
size_t m_ruleLimit;
#ifdef HAVE_CMPH
// Whether to load compact phrase table and reordering table into memory
bool m_minphrMemory;
bool m_minlexrMemory;
#endif
// Initial = 0 = can be used when creating poss trans
// Other = 1 = used to calculate LM score once all steps have been processed
@ -391,7 +389,6 @@ public:
return m_nBestIncludesAlignment;
}
#ifdef HAVE_CMPH
bool UseMinphrInMemory() const {
return m_minphrMemory;
}
@ -399,7 +396,6 @@ public:
bool UseMinlexrInMemory() const {
return m_minlexrMemory;
}
#endif
size_t GetNumLinkParams() const {
return m_numLinkParams;