diff --git a/misc/Jamfile b/misc/Jamfile index cbd585c48..68de732f9 100644 --- a/misc/Jamfile +++ b/misc/Jamfile @@ -10,8 +10,9 @@ local with-cmph = [ option.get "with-cmph" ] ; if $(with-cmph) { exe processPhraseTableMin : processPhraseTableMin.cpp ../moses/src//moses ; exe processLexicalTableMin : processLexicalTableMin.cpp ../moses/src//moses ; + exe queryPhraseTableMin : queryPhraseTableMin.cpp ../moses/src//moses ; - alias programsMin : processPhraseTableMin processLexicalTableMin ; + alias programsMin : processPhraseTableMin processLexicalTableMin queryPhraseTableMin ; } else { alias programsMin ; diff --git a/misc/queryPhraseTableMin.cpp b/misc/queryPhraseTableMin.cpp new file mode 100644 index 000000000..02d889598 --- /dev/null +++ b/misc/queryPhraseTableMin.cpp @@ -0,0 +1,97 @@ +// Query compact binary phrase tables: reads one source phrase per line from stdin and prints its target phrases (with -c, only the number of matches). +// Marcin Junczys-Dowmunt, 13 September 2012 + +#include +#include +#include +#include + +#include "CompactPT/PhraseDictionaryCompact.h" +#include "Util.h" + +void usage(); + +typedef unsigned int uint; + +using namespace Moses; + +int main(int argc, char **argv) +{ + int nscores = 5; + std::string ttable = ""; + bool useAlignments = false; + bool reportCounts = false; + + for(int i = 1; i < argc; i++) { + if(!strcmp(argv[i], "-n")) { + if(i + 1 == argc) + usage(); + nscores = atoi(argv[++i]); + } else if(!strcmp(argv[i], "-t")) { + if(i + 1 == argc) + usage(); + ttable = argv[++i]; + } else if(!strcmp(argv[i], "-a")) { + useAlignments = true; + } else if (!strcmp(argv[i], "-c")) { + reportCounts = true; + } + else + usage(); + } + + if(ttable == "") + usage(); + + std::vector input(1, 0); + std::vector output(1, 0); + std::vector weight(nscores, 0); + + LMList lmList; + + PhraseDictionaryFeature pdf(Compact, nscores, nscores, input, output, ttable, weight, 0, "", ""); + PhraseDictionaryCompact pdc(nscores, Compact, &pdf, false, useAlignments); + bool ret = pdc.Load(input, output, ttable, weight, 0, lmList, 0); + if(!ret) { std::cerr << "Failed to load phrase table: " << ttable << std::endl; return 1; } /* explicit check: assert() is compiled out under NDEBUG, which would let a failed load go unnoticed */ + + std::string line; + 
while(getline(std::cin, line)) { + Phrase sourcePhrase(0); + sourcePhrase.CreateFromString(input, line, "||dummy_string||"); + + TargetPhraseVectorPtr decodedPhraseColl + = pdc.GetTargetPhraseCollectionRaw(sourcePhrase); + + if(decodedPhraseColl != NULL) { + if(reportCounts) + std::cout << sourcePhrase << decodedPhraseColl->size() << std::endl; + else + for(TargetPhraseVector::iterator it = decodedPhraseColl->begin(); it != decodedPhraseColl->end(); it++) { + TargetPhrase &tp = *it; + std::cout << sourcePhrase << "||| "; + std::cout << static_cast(tp) << "|||"; + + if(useAlignments) + std::cout << " " << tp.GetAlignmentInfo() << "|||"; + + for(size_t i = 0; i < tp.GetScoreBreakdown().size(); i++) + std::cout << " " << exp(tp.GetScoreBreakdown()[i]); + std::cout << std::endl; + } + } + else if(reportCounts) + std::cout << sourcePhrase << 0 << std::endl; + + std::cout.flush(); + } +} + +void usage() +{ /* Prints the command-line summary to stderr and terminates the process with exit status 1; never returns. */ + std::cerr << "Usage: queryPhraseTableMin [-n ] [-a] [-c] -t \n" + "-n number of scores in phrase table (default: 5)\n" + "-c only report counts of entries\n" + "-a binary phrase table contains alignments\n" + "-t phrase table\n"; + exit(1); +} diff --git a/moses/src/CompactPT/PhraseDecoder.cpp b/moses/src/CompactPT/PhraseDecoder.cpp index 3c532e5ae..8e998504c 100644 --- a/moses/src/CompactPT/PhraseDecoder.cpp +++ b/moses/src/CompactPT/PhraseDecoder.cpp @@ -327,7 +327,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection( return TargetPhraseVectorPtr(); wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank)); - if(StaticData::Instance().UseAlignmentInfo()) + if(m_phraseDictionary.m_useAlignmentInfo) { size_t trgPos = targetPhrase->GetSize(); alignment.insert(AlignPoint(srcPos, trgPos)); @@ -342,7 +342,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection( return TargetPhraseVectorPtr(); wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank)); - if(StaticData::Instance().UseAlignmentInfo()) + if(m_phraseDictionary.m_useAlignmentInfo) { 
size_t trgPos = srcPos; alignment.insert(AlignPoint(srcPos, trgPos)); @@ -398,7 +398,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection( { // insert the subphrase into the main target phrase TargetPhrase& subTp = subTpv->at(rank); - if(StaticData::Instance().UseAlignmentInfo()) + if(m_phraseDictionary.m_useAlignmentInfo) { // reconstruct the alignment data based on the alignment of the subphrase for(AlignmentInfo::const_iterator it = subTp.GetAlignmentInfo().begin(); @@ -448,14 +448,14 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection( } else { - if(StaticData::Instance().UseAlignmentInfo()) + if(m_phraseDictionary.m_useAlignmentInfo) alignment.insert(AlignPointSizeT(alignPoint)); } } if(state == Add) { - if(StaticData::Instance().UseAlignmentInfo()) + if(m_phraseDictionary.m_useAlignmentInfo) targetPhrase->SetAlignmentInfo(alignment); if(m_coding == PREnc) diff --git a/moses/src/CompactPT/PhraseDictionaryCompact.cpp b/moses/src/CompactPT/PhraseDictionaryCompact.cpp index ca40bd889..c5f963fd4 100644 --- a/moses/src/CompactPT/PhraseDictionaryCompact.cpp +++ b/moses/src/CompactPT/PhraseDictionaryCompact.cpp @@ -85,7 +85,6 @@ bool PhraseDictionaryCompact::Load(const std::vector &input // Keep source phrase index on disk indexSize = m_hash.LoadIndex(pFile); - size_t coderSize = m_phraseDecoder->Load(pFile); size_t phraseSize; @@ -136,7 +135,18 @@ PhraseDictionaryCompact::GetTargetPhraseCollection(const Phrase &sourcePhrase) c } else return NULL; - +} + +TargetPhraseVectorPtr +PhraseDictionaryCompact::GetTargetPhraseCollectionRaw(const Phrase &sourcePhrase) const { + + // There is no such source phrase if the source phrase is longer than the longest + // source phrase observed during compilation; return an empty pointer in that case + if(sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength()) + return TargetPhraseVectorPtr(); + + // Retrieve the target phrase collection from the phrase table via the phrase decoder + return m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true); } 
PhraseDictionaryCompact::~PhraseDictionaryCompact() { diff --git a/moses/src/CompactPT/PhraseDictionaryCompact.h b/moses/src/CompactPT/PhraseDictionaryCompact.h index d8807484b..6090967d2 100644 --- a/moses/src/CompactPT/PhraseDictionaryCompact.h +++ b/moses/src/CompactPT/PhraseDictionaryCompact.h @@ -50,6 +50,7 @@ protected: PhraseTableImplementation m_implementation; bool m_inMemory; + bool m_useAlignmentInfo; /* initialised from the constructor's useAlignmentInfo argument; PhraseDecoder::DecodeCollection reads it to decide whether to build and set alignment info */ typedef std::vector PhraseCache; #ifdef WITH_THREADS @@ -75,11 +76,14 @@ protected: public: PhraseDictionaryCompact(size_t numScoreComponent, - PhraseTableImplementation implementation, - PhraseDictionaryFeature* feature) + PhraseTableImplementation implementation, + PhraseDictionaryFeature* feature, + bool inMemory = StaticData::Instance().UseMinphrInMemory(), + bool useAlignmentInfo = StaticData::Instance().UseAlignmentInfo()) : PhraseDictionary(numScoreComponent, feature), m_implementation(implementation), - m_inMemory(StaticData::Instance().UseMinphrInMemory()), + m_inMemory(inMemory), + m_useAlignmentInfo(useAlignmentInfo), m_hash(10, 16), m_phraseDecoder(0) {} @@ -95,7 +99,8 @@ public: , float weightWP); const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase &source) const; - + TargetPhraseVectorPtr GetTargetPhraseCollectionRaw(const Phrase &source) const; /* returns an empty pointer when source is longer than the decoder's maximum source phrase length, otherwise the vector produced by the phrase decoder's CreateTargetPhraseCollection(source, true) */ + void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase); void InitializeForInput(const Moses::InputType&);