diff --git a/misc/queryPhraseTableMin.cpp b/misc/queryPhraseTableMin.cpp index f68117336..0b4324020 100644 --- a/misc/queryPhraseTableMin.cpp +++ b/misc/queryPhraseTableMin.cpp @@ -51,8 +51,8 @@ int main(int argc, char **argv) const_cast&>(parameter->GetParam("factor-delimiter")).resize(1, "||dummy_string||"); const_cast&>(parameter->GetParam("input-factors")).resize(1, "0"); const_cast&>(parameter->GetParam("verbose")).resize(1, "0"); - const_cast&>(parameter->GetParam("weight-w")).resize(1, "0"); - const_cast&>(parameter->GetParam("weight-d")).resize(1, "0"); + //const_cast&>(parameter->GetParam("weight-w")).resize(1, "0"); + //const_cast&>(parameter->GetParam("weight-d")).resize(1, "0"); StaticData::InstanceNonConst().LoadData(parameter); diff --git a/moses/TranslationModel/CompactPT/PhraseDecoder.cpp b/moses/TranslationModel/CompactPT/PhraseDecoder.cpp index 085a7337c..c0767dad9 100644 --- a/moses/TranslationModel/CompactPT/PhraseDecoder.cpp +++ b/moses/TranslationModel/CompactPT/PhraseDecoder.cpp @@ -190,7 +190,7 @@ std::string PhraseDecoder::MakeSourceKey(std::string &source) return source + m_separator; } -TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &sourcePhrase, bool topLevel) +TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &sourcePhrase, bool topLevel, bool eval) { // Not using TargetPhraseCollection avoiding "new" operator @@ -234,7 +234,7 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase & // Decompress and decode target phrase collection TargetPhraseVectorPtr decodedPhraseColl = - DecodeCollection(tpv, encodedBitStream, sourcePhrase, topLevel); + DecodeCollection(tpv, encodedBitStream, sourcePhrase, topLevel, eval); return decodedPhraseColl; } else @@ -243,7 +243,7 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase & TargetPhraseVectorPtr PhraseDecoder::DecodeCollection( TargetPhraseVectorPtr tpv, BitWrapper<> &encodedBitStream, - const Phrase &sourcePhrase, bool topLevel) + const Phrase &sourcePhrase, bool topLevel, bool eval) { bool extending = tpv->size(); @@ -397,7 +397,8 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection( if(scores.size() == m_numScoreComponent) { targetPhrase->GetScoreBreakdown().Assign(&m_phraseDictionary, scores); - targetPhrase->Evaluate(sourcePhrase); + if(eval) + targetPhrase->Evaluate(sourcePhrase); if(m_containsAlignmentInfo) state = Alignment; diff --git a/moses/TranslationModel/CompactPT/PhraseDecoder.h b/moses/TranslationModel/CompactPT/PhraseDecoder.h index 85e9334da..413918314 100644 --- a/moses/TranslationModel/CompactPT/PhraseDecoder.h +++ b/moses/TranslationModel/CompactPT/PhraseDecoder.h @@ -131,12 +131,13 @@ public: size_t Load(std::FILE* in); TargetPhraseVectorPtr CreateTargetPhraseCollection(const Phrase &sourcePhrase, - bool topLevel = false); + bool topLevel = false, bool eval = true); TargetPhraseVectorPtr DecodeCollection(TargetPhraseVectorPtr tpv, BitWrapper<> &encodedBitStream, const Phrase &sourcePhrase, - bool topLevel); + bool topLevel, + bool eval); void PruneCache(); }; diff --git a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp index 51ff4c299..8d0f9ff2f 100644 --- a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp +++ b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp @@ -117,7 +117,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollection(const Phrase &sourcePhrase) c // Retrieve target phrase collection from phrase table TargetPhraseVectorPtr decodedPhraseColl - = m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true); + = m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, true); if(decodedPhraseColl != NULL && decodedPhraseColl->size()) { TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl)); @@ -151,7 +151,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollectionRaw(const Phrase &sourcePhrase return TargetPhraseVectorPtr(); // Retrieve target phrase collection from phrase table - return m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true); + return m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, false); } PhraseDictionaryCompact::~PhraseDictionaryCompact() diff --git a/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp b/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp index fc3b056c6..f2192ee36 100644 --- a/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp +++ b/moses/TranslationModel/CompactPT/PhraseTableCreator.cpp @@ -38,7 +38,7 @@ bool operator<(const PackedItem &pi1, const PackedItem &pi2) } std::string PhraseTableCreator::m_phraseStopSymbol = "__SPECIAL_STOP_SYMBOL__"; -std::string PhraseTableCreator::m_separator = " ||| "; +std::string PhraseTableCreator::m_separator = "|||"; PhraseTableCreator::PhraseTableCreator(std::string inPath, std::string outPath, @@ -332,12 +332,12 @@ void PhraseTableCreator::CreateRankHash() inline std::string PhraseTableCreator::MakeSourceKey(std::string &source) { - return source + m_separator; + return source + " " + m_separator + " "; } inline std::string PhraseTableCreator::MakeSourceTargetKey(std::string &source, std::string &target) { - return source + m_separator + target + m_separator; + return source + " " + m_separator + " " + target + " " + m_separator + " "; } void PhraseTableCreator::EncodeTargetPhrases() @@ -1034,17 +1034,24 @@ void RankingTask::operator()() for(size_t i = 0; i < lines.size(); i++) { std::vector tokens; Moses::TokenizeMultiCharSeparator(tokens, lines[i], m_creator.m_separator); - - if(tokens.size() < 3) { + + for(std::vector::iterator it = tokens.begin(); it != tokens.end(); it++) + *it = Moses::Trim(*it); + + if(tokens.size() < 4) { std::cerr << "Error: It seems the following line has a wrong format:" << std::endl; std::cerr << "Line " << i << ": " << lines[i] << std::endl; abort(); } - if(tokens.size() == 3 && m_creator.m_warnMe) { - std::cerr << "Warning: It seems the following line contains no alignment information, " << std::endl; - std::cerr << "but you are using PREnc encoding which makes use of alignment data. " << std::endl; - std::cerr << "Better use -encoding None or disable this warning with -no-warnings ." << std::endl; + + if(tokens[3].size() <= 1 && m_creator.m_coding != PhraseTableCreator::None) { + std::cerr << "Error: It seems the following line contains no alignment information, " << std::endl; + std::cerr << "but you are using "; + std::cerr << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc"); + std::cerr << " encoding which makes use of alignment data. " << std::endl; + std::cerr << "Use -encoding None" << std::endl; std::cerr << "Line " << i << ": " << lines[i] << std::endl; + abort(); } std::vector scores = Tokenize(tokens[2]); @@ -1125,18 +1132,23 @@ void EncodingTask::operator()() std::vector tokens; Moses::TokenizeMultiCharSeparator(tokens, lines[i], m_creator.m_separator); + for(std::vector::iterator it = tokens.begin(); it != tokens.end(); it++) + *it = Moses::Trim(*it); + if(tokens.size() < 3) { std::cerr << "Error: It seems the following line has a wrong format:" << std::endl; std::cerr << "Line " << i << ": " << lines[i] << std::endl; abort(); } - if(tokens.size() == 3 && m_creator.m_coding != PhraseTableCreator::None && m_creator.m_warnMe) { - std::cerr << "Warning: It seems the following line contains no alignment information, " << std::endl; + + if(tokens[3].size() <= 1 && m_creator.m_coding != PhraseTableCreator::None) { + std::cerr << "Error: It seems the following line contains no alignment information, " << std::endl; std::cerr << "but you are using "; std::cerr << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc"); std::cerr << " encoding which makes use of alignment data. " << std::endl; - std::cerr << "Better use -encoding None or disable this warning with -no-warnings." << std::endl; + std::cerr << "Use -encoding None" << std::endl; std::cerr << "Line " << i << ": " << lines[i] << std::endl; + abort(); } size_t ownRank = 0;