mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-11-09 16:04:41 +03:00
Fixed queryPhraseTableMin, added warnings for compacting phrase tables without alignment
This commit is contained in:
parent
69b7bd3336
commit
d3b4c11be2
@ -51,8 +51,8 @@ int main(int argc, char **argv)
|
||||
const_cast<std::vector<std::string>&>(parameter->GetParam("factor-delimiter")).resize(1, "||dummy_string||");
|
||||
const_cast<std::vector<std::string>&>(parameter->GetParam("input-factors")).resize(1, "0");
|
||||
const_cast<std::vector<std::string>&>(parameter->GetParam("verbose")).resize(1, "0");
|
||||
const_cast<std::vector<std::string>&>(parameter->GetParam("weight-w")).resize(1, "0");
|
||||
const_cast<std::vector<std::string>&>(parameter->GetParam("weight-d")).resize(1, "0");
|
||||
//const_cast<std::vector<std::string>&>(parameter->GetParam("weight-w")).resize(1, "0");
|
||||
//const_cast<std::vector<std::string>&>(parameter->GetParam("weight-d")).resize(1, "0");
|
||||
|
||||
StaticData::InstanceNonConst().LoadData(parameter);
|
||||
|
||||
|
@ -190,7 +190,7 @@ std::string PhraseDecoder::MakeSourceKey(std::string &source)
|
||||
return source + m_separator;
|
||||
}
|
||||
|
||||
TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &sourcePhrase, bool topLevel)
|
||||
TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &sourcePhrase, bool topLevel, bool eval)
|
||||
{
|
||||
|
||||
// Not using TargetPhraseCollection avoiding "new" operator
|
||||
@ -234,7 +234,7 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &
|
||||
|
||||
// Decompress and decode target phrase collection
|
||||
TargetPhraseVectorPtr decodedPhraseColl =
|
||||
DecodeCollection(tpv, encodedBitStream, sourcePhrase, topLevel);
|
||||
DecodeCollection(tpv, encodedBitStream, sourcePhrase, topLevel, eval);
|
||||
|
||||
return decodedPhraseColl;
|
||||
} else
|
||||
@ -243,7 +243,7 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &
|
||||
|
||||
TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
|
||||
TargetPhraseVectorPtr tpv, BitWrapper<> &encodedBitStream,
|
||||
const Phrase &sourcePhrase, bool topLevel)
|
||||
const Phrase &sourcePhrase, bool topLevel, bool eval)
|
||||
{
|
||||
|
||||
bool extending = tpv->size();
|
||||
@ -397,6 +397,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
|
||||
|
||||
if(scores.size() == m_numScoreComponent) {
|
||||
targetPhrase->GetScoreBreakdown().Assign(&m_phraseDictionary, scores);
|
||||
if(eval)
|
||||
targetPhrase->Evaluate(sourcePhrase);
|
||||
|
||||
if(m_containsAlignmentInfo)
|
||||
|
@ -131,12 +131,13 @@ public:
|
||||
size_t Load(std::FILE* in);
|
||||
|
||||
TargetPhraseVectorPtr CreateTargetPhraseCollection(const Phrase &sourcePhrase,
|
||||
bool topLevel = false);
|
||||
bool topLevel = false, bool eval = true);
|
||||
|
||||
TargetPhraseVectorPtr DecodeCollection(TargetPhraseVectorPtr tpv,
|
||||
BitWrapper<> &encodedBitStream,
|
||||
const Phrase &sourcePhrase,
|
||||
bool topLevel);
|
||||
bool topLevel,
|
||||
bool eval);
|
||||
|
||||
void PruneCache();
|
||||
};
|
||||
|
@ -117,7 +117,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollection(const Phrase &sourcePhrase) c
|
||||
|
||||
// Retrieve target phrase collection from phrase table
|
||||
TargetPhraseVectorPtr decodedPhraseColl
|
||||
= m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true);
|
||||
= m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, true);
|
||||
|
||||
if(decodedPhraseColl != NULL && decodedPhraseColl->size()) {
|
||||
TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl));
|
||||
@ -151,7 +151,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollectionRaw(const Phrase &sourcePhrase
|
||||
return TargetPhraseVectorPtr();
|
||||
|
||||
// Retrieve target phrase collection from phrase table
|
||||
return m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true);
|
||||
return m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, false);
|
||||
}
|
||||
|
||||
PhraseDictionaryCompact::~PhraseDictionaryCompact()
|
||||
|
@ -332,12 +332,12 @@ void PhraseTableCreator::CreateRankHash()
|
||||
|
||||
inline std::string PhraseTableCreator::MakeSourceKey(std::string &source)
|
||||
{
|
||||
return source + m_separator;
|
||||
return source + " " + m_separator + " ";
|
||||
}
|
||||
|
||||
inline std::string PhraseTableCreator::MakeSourceTargetKey(std::string &source, std::string &target)
|
||||
{
|
||||
return source + m_separator + target + m_separator;
|
||||
return source + " " + m_separator + " " + target + " " + m_separator + " ";
|
||||
}
|
||||
|
||||
void PhraseTableCreator::EncodeTargetPhrases()
|
||||
@ -1035,16 +1035,23 @@ void RankingTask::operator()()
|
||||
std::vector<std::string> tokens;
|
||||
Moses::TokenizeMultiCharSeparator(tokens, lines[i], m_creator.m_separator);
|
||||
|
||||
if(tokens.size() < 3) {
|
||||
for(std::vector<std::string>::iterator it = tokens.begin(); it != tokens.end(); it++)
|
||||
*it = Moses::Trim(*it);
|
||||
|
||||
if(tokens.size() < 4) {
|
||||
std::cerr << "Error: It seems the following line has a wrong format:" << std::endl;
|
||||
std::cerr << "Line " << i << ": " << lines[i] << std::endl;
|
||||
abort();
|
||||
}
|
||||
if(tokens.size() == 3 && m_creator.m_warnMe) {
|
||||
std::cerr << "Warning: It seems the following line contains no alignment information, " << std::endl;
|
||||
std::cerr << "but you are using PREnc encoding which makes use of alignment data. " << std::endl;
|
||||
std::cerr << "Better use -encoding None or disable this warning with -no-warnings ." << std::endl;
|
||||
|
||||
if(tokens[3].size() <= 1 && m_creator.m_coding != PhraseTableCreator::None) {
|
||||
std::cerr << "Error: It seems the following line contains no alignment information, " << std::endl;
|
||||
std::cerr << "but you are using ";
|
||||
std::cerr << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
|
||||
std::cerr << " encoding which makes use of alignment data. " << std::endl;
|
||||
std::cerr << "Use -encoding None" << std::endl;
|
||||
std::cerr << "Line " << i << ": " << lines[i] << std::endl;
|
||||
abort();
|
||||
}
|
||||
|
||||
std::vector<float> scores = Tokenize<float>(tokens[2]);
|
||||
@ -1125,18 +1132,23 @@ void EncodingTask::operator()()
|
||||
std::vector<std::string> tokens;
|
||||
Moses::TokenizeMultiCharSeparator(tokens, lines[i], m_creator.m_separator);
|
||||
|
||||
for(std::vector<std::string>::iterator it = tokens.begin(); it != tokens.end(); it++)
|
||||
*it = Moses::Trim(*it);
|
||||
|
||||
if(tokens.size() < 3) {
|
||||
std::cerr << "Error: It seems the following line has a wrong format:" << std::endl;
|
||||
std::cerr << "Line " << i << ": " << lines[i] << std::endl;
|
||||
abort();
|
||||
}
|
||||
if(tokens.size() == 3 && m_creator.m_coding != PhraseTableCreator::None && m_creator.m_warnMe) {
|
||||
std::cerr << "Warning: It seems the following line contains no alignment information, " << std::endl;
|
||||
|
||||
if(tokens[3].size() <= 1 && m_creator.m_coding != PhraseTableCreator::None) {
|
||||
std::cerr << "Error: It seems the following line contains no alignment information, " << std::endl;
|
||||
std::cerr << "but you are using ";
|
||||
std::cerr << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
|
||||
std::cerr << " encoding which makes use of alignment data. " << std::endl;
|
||||
std::cerr << "Better use -encoding None or disable this warning with -no-warnings." << std::endl;
|
||||
std::cerr << "Use -encoding None" << std::endl;
|
||||
std::cerr << "Line " << i << ": " << lines[i] << std::endl;
|
||||
abort();
|
||||
}
|
||||
|
||||
size_t ownRank = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user