mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-19 15:17:10 +03:00
Fixed queryPhraseTableMin, added warnings for compacting phrase tables without alignment
This commit is contained in:
parent
69b7bd3336
commit
d3b4c11be2
@ -51,8 +51,8 @@ int main(int argc, char **argv)
|
|||||||
const_cast<std::vector<std::string>&>(parameter->GetParam("factor-delimiter")).resize(1, "||dummy_string||");
|
const_cast<std::vector<std::string>&>(parameter->GetParam("factor-delimiter")).resize(1, "||dummy_string||");
|
||||||
const_cast<std::vector<std::string>&>(parameter->GetParam("input-factors")).resize(1, "0");
|
const_cast<std::vector<std::string>&>(parameter->GetParam("input-factors")).resize(1, "0");
|
||||||
const_cast<std::vector<std::string>&>(parameter->GetParam("verbose")).resize(1, "0");
|
const_cast<std::vector<std::string>&>(parameter->GetParam("verbose")).resize(1, "0");
|
||||||
const_cast<std::vector<std::string>&>(parameter->GetParam("weight-w")).resize(1, "0");
|
//const_cast<std::vector<std::string>&>(parameter->GetParam("weight-w")).resize(1, "0");
|
||||||
const_cast<std::vector<std::string>&>(parameter->GetParam("weight-d")).resize(1, "0");
|
//const_cast<std::vector<std::string>&>(parameter->GetParam("weight-d")).resize(1, "0");
|
||||||
|
|
||||||
StaticData::InstanceNonConst().LoadData(parameter);
|
StaticData::InstanceNonConst().LoadData(parameter);
|
||||||
|
|
||||||
|
@ -190,7 +190,7 @@ std::string PhraseDecoder::MakeSourceKey(std::string &source)
|
|||||||
return source + m_separator;
|
return source + m_separator;
|
||||||
}
|
}
|
||||||
|
|
||||||
TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &sourcePhrase, bool topLevel)
|
TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &sourcePhrase, bool topLevel, bool eval)
|
||||||
{
|
{
|
||||||
|
|
||||||
// Not using TargetPhraseCollection avoiding "new" operator
|
// Not using TargetPhraseCollection avoiding "new" operator
|
||||||
@ -234,7 +234,7 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &
|
|||||||
|
|
||||||
// Decompress and decode target phrase collection
|
// Decompress and decode target phrase collection
|
||||||
TargetPhraseVectorPtr decodedPhraseColl =
|
TargetPhraseVectorPtr decodedPhraseColl =
|
||||||
DecodeCollection(tpv, encodedBitStream, sourcePhrase, topLevel);
|
DecodeCollection(tpv, encodedBitStream, sourcePhrase, topLevel, eval);
|
||||||
|
|
||||||
return decodedPhraseColl;
|
return decodedPhraseColl;
|
||||||
} else
|
} else
|
||||||
@ -243,7 +243,7 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &
|
|||||||
|
|
||||||
TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
|
TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
|
||||||
TargetPhraseVectorPtr tpv, BitWrapper<> &encodedBitStream,
|
TargetPhraseVectorPtr tpv, BitWrapper<> &encodedBitStream,
|
||||||
const Phrase &sourcePhrase, bool topLevel)
|
const Phrase &sourcePhrase, bool topLevel, bool eval)
|
||||||
{
|
{
|
||||||
|
|
||||||
bool extending = tpv->size();
|
bool extending = tpv->size();
|
||||||
@ -397,7 +397,8 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
|
|||||||
|
|
||||||
if(scores.size() == m_numScoreComponent) {
|
if(scores.size() == m_numScoreComponent) {
|
||||||
targetPhrase->GetScoreBreakdown().Assign(&m_phraseDictionary, scores);
|
targetPhrase->GetScoreBreakdown().Assign(&m_phraseDictionary, scores);
|
||||||
targetPhrase->Evaluate(sourcePhrase);
|
if(eval)
|
||||||
|
targetPhrase->Evaluate(sourcePhrase);
|
||||||
|
|
||||||
if(m_containsAlignmentInfo)
|
if(m_containsAlignmentInfo)
|
||||||
state = Alignment;
|
state = Alignment;
|
||||||
|
@ -131,12 +131,13 @@ public:
|
|||||||
size_t Load(std::FILE* in);
|
size_t Load(std::FILE* in);
|
||||||
|
|
||||||
TargetPhraseVectorPtr CreateTargetPhraseCollection(const Phrase &sourcePhrase,
|
TargetPhraseVectorPtr CreateTargetPhraseCollection(const Phrase &sourcePhrase,
|
||||||
bool topLevel = false);
|
bool topLevel = false, bool eval = true);
|
||||||
|
|
||||||
TargetPhraseVectorPtr DecodeCollection(TargetPhraseVectorPtr tpv,
|
TargetPhraseVectorPtr DecodeCollection(TargetPhraseVectorPtr tpv,
|
||||||
BitWrapper<> &encodedBitStream,
|
BitWrapper<> &encodedBitStream,
|
||||||
const Phrase &sourcePhrase,
|
const Phrase &sourcePhrase,
|
||||||
bool topLevel);
|
bool topLevel,
|
||||||
|
bool eval);
|
||||||
|
|
||||||
void PruneCache();
|
void PruneCache();
|
||||||
};
|
};
|
||||||
|
@ -117,7 +117,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollection(const Phrase &sourcePhrase) c
|
|||||||
|
|
||||||
// Retrieve target phrase collection from phrase table
|
// Retrieve target phrase collection from phrase table
|
||||||
TargetPhraseVectorPtr decodedPhraseColl
|
TargetPhraseVectorPtr decodedPhraseColl
|
||||||
= m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true);
|
= m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, true);
|
||||||
|
|
||||||
if(decodedPhraseColl != NULL && decodedPhraseColl->size()) {
|
if(decodedPhraseColl != NULL && decodedPhraseColl->size()) {
|
||||||
TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl));
|
TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl));
|
||||||
@ -151,7 +151,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollectionRaw(const Phrase &sourcePhrase
|
|||||||
return TargetPhraseVectorPtr();
|
return TargetPhraseVectorPtr();
|
||||||
|
|
||||||
// Retrieve target phrase collection from phrase table
|
// Retrieve target phrase collection from phrase table
|
||||||
return m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true);
|
return m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
PhraseDictionaryCompact::~PhraseDictionaryCompact()
|
PhraseDictionaryCompact::~PhraseDictionaryCompact()
|
||||||
|
@ -38,7 +38,7 @@ bool operator<(const PackedItem &pi1, const PackedItem &pi2)
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::string PhraseTableCreator::m_phraseStopSymbol = "__SPECIAL_STOP_SYMBOL__";
|
std::string PhraseTableCreator::m_phraseStopSymbol = "__SPECIAL_STOP_SYMBOL__";
|
||||||
std::string PhraseTableCreator::m_separator = " ||| ";
|
std::string PhraseTableCreator::m_separator = "|||";
|
||||||
|
|
||||||
PhraseTableCreator::PhraseTableCreator(std::string inPath,
|
PhraseTableCreator::PhraseTableCreator(std::string inPath,
|
||||||
std::string outPath,
|
std::string outPath,
|
||||||
@ -332,12 +332,12 @@ void PhraseTableCreator::CreateRankHash()
|
|||||||
|
|
||||||
inline std::string PhraseTableCreator::MakeSourceKey(std::string &source)
|
inline std::string PhraseTableCreator::MakeSourceKey(std::string &source)
|
||||||
{
|
{
|
||||||
return source + m_separator;
|
return source + " " + m_separator + " ";
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::string PhraseTableCreator::MakeSourceTargetKey(std::string &source, std::string &target)
|
inline std::string PhraseTableCreator::MakeSourceTargetKey(std::string &source, std::string &target)
|
||||||
{
|
{
|
||||||
return source + m_separator + target + m_separator;
|
return source + " " + m_separator + " " + target + " " + m_separator + " ";
|
||||||
}
|
}
|
||||||
|
|
||||||
void PhraseTableCreator::EncodeTargetPhrases()
|
void PhraseTableCreator::EncodeTargetPhrases()
|
||||||
@ -1034,17 +1034,24 @@ void RankingTask::operator()()
|
|||||||
for(size_t i = 0; i < lines.size(); i++) {
|
for(size_t i = 0; i < lines.size(); i++) {
|
||||||
std::vector<std::string> tokens;
|
std::vector<std::string> tokens;
|
||||||
Moses::TokenizeMultiCharSeparator(tokens, lines[i], m_creator.m_separator);
|
Moses::TokenizeMultiCharSeparator(tokens, lines[i], m_creator.m_separator);
|
||||||
|
|
||||||
if(tokens.size() < 3) {
|
for(std::vector<std::string>::iterator it = tokens.begin(); it != tokens.end(); it++)
|
||||||
|
*it = Moses::Trim(*it);
|
||||||
|
|
||||||
|
if(tokens.size() < 4) {
|
||||||
std::cerr << "Error: It seems the following line has a wrong format:" << std::endl;
|
std::cerr << "Error: It seems the following line has a wrong format:" << std::endl;
|
||||||
std::cerr << "Line " << i << ": " << lines[i] << std::endl;
|
std::cerr << "Line " << i << ": " << lines[i] << std::endl;
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
if(tokens.size() == 3 && m_creator.m_warnMe) {
|
|
||||||
std::cerr << "Warning: It seems the following line contains no alignment information, " << std::endl;
|
if(tokens[3].size() <= 1 && m_creator.m_coding != PhraseTableCreator::None) {
|
||||||
std::cerr << "but you are using PREnc encoding which makes use of alignment data. " << std::endl;
|
std::cerr << "Error: It seems the following line contains no alignment information, " << std::endl;
|
||||||
std::cerr << "Better use -encoding None or disable this warning with -no-warnings ." << std::endl;
|
std::cerr << "but you are using ";
|
||||||
|
std::cerr << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
|
||||||
|
std::cerr << " encoding which makes use of alignment data. " << std::endl;
|
||||||
|
std::cerr << "Use -encoding None" << std::endl;
|
||||||
std::cerr << "Line " << i << ": " << lines[i] << std::endl;
|
std::cerr << "Line " << i << ": " << lines[i] << std::endl;
|
||||||
|
abort();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<float> scores = Tokenize<float>(tokens[2]);
|
std::vector<float> scores = Tokenize<float>(tokens[2]);
|
||||||
@ -1125,18 +1132,23 @@ void EncodingTask::operator()()
|
|||||||
std::vector<std::string> tokens;
|
std::vector<std::string> tokens;
|
||||||
Moses::TokenizeMultiCharSeparator(tokens, lines[i], m_creator.m_separator);
|
Moses::TokenizeMultiCharSeparator(tokens, lines[i], m_creator.m_separator);
|
||||||
|
|
||||||
|
for(std::vector<std::string>::iterator it = tokens.begin(); it != tokens.end(); it++)
|
||||||
|
*it = Moses::Trim(*it);
|
||||||
|
|
||||||
if(tokens.size() < 3) {
|
if(tokens.size() < 3) {
|
||||||
std::cerr << "Error: It seems the following line has a wrong format:" << std::endl;
|
std::cerr << "Error: It seems the following line has a wrong format:" << std::endl;
|
||||||
std::cerr << "Line " << i << ": " << lines[i] << std::endl;
|
std::cerr << "Line " << i << ": " << lines[i] << std::endl;
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
if(tokens.size() == 3 && m_creator.m_coding != PhraseTableCreator::None && m_creator.m_warnMe) {
|
|
||||||
std::cerr << "Warning: It seems the following line contains no alignment information, " << std::endl;
|
if(tokens[3].size() <= 1 && m_creator.m_coding != PhraseTableCreator::None) {
|
||||||
|
std::cerr << "Error: It seems the following line contains no alignment information, " << std::endl;
|
||||||
std::cerr << "but you are using ";
|
std::cerr << "but you are using ";
|
||||||
std::cerr << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
|
std::cerr << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
|
||||||
std::cerr << " encoding which makes use of alignment data. " << std::endl;
|
std::cerr << " encoding which makes use of alignment data. " << std::endl;
|
||||||
std::cerr << "Better use -encoding None or disable this warning with -no-warnings." << std::endl;
|
std::cerr << "Use -encoding None" << std::endl;
|
||||||
std::cerr << "Line " << i << ": " << lines[i] << std::endl;
|
std::cerr << "Line " << i << ": " << lines[i] << std::endl;
|
||||||
|
abort();
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t ownRank = 0;
|
size_t ownRank = 0;
|
||||||
|
Loading…
Reference in New Issue
Block a user