mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-20 07:42:21 +03:00
Merge branch 'miramerge' of thor.inf.ed.ac.uk:/fs/saxnot3/ehasler/mosesdecoder_github_mira into miramerge
This commit is contained in:
commit
9be5ccaa86
@ -293,7 +293,10 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
|
||||
if (decode_hope || decode_fear || decode_model) {
|
||||
decodeHopeOrFear(decode_hope, decode_fear, decode_model, decode_filename, inputSentences, decoder, n);
|
||||
size_t decode = 1;
|
||||
if (decode_fear) decode = 2;
|
||||
if (decode_model) decode = 3;
|
||||
decodeHopeOrFear(rank, size, decode, decode_filename, inputSentences, decoder, n);
|
||||
}
|
||||
|
||||
// Optionally shuffle the sentences
|
||||
@ -437,7 +440,7 @@ int main(int argc, char** argv) {
|
||||
|
||||
time_t now;
|
||||
time(&now);
|
||||
cerr << "Rank " << rank << ", " << ctime(&now) << endl;
|
||||
cerr << "Rank " << rank << ", " << ctime(&now);
|
||||
|
||||
ScoreComponentCollection mixedAverageWeights;
|
||||
ScoreComponentCollection mixedAverageWeightsPrevious;
|
||||
@ -1269,31 +1272,53 @@ void deleteTranslations(vector<vector<const Word*> > &translations) {
|
||||
}
|
||||
}
|
||||
|
||||
void decodeHopeOrFear(bool decode_hope, bool decode_fear, bool decode_model, string filename, vector<string> &inputSentences, MosesDecoder* decoder, size_t n) {
|
||||
if (decode_hope)
|
||||
cerr << "Decoding dev input set according to hope objective.. " << endl;
|
||||
else if (decode_fear)
|
||||
cerr << "Decoding dev input set according to fear objective.. " << endl;
|
||||
void decodeHopeOrFear(size_t rank, size_t size, size_t decode, string filename, vector<string> &inputSentences, MosesDecoder* decoder, size_t n) {
|
||||
if (decode == 1)
|
||||
cerr << "Rank " << rank << ", decoding dev input set according to hope objective.. " << endl;
|
||||
else if (decode == 2)
|
||||
cerr << "Rank " << rank << ", decoding dev input set according to fear objective.. " << endl;
|
||||
else
|
||||
cerr << "Decoding dev input set according to normal objective.. " << endl;
|
||||
cerr << "Rank " << rank << ", decoding dev input set according to normal objective.. " << endl;
|
||||
|
||||
// Create shards according to the number of processes used
|
||||
vector<size_t> order;
|
||||
for (size_t i = 0; i < inputSentences.size(); ++i)
|
||||
order.push_back(i);
|
||||
|
||||
vector<size_t> shard;
|
||||
float shardSize = (float) (order.size()) / size;
|
||||
VERBOSE(1, "Rank " << rank << ", shard size: " << shardSize << endl);
|
||||
size_t shardStart = (size_t) (shardSize * rank);
|
||||
size_t shardEnd = (size_t) (shardSize * (rank + 1));
|
||||
if (rank == size - 1)
|
||||
shardEnd = inputSentences.size();
|
||||
VERBOSE(1, "Rank " << rank << ", shard start: " << shardStart << " Shard end: " << shardEnd << endl);
|
||||
shardSize = shardEnd - shardStart;
|
||||
shard.resize(shardSize);
|
||||
copy(order.begin() + shardStart, order.begin() + shardEnd, shard.begin());
|
||||
VERBOSE(1, "Rank " << rank << ", actual shard size: " << shard.size() << endl);
|
||||
|
||||
// open files for writing
|
||||
stringstream fname;
|
||||
fname << filename << ".rank" << rank;
|
||||
filename = fname.str();
|
||||
ostringstream filename_nbest;
|
||||
filename_nbest << filename << "." << n << "best";
|
||||
ofstream out(filename.c_str());
|
||||
ofstream nbest_out((filename_nbest.str()).c_str());
|
||||
if (!out) {
|
||||
ostringstream msg;
|
||||
msg << "Unable to open " << filename;
|
||||
throw runtime_error(msg.str());
|
||||
}
|
||||
if (!nbest_out) {
|
||||
ostringstream msg;
|
||||
msg << "Unable to open " << filename_nbest;
|
||||
throw runtime_error(msg.str());
|
||||
}
|
||||
ofstream nbest_out((filename_nbest.str()).c_str());
|
||||
if (!out) {
|
||||
ostringstream msg;
|
||||
msg << "Unable to open " << fname.str();
|
||||
throw runtime_error(msg.str());
|
||||
}
|
||||
if (!nbest_out) {
|
||||
ostringstream msg;
|
||||
msg << "Unable to open " << filename_nbest;
|
||||
throw runtime_error(msg.str());
|
||||
}
|
||||
|
||||
for (size_t sid = 0; sid < inputSentences.size(); ++sid) {
|
||||
for (size_t i = 0; i < shard.size(); ++i) {
|
||||
size_t sid = shard[i];
|
||||
string& input = inputSentences[sid];
|
||||
|
||||
vector<vector<ScoreComponentCollection> > dummyFeatureValues;
|
||||
@ -1307,10 +1332,11 @@ void decodeHopeOrFear(bool decode_hope, bool decode_fear, bool decode_model, str
|
||||
dummyModelScores.push_back(newScores);
|
||||
|
||||
float factor = 0.0;
|
||||
if (decode_hope) factor = 1.0;
|
||||
if (decode_fear) factor = -1.0;
|
||||
if (decode == 1) factor = 1.0;
|
||||
if (decode == 2) factor = -1.0;
|
||||
cerr << "Rank " << rank << ", translating sentence " << sid << endl;
|
||||
vector< vector<const Word*> > nbestOutput = decoder->getNBest(input, sid, n, factor, 1, dummyFeatureValues[0],
|
||||
dummyBleuScores[0], dummyModelScores[0], n, true, 0, 0);
|
||||
dummyBleuScores[0], dummyModelScores[0], n, true, rank, 0);
|
||||
cerr << endl;
|
||||
decoder->cleanup();
|
||||
|
||||
@ -1330,9 +1356,18 @@ void decodeHopeOrFear(bool decode_hope, bool decode_fear, bool decode_model, str
|
||||
}
|
||||
}
|
||||
|
||||
out.close();
|
||||
nbest_out.close();
|
||||
cerr << "Closing files " << filename << " and " << filename_nbest.str() << endl;
|
||||
out.close();
|
||||
nbest_out.close();
|
||||
cerr << "Closing files " << filename << " and " << filename_nbest.str() << endl;
|
||||
|
||||
#ifdef MPI_ENABLE
|
||||
MPI_Finalize();
|
||||
#endif
|
||||
|
||||
time_t now;
|
||||
time(&now);
|
||||
cerr << "Rank " << rank << ", " << ctime(&now);
|
||||
|
||||
delete decoder;
|
||||
exit(0);
|
||||
}
|
||||
|
@ -50,6 +50,6 @@ void printFeatureValues(std::vector<std::vector<Moses::ScoreComponentCollection>
|
||||
void ignoreCoreFeatures(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, ProducerWeightMap &coreWeightMap);
|
||||
void takeLogs(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, size_t base);
|
||||
void deleteTranslations(std::vector<std::vector<const Moses::Word*> > &translations);
|
||||
void decodeHopeOrFear(bool decode_hope, bool decode_fear, bool decode_model, std::string decode_filename, std::vector<std::string> &inputSentences, Mira::MosesDecoder* decoder, size_t n);
|
||||
void decodeHopeOrFear(size_t rank, size_t size, size_t decode, std::string decode_filename, std::vector<std::string> &inputSentences, Mira::MosesDecoder* decoder, size_t n);
|
||||
|
||||
#endif /* MAIN_H_ */
|
||||
|
@ -112,14 +112,12 @@ FFState* GlobalLexicalModelUnlimited::Evaluate(const Hypothesis& cur_hypo, const
|
||||
alreadyScored[sourceString] = 1;
|
||||
}
|
||||
|
||||
// add source words right to current source word as context
|
||||
// add source words to the right of current source word as context
|
||||
for(int contextIndex = sourceIndex+1; contextIndex < input.GetSize(); contextIndex++ ) {
|
||||
string contextString = input.GetWord(contextIndex).GetString(0); // TODO: change for other factors
|
||||
bool contextExists;
|
||||
if (!m_unrestricted)
|
||||
contextExists = m_vocabSource.find( contextString ) != m_vocabSource.end();
|
||||
if (contextIndex == sourceIndex+1)
|
||||
contextExists = true; // always add adjacent context words
|
||||
|
||||
if (m_unrestricted || contextExists) {
|
||||
stringstream feature;
|
||||
@ -156,8 +154,6 @@ FFState* GlobalLexicalModelUnlimited::Evaluate(const Hypothesis& cur_hypo, const
|
||||
bool sourceTriggerExists = false;
|
||||
if (!m_unrestricted)
|
||||
sourceTriggerExists = m_vocabSource.find( sourceTrigger ) != m_vocabSource.end();
|
||||
if (contextIndex == sourceIndex-1)
|
||||
sourceTriggerExists = true; // always add adjacent context words
|
||||
|
||||
if (m_unrestricted || sourceTriggerExists)
|
||||
AddFeature(accumulator, alreadyScored, sourceTrigger, sourceString,
|
||||
@ -182,8 +178,6 @@ FFState* GlobalLexicalModelUnlimited::Evaluate(const Hypothesis& cur_hypo, const
|
||||
bool targetTriggerExists = false;
|
||||
if (!m_unrestricted)
|
||||
targetTriggerExists = m_vocabTarget.find( targetTrigger ) != m_vocabTarget.end();
|
||||
if (globalContextIndex == targetIndex-1)
|
||||
targetTriggerExists = true; // always add adjacent context words
|
||||
|
||||
if (m_unrestricted || targetTriggerExists)
|
||||
AddFeature(accumulator, alreadyScored, sourceContext, sourceString,
|
||||
@ -213,8 +207,6 @@ FFState* GlobalLexicalModelUnlimited::Evaluate(const Hypothesis& cur_hypo, const
|
||||
bool targetTriggerExists = false;
|
||||
if (!m_unrestricted)
|
||||
targetTriggerExists = m_vocabTarget.find( targetTrigger ) != m_vocabTarget.end();
|
||||
if (globalContextIndex == globalTargetIndex-1)
|
||||
targetTriggerExists = true; // always add adjacent context words
|
||||
|
||||
if (m_unrestricted || (sourceTriggerExists && targetTriggerExists))
|
||||
AddFeature(accumulator, alreadyScored, sourceTrigger, sourceString,
|
||||
@ -230,8 +222,6 @@ FFState* GlobalLexicalModelUnlimited::Evaluate(const Hypothesis& cur_hypo, const
|
||||
bool sourceTriggerExists = false;
|
||||
if (!m_unrestricted)
|
||||
sourceTriggerExists = m_vocabSource.find( sourceTrigger ) != m_vocabSource.end();
|
||||
if (contextIndex == sourceIndex-1)
|
||||
sourceTriggerExists = true; // always add adjacent context words
|
||||
|
||||
if (globalTargetIndex == 0) {
|
||||
string targetTrigger = "<s>";
|
||||
@ -248,8 +238,6 @@ FFState* GlobalLexicalModelUnlimited::Evaluate(const Hypothesis& cur_hypo, const
|
||||
bool targetTriggerExists = false;
|
||||
if (!m_unrestricted)
|
||||
targetTriggerExists = m_vocabTarget.find( targetTrigger ) != m_vocabTarget.end();
|
||||
if (globalContextIndex == globalTargetIndex-1)
|
||||
targetTriggerExists = true; // always add adjacent context words
|
||||
|
||||
if (m_unrestricted || (sourceTriggerExists && targetTriggerExists))
|
||||
AddFeature(accumulator, alreadyScored, sourceTrigger, sourceString,
|
||||
|
@ -7,7 +7,7 @@
|
||||
namespace Moses {
|
||||
|
||||
/**
|
||||
* Phrase pair feature, as in Watanabe et al. Uses alignment info.
|
||||
* Phrase pair feature: complete source/target phrase pair
|
||||
**/
|
||||
class PhrasePairFeature: public StatelessFeatureFunction {
|
||||
public:
|
||||
|
@ -1747,8 +1747,9 @@ bool StaticData::LoadWordTranslationFeature()
|
||||
}
|
||||
|
||||
vector<string> tokens = Tokenize(parameters[0]);
|
||||
if (tokens.size() != 2 && tokens.size() != 3 && tokens.size() != 5) {
|
||||
UserMessage::Add("Format of word translation feature parameter is: --word-translation-feature <factor-src> <factor-tgt> [context-type] [filename-src filename-tgt]");
|
||||
if (tokens.size() != 1 && tokens.size() != 4 && tokens.size() != 6) {
|
||||
UserMessage::Add("Format of word translation feature parameter is: --word-translation-feature <factor-src>-<factor-tgt> "
|
||||
"[simple source-trigger target-trigger] [filename-src filename-tgt]");
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1758,18 +1759,25 @@ bool StaticData::LoadWordTranslationFeature()
|
||||
}
|
||||
|
||||
// set factor
|
||||
FactorType factorIdSource = Scan<size_t>(tokens[0]);
|
||||
FactorType factorIdTarget = Scan<size_t>(tokens[1]);
|
||||
size_t context = 0;
|
||||
if (tokens.size() >= 3)
|
||||
context = Scan<size_t>(tokens[2]);
|
||||
vector <string> factors = Tokenize(tokens[0],"-");
|
||||
FactorType factorIdSource = Scan<size_t>(factors[0]);
|
||||
FactorType factorIdTarget = Scan<size_t>(factors[1]);
|
||||
bool simple = 1;
|
||||
bool sourceTrigger = 0;
|
||||
bool targetTrigger = 0;
|
||||
if (tokens.size() >= 4) {
|
||||
simple = Scan<size_t>(tokens[1]);
|
||||
sourceTrigger = Scan<size_t>(tokens[2]);
|
||||
targetTrigger = Scan<size_t>(tokens[3]);
|
||||
}
|
||||
|
||||
m_wordTranslationFeature = new WordTranslationFeature(factorIdSource,factorIdTarget, context);
|
||||
m_wordTranslationFeature = new WordTranslationFeature(factorIdSource, factorIdTarget, simple,
|
||||
sourceTrigger, targetTrigger);
|
||||
|
||||
// load word list for restricted feature set
|
||||
if (tokens.size() == 5) {
|
||||
string filenameSource = tokens[3];
|
||||
string filenameTarget = tokens[4];
|
||||
if (tokens.size() == 6) {
|
||||
string filenameSource = tokens[5];
|
||||
string filenameTarget = tokens[6];
|
||||
cerr << "loading word translation word lists from " << filenameSource << " and " << filenameTarget << endl;
|
||||
if (!m_wordTranslationFeature->Load(filenameSource, filenameTarget)) {
|
||||
UserMessage::Add("Unable to load word lists for word translation feature from files " + filenameSource + " and " + filenameTarget);
|
||||
|
@ -69,135 +69,7 @@ FFState* WordTranslationFeature::Evaluate(const Hypothesis& cur_hypo, const FFSt
|
||||
bool targetExists = m_vocabTarget.find( targetWord ) != m_vocabTarget.end();
|
||||
// no feature unless both words are in the restricted vocabularies
|
||||
if (m_unrestricted || (sourceExists && targetExists)) {
|
||||
if (m_sourceContext) {
|
||||
int globalSourceIndex = input.GetSize() - sourcePhrase.GetSize() + sourceIndex;
|
||||
|
||||
// TODO
|
||||
}
|
||||
else if (m_biphrase) {
|
||||
// allow additional discont. triggers on one of the sides, bigram on the other side
|
||||
int globalTargetIndex = cur_hypo.GetSize() - targetPhrase.GetSize() + targetIndex;
|
||||
int globalSourceIndex = input.GetSize() - sourcePhrase.GetSize() + sourceIndex;
|
||||
|
||||
// 1) source-target pair, trigger source word (can be discont.) and adjacent target word (bigram)
|
||||
string targetContext;
|
||||
if (globalTargetIndex > 0)
|
||||
targetContext = cur_hypo.GetWord(globalTargetIndex-1).GetFactor(m_factorTypeTarget)->GetString();
|
||||
else
|
||||
targetContext = "<s>";
|
||||
|
||||
if (globalSourceIndex == 0) {
|
||||
string sourceTrigger = "<s>";
|
||||
AddFeature(accumulator, sourceTrigger, sourceWord,
|
||||
targetContext, targetWord);
|
||||
}
|
||||
else
|
||||
for(int contextIndex = globalSourceIndex-1; contextIndex >= 0; contextIndex-- ) {
|
||||
string sourceTrigger = input.GetWord(contextIndex).GetFactor(m_factorTypeSource)->GetString();
|
||||
bool sourceTriggerExists = false;
|
||||
if (!m_unrestricted)
|
||||
sourceTriggerExists = m_vocabSource.find( sourceTrigger ) != m_vocabSource.end();
|
||||
if (contextIndex == globalSourceIndex-1)
|
||||
sourceTriggerExists = true; // always add adjacent context words
|
||||
|
||||
if (m_unrestricted || sourceTriggerExists)
|
||||
AddFeature(accumulator, sourceTrigger, sourceWord,
|
||||
targetContext, targetWord);
|
||||
}
|
||||
|
||||
// 2) source-target pair, adjacent source word (bigram) and trigger target word (can be discont.)
|
||||
string sourceContext;
|
||||
if (globalSourceIndex-1 >= 0)
|
||||
sourceContext = input.GetWord(globalSourceIndex-1).GetFactor(m_factorTypeSource)->GetString();
|
||||
else
|
||||
sourceContext = "<s>";
|
||||
|
||||
if (globalTargetIndex == 0) {
|
||||
string targetTrigger = "<s>";
|
||||
AddFeature(accumulator, sourceContext, sourceWord,
|
||||
targetTrigger, targetWord);
|
||||
}
|
||||
else
|
||||
for(int globalContextIndex = globalTargetIndex-1; globalContextIndex >= 0; globalContextIndex-- ) {
|
||||
string targetTrigger = cur_hypo.GetWord(globalContextIndex).GetFactor(m_factorTypeTarget)->GetString();
|
||||
bool targetTriggerExists = false;
|
||||
if (!m_unrestricted)
|
||||
targetTriggerExists = m_vocabTarget.find( targetTrigger ) != m_vocabTarget.end();
|
||||
if (globalContextIndex == targetIndex-1)
|
||||
targetTriggerExists = true; // always add adjacent context words
|
||||
|
||||
if (m_unrestricted || targetTriggerExists)
|
||||
AddFeature(accumulator, sourceContext, sourceWord,
|
||||
targetTrigger, targetWord);
|
||||
}
|
||||
}
|
||||
else if (m_bitrigger) {
|
||||
// allow additional discont. triggers on both sides
|
||||
int globalTargetIndex = cur_hypo.GetSize() - targetPhrase.GetSize() + targetIndex;
|
||||
int globalSourceIndex =input.GetSize() - sourcePhrase.GetSize() + sourceIndex;
|
||||
|
||||
if (globalSourceIndex == 0) {
|
||||
string sourceTrigger = "<s>";
|
||||
bool sourceTriggerExists = true;
|
||||
|
||||
if (globalTargetIndex == 0) {
|
||||
string targetTrigger = "<s>";
|
||||
bool targetTriggerExists = true;
|
||||
|
||||
if (m_unrestricted || (sourceTriggerExists && targetTriggerExists))
|
||||
AddFeature(accumulator, sourceTrigger, sourceWord, targetTrigger, targetWord);
|
||||
}
|
||||
else {
|
||||
// iterate backwards over target
|
||||
for(int globalContextIndex = globalTargetIndex-1; globalContextIndex >= 0; globalContextIndex-- ) {
|
||||
string targetTrigger = cur_hypo.GetWord(globalContextIndex).GetFactor(m_factorTypeTarget)->GetString();
|
||||
bool targetTriggerExists = false;
|
||||
if (!m_unrestricted)
|
||||
targetTriggerExists = m_vocabTarget.find( targetTrigger ) != m_vocabTarget.end();
|
||||
if (globalContextIndex == globalTargetIndex-1)
|
||||
targetTriggerExists = true; // always add adjacent context words
|
||||
|
||||
if (m_unrestricted || (sourceTriggerExists && targetTriggerExists))
|
||||
AddFeature(accumulator, sourceTrigger, sourceWord, targetTrigger, targetWord);
|
||||
}
|
||||
}
|
||||
}
|
||||
// iterate over both source and target
|
||||
else {
|
||||
// iterate backwards over source
|
||||
for(int contextIndex = globalSourceIndex-1; contextIndex >= 0; contextIndex-- ) {
|
||||
string sourceTrigger = input.GetWord(contextIndex).GetFactor(m_factorTypeSource)->GetString();
|
||||
bool sourceTriggerExists = false;
|
||||
if (!m_unrestricted)
|
||||
sourceTriggerExists = m_vocabSource.find( sourceTrigger ) != m_vocabSource.end();
|
||||
if (contextIndex == globalSourceIndex-1)
|
||||
sourceTriggerExists = true; // always add adjacent context words
|
||||
|
||||
if (globalTargetIndex == 0) {
|
||||
string targetTrigger = "<s>";
|
||||
bool targetTriggerExists = true;
|
||||
|
||||
if (m_unrestricted || (sourceTriggerExists && targetTriggerExists))
|
||||
AddFeature(accumulator, sourceTrigger, sourceWord, targetTrigger, targetWord);
|
||||
}
|
||||
else {
|
||||
// iterate backwards over target
|
||||
for(int globalContextIndex = globalTargetIndex-1; globalContextIndex >= 0; globalContextIndex-- ) {
|
||||
string targetTrigger = cur_hypo.GetWord(globalContextIndex).GetFactor(m_factorTypeTarget)->GetString();
|
||||
bool targetTriggerExists = false;
|
||||
if (!m_unrestricted)
|
||||
targetTriggerExists = m_vocabTarget.find( targetTrigger ) != m_vocabTarget.end();
|
||||
if (globalContextIndex == globalTargetIndex-1)
|
||||
targetTriggerExists = true; // always add adjacent context words
|
||||
|
||||
if (m_unrestricted || (sourceTriggerExists && targetTriggerExists))
|
||||
AddFeature(accumulator, sourceTrigger, sourceWord, targetTrigger, targetWord);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (m_simple) {
|
||||
// construct feature name
|
||||
stringstream featureName;
|
||||
featureName << ((sourceExists||m_unrestricted) ? sourceWord : "OTHER");
|
||||
@ -205,10 +77,82 @@ FFState* WordTranslationFeature::Evaluate(const Hypothesis& cur_hypo, const FFSt
|
||||
featureName << ((targetExists||m_unrestricted) ? targetWord : "OTHER");
|
||||
accumulator->PlusEquals(this,featureName.str(),1);
|
||||
}
|
||||
if (m_sourceContext) {
|
||||
size_t globalSourceIndex = cur_hypo.GetCurrSourceWordsRange().GetStartPos() + sourceIndex;
|
||||
if (globalSourceIndex == 0) {
|
||||
// add <s> trigger feature for source
|
||||
stringstream feature;
|
||||
feature << "wt_";
|
||||
feature << targetWord;
|
||||
feature << "~";
|
||||
feature << "<s>,";
|
||||
feature << sourceWord;
|
||||
accumulator->SparsePlusEquals(feature.str(), 1);
|
||||
}
|
||||
|
||||
// range over source words to get context
|
||||
for(size_t contextIndex = 0; contextIndex < input.GetSize(); contextIndex++ ) {
|
||||
if (contextIndex == globalSourceIndex) continue;
|
||||
string sourceTrigger = input.GetWord(contextIndex).GetFactor(m_factorTypeSource)->GetString();
|
||||
bool sourceTriggerExists = false;
|
||||
if (!m_unrestricted)
|
||||
sourceTriggerExists = m_vocabSource.find( sourceTrigger ) != m_vocabSource.end();
|
||||
|
||||
if (m_unrestricted || sourceTriggerExists) {
|
||||
stringstream feature;
|
||||
feature << "wt_";
|
||||
feature << targetWord;
|
||||
feature << "~";
|
||||
if (contextIndex < globalSourceIndex) {
|
||||
feature << sourceTrigger;
|
||||
feature << ",";
|
||||
feature << sourceWord;
|
||||
}
|
||||
else {
|
||||
feature << sourceWord;
|
||||
feature << ",";
|
||||
feature << sourceTrigger;
|
||||
}
|
||||
accumulator->SparsePlusEquals(feature.str(), 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (m_targetContext) {
|
||||
size_t globalTargetIndex = cur_hypo.GetCurrTargetWordsRange().GetStartPos() + targetIndex;
|
||||
if (globalTargetIndex == 0) {
|
||||
// add <s> trigger feature for target
|
||||
stringstream feature;
|
||||
feature << "wt_";
|
||||
feature << "<s>,";
|
||||
feature << targetWord;
|
||||
feature << "~";
|
||||
feature << sourceWord;
|
||||
accumulator->SparsePlusEquals(feature.str(), 1);
|
||||
}
|
||||
|
||||
// range over target words (up to current position) to get context
|
||||
for(size_t contextIndex = 0; contextIndex < globalTargetIndex; contextIndex++ ) {
|
||||
string targetTrigger = cur_hypo.GetWord(contextIndex).GetFactor(m_factorTypeTarget)->GetString();
|
||||
bool targetTriggerExists = false;
|
||||
if (!m_unrestricted)
|
||||
targetTriggerExists = m_vocabTarget.find( targetTrigger ) != m_vocabTarget.end();
|
||||
|
||||
if (m_unrestricted || targetTriggerExists) {
|
||||
stringstream feature;
|
||||
feature << "wt_";
|
||||
feature << targetTrigger;
|
||||
feature << ",";
|
||||
feature << targetWord;
|
||||
feature << "~";
|
||||
feature << sourceWord;
|
||||
accumulator->SparsePlusEquals(feature.str(), 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new DummyState();
|
||||
return new DummyState();
|
||||
}
|
||||
|
||||
void WordTranslationFeature::AddFeature(ScoreComponentCollection* accumulator, string sourceTrigger,
|
||||
|
@ -39,34 +39,27 @@ private:
|
||||
FactorType m_factorTypeSource;
|
||||
FactorType m_factorTypeTarget;
|
||||
bool m_unrestricted;
|
||||
bool m_simple;
|
||||
bool m_sourceContext;
|
||||
bool m_biphrase;
|
||||
bool m_bitrigger;
|
||||
bool m_targetContext;
|
||||
|
||||
public:
|
||||
WordTranslationFeature(FactorType factorTypeSource = 0, FactorType factorTypeTarget = 0, size_t context = 0):
|
||||
WordTranslationFeature(FactorType factorTypeSource, FactorType factorTypeTarget,
|
||||
bool simple, bool sourceContext, bool targetContext):
|
||||
// StatelessFeatureFunction("wt", ScoreProducer::unlimited),
|
||||
StatefulFeatureFunction("wt", ScoreProducer::unlimited),
|
||||
m_factorTypeSource(factorTypeSource),
|
||||
m_factorTypeTarget(factorTypeTarget),
|
||||
m_simple(simple),
|
||||
m_sourceContext(sourceContext),
|
||||
m_targetContext(targetContext),
|
||||
m_unrestricted(true)
|
||||
{
|
||||
m_sourceContext = false;
|
||||
m_biphrase = false;
|
||||
switch(context) {
|
||||
case 1:
|
||||
m_sourceContext = true;
|
||||
std::cerr << "Using source context for word translation feature.." << std::endl;
|
||||
break;
|
||||
case 2:
|
||||
m_biphrase = true;
|
||||
std::cerr << "Using biphrases for word translation feature.." << std::endl;
|
||||
break;
|
||||
case 3:
|
||||
m_bitrigger = true;
|
||||
std::cerr << "Using bitriggers for word translation feature.." << std::endl;
|
||||
break;
|
||||
}
|
||||
std::cerr << "Creating word translation feature.. ";
|
||||
if (m_simple == 1) std::cerr << "using simple word translations.. ";
|
||||
if (m_sourceContext == 1) std::cerr << "using source context.. ";
|
||||
if (m_targetContext == 1) std::cerr << "using target context.. ";
|
||||
std::cerr << "done." << std::endl;
|
||||
}
|
||||
|
||||
bool Load(const std::string &filePathSource, const std::string &filePathTarget);
|
||||
|
Loading…
Reference in New Issue
Block a user