Mirror of https://github.com/moses-smt/mosesdecoder.git (synced 2024-12-26 21:42:19 +03:00)
Fix a few bugs in BilingualLM for phrase based decoding.
commit 3624bd776c (parent 5f87cf94d8)
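The changes below touch the BilingualLM feature function's implementation and, at the end, its header: requestPrevTargetNgrams folds its null check into the loop condition; getTargetWords replaces a wrong guard (`words.size()!=source_ngrams`) with `words.size() > 0`; getSourceWords can now fall back to an alignment at target index 0 (`(targetWordIdx - j) >= 0` instead of an `else if ... > 0`), factors the middle-alignment choice into a new selectMiddleAlignment helper, and computes the source window from `source_center_index` rather than `targetWordIdx`.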
@@ -106,25 +106,23 @@ int BilingualLM::getNeuralLMId(const Word& word) const{
 //Populates words with amount words from the targetPhrase from the previous hypothesis where
 //words[0] is the last word of the previous hypothesis, words[1] is the second last etc...
-void BilingualLM::requestPrevTargetNgrams(const Hypothesis &cur_hypo, int amount, std::vector<int> &words) const {
+void BilingualLM::requestPrevTargetNgrams(
+    const Hypothesis &cur_hypo, int amount, std::vector<int> &words) const {
   const Hypothesis * prev_hyp = cur_hypo.GetPrevHypo();
   int found = 0;
 
-  while (found != amount){
-    if (prev_hyp){
-      const TargetPhrase& currTargetPhrase = prev_hyp->GetCurrTargetPhrase();
-      for (int i = currTargetPhrase.GetSize() - 1; i> -1; i--){
-        if (found != amount){
-          const Word& word = currTargetPhrase.GetWord(i);
-          words[found] = getNeuralLMId(word);
-          found++;
-        } else {
-          return; //We have gotten everything needed
-        }
-      }
-    } else {
-      break; //We have reached the beginning of the hypothesis
-    }
+  while (prev_hyp && found != amount) {
+    const TargetPhrase& currTargetPhrase = prev_hyp->GetCurrTargetPhrase();
+    for (int i = currTargetPhrase.GetSize() - 1; i> -1; i--){
+      if (found != amount){
+        const Word& word = currTargetPhrase.GetWord(i);
+        words[found] = getNeuralLMId(word);
+        found++;
+      } else {
+        return; //We have gotten everything needed
+      }
+    }
 
     prev_hyp = prev_hyp->GetPrevHypo();
   }
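For orientation, here is a minimal, self-contained sketch of the control-flow change above (plain C++, not Moses code; `Hyp` and `collectPrevWords` are invented stand-ins for `Hypothesis` and `requestPrevTargetNgrams`): hoisting the null check into the `while` condition removes the nested `if`/`else` with its `break`.

#include <iostream>
#include <vector>

struct Hyp {
  std::vector<int> phrase_word_ids;  // word ids of this hypothesis' target phrase
  const Hyp* prev;                   // earlier hypothesis, or nullptr at sentence start
};

// Collect up to `amount` word ids walking backwards, newest first, mirroring
// the rewritten loop: the null check lives in the while condition.
void collectPrevWords(const Hyp* prev, int amount, std::vector<int>& words) {
  int found = 0;
  while (prev && found != amount) {
    const std::vector<int>& phrase = prev->phrase_word_ids;
    for (int i = static_cast<int>(phrase.size()) - 1; i >= 0; i--) {
      if (found == amount) return;  // we have everything we need
      words[found++] = phrase[i];
    }
    prev = prev->prev;  // step back to the earlier hypothesis
  }
  // The real code then pads words[found..amount) with a sentinel id.
}

int main() {
  Hyp h1{{1, 2}, nullptr};
  Hyp h2{{3, 4, 5}, &h1};
  std::vector<int> words(4, -1);
  collectPrevWords(&h2, 4, words);
  for (int w : words) std::cout << w << ' ';  // prints: 5 4 3 2
  std::cout << '\n';
}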
@@ -132,43 +130,40 @@ void BilingualLM::requestPrevTargetNgrams(const Hypothesis &cur_hypo, int amount
   for (int i = found; i < amount; i++){
     words[i] = neuralLM_wordID;
   }
 
 }
 
 //Populates the words vector with target_ngrams sized that also contains the current word we are looking at.
 //(in effect target_ngrams + 1)
-void BilingualLM::getTargetWords(const Hypothesis &cur_hypo
-                , const TargetPhrase &targetPhrase
-                , int current_word_index
-                , std::vector<int> &words) const {
-
+void BilingualLM::getTargetWords(
+    const Hypothesis &cur_hypo,
+    const TargetPhrase &targetPhrase,
+    int current_word_index,
+    std::vector<int> &words) const {
   //Check if we need to look at previous target phrases
   int additional_needed = current_word_index - target_ngrams;
   if (additional_needed < 0) {
     additional_needed = -additional_needed;
     std::vector<int> prev_words(additional_needed);
     requestPrevTargetNgrams(cur_hypo, additional_needed, prev_words);
-    for (int i=additional_needed -1 ; i>-1; i--){
+    for (int i = additional_needed - 1; i >= 0; i--) {
       words.push_back(prev_words[i]);
     }
   }
 
-  if (words.size()!=source_ngrams){
+  if (words.size() > 0) {
     //We have added some words from previous phrases
     //Just add until we reach current_word_index
-    for (int i = 0; i<current_word_index + 1; i++){
+    for (int i = 0; i <= current_word_index; i++) {
       const Word& word = targetPhrase.GetWord(i);
       words.push_back(getNeuralLMId(word));
     }
-
   } else {
     //We haven't added any words, proceed as before
-    for (int i = current_word_index - target_ngrams; i < current_word_index + 1; i++){
+    for (int i = current_word_index - target_ngrams; i <= current_word_index; i++){
       const Word& word = targetPhrase.GetWord(i);
       words.push_back(getNeuralLMId(word));
     }
   }
-
 }
 
 //Returns target_ngrams sized word vector that contains the current word we are looking at. (in effect target_ngrams + 1)
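The arithmetic behind `additional_needed` may be easier to see in isolation. A toy sketch (hypothetical helper, not part of Moses): for the word at position `idx` in the current phrase, the model wants `target_ngrams` context words plus the word itself, and whatever the current phrase cannot supply is borrowed from previous hypotheses.

#include <algorithm>
#include <cstdio>

// How many context words must come from previous hypotheses for the word
// at position idx? Mirrors: additional_needed = idx - target_ngrams.
int wordsFromPrevHypotheses(int idx, int target_ngrams) {
  int additional_needed = idx - target_ngrams;
  return additional_needed < 0 ? -additional_needed : 0;
}

int main() {
  const int target_ngrams = 4;
  for (int idx = 0; idx < 6; ++idx) {
    // Words taken from the current phrase are positions [max(0, idx - target_ngrams), idx].
    int from_current = std::min(idx, target_ngrams) + 1;
    printf("word %d: %d borrowed, %d from current phrase (total %d)\n",
           idx, wordsFromPrevHypotheses(idx, target_ngrams), from_current,
           wordsFromPrevHypotheses(idx, target_ngrams) + from_current);
  }
}

Every line prints a total of target_ngrams + 1, matching the comment above getTargetWords.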
@@ -202,95 +197,88 @@ void BilingualLM::getTargetWords(Phrase &whole_phrase
 */
 //Returns source words in the way NeuralLM expects them.
 
-void BilingualLM::getSourceWords(const TargetPhrase &targetPhrase
-                , int targetWordIdx
-                , const Sentence &source_sent
-                , const WordsRange &sourceWordRange
-                , std::vector<int> &words) const {
+size_t BilingualLM::selectMiddleAlignment(
+    const set<size_t>& alignment_links) const {
+  assert(alignment_links.size() > 0);
+
+  set<size_t>::iterator it = alignment_links.begin();
+  for (int i = 0; i < (alignment_links.size() - 1) / 2; ++i) {
+    ++it;
+  }
+
+  return *it;
+}
+
+void BilingualLM::getSourceWords(
+    const TargetPhrase &targetPhrase,
+    int targetWordIdx,
+    const Sentence &source_sent,
+    const WordsRange &sourceWordRange,
+    std::vector<int> &words) const {
   //Get source context
 
-  //Get alignment for the word we require
   const AlignmentInfo& alignments = targetPhrase.GetAlignTerm();
 
-  //We are getting word alignment for targetPhrase.GetWord(i + target_ngrams -1) according to the paper.
-  //Try to get some alignment, because the word we desire might be unaligned.
+  // We are getting word alignment for targetPhrase.GetWord(i + target_ngrams -1) according to the paper.
+  // Find the closest target word with alignment links.
   std::set<size_t> last_word_al;
-  for (int j = 0; j < targetPhrase.GetSize(); j++){
-    //Sometimes our word will not be aligned, so find the nearest aligned word right
+  for (int j = 0; j < targetPhrase.GetSize(); j++) {
+    // Find the nearest aligned word with preference for right.
     if ((targetWordIdx + j) < targetPhrase.GetSize()){
       last_word_al = alignments.GetAlignmentsForTarget(targetWordIdx + j);
-      if (!last_word_al.empty()){
+      if (!last_word_al.empty()) {
         break;
       }
-    } else if ((targetWordIdx - j) > 0) {
-      //We couldn't find word on the right, try the left.
-      last_word_al = alignments.GetAlignmentsForTarget(targetWordIdx - j);
-      if (!last_word_al.empty()){
-        break;
-      }
-
     }
+
+    // We couldn't find word on the right, try to the left.
+    if ((targetWordIdx - j) >= 0) {
+      last_word_al = alignments.GetAlignmentsForTarget(targetWordIdx - j);
+      if (!last_word_al.empty()) {
+        break;
+      }
+    }
   }
 
-  //Assume we have gotten some alignment here. If we couldn't get an alignment from the above routine it means
-  //that none of the words in the target phrase aligned to any word in the source phrase
-
-  //Now we get the source words.
-  size_t source_center_index;
-  if (last_word_al.size() == 1) {
-    //We have only one word aligned
-    source_center_index = *last_word_al.begin();
-  } else { //We have more than one alignments, take the middle one
-    int tempidx = 0; //Temporary index to track where the iterator is.
-    for (std::set<size_t>::iterator it = last_word_al.begin(); it != last_word_al.end(); it++){
-      if (tempidx == last_word_al.size()/2){
-        source_center_index = *(it);
-        break;
-      }
-    }
-  }
-
-  //We have found the alignment. Now determine how much to shift by to get the actual source word index.
+  // Now we get the source words. First select middle alignment.
+  size_t source_center_index = selectMiddleAlignment(last_word_al);
+  // We have found the alignment. Now determine how much to shift by to get the actual source word index.
   size_t phrase_start_pos = sourceWordRange.GetStartPos();
-  size_t source_word_mid_idx = phrase_start_pos + targetWordIdx; //Account for how far the current word is from the start of the phrase.
+  // Account for how far the current word is from the start of the phrase.
+  size_t source_word_mid_idx = phrase_start_pos + source_center_index;
 
   appendSourceWordsToVector(source_sent, words, source_word_mid_idx);
-
 }
 
 size_t BilingualLM::getState(const Hypothesis& cur_hypo) const {
-
   const TargetPhrase &targetPhrase = cur_hypo.GetCurrTargetPhrase();
-
   size_t hashCode = 0;
 
-  //Check if we need to look at previous target phrases
+  // Check if we need to look at previous target phrases
   int additional_needed = targetPhrase.GetSize() - target_ngrams;
   if (additional_needed < 0) {
     additional_needed = -additional_needed;
     std::vector<int> prev_words(additional_needed);
     requestPrevTargetNgrams(cur_hypo, additional_needed, prev_words);
-    for (int i=additional_needed - 1; i>-1; i--) {
+    for (int i = additional_needed - 1; i >= 0; i--) {
       boost::hash_combine(hashCode, prev_words[i]);
     }
-    //Get the rest of the phrases needed
+
+    // Get the rest of the phrases needed
     for (int i = 0; i < targetPhrase.GetSize(); i++) {
-      int neuralLM_wordID;
-
       const Word& word = targetPhrase.GetWord(i);
-      neuralLM_wordID = getNeuralLMId(word);
-
+      int neuralLM_wordID = getNeuralLMId(word);
       boost::hash_combine(hashCode, neuralLM_wordID);
     }
-
   } else {
     // We just need the last target_ngrams from the current target phrase.
     for (int i = targetPhrase.GetSize() - target_ngrams; i < targetPhrase.GetSize(); i++) {
-      int neuralLM_wordID;
-
       const Word& word = targetPhrase.GetWord(i);
-      neuralLM_wordID = getNeuralLMId(word);
-
+      int neuralLM_wordID = getNeuralLMId(word);
       boost::hash_combine(hashCode, neuralLM_wordID);
     }
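Factoring out selectMiddleAlignment also fixes a latent bug visible in the removed block: the inline loop never incremented tempidx, so with more than one alignment link the condition `tempidx == last_word_al.size()/2` could never hold and source_center_index was left uninitialized. A standalone sketch of the new helper (a hypothetical free function mirroring the member function above):

#include <cassert>
#include <cstdio>
#include <set>

// Pick the middle alignment link; std::set iterates in ascending order,
// and (n - 1) / 2 selects the lower middle for an even count n.
size_t selectMiddleAlignment(const std::set<size_t>& alignment_links) {
  assert(!alignment_links.empty());
  std::set<size_t>::const_iterator it = alignment_links.begin();
  for (size_t i = 0; i < (alignment_links.size() - 1) / 2; ++i) {
    ++it;
  }
  return *it;
}

int main() {
  std::set<size_t> odd{2, 5, 9};      // three links -> middle is 5
  std::set<size_t> even{1, 4, 6, 8};  // four links -> lower middle is 4
  printf("%zu %zu\n", selectMiddleAlignment(odd), selectMiddleAlignment(even));
}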
@@ -437,47 +425,32 @@ void BilingualLM::EvaluateWithSourceContext(const InputType &input
 
 FFState* BilingualLM::EvaluateWhenApplied(
-  const Hypothesis& cur_hypo,
-  const FFState* prev_state,
-  ScoreComponentCollection* accumulator) const
-{
+    const Hypothesis& cur_hypo,
+    const FFState* prev_state,
+    ScoreComponentCollection* accumulator) const {
   Manager& manager = cur_hypo.GetManager();
   const Sentence& source_sent = static_cast<const Sentence&>(manager.GetSource());
 
-
-  //Init vectors
+  // Init vectors.
   std::vector<int> source_words;
   source_words.reserve(source_ngrams);
   std::vector<int> target_words;
   target_words.reserve(target_ngrams);
 
   float value = 0;
 
   const TargetPhrase& currTargetPhrase = cur_hypo.GetCurrTargetPhrase();
   const WordsRange& sourceWordRange = cur_hypo.GetCurrSourceWordsRange(); //Source words range to calculate offsets
 
-  //For each word in the current target phrase get its LM score
+  // For each word in the current target phrase get its LM score.
   for (int i = 0; i < currTargetPhrase.GetSize(); i++){
-    //std::cout << "Size of Before Words " << all_words.size() << std::endl;
-    getSourceWords(currTargetPhrase
-            , i //The current target phrase
-            , source_sent
-            , sourceWordRange
-            , source_words);
-
-    getTargetWords(cur_hypo
-            , currTargetPhrase
-            , i
-            , target_words);
-
+    getSourceWords(
+        currTargetPhrase, i, source_sent, sourceWordRange, source_words);
+    getTargetWords(cur_hypo, currTargetPhrase, i, target_words);
     value += Score(source_words, target_words);
 
-    //Clear the vector
+    // Clear the vectors.
     source_words.clear();
     target_words.clear();
-
   }
 
   size_t new_state = getState(cur_hypo);
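The value returned by getState is what lets the decoder recombine hypotheses that share an LM context. A sketch of the idea (an assumed simplification, not the Moses API; the real state also covers words borrowed from previous hypotheses):

#include <boost/functional/hash.hpp>
#include <cstdio>
#include <vector>

// Hash the last target_ngrams word ids into one signature, as getState does
// with boost::hash_combine.
size_t ngramState(const std::vector<int>& word_ids, size_t target_ngrams) {
  size_t hashCode = 0;
  size_t start =
      word_ids.size() > target_ngrams ? word_ids.size() - target_ngrams : 0;
  for (size_t i = start; i < word_ids.size(); ++i) {
    boost::hash_combine(hashCode, word_ids[i]);
  }
  return hashCode;
}

int main() {
  std::vector<int> a{7, 1, 2, 3}, b{9, 1, 2, 3};
  // Identical last three words -> identical state -> candidates for recombination.
  printf("%s\n", ngramState(a, 3) == ngramState(b, 3) ? "equal" : "different");
}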
@@ -47,18 +47,22 @@ private:
   virtual void loadModel() const = 0;
   virtual bool parseAdditionalSettings(const std::string& key, const std::string& value) = 0;
 
-  void getSourceWords(const TargetPhrase &targetPhrase
-                , int targetWordIdx
-                , const Sentence &source_sent
-                , const WordsRange &sourceWordRange
-                , std::vector<int> &words) const;
+  size_t selectMiddleAlignment(const std::set<size_t>& alignment_links) const;
+
+  void getSourceWords(
+      const TargetPhrase &targetPhrase,
+      int targetWordIdx,
+      const Sentence &source_sent,
+      const WordsRange &sourceWordRange,
+      std::vector<int> &words) const;
 
   void appendSourceWordsToVector(const Sentence &source_sent, std::vector<int> &words, int source_word_mid_idx) const;
 
-  void getTargetWords(const Hypothesis &cur_hypo
-                , const TargetPhrase &targetPhrase
-                , int current_word_index
-                , std::vector<int> &words) const;
+  void getTargetWords(
+      const Hypothesis &cur_hypo,
+      const TargetPhrase &targetPhrase,
+      int current_word_index,
+      std::vector<int> &words) const;
 
   //size_t getState(const TargetPhrase &targetPhrase, std::vector<int> &prev_words) const;