Fix a few bugs in BilingualLM for phrase-based decoding.

This commit is contained in:
Paul Baltescu 2014-09-26 12:02:12 +01:00
parent 5f87cf94d8
commit 3624bd776c
2 changed files with 86 additions and 109 deletions

View File

@ -106,12 +106,12 @@ int BilingualLM::getNeuralLMId(const Word& word) const{
// Populates `words` with `amount` target words taken from the previous hypotheses, where
// words[0] is the last word of the previous hypothesis, words[1] is the second to last, etc.
void BilingualLM::requestPrevTargetNgrams(const Hypothesis &cur_hypo, int amount, std::vector<int> &words) const {
void BilingualLM::requestPrevTargetNgrams(
const Hypothesis &cur_hypo, int amount, std::vector<int> &words) const {
const Hypothesis * prev_hyp = cur_hypo.GetPrevHypo();
int found = 0;
while (found != amount){
if (prev_hyp){
while (prev_hyp && found != amount) {
const TargetPhrase& currTargetPhrase = prev_hyp->GetCurrTargetPhrase();
for (int i = currTargetPhrase.GetSize() - 1; i> -1; i--){
if (found != amount){
@ -122,9 +122,7 @@ void BilingualLM::requestPrevTargetNgrams(const Hypothesis &cur_hypo, int amount
return; //We have gotten everything needed
}
}
} else {
break; //We have reached the beginning of the hypothesis
}
prev_hyp = prev_hyp->GetPrevHypo();
}
@ -132,43 +130,40 @@ void BilingualLM::requestPrevTargetNgrams(const Hypothesis &cur_hypo, int amount
for (int i = found; i < amount; i++){
words[i] = neuralLM_wordID;
}
}
// Populates the words vector with the target_ngrams target words that precede the current word,
// followed by the current word itself (in effect target_ngrams + 1 entries).
void BilingualLM::getTargetWords(const Hypothesis &cur_hypo
, const TargetPhrase &targetPhrase
, int current_word_index
, std::vector<int> &words) const {
void BilingualLM::getTargetWords(
const Hypothesis &cur_hypo,
const TargetPhrase &targetPhrase,
int current_word_index,
std::vector<int> &words) const {
//Check if we need to look at previous target phrases
int additional_needed = current_word_index - target_ngrams;
if (additional_needed < 0) {
additional_needed = -additional_needed;
std::vector<int> prev_words(additional_needed);
requestPrevTargetNgrams(cur_hypo, additional_needed, prev_words);
for (int i=additional_needed -1 ; i>-1; i--){
for (int i = additional_needed - 1; i >= 0; i--) {
words.push_back(prev_words[i]);
}
}
if (words.size()!=source_ngrams){
if (words.size() > 0) {
//We have added some words from previous phrases
//Just add until we reach current_word_index
for (int i = 0; i<current_word_index + 1; i++){
for (int i = 0; i <= current_word_index; i++) {
const Word& word = targetPhrase.GetWord(i);
words.push_back(getNeuralLMId(word));
}
} else {
//We haven't added any words, proceed as before
for (int i = current_word_index - target_ngrams; i < current_word_index + 1; i++){
for (int i = current_word_index - target_ngrams; i <= current_word_index; i++){
const Word& word = targetPhrase.GetWord(i);
words.push_back(getNeuralLMId(word));
}
}
}
//Returns target_ngrams sized word vector that contains the current word we are looking at. (in effect target_ngrams + 1)
@ -202,95 +197,88 @@ void BilingualLM::getTargetWords(Phrase &whole_phrase
*/
//Returns source words in the way NeuralLM expects them.
void BilingualLM::getSourceWords(const TargetPhrase &targetPhrase
, int targetWordIdx
, const Sentence &source_sent
, const WordsRange &sourceWordRange
, std::vector<int> &words) const {
size_t BilingualLM::selectMiddleAlignment(
const set<size_t>& alignment_links) const {
assert(alignment_links.size() > 0);
set<size_t>::iterator it = alignment_links.begin();
for (int i = 0; i < (alignment_links.size() - 1) / 2; ++i) {
++it;
}
return *it;
}
void BilingualLM::getSourceWords(
const TargetPhrase &targetPhrase,
int targetWordIdx,
const Sentence &source_sent,
const WordsRange &sourceWordRange,
std::vector<int> &words) const {
//Get source context
//Get alignment for the word we require
const AlignmentInfo& alignments = targetPhrase.GetAlignTerm();
//We are getting word alignment for targetPhrase.GetWord(i + target_ngrams -1) according to the paper.
//Try to get some alignment, because the word we desire might be unaligned.
// We are getting word alignment for targetPhrase.GetWord(i + target_ngrams -1) according to the paper.
// Find the closest target word with alignment links.
std::set<size_t> last_word_al;
for (int j = 0; j < targetPhrase.GetSize(); j++){
//Sometimes our word will not be aligned, so find the nearest aligned word right
for (int j = 0; j < targetPhrase.GetSize(); j++) {
// Find the nearest aligned word with preference for right.
if ((targetWordIdx + j) < targetPhrase.GetSize()){
last_word_al = alignments.GetAlignmentsForTarget(targetWordIdx + j);
if (!last_word_al.empty()){
if (!last_word_al.empty()) {
break;
}
} else if ((targetWordIdx - j) > 0) {
//We couldn't find word on the right, try the left.
}
// We couldn't find word on the right, try to the left.
if ((targetWordIdx - j) >= 0) {
last_word_al = alignments.GetAlignmentsForTarget(targetWordIdx - j);
if (!last_word_al.empty()){
if (!last_word_al.empty()) {
break;
}
}
}
//Assume we have gotten some alignment here. If we couldn't get an alignment from the above routine it means
//that none of the words in the target phrase aligned to any word in the source phrase
//Now we get the source words.
size_t source_center_index;
if (last_word_al.size() == 1) {
//We have only one word aligned
source_center_index = *last_word_al.begin();
} else { //We have more than one alignments, take the middle one
int tempidx = 0; //Temporary index to track where the iterator is.
for (std::set<size_t>::iterator it = last_word_al.begin(); it != last_word_al.end(); it++){
if (tempidx == last_word_al.size()/2){
source_center_index = *(it);
break;
}
}
}
//We have found the alignment. Now determine how much to shift by to get the actual source word index.
// Now we get the source words. First select middle alignment.
size_t source_center_index = selectMiddleAlignment(last_word_al);
// We have found the alignment. Now determine how much to shift by to get the actual source word index.
size_t phrase_start_pos = sourceWordRange.GetStartPos();
size_t source_word_mid_idx = phrase_start_pos + targetWordIdx; //Account for how far the current word is from the start of the phrase.
// Account for how far the aligned source word is from the start of the source phrase.
size_t source_word_mid_idx = phrase_start_pos + source_center_index;
appendSourceWordsToVector(source_sent, words, source_word_mid_idx);
}
size_t BilingualLM::getState(const Hypothesis& cur_hypo) const {
const TargetPhrase &targetPhrase = cur_hypo.GetCurrTargetPhrase();
size_t hashCode = 0;
//Check if we need to look at previous target phrases
// Check if we need to look at previous target phrases
int additional_needed = targetPhrase.GetSize() - target_ngrams;
if (additional_needed < 0) {
additional_needed = -additional_needed;
std::vector<int> prev_words(additional_needed);
requestPrevTargetNgrams(cur_hypo, additional_needed, prev_words);
for (int i=additional_needed - 1; i>-1; i--) {
for (int i = additional_needed - 1; i >= 0; i--) {
boost::hash_combine(hashCode, prev_words[i]);
}
//Get the rest of the phrases needed
// Get the rest of the phrases needed
for (int i = 0; i < targetPhrase.GetSize(); i++) {
int neuralLM_wordID;
const Word& word = targetPhrase.GetWord(i);
neuralLM_wordID = getNeuralLMId(word);
int neuralLM_wordID = getNeuralLMId(word);
boost::hash_combine(hashCode, neuralLM_wordID);
}
} else {
// We just need the last target_ngrams from the current target phrase.
for (int i = targetPhrase.GetSize() - target_ngrams; i < targetPhrase.GetSize(); i++) {
int neuralLM_wordID;
const Word& word = targetPhrase.GetWord(i);
neuralLM_wordID = getNeuralLMId(word);
int neuralLM_wordID = getNeuralLMId(word);
boost::hash_combine(hashCode, neuralLM_wordID);
}
@ -439,45 +427,30 @@ void BilingualLM::EvaluateWithSourceContext(const InputType &input
FFState* BilingualLM::EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
{
ScoreComponentCollection* accumulator) const {
Manager& manager = cur_hypo.GetManager();
const Sentence& source_sent = static_cast<const Sentence&>(manager.GetSource());
//Init vectors
// Init vectors.
std::vector<int> source_words;
source_words.reserve(source_ngrams);
std::vector<int> target_words;
target_words.reserve(target_ngrams);
float value = 0;
const TargetPhrase& currTargetPhrase = cur_hypo.GetCurrTargetPhrase();
const WordsRange& sourceWordRange = cur_hypo.GetCurrSourceWordsRange(); //Source words range to calculate offsets
//For each word in the current target phrase get its LM score
// For each word in the current target phrase get its LM score.
for (int i = 0; i < currTargetPhrase.GetSize(); i++){
//std::cout << "Size of Before Words " << all_words.size() << std::endl;
getSourceWords(currTargetPhrase
, i //The current target phrase
, source_sent
, sourceWordRange
, source_words);
getTargetWords(cur_hypo
, currTargetPhrase
, i
, target_words);
getSourceWords(
currTargetPhrase, i, source_sent, sourceWordRange, source_words);
getTargetWords(cur_hypo, currTargetPhrase, i, target_words);
value += Score(source_words, target_words);
//Clear the vector
// Clear the vectors.
source_words.clear();
target_words.clear();
}
size_t new_state = getState(cur_hypo);

View File

@ -47,18 +47,22 @@ private:
virtual void loadModel() const = 0;
virtual bool parseAdditionalSettings(const std::string& key, const std::string& value) = 0;
void getSourceWords(const TargetPhrase &targetPhrase
, int targetWordIdx
, const Sentence &source_sent
, const WordsRange &sourceWordRange
, std::vector<int> &words) const;
size_t selectMiddleAlignment(const std::set<size_t>& alignment_links) const;
void getSourceWords(
const TargetPhrase &targetPhrase,
int targetWordIdx,
const Sentence &source_sent,
const WordsRange &sourceWordRange,
std::vector<int> &words) const;
void appendSourceWordsToVector(const Sentence &source_sent, std::vector<int> &words, int source_word_mid_idx) const;
void getTargetWords(const Hypothesis &cur_hypo
, const TargetPhrase &targetPhrase
, int current_word_index
, std::vector<int> &words) const;
void getTargetWords(
const Hypothesis &cur_hypo,
const TargetPhrase &targetPhrase,
int current_word_index,
std::vector<int> &words) const;
//size_t getState(const TargetPhrase &targetPhrase, std::vector<int> &prev_words) const;