mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-20 07:42:21 +03:00
EvaluateWithSource compiles
This commit is contained in:
parent
16eecbd35f
commit
85db12549c
@@ -2,6 +2,7 @@
|
||||
#include "BilingualLM.h"
|
||||
#include "moses/ScoreComponentCollection.h"
|
||||
#include "moses/Hypothesis.h"
|
||||
#include "moses/InputPath.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@@ -45,25 +46,96 @@ void BilingualLM::EvaluateWithSourceContext(const InputType &input
|
||||
, ScoreComponentCollection *estimatedFutureScore) const
|
||||
{
|
||||
double value = 0;
|
||||
for (int i = 0; i < targetPhrase.GetSize() - target_ngrams; ++i) {
|
||||
for (size_t i = 0; i < targetPhrase.GetSize() - target_ngrams; ++i) {
|
||||
//Get source word indexes
|
||||
/*
|
||||
const AlignmentInfo& alignments = targetPhrase.GetAlignTerm();
|
||||
for (int j = 0; j< source_ngrams; j++){
|
||||
|
||||
}*/
|
||||
|
||||
//Insert n target phrase words.
|
||||
std::vector<int> words(target_ngrams);
|
||||
std::vector<int> words(target_ngrams + source_ngrams);
|
||||
|
||||
//Taken from NeuralLM wrapper more or less
|
||||
for (int j = 0; j < target_ngrams; j++){
|
||||
const Word& word = targetPhrase.GetWord(i + j);
|
||||
for (int j = target_ngrams - 1; j < -1; j--){
|
||||
const Word& word = targetPhrase.GetWord(i + j); //Target phrase is actually Phrase
|
||||
const Factor* factor = word.GetFactor(0); //Parameter here is m_factorType, hard coded to 0
|
||||
const std::string string = factor->GetString().as_string();
|
||||
int neuralLM_wordID = m_neuralLM->lookup_word(string);
|
||||
words[i] = neuralLM_wordID;
|
||||
words.push_back(neuralLM_wordID); //In the paper it seems to be in reverse order
|
||||
}
|
||||
//Get source context
|
||||
|
||||
//Get alignment for the word we require
|
||||
const AlignmentInfo& alignments = targetPhrase.GetAlignTerm();
|
||||
|
||||
//We are getting word alignment for targetPhrase.GetWord(i + target_ngrams -1) according to the paper.
|
||||
//Try to get some alignment, because the word we desire might be unaligned.
|
||||
std::set<size_t> last_word_al;
|
||||
for (int j = 0; j < targetPhrase.GetSize(); j++){
|
||||
//Sometimes our word will not be aligned, so find the nearest aligned word right
|
||||
if ((i + target_ngrams -1 +j) < targetPhrase.GetSize()){
|
||||
last_word_al = alignments.GetAlignmentsForTarget(i + target_ngrams -1 + j);
|
||||
if (!last_word_al.empty()){
|
||||
break;
|
||||
}
|
||||
} else if ((i + target_ngrams -1 +j) > 0) {
|
||||
//We couldn't find word on the right, try the left.
|
||||
last_word_al = alignments.GetAlignmentsForTarget(i + target_ngrams -1 -j);
|
||||
if (!last_word_al.empty()){
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//Assume we have gotten some alignment here. Now we get the source words.
|
||||
size_t source_center_index;
|
||||
if (last_word_al.size() == 1) {
|
||||
//We have only one word aligned
|
||||
source_center_index = *last_word_al.begin();
|
||||
} else { //We have more than one alignments, take the middle one
|
||||
int tempidx = 0; //Temporary index to track where the iterator is.
|
||||
for (std::set<size_t>::iterator it = last_word_al.begin(); it != last_word_al.end(); it++){
|
||||
if (tempidx == last_word_al.size()/2){
|
||||
source_center_index = *(it);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//We have found the alignment. Now determine how much to shift by to get the actual source word index.
|
||||
const WordsRange& wordsRange = inputPath.GetWordsRange();
|
||||
size_t phrase_start_pos = wordsRange.GetStartPos();
|
||||
size_t source_word_mid_idx = phrase_start_pos + i + target_ngrams -1; //Account for how far the current word is from the start of the phrase.
|
||||
|
||||
const Sentence& source_sent = static_cast<const Sentence&>(input);
|
||||
|
||||
//Define begin and end indexes of the lookup. Cases for even and odd ngrams
|
||||
int begin_idx;
|
||||
int end_idx;
|
||||
if (source_ngrams%2 == 0){
|
||||
int begin_idx = source_word_mid_idx - source_ngrams/2 - 1;
|
||||
int end_idx = source_word_mid_idx + source_ngrams/2;
|
||||
} else {
|
||||
int begin_idx = source_word_mid_idx - (source_ngrams - 1)/2;
|
||||
int end_idx = source_word_mid_idx + (source_ngrams - 1)/2;
|
||||
}
|
||||
|
||||
//Add words to vector
|
||||
for (int j = begin_idx; j < end_idx; j++) {
|
||||
int neuralLM_wordID;
|
||||
if (j < 0) {
|
||||
neuralLM_wordID = m_neuralLM->lookup_word(BOS_);
|
||||
} else if (j > source_sent.GetSize()) {
|
||||
neuralLM_wordID = m_neuralLM->lookup_word(EOS_);
|
||||
} else {
|
||||
const Word& word = source_sent.GetWord(j);
|
||||
const Factor* factor = word.GetFactor(0); //Parameter here is m_factorType, hard coded to 0
|
||||
const std::string string = factor->GetString().as_string();
|
||||
neuralLM_wordID = m_neuralLM->lookup_word(string);
|
||||
}
|
||||
words.push_back(neuralLM_wordID);
|
||||
|
||||
}
|
||||
|
||||
value += m_neuralLM->lookup_ngram(words);
|
||||
}
|
||||
scoreBreakdown.PlusEquals(FloorScore(value)); //If the ngrams are > than the target phrase the value added will be zero.
|
||||
|
Loading…
Reference in New Issue
Block a user