#pragma once #include #include "moses/FF/StatefulFeatureFunction.h" #include "moses/FF/FFState.h" #include #include #include #include "moses/Hypothesis.h" #include "moses/ChartHypothesis.h" #include "moses/InputPath.h" #include "moses/Manager.h" #include "moses/ChartManager.h" #include "moses/FactorCollection.h" namespace Moses { class BilingualLMState : public FFState { size_t m_hash; std::vector word_alignments; //Carry the word alignments. For hierarchical public: BilingualLMState(size_t hash) :m_hash(hash) {} BilingualLMState(size_t hash, std::vector& word_alignments_vec) :m_hash(hash) , word_alignments(word_alignments_vec) {} const std::vector& GetWordAlignmentVector() const { return word_alignments; } int Compare(const FFState& other) const; }; class BilingualLM : public StatefulFeatureFunction { private: virtual float Score(std::vector& source_words, std::vector& target_words) const = 0; virtual int getNeuralLMId(const Word& word, bool is_source_word) const = 0; virtual void loadModel() = 0; size_t selectMiddleAlignment(const std::set& alignment_links) const; void getSourceWords( const TargetPhrase &targetPhrase, int targetWordIdx, const Sentence &source_sent, const WordsRange &sourceWordRange, std::vector &words) const; void appendSourceWordsToVector(const Sentence &source_sent, std::vector &words, int source_word_mid_idx) const; void getTargetWords( const Hypothesis &cur_hypo, const TargetPhrase &targetPhrase, int current_word_index, std::vector &words) const; //size_t getState(const TargetPhrase &targetPhrase, std::vector &prev_words) const; size_t getState(const Hypothesis &cur_hypo) const; void requestPrevTargetNgrams(const Hypothesis &cur_hypo, int amount, std::vector &words) const; //Chart decoder void getTargetWordsChart( Phrase& whole_phrase, int current_word_index, std::vector &words) const; //Returns the index of the source_word that the current target word uses int getSourceWordsChart( const TargetPhrase &targetPhrase, const ChartHypothesis& curr_hypothesis, int targetWordIdx, const Sentence &source_sent, size_t souce_phrase_start_pos, int next_nonterminal_index, int featureID, std::vector &words) const; size_t getStateChart(Phrase& whole_phrase) const; protected: // big data (vocab, weights, cache) shared among threads std::string m_filePath; int target_ngrams; int source_ngrams; //NeuralLM lookup FactorType word_factortype; FactorType pos_factortype; const Factor* BOS_factor; const Factor* EOS_factor; mutable Word BOS_word_actual; mutable Word EOS_word_actual; const Word& BOS_word; const Word& EOS_word; public: BilingualLM(const std::string &line); bool IsUseable(const FactorMask &mask) const { return true; } virtual const FFState* EmptyHypothesisState(const InputType &input) const { return new BilingualLMState(0); } void Load(); void EvaluateInIsolation( const Phrase &source, const TargetPhrase &targetPhrase, ScoreComponentCollection &scoreBreakdown, ScoreComponentCollection &estimatedFutureScore) const; void EvaluateWithSourceContext( const InputType &input, const InputPath &inputPath, const TargetPhrase &targetPhrase, const StackVec *stackVec, ScoreComponentCollection &scoreBreakdown, ScoreComponentCollection *estimatedFutureScore = NULL) const; FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const; FFState* EvaluateWhenApplied( const ChartHypothesis& cur_hypo , int featureID, /* - used to index the state in the previous hypotheses */ ScoreComponentCollection* accumulator) const; void SetParameter(const std::string& key, const std::string& value); }; }