Add support for POS backoff in factored models. The lookup code is spaghetti and needs refactoring.

2024-09-20 07:42:21 +03:00 · 2014-09-01 16:04:49 +01:00 · 2014-09-01 16:04:49 +01:00 · dd43c287f6
commit dd43c287f6
parent 6df221966d
2 changed files with 58 additions and 3 deletions
--- a/moses/LM/bilingual-lm/BilingualLM.cpp
+++ b/moses/LM/bilingual-lm/BilingualLM.cpp
@ -47,7 +47,7 @@ void BilingualLM::Load(){

 //Cache for NeuralLMids
 int BilingualLM::getNeuralLMId(const Word& word) const{
-  const Factor* factor = word.GetFactor(0); //Parameter here is m_factorType, hard coded to 0
+  const Factor* factor = word.GetFactor(word_factortype);

  std::map<const Factor *, int>::iterator it;

@ -55,7 +55,32 @@ int BilingualLM::getNeuralLMId(const Word& word) const{
  it = neuralLMids.find(factor);

  if (it != neuralLMids.end()) {
-    return it->second; //Lock is released here automatically
+    if (!factored){
+      return it->second; //Lock is released here automatically
+    } else {
+      //See if word is unknown
+      if (it->second == unknown_word_id){
+        const Factor* pos_factor = word.GetFactor(pos_factortype); //Get POS tag
+        //Look up the POS tag in the cache
+        it = neuralLMids.find(pos_factor);
+        if (it != neuralLMids.end()){
+          return it->second; //We have our pos tag in the cache.
+        } else {
+          //We have to lookup the word
+          const std::string string = pos_factor->GetString().as_string();
+          int neuralLM_wordID = m_neuralLM->lookup_word(string);
+
+          boost::upgrade_to_unique_lock< boost::shared_mutex > uniqueLock(read_lock);
+          neuralLMids.insert(std::pair<const Factor *, int>(pos_factor, neuralLM_wordID));
+
+          return neuralLM_wordID; //We return the ID of the pos TAG
+        }
+      } else {
+        return it->second; //We return the neuralLMid of the word
+      }
+      
+    }
+    
  } else {
    //We have to lookup the word
    const std::string string = factor->GetString().as_string();
@ -64,7 +89,18 @@ int BilingualLM::getNeuralLMId(const Word& word) const{
    boost::upgrade_to_unique_lock< boost::shared_mutex > uniqueLock(read_lock);
    neuralLMids.insert(std::pair<const Factor *, int>(factor, neuralLM_wordID));

-    return neuralLM_wordID; //Lock is released here
+    if (!factored) {
+      return neuralLM_wordID; //Lock is released here
+    } else {
+      if (neuralLM_wordID == unknown_word_id){
+        const Factor* pos_factor = word.GetFactor(pos_factortype);
+        const std::string factorstring = pos_factor->GetString().as_string();
+        neuralLM_wordID = m_neuralLM->lookup_word(factorstring);
+        neuralLMids.insert(std::pair<const Factor *, int>(pos_factor, neuralLM_wordID));
+      }
+      return neuralLM_wordID; //If a POS tag is needed, neuralLM_wordID is going to be updated.
+    }
+    
  }


@ -621,6 +657,19 @@ void BilingualLM::SetParameter(const std::string& key, const std::string& value)
      std::cerr << "UNRECOGNIZED OPTION FOR PARAMETER premultiply. Got " << value << " , expected true or false!" << std::endl;
      exit(1);
    }
+  } else if (key == "factored") {
+    std::string truestr = "true";
+    std::string falsestr = "false";
+    if (value == truestr) {
+      factored = true;
+    } else if (value == falsestr) {
+        factored = false;
+    } else {
+      std::cerr << "UNRECOGNIZED OPTION FOR PARAMETER factored. Got " << value << " , expected true or false!" << std::endl;
+      exit(1);
+    }
+  } else if (key == "pos_factor") {
+    pos_factortype = (size_t)atoi(value.c_str());
  } else {
    StatefulFeatureFunction::SetParameter(key, value);
  }
--- a/moses/LM/bilingual-lm/BilingualLM.h
+++ b/moses/LM/bilingual-lm/BilingualLM.h
@ -64,14 +64,20 @@ protected:
  int target_ngrams;
  int source_ngrams;
  bool premultiply = true;
+  bool factored = false;
  int neuralLM_cache = 1000000;
  int unknown_word_id;
+
+  //NeuralLM lookup
+  FactorType word_factortype = 0;
+  FactorType pos_factortype;
  const Factor* BOS_factor;
  const Factor* EOS_factor;
  mutable Word BOS_word_actual;
  mutable Word EOS_word_actual;
  const Word& BOS_word = BOS_word_actual;
  const Word& EOS_word = EOS_word_actual;
+
  // thread-specific nplm for thread-safety
  mutable boost::thread_specific_ptr<nplm::neuralLM> m_neuralLM;