GHKM: extract POS phrase property (from preterminals in the syntactic parse tree)

2024-12-25 12:52:29 +03:00 · 2015-03-04 21:40:56 +00:00 · 2015-03-04 21:40:56 +00:00 · 06e87d851e
commit 06e87d851e
parent 6d9b6764a6
12 changed files with 155 additions and 36 deletions
--- a/phrase-extract/ExtractionPhrasePair.cpp
+++ b/phrase-extract/ExtractionPhrasePair.cpp
@ -240,8 +240,9 @@ void ExtractionPhrasePair::AddProperties( const std::string &propertiesString, f
    tok = tok.substr(0, endPos - 1);

    vector<std::string> keyValue = Moses::TokenizeFirstOnly(tok, " ");
-    assert(keyValue.size() == 2);
-    AddProperty(keyValue[0], keyValue[1], count);
+    if (keyValue.size() == 2) {
+      AddProperty(keyValue[0], keyValue[1], count);
+    } 
  }
 }

@ -555,6 +556,27 @@ void ExtractionPhrasePair::CollectAllPhraseOrientations(const std::string &key,
 }


+// Collects into vocabulary every token of every value stored under propertyKey.
+// Values are split with Moses::Tokenize; a key for which GetProperty returns
+// NULL leaves vocabulary untouched.
+void ExtractionPhrasePair::UpdateVocabularyFromValueTokens(const std::string& propertyKey,
+    std::set<std::string>& vocabulary) const
+{
+  const PROPERTY_VALUES *values = GetProperty(propertyKey);
+
+  if (values != NULL) {
+    PROPERTY_VALUES::const_iterator valueIt = values->begin();
+    while (valueIt != values->end()) {
+      // valueIt->first is the string that gets tokenized
+      const std::vector<std::string> words = Moses::Tokenize(valueIt->first);
+      // range insert: tokens already present in the set are not added again
+      vocabulary.insert(words.begin(), words.end());
+      ++valueIt;
+    }
+  }
+}
+
+

 }

--- a/phrase-extract/ExtractionPhrasePair.h
+++ b/phrase-extract/ExtractionPhrasePair.h
@ -139,11 +139,14 @@ public:
                                    double smoothingFactor,
                                    std::ostream &out) const;

-  void AddProperties( const std::string &str, float count );
+  void UpdateVocabularyFromValueTokens(const std::string& propertyKey,
+                                       std::set<std::string>& vocabulary) const;

-  void AddProperty( const std::string &key, const std::string &value, float count ) {
+  void AddProperties(const std::string &str, float count);
+
+  void AddProperty(const std::string &key, const std::string &value, float count) {
    std::map<std::string,
-        std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter = m_properties.find(key);
+      std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter = m_properties.find(key);
    if ( iter == m_properties.end() ) {
      // key not found: insert property key and value
      PROPERTY_VALUES *propertyValues = new PROPERTY_VALUES();
--- a/phrase-extract/extract-ghkm/ExtractGHKM.cpp
+++ b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
@ -293,10 +293,16 @@ int ExtractGHKM::Main(int argc, char *argv[])
        }
        // TODO Can scope pruning be done earlier?
        if (r->Scope() <= options.maxScope) {
-          if (!options.treeFragments) {
-            scfgWriter.Write(*r,lineNum,false);
-          } else {
-            scfgWriter.Write(*r,**q,lineNum,false);
+          scfgWriter.Write(*r,lineNum,false);
+          if (options.treeFragments) {
+            fwdExtractStream << " {{Tree ";
+            (*q)->PrintTree(fwdExtractStream);
+            fwdExtractStream << "}}";
+          }
+          if (options.partsOfSpeech) {
+            fwdExtractStream << " {{POS";
+            (*q)->PrintPartsOfSpeech(fwdExtractStream);
+            fwdExtractStream << "}}";
          }
          if (options.phraseOrientation) {
            fwdExtractStream << " {{Orientation ";
@ -459,6 +465,8 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
   "set maximum allowed scope")
  ("Minimal",
   "extract minimal rules only")
+  ("PartsOfSpeech",
+   "output parts-of-speech information (preterminals from the parse tree)")
  ("PCFG",
   "include score based on PCFG scores in target corpus")
  ("PhraseOrientation",
@ -571,6 +579,9 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
  if (vm.count("Minimal")) {
    options.minimal = true;
  }
+  if (vm.count("PartsOfSpeech")) {
+    options.partsOfSpeech = true;
+  }
  if (vm.count("PCFG")) {
    options.pcfg = true;
  }
@ -667,6 +678,9 @@ void ExtractGHKM::WriteGlueGrammar(
  if (options.treeFragments) {
    out << " {{Tree [" << topLabel << " [SSTART <s>]]}}";
  }
+  if (options.partsOfSpeech) {
+    out << " {{POS SSTART}}";
+  }
  if (options.sourceLabels) {
    out << " {{SourceLabels 2 1 " << sourceLabelSentenceStart << " 1 1 " << sourceLabelGlueTop << " 1}}";
  }
@ -679,6 +693,9 @@ void ExtractGHKM::WriteGlueGrammar(
  if (options.treeFragments) {
    out << " {{Tree [" << topLabel << " [" << topLabel << "] [SEND </s>]]}}";
  }
+  if (options.partsOfSpeech) {
+    out << " {{POS SEND}}";
+  }
  if (options.sourceLabels) {
    out << " {{SourceLabels 4 1 " << sourceLabelSentenceStart << " " << sourceLabelGlueTop << " " << sourceLabelSentenceEnd << " 1 1 " << sourceLabelGlueTop << " 1}}";
  }
@ -694,6 +711,9 @@ void ExtractGHKM::WriteGlueGrammar(
    if (options.treeFragments) {
      out << " {{Tree [" << topLabel << " [SSTART <s>] [" << i->first << "] [SEND </s>]]}}";
    }
+    if (options.partsOfSpeech) {
+      out << " {{POS SSTART SEND}}";
+    }
    if (options.sourceLabels) {
      out << " {{SourceLabels 4 1 " << sourceLabelSentenceStart << " " << sourceLabelGlueX << " " << sourceLabelSentenceEnd << " 1 1 " << sourceLabelGlueTop << " 1}}";
    }
--- a/phrase-extract/extract-ghkm/Options.h
+++ b/phrase-extract/extract-ghkm/Options.h
@ -40,6 +40,7 @@ public:
    , maxRuleSize(3)
    , maxScope(3)
    , minimal(false)
+    , partsOfSpeech(false)
    , pcfg(false)
    , phraseOrientation(false)
    , sentenceOffset(0)
@ -68,6 +69,7 @@ public:
  int maxRuleSize;
  int maxScope;
  bool minimal;
+  bool partsOfSpeech;
  bool pcfg;
  bool phraseOrientation;
  int sentenceOffset;
--- a/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
+++ b/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
@ -191,18 +191,5 @@ void ScfgRuleWriter::WriteSymbol(const Symbol &symbol, std::ostream &out)
  }
 }

-void ScfgRuleWriter::Write(const ScfgRule &rule, const Subgraph &g, size_t lineNum, bool printEndl)
-{
-  Write(rule,lineNum,false);
-  m_fwd << " {{Tree ";
-  g.PrintTree(m_fwd);
-  m_fwd << "}}";
-
-  if (printEndl) {
-    m_fwd << std::endl;
-    m_inv << std::endl;
-  }
-}
-
 }  // namespace GHKM
 }  // namespace Moses
--- a/phrase-extract/extract-ghkm/ScfgRuleWriter.h
+++ b/phrase-extract/extract-ghkm/ScfgRuleWriter.h
@ -44,8 +44,6 @@ public:

  void Write(const ScfgRule &rule, size_t lineNum, bool printEndl=true);

-  void Write(const ScfgRule &rule, const Subgraph &g, size_t lineNum, bool printEndl=true);
-
 private:
  // Disallow copying
  ScfgRuleWriter(const ScfgRuleWriter &);
--- a/phrase-extract/extract-ghkm/Subgraph.cpp
+++ b/phrase-extract/extract-ghkm/Subgraph.cpp
@ -144,5 +144,29 @@ void Subgraph::RecursivelyPrintTree(const Node *n, std::ostream &out) const
  }
 }

+// Writes the part-of-speech labels of this subgraph to out, starting at m_root.
+void Subgraph::PrintPartsOfSpeech(std::ostream &out) const
+{
+  this->RecursivelyPrintPartsOfSpeech(this->m_root, out);
+}
+
+// Recursive helper for PrintPartsOfSpeech. Only TREE nodes that are not in
+// m_leaves are expanded. For each child of type TARGET, the label of n (the
+// child's parent) is written to out preceded by a single space; every other
+// child is visited recursively, in the order returned by GetChildren().
+void Subgraph::RecursivelyPrintPartsOfSpeech(const Node *n, std::ostream &out) const
+{
+  if (n->GetType() != TREE) {
+    return;
+  }
+  if (m_leaves.count(n) != 0) {
+    return;  // n is in m_leaves: do not descend below it
+  }
+
+  const std::vector<Node *> &kids = n->GetChildren();
+  for (std::vector<Node *>::size_type i = 0; i < kids.size(); ++i) {
+    Node *kid = kids[i];
+    if (kid->GetType() != TARGET) {
+      RecursivelyPrintPartsOfSpeech(kid, out);
+      continue;
+    }
+    out << " " << n->GetLabel();
+  }
+}
+
 }  // namespace Moses
 }  // namespace GHKM
--- a/phrase-extract/extract-ghkm/Subgraph.h
+++ b/phrase-extract/extract-ghkm/Subgraph.h
@ -116,8 +116,8 @@ public:
  }

  void GetTargetLeaves(std::vector<const Node *> &) const;
-
  void PrintTree(std::ostream &out) const;
+  void PrintPartsOfSpeech(std::ostream &out) const;

 private:
  void GetTargetLeaves(const Node *, std::vector<const Node *> &) const;
@ -126,6 +126,7 @@ private:
  float CalcPcfgScore() const;
  int CountNodes(const Node *) const;
  void RecursivelyPrintTree(const Node *n, std::ostream &out) const;
+  void RecursivelyPrintPartsOfSpeech(const Node *n, std::ostream &out) const;

  const Node *m_root;
  std::set<const Node *> m_leaves;
--- a/phrase-extract/score-main.cpp
+++ b/phrase-extract/score-main.cpp
@ -50,8 +50,8 @@ bool hierarchicalFlag = false;
 bool pcfgFlag = false;
 bool phraseOrientationFlag = false;
 bool treeFragmentsFlag = false;
+bool partsOfSpeechFlag = false;
 bool sourceSyntaxLabelsFlag = false;
-bool sourceSyntaxLabelSetFlag = false;
 bool sourceSyntaxLabelCountsLHSFlag = false;
 bool targetPreferenceLabelsFlag = false;
 bool unpairedExtractFormatFlag = false;
@ -80,6 +80,8 @@ std::set<std::string> sourceLabelSet;
 std::map<std::string,size_t> sourceLabels;
 std::vector<std::string> sourceLabelsByIndex;

+std::set<std::string> partsOfSpeechSet;
+
 boost::unordered_map<std::string,float> targetPreferenceLHSCounts;
 boost::unordered_map<std::string, boost::unordered_map<std::string,float>* > ruleTargetLHSAndTargetPreferenceLHSJointCounts;
 std::set<std::string> targetPreferenceLabelSet;
@ -129,7 +131,7 @@ int main(int argc, char* argv[])

  ScoreFeatureManager featureManager;
  if (argc < 4) {
-    std::cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--KneserNey] [--NoWordAlignment] [--UnalignedPenalty] [--UnalignedFunctionWordPenalty function-word-file] [--MinCountHierarchical count] [--PCFG] [--TreeFragments] [--SourceLabels] [--SourceLabelSet] [--SourceLabelCountsLHS] [--TargetPreferenceLabels] [--UnpairedExtractFormat] [--ConditionOnTargetLHS] [--CrossedNonTerm]" << std::endl;
+    std::cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--KneserNey] [--NoWordAlignment] [--UnalignedPenalty] [--UnalignedFunctionWordPenalty function-word-file] [--MinCountHierarchical count] [--PCFG] [--TreeFragments] [--SourceLabels] [--SourceLabelCountsLHS] [--TargetPreferenceLabels] [--UnpairedExtractFormat] [--ConditionOnTargetLHS] [--CrossedNonTerm]" << std::endl;
    std::cerr << featureManager.usage() << std::endl;
    exit(1);
  }
@ -137,6 +139,7 @@ int main(int argc, char* argv[])
  std::string fileNameLex = argv[2];
  std::string fileNamePhraseTable = argv[3];
  std::string fileNameSourceLabelSet;
+  std::string fileNamePartsOfSpeechSet;
  std::string fileNameCountOfCounts;
  std::string fileNameFunctionWords;
  std::string fileNameLeftHandSideSourceLabelCounts;
@ -163,11 +166,14 @@ int main(int argc, char* argv[])
    } else if (strcmp(argv[i],"--TreeFragments") == 0) {
      treeFragmentsFlag = true;
      std::cerr << "including tree fragment information from syntactic parse" << std::endl;
+    } else if (strcmp(argv[i],"--PartsOfSpeech") == 0) {
+      partsOfSpeechFlag = true;
+      std::cerr << "including parts-of-speech information from syntactic parse" << std::endl;
+      fileNamePartsOfSpeechSet = std::string(fileNamePhraseTable) + ".partsOfSpeech";
+      std::cerr << "writing parts-of-speech set to file " << fileNamePartsOfSpeechSet << std::endl;
    } else if (strcmp(argv[i],"--SourceLabels") == 0) {
      sourceSyntaxLabelsFlag = true;
      std::cerr << "including source label information" << std::endl;
-    } else if (strcmp(argv[i],"--SourceLabelSet") == 0) {
-      sourceSyntaxLabelSetFlag = true;
      fileNameSourceLabelSet = std::string(fileNamePhraseTable) + ".syntaxLabels.src";
      std::cerr << "writing source syntax label set to file " << fileNameSourceLabelSet << std::endl;
    } else if (strcmp(argv[i],"--SourceLabelCountsLHS") == 0) {
@ -452,7 +458,7 @@ int main(int argc, char* argv[])
  }

  // source syntax labels
-  if (sourceSyntaxLabelsFlag && sourceSyntaxLabelSetFlag && !inverseFlag) {
+  if (sourceSyntaxLabelsFlag && !inverseFlag) {
    writeLabelSet( sourceLabelSet, fileNameSourceLabelSet );
  }
  if (sourceSyntaxLabelsFlag && sourceSyntaxLabelCountsLHSFlag && !inverseFlag) {
@ -462,6 +468,11 @@ int main(int argc, char* argv[])
                                  fileNameLeftHandSideTargetSourceLabelCounts );
  }

+  // parts-of-speech
+  if (partsOfSpeechFlag && !inverseFlag) {
+    writeLabelSet( partsOfSpeechSet, fileNamePartsOfSpeechSet );
+  }
+
  // target preference labels
  if (targetPreferenceLabelsFlag && !inverseFlag) {
    writeLabelSet( targetPreferenceLabelSet, fileNameTargetPreferenceLabelSet );
@ -615,8 +626,8 @@ void writeLabelSet( const std::set<std::string> &labelSet, const std::string &fi
  Moses::OutputFileStream out;
  bool success = out.Open(fileName.c_str());
  if (!success) {
-    std::cerr << "ERROR: could not open label set file "
-              << fileName << std::endl;
+    std::cerr << "ERROR: could not open file "
+              << fileName << " for writing" << std::endl;
    return;
  }

@ -811,6 +822,15 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
    }
  }

+  // parts-of-speech
+  if (partsOfSpeechFlag && !inverseFlag) {
+    phrasePair.UpdateVocabularyFromValueTokens("POS", partsOfSpeechSet);
+    const std::string *bestPartOfSpeech = phrasePair.FindBestPropertyValue("POS");
+    if (bestPartOfSpeech) {
+      phraseTableFile << " {{POS " << *bestPartOfSpeech << "}}";
+    }
+  }
+
  // syntax labels
  if ((sourceSyntaxLabelsFlag || targetPreferenceLabelsFlag) && !inverseFlag) {
    unsigned nNTs = 1;
--- a/scripts/ems/experiment.perl
+++ b/scripts/ems/experiment.perl
@ -2240,6 +2240,14 @@ sub define_training_extract_phrases {

      if (&get("TRAINING:ghkm-source-labels")) {
        $cmd .= "-ghkm-source-labels ";
+        my $source_labels_file = &versionize(&long_file_name("source-labels","model",""));
+        $cmd .= "-ghkm-source-labels-file $source_labels_file ";
+      }
+
+      if (&get("TRAINING:ghkm-parts-of-speech")) {
+        $cmd .= "-ghkm-parts-of-speech ";
+        my $parts_of_speech_labels_file = &versionize(&long_file_name("parts-of-speech","model",""));
+        $cmd .= "-ghkm-parts-of-speech-file $parts_of_speech_labels_file ";
      }
    }

@ -2270,19 +2278,28 @@ sub define_training_build_ttable {
    $cmd .= &define_domain_feature_score_option($domains) if &get("TRAINING:domain-features");

    if (&get("TRAINING:hierarchical-rule-set")) {
+
      if (&get("TRAINING:ghkm-tree-fragments")) {
        $cmd .= "-ghkm-tree-fragments ";
      }
+
      if (&get("TRAINING:ghkm-phrase-orientation")) {
        $cmd .= "-ghkm-phrase-orientation ";
        my $phrase_orientation_priors_file = &versionize(&long_file_name("phrase-orientation-priors","model",""));
        $cmd .= "-phrase-orientation-priors-file $phrase_orientation_priors_file ";
      }
+
      if (&get("TRAINING:ghkm-source-labels")) {
        $cmd .= "-ghkm-source-labels ";
        my $source_labels_file = &versionize(&long_file_name("source-labels","model",""));
        $cmd .= "-ghkm-source-labels-file $source_labels_file ";
      }
+
+      if (&get("TRAINING:ghkm-parts-of-speech")) {
+        $cmd .= "-ghkm-parts-of-speech ";
+        my $parts_of_speech_labels_file = &versionize(&long_file_name("parts-of-speech","model",""));
+        $cmd .= "-ghkm-parts-of-speech-file $parts_of_speech_labels_file ";
+      }
    }
    
    &create_step($step_id,$cmd);
@ -2476,6 +2493,12 @@ sub define_training_create_config {
      $cmd .= "-ghkm-source-labels-file $source_labels_file ";
    }

+    if (&get("TRAINING:ghkm-parts-of-speech")) {
+      $cmd .= "-ghkm-parts-of-speech ";
+      my $parts_of_speech_labels_file = &versionize(&long_file_name("parts-of-speech","model",""));
+      $cmd .= "-ghkm-parts-of-speech-file $parts_of_speech_labels_file ";
+    }
+
    # sparse lexical features provide additional content for config file
    $cmd .= "-additional-ini-file $sparse_lexical_features.ini " if $sparse_lexical_features;

--- a/scripts/generic/score-parallel.perl
+++ b/scripts/generic/score-parallel.perl
@ -38,13 +38,19 @@ my $lexFile 		= $ARGV[4];
 my $ptHalf 			= $ARGV[5]; # output
 my $inverse = 0;
 my $sourceLabelsFile;
+my $partsOfSpeechFile;

 my $otherExtractArgs= "";
 for (my $i = 6; $i < $#ARGV; ++$i)
 {
  if ($ARGV[$i] eq '--SourceLabels') {
    $sourceLabelsFile = $ARGV[++$i];
-    $otherExtractArgs .= "--SourceLabels --SourceLabelCountsLHS --SourceLabelSet ";
+    $otherExtractArgs .= "--SourceLabels --SourceLabelCountsLHS ";
+    next;
+  }
+  if ($ARGV[$i] eq '--PartsOfSpeech') {
+    $partsOfSpeechFile = $ARGV[++$i];
+    $otherExtractArgs .= "--PartsOfSpeech ";
    next;
  }
  if ($ARGV[$i] eq '--Inverse') {
@ -287,6 +293,15 @@ if (!$inverse && defined($sourceLabelsFile))
  `$cmd`;
 }

+# merge parts-of-speech files
+if (!$inverse && defined($partsOfSpeechFile)) 
+{
+  my $cmd = "(echo \"SSTART 0\"; echo \"SEND 1\"; cat $TMPDIR/phrase-table.half.*.gz.partsOfSpeech | LC_ALL=C sort | uniq | perl -pe \"s/\$/ \@{[\$.+1]}/\") > $partsOfSpeechFile";
+  print STDERR "Merging parts-of-speech files: $cmd \n";
+  `$cmd`;
+}
+
+
 $cmd = "rm -rf $TMPDIR \n";
 print STDERR $cmd;
 systemCheck($cmd);
--- a/scripts/training/train-model.perl
+++ b/scripts/training/train-model.perl
@ -32,7 +32,9 @@ my($_EXTERNAL_BINDIR, $_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_
   $_DECODING_STEPS, $_PARALLEL, $_FACTOR_DELIMITER, @_PHRASE_TABLE,
   @_REORDERING_TABLE, @_GENERATION_TABLE, @_GENERATION_TYPE, $_GENERATION_CORPUS,
   $_DONT_ZIP,  $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG, $_OSM, $_OSM_FACTORS, $_POST_DECODING_TRANSLIT, $_TRANSLITERATION_PHRASE_TABLE,
-   $_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_GHKM_TREE_FRAGMENTS,$_GHKM_PHRASE_ORIENTATION,$_PHRASE_ORIENTATION_PRIORS_FILE,$_GHKM_SOURCE_LABELS,$_GHKM_SOURCE_LABELS_FILE,$_PCFG,@_EXTRACT_OPTIONS,@_SCORE_OPTIONS,$_S2T,
+   $_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,
+   $_GHKM_TREE_FRAGMENTS,$_GHKM_PHRASE_ORIENTATION,$_PHRASE_ORIENTATION_PRIORS_FILE,$_GHKM_SOURCE_LABELS,$_GHKM_SOURCE_LABELS_FILE,$_GHKM_PARTS_OF_SPEECH,$_GHKM_PARTS_OF_SPEECH_FILE,
+   $_PCFG,@_EXTRACT_OPTIONS,@_SCORE_OPTIONS,$_S2T,
   $_ALT_DIRECT_RULE_SCORE_1, $_ALT_DIRECT_RULE_SCORE_2, $_UNKNOWN_WORD_SOFT_MATCHES_FILE,
   $_OMIT_WORD_ALIGNMENT,$_FORCE_FACTORED_FILENAMES,
   $_MEMSCORE, $_FINAL_ALIGNMENT_MODEL,
@ -116,6 +118,8 @@ $_HELP = 1
 		       'phrase-orientation-priors-file=s' => \$_PHRASE_ORIENTATION_PRIORS_FILE, # currently relevant for GHKM extraction only; phrase orientation for PBT has different implementation
               'ghkm-source-labels' => \$_GHKM_SOURCE_LABELS,
               'ghkm-source-labels-file=s' => \$_GHKM_SOURCE_LABELS_FILE,
+               'ghkm-parts-of-speech' => \$_GHKM_PARTS_OF_SPEECH,
+               'ghkm-parts-of-speech-file=s' => \$_GHKM_PARTS_OF_SPEECH_FILE,
 		       'pcfg' => \$_PCFG,
 		       'alt-direct-rule-score-1' => \$_ALT_DIRECT_RULE_SCORE_1,
 		       'alt-direct-rule-score-2' => \$_ALT_DIRECT_RULE_SCORE_2,
@ -1454,6 +1458,7 @@ sub extract_phrase {
          $cmd .= " --PhraseOrientation" if $_GHKM_PHRASE_ORIENTATION;
          $cmd .= " --PhraseOrientationPriors $_PHRASE_ORIENTATION_PRIORS_FILE" if defined($_PHRASE_ORIENTATION_PRIORS_FILE);
          $cmd .= " --SourceLabels" if $_GHKM_SOURCE_LABELS;
+          $cmd .= " --PartsOfSpeech" if $_GHKM_PARTS_OF_SPEECH;
        }
        else
        {
@ -1583,7 +1588,6 @@ sub score_phrase_phrase_extract {
    my $MIN_COUNT_HIERARCHICAL = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /MinCountHierarchical ([\d\.]+)/) ? $1 : undef;
    my $SOURCE_LABELS = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /SourceLabels/);
    my $SOURCE_LABEL_COUNTS_LHS = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /SourceLabelCountsLHS/);
-    my $SOURCE_LABEL_SET = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /SourceLabelSet/);
    my $SPAN_LENGTH = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /SpanLength/);
    my $CORE_SCORE_OPTIONS = "";
    $CORE_SCORE_OPTIONS .= " --LogProb" if $LOG_PROB;
@ -1593,7 +1597,6 @@ sub score_phrase_phrase_extract {
 	$CORE_SCORE_OPTIONS .= " --CrossedNonTerm" if $CROSSEDNONTERM;
    $CORE_SCORE_OPTIONS .= " --SourceLabels" if $SOURCE_LABELS;
    $CORE_SCORE_OPTIONS .= " --SourceLabelCountsLHS " if $SOURCE_LABEL_COUNTS_LHS;
-    $CORE_SCORE_OPTIONS .= " --SourceLabelSet " if $SOURCE_LABEL_SET;

    my $substep = 1;
    my $isParent = 1;
@ -1637,6 +1640,7 @@ sub score_phrase_phrase_extract {
        $cmd .= " --PhraseOrientation" if $_GHKM_PHRASE_ORIENTATION;
        $cmd .= " --PhraseOrientationPriors $_PHRASE_ORIENTATION_PRIORS_FILE" if $_GHKM_PHRASE_ORIENTATION && defined($_PHRASE_ORIENTATION_PRIORS_FILE);
        $cmd .= " --SourceLabels $_GHKM_SOURCE_LABELS_FILE" if $_GHKM_SOURCE_LABELS && defined($_GHKM_SOURCE_LABELS_FILE);
+        $cmd .= " --PartsOfSpeech $_GHKM_PARTS_OF_SPEECH_FILE" if $_GHKM_PARTS_OF_SPEECH && defined($_GHKM_PARTS_OF_SPEECH_FILE);
        $cmd .= " $DOMAIN" if $DOMAIN;
        $cmd .= " $CORE_SCORE_OPTIONS" if defined($_SCORE_OPTIONS);
        $cmd .= " --FlexibilityScore=$FLEX_SCORER" if $_FLEXIBILITY_SCORE;