From 60e56efc6bc41f08e7e7ae6251f9ae8ae93e42ad Mon Sep 17 00:00:00 2001
From: Phil Williams <philip.williams@mac.com>
Date: Sun, 7 Dec 2014 14:27:51 +0000
Subject: [PATCH] phrase-extract: add syntax-common sub-library

And remove some (near-)duplicate code from pcfg-common and score-stsg.
---
 Jamroot                                       |   2 +-
 phrase-extract/pcfg-common/Jamfile            |   2 +-
 phrase-extract/pcfg-common/exception.h        |  46 -------
 phrase-extract/pcfg-common/numbered_set.h     | 126 ------------------
 phrase-extract/pcfg-common/pcfg.cc            |  10 +-
 phrase-extract/pcfg-common/pcfg.h             |  37 ++---
 phrase-extract/pcfg-common/pcfg_tree.h        |  39 +++---
 phrase-extract/pcfg-common/syntax_tree.h      |  65 +++------
 phrase-extract/pcfg-common/tool.cc            |   6 +-
 phrase-extract/pcfg-common/tool.h             |  27 ++--
 phrase-extract/pcfg-common/typedef.h          |  16 +--
 phrase-extract/pcfg-common/xml_tree_parser.cc |  24 ++--
 phrase-extract/pcfg-common/xml_tree_parser.h  |  23 ++--
 phrase-extract/pcfg-common/xml_tree_writer.h  |  36 +++--
 phrase-extract/pcfg-extract/main.cc           |   2 +-
 phrase-extract/pcfg-extract/options.h         |  10 +-
 phrase-extract/pcfg-extract/pcfg_extract.cc   |   9 +-
 phrase-extract/pcfg-extract/pcfg_extract.h    |  15 +--
 .../pcfg-extract/rule_collection.cc           |   6 +-
 phrase-extract/pcfg-extract/rule_collection.h |  37 ++---
 phrase-extract/pcfg-extract/rule_extractor.cc |   6 +-
 phrase-extract/pcfg-extract/rule_extractor.h  |  21 ++-
 phrase-extract/pcfg-score/main.cc             |   2 +-
 phrase-extract/pcfg-score/options.h           |  10 +-
 phrase-extract/pcfg-score/pcfg_score.cc       |  30 +++--
 phrase-extract/pcfg-score/pcfg_score.h        |  19 ++-
 phrase-extract/pcfg-score/tree_scorer.cc      |   6 +-
 phrase-extract/pcfg-score/tree_scorer.h       |  17 ++-
 phrase-extract/relax-parse-main.cpp           |   2 +-
 phrase-extract/score-stsg/Exception.h         |  23 ----
 phrase-extract/score-stsg/Jamfile             |   2 +-
 phrase-extract/score-stsg/LexicalTable.cpp    |   7 +-
 phrase-extract/score-stsg/LexicalTable.h      |   7 +-
 phrase-extract/score-stsg/Main.cpp            |   2 +-
 phrase-extract/score-stsg/Options.h           |   7 +-
 phrase-extract/score-stsg/RuleGroup.cpp       |   7 +-
 phrase-extract/score-stsg/RuleGroup.h         |   7 +-
 phrase-extract/score-stsg/RuleSymbol.h        |   7 +-
 phrase-extract/score-stsg/RuleTableWriter.cpp |   8 +-
 phrase-extract/score-stsg/RuleTableWriter.h   |  11 +-
 phrase-extract/score-stsg/ScoreStsg.cpp       |  29 ++--
 phrase-extract/score-stsg/ScoreStsg.h         |  19 +--
 .../score-stsg/TokenizedRuleHalf.cpp          |   7 +-
 phrase-extract/score-stsg/TokenizedRuleHalf.h |  12 +-
 phrase-extract/score-stsg/Vocabulary.h        |   8 +-
 phrase-extract/syntax-common/Jamfile          |   8 ++
 phrase-extract/syntax-common/exception.h      |  20 +++
 .../numbered_set.h}                           |  10 +-
 phrase-extract/syntax-common/string_tree.h    |  13 ++
 phrase-extract/syntax-common/tree-inl.h       | 115 ++++++++++++++++
 phrase-extract/syntax-common/tree.h           |  91 +++++++++++++
 .../tree_fragment_tokenizer.cc}               |  10 +-
 .../tree_fragment_tokenizer.h}                |  17 ++-
 .../tree_fragment_tokenizer_test.cc           |  74 ++++++++++
 phrase-extract/syntax-common/tree_test.cc     |  66 +++++++++
 .../syntax-common/xml_tree_parser.cc          |  59 ++++++++
 .../syntax-common/xml_tree_parser.h           |  34 +++++
 scripts/other/beautify.perl                   |   2 +
 58 files changed, 805 insertions(+), 528 deletions(-)
 delete mode 100644 phrase-extract/pcfg-common/exception.h
 delete mode 100644 phrase-extract/pcfg-common/numbered_set.h
 delete mode 100644 phrase-extract/score-stsg/Exception.h
 create mode 100644 phrase-extract/syntax-common/Jamfile
 create mode 100644 phrase-extract/syntax-common/exception.h
 rename phrase-extract/{score-stsg/NumberedSet.h => syntax-common/numbered_set.h} (96%)
 create mode 100644 phrase-extract/syntax-common/string_tree.h
 create mode 100644 phrase-extract/syntax-common/tree-inl.h
 create mode 100644 phrase-extract/syntax-common/tree.h
 rename phrase-extract/{score-stsg/TreeFragmentTokenizer.cpp => syntax-common/tree_fragment_tokenizer.cc} (93%)
 rename phrase-extract/{score-stsg/TreeFragmentTokenizer.h => syntax-common/tree_fragment_tokenizer.h} (85%)
 create mode 100644 phrase-extract/syntax-common/tree_fragment_tokenizer_test.cc
 create mode 100644 phrase-extract/syntax-common/tree_test.cc
 create mode 100644 phrase-extract/syntax-common/xml_tree_parser.cc
 create mode 100644 phrase-extract/syntax-common/xml_tree_parser.h

diff --git a/Jamroot b/Jamroot
index c20dd1e48..ce14258a5 100644
--- a/Jamroot
+++ b/Jamroot
@@ -173,7 +173,7 @@ project : requirements
   ;
 
 #Add directories here if you want their incidental targets too (i.e. tests).
-build-projects lm util phrase-extract search moses moses/LM mert moses-cmd mira scripts regression-testing  ;
+build-projects lm util phrase-extract phrase-extract/syntax-common search moses moses/LM mert moses-cmd mira scripts regression-testing  ;
 
 if [ option.get "with-mm" : : "yes" ]
 {
diff --git a/phrase-extract/pcfg-common/Jamfile b/phrase-extract/pcfg-common/Jamfile
index b74b1071d..5669b443e 100644
--- a/phrase-extract/pcfg-common/Jamfile
+++ b/phrase-extract/pcfg-common/Jamfile
@@ -1 +1 @@
-lib pcfg_common : [ glob *.cc ] ..//deps : <include>.. ;
+lib pcfg_common : [ glob *.cc ] ..//syntax-common ..//deps : <include>.. ;
diff --git a/phrase-extract/pcfg-common/exception.h b/phrase-extract/pcfg-common/exception.h
deleted file mode 100644
index d9266ca36..000000000
--- a/phrase-extract/pcfg-common/exception.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/***********************************************************************
- Moses - statistical machine translation system
- Copyright (C) 2006-2012 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
-***********************************************************************/
-
-#pragma once
-#ifndef PCFG_EXCEPTION_H_
-#define PCFG_EXCEPTION_H_
-
-#include <string>
-
-namespace Moses
-{
-namespace PCFG
-{
-
-class Exception
-{
-public:
-  Exception(const char *msg) : msg_(msg) {}
-  Exception(const std::string &msg) : msg_(msg) {}
-  const std::string &msg() const {
-    return msg_;
-  }
-private:
-  std::string msg_;
-};
-
-}  // namespace PCFG
-}  // namespace Moses
-
-#endif
diff --git a/phrase-extract/pcfg-common/numbered_set.h b/phrase-extract/pcfg-common/numbered_set.h
deleted file mode 100644
index 66e960404..000000000
--- a/phrase-extract/pcfg-common/numbered_set.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/***********************************************************************
- Moses - statistical machine translation system
- Copyright (C) 2006-2012 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
-***********************************************************************/
-
-#pragma once
-#ifndef PCFG_NUMBERED_SET_H_
-#define PCFG_NUMBERED_SET_H_
-
-#include "exception.h"
-
-#include <boost/unordered_map.hpp>
-
-#include <limits>
-#include <sstream>
-#include <vector>
-
-namespace Moses
-{
-namespace PCFG
-{
-
-// Stores a set of elements of type T, each of which is allocated an integral
-// ID of type I.  IDs are contiguous starting at 0.  Individual elements cannot
-// be removed once inserted (but the whole set can be cleared).
-template<typename T, typename I=std::size_t>
-class NumberedSet
-{
-private:
-  typedef boost::unordered_map<T, I> ElementToIdMap;
-  typedef std::vector<const T *> IdToElementMap;
-
-public:
-  typedef I IdType;
-  typedef typename IdToElementMap::const_iterator const_iterator;
-
-  NumberedSet() {}
-
-  const_iterator begin() const {
-    return id_to_element_.begin();
-  }
-  const_iterator end() const {
-    return id_to_element_.end();
-  }
-
-  // Static value
-  static I NullId() {
-    return std::numeric_limits<I>::max();
-  }
-
-  bool Empty() const {
-    return id_to_element_.empty();
-  }
-  std::size_t Size() const {
-    return id_to_element_.size();
-  }
-
-  // Insert the given object and return its ID.
-  I Insert(const T &);
-
-  I Lookup(const T &) const;
-  const T &Lookup(I) const;
-
-  void Clear();
-
-private:
-  ElementToIdMap element_to_id_;
-  IdToElementMap id_to_element_;
-};
-
-template<typename T, typename I>
-I NumberedSet<T, I>::Lookup(const T &s) const
-{
-  typename ElementToIdMap::const_iterator p = element_to_id_.find(s);
-  return (p == element_to_id_.end()) ? NullId() : p->second;
-}
-
-template<typename T, typename I>
-const T &NumberedSet<T, I>::Lookup(I id) const
-{
-  if (id < 0 || id >= id_to_element_.size()) {
-    std::ostringstream msg;
-    msg << "Value not found: " << id;
-    throw Exception(msg.str());
-  }
-  return *(id_to_element_[id]);
-}
-
-template<typename T, typename I>
-I NumberedSet<T, I>::Insert(const T &x)
-{
-  std::pair<T, I> value(x, id_to_element_.size());
-  std::pair<typename ElementToIdMap::iterator, bool> result =
-    element_to_id_.insert(value);
-  if (result.second) {
-    // x is a new element.
-    id_to_element_.push_back(&result.first->first);
-  }
-  return result.first->second;
-}
-
-template<typename T, typename I>
-void NumberedSet<T, I>::Clear()
-{
-  element_to_id_.clear();
-  id_to_element_.clear();
-}
-
-}  // namespace PCFG
-}  // namespace Moses
-
-#endif
diff --git a/phrase-extract/pcfg-common/pcfg.cc b/phrase-extract/pcfg-common/pcfg.cc
index 054e20a48..cae6d4763 100644
--- a/phrase-extract/pcfg-common/pcfg.cc
+++ b/phrase-extract/pcfg-common/pcfg.cc
@@ -19,14 +19,15 @@
 
 #include "pcfg.h"
 
-#include "exception.h"
+#include <cassert>
 
 #include <boost/algorithm/string.hpp>
 #include <boost/lexical_cast.hpp>
 
-#include <cassert>
+#include "syntax-common/exception.h"
 
-namespace Moses {
+namespace MosesTraining {
+namespace Syntax {
 namespace PCFG {
 
 void Pcfg::Add(const Key &key, double score) {
@@ -103,4 +104,5 @@ void Pcfg::Write(const Vocabulary &vocab, std::ostream &output) const {
 }
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/pcfg-common/pcfg.h b/phrase-extract/pcfg-common/pcfg.h
index 5398cd97e..c5c04cba4 100644
--- a/phrase-extract/pcfg-common/pcfg.h
+++ b/phrase-extract/pcfg-common/pcfg.h
@@ -21,21 +21,19 @@
 #ifndef PCFG_PCFG_H_
 #define PCFG_PCFG_H_
 
-#include "typedef.h"
-
 #include <istream>
 #include <map>
 #include <ostream>
 #include <vector>
 
-namespace Moses
-{
-namespace PCFG
-{
+#include "typedef.h"
 
-class Pcfg
-{
-public:
+namespace MosesTraining {
+namespace Syntax {
+namespace PCFG {
+
+class Pcfg {
+ public:
   typedef std::vector<std::size_t> Key;
   typedef std::map<Key, double> Map;
   typedef Map::iterator iterator;
@@ -43,30 +41,23 @@ public:
 
   Pcfg() {}
 
-  iterator begin() {
-    return rules_.begin();
-  }
-  const_iterator begin() const {
-    return rules_.begin();
-  }
+  iterator begin() { return rules_.begin(); }
+  const_iterator begin() const { return rules_.begin(); }
 
-  iterator end() {
-    return rules_.end();
-  }
-  const_iterator end() const {
-    return rules_.end();
-  }
+  iterator end() { return rules_.end(); }
+  const_iterator end() const { return rules_.end(); }
 
   void Add(const Key &, double);
   bool Lookup(const Key &, double &) const;
   void Read(std::istream &, Vocabulary &);
   void Write(const Vocabulary &, std::ostream &) const;
 
-private:
+ private:
   Map rules_;
 };
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
 
 #endif
diff --git a/phrase-extract/pcfg-common/pcfg_tree.h b/phrase-extract/pcfg-common/pcfg_tree.h
index d125cad16..ce28eb8dd 100644
--- a/phrase-extract/pcfg-common/pcfg_tree.h
+++ b/phrase-extract/pcfg-common/pcfg_tree.h
@@ -21,48 +21,40 @@
 #ifndef PCFG_PCFG_TREE_H_
 #define PCFG_PCFG_TREE_H_
 
+#include <string>
+
 #include "syntax_tree.h"
 #include "xml_tree_writer.h"
 
-#include <string>
-
-namespace Moses
-{
-namespace PCFG
-{
+namespace MosesTraining {
+namespace Syntax {
+namespace PCFG {
 
 template<typename DerivedType>
-class PcfgTreeBase : public SyntaxTreeBase<std::string, DerivedType>
-{
-public:
+class PcfgTreeBase : public SyntaxTreeBase<std::string, DerivedType> {
+ public:
   typedef std::string LabelType;
   typedef SyntaxTreeBase<LabelType, DerivedType> BaseType;
 
   PcfgTreeBase(const LabelType &label) : BaseType(label), score_(0.0) {}
 
-  double score() const {
-    return score_;
-  }
-  void set_score(double s) {
-    score_ = s;
-  }
+  double score() const { return score_; }
+  void set_score(double s) { score_ = s; }
 
-private:
+ private:
   double score_;
 };
 
-class PcfgTree : public PcfgTreeBase<PcfgTree>
-{
-public:
+class PcfgTree : public PcfgTreeBase<PcfgTree> {
+ public:
   typedef PcfgTreeBase<PcfgTree> BaseType;
   PcfgTree(const BaseType::LabelType &label) : BaseType(label) {}
 };
 
 // Specialise XmlOutputHandler for PcfgTree.
 template<>
-class XmlOutputHandler<PcfgTree>
-{
-public:
+class XmlOutputHandler<PcfgTree> {
+ public:
   typedef std::map<std::string, std::string> AttributeMap;
 
   void GetLabel(const PcfgTree &tree, std::string &label) const {
@@ -81,6 +73,7 @@ public:
 };
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
 
 #endif
diff --git a/phrase-extract/pcfg-common/syntax_tree.h b/phrase-extract/pcfg-common/syntax_tree.h
index 93d9dbec9..c0c6eaef9 100644
--- a/phrase-extract/pcfg-common/syntax_tree.h
+++ b/phrase-extract/pcfg-common/syntax_tree.h
@@ -24,16 +24,14 @@
 #include <cassert>
 #include <vector>
 
-namespace Moses
-{
-namespace PCFG
-{
+namespace MosesTraining {
+namespace Syntax {
+namespace PCFG {
 
 // Base class for SyntaxTree, AgreementTree, and friends.
 template<typename T, typename DerivedType>
-class SyntaxTreeBase
-{
-public:
+class SyntaxTreeBase {
+ public:
   // Constructors
   SyntaxTreeBase(const T &label)
     : label_(label)
@@ -48,54 +46,33 @@ public:
   // Destructor
   virtual ~SyntaxTreeBase();
 
-  const T &label() const {
-    return label_;
-  }
-  const DerivedType *parent() const {
-    return parent_;
-  }
-  DerivedType *parent() {
-    return parent_;
-  }
-  const std::vector<DerivedType *> &children() const {
-    return children_;
-  }
-  std::vector<DerivedType *> &children() {
-    return children_;
-  }
+  const T &label() const { return label_; }
+  const DerivedType *parent() const { return parent_; }
+  DerivedType *parent() { return parent_; }
+  const std::vector<DerivedType *> &children() const { return children_; }
+  std::vector<DerivedType *> &children() { return children_; }
 
-  void set_label(const T &label) {
-    label_ = label;
-  }
-  void set_parent(DerivedType *parent) {
-    parent_ = parent;
-  }
-  void set_children(const std::vector<DerivedType *> &c) {
-    children_ = c;
-  }
+  void set_label(const T &label) { label_ = label; }
+  void set_parent(DerivedType *parent) { parent_ = parent; }
+  void set_children(const std::vector<DerivedType *> &c) { children_ = c; }
 
-  bool IsLeaf() const {
-    return children_.empty();
-  }
+  bool IsLeaf() const { return children_.empty(); }
 
   bool IsPreterminal() const {
     return children_.size() == 1 && children_[0]->IsLeaf();
   }
 
-  void AddChild(DerivedType *child) {
-    children_.push_back(child);
-  }
+  void AddChild(DerivedType *child) { children_.push_back(child); }
 
-private:
+ private:
   T label_;
   std::vector<DerivedType *> children_;
   DerivedType *parent_;
 };
 
 template<typename T>
-class SyntaxTree : public SyntaxTreeBase<T, SyntaxTree<T> >
-{
-public:
+class SyntaxTree : public SyntaxTreeBase<T, SyntaxTree<T> > {
+ public:
   typedef SyntaxTreeBase<T, SyntaxTree<T> > BaseType;
   SyntaxTree(const T &label) : BaseType(label) {}
   SyntaxTree(const T &label, const std::vector<SyntaxTree *> &children)
@@ -103,14 +80,14 @@ public:
 };
 
 template<typename T, typename DerivedType>
-SyntaxTreeBase<T, DerivedType>::~SyntaxTreeBase()
-{
+SyntaxTreeBase<T, DerivedType>::~SyntaxTreeBase() {
   for (std::size_t i = 0; i < children_.size(); ++i) {
     delete children_[i];
   }
 }
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
 
 #endif
diff --git a/phrase-extract/pcfg-common/tool.cc b/phrase-extract/pcfg-common/tool.cc
index bebd220e1..f54e07a12 100644
--- a/phrase-extract/pcfg-common/tool.cc
+++ b/phrase-extract/pcfg-common/tool.cc
@@ -21,7 +21,8 @@
 
 #include <sstream>
 
-namespace Moses {
+namespace MosesTraining {
+namespace Syntax {
 namespace PCFG {
 
 std::istream &Tool::OpenInputOrDie(const std::string &filename) {
@@ -77,4 +78,5 @@ void Tool::OpenNamedOutputOrDie(const std::string &filename,
 }
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/pcfg-common/tool.h b/phrase-extract/pcfg-common/tool.h
index aada036e3..2c903a11e 100644
--- a/phrase-extract/pcfg-common/tool.h
+++ b/phrase-extract/pcfg-common/tool.h
@@ -21,30 +21,26 @@
 #ifndef PCFG_TOOL_H_
 #define PCFG_TOOL_H_
 
-#include <boost/program_options/cmdline.hpp>
-
 #include <cstdlib>
 #include <fstream>
 #include <iostream>
 #include <string>
 
-namespace Moses
-{
-namespace PCFG
-{
+#include <boost/program_options/cmdline.hpp>
 
-class Tool
-{
-public:
+namespace MosesTraining {
+namespace Syntax {
+namespace PCFG {
+
+class Tool {
+ public:
   virtual ~Tool() {}
 
-  const std::string &name() const {
-    return name_;
-  }
+  const std::string &name() const { return name_; }
 
   virtual int Main(int argc, char *argv[]) = 0;
 
-protected:
+ protected:
   Tool(const std::string &name) : name_(name) {}
 
   // Returns the boost::program_options style that should be used by all tools.
@@ -82,7 +78,7 @@ protected:
   // the file cannot be opened for writing.
   void OpenNamedOutputOrDie(const std::string &, std::ofstream &);
 
-private:
+ private:
   std::string name_;
   std::istream *input_ptr_;
   std::ifstream input_file_stream_;
@@ -91,6 +87,7 @@ private:
 };
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
 
 #endif
diff --git a/phrase-extract/pcfg-common/typedef.h b/phrase-extract/pcfg-common/typedef.h
index ce3e0423b..e738163df 100644
--- a/phrase-extract/pcfg-common/typedef.h
+++ b/phrase-extract/pcfg-common/typedef.h
@@ -21,19 +21,19 @@
 #ifndef PCFG_TYPEDEF_H_
 #define PCFG_TYPEDEF_H_
 
-#include "numbered_set.h"
-#include "syntax_tree.h"
-
 #include <string>
 
-namespace Moses
-{
-namespace PCFG
-{
+#include "syntax-common/numbered_set.h"
+#include "syntax_tree.h"
+
+namespace MosesTraining {
+namespace Syntax {
+namespace PCFG {
 
 typedef NumberedSet<std::string> Vocabulary;
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
 
 #endif
diff --git a/phrase-extract/pcfg-common/xml_tree_parser.cc b/phrase-extract/pcfg-common/xml_tree_parser.cc
index b6c1da177..3d9291994 100644
--- a/phrase-extract/pcfg-common/xml_tree_parser.cc
+++ b/phrase-extract/pcfg-common/xml_tree_parser.cc
@@ -19,25 +19,23 @@
 
 #include "xml_tree_parser.h"
 
-#include "exception.h"
+#include <cassert>
+#include <vector>
+
 #include "tables-core.h"
 #include "XmlException.h"
 #include "XmlTree.h"
 
-#include <cassert>
-#include <vector>
+#include "syntax-common/exception.h"
 
-using namespace MosesTraining;
-
-namespace Moses {
+namespace MosesTraining {
+namespace Syntax {
 namespace PCFG {
 
-XmlTreeParser::XmlTreeParser()
-{
+XmlTreeParser::XmlTreeParser() {
 }
 
-std::auto_ptr<PcfgTree> XmlTreeParser::Parse(const std::string &line)
-{
+std::auto_ptr<PcfgTree> XmlTreeParser::Parse(const std::string &line) {
   m_line = line;
   m_tree.Clear();
   try {
@@ -60,8 +58,7 @@ std::auto_ptr<PcfgTree> XmlTreeParser::Parse(const std::string &line)
 // Converts a SyntaxNode tree to a Moses::PCFG::PcfgTree.
 std::auto_ptr<PcfgTree> XmlTreeParser::ConvertTree(
     const SyntaxNode &tree,
-    const std::vector<std::string> &words)
-{
+    const std::vector<std::string> &words) {
   std::auto_ptr<PcfgTree> root(new PcfgTree(tree.GetLabel()));
   const std::vector<SyntaxNode*> &children = tree.GetChildren();
   if (children.empty()) {
@@ -87,4 +84,5 @@ std::auto_ptr<PcfgTree> XmlTreeParser::ConvertTree(
 }
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/pcfg-common/xml_tree_parser.h b/phrase-extract/pcfg-common/xml_tree_parser.h
index 7eec14033..675a112d8 100644
--- a/phrase-extract/pcfg-common/xml_tree_parser.h
+++ b/phrase-extract/pcfg-common/xml_tree_parser.h
@@ -21,28 +21,26 @@
 #ifndef PCFG_XML_TREE_PARSER_H_
 #define PCFG_XML_TREE_PARSER_H_
 
-#include "pcfg_tree.h"
-#include "SyntaxTree.h"
-
 #include <map>
 #include <memory>
 #include <set>
 #include <string>
 #include <vector>
 
-namespace Moses
-{
-namespace PCFG
-{
+#include "pcfg_tree.h"
+#include "SyntaxTree.h"
+
+namespace MosesTraining {
+namespace Syntax {
+namespace PCFG {
 
 // Parses a string in Moses' XML parse tree format and returns a PcfgTree
 // object.
-class XmlTreeParser
-{
-public:
+class XmlTreeParser {
+ public:
   XmlTreeParser();
   std::auto_ptr<PcfgTree> Parse(const std::string &);
-private:
+ private:
   std::auto_ptr<PcfgTree> ConvertTree(const MosesTraining::SyntaxNode &,
                                       const std::vector<std::string> &);
 
@@ -54,6 +52,7 @@ private:
 };
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
 
 #endif
diff --git a/phrase-extract/pcfg-common/xml_tree_writer.h b/phrase-extract/pcfg-common/xml_tree_writer.h
index e09942279..8582e544f 100644
--- a/phrase-extract/pcfg-common/xml_tree_writer.h
+++ b/phrase-extract/pcfg-common/xml_tree_writer.h
@@ -21,10 +21,6 @@
 #ifndef PCFG_XML_TREE_WRITER_H_
 #define PCFG_XML_TREE_WRITER_H_
 
-#include "syntax_tree.h"
-
-#include "XmlTree.h"
-
 #include <cassert>
 #include <map>
 #include <memory>
@@ -32,15 +28,17 @@
 #include <vector>
 #include <string>
 
-namespace Moses
-{
-namespace PCFG
-{
+#include "XmlTree.h"
+
+#include "syntax_tree.h"
+
+namespace MosesTraining {
+namespace Syntax {
+namespace PCFG {
 
 template<typename InputTree>
-class XmlOutputHandler
-{
-public:
+class XmlOutputHandler {
+ public:
   typedef std::map<std::string, std::string> AttributeMap;
 
   void GetLabel(const InputTree &, std::string &) const;
@@ -48,19 +46,17 @@ public:
 };
 
 template<typename InputTree>
-class XmlTreeWriter : public XmlOutputHandler<InputTree>
-{
-public:
+class XmlTreeWriter : public XmlOutputHandler<InputTree> {
+ public:
   typedef XmlOutputHandler<InputTree> Base;
   void Write(const InputTree &, std::ostream &) const;
-private:
+ private:
   std::string Escape(const std::string &) const;
 };
 
 template<typename InputTree>
 void XmlTreeWriter<InputTree>::Write(const InputTree &tree,
-                                     std::ostream &out) const
-{
+                                     std::ostream &out) const {
   assert(!tree.IsLeaf());
 
   // Opening tag
@@ -104,8 +100,7 @@ void XmlTreeWriter<InputTree>::Write(const InputTree &tree,
 
 // Escapes XML special characters.
 template<typename InputTree>
-std::string XmlTreeWriter<InputTree>::Escape(const std::string &s) const
-{
+std::string XmlTreeWriter<InputTree>::Escape(const std::string &s) const {
   std::string t;
   std::size_t len = s.size();
   t.reserve(len);
@@ -134,6 +129,7 @@ std::string XmlTreeWriter<InputTree>::Escape(const std::string &s) const
 }
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
 
 #endif
diff --git a/phrase-extract/pcfg-extract/main.cc b/phrase-extract/pcfg-extract/main.cc
index 47b45afc3..84051f2e2 100644
--- a/phrase-extract/pcfg-extract/main.cc
+++ b/phrase-extract/pcfg-extract/main.cc
@@ -20,6 +20,6 @@
 #include "pcfg_extract.h"
 
 int main(int argc, char *argv[]) {
-  Moses::PCFG::PcfgExtract tool;
+  MosesTraining::Syntax::PCFG::PcfgExtract tool;
   return tool.Main(argc, argv);
 }
diff --git a/phrase-extract/pcfg-extract/options.h b/phrase-extract/pcfg-extract/options.h
index 2633f025a..ffaa3bb17 100644
--- a/phrase-extract/pcfg-extract/options.h
+++ b/phrase-extract/pcfg-extract/options.h
@@ -23,16 +23,16 @@
 
 #include <string>
 
-namespace Moses
-{
-namespace PCFG
-{
+namespace MosesTraining {
+namespace Syntax {
+namespace PCFG {
 
 struct Options {
   std::string corpus_file;
 };
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
 
 #endif
diff --git a/phrase-extract/pcfg-extract/pcfg_extract.cc b/phrase-extract/pcfg-extract/pcfg_extract.cc
index 71c2e31c3..a5e06aa82 100644
--- a/phrase-extract/pcfg-extract/pcfg_extract.cc
+++ b/phrase-extract/pcfg-extract/pcfg_extract.cc
@@ -23,7 +23,8 @@
 #include "rule_collection.h"
 #include "rule_extractor.h"
 
-#include "pcfg-common/exception.h"
+#include "syntax-common/exception.h"
+
 #include "pcfg-common/pcfg.h"
 #include "pcfg-common/pcfg_tree.h"
 #include "pcfg-common/syntax_tree.h"
@@ -42,7 +43,8 @@
 #include <string>
 #include <vector>
 
-namespace Moses {
+namespace MosesTraining {
+namespace Syntax {
 namespace PCFG {
 
 int PcfgExtract::Main(int argc, char *argv[]) {
@@ -128,4 +130,5 @@ void PcfgExtract::ProcessOptions(int argc, char *argv[],
 }
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/pcfg-extract/pcfg_extract.h b/phrase-extract/pcfg-extract/pcfg_extract.h
index e8c306876..835564341 100644
--- a/phrase-extract/pcfg-extract/pcfg_extract.h
+++ b/phrase-extract/pcfg-extract/pcfg_extract.h
@@ -23,15 +23,13 @@
 
 #include "pcfg-common/tool.h"
 
-namespace Moses
-{
-namespace PCFG
-{
+namespace MosesTraining {
+namespace Syntax {
+namespace PCFG {
 
-class Options;
+struct Options;
 
-class PcfgExtract : public Tool
-{
+class PcfgExtract : public Tool {
 public:
   PcfgExtract() : Tool("pcfg-extract") {}
   virtual int Main(int, char *[]);
@@ -40,6 +38,7 @@ private:
 };
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
 
 #endif
diff --git a/phrase-extract/pcfg-extract/rule_collection.cc b/phrase-extract/pcfg-extract/rule_collection.cc
index 32b63e0ef..21e84d2fa 100644
--- a/phrase-extract/pcfg-extract/rule_collection.cc
+++ b/phrase-extract/pcfg-extract/rule_collection.cc
@@ -23,7 +23,8 @@
 
 #include <cmath>
 
-namespace Moses {
+namespace MosesTraining {
+namespace Syntax {
 namespace PCFG {
 
 void RuleCollection::Add(std::size_t lhs, const std::vector<std::size_t> &rhs) {
@@ -55,4 +56,5 @@ void RuleCollection::CreatePcfg(Pcfg &pcfg) {
 }
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/pcfg-extract/rule_collection.h b/phrase-extract/pcfg-extract/rule_collection.h
index 32cb2dc05..66fa98657 100644
--- a/phrase-extract/pcfg-extract/rule_collection.h
+++ b/phrase-extract/pcfg-extract/rule_collection.h
@@ -21,21 +21,19 @@
 #ifndef PCFG_EXTRACT_RULE_COLLECTION_H_
 #define PCFG_EXTRACT_RULE_COLLECTION_H_
 
-#include "pcfg-common/pcfg.h"
+#include <vector>
 
 #include <boost/unordered_map.hpp>
 
-#include <vector>
+#include "pcfg-common/pcfg.h"
 
-namespace Moses
-{
-namespace PCFG
-{
+namespace MosesTraining {
+namespace Syntax {
+namespace PCFG {
 
 // Contains PCFG rules and their counts.
-class RuleCollection
-{
-public:
+class RuleCollection {
+ public:
   typedef boost::unordered_map<std::vector<std::size_t>, std::size_t> RhsCountMap;
   typedef boost::unordered_map<std::size_t, RhsCountMap> Map;
   typedef Map::iterator iterator;
@@ -43,28 +41,21 @@ public:
 
   RuleCollection() {}
 
-  iterator begin() {
-    return collection_.begin();
-  }
-  const_iterator begin() const {
-    return collection_.begin();
-  }
+  iterator begin() { return collection_.begin(); }
+  const_iterator begin() const { return collection_.begin(); }
 
-  iterator end() {
-    return collection_.end();
-  }
-  const_iterator end() const {
-    return collection_.end();
-  }
+  iterator end() { return collection_.end(); }
+  const_iterator end() const { return collection_.end(); }
 
   void Add(std::size_t, const std::vector<std::size_t> &);
   void CreatePcfg(Pcfg &);
 
-private:
+ private:
   Map collection_;
 };
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
 
 #endif
diff --git a/phrase-extract/pcfg-extract/rule_extractor.cc b/phrase-extract/pcfg-extract/rule_extractor.cc
index 217574e7d..bb4698fae 100644
--- a/phrase-extract/pcfg-extract/rule_extractor.cc
+++ b/phrase-extract/pcfg-extract/rule_extractor.cc
@@ -21,7 +21,8 @@
 
 #include "pcfg-common/pcfg_tree.h"
 
-namespace Moses {
+namespace MosesTraining {
+namespace Syntax {
 namespace PCFG {
 
 RuleExtractor::RuleExtractor(Vocabulary &non_term_vocab)
@@ -48,4 +49,5 @@ void RuleExtractor::Extract(const PcfgTree &tree, RuleCollection &rc) const {
 }
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/pcfg-extract/rule_extractor.h b/phrase-extract/pcfg-extract/rule_extractor.h
index e4b411c01..1dddd796f 100644
--- a/phrase-extract/pcfg-extract/rule_extractor.h
+++ b/phrase-extract/pcfg-extract/rule_extractor.h
@@ -21,28 +21,27 @@
 #ifndef PCFG_EXTRACT_RULE_EXTRACTOR_H_
 #define PCFG_EXTRACT_RULE_EXTRACTOR_H_
 
-#include "rule_collection.h"
-
 #include "pcfg-common/typedef.h"
 
-namespace Moses
-{
-namespace PCFG
-{
+#include "rule_collection.h"
+
+namespace MosesTraining {
+namespace Syntax {
+namespace PCFG {
 
 class PcfgTree;
 
 // Extracts PCFG rules from syntax trees and adds them to a RuleCollection.
-class RuleExtractor
-{
-public:
+class RuleExtractor {
+ public:
   RuleExtractor(Vocabulary &);
   void Extract(const PcfgTree &, RuleCollection &) const;
-private:
+ private:
   Vocabulary &non_term_vocab_;
 };
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
 
 #endif
diff --git a/phrase-extract/pcfg-score/main.cc b/phrase-extract/pcfg-score/main.cc
index da5392add..5ce19f797 100644
--- a/phrase-extract/pcfg-score/main.cc
+++ b/phrase-extract/pcfg-score/main.cc
@@ -20,6 +20,6 @@
 #include "pcfg_score.h"
 
 int main(int argc, char *argv[]) {
-  Moses::PCFG::PcfgScore tool;
+  MosesTraining::Syntax::PCFG::PcfgScore tool;
   return tool.Main(argc, argv);
 }
diff --git a/phrase-extract/pcfg-score/options.h b/phrase-extract/pcfg-score/options.h
index fd54b4b6b..bbd56d6d0 100644
--- a/phrase-extract/pcfg-score/options.h
+++ b/phrase-extract/pcfg-score/options.h
@@ -23,16 +23,16 @@
 
 #include <string>
 
-namespace Moses
-{
-namespace PCFG
-{
+namespace MosesTraining {
+namespace Syntax {
+namespace PCFG {
 
 struct Options {
   std::string pcfg_file;
 };
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
 
 #endif
diff --git a/phrase-extract/pcfg-score/pcfg_score.cc b/phrase-extract/pcfg-score/pcfg_score.cc
index 345d7fc60..a561c18ed 100644
--- a/phrase-extract/pcfg-score/pcfg_score.cc
+++ b/phrase-extract/pcfg-score/pcfg_score.cc
@@ -19,18 +19,6 @@
 
 #include "pcfg_score.h"
 
-#include "options.h"
-#include "tree_scorer.h"
-
-#include "pcfg-common/exception.h"
-#include "pcfg-common/pcfg.h"
-#include "pcfg-common/pcfg_tree.h"
-#include "pcfg-common/syntax_tree.h"
-#include "pcfg-common/typedef.h"
-#include "pcfg-common/xml_tree_parser.h"
-
-#include <boost/program_options.hpp>
-
 #include <cassert>
 #include <cstdlib>
 #include <fstream>
@@ -40,8 +28,21 @@
 #include <set>
 #include <string>
 #include <vector>
+#include "options.h"
+#include "tree_scorer.h"
 
-namespace Moses {
+#include <boost/program_options.hpp>
+
+#include "syntax-common/exception.h"
+
+#include "pcfg-common/pcfg.h"
+#include "pcfg-common/pcfg_tree.h"
+#include "pcfg-common/syntax_tree.h"
+#include "pcfg-common/typedef.h"
+#include "pcfg-common/xml_tree_parser.h"
+
+namespace MosesTraining {
+namespace Syntax {
 namespace PCFG {
 
 int PcfgScore::Main(int argc, char *argv[]) {
@@ -149,4 +150,5 @@ void PcfgScore::ProcessOptions(int argc, char *argv[], Options &options) const {
 }
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/pcfg-score/pcfg_score.h b/phrase-extract/pcfg-score/pcfg_score.h
index f49c9a0be..fb9971c35 100644
--- a/phrase-extract/pcfg-score/pcfg_score.h
+++ b/phrase-extract/pcfg-score/pcfg_score.h
@@ -23,23 +23,22 @@
 
 #include "pcfg-common/tool.h"
 
-namespace Moses
-{
-namespace PCFG
-{
+namespace MosesTraining {
+namespace Syntax {
+namespace PCFG {
 
-class Options;
+struct Options;
 
-class PcfgScore : public Tool
-{
-public:
+class PcfgScore : public Tool {
+ public:
   PcfgScore() : Tool("pcfg-score") {}
   virtual int Main(int, char *[]);
-private:
+ private:
   void ProcessOptions(int, char *[], Options &) const;
 };
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
 
 #endif
diff --git a/phrase-extract/pcfg-score/tree_scorer.cc b/phrase-extract/pcfg-score/tree_scorer.cc
index f9ce97ae0..53b6aaccf 100644
--- a/phrase-extract/pcfg-score/tree_scorer.cc
+++ b/phrase-extract/pcfg-score/tree_scorer.cc
@@ -21,7 +21,8 @@
 
 #include <cassert>
 
-namespace Moses {
+namespace MosesTraining {
+namespace Syntax {
 namespace PCFG {
 
 TreeScorer::TreeScorer(const Pcfg &pcfg, const Vocabulary &non_term_vocab)
@@ -65,4 +66,5 @@ bool TreeScorer::Score(PcfgTree &root) const {
 }
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/pcfg-score/tree_scorer.h b/phrase-extract/pcfg-score/tree_scorer.h
index 8cb59c0c2..3cf4693a6 100644
--- a/phrase-extract/pcfg-score/tree_scorer.h
+++ b/phrase-extract/pcfg-score/tree_scorer.h
@@ -25,26 +25,25 @@
 #include "pcfg-common/pcfg_tree.h"
 #include "pcfg-common/typedef.h"
 
-namespace Moses
-{
-namespace PCFG
-{
+namespace MosesTraining {
+namespace Syntax {
+namespace PCFG {
 
-class TreeScorer
-{
-public:
+class TreeScorer {
+ public:
   TreeScorer(const Pcfg &, const Vocabulary &);
 
   // Score tree according to PCFG.  Returns false if unsuccessful (due to
   // missing rule).
   bool Score(PcfgTree &) const;
 
-private:
+ private:
   const Pcfg &pcfg_;
   const Vocabulary &non_term_vocab_;
 };
 
 }  // namespace PCFG
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
 
 #endif
diff --git a/phrase-extract/relax-parse-main.cpp b/phrase-extract/relax-parse-main.cpp
index b415c4d0e..a6d50cef5 100644
--- a/phrase-extract/relax-parse-main.cpp
+++ b/phrase-extract/relax-parse-main.cpp
@@ -77,7 +77,7 @@ void init(int argc, char* argv[])
 
   if (argc < 2) {
     cerr << "syntax: relax-parse < in-parse > out-parse ["
-         << " --LeftBinarize | ---RightBinarize |"
+         << " --LeftBinarize | --RightBinarize |"
          << " --SAMT 1-4 ]" << endl;
     exit(1);
   }
diff --git a/phrase-extract/score-stsg/Exception.h b/phrase-extract/score-stsg/Exception.h
deleted file mode 100644
index e8c56c0d3..000000000
--- a/phrase-extract/score-stsg/Exception.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#pragma once
-
-#include <string>
-
-namespace Moses
-{
-namespace ScoreStsg
-{
-
-class Exception
-{
-public:
-  Exception(const char *msg) : m_msg(msg) {}
-  Exception(const std::string &msg) : m_msg(msg) {}
-  const std::string &GetMsg() const {
-    return m_msg;
-  }
-private:
-  std::string m_msg;
-};
-
-} // namespace ScoreStsg
-} // namespace Moses
diff --git a/phrase-extract/score-stsg/Jamfile b/phrase-extract/score-stsg/Jamfile
index aa37292fa..6ae17b565 100644
--- a/phrase-extract/score-stsg/Jamfile
+++ b/phrase-extract/score-stsg/Jamfile
@@ -1 +1 @@
-exe score-stsg : [ glob *.cpp ] ..//deps ../..//boost_iostreams ../..//boost_program_options ../..//z : <include>.. ;
+exe score-stsg : [ glob *.cpp ] ..//syntax-common ..//deps ../..//boost_iostreams ../..//boost_program_options ../..//z : <include>.. ;
diff --git a/phrase-extract/score-stsg/LexicalTable.cpp b/phrase-extract/score-stsg/LexicalTable.cpp
index 797a6a903..d5d7ce6ab 100644
--- a/phrase-extract/score-stsg/LexicalTable.cpp
+++ b/phrase-extract/score-stsg/LexicalTable.cpp
@@ -5,7 +5,9 @@
 #include <cstdlib>
 #include <iostream>
 
-namespace Moses
+namespace MosesTraining
+{
+namespace Syntax
 {
 namespace ScoreStsg
 {
@@ -50,4 +52,5 @@ void LexicalTable::Load(std::istream &input)
 }
 
 }  // namespace ScoreStsg
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/score-stsg/LexicalTable.h b/phrase-extract/score-stsg/LexicalTable.h
index b2ccf6984..54bae1dec 100644
--- a/phrase-extract/score-stsg/LexicalTable.h
+++ b/phrase-extract/score-stsg/LexicalTable.h
@@ -8,7 +8,9 @@
 
 #include "Vocabulary.h"
 
-namespace Moses
+namespace MosesTraining
+{
+namespace Syntax
 {
 namespace ScoreStsg
 {
@@ -40,4 +42,5 @@ private:
 };
 
 }  // namespace ScoreStsg
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/score-stsg/Main.cpp b/phrase-extract/score-stsg/Main.cpp
index 3809bd503..4a8f7a57f 100644
--- a/phrase-extract/score-stsg/Main.cpp
+++ b/phrase-extract/score-stsg/Main.cpp
@@ -2,6 +2,6 @@
 
 int main(int argc, char *argv[])
 {
-  Moses::ScoreStsg::ScoreStsg tool;
+  MosesTraining::Syntax::ScoreStsg::ScoreStsg tool;
   return tool.Main(argc, argv);
 }
diff --git a/phrase-extract/score-stsg/Options.h b/phrase-extract/score-stsg/Options.h
index 5b3664052..17b959c84 100644
--- a/phrase-extract/score-stsg/Options.h
+++ b/phrase-extract/score-stsg/Options.h
@@ -2,7 +2,9 @@
 
 #include <string>
 
-namespace Moses
+namespace MosesTraining
+{
+namespace Syntax
 {
 namespace ScoreStsg
 {
@@ -38,4 +40,5 @@ public:
 };
 
 }  // namespace ScoreStsg
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/score-stsg/RuleGroup.cpp b/phrase-extract/score-stsg/RuleGroup.cpp
index 8c4f7b9fd..bbbe3b2b6 100644
--- a/phrase-extract/score-stsg/RuleGroup.cpp
+++ b/phrase-extract/score-stsg/RuleGroup.cpp
@@ -1,6 +1,8 @@
 #include "RuleGroup.h"
 
-namespace Moses
+namespace MosesTraining
+{
+namespace Syntax
 {
 namespace ScoreStsg
 {
@@ -39,4 +41,5 @@ void RuleGroup::AddRule(const StringPiece &target, const StringPiece &ntAlign,
 }
 
 }  // namespace ScoreStsg
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/score-stsg/RuleGroup.h b/phrase-extract/score-stsg/RuleGroup.h
index a2889f2bf..de0c25f17 100644
--- a/phrase-extract/score-stsg/RuleGroup.h
+++ b/phrase-extract/score-stsg/RuleGroup.h
@@ -6,7 +6,9 @@
 
 #include "util/string_piece.hh"
 
-namespace Moses
+namespace MosesTraining
+{
+namespace Syntax
 {
 namespace ScoreStsg
 {
@@ -65,4 +67,5 @@ private:
 };
 
 }  // namespace ScoreStsg
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/score-stsg/RuleSymbol.h b/phrase-extract/score-stsg/RuleSymbol.h
index 4c9ae0083..efefe6266 100644
--- a/phrase-extract/score-stsg/RuleSymbol.h
+++ b/phrase-extract/score-stsg/RuleSymbol.h
@@ -2,7 +2,9 @@
 
 #include "util/string_piece.hh"
 
-namespace Moses
+namespace MosesTraining
+{
+namespace Syntax
 {
 namespace ScoreStsg
 {
@@ -14,4 +16,5 @@ struct RuleSymbol
 };
 
 }  // namespace ScoreStsg
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/score-stsg/RuleTableWriter.cpp b/phrase-extract/score-stsg/RuleTableWriter.cpp
index 62f3c2702..0a1d5aa08 100644
--- a/phrase-extract/score-stsg/RuleTableWriter.cpp
+++ b/phrase-extract/score-stsg/RuleTableWriter.cpp
@@ -12,14 +12,15 @@
 #include "util/string_piece.hh"
 #include "util/tokenize_piece.hh"
 
-#include "Exception.h"
 #include "InputFileStream.h"
 #include "LexicalTable.h"
 #include "OutputFileStream.h"
 #include "Options.h"
 #include "RuleGroup.h"
 
-namespace Moses
+namespace MosesTraining
+{
+namespace Syntax
 {
 namespace ScoreStsg
 {
@@ -75,4 +76,5 @@ void RuleTableWriter::WriteRuleHalf(const TokenizedRuleHalf &half)
 }
 
 }  // namespace ScoreStsg
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/score-stsg/RuleTableWriter.h b/phrase-extract/score-stsg/RuleTableWriter.h
index 68403dfa7..db8924de3 100644
--- a/phrase-extract/score-stsg/RuleTableWriter.h
+++ b/phrase-extract/score-stsg/RuleTableWriter.h
@@ -8,7 +8,9 @@
 #include "Options.h"
 #include "TokenizedRuleHalf.h"
 
-namespace Moses
+namespace MosesTraining
+{
+namespace Syntax
 {
 namespace ScoreStsg
 {
@@ -16,7 +18,7 @@ namespace ScoreStsg
 class RuleTableWriter
 {
 public:
-  RuleTableWriter(const Options &options, OutputFileStream &out)
+  RuleTableWriter(const Options &options, Moses::OutputFileStream &out)
       : m_options(options)
       , m_out(out) {}
 
@@ -34,8 +36,9 @@ private:
   void WriteRuleHalf(const TokenizedRuleHalf &);
 
   const Options &m_options;
-  OutputFileStream &m_out;
+  Moses::OutputFileStream &m_out;
 };
 
 }  // namespace ScoreStsg
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/score-stsg/ScoreStsg.cpp b/phrase-extract/score-stsg/ScoreStsg.cpp
index e3c2f088e..04e3b5a44 100644
--- a/phrase-extract/score-stsg/ScoreStsg.cpp
+++ b/phrase-extract/score-stsg/ScoreStsg.cpp
@@ -15,15 +15,19 @@
 #include "util/string_piece_hash.hh"
 #include "util/tokenize_piece.hh"
 
-#include "Exception.h"
 #include "InputFileStream.h"
-#include "LexicalTable.h"
 #include "OutputFileStream.h"
+
+#include "syntax-common/exception.h"
+
+#include "LexicalTable.h"
 #include "Options.h"
 #include "RuleGroup.h"
 #include "RuleTableWriter.h"
 
-namespace Moses
+namespace MosesTraining
+{
+namespace Syntax
 {
 namespace ScoreStsg
 {
@@ -44,12 +48,12 @@ int ScoreStsg::Main(int argc, char *argv[])
   ProcessOptions(argc, argv, m_options);
 
   // Open input files.
-  InputFileStream extractStream(m_options.extractFile);
-  InputFileStream lexStream(m_options.lexFile);
+  Moses::InputFileStream extractStream(m_options.extractFile);
+  Moses::InputFileStream lexStream(m_options.lexFile);
 
   // Open output files.
-  OutputFileStream outStream;
-  OutputFileStream countOfCountsStream;
+  Moses::OutputFileStream outStream;
+  Moses::OutputFileStream countOfCountsStream;
   OpenOutputFileOrDie(m_options.tableFile, outStream);
   if (m_options.goodTuring || m_options.kneserNey) {
     OpenOutputFileOrDie(m_options.tableFile+".coc", countOfCountsStream);
@@ -161,7 +165,7 @@ void ScoreStsg::ProcessRuleGroupOrDie(const RuleGroup &group,
   } catch (const Exception &e) {
     std::ostringstream msg;
     msg << "failed to process rule group at lines " << start << "-" << end
-        << ": " << e.GetMsg();
+        << ": " << e.msg();
     Error(msg.str());
   } catch (const std::exception &e) {
     std::ostringstream msg;
@@ -228,7 +232,7 @@ void ScoreStsg::ProcessRuleGroup(const RuleGroup &group,
 }
 
 void ScoreStsg::ParseAlignmentString(const std::string &s, int numTgtWords,
-                                     MosesTraining::ALIGNMENT &tgtToSrc)
+                                     ALIGNMENT &tgtToSrc)
 {
   tgtToSrc.clear();
   tgtToSrc.resize(numTgtWords);
@@ -262,7 +266,7 @@ void ScoreStsg::ParseAlignmentString(const std::string &s, int numTgtWords,
 
 double ScoreStsg::ComputeLexProb(const std::vector<RuleSymbol> &sourceFrontier,
                                  const std::vector<RuleSymbol> &targetFrontier,
-                                 const MosesTraining::ALIGNMENT &tgtToSrc)
+                                 const ALIGNMENT &tgtToSrc)
 {
   double lexScore = 1.0;
   for (std::size_t i = 0; i < targetFrontier.size(); ++i) {
@@ -293,7 +297,7 @@ double ScoreStsg::ComputeLexProb(const std::vector<RuleSymbol> &sourceFrontier,
 }
 
 void ScoreStsg::OpenOutputFileOrDie(const std::string &filename,
-                                    OutputFileStream &stream)
+                                    Moses::OutputFileStream &stream)
 {
   bool ret = stream.Open(filename);
   if (!ret) {
@@ -437,4 +441,5 @@ void ScoreStsg::Error(const std::string &msg) const
 }
 
 }  // namespace ScoreStsg
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/score-stsg/ScoreStsg.h b/phrase-extract/score-stsg/ScoreStsg.h
index 2382b38c0..628c0080e 100644
--- a/phrase-extract/score-stsg/ScoreStsg.h
+++ b/phrase-extract/score-stsg/ScoreStsg.h
@@ -7,6 +7,7 @@
 #include <vector>
 
 #include "ExtractionPhrasePair.h"
+#include "OutputFileStream.h"
 
 #include "LexicalTable.h"
 #include "Options.h"
@@ -14,11 +15,10 @@
 #include "TokenizedRuleHalf.h"
 #include "Vocabulary.h"
 
-namespace Moses
+namespace MosesTraining
+{
+namespace Syntax
 {
-
-class OutputFileStream;
-
 namespace ScoreStsg
 {
 
@@ -41,14 +41,14 @@ private:
 
   double ComputeLexProb(const std::vector<RuleSymbol> &,
                         const std::vector<RuleSymbol> &,
-                        const MosesTraining::ALIGNMENT &);
+                        const ALIGNMENT &);
 
   void Error(const std::string &) const;
 
-  void OpenOutputFileOrDie(const std::string &, OutputFileStream &);
+  void OpenOutputFileOrDie(const std::string &, Moses::OutputFileStream &);
 
   void ParseAlignmentString(const std::string &, int,
-                            MosesTraining::ALIGNMENT &);
+                            ALIGNMENT &);
 
   void ProcessOptions(int, char *[], Options &) const;
 
@@ -68,8 +68,9 @@ private:
   int m_totalDistinct;
   TokenizedRuleHalf m_sourceHalf;
   TokenizedRuleHalf m_targetHalf;
-  MosesTraining::ALIGNMENT m_tgtToSrc;
+  ALIGNMENT m_tgtToSrc;
 };
 
 }  // namespace ScoreStsg
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/score-stsg/TokenizedRuleHalf.cpp b/phrase-extract/score-stsg/TokenizedRuleHalf.cpp
index 5f115ae9e..6ccc2a311 100644
--- a/phrase-extract/score-stsg/TokenizedRuleHalf.cpp
+++ b/phrase-extract/score-stsg/TokenizedRuleHalf.cpp
@@ -1,6 +1,8 @@
 #include "TokenizedRuleHalf.h"
 
-namespace Moses
+namespace MosesTraining
+{
+namespace Syntax
 {
 namespace ScoreStsg
 {
@@ -34,4 +36,5 @@ bool TokenizedRuleHalf::IsTree() const
 }
 
 }  // namespace ScoreStsg
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/score-stsg/TokenizedRuleHalf.h b/phrase-extract/score-stsg/TokenizedRuleHalf.h
index b21ea2d37..2fbb80f38 100644
--- a/phrase-extract/score-stsg/TokenizedRuleHalf.h
+++ b/phrase-extract/score-stsg/TokenizedRuleHalf.h
@@ -3,10 +3,13 @@
 #include <string>
 #include <vector>
 
-#include "RuleSymbol.h"
-#include "TreeFragmentTokenizer.h"
+#include "syntax-common/tree_fragment_tokenizer.h"
 
-namespace Moses
+#include "RuleSymbol.h"
+
+namespace MosesTraining
+{
+namespace Syntax
 {
 namespace ScoreStsg
 {
@@ -42,4 +45,5 @@ struct TokenizedRuleHalf
 };
 
 }  // namespace ScoreStsg
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/score-stsg/Vocabulary.h b/phrase-extract/score-stsg/Vocabulary.h
index 8dc773fe2..db31c73f5 100644
--- a/phrase-extract/score-stsg/Vocabulary.h
+++ b/phrase-extract/score-stsg/Vocabulary.h
@@ -2,12 +2,14 @@
 
 #include <string>
 
-#include "NumberedSet.h"
+#include "syntax-common/numbered_set.h"
 
-namespace Moses {
+namespace MosesTraining {
+namespace Syntax {
 namespace ScoreStsg {
 
 typedef NumberedSet<std::string, std::size_t> Vocabulary;
 
 }  // namespace ScoreStsg
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/syntax-common/Jamfile b/phrase-extract/syntax-common/Jamfile
new file mode 100644
index 000000000..c76ab50a5
--- /dev/null
+++ b/phrase-extract/syntax-common/Jamfile
@@ -0,0 +1,8 @@
+lib syntax_common : [ glob *.cc : *_test.cc ] ..//deps : <include>.. ;
+
+import testing ;
+
+for local t in [ glob *_test.cc ] {
+  local name = [ MATCH "(.*)\.cc" : $(t) ] ;
+  unit-test $(name) : $(t) syntax_common /top//boost_unit_test_framework /top//boost_system ;
+}
diff --git a/phrase-extract/syntax-common/exception.h b/phrase-extract/syntax-common/exception.h
new file mode 100644
index 000000000..18d529fc3
--- /dev/null
+++ b/phrase-extract/syntax-common/exception.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <string>
+
+namespace MosesTraining {
+namespace Syntax {
+
+class Exception {
+ public:
+  Exception(const char *msg) : msg_(msg) {}
+  Exception(const std::string &msg) : msg_(msg) {}
+
+  const std::string &msg() const { return msg_; }
+
+ private:
+  std::string msg_;
+};
+
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/score-stsg/NumberedSet.h b/phrase-extract/syntax-common/numbered_set.h
similarity index 96%
rename from phrase-extract/score-stsg/NumberedSet.h
rename to phrase-extract/syntax-common/numbered_set.h
index 56f7c05c2..60933fe96 100644
--- a/phrase-extract/score-stsg/NumberedSet.h
+++ b/phrase-extract/syntax-common/numbered_set.h
@@ -6,10 +6,10 @@
 
 #include <boost/unordered_map.hpp>
 
-#include "Exception.h"
+#include "exception.h"
 
-namespace Moses {
-namespace ScoreStsg {
+namespace MosesTraining {
+namespace Syntax {
 
 // Stores a set of elements of type T, each of which is allocated an integral
 // ID of type I.  IDs are contiguous starting at 0.  Individual elements cannot
@@ -106,5 +106,5 @@ void NumberedSet<T, I>::Clear() {
   id_to_element_.clear();
 }
 
-}  // namespace ScoreStsg
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/syntax-common/string_tree.h b/phrase-extract/syntax-common/string_tree.h
new file mode 100644
index 000000000..c1676e72c
--- /dev/null
+++ b/phrase-extract/syntax-common/string_tree.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include <string>
+
+#include "tree.h"
+
+namespace MosesTraining {
+namespace Syntax {
+
+typedef Tree<std::string> StringTree;
+
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/syntax-common/tree-inl.h b/phrase-extract/syntax-common/tree-inl.h
new file mode 100644
index 000000000..2ba55df1a
--- /dev/null
+++ b/phrase-extract/syntax-common/tree-inl.h
@@ -0,0 +1,115 @@
+#pragma once
+
+#include <stack>
+#include <vector>
+
+namespace MosesTraining {
+namespace Syntax {
+
+template<typename T>
+Tree<T>::~Tree() {
+  for (typename std::vector<Tree *>::iterator p = children_.begin();
+       p != children_.end(); ++p) {
+    delete *p;
+  }
+}
+
+template<typename T>
+void Tree<T>::SetParents() {
+  for (typename std::vector<Tree *>::iterator p = children_.begin();
+       p != children_.end(); ++p) {
+    (*p)->parent() = this;
+    (*p)->SetParents();
+  }
+}
+
+template<typename T>
+std::size_t Tree<T>::Depth() const {
+  std::size_t depth = 0;
+  Tree *ancestor = parent_;
+  while (ancestor != 0) {
+    ++depth;
+    ancestor = ancestor->parent_;
+  }
+  return depth;
+}
+
+template<typename T>
+class Tree<T>::PreOrderIterator {
+ public:
+  PreOrderIterator();
+  PreOrderIterator(Tree<T> &);
+
+  Tree<T> &operator*() { return *node_; }
+  Tree<T> *operator->() { return node_; }
+
+  PreOrderIterator &operator++();
+  PreOrderIterator operator++(int);
+
+  bool operator==(const Tree<T>::PreOrderIterator &) const;
+  bool operator!=(const Tree<T>::PreOrderIterator &) const;
+
+ private:
+  // Pointer to the current node (0 once the traversal is complete).
+  Tree<T> *node_;
+
+  // Stack of child indices describing the path from the node the traversal
+  // started at down to node_ (empty while node_ is the start node itself).
+  std::stack<std::size_t> index_stack_;
+};
+
+template<typename T>
+Tree<T>::PreOrderIterator::PreOrderIterator()
+    : node_(0) {
+}
+
+template<typename T>
+Tree<T>::PreOrderIterator::PreOrderIterator(Tree<T> &t)
+    : node_(&t) {
+}
+
+template<typename T>
+typename Tree<T>::PreOrderIterator &Tree<T>::PreOrderIterator::operator++() {
+  // If the current node has children then visit the left-most child next.
+  if (!node_->children().empty()) {
+    index_stack_.push(0);
+    node_ = node_->children()[0];
+    return *this;
+  }
+  // Otherwise, climb until an ancestor with a sibling to the right is found.
+  // An empty index stack means we are back at the node the traversal started
+  // from (which need not be the tree's root), so the traversal is complete.
+  Tree<T> *ancestor = node_->parent_;
+  while (ancestor && !index_stack_.empty()) {
+    std::size_t index = index_stack_.top();
+    index_stack_.pop();
+    if (index+1 < ancestor->children_.size()) {
+      index_stack_.push(index+1);
+      node_ = ancestor->children()[index+1];
+      return *this;
+    }
+    ancestor = ancestor->parent_;
+  }
+  node_ = 0;
+  return *this;
+}
+
+template<typename T>
+typename Tree<T>::PreOrderIterator Tree<T>::PreOrderIterator::operator++(int) {
+  PreOrderIterator tmp(*this);
+  ++*this;
+  return tmp;
+}
+
+template<typename T>
+bool Tree<T>::PreOrderIterator::operator==(const PreOrderIterator &rhs) const {
+  return node_ == rhs.node_;
+}
+
+template<typename T>
+bool Tree<T>::PreOrderIterator::operator!=(const PreOrderIterator &rhs) const {
+  return node_ != rhs.node_;
+}
+
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/syntax-common/tree.h b/phrase-extract/syntax-common/tree.h
new file mode 100644
index 000000000..52adaa699
--- /dev/null
+++ b/phrase-extract/syntax-common/tree.h
@@ -0,0 +1,91 @@
+#pragma once
+
+#include <vector>
+
+namespace MosesTraining {
+namespace Syntax {
+
+// A basic k-ary tree with node values of type T.  Each node has a vector of
+// pointers to its children and a pointer to its parent (or 0 for the root).
+//
+// See the unit tests in tree_test.cc for examples of usage.
+//
+// Note: a Tree owns its children: it will delete them on destruction.  Do not
+// copy a Tree: the copy would share (and later double-delete) those children.
+// Note: it's the user's responsibility to ensure that parent and child pointers
+// are correctly set and maintained.  A convenient(-ish) way of building a
+// properly-connected tree is to add all the nodes as children of their
+// respective parents (using the children() accessor) and then call
+// SetParents() on the root at the end.
+//
+template<typename T>
+class Tree {
+ public:
+  // Constructors
+  Tree()
+      : value_()
+      , children_()
+      , parent_(0) {}
+
+  Tree(const T &value)
+      : value_(value)
+      , children_()
+      , parent_(0) {}
+
+  // Destructor (deletes children)
+  ~Tree();
+
+  // Access tree's value.
+  const T &value() const { return value_; }
+  T &value() { return value_; }
+
+  // Access tree's parent.
+  const Tree *parent() const { return parent_; }
+  Tree *&parent() { return parent_; }
+
+  // Access tree's children.
+  const std::vector<Tree *> &children() const { return children_; }
+  std::vector<Tree *> &children() { return children_; }
+
+  // Set the parent values for this subtree (excluding this node).
+  void SetParents();
+
+  // Leaf predicate.
+  bool IsLeaf() const { return children_.empty(); }
+
+  // Calculate the depth of this node within the tree (where the root has a
+  // depth of 0, root's children have a depth 1, etc).
+  std::size_t Depth() const;
+
+  // Iterators
+  //
+  // All iterators are forward iterators.  Example use:
+  //
+  //  Tree<int> &root = GetMeATree();
+  //  for (Tree<int>::PreOrderIterator p(root);
+  //       p != Tree<int>::PreOrderIterator(); ++p) {
+  //    std::cout << p->value() << " ";
+  //  }
+
+  // Pre-order iterators.
+  class PreOrderIterator;
+  // class ConstPreOrderIterator; TODO
+
+  // Post-order iterators.
+  // class PostOrderIterator; TODO
+  // class ConstPostOrderIterator; TODO
+
+  // Leaf iterators (left-to-right).
+  // class LeafIterator; TODO
+  // class ConstLeafIterator; TODO
+
+ private:
+  T value_;
+  std::vector<Tree *> children_;
+  Tree *parent_;
+};
+
+}  // namespace Syntax
+}  // namespace MosesTraining
+
+#include "tree-inl.h"
diff --git a/phrase-extract/score-stsg/TreeFragmentTokenizer.cpp b/phrase-extract/syntax-common/tree_fragment_tokenizer.cc
similarity index 93%
rename from phrase-extract/score-stsg/TreeFragmentTokenizer.cpp
rename to phrase-extract/syntax-common/tree_fragment_tokenizer.cc
index cafc39432..ab3db3a84 100644
--- a/phrase-extract/score-stsg/TreeFragmentTokenizer.cpp
+++ b/phrase-extract/syntax-common/tree_fragment_tokenizer.cc
@@ -1,10 +1,10 @@
-#include "TreeFragmentTokenizer.h"
+#include "tree_fragment_tokenizer.h"
 
 #include <cctype>
 
-namespace Moses
+namespace MosesTraining
 {
-namespace ScoreStsg
+namespace Syntax
 {
 
 TreeFragmentToken::TreeFragmentToken(TreeFragmentTokenType t,
@@ -86,5 +86,5 @@ bool operator!=(const TreeFragmentTokenizer &lhs,
   return !(lhs == rhs);
 }
 
-}  // namespace ScoreStsg
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/score-stsg/TreeFragmentTokenizer.h b/phrase-extract/syntax-common/tree_fragment_tokenizer.h
similarity index 85%
rename from phrase-extract/score-stsg/TreeFragmentTokenizer.h
rename to phrase-extract/syntax-common/tree_fragment_tokenizer.h
index 5360eb5b9..ca8741a52 100644
--- a/phrase-extract/score-stsg/TreeFragmentTokenizer.h
+++ b/phrase-extract/syntax-common/tree_fragment_tokenizer.h
@@ -2,10 +2,8 @@
 
 #include "util/string_piece.hh"
 
-namespace Moses
-{
-namespace ScoreStsg
-{
+namespace MosesTraining {
+namespace Syntax {
 
 enum TreeFragmentTokenType {
   TreeFragmentToken_EOS,
@@ -24,10 +22,11 @@ struct TreeFragmentToken {
 
 // Tokenizes tree fragment strings in Moses format.
 //
-// For example, the string "[NP [NP [NN a]] [NP]]" is tokenized to the sequence:
+// For example, the string "[S [NP [NN weasels]] [VP]]" is tokenized to the
+// sequence:
 //
 //    1   LSB   "["
-//    2   WORD  "NP"
+//    2   WORD  "S"
 //    3   LSB   "["
 //    4   WORD  "NP"
 //    5   LSB   "["
@@ -36,7 +35,7 @@ struct TreeFragmentToken {
 //    8   RSB   "]"
 //    9   RSB   "]"
 //    10  LSB   "["
-//    11  WORD  "NP"
+//    11  WORD  "VP"
 //    12  RSB   "]"
 //    13  RSB   "]"
 //    14  EOS   undefined
@@ -66,5 +65,5 @@ class TreeFragmentTokenizer {
   std::size_t pos_;
 };
 
-}  // namespace ScoreStsg
-}  // namespace Moses
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/syntax-common/tree_fragment_tokenizer_test.cc b/phrase-extract/syntax-common/tree_fragment_tokenizer_test.cc
new file mode 100644
index 000000000..cd09c6911
--- /dev/null
+++ b/phrase-extract/syntax-common/tree_fragment_tokenizer_test.cc
@@ -0,0 +1,74 @@
+#include "tree_fragment_tokenizer.h"
+
+#define BOOST_TEST_MODULE TreeFragmentTokenizerTest
+#include <boost/test/unit_test.hpp>
+
+#include <boost/scoped_ptr.hpp>
+
+namespace MosesTraining {
+namespace Syntax {
+namespace {
+
+BOOST_AUTO_TEST_CASE(tokenize_empty) {
+  const std::string fragment = "";
+  std::vector<TreeFragmentToken> tokens;
+  for (TreeFragmentTokenizer p(fragment); p != TreeFragmentTokenizer(); ++p) {
+    tokens.push_back(*p);
+  }
+  BOOST_REQUIRE(tokens.empty());
+}
+
+BOOST_AUTO_TEST_CASE(tokenize_space) {
+  const std::string fragment = "  [  weasel weasel  ] [] ] wea[sel";
+  std::vector<TreeFragmentToken> tokens;
+  for (TreeFragmentTokenizer p(fragment); p != TreeFragmentTokenizer(); ++p) {
+    tokens.push_back(*p);
+  }
+  BOOST_REQUIRE(tokens.size() == 10);
+  BOOST_REQUIRE(tokens[0].type == TreeFragmentToken_LSB);
+  BOOST_REQUIRE(tokens[0].value == "[");
+  BOOST_REQUIRE(tokens[1].type == TreeFragmentToken_WORD);
+  BOOST_REQUIRE(tokens[1].value == "weasel");
+  BOOST_REQUIRE(tokens[2].type == TreeFragmentToken_WORD);
+  BOOST_REQUIRE(tokens[2].value == "weasel");
+  BOOST_REQUIRE(tokens[3].type == TreeFragmentToken_RSB);
+  BOOST_REQUIRE(tokens[3].value == "]");
+  BOOST_REQUIRE(tokens[4].type == TreeFragmentToken_LSB);
+  BOOST_REQUIRE(tokens[4].value == "[");
+  BOOST_REQUIRE(tokens[5].type == TreeFragmentToken_RSB);
+  BOOST_REQUIRE(tokens[5].value == "]");
+  BOOST_REQUIRE(tokens[6].type == TreeFragmentToken_RSB);
+  BOOST_REQUIRE(tokens[6].value == "]");
+  BOOST_REQUIRE(tokens[7].type == TreeFragmentToken_WORD);
+  BOOST_REQUIRE(tokens[7].value == "wea");
+  BOOST_REQUIRE(tokens[8].type == TreeFragmentToken_LSB);
+  BOOST_REQUIRE(tokens[8].value == "[");
+  BOOST_REQUIRE(tokens[9].type == TreeFragmentToken_WORD);
+  BOOST_REQUIRE(tokens[9].value == "sel");
+}
+
+BOOST_AUTO_TEST_CASE(tokenize_fragment) {
+  const std::string fragment = "[S [NP [NN weasels]] [VP]]";
+  std::vector<TreeFragmentToken> tokens;
+  for (TreeFragmentTokenizer p(fragment); p != TreeFragmentTokenizer(); ++p) {
+    tokens.push_back(*p);
+  }
+  BOOST_REQUIRE(tokens.size() == 13);
+  BOOST_REQUIRE(tokens[0].type == TreeFragmentToken_LSB);
+  BOOST_REQUIRE(tokens[1].type == TreeFragmentToken_WORD);
+  BOOST_REQUIRE(tokens[2].type == TreeFragmentToken_LSB);
+  BOOST_REQUIRE(tokens[3].type == TreeFragmentToken_WORD);
+  BOOST_REQUIRE(tokens[4].type == TreeFragmentToken_LSB);
+  BOOST_REQUIRE(tokens[5].type == TreeFragmentToken_WORD);
+  BOOST_REQUIRE(tokens[6].type == TreeFragmentToken_WORD);
+  BOOST_REQUIRE(tokens[7].type == TreeFragmentToken_RSB);
+  BOOST_REQUIRE(tokens[8].type == TreeFragmentToken_RSB);
+  BOOST_REQUIRE(tokens[9].type == TreeFragmentToken_LSB);
+  BOOST_REQUIRE(tokens[10].type == TreeFragmentToken_WORD);
+  BOOST_REQUIRE(tokens[11].type == TreeFragmentToken_RSB);
+  BOOST_REQUIRE(tokens[12].type == TreeFragmentToken_RSB);
+}
+
+}  // namespace
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/syntax-common/tree_test.cc b/phrase-extract/syntax-common/tree_test.cc
new file mode 100644
index 000000000..0a54ad3f1
--- /dev/null
+++ b/phrase-extract/syntax-common/tree_test.cc
@@ -0,0 +1,66 @@
+#include "tree.h"
+
+#define BOOST_TEST_MODULE TreeTest
+#include <boost/test/unit_test.hpp>
+
+#include <boost/scoped_ptr.hpp>
+
+namespace MosesTraining {
+namespace Syntax {
+namespace {
+
+// Test Tree<>::PreOrderIterator with a trivial, single-node tree.
+BOOST_AUTO_TEST_CASE(pre_order_1) {
+  boost::scoped_ptr<Tree<int> > root(new Tree<int>(123));
+  Tree<int>::PreOrderIterator p(*root);  // PreOrderIterator() is the end
+  BOOST_REQUIRE(p != Tree<int>::PreOrderIterator());  // not at the end yet
+  BOOST_REQUIRE(p->value() == 123);  // first (and only) node is the root
+  ++p;  // one step past the only node
+  BOOST_REQUIRE(p == Tree<int>::PreOrderIterator());  // now at the end
+}
+
+// Test Tree<>::PreOrderIterator on this tree: (1 (2 3) (4) (5 6 (7 8)))
+BOOST_AUTO_TEST_CASE(pre_order_2) {
+  boost::scoped_ptr<Tree<int> > root(new Tree<int>(1));
+  root->children().push_back(new Tree<int>(2));  // 1st child of 1
+  root->children()[0]->children().push_back(new Tree<int>(3));  // child of 2
+  root->children().push_back(new Tree<int>(4));  // 2nd child of 1, a leaf
+  root->children().push_back(new Tree<int>(5));  // 3rd child of 1
+  root->children()[2]->children().push_back(new Tree<int>(6));  // child of 5
+  root->children()[2]->children().push_back(new Tree<int>(7));  // child of 5
+  root->children()[2]->children()[1]->children().push_back(new Tree<int>(8));
+  root->SetParents();  // presumably sets every parent link -- see tree.h
+
+  Tree<int>::PreOrderIterator p(*root);
+  Tree<int>::PreOrderIterator end;  // default-constructed: the end marker
+
+  BOOST_REQUIRE(p != end);  // values must appear in the order 1, 2, ..., 8
+  BOOST_REQUIRE(p->value() == 1);
+  ++p;
+  BOOST_REQUIRE(p != end);
+  BOOST_REQUIRE(p->value() == 2);
+  ++p;
+  BOOST_REQUIRE(p != end);
+  BOOST_REQUIRE(p->value() == 3);
+  ++p;  // leaves the subtree rooted at 2 and moves on to its sibling 4
+  BOOST_REQUIRE(p != end);
+  BOOST_REQUIRE(p->value() == 4);
+  ++p;
+  BOOST_REQUIRE(p != end);
+  BOOST_REQUIRE(p->value() == 5);
+  ++p;
+  BOOST_REQUIRE(p != end);
+  BOOST_REQUIRE(p->value() == 6);
+  ++p;
+  BOOST_REQUIRE(p != end);
+  BOOST_REQUIRE(p->value() == 7);
+  ++p;
+  BOOST_REQUIRE(p != end);
+  BOOST_REQUIRE(p->value() == 8);
+  ++p;  // 8 is the last node, so this must reach the end
+  BOOST_REQUIRE(p == end);
+}
+
+}  // namespace
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/syntax-common/xml_tree_parser.cc b/phrase-extract/syntax-common/xml_tree_parser.cc
new file mode 100644
index 000000000..c4363a3e2
--- /dev/null
+++ b/phrase-extract/syntax-common/xml_tree_parser.cc
@@ -0,0 +1,98 @@
+#include "xml_tree_parser.h"
+
+#include "tables-core.h"
+#include "XmlException.h"
+#include "XmlTree.h"
+
+#include <cassert>
+#include <sstream>
+#include <vector>
+
+namespace MosesTraining {
+namespace Syntax {
+
+// Parses a line in Moses' XML parse tree format and returns the equivalent
+// StringTree.  The result is allocated with new and is not retained by the
+// parser, so the caller is presumably responsible for deleting it.
+//
+// Throws Exception if ProcessAndStripXMLTags returns false or throws an
+// XmlException, if the resulting SyntaxTree has no top node, or if ConvertTree
+// rejects the tree.
+StringTree *XmlTreeParser::Parse(const std::string &line) {
+  line_ = line;
+  tree_.Clear();
+  try {
+    if (!ProcessAndStripXMLTags(line_, tree_, label_set_, top_label_set_,
+                                false)) {
+      throw Exception("");
+    }
+  } catch (const XmlException &e) {
+    throw Exception(e.getMsg());
+  }
+  tree_.ConnectNodes();
+  SyntaxNode *root = tree_.GetTop();
+  // Report a missing top node as an error instead of using assert(), which is
+  // compiled out under NDEBUG and would leave a null dereference below.
+  if (!root) {
+    throw Exception("XML parse tree has no root node");
+  }
+  // line_ has been through ProcessAndStripXMLTags above (presumably stripped
+  // of its XML markup -- confirm in XmlTree.h), so it is tokenized, not line.
+  words_ = tokenize(line_.c_str());
+  return ConvertTree(*root, words_);
+}
+
+// Converts a SyntaxNode tree to a newly allocated StringTree.  Each childless
+// SyntaxNode becomes a label node with a single child holding the word at the
+// node's start position in words.
+//
+// Throws Exception if a childless node spans more than one word or refers to a
+// position outside words.  On any exception the partially built tree is
+// deleted before rethrowing (this assumes StringTree's destructor frees its
+// children -- TODO confirm against tree.h).
+StringTree *XmlTreeParser::ConvertTree(const SyntaxNode &tree,
+                                       const std::vector<std::string> &words) {
+  const std::vector<SyntaxNode*> &children = tree.GetChildren();
+  if (children.empty()) {
+    // Validate the leaf before allocating so these throws cannot leak.
+    if (tree.GetStart() != tree.GetEnd()) {
+      std::ostringstream msg;
+      msg << "leaf node covers multiple words (" << tree.GetStart()
+          << "-" << tree.GetEnd() << "): this is currently unsupported";
+      throw Exception(msg.str());
+    }
+    if (tree.GetStart() < 0 ||
+        static_cast<std::vector<std::string>::size_type>(tree.GetStart()) >=
+            words.size()) {
+      std::ostringstream msg;
+      msg << "leaf node position " << tree.GetStart()
+          << " is outside the sentence (" << words.size() << " words)";
+      throw Exception(msg.str());
+    }
+  }
+  StringTree *root = new StringTree(tree.GetLabel());
+  try {
+    if (children.empty()) {
+      StringTree *leaf = new StringTree(words[tree.GetStart()]);
+      leaf->parent() = root;
+      root->children().push_back(leaf);
+    } else {
+      for (std::vector<SyntaxNode*>::const_iterator p = children.begin();
+           p != children.end(); ++p) {
+        assert(*p);
+        StringTree *child = ConvertTree(**p, words);
+        child->parent() = root;
+        root->children().push_back(child);
+      }
+    }
+  } catch (...) {
+    // A nested ConvertTree call (or an allocation) failed: release what has
+    // been built so far instead of leaking it.
+    delete root;
+    throw;
+  }
+  return root;
+}
+
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/phrase-extract/syntax-common/xml_tree_parser.h b/phrase-extract/syntax-common/xml_tree_parser.h
new file mode 100644
index 000000000..a5563f63a
--- /dev/null
+++ b/phrase-extract/syntax-common/xml_tree_parser.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "SyntaxTree.h"
+
+#include "exception.h"
+#include "string_tree.h"
+
+namespace MosesTraining {
+namespace Syntax {
+
+// Parses a string in Moses' XML parse tree format and returns a StringTree
+// allocated with new.  This is a wrapper around ProcessAndStripXMLTags.
+class XmlTreeParser {
+ public:
+  StringTree *Parse(const std::string &);  // throws Exception on failure
+
+ private:
+  static StringTree *ConvertTree(const MosesTraining::SyntaxNode &,
+                                 const std::vector<std::string> &);
+
+  std::set<std::string> label_set_;  // passed to ProcessAndStripXMLTags
+  std::map<std::string, int> top_label_set_;  // likewise (label -> int map)
+  std::string line_;  // working copy of the line given to Parse()
+  MosesTraining::SyntaxTree tree_;  // cleared and refilled by each Parse()
+  std::vector<std::string> words_;  // tokenize(line_) from the last Parse()
+};
+
+}  // namespace Syntax
+}  // namespace MosesTraining
diff --git a/scripts/other/beautify.perl b/scripts/other/beautify.perl
index e653440dc..3e77edd53 100755
--- a/scripts/other/beautify.perl
+++ b/scripts/other/beautify.perl
@@ -40,6 +40,8 @@ sub Beautify($)
 		 next if ($name eq "srilm");
 		 next if ($name eq "irstlm");
 		 next if ($name eq "UG");
+		 next if ($name eq "pcfg-common");
+		 next if ($name eq "syntax-common");
 
 		 $name = $path ."/" .$name;
 		 if (-d $name) {