diff --git a/biconcor/phrase-lookup.cpp b/biconcor/phrase-lookup.cpp
index 3ef82e73a..60ab8db66 100644
--- a/biconcor/phrase-lookup.cpp
+++ b/biconcor/phrase-lookup.cpp
@@ -109,14 +109,17 @@ size_t lookup( string query )
   return suffixArray.Count( queryString );
 }
 
-vector<string> tokenize( const char input[] )
+// Duplicate of util::tokenize() in util/tokenize.hh.
+// TODO: Can we de-duplicate this?  At the time of writing biconcor does not
+// use util at all.
+vector<string> tokenize(const char input[])
 {
   vector< string > token;
   bool betweenWords = true;
   int start=0;
-  int i=0;
-  for(; input[i] != '\0'; i++) {
-    bool isSpace = (input[i] == ' ' || input[i] == '\t');
+  int i;
+  for(i = 0; input[i] != '\0'; i++) {
+    const bool isSpace = (input[i] == ' ' || input[i] == '\t');
 
     if (!isSpace && betweenWords) {
       start = i;
diff --git a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp
index c632f9ff2..444557f9b 100644
--- a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp
+++ b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp
@@ -17,6 +17,7 @@ License along with this library; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 ***********************************************************************/
 #include "util/exception.hh"
+#include "util/tokenize.hh"
 #include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
 
 using namespace std;
@@ -30,29 +31,6 @@ void OutputVec(const vector<T> &vec)
   cerr << endl;
 }
 
-// from phrase-extract/tables-core.cpp
-inline vector<string> tokenize( const char* input )
-{
-  vector< string > token;
-  bool betweenWords = true;
-  int start=0;
-  int i=0;
-  for(; input[i] != '\0'; i++) {
-    bool isSpace = (input[i] == ' ' || input[i] == '\t');
-
-    if (!isSpace && betweenWords) {
-      start = i;
-      betweenWords = false;
-    } else if (isSpace && !betweenWords) {
-      token.push_back( string( input+start, i-start ) );
-      betweenWords = true;
-    }
-  }
-  if (!betweenWords)
-    token.push_back( string( input+start, i-start ) );
-  return token;
-}
-
 namespace Moses
 {
 
@@ -464,7 +442,7 @@ void PhraseDictionaryMultiModelCounts::LoadLexicalTable( string &fileName, lexic
     i++;
     if (i%100000 == 0) cerr << "." << flush;
 
-    vector<string> token = tokenize( line.c_str() );
+    const vector<string> token = util::tokenize( line.c_str() );
     if (token.size() != 4) {
       cerr << "line " << i << " in " << fileName
            << " has wrong number of tokens, skipping:\n"
diff --git a/phrase-extract/DomainFeature.cpp b/phrase-extract/DomainFeature.cpp
index 899eb9f1c..0d2b96a8a 100644
--- a/phrase-extract/DomainFeature.cpp
+++ b/phrase-extract/DomainFeature.cpp
@@ -2,6 +2,7 @@
 #include "ExtractionPhrasePair.h"
 #include "tables-core.h"
 #include "InputFileStream.h"
+#include "util/tokenize.hh"
 
 using namespace std;
 
@@ -17,7 +18,7 @@ void Domain::load( const std::string &domainFileName )
   string line;
   while(getline(*fileP, line)) {
     // read
-    vector< string > domainSpecLine = tokenize( line.c_str() );
+    const vector< string > domainSpecLine = util::tokenize( line.c_str() );
     int lineNumber;
     if (domainSpecLine.size() != 2 ||
         ! sscanf(domainSpecLine[0].c_str(), "%d", &lineNumber)) {
@@ -25,7 +26,7 @@ void Domain::load( const std::string &domainFileName )
       exit(1);
     }
     // store
-    string &name = domainSpecLine[1];
+    const string &name = domainSpecLine[1];
     spec.push_back( make_pair( lineNumber, name ));
     if (name2id.find( name ) == name2id.end()) {
       name2id[ name ] = list.size();
diff --git a/phrase-extract/DomainFeature.h b/phrase-extract/DomainFeature.h
index 040a5fc72..95babb6c2 100644
--- a/phrase-extract/DomainFeature.h
+++ b/phrase-extract/DomainFeature.h
@@ -14,8 +14,6 @@
 
 #include "ScoreFeature.h"
 
-extern std::vector<std::string> tokenize( const char*);
-
 namespace MosesTraining
 {
 
diff --git a/phrase-extract/SentenceAlignment.cpp b/phrase-extract/SentenceAlignment.cpp
index ee7f27ed9..21c1a1dbd 100644
--- a/phrase-extract/SentenceAlignment.cpp
+++ b/phrase-extract/SentenceAlignment.cpp
@@ -24,6 +24,7 @@
 #include <string>
 
 #include "tables-core.h"
+#include "util/tokenize.hh"
 
 using namespace std;
 
@@ -40,7 +41,7 @@ void addBoundaryWords(vector<string> &phrase)
 
 bool SentenceAlignment::processTargetSentence(const char * targetString, int, bool boundaryRules)
 {
-  target = tokenize(targetString);
+  target = util::tokenize(targetString);
   if (boundaryRules)
     addBoundaryWords(target);
   return true;
@@ -48,7 +49,7 @@ bool SentenceAlignment::processTargetSentence(const char * targetString, int, bo
 
 bool SentenceAlignment::processSourceSentence(const char * sourceString, int, bool boundaryRules)
 {
-  source = tokenize(sourceString);
+  source = util::tokenize(sourceString);
   if (boundaryRules)
     addBoundaryWords(source);
   return true;
@@ -89,7 +90,7 @@ bool SentenceAlignment::create(const char targetString[],
   }
 
   // reading in alignments
-  vector<string> alignmentSequence = tokenize( alignmentString );
+  vector<string> alignmentSequence = util::tokenize( alignmentString );
   for(size_t i=0; i<alignmentSequence.size(); i++) {
     int s,t;
     // cout << "scaning " << alignmentSequence[i].c_str() << endl;
diff --git a/phrase-extract/SentenceAlignmentWithSyntax.cpp b/phrase-extract/SentenceAlignmentWithSyntax.cpp
index 1b4ed7c88..7403243ab 100644
--- a/phrase-extract/SentenceAlignmentWithSyntax.cpp
+++ b/phrase-extract/SentenceAlignmentWithSyntax.cpp
@@ -26,6 +26,7 @@
 #include "tables-core.h"
 #include "XmlException.h"
 #include "XmlTree.h"
+#include "util/tokenize.hh"
 
 using namespace std;
 
@@ -49,7 +50,7 @@ bool SentenceAlignmentWithSyntax::processTargetSentence(const char * targetStrin
               << sentenceID << ": " << e.getMsg() << std::endl;
     return false;
   }
-  target = tokenize(targetStringCPP.c_str());
+  target = util::tokenize(targetStringCPP.c_str());
   return true;
 }
 
@@ -70,11 +71,8 @@ bool SentenceAlignmentWithSyntax::processSourceSentence(const char * sourceStrin
               << sentenceID << ": " << e.getMsg() << std::endl;
     return false;
   }
-  source = tokenize(sourceStringCPP.c_str());
+  source = util::tokenize(sourceStringCPP.c_str());
   return true;
 }
 
 } // namespace
-
-
-
diff --git a/phrase-extract/consolidate-direct-main.cpp b/phrase-extract/consolidate-direct-main.cpp
index 423a3909b..e85a897ef 100644
--- a/phrase-extract/consolidate-direct-main.cpp
+++ b/phrase-extract/consolidate-direct-main.cpp
@@ -25,11 +25,10 @@
 #include <cstdlib>
 #include "InputFileStream.h"
 #include "OutputFileStream.h"
+#include "util/tokenize.hh"
 
 using namespace std;
 
-std::vector<std::string> tokenize( const char [] );
-
 vector< string > splitLine(const char *line)
 {
   vector< string > item;
@@ -109,7 +108,7 @@ int main(int argc, char* argv[])
     if (! getLine(fileDirectP,  itemDirect  ))
       break;
 
-    vector< string > count = tokenize( itemDirect[4].c_str() );
+    const vector< string > count = util::tokenize( itemDirect[4].c_str() );
     float countEF = atof(count[0].c_str());
     float countF = atof(count[1].c_str());
     float prob = countF/countEF;
diff --git a/phrase-extract/consolidate-reverse-main.cpp b/phrase-extract/consolidate-reverse-main.cpp
index e2b0ad473..abba063a3 100644
--- a/phrase-extract/consolidate-reverse-main.cpp
+++ b/phrase-extract/consolidate-reverse-main.cpp
@@ -28,6 +28,7 @@
 
 #include "tables-core.h"
 #include "InputFileStream.h"
+#include "util/tokenize.hh"
 
 using namespace std;
 
@@ -165,8 +166,8 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
     fileConsolidated << " ||| " << reverseAlignment(itemDirect[3]);
 
     // counts, for debugging
-    vector<string> directCounts = tokenize(itemDirect[4].c_str());
-    vector<string> indirectCounts = tokenize(itemIndirect[4].c_str());
+    const vector<string> directCounts = util::tokenize(itemDirect[4].c_str());
+    const vector<string> indirectCounts = util::tokenize(itemIndirect[4].c_str());
     fileConsolidated << "||| " << directCounts[0] << " " << indirectCounts[0];
     // output rule count if present in either file
     if (indirectCounts.size() > 1) {
@@ -199,7 +200,6 @@ bool getLine( istream &fileP, vector< string > &item )
 vector< string > splitLine(const char *line)
 {
   vector< string > item;
-  bool betweenWords = true;
   int start=0;
   int i=0;
   for(; line[i] != '\0'; i++) {
@@ -223,10 +223,10 @@ string reverseAlignment(const string &alignments)
 {
   stringstream ret("");
 
-  vector<string> alignToks = tokenize(alignments.c_str());
+  const vector<string> alignToks = util::tokenize(alignments.c_str());
 
   for (size_t i = 0; i < alignToks.size(); ++i) {
-    string &alignPair = alignToks[i];
+    const string &alignPair = alignToks[i];
     vector<string> alignPoints;
     Tokenize(alignPoints, alignPair, "-");
     assert(alignPoints.size() == 2);
diff --git a/phrase-extract/extract-ghkm/XmlTreeParser.cpp b/phrase-extract/extract-ghkm/XmlTreeParser.cpp
index 2f28c3244..267906b4c 100644
--- a/phrase-extract/extract-ghkm/XmlTreeParser.cpp
+++ b/phrase-extract/extract-ghkm/XmlTreeParser.cpp
@@ -23,6 +23,7 @@
 #include "tables-core.h"
 #include "XmlException.h"
 #include "XmlTree.h"
+#include "util/tokenize.hh"
 
 #include <cassert>
 #include <vector>
@@ -56,7 +57,7 @@ std::auto_ptr<ParseTree> XmlTreeParser::Parse(const std::string &line)
   m_tree.ConnectNodes();
   SyntaxNode *root = m_tree.GetTop();
   assert(root);
-  m_words = tokenize(m_line.c_str());
+  m_words = util::tokenize(m_line.c_str());
   return ConvertTree(*root, m_words);
 }
 
diff --git a/phrase-extract/pcfg-common/xml_tree_parser.cc b/phrase-extract/pcfg-common/xml_tree_parser.cc
index 3d9291994..6a2f3fc51 100644
--- a/phrase-extract/pcfg-common/xml_tree_parser.cc
+++ b/phrase-extract/pcfg-common/xml_tree_parser.cc
@@ -25,6 +25,7 @@
 #include "tables-core.h"
 #include "XmlException.h"
 #include "XmlTree.h"
+#include "util/tokenize.hh"
 
 #include "syntax-common/exception.h"
 
@@ -51,7 +52,7 @@ std::auto_ptr<PcfgTree> XmlTreeParser::Parse(const std::string &line) {
     // There is no XML tree.
     return std::auto_ptr<PcfgTree>();
   }
-  m_words = tokenize(m_line.c_str());
+  m_words = util::tokenize(m_line.c_str());
   return ConvertTree(*root, m_words);
 }
 
diff --git a/phrase-extract/relax-parse-main.cpp b/phrase-extract/relax-parse-main.cpp
index a6d50cef5..ec7bd25fd 100644
--- a/phrase-extract/relax-parse-main.cpp
+++ b/phrase-extract/relax-parse-main.cpp
@@ -21,6 +21,7 @@
 
 #include "relax-parse.h"
 #include "tables-core.h"
+#include "util/tokenize.hh"
 
 using namespace std;
 using namespace MosesTraining;
@@ -44,7 +45,7 @@ int main(int argc, char* argv[])
     map< string, int > topLabelCollection; // count of top labels, not used
     SyntaxTree tree;
     ProcessAndStripXMLTags( inBufferString, tree, labelCollection, topLabelCollection, false );
-    vector< string > inWords = tokenize( inBufferString.c_str() );
+    const vector< string > inWords = util::tokenize( inBufferString.c_str() );
 
     // output tree
     // cerr << "BEFORE:" << endl << tree;
@@ -104,7 +105,7 @@ void init(int argc, char* argv[])
   }
 }
 
-void store( SyntaxTree &tree, vector< string > &words )
+void store( SyntaxTree &tree, const vector< string > &words )
 {
   // output words
   for( size_t i=0; i<words.size(); i++ ) {
diff --git a/phrase-extract/relax-parse.h b/phrase-extract/relax-parse.h
index ec604405e..9bd0bfb23 100644
--- a/phrase-extract/relax-parse.h
+++ b/phrase-extract/relax-parse.h
@@ -39,7 +39,7 @@ char SAMTLevel = 0;
 
 // functions
 void init(int argc, char* argv[]);
-void store( MosesTraining::SyntaxTree &tree, std::vector<std::string> &words );
+void store( MosesTraining::SyntaxTree &tree, const std::vector<std::string> &words );
 void LeftBinarize( MosesTraining::SyntaxTree &tree, MosesTraining::ParentNodes &parents );
 void RightBinarize( MosesTraining::SyntaxTree &tree, MosesTraining::ParentNodes &parents );
 void SAMT( MosesTraining::SyntaxTree &tree, MosesTraining::ParentNodes &parents );
diff --git a/phrase-extract/statistics-main.cpp b/phrase-extract/statistics-main.cpp
index a6c0b74db..d797b9f45 100644
--- a/phrase-extract/statistics-main.cpp
+++ b/phrase-extract/statistics-main.cpp
@@ -14,6 +14,7 @@
 #include "AlignmentPhrase.h"
 #include "tables-core.h"
 #include "InputFileStream.h"
+#include "util/tokenize.hh"
 
 using namespace std;
 using namespace MosesTraining;
@@ -237,7 +238,7 @@ void processPhrasePairs( vector< PhraseAlignment > &phrasePair )
 
 bool PhraseAlignment::create(const char line[], int lineID )
 {
-  vector< string > token = tokenize( line );
+  const vector< string > token = util::tokenize( line );
   int item = 1;
   PHRASE phraseF, phraseE;
   for (size_t j=0; j<token.size(); j++) {
@@ -321,7 +322,7 @@ void LexicalTable::load( const string &filePath )
     i++;
     if (i%100000 == 0) cerr << "." << flush;
 
-    vector<string> token = tokenize( line.c_str() );
+    const vector<string> token = util::tokenize( line.c_str() );
     if (token.size() != 3) {
       cerr << "line " << i << " in " << filePath << " has wrong number of tokens, skipping:\n" <<
            token.size() << " " << token[0] << " " << line << endl;
diff --git a/phrase-extract/syntax-common/xml_tree_parser.cc b/phrase-extract/syntax-common/xml_tree_parser.cc
index c4363a3e2..47fda1c2d 100644
--- a/phrase-extract/syntax-common/xml_tree_parser.cc
+++ b/phrase-extract/syntax-common/xml_tree_parser.cc
@@ -3,6 +3,7 @@
 #include "tables-core.h"
 #include "XmlException.h"
 #include "XmlTree.h"
+#include "util/tokenize.hh"
 
 #include <cassert>
 #include <vector>
@@ -24,7 +25,7 @@ StringTree *XmlTreeParser::Parse(const std::string &line) {
   tree_.ConnectNodes();
   SyntaxNode *root = tree_.GetTop();
   assert(root);
-  words_ = tokenize(line_.c_str());
+  words_ = util::tokenize(line_.c_str());
   return ConvertTree(*root, words_);
 }
 
diff --git a/phrase-extract/tables-core.cpp b/phrase-extract/tables-core.cpp
index 30c1544e9..93c5041dd 100644
--- a/phrase-extract/tables-core.cpp
+++ b/phrase-extract/tables-core.cpp
@@ -1,5 +1,6 @@
 // $Id$
 //#include "beammain.h"
+#include "util/tokenize.hh"
 #include "tables-core.h"
 
 #define TABLE_LINE_MAX_LENGTH 1000
@@ -7,29 +8,6 @@
 
 using namespace std;
 
-// as in beamdecoder/tables.cpp
-vector<string> tokenize( const char* input )
-{
-  vector< string > token;
-  bool betweenWords = true;
-  int start=0;
-  int i=0;
-  for(; input[i] != '\0'; i++) {
-    bool isSpace = (input[i] == ' ' || input[i] == '\t');
-
-    if (!isSpace && betweenWords) {
-      start = i;
-      betweenWords = false;
-    } else if (isSpace && !betweenWords) {
-      token.push_back( string( input+start, i-start ) );
-      betweenWords = true;
-    }
-  }
-  if (!betweenWords)
-    token.push_back( string( input+start, i-start ) );
-  return token;
-}
-
 namespace MosesTraining
 {
 
@@ -107,7 +85,7 @@ void DTable::load( const string& fileName )
       abort();
     }
 
-    vector<string> token = tokenize(line.c_str());
+    const vector<string> token = util::tokenize(line.c_str());
     if (token.size() < 2) {
       cerr << "line " << i << " in " << fileName << " too short, skipping\n";
       continue;
diff --git a/phrase-extract/tables-core.h b/phrase-extract/tables-core.h
index 44545d3a0..011fe09e6 100644
--- a/phrase-extract/tables-core.h
+++ b/phrase-extract/tables-core.h
@@ -12,8 +12,6 @@
 #include <map>
 #include <cmath>
 
-extern std::vector<std::string> tokenize( const char*);
-
 namespace MosesTraining
 {