Merge ../mosesdecoder into hieu

2024-12-25 12:52:29 +03:00 · 2014-06-08 17:07:41 +01:00 · 2014-06-08 17:07:41 +01:00 · 3c6a31128d
commit 3c6a31128d
parent 45ed0a5b1f 1b667e3e24
18 changed files with 111 additions and 221 deletions
--- a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp
+++ b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp
@ -17,12 +17,8 @@ License along with this library; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 ***********************************************************************/
 #include "util/exception.hh"
-
 #include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"

-#define LINE_MAX_LENGTH 100000
-#include "phrase-extract/SafeGetline.h" // for SAFE_GETLINE()
-
 using namespace std;

 template<typename T>
@ -461,16 +457,14 @@ void PhraseDictionaryMultiModelCounts::LoadLexicalTable( string &fileName, lexic
  }
  istream *inFileP = &inFile;

-  char line[LINE_MAX_LENGTH];
-
  int i=0;
-  while(true) {
+  string line;
+
+  while(getline(*inFileP, line)) {
    i++;
    if (i%100000 == 0) cerr << "." << flush;
-    SAFE_GETLINE((*inFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
-    if (inFileP->eof()) break;

-    vector<string> token = tokenize( line );
+    vector<string> token = tokenize( line.c_str() );
    if (token.size() != 4) {
      cerr << "line " << i << " in " << fileName
           << " has wrong number of tokens, skipping:\n"
--- a/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp
+++ b/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp
@ -413,11 +413,9 @@ void FuzzyMatchWrapper::load_corpus( const std::string &fileName, vector< vector

  istream *fileStreamP = &fileStream;

-  char line[LINE_MAX_LENGTH];
-  while(true) {
-    SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
-    if (fileStreamP->eof()) break;
-    corpus.push_back( GetVocabulary().Tokenize( line ) );
+  string line;
+  while(getline(*fileStreamP, line)) {
+    corpus.push_back( GetVocabulary().Tokenize( line.c_str() ) );
  }
 }

@ -436,12 +434,9 @@ void FuzzyMatchWrapper::load_target(const std::string &fileName, vector< vector<
  WORD_ID delimiter = GetVocabulary().StoreIfNew("|||");

  int lineNum = 0;
-  char line[LINE_MAX_LENGTH];
-  while(true) {
-    SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
-    if (fileStreamP->eof()) break;
-
-    vector<WORD_ID> toks = GetVocabulary().Tokenize( line );
+  string line;
+  while(getline(*fileStreamP, line)) {
+    vector<WORD_ID> toks = GetVocabulary().Tokenize( line.c_str() );

    corpus.push_back(vector< SentenceAlignment >());
    vector< SentenceAlignment > &vec = corpus.back();
@ -493,11 +488,8 @@ void FuzzyMatchWrapper::load_alignment(const std::string &fileName, vector< vect
  string delimiter = "|||";

  int lineNum = 0;
-  char line[LINE_MAX_LENGTH];
-  while(true) {
-    SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
-    if (fileStreamP->eof()) break;
-
+  string line;
+  while(getline(*fileStreamP, line)) {
    vector< SentenceAlignment > &vec = corpus[lineNum];
    size_t targetInd = 0;
    SentenceAlignment *sentence = &vec[targetInd];
--- a/moses/TranslationModel/fuzzy-match/SuffixArray.cpp
+++ b/moses/TranslationModel/fuzzy-match/SuffixArray.cpp
@ -14,17 +14,16 @@ SuffixArray::SuffixArray( string fileName )
  m_endOfSentence = m_vcb.StoreIfNew( "<s>" );

  ifstream extractFile;
-  char line[LINE_MAX_LENGTH];

  // count the number of words first;
  extractFile.open(fileName.c_str());
  istream *fileP = &extractFile;
  m_size = 0;
  size_t sentenceCount = 0;
-  while(!fileP->eof()) {
-    SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n');
-    if (fileP->eof()) break;
-    vector< WORD_ID > words = m_vcb.Tokenize( line );
+  string line;
+  while(getline(*fileP, line)) {
+
+    vector< WORD_ID > words = m_vcb.Tokenize( line.c_str() );
    m_size += words.size() + 1;
    sentenceCount++;
  }
@ -43,10 +42,8 @@ SuffixArray::SuffixArray( string fileName )
  int sentenceId = 0;
  extractFile.open(fileName.c_str());
  fileP = &extractFile;
-  while(!fileP->eof()) {
-    SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n');
-    if (fileP->eof()) break;
-    vector< WORD_ID > words = m_vcb.Tokenize( line );
+  while(getline(*fileP, line)) {
+    vector< WORD_ID > words = m_vcb.Tokenize( line.c_str() );

    // add to corpus vector
    corpus.push_back(words);
--- a/moses/TranslationModel/fuzzy-match/Vocabulary.h
+++ b/moses/TranslationModel/fuzzy-match/Vocabulary.h
@ -17,20 +17,6 @@

 namespace tmmt
 {
-
-#define MAX_LENGTH 10000
-
-#define SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM) { \
-                _IS.getline(_LINE, _SIZE, _DELIM); \
-                if(_IS.fail() && !_IS.bad() && !_IS.eof()) _IS.clear(); \
-                if (_IS.gcount() == _SIZE-1) { \
-                  cerr << "Line too long! Buffer overflow. Delete lines >=" \
-                    << _SIZE << " chars or raise MAX_LENGTH in phrase-extract/tables-core.cpp" \
-                    << endl; \
-                    exit(1); \
-                } \
-              }
-
 typedef std::string WORD;
 typedef unsigned int WORD_ID;

--- a/phrase-extract/DomainFeature.cpp
+++ b/phrase-extract/DomainFeature.cpp
@ -2,9 +2,6 @@
 #include "ExtractionPhrasePair.h"
 #include "tables-core.h"
 #include "InputFileStream.h"
-#include "SafeGetline.h"
-
-#define TABLE_LINE_MAX_LENGTH 1000

 using namespace std;

@ -16,12 +13,11 @@ void Domain::load( const std::string &domainFileName )
 {
  Moses::InputFileStream fileS( domainFileName );
  istream *fileP = &fileS;
-  while(true) {
-    char line[TABLE_LINE_MAX_LENGTH];
-    SAFE_GETLINE((*fileP), line, TABLE_LINE_MAX_LENGTH, '\n', __FILE__);
-    if (fileP->eof()) break;
+
+	string line;
+  while(getline(*fileP, line)) {
    // read
-    vector< string > domainSpecLine = tokenize( line );
+    vector< string > domainSpecLine = tokenize( line.c_str() );
    int lineNumber;
    if (domainSpecLine.size() != 2 ||
        ! sscanf(domainSpecLine[0].c_str(), "%d", &lineNumber)) {
--- a/phrase-extract/ExtractionPhrasePair.cpp
+++ b/phrase-extract/ExtractionPhrasePair.cpp
@ -19,7 +19,6 @@

 #include <sstream>
 #include "ExtractionPhrasePair.h"
-#include "SafeGetline.h"
 #include "tables-core.h"
 #include "score.h"
 #include "moses/Util.h"
--- a/phrase-extract/SafeGetline.h
+++ b/phrase-extract/SafeGetline.h
@ -1,35 +0,0 @@
-/***********************************************************************
-  Moses - factored phrase-based language decoder
-  Copyright (C) 2010 University of Edinburgh
-
-  This library is free software; you can redistribute it and/or
-  modify it under the terms of the GNU Lesser General Public
-  License as published by the Free Software Foundation; either
-  version 2.1 of the License, or (at your option) any later version.
-
-  This library is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  Lesser General Public License for more details.
-
-  You should have received a copy of the GNU Lesser General Public
-  License along with this library; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
- ***********************************************************************/
-
-#pragma once
-#ifndef SAFE_GETLINE_INCLUDED_
-#define SAFE_GETLINE_INCLUDED_
-
-#define SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM, _FILE) {            \
-    _IS.getline(_LINE, _SIZE, _DELIM);                              \
-    if(_IS.fail() && !_IS.bad() && !_IS.eof()) _IS.clear();         \
-    if (_IS.gcount() == _SIZE-1) {                                  \
-      cerr << "Line too long! Buffer overflow. Delete lines >="     \
-       << _SIZE << " chars or raise LINE_MAX_LENGTH in " << _FILE   \
-       << endl;                                                     \
-      exit(1);                                                      \
-    }                                                               \
-  }
-
-#endif
--- a/phrase-extract/SentenceAlignment.cpp
+++ b/phrase-extract/SentenceAlignment.cpp
@ -54,7 +54,11 @@ bool SentenceAlignment::processSourceSentence(const char * sourceString, int, bo
  return true;
 }

-bool SentenceAlignment::create( char targetString[], char sourceString[], char alignmentString[], char weightString[], int sentenceID, bool boundaryRules)
+bool SentenceAlignment::create(const char targetString[],
+							const char sourceString[],
+							const char alignmentString[],
+							const char weightString[],
+							int sentenceID, bool boundaryRules)
 {
  using namespace std;
  this->sentenceID = sentenceID;
--- a/phrase-extract/SentenceAlignment.h
+++ b/phrase-extract/SentenceAlignment.h
@ -43,8 +43,11 @@ public:

  virtual bool processSourceSentence(const char *, int, bool boundaryRules);

-  bool create(char targetString[], char sourceString[],
-              char alignmentString[], char weightString[], int sentenceID, bool boundaryRules);
+  bool create(const char targetString[],
+		  	  const char sourceString[],
+		  	  const char alignmentString[],
+		  	  const char weightString[],
+		  	  int sentenceID, bool boundaryRules);

  void invertAlignment();

--- a/phrase-extract/consolidate-direct-main.cpp
+++ b/phrase-extract/consolidate-direct-main.cpp
@ -26,16 +26,9 @@
 #include "InputFileStream.h"
 #include "OutputFileStream.h"

-#include "SafeGetline.h"
-
-#define LINE_MAX_LENGTH 10000
-
 using namespace std;

-char line[LINE_MAX_LENGTH];
-
-
-vector< string > splitLine()
+vector< string > splitLine(const char *line)
 {
  vector< string > item;
  int start=0;
@ -61,14 +54,15 @@ bool getLine( istream &fileP, vector< string > &item )
 {
  if (fileP.eof())
    return false;
-
-  SAFE_GETLINE((fileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
-  if (fileP.eof())
+  // a successful getline() is what yields a usable line, so that branch must report true
+  string line;
+  if (getline(fileP, line)) {
+    item = splitLine(line.c_str());
-    return false;
+    return true;
-
-  item = splitLine();
-
-  return true;
+  }
+  else {
+    return false;
+  }
 }


--- a/phrase-extract/consolidate-main.cpp
+++ b/phrase-extract/consolidate-main.cpp
@ -26,7 +26,6 @@
 #include <cstring>

 #include "tables-core.h"
-#include "SafeGetline.h"
 #include "InputFileStream.h"
 #include "OutputFileStream.h"

--- a/phrase-extract/consolidate-reverse-main.cpp
+++ b/phrase-extract/consolidate-reverse-main.cpp
@ -27,23 +27,19 @@
 #include <cstring>

 #include "tables-core.h"
-#include "SafeGetline.h"
 #include "InputFileStream.h"

-#define LINE_MAX_LENGTH 10000
-
 using namespace std;

 bool hierarchicalFlag = false;
 bool onlyDirectFlag = false;
 bool phraseCountFlag = true;
 bool logProbFlag = false;
-char line[LINE_MAX_LENGTH];

 void processFiles( char*, char*, char* );
 bool getLine( istream &fileP, vector< string > &item );
 string reverseAlignment(const string &alignments);
-vector< string > splitLine();
+vector< string > splitLine(const char *line);

 inline void Tokenize(std::vector<std::string> &output
                     , const std::string& str
@ -190,17 +186,18 @@ bool getLine( istream &fileP, vector< string > &item )
 {
  if (fileP.eof())
    return false;
-
-  SAFE_GETLINE((fileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
-  if (fileP.eof())
+  // a successful getline() is what yields a usable line, so that branch must report true
+  string line;
+  if (getline(fileP, line)) {
+    item = splitLine(line.c_str());
-    return false;
+    return true;
-
-  item = splitLine();
-
-  return true;
+  }
+  else {
+    return false;
+  }
 }

-vector< string > splitLine()
+vector< string > splitLine(const char *line)
 {
  vector< string > item;
  bool betweenWords = true;
--- a/phrase-extract/extract-main.cpp
+++ b/phrase-extract/extract-main.cpp
@ -19,7 +19,6 @@
 #include <set>
 #include <vector>

-#include "SafeGetline.h"
 #include "SentenceAlignment.h"
 #include "tables-core.h"
 #include "InputFileStream.h"
@ -32,10 +31,6 @@ using namespace MosesTraining;
 namespace MosesTraining
 {

-
-const long int LINE_MAX_LENGTH = 500000 ;
-
-
 // HPhraseVertex represents a point in the alignment matrix
 typedef pair <int, int> HPhraseVertex;

@ -277,20 +272,18 @@ int main(int argc, char* argv[])

  int i = sentenceOffset;

-  while(true) {
+  string englishString, foreignString, alignmentString, weightString;
+
+  while(getline(*eFileP, englishString)) {
    i++;
    if (i%10000 == 0) cerr << "." << flush;
-    char englishString[LINE_MAX_LENGTH];
-    char foreignString[LINE_MAX_LENGTH];
-    char alignmentString[LINE_MAX_LENGTH];
-    char weightString[LINE_MAX_LENGTH];
-    SAFE_GETLINE((*eFileP), englishString, LINE_MAX_LENGTH, '\n', __FILE__);
-    if (eFileP->eof()) break;
-    SAFE_GETLINE((*fFileP), foreignString, LINE_MAX_LENGTH, '\n', __FILE__);
-    SAFE_GETLINE((*aFileP), alignmentString, LINE_MAX_LENGTH, '\n', __FILE__);
+
+    getline(*fFileP, foreignString);
+    getline(*aFileP, alignmentString);
    if (iwFileP) {
-      SAFE_GETLINE((*iwFileP), weightString, LINE_MAX_LENGTH, '\n', __FILE__);
+      getline(*iwFileP, weightString);
    }
+
    SentenceAlignment sentence;
    // cout << "read in: " << englishString << " & " << foreignString << " & " << alignmentString << endl;
    //az: output src, tgt, and alingment line
@ -300,7 +293,11 @@ int main(int argc, char* argv[])
      cout << "LOG: ALT: " << alignmentString << endl;
      cout << "LOG: PHRASES_BEGIN:" << endl;
    }
-    if (sentence.create( englishString, foreignString, alignmentString, weightString, i, false)) {
+    if (sentence.create( englishString.c_str(),
+    					foreignString.c_str(),
+    					alignmentString.c_str(),
+    					weightString.c_str(),
+    					i, false)) {
      if (options.placeholders.size()) {
        sentence.invertAlignment();
      }
--- a/phrase-extract/extract-ordering-main.cpp
+++ b/phrase-extract/extract-ordering-main.cpp
@ -19,7 +19,6 @@
 #include <set>
 #include <vector>

-#include "SafeGetline.h"
 #include "SentenceAlignment.h"
 #include "tables-core.h"
 #include "InputFileStream.h"
@ -32,10 +31,6 @@ using namespace MosesTraining;
 namespace MosesTraining
 {

-
-const long int LINE_MAX_LENGTH = 500000 ;
-
-
 // HPhraseVertex represents a point in the alignment matrix
 typedef pair <int, int> HPhraseVertex;

@ -246,20 +241,20 @@ int main(int argc, char* argv[])

  int i = sentenceOffset;

-  while(true) {
+  string englishString, foreignString, alignmentString, weightString;
+
+  while(getline(*eFileP, englishString)) {
    i++;
-    if (i%10000 == 0) cerr << "." << flush;
-    char englishString[LINE_MAX_LENGTH];
-    char foreignString[LINE_MAX_LENGTH];
-    char alignmentString[LINE_MAX_LENGTH];
-    char weightString[LINE_MAX_LENGTH];
-    SAFE_GETLINE((*eFileP), englishString, LINE_MAX_LENGTH, '\n', __FILE__);
-    if (eFileP->eof()) break;
-    SAFE_GETLINE((*fFileP), foreignString, LINE_MAX_LENGTH, '\n', __FILE__);
-    SAFE_GETLINE((*aFileP), alignmentString, LINE_MAX_LENGTH, '\n', __FILE__);
+
+    // englishString is already filled by the loop condition; a second read here would skip every other sentence
+    getline(*fFileP, foreignString);
+    getline(*aFileP, alignmentString);
    if (iwFileP) {
-      SAFE_GETLINE((*iwFileP), weightString, LINE_MAX_LENGTH, '\n', __FILE__);
+      getline(*iwFileP, weightString);
    }
+
+    if (i%10000 == 0) cerr << "." << flush;
+
    SentenceAlignment sentence;
    // cout << "read in: " << englishString << " & " << foreignString << " & " << alignmentString << endl;
    //az: output src, tgt, and alingment line
@ -269,7 +264,7 @@ int main(int argc, char* argv[])
      cout << "LOG: ALT: " << alignmentString << endl;
      cout << "LOG: PHRASES_BEGIN:" << endl;
    }
-    if (sentence.create( englishString, foreignString, alignmentString, weightString, i, false)) {
+    if (sentence.create( englishString.c_str(), foreignString.c_str(), alignmentString.c_str(), weightString.c_str(), i, false)) {
      ExtractTask *task = new ExtractTask(i-1, sentence, options, extractFileOrientation);
      task->Run();
      delete task;
--- a/phrase-extract/extract-rules-main.cpp
+++ b/phrase-extract/extract-rules-main.cpp
@ -39,7 +39,6 @@
 #include "Hole.h"
 #include "HoleCollection.h"
 #include "RuleExist.h"
-#include "SafeGetline.h"
 #include "SentenceAlignmentWithSyntax.h"
 #include "SyntaxTree.h"
 #include "tables-core.h"
@ -47,8 +46,6 @@
 #include "InputFileStream.h"
 #include "OutputFileStream.h"

-#define LINE_MAX_LENGTH 500000
-
 using namespace std;
 using namespace MosesTraining;

@ -326,17 +323,15 @@ int main(int argc, char* argv[])

  // loop through all sentence pairs
  size_t i=sentenceOffset;
-  while(true) {
-    i++;
-    if (i%1000 == 0) cerr << i << " " << flush;
+  string targetString, sourceString, alignmentString;

-    char targetString[LINE_MAX_LENGTH];
-    char sourceString[LINE_MAX_LENGTH];
-    char alignmentString[LINE_MAX_LENGTH];
-    SAFE_GETLINE((*tFileP), targetString, LINE_MAX_LENGTH, '\n', __FILE__);
-    if (tFileP->eof()) break;
-    SAFE_GETLINE((*sFileP), sourceString, LINE_MAX_LENGTH, '\n', __FILE__);
-    SAFE_GETLINE((*aFileP), alignmentString, LINE_MAX_LENGTH, '\n', __FILE__);
+  while(getline(*tFileP, targetString)) {
+    i++;
+
+    getline(*sFileP, sourceString);
+    getline(*aFileP, alignmentString);
+
+    if (i%1000 == 0) cerr << i << " " << flush;

    SentenceAlignmentWithSyntax sentence
    (targetLabelCollection, sourceLabelCollection,
@ -349,7 +344,7 @@ int main(int argc, char* argv[])
      cout << "LOG: PHRASES_BEGIN:" << endl;
    }

-    if (sentence.create(targetString, sourceString, alignmentString,"", i, options.boundaryRules)) {
+    if (sentence.create(targetString.c_str(), sourceString.c_str(), alignmentString.c_str(),"", i, options.boundaryRules)) {
      if (options.unknownWordLabelFlag) {
        collectWordLabelCounts(sentence);
      }
--- a/phrase-extract/relax-parse-main.cpp
+++ b/phrase-extract/relax-parse-main.cpp
@ -33,17 +33,13 @@ int main(int argc, char* argv[])

  // loop through all sentences
  int i=0;
-  char inBuffer[LINE_MAX_LENGTH];
-  while(true) {
+  string inBuffer;
+  while(getline(cin, inBuffer)) {
    i++;
    if (i%1000 == 0) cerr << "." << flush;
    if (i%10000 == 0) cerr << ":" << flush;
    if (i%100000 == 0) cerr << "!" << flush;

-    // get line from stdin
-    SAFE_GETLINE( cin, inBuffer, LINE_MAX_LENGTH, '\n', __FILE__);
-    if (cin.eof()) break;
-
    // process into syntax tree representation
    string inBufferString = string( inBuffer );
    set< string > labelCollection;         // set of labels, not used
--- a/phrase-extract/score-main.cpp
+++ b/phrase-extract/score-main.cpp
@ -29,7 +29,6 @@
 #include <vector>
 #include <algorithm>

-#include "SafeGetline.h"
 #include "ScoreFeature.h"
 #include "tables-core.h"
 #include "ExtractionPhrasePair.h"
@ -40,8 +39,6 @@
 using namespace std;
 using namespace MosesTraining;

-#define LINE_MAX_LENGTH 100000
-
 namespace MosesTraining
 {
 LexicalTable lexTable;
@ -236,7 +233,7 @@ int main(int argc, char* argv[])
  }

  // loop through all extracted phrase translations
-  char line[LINE_MAX_LENGTH], lastLine[LINE_MAX_LENGTH];
-  lastLine[0] = '\0';
+  // std::string starts out empty, so lastLine needs no manual NUL termination
+  string line, lastLine;
  ExtractionPhrasePair *phrasePair = NULL;
  std::vector< ExtractionPhrasePair* > phrasePairsWithSameSource;
@ -249,8 +246,8 @@ int main(int argc, char* argv[])
  float tmpCount=0.0f, tmpPcfgSum=0.0f;

  int i=0;
-  SAFE_GETLINE( (extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__ );
-  if ( !extractFileP.eof() ) {
+  // TODO why read only the 1st line?
+  if ( getline(extractFileP, line)) {
    ++i;
    tmpPhraseSource = new PHRASE();
    tmpPhraseTarget = new PHRASE();
@ -269,23 +266,21 @@ int main(int argc, char* argv[])
    if ( hierarchicalFlag ) {
      phrasePairsWithSameSourceAndTarget.push_back( phrasePair );
    }
-    strcpy( lastLine, line );
-    SAFE_GETLINE( (extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__ );
+    lastLine = line;
  }

-  while ( !extractFileP.eof() ) {
+  while ( getline(extractFileP, line) ) {

    if ( ++i % 100000 == 0 ) {
      std::cerr << "." << std::flush;
    }

    // identical to last line? just add count
-    if (strcmp(line,lastLine) == 0) {
+    if (line == lastLine) {
      phrasePair->IncrementPrevious(tmpCount,tmpPcfgSum);
-      SAFE_GETLINE((extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
      continue;
    } else {
-      strcpy( lastLine, line );
+      lastLine = line;
    }

    tmpPhraseSource = new PHRASE();
@ -363,8 +358,6 @@ int main(int argc, char* argv[])
      }
    }

-    SAFE_GETLINE((extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
-
  }

  processPhrasePairs( phrasePairsWithSameSource, *phraseTableFile, featureManager, maybeLogProb );
@ -758,11 +751,9 @@ void loadFunctionWords( const string &fileName )
  }
  istream *inFileP = &inFile;

-  char line[LINE_MAX_LENGTH];
-  while(true) {
-    SAFE_GETLINE((*inFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
-    if (inFileP->eof()) break;
-    std::vector<string> token = tokenize( line );
+  string line;
+  while(getline(*inFileP, line)) {
+    std::vector<string> token = tokenize( line.c_str() );
    if (token.size() > 0)
      functionWordList.insert( token[0] );
  }
@ -807,16 +798,13 @@ void LexicalTable::load( const string &fileName )
  }
  istream *inFileP = &inFile;

-  char line[LINE_MAX_LENGTH];
-
+  string line;
  int i=0;
-  while(true) {
+  while(getline(*inFileP, line)) {
    i++;
    if (i%100000 == 0) std::cerr << "." << flush;
-    SAFE_GETLINE((*inFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
-    if (inFileP->eof()) break;

-    std::vector<string> token = tokenize( line );
+    std::vector<string> token = tokenize( line.c_str() );
    if (token.size() != 3) {
        std::cerr << "line " << i << " in " << fileName
           << " has wrong number of tokens, skipping:" << std::endl
--- a/phrase-extract/statistics-main.cpp
+++ b/phrase-extract/statistics-main.cpp
@ -12,15 +12,12 @@
 #include <time.h>

 #include "AlignmentPhrase.h"
-#include "SafeGetline.h"
 #include "tables-core.h"
 #include "InputFileStream.h"

 using namespace std;
 using namespace MosesTraining;

-#define LINE_MAX_LENGTH 10000
-
 namespace MosesTraining
 {

@ -31,7 +28,7 @@ public:
  vector< vector<size_t> > alignedToE;
  vector< vector<size_t> > alignedToF;

-  bool create( char*, int );
+  bool create( const char*, int );
  void clear();
  bool equals( const PhraseAlignment& );
 };
@ -106,16 +103,14 @@ int main(int argc, char* argv[])
  vector< PhraseAlignment > phrasePairsWithSameF;
  int i=0;
  int fileCount = 0;
-  while(true) {
-    if (extractFileP.eof()) break;
+
+  string line;
+  // no eof() test in the body: after a successful getline() it would drop an unterminated last line
+  while(getline(extractFileP, line)) {
    if (++i % 100000 == 0) cerr << "." << flush;
-    char line[LINE_MAX_LENGTH];
-    SAFE_GETLINE((extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
-    //    if (fileCount>0)
-    if (extractFileP.eof())
-      break;
+
    PhraseAlignment phrasePair;
-    bool isPhrasePair = phrasePair.create( line, i );
+    bool isPhrasePair = phrasePair.create( line.c_str(), i );
    if (lastForeign >= 0 && lastForeign != phrasePair.foreign) {
      processPhrasePairs( phrasePairsWithSameF );
      for(size_t j=0; j<phrasePairsWithSameF.size(); j++)
@ -124,7 +119,7 @@ int main(int argc, char* argv[])
      phraseTableE.clear();
      phraseTableF.clear();
      phrasePair.clear(); // process line again, since phrase tables flushed
-      phrasePair.create( line, i );
+      phrasePair.create( line.c_str(), i );
      phrasePairBase = 0;
    }
    lastForeign = phrasePair.foreign;
@ -242,7 +237,7 @@ void processPhrasePairs( vector< PhraseAlignment > &phrasePair )
  }
 }

-bool PhraseAlignment::create( char line[], int lineID )
+bool PhraseAlignment::create(const char line[], int lineID )
 {
  vector< string > token = tokenize( line );
  int item = 1;
@ -321,16 +316,14 @@ void LexicalTable::load( const string &filePath )
  }
  istream *inFileP = &inFile;

-  char line[LINE_MAX_LENGTH];
+  string line;

  int i=0;
-  while(true) {
+  while(getline(*inFileP, line)) {
    i++;
    if (i%100000 == 0) cerr << "." << flush;
-    SAFE_GETLINE((*inFileP), line, LINE_MAX_LENGTH, '\n', __FILE__);
-    if (inFileP->eof()) break;

-    vector<string> token = tokenize( line );
+    vector<string> token = tokenize( line.c_str() );
    if (token.size() != 3) {
      cerr << "line " << i << " in " << filePath << " has wrong number of tokens, skipping:\n" <<
           token.size() << " " << token[0] << " " << line << endl;