diff --git a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp index 04bb321d0..99d3ad256 100644 --- a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp +++ b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp @@ -17,12 +17,8 @@ License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ #include "util/exception.hh" - #include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h" -#define LINE_MAX_LENGTH 100000 -#include "phrase-extract/SafeGetline.h" // for SAFE_GETLINE() - using namespace std; template @@ -461,16 +457,14 @@ void PhraseDictionaryMultiModelCounts::LoadLexicalTable( string &fileName, lexic } istream *inFileP = &inFile; - char line[LINE_MAX_LENGTH]; - int i=0; - while(true) { + string line; + + while(getline(*inFileP, line)) { i++; if (i%100000 == 0) cerr << "." 
<< flush; - SAFE_GETLINE((*inFileP), line, LINE_MAX_LENGTH, '\n', __FILE__); - if (inFileP->eof()) break; - vector token = tokenize( line ); + vector token = tokenize( line.c_str() ); if (token.size() != 4) { cerr << "line " << i << " in " << fileName << " has wrong number of tokens, skipping:\n" diff --git a/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp b/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp index fc68e1f0d..8766743b3 100644 --- a/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp +++ b/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp @@ -413,11 +413,9 @@ void FuzzyMatchWrapper::load_corpus( const std::string &fileName, vector< vector istream *fileStreamP = &fileStream; - char line[LINE_MAX_LENGTH]; - while(true) { - SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n'); - if (fileStreamP->eof()) break; - corpus.push_back( GetVocabulary().Tokenize( line ) ); + string line; + while(getline(*fileStreamP, line)) { + corpus.push_back( GetVocabulary().Tokenize( line.c_str() ) ); } } @@ -436,12 +434,9 @@ void FuzzyMatchWrapper::load_target(const std::string &fileName, vector< vector< WORD_ID delimiter = GetVocabulary().StoreIfNew("|||"); int lineNum = 0; - char line[LINE_MAX_LENGTH]; - while(true) { - SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n'); - if (fileStreamP->eof()) break; - - vector toks = GetVocabulary().Tokenize( line ); + string line; + while(getline(*fileStreamP, line)) { + vector toks = GetVocabulary().Tokenize( line.c_str() ); corpus.push_back(vector< SentenceAlignment >()); vector< SentenceAlignment > &vec = corpus.back(); @@ -493,11 +488,8 @@ void FuzzyMatchWrapper::load_alignment(const std::string &fileName, vector< vect string delimiter = "|||"; int lineNum = 0; - char line[LINE_MAX_LENGTH]; - while(true) { - SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n'); - if (fileStreamP->eof()) break; - + string line; + while(getline(*fileStreamP, line)) { vector< SentenceAlignment > &vec = 
corpus[lineNum]; size_t targetInd = 0; SentenceAlignment *sentence = &vec[targetInd]; diff --git a/moses/TranslationModel/fuzzy-match/SuffixArray.cpp b/moses/TranslationModel/fuzzy-match/SuffixArray.cpp index 536bff741..2930147ab 100644 --- a/moses/TranslationModel/fuzzy-match/SuffixArray.cpp +++ b/moses/TranslationModel/fuzzy-match/SuffixArray.cpp @@ -14,17 +14,16 @@ SuffixArray::SuffixArray( string fileName ) m_endOfSentence = m_vcb.StoreIfNew( "" ); ifstream extractFile; - char line[LINE_MAX_LENGTH]; // count the number of words first; extractFile.open(fileName.c_str()); istream *fileP = &extractFile; m_size = 0; size_t sentenceCount = 0; - while(!fileP->eof()) { - SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n'); - if (fileP->eof()) break; - vector< WORD_ID > words = m_vcb.Tokenize( line ); + string line; + while(getline(*fileP, line)) { + + vector< WORD_ID > words = m_vcb.Tokenize( line.c_str() ); m_size += words.size() + 1; sentenceCount++; } @@ -43,10 +42,8 @@ SuffixArray::SuffixArray( string fileName ) int sentenceId = 0; extractFile.open(fileName.c_str()); fileP = &extractFile; - while(!fileP->eof()) { - SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n'); - if (fileP->eof()) break; - vector< WORD_ID > words = m_vcb.Tokenize( line ); + while(getline(*fileP, line)) { + vector< WORD_ID > words = m_vcb.Tokenize( line.c_str() ); // add to corpus vector corpus.push_back(words); diff --git a/moses/TranslationModel/fuzzy-match/Vocabulary.h b/moses/TranslationModel/fuzzy-match/Vocabulary.h index dfa11c1db..5a79e2f26 100644 --- a/moses/TranslationModel/fuzzy-match/Vocabulary.h +++ b/moses/TranslationModel/fuzzy-match/Vocabulary.h @@ -17,20 +17,6 @@ namespace tmmt { - -#define MAX_LENGTH 10000 - -#define SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM) { \ - _IS.getline(_LINE, _SIZE, _DELIM); \ - if(_IS.fail() && !_IS.bad() && !_IS.eof()) _IS.clear(); \ - if (_IS.gcount() == _SIZE-1) { \ - cerr << "Line too long! Buffer overflow. 
Delete lines >=" \ - << _SIZE << " chars or raise MAX_LENGTH in phrase-extract/tables-core.cpp" \ - << endl; \ - exit(1); \ - } \ - } - typedef std::string WORD; typedef unsigned int WORD_ID; diff --git a/phrase-extract/DomainFeature.cpp b/phrase-extract/DomainFeature.cpp index 2f99a8709..99f0713a7 100644 --- a/phrase-extract/DomainFeature.cpp +++ b/phrase-extract/DomainFeature.cpp @@ -2,9 +2,6 @@ #include "ExtractionPhrasePair.h" #include "tables-core.h" #include "InputFileStream.h" -#include "SafeGetline.h" - -#define TABLE_LINE_MAX_LENGTH 1000 using namespace std; @@ -16,12 +13,11 @@ void Domain::load( const std::string &domainFileName ) { Moses::InputFileStream fileS( domainFileName ); istream *fileP = &fileS; - while(true) { - char line[TABLE_LINE_MAX_LENGTH]; - SAFE_GETLINE((*fileP), line, TABLE_LINE_MAX_LENGTH, '\n', __FILE__); - if (fileP->eof()) break; + + string line; + while(getline(*fileP, line)) { // read - vector< string > domainSpecLine = tokenize( line ); + vector< string > domainSpecLine = tokenize( line.c_str() ); int lineNumber; if (domainSpecLine.size() != 2 || ! 
sscanf(domainSpecLine[0].c_str(), "%d", &lineNumber)) { diff --git a/phrase-extract/ExtractionPhrasePair.cpp b/phrase-extract/ExtractionPhrasePair.cpp index f70d106d1..2b26c2ad6 100644 --- a/phrase-extract/ExtractionPhrasePair.cpp +++ b/phrase-extract/ExtractionPhrasePair.cpp @@ -19,7 +19,6 @@ #include #include "ExtractionPhrasePair.h" -#include "SafeGetline.h" #include "tables-core.h" #include "score.h" #include "moses/Util.h" diff --git a/phrase-extract/SafeGetline.h b/phrase-extract/SafeGetline.h deleted file mode 100644 index 0e03b8468..000000000 --- a/phrase-extract/SafeGetline.h +++ /dev/null @@ -1,35 +0,0 @@ -/*********************************************************************** - Moses - factored phrase-based language decoder - Copyright (C) 2010 University of Edinburgh - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - ***********************************************************************/ - -#pragma once -#ifndef SAFE_GETLINE_INCLUDED_ -#define SAFE_GETLINE_INCLUDED_ - -#define SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM, _FILE) { \ - _IS.getline(_LINE, _SIZE, _DELIM); \ - if(_IS.fail() && !_IS.bad() && !_IS.eof()) _IS.clear(); \ - if (_IS.gcount() == _SIZE-1) { \ - cerr << "Line too long! Buffer overflow. 
Delete lines >=" \ - << _SIZE << " chars or raise LINE_MAX_LENGTH in " << _FILE \ - << endl; \ - exit(1); \ - } \ - } - -#endif diff --git a/phrase-extract/SentenceAlignment.cpp b/phrase-extract/SentenceAlignment.cpp index c3d71d525..120c9154d 100644 --- a/phrase-extract/SentenceAlignment.cpp +++ b/phrase-extract/SentenceAlignment.cpp @@ -54,7 +54,11 @@ bool SentenceAlignment::processSourceSentence(const char * sourceString, int, bo return true; } -bool SentenceAlignment::create( char targetString[], char sourceString[], char alignmentString[], char weightString[], int sentenceID, bool boundaryRules) +bool SentenceAlignment::create(const char targetString[], + const char sourceString[], + const char alignmentString[], + const char weightString[], + int sentenceID, bool boundaryRules) { using namespace std; this->sentenceID = sentenceID; diff --git a/phrase-extract/SentenceAlignment.h b/phrase-extract/SentenceAlignment.h index 1df61cf02..576d3279e 100644 --- a/phrase-extract/SentenceAlignment.h +++ b/phrase-extract/SentenceAlignment.h @@ -43,8 +43,11 @@ public: virtual bool processSourceSentence(const char *, int, bool boundaryRules); - bool create(char targetString[], char sourceString[], - char alignmentString[], char weightString[], int sentenceID, bool boundaryRules); + bool create(const char targetString[], + const char sourceString[], + const char alignmentString[], + const char weightString[], + int sentenceID, bool boundaryRules); void invertAlignment(); diff --git a/phrase-extract/consolidate-direct-main.cpp b/phrase-extract/consolidate-direct-main.cpp index 3b38f741c..40e0e35d4 100644 --- a/phrase-extract/consolidate-direct-main.cpp +++ b/phrase-extract/consolidate-direct-main.cpp @@ -26,16 +26,9 @@ #include "InputFileStream.h" #include "OutputFileStream.h" -#include "SafeGetline.h" - -#define LINE_MAX_LENGTH 10000 - using namespace std; -char line[LINE_MAX_LENGTH]; - - -vector< string > splitLine() +vector< string > splitLine(const char *line) { vector< 
string > item; int start=0; @@ -61,14 +54,15 @@ bool getLine( istream &fileP, vector< string > &item ) { if (fileP.eof()) return false; - - SAFE_GETLINE((fileP), line, LINE_MAX_LENGTH, '\n', __FILE__); - if (fileP.eof()) + + string line; + if (getline(fileP, line)) { + item = splitLine(line.c_str()); - return false; + return true; - - item = splitLine(); - - return true; + } + else { + return false; + } } diff --git a/phrase-extract/consolidate-main.cpp b/phrase-extract/consolidate-main.cpp index c57cc7747..43d912b81 100644 --- a/phrase-extract/consolidate-main.cpp +++ b/phrase-extract/consolidate-main.cpp @@ -26,7 +26,6 @@ #include #include "tables-core.h" -#include "SafeGetline.h" #include "InputFileStream.h" #include "OutputFileStream.h" diff --git a/phrase-extract/consolidate-reverse-main.cpp b/phrase-extract/consolidate-reverse-main.cpp index 6843bf3aa..ce59315b9 100644 --- a/phrase-extract/consolidate-reverse-main.cpp +++ b/phrase-extract/consolidate-reverse-main.cpp @@ -27,23 +27,19 @@ #include #include "tables-core.h" -#include "SafeGetline.h" #include "InputFileStream.h" -#define LINE_MAX_LENGTH 10000 - using namespace std; bool hierarchicalFlag = false; bool onlyDirectFlag = false; bool phraseCountFlag = true; bool logProbFlag = false; -char line[LINE_MAX_LENGTH]; void processFiles( char*, char*, char* ); bool getLine( istream &fileP, vector< string > &item ); string reverseAlignment(const string &alignments); -vector< string > splitLine(); +vector< string > splitLine(const char *line); inline void Tokenize(std::vector &output , const std::string& str @@ -190,17 +186,18 @@ bool getLine( istream &fileP, vector< string > &item ) { if (fileP.eof()) return false; - - SAFE_GETLINE((fileP), line, LINE_MAX_LENGTH, '\n', __FILE__); - if (fileP.eof()) + + string line; + if (getline(fileP, line)) { + item = splitLine(line.c_str()); - return false; + return true; - - item = splitLine(); - - return true; + } + else { + return false; + } } -vector< string > splitLine() +vector< string > splitLine(const 
char *line) { vector< string > item; bool betweenWords = true; diff --git a/phrase-extract/extract-main.cpp b/phrase-extract/extract-main.cpp index 5d58028d6..fe3d99cd2 100644 --- a/phrase-extract/extract-main.cpp +++ b/phrase-extract/extract-main.cpp @@ -19,7 +19,6 @@ #include #include -#include "SafeGetline.h" #include "SentenceAlignment.h" #include "tables-core.h" #include "InputFileStream.h" @@ -32,10 +31,6 @@ using namespace MosesTraining; namespace MosesTraining { - -const long int LINE_MAX_LENGTH = 500000 ; - - // HPhraseVertex represents a point in the alignment matrix typedef pair HPhraseVertex; @@ -277,20 +272,18 @@ int main(int argc, char* argv[]) int i = sentenceOffset; - while(true) { + string englishString, foreignString, alignmentString, weightString; + + while(getline(*eFileP, englishString)) { i++; if (i%10000 == 0) cerr << "." << flush; - char englishString[LINE_MAX_LENGTH]; - char foreignString[LINE_MAX_LENGTH]; - char alignmentString[LINE_MAX_LENGTH]; - char weightString[LINE_MAX_LENGTH]; - SAFE_GETLINE((*eFileP), englishString, LINE_MAX_LENGTH, '\n', __FILE__); - if (eFileP->eof()) break; - SAFE_GETLINE((*fFileP), foreignString, LINE_MAX_LENGTH, '\n', __FILE__); - SAFE_GETLINE((*aFileP), alignmentString, LINE_MAX_LENGTH, '\n', __FILE__); + + getline(*fFileP, foreignString); + getline(*aFileP, alignmentString); if (iwFileP) { - SAFE_GETLINE((*iwFileP), weightString, LINE_MAX_LENGTH, '\n', __FILE__); + getline(*iwFileP, weightString); } + SentenceAlignment sentence; // cout << "read in: " << englishString << " & " << foreignString << " & " << alignmentString << endl; //az: output src, tgt, and alingment line @@ -300,7 +293,11 @@ int main(int argc, char* argv[]) cout << "LOG: ALT: " << alignmentString << endl; cout << "LOG: PHRASES_BEGIN:" << endl; } - if (sentence.create( englishString, foreignString, alignmentString, weightString, i, false)) { + if (sentence.create( englishString.c_str(), + foreignString.c_str(), + alignmentString.c_str(), + 
weightString.c_str(), + i, false)) { if (options.placeholders.size()) { sentence.invertAlignment(); } diff --git a/phrase-extract/extract-ordering-main.cpp b/phrase-extract/extract-ordering-main.cpp index 104457b01..b418ba24d 100644 --- a/phrase-extract/extract-ordering-main.cpp +++ b/phrase-extract/extract-ordering-main.cpp @@ -19,7 +19,6 @@ #include #include -#include "SafeGetline.h" #include "SentenceAlignment.h" #include "tables-core.h" #include "InputFileStream.h" @@ -32,10 +31,6 @@ using namespace MosesTraining; namespace MosesTraining { - -const long int LINE_MAX_LENGTH = 500000 ; - - // HPhraseVertex represents a point in the alignment matrix typedef pair HPhraseVertex; @@ -246,20 +241,19 @@ int main(int argc, char* argv[]) int i = sentenceOffset; - while(true) { + string englishString, foreignString, alignmentString, weightString; + + while(getline(*eFileP, englishString)) { i++; - if (i%10000 == 0) cerr << "." << flush; - char englishString[LINE_MAX_LENGTH]; - char foreignString[LINE_MAX_LENGTH]; - char alignmentString[LINE_MAX_LENGTH]; - char weightString[LINE_MAX_LENGTH]; - SAFE_GETLINE((*eFileP), englishString, LINE_MAX_LENGTH, '\n', __FILE__); - if (eFileP->eof()) break; - SAFE_GETLINE((*fFileP), foreignString, LINE_MAX_LENGTH, '\n', __FILE__); - SAFE_GETLINE((*aFileP), alignmentString, LINE_MAX_LENGTH, '\n', __FILE__); + + getline(*fFileP, foreignString); + getline(*aFileP, alignmentString); if (iwFileP) { - SAFE_GETLINE((*iwFileP), weightString, LINE_MAX_LENGTH, '\n', __FILE__); + getline(*iwFileP, weightString); } + + if (i%10000 == 0) cerr << "." 
<< flush; + SentenceAlignment sentence; // cout << "read in: " << englishString << " & " << foreignString << " & " << alignmentString << endl; //az: output src, tgt, and alingment line @@ -269,7 +263,7 @@ int main(int argc, char* argv[]) cout << "LOG: ALT: " << alignmentString << endl; cout << "LOG: PHRASES_BEGIN:" << endl; } - if (sentence.create( englishString, foreignString, alignmentString, weightString, i, false)) { + if (sentence.create( englishString.c_str(), foreignString.c_str(), alignmentString.c_str(), weightString.c_str(), i, false)) { ExtractTask *task = new ExtractTask(i-1, sentence, options, extractFileOrientation); task->Run(); delete task; diff --git a/phrase-extract/extract-rules-main.cpp b/phrase-extract/extract-rules-main.cpp index f5f44316e..592946b0d 100644 --- a/phrase-extract/extract-rules-main.cpp +++ b/phrase-extract/extract-rules-main.cpp @@ -39,7 +39,6 @@ #include "Hole.h" #include "HoleCollection.h" #include "RuleExist.h" -#include "SafeGetline.h" #include "SentenceAlignmentWithSyntax.h" #include "SyntaxTree.h" #include "tables-core.h" @@ -47,8 +46,6 @@ #include "InputFileStream.h" #include "OutputFileStream.h" -#define LINE_MAX_LENGTH 500000 - using namespace std; using namespace MosesTraining; @@ -326,17 +323,15 @@ int main(int argc, char* argv[]) // loop through all sentence pairs size_t i=sentenceOffset; - while(true) { - i++; - if (i%1000 == 0) cerr << i << " " << flush; + string targetString, sourceString, alignmentString; - char targetString[LINE_MAX_LENGTH]; - char sourceString[LINE_MAX_LENGTH]; - char alignmentString[LINE_MAX_LENGTH]; - SAFE_GETLINE((*tFileP), targetString, LINE_MAX_LENGTH, '\n', __FILE__); - if (tFileP->eof()) break; - SAFE_GETLINE((*sFileP), sourceString, LINE_MAX_LENGTH, '\n', __FILE__); - SAFE_GETLINE((*aFileP), alignmentString, LINE_MAX_LENGTH, '\n', __FILE__); + while(getline(*tFileP, targetString)) { + i++; + + getline(*sFileP, sourceString); + getline(*aFileP, alignmentString); + + if (i%1000 == 0) cerr 
<< i << " " << flush; SentenceAlignmentWithSyntax sentence (targetLabelCollection, sourceLabelCollection, @@ -349,7 +344,7 @@ int main(int argc, char* argv[]) cout << "LOG: PHRASES_BEGIN:" << endl; } - if (sentence.create(targetString, sourceString, alignmentString,"", i, options.boundaryRules)) { + if (sentence.create(targetString.c_str(), sourceString.c_str(), alignmentString.c_str(),"", i, options.boundaryRules)) { if (options.unknownWordLabelFlag) { collectWordLabelCounts(sentence); } diff --git a/phrase-extract/relax-parse-main.cpp b/phrase-extract/relax-parse-main.cpp index a58d4d97f..c04cae85b 100644 --- a/phrase-extract/relax-parse-main.cpp +++ b/phrase-extract/relax-parse-main.cpp @@ -33,17 +33,13 @@ int main(int argc, char* argv[]) // loop through all sentences int i=0; - char inBuffer[LINE_MAX_LENGTH]; - while(true) { + string inBuffer; + while(getline(cin, inBuffer)) { i++; if (i%1000 == 0) cerr << "." << flush; if (i%10000 == 0) cerr << ":" << flush; if (i%100000 == 0) cerr << "!" 
<< flush; - // get line from stdin - SAFE_GETLINE( cin, inBuffer, LINE_MAX_LENGTH, '\n', __FILE__); - if (cin.eof()) break; - // process into syntax tree representation string inBufferString = string( inBuffer ); set< string > labelCollection; // set of labels, not used diff --git a/phrase-extract/score-main.cpp b/phrase-extract/score-main.cpp index e2f270038..c3abd92ec 100644 --- a/phrase-extract/score-main.cpp +++ b/phrase-extract/score-main.cpp @@ -29,7 +29,6 @@ #include #include -#include "SafeGetline.h" #include "ScoreFeature.h" #include "tables-core.h" #include "ExtractionPhrasePair.h" @@ -40,8 +39,6 @@ using namespace std; using namespace MosesTraining; -#define LINE_MAX_LENGTH 100000 - namespace MosesTraining { LexicalTable lexTable; @@ -236,7 +233,7 @@ int main(int argc, char* argv[]) } // loop through all extracted phrase translations - char line[LINE_MAX_LENGTH], lastLine[LINE_MAX_LENGTH]; + string line, lastLine; lastLine[0] = '\0'; ExtractionPhrasePair *phrasePair = NULL; std::vector< ExtractionPhrasePair* > phrasePairsWithSameSource; @@ -249,8 +246,8 @@ int main(int argc, char* argv[]) float tmpCount=0.0f, tmpPcfgSum=0.0f; int i=0; - SAFE_GETLINE( (extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__ ); - if ( !extractFileP.eof() ) { + // TODO why read only the 1st line? + if ( getline(extractFileP, line)) { ++i; tmpPhraseSource = new PHRASE(); tmpPhraseTarget = new PHRASE(); @@ -269,23 +266,21 @@ int main(int argc, char* argv[]) if ( hierarchicalFlag ) { phrasePairsWithSameSourceAndTarget.push_back( phrasePair ); } - strcpy( lastLine, line ); - SAFE_GETLINE( (extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__ ); + lastLine = line; } - while ( !extractFileP.eof() ) { + while ( getline(extractFileP, line) ) { if ( ++i % 100000 == 0 ) { std::cerr << "." << std::flush; } // identical to last line? 
just add count - if (strcmp(line,lastLine) == 0) { + if (line == lastLine) { phrasePair->IncrementPrevious(tmpCount,tmpPcfgSum); - SAFE_GETLINE((extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__); continue; } else { - strcpy( lastLine, line ); + lastLine = line; } tmpPhraseSource = new PHRASE(); @@ -363,8 +358,6 @@ int main(int argc, char* argv[]) } } - SAFE_GETLINE((extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__); - } processPhrasePairs( phrasePairsWithSameSource, *phraseTableFile, featureManager, maybeLogProb ); @@ -758,11 +751,9 @@ void loadFunctionWords( const string &fileName ) } istream *inFileP = &inFile; - char line[LINE_MAX_LENGTH]; - while(true) { - SAFE_GETLINE((*inFileP), line, LINE_MAX_LENGTH, '\n', __FILE__); - if (inFileP->eof()) break; - std::vector token = tokenize( line ); + string line; + while(getline(*inFileP, line)) { + std::vector token = tokenize( line.c_str() ); if (token.size() > 0) functionWordList.insert( token[0] ); } @@ -807,16 +798,13 @@ void LexicalTable::load( const string &fileName ) } istream *inFileP = &inFile; - char line[LINE_MAX_LENGTH]; - + string line; int i=0; - while(true) { + while(getline(*inFileP, line)) { i++; if (i%100000 == 0) std::cerr << "." 
<< flush; - SAFE_GETLINE((*inFileP), line, LINE_MAX_LENGTH, '\n', __FILE__); - if (inFileP->eof()) break; - std::vector token = tokenize( line ); + std::vector token = tokenize( line.c_str() ); if (token.size() != 3) { std::cerr << "line " << i << " in " << fileName << " has wrong number of tokens, skipping:" << std::endl diff --git a/phrase-extract/statistics-main.cpp b/phrase-extract/statistics-main.cpp index 67373ec93..9d814ed76 100644 --- a/phrase-extract/statistics-main.cpp +++ b/phrase-extract/statistics-main.cpp @@ -12,15 +12,12 @@ #include #include "AlignmentPhrase.h" -#include "SafeGetline.h" #include "tables-core.h" #include "InputFileStream.h" using namespace std; using namespace MosesTraining; -#define LINE_MAX_LENGTH 10000 - namespace MosesTraining { @@ -31,7 +28,7 @@ public: vector< vector > alignedToE; vector< vector > alignedToF; - bool create( char*, int ); + bool create( const char*, int ); void clear(); bool equals( const PhraseAlignment& ); }; @@ -106,16 +103,14 @@ int main(int argc, char* argv[]) vector< PhraseAlignment > phrasePairsWithSameF; int i=0; int fileCount = 0; - while(true) { + + string line; + while(getline(extractFileP, line)) { if (extractFileP.eof()) break; if (++i % 100000 == 0) cerr << "." 
<< flush; - char line[LINE_MAX_LENGTH]; - SAFE_GETLINE((extractFileP), line, LINE_MAX_LENGTH, '\n', __FILE__); - // if (fileCount>0) - if (extractFileP.eof()) - break; + PhraseAlignment phrasePair; - bool isPhrasePair = phrasePair.create( line, i ); + bool isPhrasePair = phrasePair.create( line.c_str(), i ); if (lastForeign >= 0 && lastForeign != phrasePair.foreign) { processPhrasePairs( phrasePairsWithSameF ); for(size_t j=0; j &phrasePair ) } } -bool PhraseAlignment::create( char line[], int lineID ) +bool PhraseAlignment::create(const char line[], int lineID ) { vector< string > token = tokenize( line ); int item = 1; @@ -321,16 +316,14 @@ void LexicalTable::load( const string &filePath ) } istream *inFileP = &inFile; - char line[LINE_MAX_LENGTH]; + string line; int i=0; - while(true) { + while(getline(*inFileP, line)) { i++; if (i%100000 == 0) cerr << "." << flush; - SAFE_GETLINE((*inFileP), line, LINE_MAX_LENGTH, '\n', __FILE__); - if (inFileP->eof()) break; - vector token = tokenize( line ); + vector token = tokenize( line.c_str() ); if (token.size() != 3) { cerr << "line " << i << " in " << filePath << " has wrong number of tokens, skipping:\n" << token.size() << " " << token[0] << " " << line << endl;