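FilePieceify LoaderStandard: the rule table loaders now take the input file path (const std::string &inFile) instead of an already-open std::istream, and RuleTableLoaderStandard reads its lines through util::FilePiece rather than getline.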

This commit is contained in:
Kenneth Heafield 2012-10-14 20:52:12 +01:00
parent a0ce62e795
commit 44eee4dcd2
9 changed files with 37 additions and 39 deletions

View File

@ -40,7 +40,7 @@ class RuleTableLoader
virtual bool Load(const std::vector<FactorType> &input,
const std::vector<FactorType> &output,
std::istream &inStream,
const std::string &inFile,
const std::vector<float> &weight,
size_t tableLimit,
const LMList &languageModels,

View File

@ -36,7 +36,7 @@ namespace Moses
bool RuleTableLoaderCompact::Load(const std::vector<FactorType> &input,
const std::vector<FactorType> &output,
std::istream &inStream,
const std::string &inFile,
const std::vector<float> &weight,
size_t /* tableLimit */,
const LMList &languageModels,
@ -45,6 +45,7 @@ bool RuleTableLoaderCompact::Load(const std::vector<FactorType> &input,
{
PrintUserTime("Start loading compact rule table");
InputFileStream inStream(inFile);
LineReader reader(inStream);
// Read and check version number.

View File

@ -41,7 +41,7 @@ class RuleTableLoaderCompact : public RuleTableLoader
public:
bool Load(const std::vector<FactorType> &input,
const std::vector<FactorType> &output,
std::istream &inStream,
const std::string &inFile,
const std::vector<float> &weight,
size_t tableLimit,
const LMList &languageModels,

View File

@ -15,7 +15,7 @@ namespace Moses {
bool RuleTableLoaderHiero::Load(const std::vector<FactorType> &input,
const std::vector<FactorType> &output,
std::istream &inStream,
const std::string &inFile,
const std::vector<float> &weight,
size_t tableLimit,
const LMList &languageModels,
@ -24,7 +24,7 @@ bool RuleTableLoaderHiero::Load(const std::vector<FactorType> &input,
{
bool ret = RuleTableLoaderStandard::Load(HieroFormat
,input, output
,inStream, weight
,inFile, weight
,tableLimit, languageModels
,wpProducer, ruleTable);
return ret;

View File

@ -19,7 +19,7 @@ class RuleTableLoaderHiero : public RuleTableLoaderStandard
public:
bool Load(const std::vector<FactorType> &input,
const std::vector<FactorType> &output,
std::istream &inStream,
const std::string &inFile,
const std::vector<float> &weight,
size_t tableLimit,
const LMList &languageModels,

View File

@ -23,6 +23,7 @@
#include <string>
#include <iterator>
#include <algorithm>
#include <iostream>
#include <sys/stat.h>
#include <stdlib.h>
#include "RuleTable/Trie.h"
@ -35,16 +36,18 @@
#include "UserMessage.h"
#include "ChartTranslationOptionList.h"
#include "FactorCollection.h"
#include "util/file_piece.hh"
#include "util/string_piece.hh"
#include "util/tokenize_piece.hh"
using namespace std;
namespace Moses
{
bool RuleTableLoaderStandard::Load(const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, std::istream &inStream
, const std::string &inFile
, const std::vector<float> &weight
, size_t tableLimit
, const LMList &languageModels
@ -53,7 +56,7 @@ bool RuleTableLoaderStandard::Load(const std::vector<FactorType> &input
{
bool ret = Load(MosesFormat
,input, output
,inStream, weight
,inFile, weight
,tableLimit, languageModels
,wpProducer, ruleTable);
return ret;
@ -110,7 +113,7 @@ void ReformateHieroScore(string &scoreString)
scoreString = Join(" ", toks);
}
string *ReformatHieroRule(const string &lineOrig)
void ReformatHieroRule(const string &lineOrig, string &out)
{
vector<string> tokens;
vector<float> scoreVector;
@ -140,13 +143,13 @@ string *ReformatHieroRule(const string &lineOrig)
<< scoreString << " ||| "
<< align.str();
return new string(ret.str());
out = ret.str();
}
bool RuleTableLoaderStandard::Load(FormatType format
, const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, std::istream &inStream
, const std::string &inFile
, const std::vector<float> &weight
, size_t /* tableLimit */
, const LMList &languageModels
@ -158,22 +161,30 @@ bool RuleTableLoaderStandard::Load(FormatType format
const StaticData &staticData = StaticData::Instance();
const std::string& factorDelimiter = staticData.GetFactorDelimiter();
string lineOrig;
size_t count = 0;
std::ostream *progress = NULL;
IFVERBOSE(1) progress = &std::cerr;
util::FilePiece in(inFile.c_str(), progress);
// reused variables
vector<float> scoreVector;
while(getline(inStream, lineOrig)) {
const string *line;
if (format == HieroFormat) { // reformat line
line = ReformatHieroRule(lineOrig);
}
else
{ // do nothing to format of line
line = &lineOrig;
StringPiece line;
std::string hiero_before, hiero_after;
while(true) {
try {
line = in.ReadLine();
} catch (const util::EndOfFileException &e) { break; }
if (format == HieroFormat) { // inefficiently reformat line
hiero_before.assign(line.data(), line.size());
ReformatHieroRule(hiero_before, hiero_after);
line = hiero_after;
}
util::TokenIter<util::MultiCharacter> pipes(*line, "|||");
util::TokenIter<util::MultiCharacter> pipes(line, "|||");
StringPiece sourcePhraseString(*pipes);
StringPiece targetPhraseString(*++pipes);
StringPiece scoreString(*++pipes);
@ -235,14 +246,6 @@ bool RuleTableLoaderStandard::Load(FormatType format
phraseColl.Add(targetPhrase);
count++;
if (format == HieroFormat) { // reformat line
delete line;
}
else
{ // do nothing
}
}
// sort and prune each target phrase collection

View File

@ -32,7 +32,7 @@ protected:
bool Load(FormatType format,
const std::vector<FactorType> &input,
const std::vector<FactorType> &output,
std::istream &inStream,
const std::string &inFile,
const std::vector<float> &weight,
size_t tableLimit,
const LMList &languageModels,
@ -41,7 +41,7 @@ protected:
public:
bool Load(const std::vector<FactorType> &input,
const std::vector<FactorType> &output,
std::istream &inStream,
const std::string &inFile,
const std::vector<float> &weight,
size_t tableLimit,
const LMList &languageModels,

View File

@ -59,12 +59,9 @@ void PhraseDictionaryALSuffixArray::InitializeForInput(InputType const& source)
string grammarFile = GetFilePath() + "/grammar.out." + SPrint(translationId) + ".gz";
// data from file
InputFileStream inFile(grammarFile);
std::auto_ptr<RuleTableLoader> loader =
RuleTableLoaderFactory::Create(grammarFile);
bool ret = loader->Load(*m_input, *m_output, inFile, *m_weight, m_tableLimit,
bool ret = loader->Load(*m_input, *m_output, grammarFile, *m_weight, m_tableLimit,
*m_languageModels, m_wpProducer, *this);
CHECK(ret);

View File

@ -43,16 +43,13 @@ bool RuleTableTrie::Load(const std::vector<FactorType> &input,
m_filePath = filePath;
m_tableLimit = tableLimit;
// data from file
InputFileStream inFile(filePath);
std::auto_ptr<Moses::RuleTableLoader> loader =
Moses::RuleTableLoaderFactory::Create(filePath);
if (!loader.get())
{
return false;
}
bool ret = loader->Load(input, output, inFile, weight, tableLimit,
bool ret = loader->Load(input, output, filePath, weight, tableLimit,
languageModels, wpProducer, *this);
return ret;
}