mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 05:14:36 +03:00
FilePieceify LoaderStandard.
This commit is contained in:
parent
a0ce62e795
commit
44eee4dcd2
@ -40,7 +40,7 @@ class RuleTableLoader
|
||||
|
||||
virtual bool Load(const std::vector<FactorType> &input,
|
||||
const std::vector<FactorType> &output,
|
||||
std::istream &inStream,
|
||||
const std::string &inFile,
|
||||
const std::vector<float> &weight,
|
||||
size_t tableLimit,
|
||||
const LMList &languageModels,
|
||||
|
@ -36,7 +36,7 @@ namespace Moses
|
||||
|
||||
bool RuleTableLoaderCompact::Load(const std::vector<FactorType> &input,
|
||||
const std::vector<FactorType> &output,
|
||||
std::istream &inStream,
|
||||
const std::string &inFile,
|
||||
const std::vector<float> &weight,
|
||||
size_t /* tableLimit */,
|
||||
const LMList &languageModels,
|
||||
@ -45,6 +45,7 @@ bool RuleTableLoaderCompact::Load(const std::vector<FactorType> &input,
|
||||
{
|
||||
PrintUserTime("Start loading compact rule table");
|
||||
|
||||
InputFileStream inStream(inFile);
|
||||
LineReader reader(inStream);
|
||||
|
||||
// Read and check version number.
|
||||
|
@ -41,7 +41,7 @@ class RuleTableLoaderCompact : public RuleTableLoader
|
||||
public:
|
||||
bool Load(const std::vector<FactorType> &input,
|
||||
const std::vector<FactorType> &output,
|
||||
std::istream &inStream,
|
||||
const std::string &inFile,
|
||||
const std::vector<float> &weight,
|
||||
size_t tableLimit,
|
||||
const LMList &languageModels,
|
||||
|
@ -15,7 +15,7 @@ namespace Moses {
|
||||
|
||||
bool RuleTableLoaderHiero::Load(const std::vector<FactorType> &input,
|
||||
const std::vector<FactorType> &output,
|
||||
std::istream &inStream,
|
||||
const std::string &inFile,
|
||||
const std::vector<float> &weight,
|
||||
size_t tableLimit,
|
||||
const LMList &languageModels,
|
||||
@ -24,7 +24,7 @@ bool RuleTableLoaderHiero::Load(const std::vector<FactorType> &input,
|
||||
{
|
||||
bool ret = RuleTableLoaderStandard::Load(HieroFormat
|
||||
,input, output
|
||||
,inStream, weight
|
||||
,inFile, weight
|
||||
,tableLimit, languageModels
|
||||
,wpProducer, ruleTable);
|
||||
return ret;
|
||||
|
@ -19,7 +19,7 @@ class RuleTableLoaderHiero : public RuleTableLoaderStandard
|
||||
public:
|
||||
bool Load(const std::vector<FactorType> &input,
|
||||
const std::vector<FactorType> &output,
|
||||
std::istream &inStream,
|
||||
const std::string &inFile,
|
||||
const std::vector<float> &weight,
|
||||
size_t tableLimit,
|
||||
const LMList &languageModels,
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include <string>
|
||||
#include <iterator>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <sys/stat.h>
|
||||
#include <stdlib.h>
|
||||
#include "RuleTable/Trie.h"
|
||||
@ -35,16 +36,18 @@
|
||||
#include "UserMessage.h"
|
||||
#include "ChartTranslationOptionList.h"
|
||||
#include "FactorCollection.h"
|
||||
#include "util/file_piece.hh"
|
||||
#include "util/string_piece.hh"
|
||||
#include "util/tokenize_piece.hh"
|
||||
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
bool RuleTableLoaderStandard::Load(const std::vector<FactorType> &input
|
||||
, const std::vector<FactorType> &output
|
||||
, std::istream &inStream
|
||||
, const std::string &inFile
|
||||
, const std::vector<float> &weight
|
||||
, size_t tableLimit
|
||||
, const LMList &languageModels
|
||||
@ -53,7 +56,7 @@ bool RuleTableLoaderStandard::Load(const std::vector<FactorType> &input
|
||||
{
|
||||
bool ret = Load(MosesFormat
|
||||
,input, output
|
||||
,inStream, weight
|
||||
,inFile, weight
|
||||
,tableLimit, languageModels
|
||||
,wpProducer, ruleTable);
|
||||
return ret;
|
||||
@ -110,7 +113,7 @@ void ReformateHieroScore(string &scoreString)
|
||||
scoreString = Join(" ", toks);
|
||||
}
|
||||
|
||||
string *ReformatHieroRule(const string &lineOrig)
|
||||
void ReformatHieroRule(const string &lineOrig, string &out)
|
||||
{
|
||||
vector<string> tokens;
|
||||
vector<float> scoreVector;
|
||||
@ -140,13 +143,13 @@ string *ReformatHieroRule(const string &lineOrig)
|
||||
<< scoreString << " ||| "
|
||||
<< align.str();
|
||||
|
||||
return new string(ret.str());
|
||||
out = ret.str();
|
||||
}
|
||||
|
||||
bool RuleTableLoaderStandard::Load(FormatType format
|
||||
, const std::vector<FactorType> &input
|
||||
, const std::vector<FactorType> &output
|
||||
, std::istream &inStream
|
||||
, const std::string &inFile
|
||||
, const std::vector<float> &weight
|
||||
, size_t /* tableLimit */
|
||||
, const LMList &languageModels
|
||||
@ -158,22 +161,30 @@ bool RuleTableLoaderStandard::Load(FormatType format
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
const std::string& factorDelimiter = staticData.GetFactorDelimiter();
|
||||
|
||||
|
||||
string lineOrig;
|
||||
size_t count = 0;
|
||||
|
||||
std::ostream *progress = NULL;
|
||||
IFVERBOSE(1) progress = &std::cerr;
|
||||
util::FilePiece in(inFile.c_str(), progress);
|
||||
|
||||
// reused variables
|
||||
vector<float> scoreVector;
|
||||
while(getline(inStream, lineOrig)) {
|
||||
const string *line;
|
||||
if (format == HieroFormat) { // reformat line
|
||||
line = ReformatHieroRule(lineOrig);
|
||||
}
|
||||
else
|
||||
{ // do nothing to format of line
|
||||
line = &lineOrig;
|
||||
StringPiece line;
|
||||
std::string hiero_before, hiero_after;
|
||||
|
||||
while(true) {
|
||||
try {
|
||||
line = in.ReadLine();
|
||||
} catch (const util::EndOfFileException &e) { break; }
|
||||
|
||||
if (format == HieroFormat) { // inefficiently reformat line
|
||||
hiero_before.assign(line.data(), line.size());
|
||||
ReformatHieroRule(hiero_before, hiero_after);
|
||||
line = hiero_after;
|
||||
}
|
||||
|
||||
util::TokenIter<util::MultiCharacter> pipes(*line, "|||");
|
||||
util::TokenIter<util::MultiCharacter> pipes(line, "|||");
|
||||
StringPiece sourcePhraseString(*pipes);
|
||||
StringPiece targetPhraseString(*++pipes);
|
||||
StringPiece scoreString(*++pipes);
|
||||
@ -235,14 +246,6 @@ bool RuleTableLoaderStandard::Load(FormatType format
|
||||
phraseColl.Add(targetPhrase);
|
||||
|
||||
count++;
|
||||
|
||||
if (format == HieroFormat) { // reformat line
|
||||
delete line;
|
||||
}
|
||||
else
|
||||
{ // do nothing
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// sort and prune each target phrase collection
|
||||
|
@ -32,7 +32,7 @@ protected:
|
||||
bool Load(FormatType format,
|
||||
const std::vector<FactorType> &input,
|
||||
const std::vector<FactorType> &output,
|
||||
std::istream &inStream,
|
||||
const std::string &inFile,
|
||||
const std::vector<float> &weight,
|
||||
size_t tableLimit,
|
||||
const LMList &languageModels,
|
||||
@ -41,7 +41,7 @@ protected:
|
||||
public:
|
||||
bool Load(const std::vector<FactorType> &input,
|
||||
const std::vector<FactorType> &output,
|
||||
std::istream &inStream,
|
||||
const std::string &inFile,
|
||||
const std::vector<float> &weight,
|
||||
size_t tableLimit,
|
||||
const LMList &languageModels,
|
||||
|
@ -59,12 +59,9 @@ void PhraseDictionaryALSuffixArray::InitializeForInput(InputType const& source)
|
||||
|
||||
string grammarFile = GetFilePath() + "/grammar.out." + SPrint(translationId) + ".gz";
|
||||
|
||||
// data from file
|
||||
InputFileStream inFile(grammarFile);
|
||||
|
||||
std::auto_ptr<RuleTableLoader> loader =
|
||||
RuleTableLoaderFactory::Create(grammarFile);
|
||||
bool ret = loader->Load(*m_input, *m_output, inFile, *m_weight, m_tableLimit,
|
||||
bool ret = loader->Load(*m_input, *m_output, grammarFile, *m_weight, m_tableLimit,
|
||||
*m_languageModels, m_wpProducer, *this);
|
||||
|
||||
CHECK(ret);
|
||||
|
@ -43,16 +43,13 @@ bool RuleTableTrie::Load(const std::vector<FactorType> &input,
|
||||
m_filePath = filePath;
|
||||
m_tableLimit = tableLimit;
|
||||
|
||||
// data from file
|
||||
InputFileStream inFile(filePath);
|
||||
|
||||
std::auto_ptr<Moses::RuleTableLoader> loader =
|
||||
Moses::RuleTableLoaderFactory::Create(filePath);
|
||||
if (!loader.get())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
bool ret = loader->Load(input, output, inFile, weight, tableLimit,
|
||||
bool ret = loader->Load(input, output, filePath, weight, tableLimit,
|
||||
languageModels, wpProducer, *this);
|
||||
return ret;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user