This commit is contained in:
Hieu Hoang 2015-05-02 11:45:24 +01:00
parent a4a7c14593
commit cc8c6b7b10
95 changed files with 2349 additions and 2409 deletions

View File

@ -94,8 +94,7 @@ void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles)
mert::VocabularyFactory::GetVocabulary()->clear(); mert::VocabularyFactory::GetVocabulary()->clear();
//load reference data //load reference data
for (size_t i = 0; i < referenceFiles.size(); ++i) for (size_t i = 0; i < referenceFiles.size(); ++i) {
{
TRACE_ERR("Loading reference from " << referenceFiles[i] << endl); TRACE_ERR("Loading reference from " << referenceFiles[i] << endl);
ifstream ifs(referenceFiles[i].c_str()); ifstream ifs(referenceFiles[i].c_str());
@ -133,28 +132,27 @@ bool BleuScorer::OpenReferenceStream(istream* is, size_t file_id)
void BleuScorer::ProcessReferenceLine(const std::string& line, Reference* ref) const void BleuScorer::ProcessReferenceLine(const std::string& line, Reference* ref) const
{ {
NgramCounts counts; NgramCounts counts;
size_t length = CountNgrams(line, counts, kBleuNgramOrder); size_t length = CountNgrams(line, counts, kBleuNgramOrder);
//for any counts larger than those already there, merge them in //for any counts larger than those already there, merge them in
for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) { for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) {
const NgramCounts::Key& ngram = ci->first; const NgramCounts::Key& ngram = ci->first;
const NgramCounts::Value newcount = ci->second; const NgramCounts::Value newcount = ci->second;
NgramCounts::Value oldcount = 0; NgramCounts::Value oldcount = 0;
ref->get_counts()->Lookup(ngram, &oldcount); ref->get_counts()->Lookup(ngram, &oldcount);
if (newcount > oldcount) { if (newcount > oldcount) {
ref->get_counts()->operator[](ngram) = newcount; ref->get_counts()->operator[](ngram) = newcount;
}
} }
//add in the length }
ref->push_back(length); //add in the length
ref->push_back(length);
} }
bool BleuScorer::GetNextReferenceFromStreams(std::vector<boost::shared_ptr<std::ifstream> >& referenceStreams, Reference& ref) const bool BleuScorer::GetNextReferenceFromStreams(std::vector<boost::shared_ptr<std::ifstream> >& referenceStreams, Reference& ref) const
{ {
for (vector<boost::shared_ptr<ifstream> >::iterator ifs=referenceStreams.begin(); ifs!=referenceStreams.end(); ++ifs) for (vector<boost::shared_ptr<ifstream> >::iterator ifs=referenceStreams.begin(); ifs!=referenceStreams.end(); ++ifs) {
{
if (!(*ifs)) return false; if (!(*ifs)) return false;
string line; string line;
if (!getline(**ifs, line)) return false; if (!getline(**ifs, line)) return false;
@ -309,22 +307,20 @@ vector<float> BleuScorer::ScoreNbestList(const string& scoreFile, const string&
vector<FeatureDataIterator> featureDataIters; vector<FeatureDataIterator> featureDataIters;
vector<ScoreDataIterator> scoreDataIters; vector<ScoreDataIterator> scoreDataIters;
for (size_t i = 0; i < featureFiles.size(); ++i) for (size_t i = 0; i < featureFiles.size(); ++i) {
{
featureDataIters.push_back(FeatureDataIterator(featureFiles[i])); featureDataIters.push_back(FeatureDataIterator(featureFiles[i]));
scoreDataIters.push_back(ScoreDataIterator(scoreFiles[i])); scoreDataIters.push_back(ScoreDataIterator(scoreFiles[i]));
} }
vector<pair<size_t,size_t> > hypotheses; vector<pair<size_t,size_t> > hypotheses;
UTIL_THROW_IF2(featureDataIters[0] == FeatureDataIterator::end(), UTIL_THROW_IF2(featureDataIters[0] == FeatureDataIterator::end(),
"At the end of feature data iterator"); "At the end of feature data iterator");
for (size_t i = 0; i < featureFiles.size(); ++i) for (size_t i = 0; i < featureFiles.size(); ++i) {
{ UTIL_THROW_IF2(featureDataIters[i] == FeatureDataIterator::end(),
UTIL_THROW_IF2(featureDataIters[i] == FeatureDataIterator::end(),
"Feature file " << i << " ended prematurely"); "Feature file " << i << " ended prematurely");
UTIL_THROW_IF2(scoreDataIters[i] == ScoreDataIterator::end(), UTIL_THROW_IF2(scoreDataIters[i] == ScoreDataIterator::end(),
"Score file " << i << " ended prematurely"); "Score file " << i << " ended prematurely");
UTIL_THROW_IF2(featureDataIters[i]->size() != scoreDataIters[i]->size(), UTIL_THROW_IF2(featureDataIters[i]->size() != scoreDataIters[i]->size(),
"Features and scores have different size"); "Features and scores have different size");
for (size_t j = 0; j < featureDataIters[i]->size(); ++j) { for (size_t j = 0; j < featureDataIters[i]->size(); ++j) {
hypotheses.push_back(pair<size_t,size_t>(i,j)); hypotheses.push_back(pair<size_t,size_t>(i,j));

View File

@ -13,7 +13,8 @@
using namespace std; using namespace std;
using namespace MosesTuning; using namespace MosesTuning;
BOOST_AUTO_TEST_CASE(viterbi_simple_lattice) { BOOST_AUTO_TEST_CASE(viterbi_simple_lattice)
{
Vocab vocab; Vocab vocab;
WordVec words; WordVec words;
string wordStrings[] = string wordStrings[] =
@ -244,7 +245,8 @@ BOOST_AUTO_TEST_CASE(viterbi_3branch_lattice)
BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[8]); BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[8]);
} }
BOOST_AUTO_TEST_CASE(viterbi_full_hypergraph) { BOOST_AUTO_TEST_CASE(viterbi_full_hypergraph)
{
Vocab vocab; Vocab vocab;
//References //References
ReferenceSet references; ReferenceSet references;

View File

@ -11,7 +11,8 @@ how many of the features are really "dense". This is because in hg mira
all features (sparse and dense) are to get rolled in to SparseVector all features (sparse and dense) are to get rolled in to SparseVector
*/ */
BOOST_AUTO_TEST_CASE(from_sparse) { BOOST_AUTO_TEST_CASE(from_sparse)
{
SparseVector sp; SparseVector sp;
sp.set("dense0", 0.2); sp.set("dense0", 0.2);
sp.set("dense1", 0.3); sp.set("dense1", 0.3);

View File

@ -474,7 +474,7 @@ int main(int argc, char **argv)
// A task for each start point // A task for each start point
for (size_t j = 0; j < startingPoints.size(); ++j) { for (size_t j = 0; j < startingPoints.size(); ++j) {
boost::shared_ptr<OptimizationTask> boost::shared_ptr<OptimizationTask>
task(new OptimizationTask(optimizer, startingPoints[j])); task(new OptimizationTask(optimizer, startingPoints[j]));
tasks.push_back(task); tasks.push_back(task);
#ifdef WITH_THREADS #ifdef WITH_THREADS
pool.Submit(task); pool.Submit(task);

View File

@ -32,8 +32,7 @@ int main(int argc, char **argv)
// initialize reference streams // initialize reference streams
std::vector<boost::shared_ptr<std::ifstream> > refStreams; std::vector<boost::shared_ptr<std::ifstream> > refStreams;
for (std::vector<std::string>::const_iterator refFile=refFiles.begin(); refFile!=refFiles.end(); ++refFile) for (std::vector<std::string>::const_iterator refFile=refFiles.begin(); refFile!=refFiles.end(); ++refFile) {
{
TRACE_ERR("Loading reference from " << *refFile << std::endl); TRACE_ERR("Loading reference from " << *refFile << std::endl);
boost::shared_ptr<std::ifstream> ifs(new std::ifstream(refFile->c_str())); boost::shared_ptr<std::ifstream> ifs(new std::ifstream(refFile->c_str()));
UTIL_THROW_IF2(!ifs, "Cannot open " << *refFile); UTIL_THROW_IF2(!ifs, "Cannot open " << *refFile);
@ -44,8 +43,7 @@ int main(int argc, char **argv)
std::string nbestLine; std::string nbestLine;
int sid = -1; int sid = -1;
Reference ref; Reference ref;
while ( getline(std::cin, nbestLine) ) while ( getline(std::cin, nbestLine) ) {
{
std::vector<std::string> items; std::vector<std::string> items;
Moses::TokenizeMultiCharSeparator(items, nbestLine, " ||| "); Moses::TokenizeMultiCharSeparator(items, nbestLine, " ||| ");
int sidCurrent = Moses::Scan<int>(items[0]); int sidCurrent = Moses::Scan<int>(items[0]);

View File

@ -34,8 +34,7 @@ int main(int argc, char **argv)
// initialize reference streams // initialize reference streams
vector<boost::shared_ptr<ifstream> > refStreams; vector<boost::shared_ptr<ifstream> > refStreams;
for (vector<string>::const_iterator refFile=refFiles.begin(); refFile!=refFiles.end(); ++refFile) for (vector<string>::const_iterator refFile=refFiles.begin(); refFile!=refFiles.end(); ++refFile) {
{
TRACE_ERR("Loading reference from " << *refFile << endl); TRACE_ERR("Loading reference from " << *refFile << endl);
boost::shared_ptr<ifstream> ifs(new ifstream(refFile->c_str())); boost::shared_ptr<ifstream> ifs(new ifstream(refFile->c_str()));
UTIL_THROW_IF2(!ifs, "Cannot open " << *refFile); UTIL_THROW_IF2(!ifs, "Cannot open " << *refFile);
@ -45,8 +44,7 @@ int main(int argc, char **argv)
// load sentences, preparing statistics, score // load sentences, preparing statistics, score
string hypothesisLine; string hypothesisLine;
size_t sid = 0; size_t sid = 0;
while (getline(std::cin, hypothesisLine)) while (getline(std::cin, hypothesisLine)) {
{
Reference ref; Reference ref;
if (!scorer.GetNextReferenceFromStreams(refStreams, ref)) { if (!scorer.GetNextReferenceFromStreams(refStreams, ref)) {
UTIL_THROW2("Missing references"); UTIL_THROW2("Missing references");

View File

@ -177,39 +177,34 @@ int main(int argc, char* argv[])
const vector<float>& scale_grid = grid.getGrid(lmbr_scale); const vector<float>& scale_grid = grid.getGrid(lmbr_scale);
boost::shared_ptr<InputType> source; boost::shared_ptr<InputType> source;
while((source = ioWrapper->ReadInput()) != NULL) while((source = ioWrapper->ReadInput()) != NULL) {
{ // set up task of translating one sentence
// set up task of translating one sentence boost::shared_ptr<TranslationTask> ttask;
boost::shared_ptr<TranslationTask> ttask; ttask = TranslationTask::create(source, ioWrapper);
ttask = TranslationTask::create(source, ioWrapper); Manager manager(ttask);
Manager manager(ttask); manager.Decode();
manager.Decode(); TrellisPathList nBestList;
TrellisPathList nBestList; manager.CalcNBest(nBestSize, nBestList,true);
manager.CalcNBest(nBestSize, nBestList,true); //grid search
//grid search BOOST_FOREACH(float const& p, pgrid) {
BOOST_FOREACH(float const& p, pgrid) SD.SetLatticeMBRPrecision(p);
{ BOOST_FOREACH(float const& r, rgrid) {
SD.SetLatticeMBRPrecision(p); SD.SetLatticeMBRPRatio(r);
BOOST_FOREACH(float const& r, rgrid) BOOST_FOREACH(size_t const prune_i, prune_grid) {
{ SD.SetLatticeMBRPruningFactor(size_t(prune_i));
SD.SetLatticeMBRPRatio(r); BOOST_FOREACH(float const& scale_i, scale_grid) {
BOOST_FOREACH(size_t const prune_i, prune_grid) SD.SetMBRScale(scale_i);
{ size_t lineCount = source->GetTranslationId();
SD.SetLatticeMBRPruningFactor(size_t(prune_i)); cout << lineCount << " ||| " << p << " "
BOOST_FOREACH(float const& scale_i, scale_grid) << r << " " << size_t(prune_i) << " " << scale_i
{ << " ||| ";
SD.SetMBRScale(scale_i); vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
size_t lineCount = source->GetTranslationId(); manager.OutputBestHypo(mbrBestHypo, lineCount,
cout << lineCount << " ||| " << p << " " SD.GetReportSegmentation(),
<< r << " " << size_t(prune_i) << " " << scale_i SD.GetReportAllFactors(),cout);
<< " ||| "; }
vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList); }
manager.OutputBestHypo(mbrBestHypo, lineCount, }
SD.GetReportSegmentation(),
SD.GetReportAllFactors(),cout);
}
}
}
}
} }
}
} }

View File

@ -144,27 +144,28 @@ int main(int argc, char** argv)
#endif #endif
// main loop over set of input sentences // main loop over set of input sentences
boost::shared_ptr<InputType> source; boost::shared_ptr<InputType> source;
while ((source = ioWrapper->ReadInput()) != NULL) while ((source = ioWrapper->ReadInput()) != NULL) {
{ IFVERBOSE(1) {
IFVERBOSE(1) { ResetUserTime(); } ResetUserTime();
InputType* foo = source.get();
FeatureFunction::CallChangeSource(foo);
// set up task of training one sentence
boost::shared_ptr<TrainingTask> task;
task = TrainingTask::create(source, ioWrapper);
// execute task
#ifdef WITH_THREADS
pool.Submit(task);
#else
task->Run();
#endif
} }
InputType* foo = source.get();
FeatureFunction::CallChangeSource(foo);
// set up task of training one sentence
boost::shared_ptr<TrainingTask> task;
task = TrainingTask::create(source, ioWrapper);
// execute task
#ifdef WITH_THREADS
pool.Submit(task);
#else
task->Run();
#endif
}
// we are done, finishing up // we are done, finishing up
#ifdef WITH_THREADS #ifdef WITH_THREADS
pool.Stop(true); //flush remaining jobs pool.Stop(true); //flush remaining jobs

View File

@ -17,7 +17,9 @@ BaseManager::BaseManager(ttasksptr const& ttask)
const InputType& const InputType&
BaseManager::GetSource() const BaseManager::GetSource() const
{ return m_source; } {
return m_source;
}

View File

@ -36,8 +36,8 @@ class ChartCellCollectionBase
{ {
public: public:
template <class Factory> ChartCellCollectionBase(const InputType &input, template <class Factory> ChartCellCollectionBase(const InputType &input,
const Factory &factory, const Factory &factory,
const ChartParser &parser) const ChartParser &parser)
:m_cells(input.GetSize()) { :m_cells(input.GetSize()) {
size_t size = input.GetSize(); size_t size = input.GetSize();

View File

@ -299,7 +299,7 @@ CreateTranslationOptionCollection(ttasksptr const& ttask) const
= StaticData::Instance().GetTranslationOptionThreshold(); = StaticData::Instance().GetTranslationOptionThreshold();
TranslationOptionCollection *rv TranslationOptionCollection *rv
= new TranslationOptionCollectionConfusionNet = new TranslationOptionCollectionConfusionNet
(ttask, *this, maxNoTransOptPerCoverage, translationOptionThreshold); (ttask, *this, maxNoTransOptPerCoverage, translationOptionThreshold);
assert(rv); assert(rv);
return rv; return rv;
} }

View File

@ -18,80 +18,75 @@
namespace Moses namespace Moses
{ {
class ContextScope class ContextScope
{ {
protected: protected:
typedef std::map<void const*, boost::shared_ptr<void> > scratchpad_t; typedef std::map<void const*, boost::shared_ptr<void> > scratchpad_t;
typedef scratchpad_t::iterator iter_t; typedef scratchpad_t::iterator iter_t;
typedef scratchpad_t::value_type entry_t; typedef scratchpad_t::value_type entry_t;
typedef scratchpad_t::const_iterator const_iter_t; typedef scratchpad_t::const_iterator const_iter_t;
scratchpad_t m_scratchpad; scratchpad_t m_scratchpad;
mutable boost::shared_mutex m_lock; mutable boost::shared_mutex m_lock;
public: public:
// class write_access // class write_access
// { // {
// boost::unique_lock<boost::shared_mutex> m_lock; // boost::unique_lock<boost::shared_mutex> m_lock;
// public: // public:
// write_access(boost::shared_mutex& lock) // write_access(boost::shared_mutex& lock)
// : m_lock(lock) // : m_lock(lock)
// { } // { }
// write_access(write_access& other) // write_access(write_access& other)
// { // {
// swap(m_lock, other.m_lock); // swap(m_lock, other.m_lock);
// } // }
// }; // };
// write_access lock() const // write_access lock() const
// { // {
// return write_access(m_lock); // return write_access(m_lock);
// } // }
template<typename T> template<typename T>
boost::shared_ptr<void> const& boost::shared_ptr<void> const&
set(void const* const key, boost::shared_ptr<T> const& val) set(void const* const key, boost::shared_ptr<T> const& val) {
{ boost::unique_lock<boost::shared_mutex> lock(m_lock);
boost::unique_lock<boost::shared_mutex> lock(m_lock); return (m_scratchpad[key] = val);
return (m_scratchpad[key] = val); }
}
template<typename T> template<typename T>
boost::shared_ptr<T> const boost::shared_ptr<T> const
get(void const* key, bool CreateNewIfNecessary=false) get(void const* key, bool CreateNewIfNecessary=false) {
{ using boost::shared_mutex;
using boost::shared_mutex; using boost::upgrade_lock;
using boost::upgrade_lock; // T const* key = reinterpret_cast<T const*>(xkey);
// T const* key = reinterpret_cast<T const*>(xkey); upgrade_lock<shared_mutex> lock(m_lock);
upgrade_lock<shared_mutex> lock(m_lock); iter_t m = m_scratchpad.find(key);
iter_t m = m_scratchpad.find(key); boost::shared_ptr< T > ret;
boost::shared_ptr< T > ret; if (m != m_scratchpad.end()) {
if (m != m_scratchpad.end()) if (m->second == NULL && CreateNewIfNecessary) {
{ boost::upgrade_to_unique_lock<shared_mutex> xlock(lock);
if (m->second == NULL && CreateNewIfNecessary) m->second.reset(new T);
{ }
boost::upgrade_to_unique_lock<shared_mutex> xlock(lock); ret = boost::static_pointer_cast< T >(m->second);
m->second.reset(new T);
}
ret = boost::static_pointer_cast< T >(m->second);
return ret;
}
if (!CreateNewIfNecessary) return ret;
boost::upgrade_to_unique_lock<shared_mutex> xlock(lock);
ret.reset(new T);
m_scratchpad[key] = ret;
return ret; return ret;
} }
if (!CreateNewIfNecessary) return ret;
boost::upgrade_to_unique_lock<shared_mutex> xlock(lock);
ret.reset(new T);
m_scratchpad[key] = ret;
return ret;
}
ContextScope() { } ContextScope() { }
ContextScope(ContextScope const& other) ContextScope(ContextScope const& other) {
{ boost::unique_lock<boost::shared_mutex> lock1(this->m_lock);
boost::unique_lock<boost::shared_mutex> lock1(this->m_lock); boost::unique_lock<boost::shared_mutex> lock2(other.m_lock);
boost::unique_lock<boost::shared_mutex> lock2(other.m_lock); m_scratchpad = other.m_scratchpad;
m_scratchpad = other.m_scratchpad; }
}
};
};
}; };

View File

@ -218,17 +218,16 @@ const InputPath &DecodeStepTranslation::GetInputPathLEGACY(
void void
DecodeStepTranslation:: DecodeStepTranslation::
ProcessLEGACY(TranslationOption const& in, ProcessLEGACY(TranslationOption const& in,
DecodeStep const& decodeStep, DecodeStep const& decodeStep,
PartialTranslOptColl &out, PartialTranslOptColl &out,
TranslationOptionCollection *toc, TranslationOptionCollection *toc,
bool adhereTableLimit) const bool adhereTableLimit) const
{ {
if (in.GetTargetPhrase().GetSize() == 0) if (in.GetTargetPhrase().GetSize() == 0) {
{ // word deletion
// word deletion out.Add(new TranslationOption(in));
out.Add(new TranslationOption(in)); return;
return; }
}
// normal trans step // normal trans step
WordsRange const& srcRange = in.GetSourceWordsRange(); WordsRange const& srcRange = in.GetSourceWordsRange();
@ -241,34 +240,32 @@ ProcessLEGACY(TranslationOption const& in,
TargetPhraseCollectionWithSourcePhrase const* phraseColl; TargetPhraseCollectionWithSourcePhrase const* phraseColl;
phraseColl = pdict->GetTargetPhraseCollectionLEGACY(toc->GetSource(),srcRange); phraseColl = pdict->GetTargetPhraseCollectionLEGACY(toc->GetSource(),srcRange);
if (phraseColl != NULL) if (phraseColl != NULL) {
{ TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd; iterEnd = ((adhereTableLimit && tableLimit && phraseColl->GetSize() >= tableLimit)
iterEnd = ((adhereTableLimit && tableLimit && phraseColl->GetSize() >= tableLimit) ? phraseColl->begin() + tableLimit : phraseColl->end());
? phraseColl->begin() + tableLimit : phraseColl->end());
for (iterTargetPhrase = phraseColl->begin(); for (iterTargetPhrase = phraseColl->begin();
iterTargetPhrase != iterEnd; iterTargetPhrase != iterEnd;
++iterTargetPhrase) ++iterTargetPhrase) {
{ TargetPhrase const& targetPhrase = **iterTargetPhrase;
TargetPhrase const& targetPhrase = **iterTargetPhrase; if (targetPhrase.GetSize() != currSize ||
if (targetPhrase.GetSize() != currSize || (IsFilteringStep() && !in.IsCompatible(targetPhrase, m_conflictFactors)))
(IsFilteringStep() && !in.IsCompatible(targetPhrase, m_conflictFactors))) continue;
continue;
TargetPhrase outPhrase(inPhrase); TargetPhrase outPhrase(inPhrase);
outPhrase.Merge(targetPhrase, m_newOutputFactors); outPhrase.Merge(targetPhrase, m_newOutputFactors);
outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up
TranslationOption *newTransOpt = new TranslationOption(srcRange, outPhrase); TranslationOption *newTransOpt = new TranslationOption(srcRange, outPhrase);
assert(newTransOpt != NULL); assert(newTransOpt != NULL);
newTransOpt->SetInputPath(inputPath); newTransOpt->SetInputPath(inputPath);
out.Add(newTransOpt); out.Add(newTransOpt);
}
} }
}
} }
} }

View File

@ -83,16 +83,16 @@ void OutputFeatureWeightsForHypergraph(std::ostream &outputSearchGraphStream)
SimpleTranslationInterface::SimpleTranslationInterface(const string &mosesIni): m_staticData(StaticData::Instance()) SimpleTranslationInterface::SimpleTranslationInterface(const string &mosesIni): m_staticData(StaticData::Instance())
{ {
if (!m_params.LoadParam(mosesIni)) { if (!m_params.LoadParam(mosesIni)) {
cerr << "Error; Cannot load parameters at " << mosesIni<<endl; cerr << "Error; Cannot load parameters at " << mosesIni<<endl;
exit(1); exit(1);
} }
if (!StaticData::LoadDataStatic(&m_params, mosesIni.c_str())) { if (!StaticData::LoadDataStatic(&m_params, mosesIni.c_str())) {
cerr << "Error; Cannot load static data in file " << mosesIni<<endl; cerr << "Error; Cannot load static data in file " << mosesIni<<endl;
exit(1); exit(1);
} }
util::rand_init(); util::rand_init();
} }
@ -114,13 +114,15 @@ string SimpleTranslationInterface::translate(const string &inputString)
boost::shared_ptr<InputType> source = ioWrapper->ReadInput(); boost::shared_ptr<InputType> source = ioWrapper->ReadInput();
if (!source) return "Error: Source==null!!!"; if (!source) return "Error: Source==null!!!";
IFVERBOSE(1) { ResetUserTime(); } IFVERBOSE(1) {
ResetUserTime();
}
FeatureFunction::CallChangeSource(&*source); FeatureFunction::CallChangeSource(&*source);
// set up task of translating one sentence // set up task of translating one sentence
boost::shared_ptr<TranslationTask> task boost::shared_ptr<TranslationTask> task
= TranslationTask::create(source, ioWrapper); = TranslationTask::create(source, ioWrapper);
task->Run(); task->Run();
string output = outputStream.str(); string output = outputStream.str();
@ -147,10 +149,14 @@ int
run_as_server() run_as_server()
{ {
#ifdef HAVE_XMLRPC_C #ifdef HAVE_XMLRPC_C
int port; params.SetParameter(port, "server-port", 8080); int port;
bool isSerial; params.SetParameter(isSerial, "serial", false); params.SetParameter(port, "server-port", 8080);
string logfile; params.SetParameter(logfile, "server-log", string("")); bool isSerial;
size_t num_threads; params.SetParameter(num_threads, "threads", size_t(10)); params.SetParameter(isSerial, "serial", false);
string logfile;
params.SetParameter(logfile, "server-log", string(""));
size_t num_threads;
params.SetParameter(num_threads, "threads", size_t(10));
if (isSerial) VERBOSE(1,"Running server in serial mode." << endl); if (isSerial) VERBOSE(1,"Running server in serial mode." << endl);
xmlrpc_c::registry myRegistry; xmlrpc_c::registry myRegistry;
@ -166,8 +172,9 @@ run_as_server()
xmlrpc_c::serverAbyss myAbyssServer(myRegistry, port, logfile); xmlrpc_c::serverAbyss myAbyssServer(myRegistry, port, logfile);
XVERBOSE(1,"Listening on port " << port << endl); XVERBOSE(1,"Listening on port " << port << endl);
if (isSerial) { while(1) myAbyssServer.runOnce(); } if (isSerial) {
else myAbyssServer.run(); while(1) myAbyssServer.runOnce();
} else myAbyssServer.run();
std::cerr << "xmlrpc_c::serverAbyss.run() returned but should not." << std::endl; std::cerr << "xmlrpc_c::serverAbyss.run() returned but should not." << std::endl;
// #pragma message("BUILDING MOSES WITH SERVER SUPPORT") // #pragma message("BUILDING MOSES WITH SERVER SUPPORT")
@ -193,16 +200,15 @@ batch_run()
// set up read/writing class: // set up read/writing class:
boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper); boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper);
UTIL_THROW_IF2(ioWrapper == NULL, "Error; Failed to create IO object" UTIL_THROW_IF2(ioWrapper == NULL, "Error; Failed to create IO object"
<< " [" << HERE << "]"); << " [" << HERE << "]");
// check on weights // check on weights
const ScoreComponentCollection& weights = staticData.GetAllWeights(); const ScoreComponentCollection& weights = staticData.GetAllWeights();
IFVERBOSE(2) IFVERBOSE(2) {
{ TRACE_ERR("The global weight vector looks like this: ");
TRACE_ERR("The global weight vector looks like this: "); TRACE_ERR(weights);
TRACE_ERR(weights); TRACE_ERR("\n");
TRACE_ERR("\n"); }
}
#ifdef WITH_THREADS #ifdef WITH_THREADS
ThreadPool pool(staticData.ThreadCount()); ThreadPool pool(staticData.ThreadCount());
@ -214,57 +220,53 @@ batch_run()
// main loop over set of input sentences // main loop over set of input sentences
boost::shared_ptr<InputType> source; boost::shared_ptr<InputType> source;
while ((source = ioWrapper->ReadInput()) != NULL) while ((source = ioWrapper->ReadInput()) != NULL) {
{ IFVERBOSE(1) ResetUserTime();
IFVERBOSE(1) ResetUserTime();
FeatureFunction::CallChangeSource(source.get()); FeatureFunction::CallChangeSource(source.get());
// set up task of translating one sentence // set up task of translating one sentence
boost::shared_ptr<TranslationTask> boost::shared_ptr<TranslationTask>
task = TranslationTask::create(source, ioWrapper); task = TranslationTask::create(source, ioWrapper);
task->SetContextString(context_string); task->SetContextString(context_string);
// Allow for (sentence-)context-specific processing prior to // Allow for (sentence-)context-specific processing prior to
// decoding. This can be used, for example, for context-sensitive // decoding. This can be used, for example, for context-sensitive
// phrase lookup. // phrase lookup.
FeatureFunction::SetupAll(*task); FeatureFunction::SetupAll(*task);
// execute task // execute task
#ifdef WITH_THREADS #ifdef WITH_THREADS
#ifdef PT_UG #ifdef PT_UG
// simulated post-editing requires threads (within the dynamic phrase tables) // simulated post-editing requires threads (within the dynamic phrase tables)
// but runs all sentences serially, to allow updating of the bitext. // but runs all sentences serially, to allow updating of the bitext.
bool spe = params.isParamSpecified("spe-src"); bool spe = params.isParamSpecified("spe-src");
if (spe) if (spe) {
{ // simulated post-editing: always run single-threaded!
// simulated post-editing: always run single-threaded! task->Run();
task->Run(); string src,trg,aln;
string src,trg,aln; UTIL_THROW_IF2(!getline(*ioWrapper->spe_src,src), "[" << HERE << "] "
UTIL_THROW_IF2(!getline(*ioWrapper->spe_src,src), "[" << HERE << "] " << "missing update data for simulated post-editing.");
<< "missing update data for simulated post-editing."); UTIL_THROW_IF2(!getline(*ioWrapper->spe_trg,trg), "[" << HERE << "] "
UTIL_THROW_IF2(!getline(*ioWrapper->spe_trg,trg), "[" << HERE << "] " << "missing update data for simulated post-editing.");
<< "missing update data for simulated post-editing."); UTIL_THROW_IF2(!getline(*ioWrapper->spe_aln,aln), "[" << HERE << "] "
UTIL_THROW_IF2(!getline(*ioWrapper->spe_aln,aln), "[" << HERE << "] " << "missing update data for simulated post-editing.");
<< "missing update data for simulated post-editing."); BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl()) {
BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl()) Mmsapt* sapt = dynamic_cast<Mmsapt*>(pd);
{ if (sapt) sapt->add(src,trg,aln);
Mmsapt* sapt = dynamic_cast<Mmsapt*>(pd); VERBOSE(1,"[" << HERE << " added src] " << src << endl);
if (sapt) sapt->add(src,trg,aln); VERBOSE(1,"[" << HERE << " added trg] " << trg << endl);
VERBOSE(1,"[" << HERE << " added src] " << src << endl); VERBOSE(1,"[" << HERE << " added aln] " << aln << endl);
VERBOSE(1,"[" << HERE << " added trg] " << trg << endl); }
VERBOSE(1,"[" << HERE << " added aln] " << aln << endl); } else pool.Submit(task);
}
}
else pool.Submit(task);
#else #else
pool.Submit(task); pool.Submit(task);
#endif #endif
#else #else
task->Run(); task->Run();
#endif #endif
} }
// we are done, finishing up // we are done, finishing up
#ifdef WITH_THREADS #ifdef WITH_THREADS
@ -289,52 +291,49 @@ int decoder_main(int argc, char** argv)
#ifdef NDEBUG #ifdef NDEBUG
try try
#endif #endif
{ {
#ifdef HAVE_PROTOBUF #ifdef HAVE_PROTOBUF
GOOGLE_PROTOBUF_VERIFY_VERSION; GOOGLE_PROTOBUF_VERIFY_VERSION;
#endif #endif
// echo command line, if verbose // echo command line, if verbose
IFVERBOSE(1) IFVERBOSE(1) {
{ TRACE_ERR("command: ");
TRACE_ERR("command: "); for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" "); TRACE_ERR(endl);
TRACE_ERR(endl);
}
// set number of significant decimals in output
FixPrecision(cout);
FixPrecision(cerr);
// load all the settings into the Parameter class
// (stores them as strings, or array of strings)
if (!params.LoadParam(argc,argv))
exit(1);
// initialize all "global" variables, which are stored in StaticData
// note: this also loads models such as the language model, etc.
if (!StaticData::LoadDataStatic(&params, argv[0]))
exit(1);
// setting "-show-weights" -> just dump out weights and exit
if (params.isParamSpecified("show-weights"))
{
ShowWeights();
exit(0);
}
if (params.GetParam("server"))
return run_as_server();
else
return batch_run();
} }
// set number of significant decimals in output
FixPrecision(cout);
FixPrecision(cerr);
// load all the settings into the Parameter class
// (stores them as strings, or array of strings)
if (!params.LoadParam(argc,argv))
exit(1);
// initialize all "global" variables, which are stored in StaticData
// note: this also loads models such as the language model, etc.
if (!StaticData::LoadDataStatic(&params, argv[0]))
exit(1);
// setting "-show-weights" -> just dump out weights and exit
if (params.isParamSpecified("show-weights")) {
ShowWeights();
exit(0);
}
if (params.GetParam("server"))
return run_as_server();
else
return batch_run();
}
#ifdef NDEBUG #ifdef NDEBUG
catch (const std::exception &e) catch (const std::exception &e) {
{ std::cerr << "Exception: " << e.what() << std::endl;
std::cerr << "Exception: " << e.what() << std::endl; return EXIT_FAILURE;
return EXIT_FAILURE; }
}
#endif #endif
} }

View File

@ -45,7 +45,9 @@ public:
~SimpleTranslationInterface(); ~SimpleTranslationInterface();
std::string translate(const std::string &input); std::string translate(const std::string &input);
Moses::StaticData& getStaticData(); Moses::StaticData& getStaticData();
Moses::Parameter& getParameters(){ return m_params; } Moses::Parameter& getParameters() {
return m_params;
}
private: private:
SimpleTranslationInterface(); SimpleTranslationInterface();
Moses::Parameter m_params; Moses::Parameter m_params;

View File

@ -157,32 +157,26 @@ FeatureFactory
std::vector<float> weights = static_data.GetParameter()->GetWeights(featureName); std::vector<float> weights = static_data.GetParameter()->GetWeights(featureName);
if (feature->GetNumScoreComponents()) if (feature->GetNumScoreComponents()) {
{ if (weights.size() == 0) {
if (weights.size() == 0) weights = feature->DefaultWeights();
{ if (weights.size() == 0) {
weights = feature->DefaultWeights(); TRACE_ERR("WARNING: No weights specified in config file for FF "
if (weights.size() == 0) << featureName << ". This FF does not supply default values.\n"
{ << "WARNING: Auto-initializing all weights for this FF to 1.0");
TRACE_ERR("WARNING: No weights specified in config file for FF " weights.assign(feature->GetNumScoreComponents(),1.0);
<< featureName << ". This FF does not supply default values.\n" } else {
<< "WARNING: Auto-initializing all weights for this FF to 1.0"); TRACE_ERR("WARNING: No weights specified in config file for FF "
weights.assign(feature->GetNumScoreComponents(),1.0); << featureName << ". Using default values supplied by FF.");
} }
else
{
TRACE_ERR("WARNING: No weights specified in config file for FF "
<< featureName << ". Using default values supplied by FF.");
}
}
UTIL_THROW_IF2(weights.size() != feature->GetNumScoreComponents(),
"FATAL ERROR: Mismatch in number of features and number "
<< "of weights for Feature Function " << featureName
<< " (features: " << feature->GetNumScoreComponents()
<< " vs. weights: " << weights.size() << ")");
static_data.SetWeights(feature, weights);
} }
else if (feature->IsTuneable()) UTIL_THROW_IF2(weights.size() != feature->GetNumScoreComponents(),
"FATAL ERROR: Mismatch in number of features and number "
<< "of weights for Feature Function " << featureName
<< " (features: " << feature->GetNumScoreComponents()
<< " vs. weights: " << weights.size() << ")");
static_data.SetWeights(feature, weights);
} else if (feature->IsTuneable())
static_data.SetWeights(feature, weights); static_data.SetWeights(feature, weights);
} }

View File

@ -55,7 +55,7 @@ void FeatureFunction::CallChangeSource(InputType * const&input)
void FeatureFunction::SetupAll(TranslationTask const& ttask) void FeatureFunction::SetupAll(TranslationTask const& ttask)
{ {
BOOST_FOREACH(FeatureFunction* ff, s_staticColl) BOOST_FOREACH(FeatureFunction* ff, s_staticColl)
ff->Setup(ttask); ff->Setup(ttask);
} }
FeatureFunction:: FeatureFunction::
@ -193,17 +193,23 @@ void FeatureFunction::SetTuneableComponents(const std::string& value)
void void
FeatureFunction FeatureFunction
::InitializeForInput(ttasksptr const& ttask) ::InitializeForInput(ttasksptr const& ttask)
{ InitializeForInput(*(ttask->GetSource().get())); } {
InitializeForInput(*(ttask->GetSource().get()));
}
void void
FeatureFunction FeatureFunction
::CleanUpAfterSentenceProcessing(ttasksptr const& ttask) ::CleanUpAfterSentenceProcessing(ttasksptr const& ttask)
{ CleanUpAfterSentenceProcessing(*(ttask->GetSource().get())); } {
CleanUpAfterSentenceProcessing(*(ttask->GetSource().get()));
}
size_t size_t
FeatureFunction FeatureFunction
::GetIndex() const ::GetIndex() const
{ return m_index; } {
return m_index;
}
/// set index /// set index

View File

@ -136,7 +136,9 @@ public:
CleanUpAfterSentenceProcessing(ttasksptr const& ttask); CleanUpAfterSentenceProcessing(ttasksptr const& ttask);
const std::string & const std::string &
GetArgLine() const { return m_argLine; } GetArgLine() const {
return m_argLine;
}
// given a target phrase containing only factors specified in mask // given a target phrase containing only factors specified in mask
// return true if the feature function can be evaluated // return true if the feature function can be evaluated
@ -153,8 +155,8 @@ public:
// source from the input sentence // source from the input sentence
virtual void virtual void
EvaluateInIsolation(const Phrase &source, const TargetPhrase &targetPhrase, EvaluateInIsolation(const Phrase &source, const TargetPhrase &targetPhrase,
ScoreComponentCollection& scoreBreakdown, ScoreComponentCollection& scoreBreakdown,
ScoreComponentCollection& estimatedFutureScore) const = 0; ScoreComponentCollection& estimatedFutureScore) const = 0;
// override this method if you want to change the input before decoding // override this method if you want to change the input before decoding
virtual void ChangeSource(InputType * const&input) const { } virtual void ChangeSource(InputType * const&input) const { }

View File

@ -147,8 +147,7 @@ void InternalTree::GetUnbinarizedChildren(std::vector<TreePointer> &ret) const
const std::string &label = (*itx)->GetLabel(); const std::string &label = (*itx)->GetLabel();
if (!label.empty() && label[0] == '^') { if (!label.empty() && label[0] == '^') {
(*itx)->GetUnbinarizedChildren(ret); (*itx)->GetUnbinarizedChildren(ret);
} } else {
else {
ret.push_back(*itx); ret.push_back(*itx);
} }
} }

View File

@ -96,8 +96,7 @@ public:
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const; bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// Python-like generator that yields next nonterminal leaf on every call // Python-like generator that yields next nonterminal leaf on every call
$generator(leafNT) $generator(leafNT) {
{
std::vector<TreePointer>::iterator it; std::vector<TreePointer>::iterator it;
InternalTree* tree; InternalTree* tree;
leafNT(InternalTree* root = 0): tree(root) {} leafNT(InternalTree* root = 0): tree(root) {}
@ -116,8 +115,7 @@ public:
// Python-like generator that yields the parent of the next nonterminal leaf on every call // Python-like generator that yields the parent of the next nonterminal leaf on every call
$generator(leafNTParent) $generator(leafNTParent) {
{
std::vector<TreePointer>::iterator it; std::vector<TreePointer>::iterator it;
InternalTree* tree; InternalTree* tree;
leafNTParent(InternalTree* root = 0): tree(root) {} leafNTParent(InternalTree* root = 0): tree(root) {}
@ -135,8 +133,7 @@ public:
}; };
// Python-like generator that yields the next nonterminal leaf on every call, and also stores the path from the root of the tree to the nonterminal // Python-like generator that yields the next nonterminal leaf on every call, and also stores the path from the root of the tree to the nonterminal
$generator(leafNTPath) $generator(leafNTPath) {
{
std::vector<TreePointer>::iterator it; std::vector<TreePointer>::iterator it;
InternalTree* tree; InternalTree* tree;
std::vector<InternalTree*> * path; std::vector<InternalTree*> * path;

View File

@ -66,9 +66,9 @@ LexicalReordering(const std::string &line)
// sanity check: number of default scores // sanity check: number of default scores
size_t numScores size_t numScores
= m_numScoreComponents = m_numScoreComponents
= m_numTuneableComponents = m_numTuneableComponents
= m_configuration->GetNumScoreComponents(); = m_configuration->GetNumScoreComponents();
UTIL_THROW_IF2(m_haveDefaultScores && m_defaultScores.size() != numScores, UTIL_THROW_IF2(m_haveDefaultScores && m_defaultScores.size() != numScores,
"wrong number of default scores (" << m_defaultScores.size() "wrong number of default scores (" << m_defaultScores.size()
<< ") for lexicalized reordering model (expected " << ") for lexicalized reordering model (expected "
@ -89,7 +89,7 @@ Load()
typedef LexicalReorderingTable LRTable; typedef LexicalReorderingTable LRTable;
if (m_filePath.size()) if (m_filePath.size())
m_table.reset(LRTable::LoadAvailable(m_filePath, m_factorsF, m_table.reset(LRTable::LoadAvailable(m_filePath, m_factorsF,
m_factorsE, std::vector<FactorType>())); m_factorsE, std::vector<FactorType>()));
} }
Scores Scores
@ -158,7 +158,7 @@ LexicalReordering::
SetCache(TranslationOptionList& tol) const SetCache(TranslationOptionList& tol) const
{ {
BOOST_FOREACH(TranslationOption* to, tol) BOOST_FOREACH(TranslationOption* to, tol)
this->SetCache(*to); this->SetCache(*to);
} }

View File

@ -101,7 +101,7 @@ GetOrientation(int const reoDistance) const
// this one is for HierarchicalReorderingBackwardState // this one is for HierarchicalReorderingBackwardState
return ((m_modelType == LeftRight) return ((m_modelType == LeftRight)
? (reoDistance >= 1) ? R : L ? (reoDistance >= 1) ? R : L
: (reoDistance == 1) ? M : (reoDistance == 1) ? M
: (m_modelType == Monotonic) ? NM : (m_modelType == Monotonic) ? NM
: (reoDistance == -1) ? S : (reoDistance == -1) ? S
: (m_modelType == MSD) ? D : (m_modelType == MSD) ? D
@ -115,7 +115,7 @@ GetOrientation(WordsRange const& prev, WordsRange const& cur,
{ {
return ((m_modelType == LeftRight) return ((m_modelType == LeftRight)
? cur.GetStartPos() > prev.GetEndPos() ? R : L ? cur.GetStartPos() > prev.GetEndPos() ? R : L
: IsMonotonicStep(prev,cur,cov) ? M : IsMonotonicStep(prev,cur,cov) ? M
: (m_modelType == Monotonic) ? NM : (m_modelType == Monotonic) ? NM
: IsSwap(prev,cur,cov) ? S : IsSwap(prev,cur,cov) ? S
: (m_modelType == MSD) ? D : (m_modelType == MSD) ? D
@ -263,7 +263,7 @@ CopyScores(ScoreComponentCollection* accum,
const SparseReordering* sparse = m_configuration.GetSparseReordering(); const SparseReordering* sparse = m_configuration.GetSparseReordering();
if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType, if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType,
m_direction, accum); m_direction, accum);
} }
@ -342,7 +342,7 @@ Expand(const TranslationOption& topt, const InputType& input,
LRModel const& lrmodel = m_configuration; LRModel const& lrmodel = m_configuration;
WordsRange const cur = topt.GetSourceWordsRange(); WordsRange const cur = topt.GetSourceWordsRange();
LRModel::ReorderingType reoType = (m_first ? lrmodel.GetOrientation(cur) LRModel::ReorderingType reoType = (m_first ? lrmodel.GetOrientation(cur)
: lrmodel.GetOrientation(m_prevRange,cur)); : lrmodel.GetOrientation(m_prevRange,cur));
CopyScores(scores, topt, input, reoType); CopyScores(scores, topt, input, reoType);
} }
return new PhraseBasedReorderingState(this, topt); return new PhraseBasedReorderingState(this, topt);

View File

@ -44,19 +44,18 @@ public:
static const ReorderingType L = 1; // left static const ReorderingType L = 1; // left
static const ReorderingType MAX = 3; // largest possible static const ReorderingType MAX = 3; // largest possible
#else #else
enum ReorderingType enum ReorderingType {
{ M = 0, // monotonic
M = 0, // monotonic NM = 1, // non-monotonic
NM = 1, // non-monotonic S = 1, // swap
S = 1, // swap D = 2, // discontinuous
D = 2, // discontinuous DL = 2, // discontinuous, left
DL = 2, // discontinuous, left DR = 3, // discontinuous, right
DR = 3, // discontinuous, right R = 0, // right
R = 0, // right L = 1, // left
L = 1, // left MAX = 3, // largest possible
MAX = 3, // largest possible NONE = 4 // largest possible
NONE = 4 // largest possible };
};
#endif #endif
// determine orientation, depending on model: // determine orientation, depending on model:

View File

@ -114,10 +114,10 @@ void SparseReordering::PreCalculateFeatureNames(size_t index, const string& id,
position <= SparseReorderingFeatureKey::Last; ++position) { position <= SparseReorderingFeatureKey::Last; ++position) {
for (int reoType = 0; reoType <= LRModel::MAX; ++reoType) { for (int reoType = 0; reoType <= LRModel::MAX; ++reoType) {
SparseReorderingFeatureKey SparseReorderingFeatureKey
key(index, static_cast<SparseReorderingFeatureKey::Type>(type), key(index, static_cast<SparseReorderingFeatureKey::Type>(type),
factor, isCluster, factor, isCluster,
static_cast<SparseReorderingFeatureKey::Position>(position), static_cast<SparseReorderingFeatureKey::Position>(position),
side, static_cast<LRModel::ReorderingType>(reoType)); side, static_cast<LRModel::ReorderingType>(reoType));
m_featureMap.insert(pair<SparseReorderingFeatureKey, FName>(key,m_producer->GetFeatureName(key.Name(id)))); m_featureMap.insert(pair<SparseReorderingFeatureKey, FName>(key,m_producer->GetFeatureName(key.Name(id))));
} }
} }

View File

@ -71,21 +71,18 @@ void Model1Vocabulary::Load(const std::string& fileName)
std::string line; std::string line;
unsigned i = 0; unsigned i = 0;
if ( getline(inFile, line) ) // first line of MGIZA vocabulary files seems to be special : "1 UNK 0" -- skip if it's this if ( getline(inFile, line) ) { // first line of MGIZA vocabulary files seems to be special : "1 UNK 0" -- skip if it's this
{
++i; ++i;
std::vector<std::string> tokens = Tokenize(line); std::vector<std::string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens."); UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens.");
unsigned id = Scan<unsigned>(tokens[0]); unsigned id = Scan<unsigned>(tokens[0]);
if (! ( (id == 1) && (tokens[1] == "UNK") )) if (! ( (id == 1) && (tokens[1] == "UNK") )) {
{
const Factor* factor = factorCollection.AddFactor(tokens[1],false); // TODO: can we assume that the vocabulary is know and filter the model on loading? const Factor* factor = factorCollection.AddFactor(tokens[1],false); // TODO: can we assume that the vocabulary is know and filter the model on loading?
bool stored = Store(factor, id); bool stored = Store(factor, id);
UTIL_THROW_IF2(!stored, "Line " << i << " in " << fileName << " overwrites existing vocabulary entry."); UTIL_THROW_IF2(!stored, "Line " << i << " in " << fileName << " overwrites existing vocabulary entry.");
} }
} }
while ( getline(inFile, line) ) while ( getline(inFile, line) ) {
{
++i; ++i;
std::vector<std::string> tokens = Tokenize(line); std::vector<std::string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens."); UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens.");
@ -104,8 +101,7 @@ void Model1LexicalTable::Load(const std::string &fileName, const Model1Vocabular
std::string line; std::string line;
unsigned i = 0; unsigned i = 0;
while ( getline(inFile, line) ) while ( getline(inFile, line) ) {
{
++i; ++i;
std::vector<std::string> tokens = Tokenize(line); std::vector<std::string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens."); UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens.");
@ -183,35 +179,31 @@ void Model1Feature::Load()
} }
void Model1Feature::EvaluateWithSourceContext(const InputType &input void Model1Feature::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath , const InputPath &inputPath
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, const StackVec *stackVec , const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore) const , ScoreComponentCollection *estimatedFutureScore) const
{ {
const Sentence& sentence = static_cast<const Sentence&>(input); const Sentence& sentence = static_cast<const Sentence&>(input);
float score = 0.0; float score = 0.0;
float norm = TransformScore(1+sentence.GetSize()); float norm = TransformScore(1+sentence.GetSize());
for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT) for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT) {
{
const Word &wordT = targetPhrase.GetWord(posT); const Word &wordT = targetPhrase.GetWord(posT);
if ( !wordT.IsNonTerminal() ) if ( !wordT.IsNonTerminal() ) {
{
float thisWordProb = m_model1.GetProbability(m_emptyWord,wordT[0]); // probability conditioned on empty word float thisWordProb = m_model1.GetProbability(m_emptyWord,wordT[0]); // probability conditioned on empty word
// cache lookup // cache lookup
bool foundInCache = false; bool foundInCache = false;
{ {
#ifdef WITH_THREADS #ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock); boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
#endif #endif
boost::unordered_map<const InputType*, boost::unordered_map<const Factor*, float> >::const_iterator sentenceCache = m_cache.find(&input); boost::unordered_map<const InputType*, boost::unordered_map<const Factor*, float> >::const_iterator sentenceCache = m_cache.find(&input);
if (sentenceCache != m_cache.end()) if (sentenceCache != m_cache.end()) {
{
boost::unordered_map<const Factor*, float>::const_iterator cacheHit = sentenceCache->second.find(wordT[0]); boost::unordered_map<const Factor*, float>::const_iterator cacheHit = sentenceCache->second.find(wordT[0]);
if (cacheHit != sentenceCache->second.end()) if (cacheHit != sentenceCache->second.end()) {
{
foundInCache = true; foundInCache = true;
score += cacheHit->second; score += cacheHit->second;
FEATUREVERBOSE(3, "Cached score( " << wordT << " ) = " << cacheHit->second << std::endl); FEATUREVERBOSE(3, "Cached score( " << wordT << " ) = " << cacheHit->second << std::endl);
@ -219,10 +211,8 @@ void Model1Feature::EvaluateWithSourceContext(const InputType &input
} }
} }
if (!foundInCache) if (!foundInCache) {
{ for (size_t posS=1; posS<sentence.GetSize()-1; ++posS) { // ignore <s> and </s>
for (size_t posS=1; posS<sentence.GetSize()-1; ++posS) // ignore <s> and </s>
{
const Word &wordS = sentence.GetWord(posS); const Word &wordS = sentence.GetWord(posS);
float modelProb = m_model1.GetProbability(wordS[0],wordT[0]); float modelProb = m_model1.GetProbability(wordS[0],wordT[0]);
FEATUREVERBOSE(4, "p( " << wordT << " | " << wordS << " ) = " << modelProb << std::endl); FEATUREVERBOSE(4, "p( " << wordT << " | " << wordS << " ) = " << modelProb << std::endl);
@ -231,10 +221,10 @@ void Model1Feature::EvaluateWithSourceContext(const InputType &input
float thisWordScore = TransformScore(thisWordProb) - norm; float thisWordScore = TransformScore(thisWordProb) - norm;
FEATUREVERBOSE(3, "score( " << wordT << " ) = " << thisWordScore << std::endl); FEATUREVERBOSE(3, "score( " << wordT << " ) = " << thisWordScore << std::endl);
{ {
#ifdef WITH_THREADS #ifdef WITH_THREADS
// need to update cache; write lock // need to update cache; write lock
boost::unique_lock<boost::shared_mutex> lock(m_accessLock); boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif #endif
m_cache[&input][wordT[0]] = thisWordScore; m_cache[&input][wordT[0]] = thisWordScore;
} }
score += thisWordScore; score += thisWordScore;
@ -247,14 +237,13 @@ void Model1Feature::EvaluateWithSourceContext(const InputType &input
void Model1Feature::CleanUpAfterSentenceProcessing(const InputType& source) void Model1Feature::CleanUpAfterSentenceProcessing(const InputType& source)
{ {
#ifdef WITH_THREADS #ifdef WITH_THREADS
// need to update cache; write lock // need to update cache; write lock
boost::unique_lock<boost::shared_mutex> lock(m_accessLock); boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif #endif
// clear cache // clear cache
boost::unordered_map<const InputType*, boost::unordered_map<const Factor*, float> >::iterator sentenceCache = m_cache.find(&source); boost::unordered_map<const InputType*, boost::unordered_map<const Factor*, float> >::iterator sentenceCache = m_cache.find(&source);
if (sentenceCache != m_cache.end()) if (sentenceCache != m_cache.end()) {
{
sentenceCache->second.clear(); sentenceCache->second.clear();
m_cache.erase(sentenceCache); m_cache.erase(sentenceCache);
} }

View File

@ -17,7 +17,7 @@ class Model1Vocabulary
{ {
public: public:
#define INVALID_ID std::numeric_limits<unsigned>::max() // UINT_MAX #define INVALID_ID std::numeric_limits<unsigned>::max() // UINT_MAX
static const std::string GIZANULL; static const std::string GIZANULL;
Model1Vocabulary(); Model1Vocabulary();
@ -103,10 +103,10 @@ private:
// cache // cache
mutable boost::unordered_map<const InputType*, boost::unordered_map<const Factor*, float> > m_cache; mutable boost::unordered_map<const InputType*, boost::unordered_map<const Factor*, float> > m_cache;
#ifdef WITH_THREADS #ifdef WITH_THREADS
// reader-writer lock // reader-writer lock
mutable boost::shared_mutex m_accessLock; mutable boost::shared_mutex m_accessLock;
#endif #endif
}; };

View File

@ -288,7 +288,7 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
if (currTarPhr.GetAlignNonTerm().GetSize() != 0) { if (currTarPhr.GetAlignNonTerm().GetSize() != 0) {
const boost::shared_ptr<void> data = currTarPhr.GetData("Orientation"); const boost::shared_ptr<void> data = currTarPhr.GetData("Orientation");
UTIL_THROW_IF2(!data, GetScoreProducerDescription() UTIL_THROW_IF2(!data, GetScoreProducerDescription()
<< ": Orientation data not set in target phrase. "); << ": Orientation data not set in target phrase. ");
reoClassData = static_cast<const PhraseOrientationFeature::ReoClassData*>( data.get() ); reoClassData = static_cast<const PhraseOrientationFeature::ReoClassData*>( data.get() );
} }

View File

@ -301,15 +301,15 @@ class PhraseOrientationFeature : public StatefulFeatureFunction
public: public:
struct ReoClassData { struct ReoClassData {
public: public:
std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R; std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L; std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
bool firstNonTerminalIsBoundary; bool firstNonTerminalIsBoundary;
bool firstNonTerminalPreviousSourceSpanIsAligned; bool firstNonTerminalPreviousSourceSpanIsAligned;
bool firstNonTerminalFollowingSourceSpanIsAligned; bool firstNonTerminalFollowingSourceSpanIsAligned;
bool lastNonTerminalIsBoundary; bool lastNonTerminalIsBoundary;
bool lastNonTerminalPreviousSourceSpanIsAligned; bool lastNonTerminalPreviousSourceSpanIsAligned;
bool lastNonTerminalFollowingSourceSpanIsAligned; bool lastNonTerminalFollowingSourceSpanIsAligned;
}; };
PhraseOrientationFeature(const std::string &line); PhraseOrientationFeature(const std::string &line);

View File

@ -39,9 +39,9 @@ void RulePairUnlexicalizedSource::SetParameter(const std::string& key, const std
void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const , ScoreComponentCollection &estimatedFutureScore) const
{ {
const Factor* targetPhraseLHS = targetPhrase.GetTargetLHS()[0]; const Factor* targetPhraseLHS = targetPhrase.GetTargetLHS()[0];
if ( !m_glueRules && (targetPhraseLHS == m_glueTargetLHS) ) { if ( !m_glueRules && (targetPhraseLHS == m_glueTargetLHS) ) {
@ -51,8 +51,7 @@ void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
return; return;
} }
for (size_t posS=0; posS<source.GetSize(); ++posS) for (size_t posS=0; posS<source.GetSize(); ++posS) {
{
const Word &wordS = source.GetWord(posS); const Word &wordS = source.GetWord(posS);
if ( !wordS.IsNonTerminal() ) { if ( !wordS.IsNonTerminal() ) {
return; return;
@ -61,8 +60,7 @@ void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
ostringstream namestr; ostringstream namestr;
for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT) for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT) {
{
const Word &wordT = targetPhrase.GetWord(posT); const Word &wordT = targetPhrase.GetWord(posT);
const Factor* factorT = wordT[0]; const Factor* factorT = wordT[0];
if ( wordT.IsNonTerminal() ) { if ( wordT.IsNonTerminal() ) {
@ -78,8 +76,7 @@ void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
namestr << targetPhraseLHS->GetString() << "|"; namestr << targetPhraseLHS->GetString() << "|";
for (AlignmentInfo::const_iterator it=targetPhrase.GetAlignNonTerm().begin(); for (AlignmentInfo::const_iterator it=targetPhrase.GetAlignNonTerm().begin();
it!=targetPhrase.GetAlignNonTerm().end(); ++it) it!=targetPhrase.GetAlignNonTerm().end(); ++it) {
{
namestr << "|" << it->first << "-" << it->second; namestr << "|" << it->first << "-" << it->second;
} }

View File

@ -26,16 +26,16 @@ void RuleScope::EvaluateInIsolation(const Phrase &source
, ScoreComponentCollection &estimatedFutureScore) const , ScoreComponentCollection &estimatedFutureScore) const
{ {
if (IsGlueRule(source)) { if (IsGlueRule(source)) {
return; return;
} }
float score = 0; float score = 0;
if (source.GetSize() > 0 && source.Front().IsNonTerminal()) { if (source.GetSize() > 0 && source.Front().IsNonTerminal()) {
++score; ++score;
} }
if (source.GetSize() > 1 && source.Back().IsNonTerminal()) { if (source.GetSize() > 1 && source.Back().IsNonTerminal()) {
++score; ++score;
} }
/* /*
@ -61,23 +61,20 @@ void RuleScope::EvaluateInIsolation(const Phrase &source
*/ */
if (m_perScope) { if (m_perScope) {
UTIL_THROW_IF2(m_numScoreComponents <= score, UTIL_THROW_IF2(m_numScoreComponents <= score,
"Insufficient number of score components. Scope=" << score << ". NUmber of score components=" << score); "Insufficient number of score components. Scope=" << score << ". NUmber of score components=" << score);
vector<float> scores(m_numScoreComponents, 0); vector<float> scores(m_numScoreComponents, 0);
scores[score] = 1; scores[score] = 1;
if (m_futureCostOnly) { if (m_futureCostOnly) {
estimatedFutureScore.PlusEquals(this, scores); estimatedFutureScore.PlusEquals(this, scores);
} } else {
else { scoreBreakdown.PlusEquals(this, scores);
scoreBreakdown.PlusEquals(this, scores); }
} } else if (m_futureCostOnly) {
} estimatedFutureScore.PlusEquals(this, score);
else if (m_futureCostOnly) { } else {
estimatedFutureScore.PlusEquals(this, score); scoreBreakdown.PlusEquals(this, score);
}
else {
scoreBreakdown.PlusEquals(this, score);
} }
} }
@ -85,14 +82,11 @@ void RuleScope::SetParameter(const std::string& key, const std::string& value)
{ {
if (key == "source-syntax") { if (key == "source-syntax") {
m_sourceSyntax = Scan<bool>(value); m_sourceSyntax = Scan<bool>(value);
} } else if (key == "per-scope") {
else if (key == "per-scope") { m_perScope = Scan<bool>(value);
m_perScope = Scan<bool>(value); } else if ("future-cost-only") {
} m_futureCostOnly = Scan<bool>(value);
else if ("future-cost-only") { } else {
m_futureCostOnly = Scan<bool>(value);
}
else {
StatelessFeatureFunction::SetParameter(key, value); StatelessFeatureFunction::SetParameter(key, value);
} }
} }

View File

@ -72,7 +72,7 @@ FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hy
bool full_sentence = (mytree->GetChildren().back()->GetLabel() == "</s>" || (mytree->GetChildren().back()->GetLabel() == "SEND" && mytree->GetChildren().back()->GetChildren().back()->GetLabel() == "</s>")); bool full_sentence = (mytree->GetChildren().back()->GetLabel() == "</s>" || (mytree->GetChildren().back()->GetLabel() == "SEND" && mytree->GetChildren().back()->GetChildren().back()->GetLabel() == "</s>"));
if (m_binarized && full_sentence) { if (m_binarized && full_sentence) {
mytree->Unbinarize(); mytree->Unbinarize();
} }
return new TreeState(mytree); return new TreeState(mytree);

View File

@ -183,8 +183,8 @@ public:
// optionally update translation options using leave-one-out // optionally update translation options using leave-one-out
std::vector<bool> keep = (m_leaveOneOut.size() > 0) std::vector<bool> keep = (m_leaveOneOut.size() > 0)
? LeaveOneOut(translationOptionList, correct) ? LeaveOneOut(translationOptionList, correct)
: std::vector<bool>(translationOptionList.size(), true); : std::vector<bool>(translationOptionList.size(), true);
// check whether we (still) have some correct translation // check whether we (still) have some correct translation
int firstCorrect = -1; int firstCorrect = -1;
@ -312,11 +312,11 @@ public:
return; return;
UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput, UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput,
"This feature function requires the TabbedSentence input type"); "This feature function requires the TabbedSentence input type");
const TabbedSentence& tabbedSentence = static_cast<const TabbedSentence&>(source); const TabbedSentence& tabbedSentence = static_cast<const TabbedSentence&>(source);
UTIL_THROW_IF2(tabbedSentence.GetColumns().size() < 2, UTIL_THROW_IF2(tabbedSentence.GetColumns().size() < 2,
"TabbedSentence must contain target<tab>alignment"); "TabbedSentence must contain target<tab>alignment");
// target sentence represented as a phrase // target sentence represented as a phrase
Phrase *target = new Phrase(); Phrase *target = new Phrase();

File diff suppressed because it is too large Load Diff

View File

@ -313,7 +313,7 @@ struct CompareHypothesisTotalScore {
ObjectPool<Hypothesis> &pool = Hypothesis::GetObjectPool(); \ ObjectPool<Hypothesis> &pool = Hypothesis::GetObjectPool(); \
pool.freeObject(hypo); \ pool.freeObject(hypo); \
} \ } \
#else #else
#define FREEHYPO(hypo) delete hypo #define FREEHYPO(hypo) delete hypo
#endif #endif

View File

@ -161,7 +161,7 @@ public:
return m_detailTreeFragmentsOutputCollector.get(); return m_detailTreeFragmentsOutputCollector.get();
} }
void SetInputStreamFromString(std::istringstream &input){ void SetInputStreamFromString(std::istringstream &input) {
m_inputStream = &input; m_inputStream = &input;
} }

View File

@ -13,12 +13,14 @@ namespace Moses
typedef Eigen::Map<Eigen::Matrix<int,Eigen::Dynamic,1> > EigenMap; typedef Eigen::Map<Eigen::Matrix<int,Eigen::Dynamic,1> > EigenMap;
RDLM::~RDLM() { RDLM::~RDLM()
{
delete lm_head_base_instance_; delete lm_head_base_instance_;
delete lm_label_base_instance_; delete lm_label_base_instance_;
} }
void RDLM::Load() { void RDLM::Load()
{
lm_head_base_instance_ = new nplm::neuralTM(); lm_head_base_instance_ = new nplm::neuralTM();
lm_head_base_instance_->read(m_path_head_lm); lm_head_base_instance_->read(m_path_head_lm);
@ -87,8 +89,8 @@ void RDLM::Load() {
// just score provided file, then exit. // just score provided file, then exit.
if (!m_debugPath.empty()) { if (!m_debugPath.empty()) {
ScoreFile(m_debugPath); ScoreFile(m_debugPath);
exit(1); exit(1);
} }
// { // {
@ -202,8 +204,7 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
// ignore glue rules // ignore glue rules
if (root->GetLabel() == m_glueSymbol) { if (root->GetLabel() == m_glueSymbol) {
// recursion // recursion
for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) {
{
Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels); Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels);
} }
return; return;
@ -213,11 +214,11 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
if (m_binarized && root->GetLabel()[0] == '^' && !ancestor_heads.empty()) { if (m_binarized && root->GetLabel()[0] == '^' && !ancestor_heads.empty()) {
// recursion // recursion
if (root->IsLeafNT() && m_context_up > 1 && ancestor_heads.size()) { if (root->IsLeafNT() && m_context_up > 1 && ancestor_heads.size()) {
root = back_pointers.find(root)->second.get(); root = back_pointers.find(root)->second.get();
rescoring_levels = m_context_up-1; rescoring_levels = m_context_up-1;
} }
for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) { for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) {
Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels); Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels);
} }
return; return;
} }
@ -239,35 +240,34 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
if (root->GetLength() == 1 && root->GetChildren()[0]->IsTerminal()) { if (root->GetLength() == 1 && root->GetChildren()[0]->IsTerminal()) {
// root of tree: score without context // root of tree: score without context
if (ancestor_heads.empty() || (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head)) { if (ancestor_heads.empty() || (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head)) {
std::vector<int> ngram_head_null (static_head_null); std::vector<int> ngram_head_null (static_head_null);
ngram_head_null.back() = lm_head->lookup_output_word(root->GetChildren()[0]->GetLabel()); ngram_head_null.back() = lm_head->lookup_output_word(root->GetChildren()[0]->GetLabel());
if (m_isPretermBackoff && ngram_head_null.back() == 0) { if (m_isPretermBackoff && ngram_head_null.back() == 0) {
ngram_head_null.back() = lm_head->lookup_output_word(root->GetLabel()); ngram_head_null.back() = lm_head->lookup_output_word(root->GetLabel());
} }
if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head) { if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head) {
std::vector<int>::iterator it = ngram_head_null.begin(); std::vector<int>::iterator it = ngram_head_null.begin();
std::fill_n(it, m_context_left, static_start_head); std::fill_n(it, m_context_left, static_start_head);
it += m_context_left; it += m_context_left;
std::fill_n(it, m_context_left, static_start_label); std::fill_n(it, m_context_left, static_start_label);
it += m_context_left; it += m_context_left;
std::fill_n(it, m_context_right, static_stop_head); std::fill_n(it, m_context_right, static_stop_head);
it += m_context_right; it += m_context_right;
std::fill_n(it, m_context_right, static_stop_label); std::fill_n(it, m_context_right, static_stop_label);
it += m_context_right; it += m_context_right;
size_t context_up_nonempty = std::min(m_context_up, ancestor_heads.size()); size_t context_up_nonempty = std::min(m_context_up, ancestor_heads.size());
it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it); it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it);
it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it); it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
} }
if (ancestor_labels.size() >= m_context_up && !num_virtual) { if (ancestor_labels.size() >= m_context_up && !num_virtual) {
score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size()))); score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
} } else {
else { boost::hash_combine(boundary_hash, ngram_head_null.back());
boost::hash_combine(boundary_hash, ngram_head_null.back()); score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size()))); }
}
} }
return; return;
// we only need to re-visit previous hypotheses if we have more context available. // we only need to re-visit previous hypotheses if we have more context available.
} else if (root->IsLeafNT()) { } else if (root->IsLeafNT()) {
if (m_context_up > 1 && ancestor_heads.size()) { if (m_context_up > 1 && ancestor_heads.size()) {
root = back_pointers.find(root)->second.get(); root = back_pointers.find(root)->second.get();
@ -276,8 +276,7 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
return; return;
} }
rescoring_levels = m_context_up-1; rescoring_levels = m_context_up-1;
} } else {
else {
return; return;
} }
} }
@ -302,19 +301,17 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
int reached_end = 0; int reached_end = 0;
int label_idx, label_idx_out; int label_idx, label_idx_out;
if (m_binarized && head_label[0] == '^') { if (m_binarized && head_label[0] == '^') {
virtual_head = true; virtual_head = true;
if (m_binarized == 1 || (m_binarized == 3 && head_label[2] == 'l')) { if (m_binarized == 1 || (m_binarized == 3 && head_label[2] == 'l')) {
reached_end = 1; //indicate that we've seen the first symbol of the RHS reached_end = 1; //indicate that we've seen the first symbol of the RHS
} } else if (m_binarized == 2 || (m_binarized == 3 && head_label[2] == 'r')) {
else if (m_binarized == 2 || (m_binarized == 3 && head_label[2] == 'r')) { reached_end = 2; // indicate that we've seen the last symbol of the RHS
reached_end = 2; // indicate that we've seen the last symbol of the RHS }
} // with 'full' binarization, direction is encoded in 2nd char
// with 'full' binarization, direction is encoded in 2nd char std::string clipped_label = (m_binarized == 3) ? head_label.substr(2,head_label.size()-2) : head_label.substr(1,head_label.size()-1);
std::string clipped_label = (m_binarized == 3) ? head_label.substr(2,head_label.size()-2) : head_label.substr(1,head_label.size()-1); label_idx = lm_label->lookup_input_word(clipped_label);
label_idx = lm_label->lookup_input_word(clipped_label); label_idx_out = lm_label->lookup_output_word(clipped_label);
label_idx_out = lm_label->lookup_output_word(clipped_label); } else {
}
else {
reached_end = 3; // indicate that we've seen first and last symbol of the RHS reached_end = 3; // indicate that we've seen first and last symbol of the RHS
label_idx = lm_label->lookup_input_word(head_label); label_idx = lm_label->lookup_input_word(head_label);
label_idx_out = lm_label->lookup_output_word(head_label); label_idx_out = lm_label->lookup_output_word(head_label);
@ -324,49 +321,47 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
// root of tree: score without context // root of tree: score without context
if (ancestor_heads.empty() || (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head)) { if (ancestor_heads.empty() || (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head)) {
if (head_idx != static_dummy_head && head_idx != static_head_head) { if (head_idx != static_dummy_head && head_idx != static_head_head) {
std::vector<int> ngram_head_null (static_head_null); std::vector<int> ngram_head_null (static_head_null);
*(ngram_head_null.end()-2) = label_idx; *(ngram_head_null.end()-2) = label_idx;
ngram_head_null.back() = head_ids.second; ngram_head_null.back() = head_ids.second;
if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head && !num_virtual) {
std::vector<int>::iterator it = ngram_head_null.begin();
std::fill_n(it, m_context_left, static_start_head);
it += m_context_left;
std::fill_n(it, m_context_left, static_start_label);
it += m_context_left;
std::fill_n(it, m_context_right, static_stop_head);
it += m_context_right;
std::fill_n(it, m_context_right, static_stop_label);
it += m_context_right;
it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it);
it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
}
else {
boost::hash_combine(boundary_hash, ngram_head_null.back());
score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
}
}
std::vector<int> ngram_label_null (static_label_null);
ngram_label_null.back() = label_idx_out;
if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head && !num_virtual) { if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head && !num_virtual) {
std::vector<int>::iterator it = ngram_label_null.begin(); std::vector<int>::iterator it = ngram_head_null.begin();
std::fill_n(it, m_context_left, static_start_head); std::fill_n(it, m_context_left, static_start_head);
it += m_context_left; it += m_context_left;
std::fill_n(it, m_context_left, static_start_label); std::fill_n(it, m_context_left, static_start_label);
it += m_context_left; it += m_context_left;
std::fill_n(it, m_context_right, static_stop_head); std::fill_n(it, m_context_right, static_stop_head);
it += m_context_right; it += m_context_right;
std::fill_n(it, m_context_right, static_stop_label); std::fill_n(it, m_context_right, static_stop_label);
it += m_context_right; it += m_context_right;
it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it); it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it);
it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it); it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
score[2] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram_label_null.data(), ngram_label_null.size()))); score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
} } else {
else { boost::hash_combine(boundary_hash, ngram_head_null.back());
boost::hash_combine(boundary_hash, ngram_label_null.back()); score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
score[3] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram_label_null.data(), ngram_label_null.size())));
} }
}
std::vector<int> ngram_label_null (static_label_null);
ngram_label_null.back() = label_idx_out;
if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head && !num_virtual) {
std::vector<int>::iterator it = ngram_label_null.begin();
std::fill_n(it, m_context_left, static_start_head);
it += m_context_left;
std::fill_n(it, m_context_left, static_start_label);
it += m_context_left;
std::fill_n(it, m_context_right, static_stop_head);
it += m_context_right;
std::fill_n(it, m_context_right, static_stop_label);
it += m_context_right;
it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it);
it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
score[2] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram_label_null.data(), ngram_label_null.size())));
} else {
boost::hash_combine(boundary_hash, ngram_label_null.back());
score[3] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram_label_null.data(), ngram_label_null.size())));
}
} }
ancestor_heads.push_back(head_idx); ancestor_heads.push_back(head_idx);
@ -374,15 +369,14 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
if (virtual_head) { if (virtual_head) {
num_virtual = m_context_up; num_virtual = m_context_up;
} } else if (num_virtual) {
else if (num_virtual) { --num_virtual;
--num_virtual;
} }
// fill ancestor context (same for all children) // fill ancestor context (same for all children)
if (context_up_nonempty < m_context_up) { if (context_up_nonempty < m_context_up) {
++context_up_nonempty; ++context_up_nonempty;
} }
size_t up_padding = m_context_up - context_up_nonempty; size_t up_padding = m_context_up - context_up_nonempty;
@ -439,13 +433,13 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
std::vector<int>::iterator it = ngram.begin(); std::vector<int>::iterator it = ngram.begin();
if (left_padding > 0) { if (left_padding > 0) {
it += left_padding; it += left_padding;
} }
it = std::copy(heads.begin()+left_offset, heads.begin()+i, it); it = std::copy(heads.begin()+left_offset, heads.begin()+i, it);
if (left_padding > 0) { if (left_padding > 0) {
it += left_padding; it += left_padding;
} }
it = std::copy(labels.begin()+left_offset, labels.begin()+i, it); it = std::copy(labels.begin()+left_offset, labels.begin()+i, it);
@ -453,33 +447,30 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
it = std::copy(heads.begin()+i+1, heads.begin()+right_offset, it); it = std::copy(heads.begin()+i+1, heads.begin()+right_offset, it);
if (right_padding > 0) { if (right_padding > 0) {
if (reached_end == 2 || reached_end == 3) { if (reached_end == 2 || reached_end == 3) {
std::fill_n(it, right_padding, static_stop_head); std::fill_n(it, right_padding, static_stop_head);
it += right_padding; it += right_padding;
} } else {
else { std::copy(static_label_null.begin()+offset_up_head-m_context_right-right_padding, static_label_null.begin()-m_context_right+offset_up_head, it);
std::copy(static_label_null.begin()+offset_up_head-m_context_right-right_padding, static_label_null.begin()-m_context_right+offset_up_head, it); }
}
} }
it = std::copy(labels.begin()+i+1, labels.begin()+right_offset, it); it = std::copy(labels.begin()+i+1, labels.begin()+right_offset, it);
if (right_padding > 0) { if (right_padding > 0) {
if (reached_end == 2 || reached_end == 3) { if (reached_end == 2 || reached_end == 3) {
std::fill_n(it, right_padding, static_stop_label); std::fill_n(it, right_padding, static_stop_label);
it += right_padding; it += right_padding;
} } else {
else { std::copy(static_label_null.begin()+offset_up_head-right_padding, static_label_null.begin()+offset_up_head, it);
std::copy(static_label_null.begin()+offset_up_head-right_padding, static_label_null.begin()+offset_up_head, it); }
}
} }
ngram.back() = labels_output[i]; ngram.back() = labels_output[i];
if (ancestor_labels.size() >= m_context_up && !num_virtual) { if (ancestor_labels.size() >= m_context_up && !num_virtual) {
score[2] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size()))); score[2] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
} } else {
else {
boost::hash_combine(boundary_hash, ngram.back()); boost::hash_combine(boundary_hash, ngram.back());
score[3] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size()))); score[3] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
} }
@ -492,8 +483,7 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
if (ancestor_labels.size() >= m_context_up && !num_virtual) { if (ancestor_labels.size() >= m_context_up && !num_virtual) {
score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size()))); score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
} } else {
else {
boost::hash_combine(boundary_hash, ngram.back()); boost::hash_combine(boundary_hash, ngram.back());
score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size()))); score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
} }
@ -502,25 +492,24 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
// next time, we need to add less start symbol padding // next time, we need to add less start symbol padding
if (left_padding) if (left_padding)
left_padding--; left_padding--;
else else
left_offset++; left_offset++;
if (right_offset < heads.size()) if (right_offset < heads.size())
right_offset++; right_offset++;
else else
right_padding++; right_padding++;
} }
if (rescoring_levels == 1) { if (rescoring_levels == 1) {
ancestor_heads.pop_back(); ancestor_heads.pop_back();
ancestor_labels.pop_back(); ancestor_labels.pop_back();
return; return;
} }
// recursion // recursion
for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) {
{
Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels - 1); Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels - 1);
} }
ancestor_heads.pop_back(); ancestor_heads.pop_back();
@ -531,19 +520,17 @@ InternalTree* RDLM::GetHead(InternalTree* root, const TreePointerMap & back_poin
{ {
InternalTree *tree; InternalTree *tree;
for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) {
{
if ((*it)->IsLeafNT()) { if ((*it)->IsLeafNT()) {
tree = back_pointers.find(it->get())->second.get(); tree = back_pointers.find(it->get())->second.get();
} } else {
else {
tree = it->get(); tree = it->get();
} }
if (m_binarized && tree->GetLabel()[0] == '^') { if (m_binarized && tree->GetLabel()[0] == '^') {
head_ptr = GetHead(tree, back_pointers, IDs, head_ptr); head_ptr = GetHead(tree, back_pointers, IDs, head_ptr);
if (head_ptr != NULL && !m_isPTKVZ) { if (head_ptr != NULL && !m_isPTKVZ) {
return head_ptr; return head_ptr;
} }
} }
@ -563,8 +550,7 @@ InternalTree* RDLM::GetHead(InternalTree* root, const TreePointerMap & back_poin
for (std::vector<TreePointer>::const_iterator it2 = tree->GetChildren().begin(); it2 != tree->GetChildren().end(); ++it2) { for (std::vector<TreePointer>::const_iterator it2 = tree->GetChildren().begin(); it2 != tree->GetChildren().end(); ++it2) {
if ((*it2)->IsLeafNT()) { if ((*it2)->IsLeafNT()) {
tree2 = back_pointers.find(it2->get())->second.get(); tree2 = back_pointers.find(it2->get())->second.get();
} } else {
else {
tree2 = it2->get(); tree2 = it2->get();
} }
if (tree2->GetLabel() == "PTKVZ" && tree2->GetLength() == 1 && tree2->GetChildren()[0]->IsTerminal()) { if (tree2->GetLabel() == "PTKVZ" && tree2->GetLength() == 1 && tree2->GetChildren()[0]->IsTerminal()) {
@ -602,18 +588,18 @@ void RDLM::GetChildHeadsAndLabels(InternalTree *root, const TreePointerMap & bac
// extract head words / labels // extract head words / labels
for (std::vector<TreePointer>::const_iterator itx = real_children.begin(); itx != real_children.end(); itx = ++real_children) { for (std::vector<TreePointer>::const_iterator itx = real_children.begin(); itx != real_children.end(); itx = ++real_children) {
if ((*itx)->IsTerminal()) { if ((*itx)->IsTerminal()) {
std::cerr << "non-terminal node " << root->GetLabel() << " has a mix of terminal and non-terminal children. This shouldn't happen..." << std::endl; std::cerr << "non-terminal node " << root->GetLabel() << " has a mix of terminal and non-terminal children. This shouldn't happen..." << std::endl;
std::cerr << "children: "; std::cerr << "children: ";
for (std::vector<TreePointer>::const_iterator itx2 = root->GetChildren().begin(); itx2 != root->GetChildren().end(); ++itx2) { for (std::vector<TreePointer>::const_iterator itx2 = root->GetChildren().begin(); itx2 != root->GetChildren().end(); ++itx2) {
std::cerr << (*itx2)->GetLabel() << " "; std::cerr << (*itx2)->GetLabel() << " ";
} }
std::cerr << std::endl; std::cerr << std::endl;
// resize vectors (should we throw exception instead?) // resize vectors (should we throw exception instead?)
heads.pop_back(); heads.pop_back();
labels.pop_back(); labels.pop_back();
heads_output.pop_back(); heads_output.pop_back();
labels_output.pop_back(); labels_output.pop_back();
continue; continue;
} }
InternalTree* child = itx->get(); InternalTree* child = itx->get();
// also go through trees or previous hypotheses to rescore nodes for which more context has become available // also go through trees or previous hypotheses to rescore nodes for which more context has become available
@ -659,8 +645,7 @@ void RDLM::GetIDs(const std::string & head, const std::string & preterminal, std
} }
if (m_sharedVocab) { if (m_sharedVocab) {
IDs.second = IDs.first; IDs.second = IDs.first;
} } else {
else {
IDs.second = lm_head_base_instance_->lookup_output_word(head); IDs.second = lm_head_base_instance_->lookup_output_word(head);
if (m_isPretermBackoff && IDs.second == 0) { if (m_isPretermBackoff && IDs.second == 0) {
IDs.second = lm_head_base_instance_->lookup_output_word(preterminal); IDs.second = lm_head_base_instance_->lookup_output_word(preterminal);
@ -672,12 +657,12 @@ void RDLM::GetIDs(const std::string & head, const std::string & preterminal, std
void RDLM::PrintInfo(std::vector<int> &ngram, nplm::neuralTM* lm) const void RDLM::PrintInfo(std::vector<int> &ngram, nplm::neuralTM* lm) const
{ {
for (size_t i = 0; i < ngram.size()-1; i++) { for (size_t i = 0; i < ngram.size()-1; i++) {
std::cerr << lm->get_input_vocabulary().words()[ngram[i]] << " "; std::cerr << lm->get_input_vocabulary().words()[ngram[i]] << " ";
} }
std::cerr << lm->get_output_vocabulary().words()[ngram.back()] << " "; std::cerr << lm->get_output_vocabulary().words()[ngram.back()] << " ";
for (size_t i = 0; i < ngram.size(); i++) { for (size_t i = 0; i < ngram.size(); i++) {
std::cerr << ngram[i] << " "; std::cerr << ngram[i] << " ";
} }
std::cerr << "score: " << lm->lookup_ngram(ngram) << std::endl; std::cerr << "score: " << lm->lookup_ngram(ngram) << std::endl;
} }
@ -691,32 +676,31 @@ RDLM::TreePointerMap RDLM::AssociateLeafNTs(InternalTree* root, const std::vecto
bool found = false; bool found = false;
InternalTree::leafNT next_leafNT(root); InternalTree::leafNT next_leafNT(root);
for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) { for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
found = next_leafNT(it); found = next_leafNT(it);
if (found) { if (found) {
ret[it->get()] = *it_prev; ret[it->get()] = *it_prev;
} } else {
else { std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n"; }
}
} }
return ret; return ret;
} }
void RDLM::ScoreFile(std::string &path) void RDLM::ScoreFile(std::string &path)
{ {
InputFileStream inStream(path); InputFileStream inStream(path);
std::string line, null; std::string line, null;
std::vector<int> ancestor_heads(m_context_up, static_root_head); std::vector<int> ancestor_heads(m_context_up, static_root_head);
std::vector<int> ancestor_labels(m_context_up, static_root_label); std::vector<int> ancestor_labels(m_context_up, static_root_label);
while(getline(inStream, line)) { while(getline(inStream, line)) {
TreePointerMap back_pointers; TreePointerMap back_pointers;
boost::array<float, 4> score; boost::array<float, 4> score;
score.fill(0); score.fill(0);
InternalTree* mytree (new InternalTree(line)); InternalTree* mytree (new InternalTree(line));
size_t boundary_hash = 0; size_t boundary_hash = 0;
Score(mytree, back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash); Score(mytree, back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << std::endl; std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << std::endl;
} }
} }
@ -727,42 +711,42 @@ void RDLM::SetParameter(const std::string& key, const std::string& value)
m_tuneable = Scan<bool>(value); m_tuneable = Scan<bool>(value);
} else if (key == "filterable") { //ignore } else if (key == "filterable") { //ignore
} else if (key == "path_head_lm") { } else if (key == "path_head_lm") {
m_path_head_lm = value; m_path_head_lm = value;
} else if (key == "path_label_lm") { } else if (key == "path_label_lm") {
m_path_label_lm = value; m_path_label_lm = value;
} else if (key == "ptkvz") { } else if (key == "ptkvz") {
m_isPTKVZ = Scan<bool>(value); m_isPTKVZ = Scan<bool>(value);
} else if (key == "backoff") { } else if (key == "backoff") {
m_isPretermBackoff = Scan<bool>(value); m_isPretermBackoff = Scan<bool>(value);
} else if (key == "context_up") { } else if (key == "context_up") {
m_context_up = Scan<size_t>(value); m_context_up = Scan<size_t>(value);
} else if (key == "context_left") { } else if (key == "context_left") {
m_context_left = Scan<size_t>(value); m_context_left = Scan<size_t>(value);
} else if (key == "context_right") { } else if (key == "context_right") {
m_context_right = Scan<size_t>(value); m_context_right = Scan<size_t>(value);
} else if (key == "debug_path") { } else if (key == "debug_path") {
m_debugPath = value; m_debugPath = value;
} else if (key == "premultiply") { } else if (key == "premultiply") {
m_premultiply = Scan<bool>(value); m_premultiply = Scan<bool>(value);
} else if (key == "rerank") { } else if (key == "rerank") {
m_rerank = Scan<bool>(value); m_rerank = Scan<bool>(value);
} else if (key == "normalize_head_lm") { } else if (key == "normalize_head_lm") {
m_normalizeHeadLM = Scan<bool>(value); m_normalizeHeadLM = Scan<bool>(value);
} else if (key == "normalize_label_lm") { } else if (key == "normalize_label_lm") {
m_normalizeLabelLM = Scan<bool>(value); m_normalizeLabelLM = Scan<bool>(value);
} else if (key == "binarized") { } else if (key == "binarized") {
if (value == "left") if (value == "left")
m_binarized = 1; m_binarized = 1;
else if (value == "right") else if (value == "right")
m_binarized = 2; m_binarized = 2;
else if (value == "full") else if (value == "full")
m_binarized = 3; m_binarized = 3;
else else
UTIL_THROW(util::Exception, "Unknown value for argument " << key << "=" << value); UTIL_THROW(util::Exception, "Unknown value for argument " << key << "=" << value);
} else if (key == "glue_symbol") { } else if (key == "glue_symbol") {
m_glueSymbol = value; m_glueSymbol = value;
} else if (key == "cache_size") { } else if (key == "cache_size") {
m_cacheSize = Scan<int>(value); m_cacheSize = Scan<int>(value);
} else { } else {
UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value); UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value);
} }
@ -808,8 +792,8 @@ FFState* RDLM::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
size_t boundary_hash = 0; size_t boundary_hash = 0;
if (!m_rerank) { if (!m_rerank) {
Score(mytree.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash); Score(mytree.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
accumulator->PlusEquals(ff_idx, score[0] + score[1]); accumulator->PlusEquals(ff_idx, score[0] + score[1]);
accumulator->PlusEquals(ff_idx+1, score[2] + score[3]); accumulator->PlusEquals(ff_idx+1, score[2] + score[3]);
} }
mytree->Combine(previous_trees); mytree->Combine(previous_trees);
if (m_rerank && full_sentence) { if (m_rerank && full_sentence) {
@ -818,12 +802,11 @@ FFState* RDLM::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
accumulator->PlusEquals(ff_idx+1, score[2] + score[3]); accumulator->PlusEquals(ff_idx+1, score[2] + score[3]);
} }
if (m_binarized && full_sentence) { if (m_binarized && full_sentence) {
mytree->Unbinarize(); mytree->Unbinarize();
} }
return new RDLMState(mytree, score[1], score[3], boundary_hash); return new RDLMState(mytree, score[1], score[3], boundary_hash);
} } else {
else {
UTIL_THROW2("Error: RDLM active, but no internal tree structure found"); UTIL_THROW2("Error: RDLM active, but no internal tree structure found");
} }

View File

@ -11,8 +11,9 @@
// Sennrich, Rico (2015). Modelling and Optimizing on Syntactic N-Grams for Statistical Machine Translation. Transactions of the Association for Computational Linguistics.
// see 'scripts/training/rdlm' for training scripts
// Forward declaration of the NPLM model class; this file refers to it
// through pointers (nplm::neuralTM*).
namespace nplm
{
class neuralTM;
}
namespace Moses namespace Moses
@ -32,21 +33,21 @@ public:
{} {}
float GetApproximateScoreHead() const { float GetApproximateScoreHead() const {
return m_approx_head; return m_approx_head;
} }
float GetApproximateScoreLabel() const { float GetApproximateScoreLabel() const {
return m_approx_label; return m_approx_label;
} }
size_t GetHash() const { size_t GetHash() const {
return m_hash; return m_hash;
} }
int Compare(const FFState& other) const { int Compare(const FFState& other) const {
if (m_hash == static_cast<const RDLMState*>(&other)->GetHash()) return 0; if (m_hash == static_cast<const RDLMState*>(&other)->GetHash()) return 0;
else if (m_hash > static_cast<const RDLMState*>(&other)->GetHash()) return 1; else if (m_hash > static_cast<const RDLMState*>(&other)->GetHash()) return 1;
else return -1; else return -1;
} }
}; };
@ -121,10 +122,9 @@ public:
, m_normalizeLabelLM(false) , m_normalizeLabelLM(false)
, m_sharedVocab(false) , m_sharedVocab(false)
, m_binarized(0) , m_binarized(0)
, m_cacheSize(1000000) , m_cacheSize(1000000) {
{ ReadParameters();
ReadParameters(); }
}
~RDLM(); ~RDLM();
@ -147,21 +147,23 @@ public:
void SetParameter(const std::string& key, const std::string& value); void SetParameter(const std::string& key, const std::string& value);
void EvaluateInIsolation(const Phrase &source void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const {}; , ScoreComponentCollection &estimatedFutureScore) const {};
void EvaluateWithSourceContext(const InputType &input void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath , const InputPath &inputPath
, const TargetPhrase &targetPhrase , const TargetPhrase &targetPhrase
, const StackVec *stackVec , const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const {}; , ScoreComponentCollection *estimatedFutureScore = NULL) const {};
void EvaluateTranslationOptionListWithSourceContext(const InputType &input void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {}; , const TranslationOptionList &translationOptionList) const {};
// Overload taking a (non-chart) Hypothesis: not supported by this feature.
// Always throws util::Exception("Not implemented") and never returns.
FFState* EvaluateWhenApplied(
  const Hypothesis& cur_hypo,
  const FFState* prev_state,
  ScoreComponentCollection* accumulator) const {
  UTIL_THROW(util::Exception, "Not implemented");
}
FFState* EvaluateWhenApplied( FFState* EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */, const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */, int /* featureID - used to index the state in the previous hypotheses */,
@ -173,71 +175,72 @@ public:
class UnbinarizedChildren class UnbinarizedChildren
{ {
private: private:
std::vector<TreePointer>::const_iterator iter; std::vector<TreePointer>::const_iterator iter;
std::vector<TreePointer>::const_iterator _begin; std::vector<TreePointer>::const_iterator _begin;
std::vector<TreePointer>::const_iterator _end; std::vector<TreePointer>::const_iterator _end;
InternalTree* current; InternalTree* current;
const TreePointerMap & back_pointers; const TreePointerMap & back_pointers;
bool binarized; bool binarized;
std::vector<std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> > stack; std::vector<std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> > stack;
public: public:
UnbinarizedChildren(InternalTree* root, const TreePointerMap & pointers, bool binary): UnbinarizedChildren(InternalTree* root, const TreePointerMap & pointers, bool binary):
current(root), current(root),
back_pointers(pointers), back_pointers(pointers),
binarized(binary) binarized(binary) {
{ stack.reserve(10);
stack.reserve(10); _end = current->GetChildren().end();
_end = current->GetChildren().end(); iter = current->GetChildren().begin();
iter = current->GetChildren().begin(); // expand virtual node
// expand virtual node while (binarized && !(*iter)->GetLabel().empty() && (*iter)->GetLabel()[0] == '^') {
while (binarized && !(*iter)->GetLabel().empty() && (*iter)->GetLabel()[0] == '^') { stack.push_back(std::make_pair(current, iter));
stack.push_back(std::make_pair(current, iter)); // also go through trees or previous hypotheses to rescore nodes for which more context has become available
// also go through trees or previous hypotheses to rescore nodes for which more context has become available if ((*iter)->IsLeafNT()) {
if ((*iter)->IsLeafNT()) { current = back_pointers.find(iter->get())->second.get();
current = back_pointers.find(iter->get())->second.get(); } else {
} current = iter->get();
else {
current = iter->get();
}
iter = current->GetChildren().begin();
}
_begin = iter;
} }
iter = current->GetChildren().begin();
std::vector<TreePointer>::const_iterator begin() const { return _begin; }
std::vector<TreePointer>::const_iterator end() const { return _end; }
std::vector<TreePointer>::const_iterator operator++() {
iter++;
if (iter == current->GetChildren().end()) {
while (!stack.empty()) {
std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> & active = stack.back();
current = active.first;
iter = ++active.second;
stack.pop_back();
if (iter != current->GetChildren().end()) {
break;
}
}
if (iter == _end) {
return iter;
}
}
// expand virtual node
while (binarized && !(*iter)->GetLabel().empty() && (*iter)->GetLabel()[0] == '^') {
stack.push_back(std::make_pair(current, iter));
// also go through trees or previous hypotheses to rescore nodes for which more context has become available
if ((*iter)->IsLeafNT()) {
current = back_pointers.find(iter->get())->second.get();
}
else {
current = iter->get();
}
iter = current->GetChildren().begin();
}
return iter;
} }
_begin = iter;
}
std::vector<TreePointer>::const_iterator begin() const {
return _begin;
}
std::vector<TreePointer>::const_iterator end() const {
return _end;
}
std::vector<TreePointer>::const_iterator operator++() {
iter++;
if (iter == current->GetChildren().end()) {
while (!stack.empty()) {
std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> & active = stack.back();
current = active.first;
iter = ++active.second;
stack.pop_back();
if (iter != current->GetChildren().end()) {
break;
}
}
if (iter == _end) {
return iter;
}
}
// expand virtual node
while (binarized && !(*iter)->GetLabel().empty() && (*iter)->GetLabel()[0] == '^') {
stack.push_back(std::make_pair(current, iter));
// also go through trees or previous hypotheses to rescore nodes for which more context has become available
if ((*iter)->IsLeafNT()) {
current = back_pointers.find(iter->get())->second.get();
} else {
current = iter->get();
}
iter = current->GetChildren().begin();
}
return iter;
}
}; };
}; };

View File

@ -73,7 +73,7 @@ Manager::Manager(ttasksptr const& ttask)
const StaticData &staticData = StaticData::Instance(); const StaticData &staticData = StaticData::Instance();
SearchAlgorithm searchAlgorithm = staticData.GetSearchAlgorithm(); SearchAlgorithm searchAlgorithm = staticData.GetSearchAlgorithm();
m_search = Search::CreateSearch(*this, *source, searchAlgorithm, m_search = Search::CreateSearch(*this, *source, searchAlgorithm,
*m_transOptColl); *m_transOptColl);
StaticData::Instance().InitializeForInput(ttask); StaticData::Instance().InitializeForInput(ttask);
} }
@ -87,7 +87,9 @@ Manager::~Manager()
const InputType& const InputType&
Manager::GetSource() const Manager::GetSource() const
{ return m_source ; } {
return m_source ;
}
/** /**
* Main decoder loop that translates a sentence by expanding * Main decoder loop that translates a sentence by expanding
@ -130,7 +132,7 @@ void Manager::Decode()
searchTime.start(); searchTime.start();
m_search->Decode(); m_search->Decode();
VERBOSE(1, "Line " << m_source.GetTranslationId() VERBOSE(1, "Line " << m_source.GetTranslationId()
<< ": Search took " << searchTime << " seconds" << endl); << ": Search took " << searchTime << " seconds" << endl);
IFVERBOSE(2) { IFVERBOSE(2) {
GetSentenceStats().StopTimeTotal(); GetSentenceStats().StopTimeTotal();
TRACE_ERR(GetSentenceStats()); TRACE_ERR(GetSentenceStats());

View File

@ -110,7 +110,7 @@ private:
#endif #endif
public: public:
void SetOutputStream(std::ostream* outStream){ void SetOutputStream(std::ostream* outStream) {
m_outStream = outStream; m_outStream = outStream;
} }

View File

@ -203,7 +203,7 @@ Parameter::Parameter()
AddParam(nbest_opts,"lattice-samples", "generate samples from lattice, in same format as nbest list. Uses the file and size arguments, as in n-best-list"); AddParam(nbest_opts,"lattice-samples", "generate samples from lattice, in same format as nbest list. Uses the file and size arguments, as in n-best-list");
AddParam(nbest_opts,"include-segmentation-in-n-best", "include phrasal segmentation in the n-best list. default is false"); AddParam(nbest_opts,"include-segmentation-in-n-best", "include phrasal segmentation in the n-best list. default is false");
AddParam(nbest_opts,"print-alignment-info-in-n-best", AddParam(nbest_opts,"print-alignment-info-in-n-best",
"Include word-to-word alignment in the n-best list. Word-to-word alignments are taken from the phrase table if any. Default is false"); "Include word-to-word alignment in the n-best list. Word-to-word alignments are taken from the phrase table if any. Default is false");
/////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////
// server options // server options
@ -215,7 +215,7 @@ Parameter::Parameter()
po::options_description irstlm_opts("IRSTLM Options"); po::options_description irstlm_opts("IRSTLM Options");
AddParam(irstlm_opts,"clean-lm-cache", AddParam(irstlm_opts,"clean-lm-cache",
"clean language model caches after N translations (default N=1)"); "clean language model caches after N translations (default N=1)");
po::options_description chart_opts("Chart Decoding Options"); po::options_description chart_opts("Chart Decoding Options");
AddParam(chart_opts,"max-chart-span", "maximum num. of source word chart rules can consume (default 10)"); AddParam(chart_opts,"max-chart-span", "maximum num. of source word chart rules can consume (default 10)");
@ -346,8 +346,8 @@ const PARAM_VEC *Parameter::GetParam(const std::string &paramName) const
void void
Parameter:: Parameter::
AddParam(po::options_description& optgroup, AddParam(po::options_description& optgroup,
string const& paramName, string const& paramName,
string const& description) string const& description)
{ {
m_valid[paramName] = true; m_valid[paramName] = true;
m_description[paramName] = description; m_description[paramName] = description;
@ -358,9 +358,9 @@ AddParam(po::options_description& optgroup,
void void
Parameter:: Parameter::
AddParam(po::options_description& optgroup, AddParam(po::options_description& optgroup,
string const& paramName, string const& paramName,
string const& abbrevName, string const& abbrevName,
string const& description) string const& description)
{ {
m_valid[paramName] = true; m_valid[paramName] = true;
m_valid[abbrevName] = true; m_valid[abbrevName] = true;
@ -368,11 +368,10 @@ AddParam(po::options_description& optgroup,
m_fullname[abbrevName] = paramName; m_fullname[abbrevName] = paramName;
m_description[paramName] = description; m_description[paramName] = description;
string optname = paramName; string optname = paramName;
if (abbrevName.size() == 1) if (abbrevName.size() == 1) {
{ optname += string(",")+abbrevName;
optname += string(",")+abbrevName; // m_confusable[abbrevName[0]].insert(paramName);
// m_confusable[abbrevName[0]].insert(paramName); }
}
optgroup.add_options()(optname.c_str(),description.c_str()); optgroup.add_options()(optname.c_str(),description.c_str());
} }
@ -429,12 +428,11 @@ LoadParam(int argc, char* xargv[])
// legacy parameter handling: all parameters are expected // legacy parameter handling: all parameters are expected
// to start with a single dash // to start with a single dash
char* argv[argc+1]; char* argv[argc+1];
for (int i = 0; i < argc; ++i) for (int i = 0; i < argc; ++i) {
{ argv[i] = xargv[i];
argv[i] = xargv[i]; if (strlen(argv[i]) > 2 && argv[i][0] == '-' && argv[i][1] == '-')
if (strlen(argv[i]) > 2 && argv[i][0] == '-' && argv[i][1] == '-') ++argv[i];
++argv[i]; }
}
// config file (-f) arg mandatory // config file (-f) arg mandatory
string configPath; string configPath;
@ -1260,7 +1258,7 @@ Validate()
bool bool
Parameter:: Parameter::
FilesExist(const string &paramName, int fieldNo, FilesExist(const string &paramName, int fieldNo,
std::vector<std::string> const& extensions) std::vector<std::string> const& extensions)
{ {
typedef std::vector<std::string> StringVec; typedef std::vector<std::string> StringVec;
StringVec::const_iterator iter; StringVec::const_iterator iter;
@ -1589,7 +1587,7 @@ template<>
void void
Parameter:: Parameter::
SetParameter<bool>(bool &parameter, std::string const& parameterName, SetParameter<bool>(bool &parameter, std::string const& parameterName,
bool const& defaultValue) const bool const& defaultValue) const
{ {
const PARAM_VEC *params = GetParam(parameterName); const PARAM_VEC *params = GetParam(parameterName);

View File

@ -66,27 +66,27 @@ protected:
void void
AddParam(options_description& optgroup, AddParam(options_description& optgroup,
value_semantic const* optvalue, value_semantic const* optvalue,
std::string const& paramName, std::string const& paramName,
std::string const& description); std::string const& description);
void void
AddParam(options_description& optgroup, AddParam(options_description& optgroup,
std::string const &paramName, std::string const &paramName,
std::string const &description); std::string const &description);
void void
AddParam(options_description& optgroup, AddParam(options_description& optgroup,
value_semantic const* optvalue, value_semantic const* optvalue,
std::string const& paramName, std::string const& paramName,
std::string const& abbrevName, std::string const& abbrevName,
std::string const& description); std::string const& description);
void void
AddParam(options_description& optgroup, AddParam(options_description& optgroup,
std::string const& paramName, std::string const& paramName,
std::string const& abbrevName, std::string const& abbrevName,
std::string const& description); std::string const& description);
void PrintCredit(); void PrintCredit();
void PrintFF() const; void PrintFF() const;

View File

@ -67,7 +67,7 @@ RegisterScoreProducer(FeatureFunction* scoreProducer)
VERBOSE(1, "FeatureFunction: " VERBOSE(1, "FeatureFunction: "
<< scoreProducer->GetScoreProducerDescription() << scoreProducer->GetScoreProducerDescription()
<< " start: " << start << " start: " << start
<< " end: " << (s_denseVectorSize-1) << endl); << " end: " << (s_denseVectorSize-1) << endl);
} }
@ -194,21 +194,19 @@ void ScoreComponentCollection::Save(ostream& out, bool multiline) const
} }
std::vector<FeatureFunction*> const& all_ff std::vector<FeatureFunction*> const& all_ff
= FeatureFunction::GetFeatureFunctions(); = FeatureFunction::GetFeatureFunctions();
BOOST_FOREACH(FeatureFunction const* ff, all_ff) BOOST_FOREACH(FeatureFunction const* ff, all_ff) {
{ string name = ff->GetScoreProducerDescription();
string name = ff->GetScoreProducerDescription(); size_t i = ff->GetIndex();
size_t i = ff->GetIndex(); if (ff->GetNumScoreComponents() == 1)
if (ff->GetNumScoreComponents() == 1) out << name << sep << m_scores[i] << linesep;
out << name << sep << m_scores[i] << linesep; else {
else size_t stop = i + ff->GetNumScoreComponents();
{ boost::format fmt("%s_%d");
size_t stop = i + ff->GetNumScoreComponents(); for (size_t k = 1; i < stop; ++i, ++k)
boost::format fmt("%s_%d"); out << fmt % name % k << sep << m_scores[i] << linesep;
for (size_t k = 1; i < stop; ++i, ++k)
out << fmt % name % k << sep << m_scores[i] << linesep;
}
} }
}
// write sparse features // write sparse features
m_scores.write(out,sep,linesep); m_scores.write(out,sep,linesep);
} }

View File

@ -231,10 +231,10 @@ public:
//! produced by sp //! produced by sp
void void
PlusEquals(const FeatureFunction* sp, PlusEquals(const FeatureFunction* sp,
const ScoreComponentCollection& scores) { const ScoreComponentCollection& scores) {
size_t i = sp->GetIndex(); size_t i = sp->GetIndex();
size_t stop = i + sp->GetNumScoreComponents(); size_t stop = i + sp->GetNumScoreComponents();
for (;i < stop; ++i) m_scores[i] += scores.m_scores[i]; for (; i < stop; ++i) m_scores[i] += scores.m_scores[i];
} }
//! Add scores from a single FeatureFunction only //! Add scores from a single FeatureFunction only

View File

@ -60,28 +60,23 @@ aux_init_partial_translation(string& line)
string sourceCompletedStr; string sourceCompletedStr;
int loc1 = line.find( "|||", 0 ); int loc1 = line.find( "|||", 0 );
int loc2 = line.find( "|||", loc1 + 3 ); int loc2 = line.find( "|||", loc1 + 3 );
if (loc1 > -1 && loc2 > -1) if (loc1 > -1 && loc2 > -1) {
{ m_initialTargetPhrase = Trim(line.substr(0, loc1));
m_initialTargetPhrase = Trim(line.substr(0, loc1)); string scov = Trim(line.substr(loc1 + 3, loc2 - loc1 - 3));
string scov = Trim(line.substr(loc1 + 3, loc2 - loc1 - 3)); line = line.substr(loc2 + 3);
line = line.substr(loc2 + 3);
m_sourceCompleted.resize(scov.size()); m_sourceCompleted.resize(scov.size());
int contiguous = 1; int contiguous = 1;
for (size_t i = 0; i < scov.size(); ++i) for (size_t i = 0; i < scov.size(); ++i) {
{ if (sourceCompletedStr.at(i) == '1') {
if (sourceCompletedStr.at(i) == '1') m_sourceCompleted[i] = true;
{ if (contiguous) m_frontSpanCoveredLength++;
m_sourceCompleted[i] = true; } else {
if (contiguous) m_frontSpanCoveredLength++; m_sourceCompleted[i] = false;
} contiguous = 0;
else }
{
m_sourceCompleted[i] = false;
contiguous = 0;
}
}
} }
}
} }
void void
@ -94,38 +89,31 @@ aux_interpret_sgml_markup(string& line)
metamap::const_iterator i; metamap::const_iterator i;
if ((i = meta.find("id")) != meta.end()) if ((i = meta.find("id")) != meta.end())
this->SetTranslationId(atol(i->second.c_str())); this->SetTranslationId(atol(i->second.c_str()));
if ((i = meta.find("docid")) != meta.end()) if ((i = meta.find("docid")) != meta.end()) {
{ this->SetDocumentId(atol(i->second.c_str()));
this->SetDocumentId(atol(i->second.c_str())); this->SetUseTopicId(false);
this->SetUseTopicId(false); this->SetUseTopicIdAndProb(false);
}
if ((i = meta.find("topic")) != meta.end()) {
vector<string> topic_params;
boost::split(topic_params, i->second, boost::is_any_of("\t "));
if (topic_params.size() == 1) {
this->SetTopicId(atol(topic_params[0].c_str()));
this->SetUseTopicId(true);
this->SetUseTopicIdAndProb(false); this->SetUseTopicIdAndProb(false);
} else {
this->SetTopicIdAndProb(topic_params);
this->SetUseTopicId(false);
this->SetUseTopicIdAndProb(true);
} }
if ((i = meta.find("topic")) != meta.end()) }
{ if ((i = meta.find("weight-setting")) != meta.end()) {
vector<string> topic_params; this->SetWeightSetting(i->second);
boost::split(topic_params, i->second, boost::is_any_of("\t ")); this->SetSpecifiesWeightSetting(true);
if (topic_params.size() == 1) StaticData::Instance().SetWeightSetting(i->second);
{ // oh this is so horrible! Why does this have to be propagated globally?
this->SetTopicId(atol(topic_params[0].c_str())); // --- UG
this->SetUseTopicId(true); } else this->SetSpecifiesWeightSetting(false);
this->SetUseTopicIdAndProb(false);
}
else
{
this->SetTopicIdAndProb(topic_params);
this->SetUseTopicId(false);
this->SetUseTopicIdAndProb(true);
}
}
if ((i = meta.find("weight-setting")) != meta.end())
{
this->SetWeightSetting(i->second);
this->SetSpecifiesWeightSetting(true);
StaticData::Instance().SetWeightSetting(i->second);
// oh this is so horrible! Why does this have to be propagated globally?
// --- UG
}
else this->SetSpecifiesWeightSetting(false);
} }
void void
@ -135,48 +123,44 @@ aux_interpret_dlt(string& line) // whatever DLT means ... --- UG
using namespace std; using namespace std;
typedef map<string, string> str2str_map; typedef map<string, string> str2str_map;
vector<str2str_map> meta = ProcessAndStripDLT(line); vector<str2str_map> meta = ProcessAndStripDLT(line);
BOOST_FOREACH(str2str_map const& M, meta) BOOST_FOREACH(str2str_map const& M, meta) {
{ str2str_map::const_iterator i,j;
str2str_map::const_iterator i,j; if ((i = M.find("type")) != M.end()) {
if ((i = M.find("type")) != M.end()) j = M.find("id");
{ string id = j == M.end() ? "default" : j->second;
j = M.find("id"); if (i->second == "cbtm") {
string id = j == M.end() ? "default" : j->second; PhraseDictionaryDynamicCacheBased* cbtm;
if (i->second == "cbtm") cbtm = PhraseDictionaryDynamicCacheBased::InstanceNonConst(id);
{ if (cbtm) cbtm->ExecuteDlt(M);
PhraseDictionaryDynamicCacheBased* cbtm; }
cbtm = PhraseDictionaryDynamicCacheBased::InstanceNonConst(id); if (i->second == "cblm") {
if (cbtm) cbtm->ExecuteDlt(M); DynamicCacheBasedLanguageModel* cblm;
} cblm = DynamicCacheBasedLanguageModel::InstanceNonConst(id);
if (i->second == "cblm") if (cblm) cblm->ExecuteDlt(M);
{ }
DynamicCacheBasedLanguageModel* cblm;
cblm = DynamicCacheBasedLanguageModel::InstanceNonConst(id);
if (cblm) cblm->ExecuteDlt(M);
}
}
} }
}
} }
void void
Sentence:: Sentence::
aux_interpret_xml(std::string& line, std::vector<size_t> & xmlWalls, aux_interpret_xml(std::string& line, std::vector<size_t> & xmlWalls,
std::vector<std::pair<size_t, std::string> >& placeholders) std::vector<std::pair<size_t, std::string> >& placeholders)
{ // parse XML markup in translation line {
// parse XML markup in translation line
const StaticData &SD = StaticData::Instance(); const StaticData &SD = StaticData::Instance();
using namespace std; using namespace std;
if (SD.GetXmlInputType() != XmlPassThrough) if (SD.GetXmlInputType() != XmlPassThrough) {
{ int offset = SD.IsSyntax() ? 1 : 0;
int offset = SD.IsSyntax() ? 1 : 0; bool OK = ProcessAndStripXMLTags(line, m_xmlOptions,
bool OK = ProcessAndStripXMLTags(line, m_xmlOptions, m_reorderingConstraint,
m_reorderingConstraint, xmlWalls, placeholders, offset,
xmlWalls, placeholders, offset, SD.GetXmlBrackets().first,
SD.GetXmlBrackets().first, SD.GetXmlBrackets().second);
SD.GetXmlBrackets().second); UTIL_THROW_IF2(!OK, "Unable to parse XML in line: " << line);
UTIL_THROW_IF2(!OK, "Unable to parse XML in line: " << line); }
}
} }
void void
@ -197,11 +181,10 @@ init(string line, std::vector<FactorType> const& factorOrder)
aux_interpret_dlt(line); // some poorly documented cache-based stuff aux_interpret_dlt(line); // some poorly documented cache-based stuff
// if sentences is specified as "<passthrough tag1=""/>" // if sentences is specified as "<passthrough tag1=""/>"
if (SD.IsPassthroughEnabled() || SD.IsPassthroughInNBestEnabled()) if (SD.IsPassthroughEnabled() || SD.IsPassthroughInNBestEnabled()) {
{ string pthru = PassthroughSGML(line,"passthrough");
string pthru = PassthroughSGML(line,"passthrough"); this->SetPassthroughInformation(pthru);
this->SetPassthroughInformation(pthru); }
}
vector<size_t> xmlWalls; vector<size_t> xmlWalls;
vector<pair<size_t, string> >placeholders; vector<pair<size_t, string> >placeholders;
@ -218,26 +201,23 @@ init(string line, std::vector<FactorType> const& factorOrder)
// our XmlOptions and create TranslationOptions // our XmlOptions and create TranslationOptions
// only fill the vector if we are parsing XML // only fill the vector if we are parsing XML
if (SD.GetXmlInputType() != XmlPassThrough) if (SD.GetXmlInputType() != XmlPassThrough) {
{ m_xmlCoverageMap.assign(GetSize(), false);
m_xmlCoverageMap.assign(GetSize(), false); BOOST_FOREACH(XmlOption* o, m_xmlOptions) {
BOOST_FOREACH(XmlOption* o, m_xmlOptions) WordsRange const& r = o->range;
{ for(size_t j = r.GetStartPos(); j <= r.GetEndPos(); ++j)
WordsRange const& r = o->range; m_xmlCoverageMap[j]=true;
for(size_t j = r.GetStartPos(); j <= r.GetEndPos(); ++j)
m_xmlCoverageMap[j]=true;
}
} }
}
// reordering walls and zones // reordering walls and zones
m_reorderingConstraint.InitializeWalls(GetSize()); m_reorderingConstraint.InitializeWalls(GetSize());
// set reordering walls, if "-monotone-at-punction" is set // set reordering walls, if "-monotone-at-punction" is set
if (SD.UseReorderingConstraint() && GetSize()) if (SD.UseReorderingConstraint() && GetSize()) {
{ WordsRange r(0, GetSize()-1);
WordsRange r(0, GetSize()-1); m_reorderingConstraint.SetMonotoneAtPunctuation(GetSubString(r));
m_reorderingConstraint.SetMonotoneAtPunctuation(GetSubString(r)); }
}
// set walls obtained from xml // set walls obtained from xml
for(size_t i=0; i<xmlWalls.size(); i++) for(size_t i=0; i<xmlWalls.size(); i++)
@ -283,8 +263,8 @@ CreateTranslationOptionCollection(ttasksptr const& ttask) const
size_t maxNoTransOptPerCoverage = StaticData::Instance().GetMaxNoTransOptPerCoverage(); size_t maxNoTransOptPerCoverage = StaticData::Instance().GetMaxNoTransOptPerCoverage();
float transOptThreshold = StaticData::Instance().GetTranslationOptionThreshold(); float transOptThreshold = StaticData::Instance().GetTranslationOptionThreshold();
TranslationOptionCollection *rv TranslationOptionCollection *rv
= new TranslationOptionCollectionText(ttask, *this, maxNoTransOptPerCoverage, = new TranslationOptionCollectionText(ttask, *this, maxNoTransOptPerCoverage,
transOptThreshold); transOptThreshold);
assert(rv); assert(rv);
return rv; return rv;
} }
@ -386,7 +366,7 @@ CreateFromString(vector<FactorType> const& FOrder, string const& phraseString)
Sentence:: Sentence::
Sentence(size_t const transId, string const& stext, Sentence(size_t const transId, string const& stext,
vector<FactorType> const* IFO) vector<FactorType> const* IFO)
: InputType(transId) : InputType(transId)
{ {
if (IFO) init(stext, *IFO); if (IFO) init(stext, *IFO);

View File

@ -32,109 +32,110 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses namespace Moses
{ {
class WordsRange; class WordsRange;
class PhraseDictionary; class PhraseDictionary;
class TranslationOption; class TranslationOption;
class TranslationOptionCollection; class TranslationOptionCollection;
class ChartTranslationOptions; class ChartTranslationOptions;
class TranslationTask; class TranslationTask;
struct XmlOption; struct XmlOption;
/**
* A Phrase class with an ID. Used specifically as source input so contains functionality to read
* from IODevice and create trans opt
*/
class Sentence : public Phrase, public InputType
{
protected:
/** /**
* A Phrase class with an ID. Used specifically as source input so contains functionality to read * Utility method that takes in a string representing an XML tag and the name of the attribute,
* from IODevice and create trans opt * and returns the value of that tag if present, empty string otherwise
*/ */
class Sentence : public Phrase, public InputType std::vector<XmlOption*> m_xmlOptions;
{ std::vector <bool> m_xmlCoverageMap;
protected:
/** NonTerminalSet m_defaultLabelSet;
* Utility method that takes in a string representing an XML tag and the name of the attribute,
* and returns the value of that tag if present, empty string otherwise
*/
std::vector<XmlOption*> m_xmlOptions;
std::vector <bool> m_xmlCoverageMap;
NonTerminalSet m_defaultLabelSet; void ProcessPlaceholders(const std::vector< std::pair<size_t, std::string> > &placeholders);
void ProcessPlaceholders(const std::vector< std::pair<size_t, std::string> > &placeholders);
public: public:
Sentence(); Sentence();
Sentence(size_t const transId, std::string const& stext, Sentence(size_t const transId, std::string const& stext,
std::vector<FactorType> const* IFO = NULL); std::vector<FactorType> const* IFO = NULL);
// Sentence(size_t const transId, std::string const& stext); // Sentence(size_t const transId, std::string const& stext);
~Sentence(); ~Sentence();
InputTypeEnum GetType() const { InputTypeEnum GetType() const {
return SentenceInput; return SentenceInput;
} }
//! Calls Phrase::GetSubString(). Implements abstract InputType::GetSubString() //! Calls Phrase::GetSubString(). Implements abstract InputType::GetSubString()
Phrase GetSubString(const WordsRange& r) const { Phrase GetSubString(const WordsRange& r) const {
return Phrase::GetSubString(r); return Phrase::GetSubString(r);
} }
//! Calls Phrase::GetWord(). Implements abstract InputType::GetWord() //! Calls Phrase::GetWord(). Implements abstract InputType::GetWord()
const Word& GetWord(size_t pos) const { const Word& GetWord(size_t pos) const {
return Phrase::GetWord(pos); return Phrase::GetWord(pos);
} }
//! Calls Phrase::GetSize(). Implements abstract InputType::GetSize() //! Calls Phrase::GetSize(). Implements abstract InputType::GetSize()
size_t GetSize() const { size_t GetSize() const {
return Phrase::GetSize(); return Phrase::GetSize();
} }
//! Returns true if there were any XML tags parsed that at least partially covered the range passed //! Returns true if there were any XML tags parsed that at least partially covered the range passed
bool XmlOverlap(size_t startPos, size_t endPos) const; bool XmlOverlap(size_t startPos, size_t endPos) const;
//! populates vector argument with XML force translation options for the specific range passed //! populates vector argument with XML force translation options for the specific range passed
void GetXmlTranslationOptions(std::vector<TranslationOption*> &list) const; void GetXmlTranslationOptions(std::vector<TranslationOption*> &list) const;
void GetXmlTranslationOptions(std::vector<TranslationOption*> &list, size_t startPos, size_t endPos) const; void GetXmlTranslationOptions(std::vector<TranslationOption*> &list, size_t startPos, size_t endPos) const;
std::vector<ChartTranslationOptions*> GetXmlChartTranslationOptions() const; std::vector<ChartTranslationOptions*> GetXmlChartTranslationOptions() const;
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder); virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
void Print(std::ostream& out) const; void Print(std::ostream& out) const;
TranslationOptionCollection* TranslationOptionCollection*
CreateTranslationOptionCollection(ttasksptr const& ttask) const; CreateTranslationOptionCollection(ttasksptr const& ttask) const;
virtual void virtual void
CreateFromString(std::vector<FactorType> const &factorOrder, CreateFromString(std::vector<FactorType> const &factorOrder,
std::string const& phraseString); std::string const& phraseString);
const NonTerminalSet& const NonTerminalSet&
GetLabelSet(size_t /*startPos*/, size_t /*endPos*/) const GetLabelSet(size_t /*startPos*/, size_t /*endPos*/) const {
{ return m_defaultLabelSet; } return m_defaultLabelSet;
}
void void
init(std::string line, std::vector<FactorType> const& factorOrder); init(std::string line, std::vector<FactorType> const& factorOrder);
private: private:
// auxliliary functions for Sentence initialization // auxliliary functions for Sentence initialization
// void aux_interpret_sgml_markup(std::string& line); // void aux_interpret_sgml_markup(std::string& line);
// void aux_interpret_dlt(std::string& line); // void aux_interpret_dlt(std::string& line);
// void aux_interpret_xml (std::string& line, std::vector<size_t> & xmlWalls, // void aux_interpret_xml (std::string& line, std::vector<size_t> & xmlWalls,
// std::vector<std::pair<size_t, std::string> >& placeholders); // std::vector<std::pair<size_t, std::string> >& placeholders);
void void
aux_interpret_sgml_markup(std::string& line); aux_interpret_sgml_markup(std::string& line);
void void
aux_interpret_dlt(std::string& line); aux_interpret_dlt(std::string& line);
void void
aux_interpret_xml aux_interpret_xml
(std::string& line, std::vector<size_t> & xmlWalls, (std::string& line, std::vector<size_t> & xmlWalls,
std::vector<std::pair<size_t, std::string> >& placeholders); std::vector<std::pair<size_t, std::string> >& placeholders);
void void
aux_init_partial_translation(std::string& line); aux_init_partial_translation(std::string& line);
}; };
} }

View File

@ -118,7 +118,7 @@ StaticData
string &feature = toks[0]; string &feature = toks[0];
std::map<std::string, std::string>::const_iterator iter std::map<std::string, std::string>::const_iterator iter
= featureNameOverride.find(feature); = featureNameOverride.find(feature);
if (iter == featureNameOverride.end()) { if (iter == featureNameOverride.end()) {
// feature name not override // feature name not override
m_registry.Construct(feature, line); m_registry.Construct(feature, line);
@ -146,7 +146,7 @@ StaticData
m_parameter->SetParameter(m_inputType, "inputtype", SentenceInput); m_parameter->SetParameter(m_inputType, "inputtype", SentenceInput);
m_parameter->SetParameter(m_continuePartialTranslation, m_parameter->SetParameter(m_continuePartialTranslation,
"continue-partial-translation", false ); "continue-partial-translation", false );
std::string s_it = "text input"; std::string s_it = "text input";
if (m_inputType == 1) { if (m_inputType == 1) {
@ -160,7 +160,7 @@ StaticData
} }
VERBOSE(2,"input type is: "<<s_it<<"\n"); VERBOSE(2,"input type is: "<<s_it<<"\n");
// use of xml in input // use of xml in input
m_parameter->SetParameter<XmlInputType>(m_xmlInputType, "xml-input", XmlPassThrough); m_parameter->SetParameter<XmlInputType>(m_xmlInputType, "xml-input", XmlPassThrough);
// specify XML tags opening and closing brackets for XML option // specify XML tags opening and closing brackets for XML option
@ -178,7 +178,7 @@ StaticData
} }
m_parameter->SetParameter(m_defaultNonTermOnlyForEmptyRange, m_parameter->SetParameter(m_defaultNonTermOnlyForEmptyRange,
"default-non-term-for-empty-range-only", false ); "default-non-term-for-empty-range-only", false );
} }
@ -347,18 +347,18 @@ StaticData
m_parameter->SetParameter(m_PrintAlignmentInfoNbest, m_parameter->SetParameter(m_PrintAlignmentInfoNbest,
"print-alignment-info-in-n-best", false ); "print-alignment-info-in-n-best", false );
// include feature names in the n-best list // include feature names in the n-best list
m_parameter->SetParameter(m_labeledNBestList, "labeled-n-best-list", true ); m_parameter->SetParameter(m_labeledNBestList, "labeled-n-best-list", true );
// include word alignment in the n-best list // include word alignment in the n-best list
m_parameter->SetParameter(m_nBestIncludesSegmentation, m_parameter->SetParameter(m_nBestIncludesSegmentation,
"include-segmentation-in-n-best", false ); "include-segmentation-in-n-best", false );
// print all factors of output translations // print all factors of output translations
m_parameter->SetParameter(m_reportAllFactorsNBest, m_parameter->SetParameter(m_reportAllFactorsNBest,
"report-all-factors-in-n-best", false ); "report-all-factors-in-n-best", false );
m_parameter->SetParameter(m_printNBestTrees, "n-best-trees", false ); m_parameter->SetParameter(m_printNBestTrees, "n-best-trees", false );
return true; return true;
@ -412,7 +412,7 @@ StaticData
#ifndef WITH_THREADS #ifndef WITH_THREADS
if (m_threadCount > 1) { if (m_threadCount > 1) {
std::cerr << "Error: Thread count of " << params->at(0) std::cerr << "Error: Thread count of " << params->at(0)
<< " but moses not built with thread support"; << " but moses not built with thread support";
return false; return false;
} }
#endif #endif
@ -426,11 +426,11 @@ StaticData
::ini_cube_pruning_options() ::ini_cube_pruning_options()
{ {
m_parameter->SetParameter(m_cubePruningPopLimit, "cube-pruning-pop-limit", m_parameter->SetParameter(m_cubePruningPopLimit, "cube-pruning-pop-limit",
DEFAULT_CUBE_PRUNING_POP_LIMIT); DEFAULT_CUBE_PRUNING_POP_LIMIT);
m_parameter->SetParameter(m_cubePruningDiversity, "cube-pruning-diversity", m_parameter->SetParameter(m_cubePruningDiversity, "cube-pruning-diversity",
DEFAULT_CUBE_PRUNING_DIVERSITY); DEFAULT_CUBE_PRUNING_DIVERSITY);
m_parameter->SetParameter(m_cubePruningLazyScoring, "cube-pruning-lazy-scoring", m_parameter->SetParameter(m_cubePruningLazyScoring, "cube-pruning-lazy-scoring",
false); false);
} }
void void
@ -468,7 +468,7 @@ void
StaticData StaticData
::ini_oov_options() ::ini_oov_options()
{ {
// unknown word processing // unknown word processing
m_parameter->SetParameter(m_dropUnknown, "drop-unknown", false ); m_parameter->SetParameter(m_dropUnknown, "drop-unknown", false );
m_parameter->SetParameter(m_markUnknown, "mark-unknown", false ); m_parameter->SetParameter(m_markUnknown, "mark-unknown", false );
@ -647,7 +647,7 @@ bool StaticData::LoadData(Parameter *parameter)
// S2T decoder // S2T decoder
m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm", m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm",
RecursiveCYKPlus); RecursiveCYKPlus);
ini_zombie_options(); // probably dead, or maybe not ini_zombie_options(); // probably dead, or maybe not
@ -1016,7 +1016,7 @@ StaticData
::InitializeForInput(ttasksptr const& ttask) const ::InitializeForInput(ttasksptr const& ttask) const
{ {
const std::vector<FeatureFunction*> &producers const std::vector<FeatureFunction*> &producers
= FeatureFunction::GetFeatureFunctions(); = FeatureFunction::GetFeatureFunctions();
for(size_t i=0; i<producers.size(); ++i) { for(size_t i=0; i<producers.size(); ++i) {
FeatureFunction &ff = *producers[i]; FeatureFunction &ff = *producers[i];
if (! IsFeatureFunctionIgnored(ff)) { if (! IsFeatureFunctionIgnored(ff)) {
@ -1024,7 +1024,7 @@ StaticData
iTime.start(); iTime.start();
ff.InitializeForInput(ttask); ff.InitializeForInput(ttask);
VERBOSE(3,"InitializeForInput( " << ff.GetScoreProducerDescription() << " )" VERBOSE(3,"InitializeForInput( " << ff.GetScoreProducerDescription() << " )"
<< "= " << iTime << endl); << "= " << iTime << endl);
} }
} }
} }
@ -1034,7 +1034,7 @@ StaticData
::CleanUpAfterSentenceProcessing(ttasksptr const& ttask) const ::CleanUpAfterSentenceProcessing(ttasksptr const& ttask) const
{ {
const std::vector<FeatureFunction*> &producers const std::vector<FeatureFunction*> &producers
= FeatureFunction::GetFeatureFunctions(); = FeatureFunction::GetFeatureFunctions();
for(size_t i=0; i<producers.size(); ++i) { for(size_t i=0; i<producers.size(); ++i) {
FeatureFunction &ff = *producers[i]; FeatureFunction &ff = *producers[i];
if (! IsFeatureFunctionIgnored(ff)) { if (! IsFeatureFunctionIgnored(ff)) {
@ -1111,7 +1111,7 @@ bool StaticData::CheckWeights() const
if (!weightNames.empty()) { if (!weightNames.empty()) {
cerr << "The following weights have no feature function. " cerr << "The following weights have no feature function. "
<< "Maybe incorrectly spelt weights: "; << "Maybe incorrectly spelt weights: ";
set<string>::iterator iter; set<string>::iterator iter;
for (iter = weightNames.begin(); iter != weightNames.end(); ++iter) { for (iter = weightNames.begin(); iter != weightNames.end(); ++iter) {
cerr << *iter << ","; cerr << *iter << ",";

View File

@ -476,18 +476,18 @@ public:
// m_searchAlgorithm == SyntaxF2S; // m_searchAlgorithm == SyntaxF2S;
// } // }
bool IsSyntax(SearchAlgorithm algo = DefaultSearchAlgorithm) const bool IsSyntax(SearchAlgorithm algo = DefaultSearchAlgorithm) const {
{
if (algo == DefaultSearchAlgorithm) if (algo == DefaultSearchAlgorithm)
algo = m_searchAlgorithm; algo = m_searchAlgorithm;
return (algo == CYKPlus || algo == ChartIncremental || return (algo == CYKPlus || algo == ChartIncremental ||
algo == SyntaxS2T || algo == SyntaxT2S || algo == SyntaxS2T || algo == SyntaxT2S ||
algo == SyntaxF2S || algo == SyntaxT2S_SCFG); algo == SyntaxF2S || algo == SyntaxT2S_SCFG);
} }
// Read-only access to the stored weight collection (m_allWeights).
ScoreComponentCollection const&
GetAllWeights() const {
  return this->m_allWeights;
}
void SetAllWeights(const ScoreComponentCollection& weights) { void SetAllWeights(const ScoreComponentCollection& weights) {
m_allWeights = weights; m_allWeights = weights;

View File

@ -146,7 +146,7 @@ bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
} }
void HyperTreeLoader::ExtractSourceTerminalSetFromHyperPath( void HyperTreeLoader::ExtractSourceTerminalSetFromHyperPath(
const HyperPath &hp, boost::unordered_set<std::size_t> &sourceTerminalSet) const HyperPath &hp, boost::unordered_set<std::size_t> &sourceTerminalSet)
{ {
for (std::vector<HyperPath::NodeSeq>::const_iterator p = hp.nodeSeqs.begin(); for (std::vector<HyperPath::NodeSeq>::const_iterator p = hp.nodeSeqs.begin();
p != hp.nodeSeqs.end(); ++p) { p != hp.nodeSeqs.end(); ++p) {

View File

@ -31,7 +31,7 @@ public:
private: private:
void ExtractSourceTerminalSetFromHyperPath( void ExtractSourceTerminalSetFromHyperPath(
const HyperPath &, boost::unordered_set<std::size_t> &); const HyperPath &, boost::unordered_set<std::size_t> &);
}; };
} // namespace F2S } // namespace F2S

View File

@ -39,7 +39,7 @@ Manager<RuleMatcher>::Manager(ttasksptr const& ttask)
if (const ForestInput *p = dynamic_cast<const ForestInput*>(&m_source)) { if (const ForestInput *p = dynamic_cast<const ForestInput*>(&m_source)) {
m_forest = p->GetForest(); m_forest = p->GetForest();
m_rootVertex = p->GetRootVertex(); m_rootVertex = p->GetRootVertex();
m_sentenceLength = p->GetSize(); m_sentenceLength = p->GetSize();
} else if (const TreeInput *p = dynamic_cast<const TreeInput*>(&m_source)) { } else if (const TreeInput *p = dynamic_cast<const TreeInput*>(&m_source)) {
T2S::InputTreeBuilder builder; T2S::InputTreeBuilder builder;
T2S::InputTree tmpTree; T2S::InputTree tmpTree;

View File

@ -39,7 +39,7 @@ public:
typedef std::vector<boost::shared_ptr<KBestExtractor::Derivation> > kBestList_t; typedef std::vector<boost::shared_ptr<KBestExtractor::Derivation> > kBestList_t;
void ExtractKBest(std::size_t k, kBestList_t& kBestList, void ExtractKBest(std::size_t k, kBestList_t& kBestList,
bool onlyDistinct=false) const; bool onlyDistinct=false) const;
void OutputDetailedTranslationReport(OutputCollector *collector) const; void OutputDetailedTranslationReport(OutputCollector *collector) const;

View File

@ -11,34 +11,34 @@ namespace Syntax
{ {
// Construct the feature from its configuration line. The base class is
// initialised with 1 as its first argument, then the parameters are read.
InputWeightFF::InputWeightFF(const std::string &line)
  : StatelessFeatureFunction(1, line)
{
  this->ReadParameters();
}
void InputWeightFF::EvaluateWhenApplied(const Hypothesis& hypo, void InputWeightFF::EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const ScoreComponentCollection* accumulator) const
{ {
// TODO Throw exception. // TODO Throw exception.
assert(false); assert(false);
} }
void InputWeightFF::EvaluateWhenApplied(const ChartHypothesis &hypo, void InputWeightFF::EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const ScoreComponentCollection* accumulator) const
{ {
// TODO Throw exception. // TODO Throw exception.
assert(false); assert(false);
} }
void InputWeightFF::EvaluateWhenApplied( void InputWeightFF::EvaluateWhenApplied(
const Syntax::SHyperedge &hyperedge, const Syntax::SHyperedge &hyperedge,
ScoreComponentCollection* accumulator) const ScoreComponentCollection* accumulator) const
{ {
accumulator->PlusEquals(this, hyperedge.label.inputWeight); accumulator->PlusEquals(this, hyperedge.label.inputWeight);
} }
// This feature has no parameters of its own: every key/value pair is
// forwarded unchanged to StatelessFeatureFunction::SetParameter.
void InputWeightFF::SetParameter(const std::string& key,
                                 const std::string& value)
{
  StatelessFeatureFunction::SetParameter(key, value);
}

View File

@ -42,7 +42,7 @@ public:
ScoreComponentCollection *) const {} ScoreComponentCollection *) const {}
void EvaluateTranslationOptionListWithSourceContext( void EvaluateTranslationOptionListWithSourceContext(
const InputType &, const TranslationOptionList &) const {} const InputType &, const TranslationOptionList &) const {}
}; };
} // Syntax } // Syntax

View File

@ -225,21 +225,19 @@ void TargetPhrase::SetSparseScore(const FeatureFunction* translationScoreProduce
// Merge two cached score vectors.
//
// - If only one of a / b is set, that pointer is returned as is
//   (a null pointer if neither is set).
// - If both are set but differ in size, a null pointer is returned.
// - Otherwise a new vector is allocated as a copy of *a; every position
//   where *a holds 0 takes the value from *b. Where both hold non-zero
//   values they must agree, otherwise UTIL_THROW_IF2 fires.
//
// Neither input vector is modified. The merged values are written into
// the returned copy; the previous code wrote them into *a and returned
// the unmerged copy.
boost::shared_ptr<Scores>
mergescores(boost::shared_ptr<Scores> const& a,
            boost::shared_ptr<Scores> const& b)
{
  boost::shared_ptr<Scores> ret;
  if (!a) return b ? b : ret;
  if (!b) return a;
  if (a->size() != b->size()) return ret;
  ret.reset(new Scores(*a));
  for (size_t i = 0; i < a->size(); ++i) {
    if ((*a)[i] == 0) (*ret)[i] = (*b)[i]; // fill the gap in the result
    else if ((*b)[i]) {
      UTIL_THROW_IF2((*a)[i] != (*b)[i], "can't merge feature vectors");
    }
  }
  return ret;
}
@ -253,12 +251,11 @@ Merge(const TargetPhrase &copy, const std::vector<FactorType>& factorVec)
m_fullScore += copy.m_fullScore; m_fullScore += copy.m_fullScore;
typedef ScoreCache_t::iterator iter; typedef ScoreCache_t::iterator iter;
typedef ScoreCache_t::value_type item; typedef ScoreCache_t::value_type item;
BOOST_FOREACH(item const& s, copy.m_cached_scores) BOOST_FOREACH(item const& s, copy.m_cached_scores) {
{ pair<iter,bool> foo = m_cached_scores.insert(s);
pair<iter,bool> foo = m_cached_scores.insert(s); if (foo.second == false)
if (foo.second == false) foo.first->second = mergescores(foo.first->second, s.second);
foo.first->second = mergescores(foo.first->second, s.second); }
}
} }
TargetPhrase::ScoreCache_t const& TargetPhrase::ScoreCache_t const&
@ -279,8 +276,10 @@ GetExtraScores(FeatureFunction const* ff) const
void void
TargetPhrase:: TargetPhrase::
SetExtraScores(FeatureFunction const* ff, SetExtraScores(FeatureFunction const* ff,
boost::shared_ptr<Scores> const& s) boost::shared_ptr<Scores> const& s)
{ m_cached_scores[ff] = s; } {
m_cached_scores[ff] = s;
}
void TargetPhrase::SetProperties(const StringPiece &str) void TargetPhrase::SetProperties(const StringPiece &str)

View File

@ -51,15 +51,15 @@ class PhraseDictionary;
*/ */
class TargetPhrase: public Phrase class TargetPhrase: public Phrase
{ {
public: public:
typedef std::map<FeatureFunction const*, boost::shared_ptr<Scores> > typedef std::map<FeatureFunction const*, boost::shared_ptr<Scores> >
ScoreCache_t; ScoreCache_t;
ScoreCache_t const& GetExtraScores() const; ScoreCache_t const& GetExtraScores() const;
Scores const* GetExtraScores(FeatureFunction const* ff) const; Scores const* GetExtraScores(FeatureFunction const* ff) const;
void SetExtraScores(FeatureFunction const* ff, void SetExtraScores(FeatureFunction const* ff,
boost::shared_ptr<Scores> const& scores); boost::shared_ptr<Scores> const& scores);
private: private:
ScoreCache_t m_cached_scores; ScoreCache_t m_cached_scores;
private: private:

View File

@ -18,7 +18,7 @@ class TrainingTask : public Moses::TranslationTask
protected: protected:
// Forwards both arguments to the TranslationTask constructor;
// nothing else is initialised here.
TrainingTask(boost::shared_ptr<Moses::InputType> const source,
             boost::shared_ptr<Moses::IOWrapper> const ioWrapper)
  : TranslationTask(source, ioWrapper)
{
}
@ -26,8 +26,7 @@ public:
// factory function // factory function
static boost::shared_ptr<TrainingTask> static boost::shared_ptr<TrainingTask>
create(boost::shared_ptr<InputType> const& source) create(boost::shared_ptr<InputType> const& source) {
{
boost::shared_ptr<IOWrapper> nix; boost::shared_ptr<IOWrapper> nix;
boost::shared_ptr<TrainingTask> ret(new TrainingTask(source, nix)); boost::shared_ptr<TrainingTask> ret(new TrainingTask(source, nix));
ret->m_self = ret; ret->m_self = ret;
@ -37,8 +36,7 @@ public:
// factory function // factory function
static boost::shared_ptr<TrainingTask> static boost::shared_ptr<TrainingTask>
create(boost::shared_ptr<InputType> const& source, create(boost::shared_ptr<InputType> const& source,
boost::shared_ptr<IOWrapper> const& ioWrapper) boost::shared_ptr<IOWrapper> const& ioWrapper) {
{
boost::shared_ptr<TrainingTask> ret(new TrainingTask(source, ioWrapper)); boost::shared_ptr<TrainingTask> ret(new TrainingTask(source, ioWrapper));
ret->m_self = ret; ret->m_self = ret;
return ret; return ret;
@ -53,7 +51,7 @@ public:
std::cerr << *m_source << std::endl; std::cerr << *m_source << std::endl;
TranslationOptionCollection *transOptColl TranslationOptionCollection *transOptColl
= m_source->CreateTranslationOptionCollection(this->self()); = m_source->CreateTranslationOptionCollection(this->self());
transOptColl->CreateTranslationOptions(); transOptColl->CreateTranslationOptions();
delete transOptColl; delete transOptColl;

View File

@ -163,7 +163,7 @@ public:
#ifdef WITH_THREADS #ifdef WITH_THREADS
boost::shared_ptr<HashTask<Keys> > boost::shared_ptr<HashTask<Keys> >
ht(new HashTask<Keys>(current, *this, keys)); ht(new HashTask<Keys>(current, *this, keys));
m_threadPool.Submit(ht); m_threadPool.Submit(ht);
#else #else
CalcHash(current, keys); CalcHash(current, keys);

View File

@ -133,7 +133,7 @@ public:
size_t read = 0; size_t read = 0;
read += ftruncate(m_file_desc, m_map_size); read += ftruncate(m_file_desc, m_map_size);
m_data_ptr = (char *)util::MapOrThrow( m_data_ptr = (char *)util::MapOrThrow(
m_map_size, true, map_shared, false, m_file_desc, 0); m_map_size, true, map_shared, false, m_file_desc, 0);
return (pointer)m_data_ptr; return (pointer)m_data_ptr;
} else { } else {
size_t map_offset = (m_data_offset / m_page_size) * m_page_size; size_t map_offset = (m_data_offset / m_page_size) * m_page_size;
@ -142,7 +142,7 @@ public:
size_t map_size = m_map_size + relative_offset; size_t map_size = m_map_size + relative_offset;
m_data_ptr = (char *)util::MapOrThrow( m_data_ptr = (char *)util::MapOrThrow(
m_map_size, false, map_shared, false, m_file_desc, map_offset); m_map_size, false, map_shared, false, m_file_desc, map_offset);
return (pointer)(m_data_ptr + relative_offset); return (pointer)(m_data_ptr + relative_offset);
} }

View File

@ -117,8 +117,7 @@ public:
// Task-aware overload. This default implementation ignores ttask and
// delegates to the single-argument overload.
virtual
TargetPhraseCollection const *
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) {
  TargetPhraseCollection const* found = GetTargetPhraseCollectionLEGACY(src);
  return found;
}
@ -129,8 +128,7 @@ public:
virtual virtual
void void
GetTargetPhraseCollectionBatch(ttasksptr const& ttask, GetTargetPhraseCollectionBatch(ttasksptr const& ttask,
const InputPathList &inputPathQueue) const const InputPathList &inputPathQueue) const {
{
GetTargetPhraseCollectionBatch(inputPathQueue); GetTargetPhraseCollectionBatch(inputPathQueue);
} }

View File

@ -109,7 +109,7 @@ ostream& operator<<(ostream& out, const TranslationOption& possibleTranslation)
return out; return out;
} }
/** returns cached scores */ /** returns cached scores */
const Scores* const Scores*
TranslationOption:: TranslationOption::
GetLexReorderingScores(LexicalReordering const* scoreProducer) const GetLexReorderingScores(LexicalReordering const* scoreProducer) const

View File

@ -164,7 +164,7 @@ public:
// } // }
void CacheLexReorderingScores(const LexicalReordering &scoreProducer, void CacheLexReorderingScores(const LexicalReordering &scoreProducer,
const Scores &score); const Scores &score);
TO_STRING(); TO_STRING();

View File

@ -57,7 +57,7 @@ namespace Moses
* called by inherited classes */
TranslationOptionCollection:: TranslationOptionCollection::
TranslationOptionCollection(ttasksptr const& ttask, TranslationOptionCollection(ttasksptr const& ttask,
InputType const& src, InputType const& src,
size_t maxNoTransOptPerCoverage, size_t maxNoTransOptPerCoverage,
float translationOptionThreshold) float translationOptionThreshold)
: m_ttask(ttask) : m_ttask(ttask)
@ -626,14 +626,13 @@ CacheLexReordering()
{ {
size_t const stop = m_source.GetSize(); size_t const stop = m_source.GetSize();
typedef StatefulFeatureFunction sfFF; typedef StatefulFeatureFunction sfFF;
BOOST_FOREACH(sfFF const* ff, sfFF::GetStatefulFeatureFunctions()) BOOST_FOREACH(sfFF const* ff, sfFF::GetStatefulFeatureFunctions()) {
{ if (typeid(*ff) != typeid(LexicalReordering)) continue;
if (typeid(*ff) != typeid(LexicalReordering)) continue; LexicalReordering const& lr = static_cast<const LexicalReordering&>(*ff);
LexicalReordering const& lr = static_cast<const LexicalReordering&>(*ff); for (size_t s = 0 ; s < stop ; s++)
for (size_t s = 0 ; s < stop ; s++) BOOST_FOREACH(TranslationOptionList& tol, m_collection[s])
BOOST_FOREACH(TranslationOptionList& tol, m_collection[s]) lr.SetCache(tol);
lr.SetCache(tol); }
}
} }
//! list of trans opt for a particular span //! list of trans opt for a particular span

View File

@ -75,7 +75,7 @@ protected:
InputPathList m_inputPathQueue; InputPathList m_inputPathQueue;
TranslationOptionCollection(ttasksptr const& ttask, TranslationOptionCollection(ttasksptr const& ttask,
InputType const& src, size_t maxNoTransOptPerCoverage, InputType const& src, size_t maxNoTransOptPerCoverage,
float translationOptionThreshold); float translationOptionThreshold);
void CalcFutureScore(); void CalcFutureScore();
@ -177,8 +177,7 @@ public:
return m_inputPathQueue; return m_inputPathQueue;
} }
// Returns the result of m_ttask.lock().
ttasksptr GetTranslationTask() const {
  ttasksptr task = m_ttask.lock();
  return task;
}
TO_STRING(); TO_STRING();

View File

@ -21,7 +21,7 @@ namespace Moses
/** constructor; just initialize the base class */ /** constructor; just initialize the base class */
TranslationOptionCollectionConfusionNet:: TranslationOptionCollectionConfusionNet::
TranslationOptionCollectionConfusionNet(ttasksptr const& ttask, TranslationOptionCollectionConfusionNet(ttasksptr const& ttask,
const ConfusionNet &input, const ConfusionNet &input,
size_t maxNoTransOptPerCoverage, size_t maxNoTransOptPerCoverage,
float translationOptionThreshold) float translationOptionThreshold)
: TranslationOptionCollection(ttask,input, maxNoTransOptPerCoverage, : TranslationOptionCollection(ttask,input, maxNoTransOptPerCoverage,

View File

@ -23,7 +23,7 @@ TranslationOptionCollectionLattice
( ttasksptr const& ttask, const WordLattice &input, ( ttasksptr const& ttask, const WordLattice &input,
size_t maxNoTransOptPerCoverage, float translationOptionThreshold) size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
: TranslationOptionCollection(ttask, input, maxNoTransOptPerCoverage, : TranslationOptionCollection(ttask, input, maxNoTransOptPerCoverage,
translationOptionThreshold) translationOptionThreshold)
{ {
UTIL_THROW_IF2(StaticData::Instance().GetUseLegacyPT(), UTIL_THROW_IF2(StaticData::Instance().GetUseLegacyPT(),
"Not for models using the legqacy binary phrase table"); "Not for models using the legqacy binary phrase table");

View File

@ -53,7 +53,7 @@ TranslationTask
boost::shared_ptr<TranslationTask> boost::shared_ptr<TranslationTask>
TranslationTask TranslationTask
::create(boost::shared_ptr<InputType> const& source, ::create(boost::shared_ptr<InputType> const& source,
boost::shared_ptr<IOWrapper> const& ioWrapper) boost::shared_ptr<IOWrapper> const& ioWrapper)
{ {
boost::shared_ptr<TranslationTask> ret(new TranslationTask(source, ioWrapper)); boost::shared_ptr<TranslationTask> ret(new TranslationTask(source, ioWrapper));
ret->m_self = ret; ret->m_self = ret;
@ -63,7 +63,7 @@ TranslationTask
// Stores the input and the I/O wrapper; nothing else is set up here.
TranslationTask
::TranslationTask(boost::shared_ptr<InputType> const& source,
                  boost::shared_ptr<IOWrapper> const& ioWrapper)
  : m_source(source)
  , m_ioWrapper(ioWrapper)
{
}
@ -82,37 +82,33 @@ TranslationTask
if (!staticData.IsSyntax(algo)) if (!staticData.IsSyntax(algo))
manager.reset(new Manager(this->self())); // phrase-based manager.reset(new Manager(this->self())); // phrase-based
else if (algo == SyntaxF2S || algo == SyntaxT2S) else if (algo == SyntaxF2S || algo == SyntaxT2S) {
{ // STSG-based tree-to-string / forest-to-string decoding (ask Phil Williams) // STSG-based tree-to-string / forest-to-string decoding (ask Phil Williams)
typedef Syntax::F2S::RuleMatcherCallback Callback; typedef Syntax::F2S::RuleMatcherCallback Callback;
typedef Syntax::F2S::RuleMatcherHyperTree<Callback> RuleMatcher; typedef Syntax::F2S::RuleMatcherHyperTree<Callback> RuleMatcher;
manager.reset(new Syntax::F2S::Manager<RuleMatcher>(this->self())); manager.reset(new Syntax::F2S::Manager<RuleMatcher>(this->self()));
} }
else if (algo == SyntaxS2T) else if (algo == SyntaxS2T) {
{ // new-style string-to-tree decoding (ask Phil Williams) // new-style string-to-tree decoding (ask Phil Williams)
S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm(); S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm();
if (algorithm == RecursiveCYKPlus) if (algorithm == RecursiveCYKPlus) {
{ typedef Syntax::S2T::EagerParserCallback Callback;
typedef Syntax::S2T::EagerParserCallback Callback; typedef Syntax::S2T::RecursiveCYKPlusParser<Callback> Parser;
typedef Syntax::S2T::RecursiveCYKPlusParser<Callback> Parser; manager.reset(new Syntax::S2T::Manager<Parser>(this->self()));
manager.reset(new Syntax::S2T::Manager<Parser>(this->self())); } else if (algorithm == Scope3) {
} typedef Syntax::S2T::StandardParserCallback Callback;
else if (algorithm == Scope3) typedef Syntax::S2T::Scope3Parser<Callback> Parser;
{ manager.reset(new Syntax::S2T::Manager<Parser>(this->self()));
typedef Syntax::S2T::StandardParserCallback Callback; } else UTIL_THROW2("ERROR: unhandled S2T parsing algorithm");
typedef Syntax::S2T::Scope3Parser<Callback> Parser; }
manager.reset(new Syntax::S2T::Manager<Parser>(this->self()));
}
else UTIL_THROW2("ERROR: unhandled S2T parsing algorithm");
}
else if (algo == SyntaxT2S_SCFG) else if (algo == SyntaxT2S_SCFG) {
{ // SCFG-based tree-to-string decoding (ask Phil Williams) // SCFG-based tree-to-string decoding (ask Phil Williams)
typedef Syntax::F2S::RuleMatcherCallback Callback; typedef Syntax::F2S::RuleMatcherCallback Callback;
typedef Syntax::T2S::RuleMatcherSCFG<Callback> RuleMatcher; typedef Syntax::T2S::RuleMatcherSCFG<Callback> RuleMatcher;
manager.reset(new Syntax::T2S::Manager<RuleMatcher>(this->self())); manager.reset(new Syntax::T2S::Manager<RuleMatcher>(this->self()));
} }
else if (algo == ChartIncremental) // Ken's incremental decoding else if (algo == ChartIncremental) // Ken's incremental decoding
manager.reset(new Incremental::Manager(this->self())); manager.reset(new Incremental::Manager(this->self()));
@ -126,8 +122,8 @@ TranslationTask
void TranslationTask::Run() void TranslationTask::Run()
{ {
UTIL_THROW_IF2(!m_source || !m_ioWrapper, UTIL_THROW_IF2(!m_source || !m_ioWrapper,
"Base Instances of TranslationTask must be initialized with" "Base Instances of TranslationTask must be initialized with"
<< " input and iowrapper."); << " input and iowrapper.");
// shorthand for "global data" // shorthand for "global data"
@ -152,7 +148,7 @@ void TranslationTask::Run()
boost::shared_ptr<BaseManager> manager = SetupManager(); boost::shared_ptr<BaseManager> manager = SetupManager();
VERBOSE(1, "Line " << translationId << ": Initialize search took " VERBOSE(1, "Line " << translationId << ": Initialize search took "
<< initTime << " seconds total" << endl); << initTime << " seconds total" << endl);
manager->Decode(); manager->Decode();
@ -209,9 +205,9 @@ void TranslationTask::Run()
// report additional statistics // report additional statistics
manager->CalcDecoderStatistics(); manager->CalcDecoderStatistics();
VERBOSE(1, "Line " << translationId << ": Additional reporting took " VERBOSE(1, "Line " << translationId << ": Additional reporting took "
<< additionalReportingTime << " seconds total" << endl); << additionalReportingTime << " seconds total" << endl);
VERBOSE(1, "Line " << translationId << ": Translation took " VERBOSE(1, "Line " << translationId << ": Translation took "
<< translationTime << " seconds total" << endl); << translationTime << " seconds total" << endl);
IFVERBOSE(2) { IFVERBOSE(2) {
PrintUserTime("Sentence Decoding Time:"); PrintUserTime("Sentence Decoding Time:");
} }

View File

@ -40,7 +40,9 @@ class TranslationTask : public Moses::Task
// Copy constructor with an empty body: nothing is copied from other.
TranslationTask(TranslationTask const& other)
{
}
// Assignment is a no-op: other is ignored and *this is returned unchanged.
TranslationTask const&
operator=(TranslationTask const& other)
{
  return *this;
}
protected: protected:
boost::weak_ptr<TranslationTask> m_self; // weak ptr to myself boost::weak_ptr<TranslationTask> m_self; // weak ptr to myself
@ -48,7 +50,7 @@ protected:
// pointer to ContextScope, which stores context-specific information // pointer to ContextScope, which stores context-specific information
TranslationTask() { } ; TranslationTask() { } ;
TranslationTask(boost::shared_ptr<Moses::InputType> const& source, TranslationTask(boost::shared_ptr<Moses::InputType> const& source,
boost::shared_ptr<Moses::IOWrapper> const& ioWrapper); boost::shared_ptr<Moses::IOWrapper> const& ioWrapper);
// Yes, the constructor is protected. // Yes, the constructor is protected.
// //
// TranslationTasks can only be created through the creator // TranslationTasks can only be created through the creator
@ -68,11 +70,15 @@ protected:
public: public:
// Shared pointer to this task, obtained by locking the m_self weak pointer.
boost::shared_ptr<TranslationTask>
self() {
  boost::shared_ptr<TranslationTask> me = m_self.lock();
  return me;
}
// Const variant: shared pointer-to-const for this task, obtained by
// locking the m_self weak pointer.
virtual
boost::shared_ptr<TranslationTask const>
self() const {
  boost::shared_ptr<TranslationTask const> me = m_self.lock();
  return me;
}
// creator functions // creator functions
static boost::shared_ptr<TranslationTask> create(); static boost::shared_ptr<TranslationTask> create();
@ -84,7 +90,7 @@ public:
static static
boost::shared_ptr<TranslationTask> boost::shared_ptr<TranslationTask>
create(boost::shared_ptr<Moses::InputType> const& source, create(boost::shared_ptr<Moses::InputType> const& source,
boost::shared_ptr<Moses::IOWrapper> const& ioWrapper); boost::shared_ptr<Moses::IOWrapper> const& ioWrapper);
~TranslationTask(); ~TranslationTask();
/** Translate one sentence /** Translate one sentence
@ -92,15 +98,16 @@ public:
virtual void Run(); virtual void Run();
// Returns a copy of the m_source shared pointer.
boost::shared_ptr<Moses::InputType>
GetSource() const {
  return this->m_source;
}
boost::shared_ptr<BaseManager> boost::shared_ptr<BaseManager>
SetupManager(SearchAlgorithm algo = DefaultSearchAlgorithm); SetupManager(SearchAlgorithm algo = DefaultSearchAlgorithm);
// Returns m_scope by reference; UTIL_THROW_IF2 fires first if m_scope is
// unset.
boost::shared_ptr<ContextScope> const&
GetScope() const
{
  // Condition text is left untouched in case the macro embeds it in its
  // message -- TODO confirm.
  UTIL_THROW_IF2(m_scope == NULL, "No context scope!");
  return this->m_scope;
}

View File

@ -8,7 +8,7 @@
namespace Moses namespace Moses
{ {
class TranslationTask; class TranslationTask;
//! @todo what is this? //! @todo what is this?
class XMLParseOutput class XMLParseOutput
{ {

View File

@ -427,7 +427,7 @@ inline float CalcTranslationScore(const std::vector<float> &probVector,
out << *this; \ out << *this; \
return out.str(); \ return out.str(); \
} \ } \
//! delete and remove every element of a collection object such as set, list etc //! delete and remove every element of a collection object such as set, list etc
template<class COLL> template<class COLL>
void RemoveAllInColl(COLL &coll) void RemoveAllInColl(COLL &coll)

View File

@ -3,70 +3,67 @@
namespace MosesServer namespace MosesServer
{ {
using namespace std; using namespace std;
Optimizer:: Optimizer::
Optimizer() Optimizer()
{ {
// signature and help strings are documentation -- the client // signature and help strings are documentation -- the client
// can query this information with a system.methodSignature and // can query this information with a system.methodSignature and
// system.methodHelp RPC. // system.methodHelp RPC.
this->_signature = "S:S"; this->_signature = "S:S";
this->_help = "Optimizes multi-model translation model"; this->_help = "Optimizes multi-model translation model";
} }
void void
Optimizer:: Optimizer::
execute(xmlrpc_c::paramList const& paramList, execute(xmlrpc_c::paramList const& paramList,
xmlrpc_c::value * const retvalP) xmlrpc_c::value * const retvalP)
{ {
#ifdef WITH_DLIB #ifdef WITH_DLIB
const params_t params = paramList.getStruct(0); const params_t params = paramList.getStruct(0);
params_t::const_iterator si; params_t::const_iterator si;
if ((si = params.find("model_name")) == params.end()) if ((si = params.find("model_name")) == params.end()) {
{ string msg = "Missing name of model to be optimized";
string msg = "Missing name of model to be optimized"; msg += " (e.g. PhraseDictionaryMultiModelCounts0)";
msg += " (e.g. PhraseDictionaryMultiModelCounts0)"; throw xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE);
throw xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE); }
} const string model_name = xmlrpc_c::value_string(si->second);
const string model_name = xmlrpc_c::value_string(si->second);
if ((si = params.find("phrase_pairs")) == params.end()) {
if ((si = params.find("phrase_pairs")) == params.end()) throw xmlrpc_c::fault("Missing list of phrase pairs",
{ xmlrpc_c::fault::CODE_PARSE);
throw xmlrpc_c::fault("Missing list of phrase pairs", }
xmlrpc_c::fault::CODE_PARSE);
}
vector<pair<string, string> > phrase_pairs;
vector<pair<string, string> > phrase_pairs; xmlrpc_c::value_array pp_array = xmlrpc_c::value_array(si->second);
vector<xmlrpc_c::value> ppValVec(pp_array.vectorValueValue());
xmlrpc_c::value_array pp_array = xmlrpc_c::value_array(si->second); for (size_t i = 0; i < ppValVec.size(); ++i) {
vector<xmlrpc_c::value> ppValVec(pp_array.vectorValueValue()); xmlrpc_c::value_array pp_array
for (size_t i = 0; i < ppValVec.size(); ++i) = xmlrpc_c::value_array(ppValVec[i]);
{ vector<xmlrpc_c::value> pp(pp_array.vectorValueValue());
xmlrpc_c::value_array pp_array string L1 = xmlrpc_c::value_string(pp[0]);
= xmlrpc_c::value_array(ppValVec[i]); string L2 = xmlrpc_c::value_string(pp[1]);
vector<xmlrpc_c::value> pp(pp_array.vectorValueValue()); phrase_pairs.push_back(make_pair(L1,L2));
string L1 = xmlrpc_c::value_string(pp[0]); }
string L2 = xmlrpc_c::value_string(pp[1]);
phrase_pairs.push_back(make_pair(L1,L2)); // PhraseDictionaryMultiModel* pdmm
} // = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name);
PhraseDictionaryMultiModel* pdmm = FindPhraseDictionary(model_name);
// PhraseDictionaryMultiModel* pdmm vector<float> weight_vector = pdmm->MinimizePerplexity(phrase_pairs);
// = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name);
PhraseDictionaryMultiModel* pdmm = FindPhraseDictionary(model_name); vector<xmlrpc_c::value> weight_vector_ret;
vector<float> weight_vector = pdmm->MinimizePerplexity(phrase_pairs); for (size_t i=0; i < weight_vector.size(); i++)
weight_vector_ret.push_back(xmlrpc_c::value_double(weight_vector[i]));
vector<xmlrpc_c::value> weight_vector_ret;
for (size_t i=0;i < weight_vector.size();i++) *retvalP = xmlrpc_c::value_array(weight_vector_ret);
weight_vector_ret.push_back(xmlrpc_c::value_double(weight_vector[i])); #else
string errmsg = "Error: Perplexity minimization requires dlib ";
*retvalP = xmlrpc_c::value_array(weight_vector_ret); errmsg += "(compilation option --with-dlib)";
#else std::cerr << errmsg << std::endl;
string errmsg = "Error: Perplexity minimization requires dlib "; *retvalP = xmlrpc_c::value_string(errmsg);
errmsg += "(compilation option --with-dlib)"; #endif
std::cerr << errmsg << std::endl; }
*retvalP = xmlrpc_c::value_string(errmsg);
#endif
}
} }

View File

@ -6,12 +6,12 @@
namespace MosesServer namespace MosesServer
{ {
class class
Optimizer : public xmlrpc_c::method Optimizer : public xmlrpc_c::method
{ {
public: public:
Optimizer(); Optimizer();
void execute(xmlrpc_c::paramList const& paramList, void execute(xmlrpc_c::paramList const& paramList,
xmlrpc_c::value * const retvalP); xmlrpc_c::value * const retvalP);
}; };
} }

View File

@ -3,372 +3,363 @@
namespace MosesServer namespace MosesServer
{ {
using namespace std; using namespace std;
using Moses::Hypothesis; using Moses::Hypothesis;
using Moses::StaticData; using Moses::StaticData;
using Moses::WordsRange; using Moses::WordsRange;
using Moses::ChartHypothesis; using Moses::ChartHypothesis;
using Moses::Phrase; using Moses::Phrase;
using Moses::Manager; using Moses::Manager;
using Moses::SearchGraphNode; using Moses::SearchGraphNode;
using Moses::TrellisPathList; using Moses::TrellisPathList;
using Moses::TranslationOptionCollection; using Moses::TranslationOptionCollection;
using Moses::TranslationOptionList; using Moses::TranslationOptionList;
using Moses::TranslationOption; using Moses::TranslationOption;
using Moses::TargetPhrase; using Moses::TargetPhrase;
using Moses::FValue; using Moses::FValue;
using Moses::PhraseDictionaryMultiModel; using Moses::PhraseDictionaryMultiModel;
using Moses::FindPhraseDictionary; using Moses::FindPhraseDictionary;
using Moses::Sentence; using Moses::Sentence;
// Factory function: allocates a TranslationRequest and stores the owning
// pointer in the object's own m_self member before handing it out.
boost::shared_ptr<TranslationRequest>
TranslationRequest::
create(xmlrpc_c::paramList const& paramList,
       boost::condition_variable& cond,
       boost::mutex& mut)
{
  boost::shared_ptr<TranslationRequest> request(
    new TranslationRequest(paramList, cond, mut));
  request->m_self = request;
  return request;
}
// Process the request: parse parameter 0, run the chart or the phrase
// decoder depending on StaticData::IsSyntax(), then set m_done under
// m_mutex and notify one waiter on m_cond.
void
TranslationRequest::
Run()
{
  parse_request(m_paramList.getStruct(0));

  Moses::StaticData const& SD = Moses::StaticData::Instance();

  // Make sure alternative paths are retained, if necessary
  bool const keepSearchGraph = (m_withGraphInfo || m_nbestSize>0);
  if (keepSearchGraph) {
    // why on earth is this a global variable? Is this even thread-safe???? UG
    (const_cast<Moses::StaticData&>(SD)).SetOutputSearchGraph(true);
  }

  std::stringstream out, graphInfo, transCollOpts;

  if (SD.IsSyntax()) {
    run_chart_decoder();
  } else {
    run_phrase_decoder();
  }

  XVERBOSE(1,"Output: " << out.str() << endl);

  // Keep the lock only while flipping the flag; notify after releasing it.
  {
    boost::lock_guard<boost::mutex> lock(m_mutex);
    m_done = true;
  }
  m_cond.notify_one();
}
TranslationRequest::
Run()
{
parse_request(m_paramList.getStruct(0));
/// Append phrase alignment information for a Hypothesis to aInfo.
/// Does nothing unless m_withAlignInfo is set. The appended struct holds
/// the target start position and the source start and end positions.
void
TranslationRequest::
add_phrase_aln_info(Hypothesis const& h, vector<xmlrpc_c::value>& aInfo) const
{
  if (m_withAlignInfo) {
    WordsRange const& targetRange = h.GetCurrTargetWordsRange();
    WordsRange const& sourceRange = h.GetCurrSourceWordsRange();

    std::map<std::string, xmlrpc_c::value> entry;
    entry["tgt-start"] = xmlrpc_c::value_int(targetRange.GetStartPos());
    entry["src-start"] = xmlrpc_c::value_int(sourceRange.GetStartPos());
    entry["src-end"]   = xmlrpc_c::value_int(sourceRange.GetEndPos());
    aInfo.push_back(xmlrpc_c::value_struct(entry));
  }
}
std::stringstream out, graphInfo, transCollOpts; void
TranslationRequest::
outputChartHypo(ostream& out, const ChartHypothesis* hypo)
{
Phrase outPhrase(20);
hypo->GetOutputPhrase(outPhrase);
if (SD.IsSyntax()) // delete 1st & last
run_chart_decoder(); assert(outPhrase.GetSize() >= 2);
else outPhrase.RemoveWord(0);
run_phrase_decoder(); outPhrase.RemoveWord(outPhrase.GetSize() - 1);
for (size_t pos = 0 ; pos < outPhrase.GetSize() ; pos++)
out << *outPhrase.GetFactor(pos, 0) << " ";
}
XVERBOSE(1,"Output: " << out.str() << endl); bool
{ TranslationRequest::
boost::lock_guard<boost::mutex> lock(m_mutex); compareSearchGraphNode(const Moses::SearchGraphNode& a,
m_done = true; const Moses::SearchGraphNode& b)
{
return a.hypo->GetId() < b.hypo->GetId();
}
void
TranslationRequest::
insertGraphInfo(Manager& manager, map<string, xmlrpc_c::value>& retData)
{
using xmlrpc_c::value_int;
using xmlrpc_c::value_double;
using xmlrpc_c::value_struct;
using xmlrpc_c::value_string;
vector<xmlrpc_c::value> searchGraphXml;
vector<SearchGraphNode> searchGraph;
manager.GetSearchGraph(searchGraph);
std::sort(searchGraph.begin(), searchGraph.end());
BOOST_FOREACH(Moses::SearchGraphNode const& n, searchGraph) {
map<string, xmlrpc_c::value> x; // search graph xml node
x["forward"] = value_double(n.forward);
x["fscore"] = value_double(n.fscore);
const Hypothesis* hypo = n.hypo;
x["hyp"] = value_int(hypo->GetId());
x["stack"] = value_int(hypo->GetWordsBitmap().GetNumWordsCovered());
if (hypo->GetId() != 0) {
const Hypothesis *prevHypo = hypo->GetPrevHypo();
x["back"] = value_int(prevHypo->GetId());
x["score"] = value_double(hypo->GetScore());
x["transition"] = value_double(hypo->GetScore() - prevHypo->GetScore());
if (n.recombinationHypo)
x["recombined"] = value_int(n.recombinationHypo->GetId());
x["cover-start"] = value_int(hypo->GetCurrSourceWordsRange().GetStartPos());
x["cover-end"] = value_int(hypo->GetCurrSourceWordsRange().GetEndPos());
x["out"] = value_string(hypo->GetCurrTargetPhrase().GetStringRep(StaticData::Instance().GetOutputFactorOrder()));
} }
m_cond.notify_one(); searchGraphXml.push_back(value_struct(x));
} }
retData["sg"] = xmlrpc_c::value_array(searchGraphXml);
}
/// add phrase alignment information from a Hypothesis void
void TranslationRequest::
TranslationRequest:: output_phrase(ostream& out, Phrase const& phrase) const
add_phrase_aln_info(Hypothesis const& h, vector<xmlrpc_c::value>& aInfo) const {
{ if (!m_reportAllFactors) {
if (!m_withAlignInfo) return; for (size_t i = 0 ; i < phrase.GetSize(); ++i)
WordsRange const& trg = h.GetCurrTargetWordsRange(); out << *phrase.GetFactor(i, 0) << " ";
WordsRange const& src = h.GetCurrSourceWordsRange(); } else out << phrase;
}
std::map<std::string, xmlrpc_c::value> pAlnInfo; void
pAlnInfo["tgt-start"] = xmlrpc_c::value_int(trg.GetStartPos()); TranslationRequest::
pAlnInfo["src-start"] = xmlrpc_c::value_int(src.GetStartPos()); outputNBest(const Manager& manager, map<string, xmlrpc_c::value>& retData)
pAlnInfo["src-end"] = xmlrpc_c::value_int(src.GetEndPos()); {
aInfo.push_back(xmlrpc_c::value_struct(pAlnInfo)); TrellisPathList nBestList;
vector<xmlrpc_c::value> nBestXml;
manager.CalcNBest(m_nbestSize, nBestList, m_nbestDistinct);
BOOST_FOREACH(Moses::TrellisPath const* path, nBestList) {
vector<const Hypothesis *> const& E = path->GetEdges();
if (!E.size()) continue;
std::map<std::string, xmlrpc_c::value> nBestXmlItem;
pack_hypothesis(E, "hyp", nBestXmlItem);
if (m_withScoreBreakdown) {
// should the score breakdown be reported in a more structured manner?
ostringstream buf;
path->GetScoreBreakdown()->OutputAllFeatureScores(buf);
nBestXmlItem["fvals"] = xmlrpc_c::value_string(buf.str());
}
// weighted score
nBestXmlItem["totalScore"] = xmlrpc_c::value_double(path->GetTotalScore());
nBestXml.push_back(xmlrpc_c::value_struct(nBestXmlItem));
} }
retData["nbest"] = xmlrpc_c::value_array(nBestXml);
}
void void
TranslationRequest:: TranslationRequest::
outputChartHypo(ostream& out, const ChartHypothesis* hypo) insertTranslationOptions(Moses::Manager& manager,
{ std::map<std::string, xmlrpc_c::value>& retData)
Phrase outPhrase(20); {
hypo->GetOutputPhrase(outPhrase); const TranslationOptionCollection* toptsColl
= manager.getSntTranslationOptions();
vector<xmlrpc_c::value> toptsXml;
size_t const stop = toptsColl->GetSource().GetSize();
TranslationOptionList const* tol;
for (size_t s = 0 ; s < stop ; ++s) {
for (size_t e = s;
(tol = toptsColl->GetTranslationOptionList(s,e)) != NULL;
++e) {
BOOST_FOREACH(TranslationOption const* topt, *tol) {
std::map<std::string, xmlrpc_c::value> toptXml;
TargetPhrase const& tp = topt->GetTargetPhrase();
StaticData const& GLOBAL = StaticData::Instance();
std::string tphrase = tp.GetStringRep(GLOBAL.GetOutputFactorOrder());
toptXml["phrase"] = xmlrpc_c::value_string(tphrase);
toptXml["fscore"] = xmlrpc_c::value_double(topt->GetFutureScore());
toptXml["start"] = xmlrpc_c::value_int(s);
toptXml["end"] = xmlrpc_c::value_int(e);
vector<xmlrpc_c::value> scoresXml;
const std::valarray<FValue> &scores
= topt->GetScoreBreakdown().getCoreFeatures();
for (size_t j = 0; j < scores.size(); ++j)
scoresXml.push_back(xmlrpc_c::value_double(scores[j]));
// delete 1st & last toptXml["scores"] = xmlrpc_c::value_array(scoresXml);
assert(outPhrase.GetSize() >= 2); toptsXml.push_back(xmlrpc_c::value_struct(toptXml));
outPhrase.RemoveWord(0);
outPhrase.RemoveWord(outPhrase.GetSize() - 1);
for (size_t pos = 0 ; pos < outPhrase.GetSize() ; pos++)
out << *outPhrase.GetFactor(pos, 0) << " ";
}
/// Ordering predicate for search graph nodes: true when the hypothesis
/// attached to `a` has a smaller id than the hypothesis attached to `b`.
/// Both nodes' `hypo` pointers are dereferenced without a null check.
bool
TranslationRequest::
compareSearchGraphNode(const Moses::SearchGraphNode& a,
                       const Moses::SearchGraphNode& b)
{
  // "a before b" written from b's point of view.
  return b.hypo->GetId() > a.hypo->GetId();
}
void
TranslationRequest::
insertGraphInfo(Manager& manager, map<string, xmlrpc_c::value>& retData)
{
using xmlrpc_c::value_int;
using xmlrpc_c::value_double;
using xmlrpc_c::value_struct;
using xmlrpc_c::value_string;
vector<xmlrpc_c::value> searchGraphXml;
vector<SearchGraphNode> searchGraph;
manager.GetSearchGraph(searchGraph);
std::sort(searchGraph.begin(), searchGraph.end());
BOOST_FOREACH(Moses::SearchGraphNode const& n, searchGraph)
{
map<string, xmlrpc_c::value> x; // search graph xml node
x["forward"] = value_double(n.forward);
x["fscore"] = value_double(n.fscore);
const Hypothesis* hypo = n.hypo;
x["hyp"] = value_int(hypo->GetId());
x["stack"] = value_int(hypo->GetWordsBitmap().GetNumWordsCovered());
if (hypo->GetId() != 0)
{
const Hypothesis *prevHypo = hypo->GetPrevHypo();
x["back"] = value_int(prevHypo->GetId());
x["score"] = value_double(hypo->GetScore());
x["transition"] = value_double(hypo->GetScore() - prevHypo->GetScore());
if (n.recombinationHypo)
x["recombined"] = value_int(n.recombinationHypo->GetId());
x["cover-start"] = value_int(hypo->GetCurrSourceWordsRange().GetStartPos());
x["cover-end"] = value_int(hypo->GetCurrSourceWordsRange().GetEndPos());
x["out"] = value_string(hypo->GetCurrTargetPhrase().GetStringRep(StaticData::Instance().GetOutputFactorOrder()));
}
searchGraphXml.push_back(value_struct(x));
} }
retData["sg"] = xmlrpc_c::value_array(searchGraphXml); }
}
retData["topt"] = xmlrpc_c::value_array(toptsXml);
}
/// Report whether the request struct carries an entry named `key`.
/// Only the presence of the key is tested; the associated value is ignored.
///
/// @param params  request parameters, keyed by name
/// @param key     parameter name to look for
/// @return true if `params` contains `key`, false otherwise
bool
check(std::map<std::string, xmlrpc_c::value> const& params, std::string const key)
{
  return params.find(key) != params.end();
}
/// Bind the request to the caller's condition variable, mutex and parameter
/// list (all held by reference) and put every option member into a defined
/// state.
///
/// The option flags are overwritten by parse_request(), but m_nbestSize is
/// not assigned anywhere in the visible request handling and is read in
/// Run() and run_phrase_decoder(); it therefore has to start at 0 instead
/// of holding an indeterminate value. Initialisers follow the member
/// declaration order of the class.
TranslationRequest::
TranslationRequest(xmlrpc_c::paramList const& paramList,
                   boost::condition_variable& cond, boost::mutex& mut)
  : m_cond(cond), m_mutex(mut), m_done(false), m_paramList(paramList)
  , m_withAlignInfo(false)
  , m_withWordAlignInfo(false)
  , m_withGraphInfo(false)
  , m_withTopts(false)
  , m_reportAllFactors(false)
  , m_nbestDistinct(false)
  , m_withScoreBreakdown(false)
  , m_nbestSize(0)
{ }
void
TranslationRequest::
parse_request(std::map<std::string, xmlrpc_c::value> const& params)
{
// parse XMLRPC request
// params_t const params = m_paramList.getStruct(0);
m_paramList.verifyEnd(1); // ??? UG
// source text must be given, or we don't know what to translate
typedef std::map<std::string, xmlrpc_c::value> params_t;
params_t::const_iterator si = params.find("text");
if (si == params.end())
throw xmlrpc_c::fault("Missing source text", xmlrpc_c::fault::CODE_PARSE);
m_source_string = xmlrpc_c::value_string(si->second);
XVERBOSE(1,"Input: " << m_source_string << endl);
m_withAlignInfo = check(params, "align");
m_withWordAlignInfo = check(params, "word-align");
m_withGraphInfo = check(params, "sg");
m_withTopts = check(params, "topt");
m_reportAllFactors = check(params, "report-all-factors");
m_nbestDistinct = check(params, "nbest-distinct");
m_withScoreBreakdown = check(params, "add-score-breakdown");
m_source.reset(new Sentence(0,m_source_string));
si = params.find("lambda");
if (si != params.end()) {
// muMo = multiModel
xmlrpc_c::value_array muMoArray = xmlrpc_c::value_array(si->second);
vector<xmlrpc_c::value> muMoValVec(muMoArray.vectorValueValue());
vector<float> w(muMoValVec.size());
for (size_t i = 0; i < muMoValVec.size(); ++i)
w[i] = xmlrpc_c::value_double(muMoValVec[i]);
if (w.size() && (si = params.find("model_name")) != params.end()) {
string const model_name = xmlrpc_c::value_string(si->second);
PhraseDictionaryMultiModel* pdmm
= (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name);
// Moses::PhraseDictionaryMultiModel* pdmm
// = FindPhraseDictionary(model_name);
pdmm->SetTemporaryMultiModelWeightsVector(w);
}
} }
void // // biased sampling for suffix-array-based sampling phrase table?
TranslationRequest:: // if ((si = params.find("bias")) != params.end())
output_phrase(ostream& out, Phrase const& phrase) const // {
{ // std::vector<xmlrpc_c::value> tmp
if (!m_reportAllFactors) // = xmlrpc_c::value_array(si->second).cvalue();
{ // for (size_t i = 1; i < tmp.size(); i += 2)
for (size_t i = 0 ; i < phrase.GetSize(); ++i) // m_bias[xmlrpc_c::value_int(tmp[i-1])] = xmlrpc_c::value_double(tmp[i]);
out << *phrase.GetFactor(i, 0) << " "; // }
} } // end of Translationtask::parse_request()
else out << phrase;
void
TranslationRequest::
run_chart_decoder()
{
Moses::TreeInput tinput;
istringstream buf(m_source_string + "\n");
tinput.Read(buf, StaticData::Instance().GetInputFactorOrder());
Moses::ChartManager manager(this->self());
manager.Decode();
const Moses::ChartHypothesis *hypo = manager.GetBestHypothesis();
ostringstream out;
outputChartHypo(out,hypo);
m_target_string = out.str();
m_retData["text"] = xmlrpc_c::value_string(m_target_string);
if (m_withGraphInfo) {
std::ostringstream sgstream;
manager.OutputSearchGraphMoses(sgstream);
m_retData["sg"] = xmlrpc_c::value_string(sgstream.str());
} }
} // end of TranslationRequest::run_chart_decoder()
void void
TranslationRequest:: TranslationRequest::
outputNBest(const Manager& manager, map<string, xmlrpc_c::value>& retData) pack_hypothesis(vector<Hypothesis const* > const& edges, string const& key,
{ map<string, xmlrpc_c::value> & dest) const
TrellisPathList nBestList; {
vector<xmlrpc_c::value> nBestXml; // target string
manager.CalcNBest(m_nbestSize, nBestList, m_nbestDistinct); ostringstream target;
BOOST_REVERSE_FOREACH(Hypothesis const* e, edges)
output_phrase(target, e->GetCurrTargetPhrase());
dest[key] = xmlrpc_c::value_string(target.str());
BOOST_FOREACH(Moses::TrellisPath const* path, nBestList) if (m_withAlignInfo) {
{ // phrase alignment, if requested
vector<const Hypothesis *> const& E = path->GetEdges();
if (!E.size()) continue;
std::map<std::string, xmlrpc_c::value> nBestXmlItem;
pack_hypothesis(E, "hyp", nBestXmlItem);
if (m_withScoreBreakdown)
{
// should the score breakdown be reported in a more structured manner?
ostringstream buf;
path->GetScoreBreakdown()->OutputAllFeatureScores(buf);
nBestXmlItem["fvals"] = xmlrpc_c::value_string(buf.str());
}
// weighted score vector<xmlrpc_c::value> p_aln;
nBestXmlItem["totalScore"] = xmlrpc_c::value_double(path->GetTotalScore());
nBestXml.push_back(xmlrpc_c::value_struct(nBestXmlItem));
}
retData["nbest"] = xmlrpc_c::value_array(nBestXml);
}
/// Export the translation options held by the manager's option collection.
///
/// For every start position in the source, the span end is advanced until
/// the collection returns no option list for that span. Each option in each
/// list becomes a struct with the keys "phrase", "fscore", "start", "end"
/// and "scores"; the array of all such structs is stored in
/// retData["topt"].
void
TranslationRequest::
insertTranslationOptions(Moses::Manager& manager,
                         std::map<std::string, xmlrpc_c::value>& retData)
{
  const TranslationOptionCollection* collection
    = manager.getSntTranslationOptions();
  size_t const sourceLength = collection->GetSource().GetSize();
  vector<xmlrpc_c::value> collected;

  for (size_t first = 0; first < sourceLength; ++first) {
    size_t last = first;
    TranslationOptionList const* spanOptions
      = collection->GetTranslationOptionList(first, last);
    // A NULL list marks the end of the spans available from this start.
    while (spanOptions != NULL) {
      BOOST_FOREACH(TranslationOption const* option, *spanOptions) {
        std::map<std::string, xmlrpc_c::value> item;

        TargetPhrase const& target = option->GetTargetPhrase();
        std::string rendered
          = target.GetStringRep(StaticData::Instance().GetOutputFactorOrder());
        item["phrase"] = xmlrpc_c::value_string(rendered);
        item["fscore"] = xmlrpc_c::value_double(option->GetFutureScore());
        item["start"] = xmlrpc_c::value_int(first);
        item["end"] = xmlrpc_c::value_int(last);

        // Core feature scores, in the order the score breakdown stores them.
        const std::valarray<FValue>& core
          = option->GetScoreBreakdown().getCoreFeatures();
        vector<xmlrpc_c::value> coreXml;
        for (size_t k = 0; k < core.size(); ++k) {
          coreXml.push_back(xmlrpc_c::value_double(core[k]));
        }
        item["scores"] = xmlrpc_c::value_array(coreXml);

        collected.push_back(xmlrpc_c::value_struct(item));
      }
      ++last;
      spanOptions = collection->GetTranslationOptionList(first, last);
    }
  }
  retData["topt"] = xmlrpc_c::value_array(collected);
}
/// Report whether the request struct carries an entry named `key`.
/// Only the presence of the key is tested; the associated value is ignored.
///
/// @param params  request parameters, keyed by name
/// @param key     parameter name to look for
/// @return true if `params` contains `key`, false otherwise
bool
check(std::map<std::string, xmlrpc_c::value> const& params, std::string const key)
{
  return params.find(key) != params.end();
}
/// Bind the request to the caller's condition variable, mutex and parameter
/// list (all held by reference) and put every option member into a defined
/// state.
///
/// The option flags are overwritten by parse_request(), but m_nbestSize is
/// not assigned anywhere in the visible request handling and is read in
/// Run() and run_phrase_decoder(); it therefore has to start at 0 instead
/// of holding an indeterminate value. Initialisers follow the member
/// declaration order of the class.
TranslationRequest::
TranslationRequest(xmlrpc_c::paramList const& paramList,
                   boost::condition_variable& cond, boost::mutex& mut)
  : m_cond(cond), m_mutex(mut), m_done(false), m_paramList(paramList)
  , m_withAlignInfo(false)
  , m_withWordAlignInfo(false)
  , m_withGraphInfo(false)
  , m_withTopts(false)
  , m_reportAllFactors(false)
  , m_nbestDistinct(false)
  , m_withScoreBreakdown(false)
  , m_nbestSize(0)
{ }
/// Parse the XML-RPC request struct into this task's members.
///
/// Recognised keys:
///   "text"        (required) source string to translate
///   "align", "word-align", "sg", "topt", "report-all-factors",
///   "nbest-distinct", "add-score-breakdown"
///                 option flags; only the presence of the key is tested
///   "lambda" + "model_name"
///                 temporary weight vector for the named multi-model
///                 phrase table
///
/// NOTE(review): m_nbestSize is not populated here.
///
/// @param params  the struct passed as first parameter of the call
/// @throws xmlrpc_c::fault (CODE_PARSE) if "text" is missing, or if
///         "model_name" does not name a known phrase table
void
TranslationRequest::
parse_request(std::map<std::string, xmlrpc_c::value> const& params)
{
  // parse XMLRPC request
  // params_t const params = m_paramList.getStruct(0);
  m_paramList.verifyEnd(1); // ??? UG

  // source text must be given, or we don't know what to translate
  typedef std::map<std::string, xmlrpc_c::value> params_t;
  params_t::const_iterator si = params.find("text");
  if (si == params.end())
    throw xmlrpc_c::fault("Missing source text", xmlrpc_c::fault::CODE_PARSE);
  m_source_string = xmlrpc_c::value_string(si->second);
  XVERBOSE(1,"Input: " << m_source_string << endl);

  m_withAlignInfo = check(params, "align");
  m_withWordAlignInfo = check(params, "word-align");
  m_withGraphInfo = check(params, "sg");
  m_withTopts = check(params, "topt");
  m_reportAllFactors = check(params, "report-all-factors");
  m_nbestDistinct = check(params, "nbest-distinct");
  m_withScoreBreakdown = check(params, "add-score-breakdown");
  m_source.reset(new Sentence(0,m_source_string));

  si = params.find("lambda");
  if (si != params.end()) {
    // muMo = multiModel
    xmlrpc_c::value_array muMoArray = xmlrpc_c::value_array(si->second);
    vector<xmlrpc_c::value> muMoValVec(muMoArray.vectorValueValue());
    vector<float> w(muMoValVec.size());
    for (size_t i = 0; i < muMoValVec.size(); ++i)
      w[i] = xmlrpc_c::value_double(muMoValVec[i]);
    if (w.size() && (si = params.find("model_name")) != params.end()) {
      string const model_name = xmlrpc_c::value_string(si->second);
      PhraseDictionaryMultiModel* pdmm
        = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name);
      // Moses::PhraseDictionaryMultiModel* pdmm
      // = FindPhraseDictionary(model_name);

      // model_name comes straight from the client; refuse the request
      // instead of dereferencing a null pointer when the lookup fails.
      // NOTE(review): the cast above does not verify that the table found
      // really is a PhraseDictionaryMultiModel -- confirm against
      // FindPhraseDictionary's declaration.
      if (!pdmm)
        throw xmlrpc_c::fault("Unknown phrase table: " + model_name,
                              xmlrpc_c::fault::CODE_PARSE);
      pdmm->SetTemporaryMultiModelWeightsVector(w);
    }
  }

  // // biased sampling for suffix-array-based sampling phrase table?
  // if ((si = params.find("bias")) != params.end())
  // {
  // std::vector<xmlrpc_c::value> tmp
  // = xmlrpc_c::value_array(si->second).cvalue();
  // for (size_t i = 1; i < tmp.size(); i += 2)
  // m_bias[xmlrpc_c::value_int(tmp[i-1])] = xmlrpc_c::value_double(tmp[i]);
  // }
} // end of Translationtask::parse_request()
void
TranslationRequest::
run_chart_decoder()
{
Moses::TreeInput tinput;
istringstream buf(m_source_string + "\n");
tinput.Read(buf, StaticData::Instance().GetInputFactorOrder());
Moses::ChartManager manager(this->self());
manager.Decode();
const Moses::ChartHypothesis *hypo = manager.GetBestHypothesis();
ostringstream out;
outputChartHypo(out,hypo);
m_target_string = out.str();
m_retData["text"] = xmlrpc_c::value_string(m_target_string);
if (m_withGraphInfo)
{
std::ostringstream sgstream;
manager.OutputSearchGraphMoses(sgstream);
m_retData["sg"] = xmlrpc_c::value_string(sgstream.str());
}
} // end of TranslationRequest::run_chart_decoder()
void
TranslationRequest::
pack_hypothesis(vector<Hypothesis const* > const& edges, string const& key,
map<string, xmlrpc_c::value> & dest) const
{
// target string
ostringstream target;
BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) BOOST_REVERSE_FOREACH(Hypothesis const* e, edges)
output_phrase(target, e->GetCurrTargetPhrase()); add_phrase_aln_info(*e, p_aln);
dest[key] = xmlrpc_c::value_string(target.str()); dest["align"] = xmlrpc_c::value_array(p_aln);
if (m_withAlignInfo)
{ // phrase alignment, if requested
vector<xmlrpc_c::value> p_aln;
BOOST_REVERSE_FOREACH(Hypothesis const* e, edges)
add_phrase_aln_info(*e, p_aln);
dest["align"] = xmlrpc_c::value_array(p_aln);
}
if (m_withWordAlignInfo)
{ // word alignment, if requested
vector<xmlrpc_c::value> w_aln;
BOOST_FOREACH(Hypothesis const* e, edges)
e->OutputLocalWordAlignment(w_aln);
dest["word-align"] = xmlrpc_c::value_array(w_aln);
}
} }
void if (m_withWordAlignInfo) {
TranslationRequest:: // word alignment, if requested
pack_hypothesis(Hypothesis const* h, string const& key, vector<xmlrpc_c::value> w_aln;
map<string, xmlrpc_c::value>& dest) const BOOST_FOREACH(Hypothesis const* e, edges)
{ e->OutputLocalWordAlignment(w_aln);
using namespace std; dest["word-align"] = xmlrpc_c::value_array(w_aln);
vector<Hypothesis const*> edges;
for (;h; h = h->GetPrevHypo())
edges.push_back(h);
pack_hypothesis(edges, key, dest);
}
void
TranslationRequest::
run_phrase_decoder()
{
Manager manager(this->self());
// if (m_bias.size()) manager.SetBias(&m_bias);
manager.Decode();
pack_hypothesis(manager.GetBestHypothesis(), "text", m_retData);
if (m_withGraphInfo) insertGraphInfo(manager,m_retData);
if (m_withTopts) insertTranslationOptions(manager,m_retData);
if (m_nbestSize) outputNBest(manager, m_retData);
(const_cast<StaticData&>(Moses::StaticData::Instance()))
.SetOutputSearchGraph(false);
// WTF? one more reason not to have this as global variable! --- UG
} }
} }
/// Convenience overload: follow the GetPrevHypo() links starting at `h`,
/// collect every hypothesis on the way (starting hypothesis first), and
/// delegate to the edge-vector overload. A null `h` yields an empty vector.
void
TranslationRequest::
pack_hypothesis(Hypothesis const* h, string const& key,
                map<string, xmlrpc_c::value>& dest) const
{
  using namespace std;
  vector<Hypothesis const*> chain;
  Hypothesis const* cursor = h;
  while (cursor) {
    chain.push_back(cursor);
    cursor = cursor->GetPrevHypo();
  }
  pack_hypothesis(chain, key, dest);
}
void
TranslationRequest::
run_phrase_decoder()
{
Manager manager(this->self());
// if (m_bias.size()) manager.SetBias(&m_bias);
manager.Decode();
pack_hypothesis(manager.GetBestHypothesis(), "text", m_retData);
if (m_withGraphInfo) insertGraphInfo(manager,m_retData);
if (m_withTopts) insertTranslationOptions(manager,m_retData);
if (m_nbestSize) outputNBest(manager, m_retData);
(const_cast<StaticData&>(Moses::StaticData::Instance()))
.SetOutputSearchGraph(false);
// WTF? one more reason not to have this as global variable! --- UG
}
}

View File

@ -23,100 +23,106 @@
#include <xmlrpc-c/base.hpp> #include <xmlrpc-c/base.hpp>
namespace MosesServer namespace MosesServer
{ {
class class
TranslationRequest : public virtual Moses::TranslationTask TranslationRequest : public virtual Moses::TranslationTask
{ {
boost::condition_variable& m_cond; boost::condition_variable& m_cond;
boost::mutex& m_mutex; boost::mutex& m_mutex;
bool m_done; bool m_done;
xmlrpc_c::paramList const& m_paramList; xmlrpc_c::paramList const& m_paramList;
std::map<std::string, xmlrpc_c::value> m_retData; std::map<std::string, xmlrpc_c::value> m_retData;
std::map<uint32_t,float> m_bias; // for biased sampling std::map<uint32_t,float> m_bias; // for biased sampling
std::string m_source_string, m_target_string; std::string m_source_string, m_target_string;
bool m_withAlignInfo; bool m_withAlignInfo;
bool m_withWordAlignInfo; bool m_withWordAlignInfo;
bool m_withGraphInfo; bool m_withGraphInfo;
bool m_withTopts; bool m_withTopts;
bool m_reportAllFactors; bool m_reportAllFactors;
bool m_nbestDistinct; bool m_nbestDistinct;
bool m_withScoreBreakdown; bool m_withScoreBreakdown;
size_t m_nbestSize; size_t m_nbestSize;
void void
parse_request(); parse_request();
void void
parse_request(std::map<std::string, xmlrpc_c::value> const& req); parse_request(std::map<std::string, xmlrpc_c::value> const& req);
virtual void virtual void
run_chart_decoder(); run_chart_decoder();
virtual void virtual void
run_phrase_decoder(); run_phrase_decoder();
void void
pack_hypothesis(std::vector<Moses::Hypothesis const* > const& edges, pack_hypothesis(std::vector<Moses::Hypothesis const* > const& edges,
std::string const& key, std::string const& key,
std::map<std::string, xmlrpc_c::value> & dest) const; std::map<std::string, xmlrpc_c::value> & dest) const;
void void
pack_hypothesis(Moses::Hypothesis const* h, std::string const& key, pack_hypothesis(Moses::Hypothesis const* h, std::string const& key,
std::map<std::string, xmlrpc_c::value> & dest) const; std::map<std::string, xmlrpc_c::value> & dest) const;
void void
output_phrase(std::ostream& out, Moses::Phrase const& phrase) const; output_phrase(std::ostream& out, Moses::Phrase const& phrase) const;
void void
add_phrase_aln_info(Moses::Hypothesis const& h, add_phrase_aln_info(Moses::Hypothesis const& h,
std::vector<xmlrpc_c::value>& aInfo) const; std::vector<xmlrpc_c::value>& aInfo) const;
void void
outputChartHypo(std::ostream& out, const Moses::ChartHypothesis* hypo); outputChartHypo(std::ostream& out, const Moses::ChartHypothesis* hypo);
bool bool
compareSearchGraphNode(const Moses::SearchGraphNode& a, compareSearchGraphNode(const Moses::SearchGraphNode& a,
const Moses::SearchGraphNode& b); const Moses::SearchGraphNode& b);
void void
insertGraphInfo(Moses::Manager& manager, insertGraphInfo(Moses::Manager& manager,
std::map<std::string, xmlrpc_c::value>& retData); std::map<std::string, xmlrpc_c::value>& retData);
void void
outputNBest(Moses::Manager const& manager, outputNBest(Moses::Manager const& manager,
std::map<std::string, xmlrpc_c::value>& retData); std::map<std::string, xmlrpc_c::value>& retData);
void void
insertTranslationOptions(Moses::Manager& manager, insertTranslationOptions(Moses::Manager& manager,
std::map<std::string, xmlrpc_c::value>& retData); std::map<std::string, xmlrpc_c::value>& retData);
protected: protected:
TranslationRequest(xmlrpc_c::paramList const& paramList, TranslationRequest(xmlrpc_c::paramList const& paramList,
boost::condition_variable& cond, boost::condition_variable& cond,
boost::mutex& mut); boost::mutex& mut);
public: public:
static static
boost::shared_ptr<TranslationRequest> boost::shared_ptr<TranslationRequest>
create(xmlrpc_c::paramList const& paramList, create(xmlrpc_c::paramList const& paramList,
boost::condition_variable& cond, boost::condition_variable& cond,
boost::mutex& mut); boost::mutex& mut);
virtual bool virtual bool
DeleteAfterExecution() { return false; } DeleteAfterExecution() {
return false;
}
bool bool
IsDone() const { return m_done; } IsDone() const {
return m_done;
}
std::map<std::string, xmlrpc_c::value> const& std::map<std::string, xmlrpc_c::value> const&
GetRetData() { return m_retData; } GetRetData() {
return m_retData;
}
void void
Run(); Run();
}; };
} }

View File

@ -4,34 +4,34 @@
namespace MosesServer namespace MosesServer
{ {
using namespace std; using namespace std;
using namespace Moses; using namespace Moses;
Translator:: Translator::
Translator(size_t numThreads) Translator(size_t numThreads)
: m_threadPool(numThreads) : m_threadPool(numThreads)
{ {
// signature and help strings are documentation -- the client // signature and help strings are documentation -- the client
// can query this information with a system.methodSignature and // can query this information with a system.methodSignature and
// system.methodHelp RPC. // system.methodHelp RPC.
this->_signature = "S:S"; this->_signature = "S:S";
this->_help = "Does translation"; this->_help = "Does translation";
} }
void void
Translator:: Translator::
execute(xmlrpc_c::paramList const& paramList, execute(xmlrpc_c::paramList const& paramList,
xmlrpc_c::value * const retvalP) xmlrpc_c::value * const retvalP)
{ {
boost::condition_variable cond; boost::condition_variable cond;
boost::mutex mut; boost::mutex mut;
boost::shared_ptr<TranslationRequest> task boost::shared_ptr<TranslationRequest> task
= TranslationRequest::create(paramList,cond,mut); = TranslationRequest::create(paramList,cond,mut);
m_threadPool.Submit(task); m_threadPool.Submit(task);
boost::unique_lock<boost::mutex> lock(mut); boost::unique_lock<boost::mutex> lock(mut);
while (!task->IsDone()) while (!task->IsDone())
cond.wait(lock); cond.wait(lock);
*retvalP = xmlrpc_c::value_struct(task->GetRetData()); *retvalP = xmlrpc_c::value_struct(task->GetRetData());
} }
} }

View File

@ -10,17 +10,17 @@
#endif #endif
namespace MosesServer namespace MosesServer
{ {
class class
// MosesServer:: // MosesServer::
Translator : public xmlrpc_c::method Translator : public xmlrpc_c::method
{ {
public: public:
Translator(size_t numThreads = 10); Translator(size_t numThreads = 10);
void execute(xmlrpc_c::paramList const& paramList, void execute(xmlrpc_c::paramList const& paramList,
xmlrpc_c::value * const retvalP); xmlrpc_c::value * const retvalP);
private: private:
Moses::ThreadPool m_threadPool; Moses::ThreadPool m_threadPool;
}; };
} }

View File

@ -2,56 +2,56 @@
namespace MosesServer namespace MosesServer
{ {
using namespace Moses; using namespace Moses;
using namespace std; using namespace std;
Updater:: Updater::
Updater() Updater()
{ {
// signature and help strings are documentation -- the client // signature and help strings are documentation -- the client
// can query this information with a system.methodSignature and // can query this information with a system.methodSignature and
// system.methodHelp RPC. // system.methodHelp RPC.
this->_signature = "S:S"; this->_signature = "S:S";
this->_help = "Updates stuff"; this->_help = "Updates stuff";
} }
void void
Updater:: Updater::
execute(xmlrpc_c::paramList const& paramList, execute(xmlrpc_c::paramList const& paramList,
xmlrpc_c::value * const retvalP) xmlrpc_c::value * const retvalP)
{ {
#if PT_UG #if PT_UG
const params_t params = paramList.getStruct(0); const params_t params = paramList.getStruct(0);
breakOutParams(params); breakOutParams(params);
Mmsapt* pdsa = reinterpret_cast<Mmsapt*>(PhraseDictionary::GetColl()[0]); Mmsapt* pdsa = reinterpret_cast<Mmsapt*>(PhraseDictionary::GetColl()[0]);
pdsa->add(m_src, m_trg, m_aln); pdsa->add(m_src, m_trg, m_aln);
XVERBOSE(1,"Done inserting\n"); XVERBOSE(1,"Done inserting\n");
*retvalP = xmlrpc_c::value_string("Phrase table updated"); *retvalP = xmlrpc_c::value_string("Phrase table updated");
#endif #endif
}; };
void void
Updater:: Updater::
breakOutParams(const params_t& params) breakOutParams(const params_t& params)
{ {
params_t::const_iterator si = params.find("source"); params_t::const_iterator si = params.find("source");
if(si == params.end()) if(si == params.end())
throw xmlrpc_c::fault("Missing source sentence", throw xmlrpc_c::fault("Missing source sentence",
xmlrpc_c::fault::CODE_PARSE); xmlrpc_c::fault::CODE_PARSE);
m_src = xmlrpc_c::value_string(si->second); m_src = xmlrpc_c::value_string(si->second);
XVERBOSE(1,"source = " << m_src << endl); XVERBOSE(1,"source = " << m_src << endl);
si = params.find("target"); si = params.find("target");
if(si == params.end()) if(si == params.end())
throw xmlrpc_c::fault("Missing target sentence", throw xmlrpc_c::fault("Missing target sentence",
xmlrpc_c::fault::CODE_PARSE); xmlrpc_c::fault::CODE_PARSE);
m_trg = xmlrpc_c::value_string(si->second); m_trg = xmlrpc_c::value_string(si->second);
XVERBOSE(1,"target = " << m_trg << endl); XVERBOSE(1,"target = " << m_trg << endl);
if((si = params.find("alignment")) == params.end()) if((si = params.find("alignment")) == params.end())
throw xmlrpc_c::fault("Missing alignment", xmlrpc_c::fault::CODE_PARSE); throw xmlrpc_c::fault("Missing alignment", xmlrpc_c::fault::CODE_PARSE);
m_aln = xmlrpc_c::value_string(si->second); m_aln = xmlrpc_c::value_string(si->second);
XVERBOSE(1,"alignment = " << m_aln << endl); XVERBOSE(1,"alignment = " << m_aln << endl);
m_bounded = ((si = params.find("bounded")) != params.end()); m_bounded = ((si = params.find("bounded")) != params.end());
m_add2ORLM = ((si = params.find("updateORLM")) != params.end()); m_add2ORLM = ((si = params.find("updateORLM")) != params.end());
}; };
} }

View File

@ -19,26 +19,26 @@
namespace MosesServer namespace MosesServer
{ {
class class
Updater: public xmlrpc_c::method Updater: public xmlrpc_c::method
{ {
typedef std::map<std::string, xmlrpc_c::value> params_t; typedef std::map<std::string, xmlrpc_c::value> params_t;
std::string m_src, m_trg, m_aln; std::string m_src, m_trg, m_aln;
bool m_bounded, m_add2ORLM; bool m_bounded, m_add2ORLM;
public: public:
Updater(); Updater();
void void
execute(xmlrpc_c::paramList const& paramList, execute(xmlrpc_c::paramList const& paramList,
xmlrpc_c::value * const retvalP); xmlrpc_c::value * const retvalP);
void void
breakOutParams(const params_t& params); breakOutParams(const params_t& params);
}; };
} }

View File

@ -18,108 +18,104 @@
namespace Moses namespace Moses
{ {
// todo: replace this with thread lock-free containers, if a stable library can // todo: replace this with thread lock-free containers, if a stable library can
// be found somewhere // be found somewhere
template<typename KEY, typename VAL, class CONTAINER = std::map<KEY,VAL> > template<typename KEY, typename VAL, class CONTAINER = std::map<KEY,VAL> >
class class
ThreadSafeContainer ThreadSafeContainer
{
protected:
mutable boost::shared_mutex m_lock;
CONTAINER m_container;
typedef typename CONTAINER::iterator iter_t;
typedef typename CONTAINER::const_iterator const_iter_t;
typedef typename CONTAINER::value_type entry_t;
public:
class locking_iterator
{ {
protected: boost::unique_lock<boost::shared_mutex> m_lock;
mutable boost::shared_mutex m_lock; CONTAINER const* m_container;
CONTAINER m_container; const_iter_t m_iter;
typedef typename CONTAINER::iterator iter_t;
typedef typename CONTAINER::const_iterator const_iter_t; locking_iterator(locking_iterator const& other); // no copies!
typedef typename CONTAINER::value_type entry_t;
public: public:
locking_iterator() : m_container(NULL) { }
class locking_iterator locking_iterator(boost::shared_mutex& lock,
{ CONTAINER const* container,
boost::unique_lock<boost::shared_mutex> m_lock; const_iter_t const& iter)
CONTAINER const* m_container; : m_lock(lock), m_container(container), m_iter(iter)
const_iter_t m_iter; { }
locking_iterator(locking_iterator const& other); // no copies! entry_t const& operator->() {
public: UTIL_THROW_IF2(m_container == NULL, "This locking iterator is invalid "
locking_iterator() : m_container(NULL) { } << "or has not been assigned.");
return m_iter.operator->();
locking_iterator(boost::shared_mutex& lock,
CONTAINER const* container,
const_iter_t const& iter)
: m_lock(lock), m_container(container), m_iter(iter)
{ }
entry_t const& operator->()
{
UTIL_THROW_IF2(m_container == NULL, "This locking iterator is invalid "
<< "or has not been assigned.");
return m_iter.operator->();
}
// locking operators transfer the lock upon assignment and become invalid
locking_iterator const&
operator=(locking_iterator& other)
{
m_lock.swap(other.m_lock);
m_iter = other.m_iter;
other.m_iter = other.m_container.end();
}
bool
operator==(const_iter_t const& other)
{
return m_iter == other;
}
locking_iterator const&
operator++() { ++m_iter; return *this; }
// DO NOT DEFINE THE POST-INCREMENT OPERATOR!
// locking_operators are non-copyable,
// so we can't simply make a copy before incrementing and return
// the copy after incrementing
locking_iterator const&
operator++(int);
};
const_iter_t const& end() const
{ return m_container.end(); }
locking_iterator begin() const
{
return locking_iterator(m_lock, this, m_container.begin());
} }
VAL const& set(KEY const& key, VAL const& val) // locking operators transfer the lock upon assignment and become invalid
{ locking_iterator const&
boost::unique_lock< boost::shared_mutex > lock(m_lock); operator=(locking_iterator& other) {
entry_t entry(key,val); m_lock.swap(other.m_lock);
iter_t foo = m_container.insert(entry).first; m_iter = other.m_iter;
foo->second = val; other.m_iter = other.m_container.end();
return foo->second;
} }
VAL const* get(KEY const& key, VAL const& default_val) bool
{ operator==(const_iter_t const& other) {
boost::shared_lock< boost::shared_mutex > lock(m_lock); return m_iter == other;
entry_t entry(key, default_val);
iter_t foo = m_container.insert(entry).first;
return &(foo->second);
} }
VAL const* get(KEY const& key) const locking_iterator const&
{ operator++() {
boost::shared_lock< boost::shared_mutex > lock(m_lock); ++m_iter;
const_iter_t m = m_container.find(key); return *this;
if (m == m_container.end()) return NULL;
return &m->second;
} }
size_t erase(KEY const& key) // DO NOT DEFINE THE POST-INCREMENT OPERATOR!
{ // locking_operators are non-copyable,
boost::unique_lock< boost::shared_mutex > lock(m_lock); // so we can't simply make a copy before incrementing and return
return m_container.erase(key); // the copy after incrementing
} locking_iterator const&
operator++(int);
}; };
const_iter_t const& end() const {
return m_container.end();
}
locking_iterator begin() const {
return locking_iterator(m_lock, this, m_container.begin());
}
VAL const& set(KEY const& key, VAL const& val) {
boost::unique_lock< boost::shared_mutex > lock(m_lock);
entry_t entry(key,val);
iter_t foo = m_container.insert(entry).first;
foo->second = val;
return foo->second;
}
VAL const* get(KEY const& key, VAL const& default_val) {
boost::shared_lock< boost::shared_mutex > lock(m_lock);
entry_t entry(key, default_val);
iter_t foo = m_container.insert(entry).first;
return &(foo->second);
}
VAL const* get(KEY const& key) const {
boost::shared_lock< boost::shared_mutex > lock(m_lock);
const_iter_t m = m_container.find(key);
if (m == m_container.end()) return NULL;
return &m->second;
}
size_t erase(KEY const& key) {
boost::unique_lock< boost::shared_mutex > lock(m_lock);
return m_container.erase(key);
}
};
} }
#endif #endif

View File

@ -146,7 +146,7 @@ public:
void AddProperty(const std::string &key, const std::string &value, float count) { void AddProperty(const std::string &key, const std::string &value, float count) {
std::map<std::string, std::map<std::string,
std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter = m_properties.find(key); std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter = m_properties.find(key);
if ( iter == m_properties.end() ) { if ( iter == m_properties.end() ) {
// key not found: insert property key and value // key not found: insert property key and value
PROPERTY_VALUES *propertyValues = new PROPERTY_VALUES(); PROPERTY_VALUES *propertyValues = new PROPERTY_VALUES();

View File

@ -116,18 +116,18 @@ void PropertiesConsolidator::ProcessPropertiesString(const std::string &properti
} else if ( !keyValue[0].compare("POS") ) { } else if ( !keyValue[0].compare("POS") ) {
/* DO NOTHING (property is not registered in the decoder at the moment) /* DO NOTHING (property is not registered in the decoder at the moment)
if ( m_partsOfSpeechFlag ) { if ( m_partsOfSpeechFlag ) {
// POS property: replace strings with vocabulary indices // POS property: replace strings with vocabulary indices
out << " {{" << keyValue[0]; out << " {{" << keyValue[0];
ProcessPOSPropertyValue(keyValue[1], out); ProcessPOSPropertyValue(keyValue[1], out);
out << "}}"; out << "}}";
} else { // don't process POS property } else { // don't process POS property
out << " {{" << keyValue[0] << " " << keyValue[1] << "}}"; out << " {{" << keyValue[0] << " " << keyValue[1] << "}}";
} }
*/ */
} else { } else {

View File

@ -699,12 +699,12 @@ void ExtractGHKM::WriteGlueGrammar(
// const size_t partOfSpeechSentenceStart = 0; // const size_t partOfSpeechSentenceStart = 0;
// const size_t partOfSpeechSentenceEnd = 1; // const size_t partOfSpeechSentenceEnd = 1;
#ifndef BOS_ #ifndef BOS_
#define BOS_ "<s>" //Beginning of sentence symbol #define BOS_ "<s>" //Beginning of sentence symbol
#endif #endif
#ifndef EOS_ #ifndef EOS_
#define EOS_ "</s>" //End of sentence symbol #define EOS_ "</s>" //End of sentence symbol
#endif #endif
std::string sentenceStartSource = BOS_; std::string sentenceStartSource = BOS_;
std::string sentenceEndSource = EOS_; std::string sentenceEndSource = EOS_;

View File

@ -12,7 +12,7 @@ namespace FilterRuleTable
{ {
TreeCfgFilter::TreeCfgFilter( TreeCfgFilter::TreeCfgFilter(
const std::vector<boost::shared_ptr<StringTree> > &sentences) const std::vector<boost::shared_ptr<StringTree> > &sentences)
{ {
} }

View File

@ -25,8 +25,9 @@ namespace FilterRuleTable
// Filters a rule table, discarding rules that cannot be applied to a given // Filters a rule table, discarding rules that cannot be applied to a given
// test set. The rule table must have a TSG source-side and the test sentences // test set. The rule table must have a TSG source-side and the test sentences
// must be parse trees. // must be parse trees.
class TreeCfgFilter : public CfgFilter { class TreeCfgFilter : public CfgFilter
public: {
public:
// Initialize the filter for a given set of test sentences. // Initialize the filter for a given set of test sentences.
TreeCfgFilter(const std::vector<boost::shared_ptr<StringTree> > &); TreeCfgFilter(const std::vector<boost::shared_ptr<StringTree> > &);

View File

@ -15,7 +15,7 @@ namespace PostprocessEgretForests
class Forest class Forest
{ {
public: public:
struct Vertex; struct Vertex;
struct Hyperedge { struct Hyperedge {
@ -35,7 +35,7 @@ class Forest
std::vector<boost::shared_ptr<Vertex> > vertices; std::vector<boost::shared_ptr<Vertex> > vertices;
private: private:
// Copying is not allowed. // Copying is not allowed.
Forest(const Forest &); Forest(const Forest &);
Forest &operator=(const Forest &); Forest &operator=(const Forest &);

View File

@ -17,15 +17,18 @@ namespace PostprocessEgretForests
{ {
ForestParser::ForestParser() ForestParser::ForestParser()
: m_input(0) { : m_input(0)
{
} }
ForestParser::ForestParser(std::istream &input) ForestParser::ForestParser(std::istream &input)
: m_input(&input) { : m_input(&input)
{
++(*this); ++(*this);
} }
ForestParser &ForestParser::operator++() { ForestParser &ForestParser::operator++()
{
if (!m_input) { if (!m_input) {
return *this; return *this;
} }
@ -106,7 +109,7 @@ void ForestParser::ParseHyperedgeLine(const std::string &line, Forest &forest)
} }
boost::shared_ptr<Forest::Vertex> ForestParser::ParseVertex( boost::shared_ptr<Forest::Vertex> ForestParser::ParseVertex(
const StringPiece &s) const StringPiece &s)
{ {
VertexSP v = boost::make_shared<Forest::Vertex>(); VertexSP v = boost::make_shared<Forest::Vertex>();
std::size_t pos = s.rfind('['); std::size_t pos = s.rfind('[');
@ -132,12 +135,14 @@ boost::shared_ptr<Forest::Vertex> ForestParser::ParseVertex(
return v; return v;
} }
bool operator==(const ForestParser &lhs, const ForestParser &rhs) { bool operator==(const ForestParser &lhs, const ForestParser &rhs)
{
// TODO Is this right? Compare values of istreams if non-zero? // TODO Is this right? Compare values of istreams if non-zero?
return lhs.m_input == rhs.m_input; return lhs.m_input == rhs.m_input;
} }
bool operator!=(const ForestParser &lhs, const ForestParser &rhs) { bool operator!=(const ForestParser &lhs, const ForestParser &rhs)
{
return !(lhs == rhs); return !(lhs == rhs);
} }

View File

@ -20,8 +20,9 @@ namespace Syntax
namespace PostprocessEgretForests namespace PostprocessEgretForests
{ {
class ForestParser { class ForestParser
public: {
public:
struct Entry { struct Entry {
std::size_t sentNum; std::size_t sentNum;
std::string sentence; std::string sentence;
@ -31,15 +32,19 @@ class ForestParser {
ForestParser(); ForestParser();
ForestParser(std::istream &); ForestParser(std::istream &);
Entry &operator*() { return m_entry; } Entry &operator*() {
Entry *operator->() { return &m_entry; } return m_entry;
}
Entry *operator->() {
return &m_entry;
}
ForestParser &operator++(); ForestParser &operator++();
friend bool operator==(const ForestParser &, const ForestParser &); friend bool operator==(const ForestParser &, const ForestParser &);
friend bool operator!=(const ForestParser &, const ForestParser &); friend bool operator!=(const ForestParser &, const ForestParser &);
private: private:
typedef boost::shared_ptr<Forest::Vertex> VertexSP; typedef boost::shared_ptr<Forest::Vertex> VertexSP;
typedef boost::shared_ptr<Forest::Hyperedge> HyperedgeSP; typedef boost::shared_ptr<Forest::Hyperedge> HyperedgeSP;
@ -60,7 +65,7 @@ class ForestParser {
}; };
typedef boost::unordered_set<VertexSP, VertexSetHash, typedef boost::unordered_set<VertexSP, VertexSetHash,
VertexSetPred> VertexSet; VertexSetPred> VertexSet;
// Copying is not allowed // Copying is not allowed
ForestParser(const ForestParser &); ForestParser(const ForestParser &);

View File

@ -61,7 +61,8 @@ void ForestWriter::WriteVertex(const Forest::Vertex &v)
} }
} }
std::string ForestWriter::PossiblyEscape(const std::string &s) const { std::string ForestWriter::PossiblyEscape(const std::string &s) const
{
if (m_options.escape) { if (m_options.escape) {
return Escape(s); return Escape(s);
} else { } else {
@ -70,7 +71,8 @@ std::string ForestWriter::PossiblyEscape(const std::string &s) const {
} }
// Escapes XML special characters. // Escapes XML special characters.
std::string ForestWriter::Escape(const std::string &s) const { std::string ForestWriter::Escape(const std::string &s) const
{
std::string t; std::string t;
std::size_t len = s.size(); std::size_t len = s.size();
t.reserve(len); t.reserve(len);

View File

@ -15,13 +15,13 @@ namespace PostprocessEgretForests
class ForestWriter class ForestWriter
{ {
public: public:
ForestWriter(const Options &options, std::ostream &out) ForestWriter(const Options &options, std::ostream &out)
: m_options(options), m_out(out) {} : m_options(options), m_out(out) {}
void Write(const std::string &, const Forest &, std::size_t); void Write(const std::string &, const Forest &, std::size_t);
private: private:
std::string Escape(const std::string &) const; std::string Escape(const std::string &) const;
std::string PossiblyEscape(const std::string &) const; std::string PossiblyEscape(const std::string &) const;
void WriteHyperedgeLine(const Forest::Hyperedge &); void WriteHyperedgeLine(const Forest::Hyperedge &);

View File

@ -47,8 +47,8 @@ int PostprocessEgretForests::Main(int argc, char *argv[])
} }
void PostprocessEgretForests::ProcessForest( void PostprocessEgretForests::ProcessForest(
std::istream &in, std::ostream &out, SplitPointFileParser *splitPointParser, std::istream &in, std::ostream &out, SplitPointFileParser *splitPointParser,
const Options &options) const Options &options)
{ {
std::size_t sentNum = 0; std::size_t sentNum = 0;
ForestWriter writer(options, out); ForestWriter writer(options, out);
@ -77,7 +77,7 @@ void PostprocessEgretForests::ProcessForest(
} }
void PostprocessEgretForests::OpenInputFileOrDie(const std::string &filename, void PostprocessEgretForests::OpenInputFileOrDie(const std::string &filename,
std::ifstream &stream) std::ifstream &stream)
{ {
stream.open(filename.c_str()); stream.open(filename.c_str());
if (!stream) { if (!stream) {
@ -88,7 +88,7 @@ void PostprocessEgretForests::OpenInputFileOrDie(const std::string &filename,
} }
void PostprocessEgretForests::ProcessOptions(int argc, char *argv[], void PostprocessEgretForests::ProcessOptions(int argc, char *argv[],
Options &options) const Options &options) const
{ {
namespace po = boost::program_options; namespace po = boost::program_options;
namespace cls = boost::program_options::command_line_style; namespace cls = boost::program_options::command_line_style;
@ -119,7 +119,7 @@ void PostprocessEgretForests::ProcessOptions(int argc, char *argv[],
// (these are used as positional options). // (these are used as positional options).
po::options_description hidden("Hidden options"); po::options_description hidden("Hidden options");
hidden.add_options() hidden.add_options()
// None // None
; ;
// Compose the full set of command-line options. // Compose the full set of command-line options.

View File

@ -16,15 +16,18 @@ namespace PostprocessEgretForests
{ {
SplitPointFileParser::SplitPointFileParser() SplitPointFileParser::SplitPointFileParser()
: m_input(0) { : m_input(0)
{
} }
SplitPointFileParser::SplitPointFileParser(std::istream &input) SplitPointFileParser::SplitPointFileParser(std::istream &input)
: m_input(&input) { : m_input(&input)
{
++(*this); ++(*this);
} }
SplitPointFileParser &SplitPointFileParser::operator++() { SplitPointFileParser &SplitPointFileParser::operator++()
{
if (!m_input) { if (!m_input) {
return *this; return *this;
} }
@ -66,13 +69,15 @@ void SplitPointFileParser::ParseLine(const std::string &line,
} }
bool operator==(const SplitPointFileParser &lhs, bool operator==(const SplitPointFileParser &lhs,
const SplitPointFileParser &rhs) { const SplitPointFileParser &rhs)
{
// TODO Is this right? Compare values of istreams if non-zero? // TODO Is this right? Compare values of istreams if non-zero?
return lhs.m_input == rhs.m_input; return lhs.m_input == rhs.m_input;
} }
bool operator!=(const SplitPointFileParser &lhs, bool operator!=(const SplitPointFileParser &lhs,
const SplitPointFileParser &rhs) { const SplitPointFileParser &rhs)
{
return !(lhs == rhs); return !(lhs == rhs);
} }

View File

@ -13,8 +13,9 @@ namespace Syntax
namespace PostprocessEgretForests namespace PostprocessEgretForests
{ {
class SplitPointFileParser { class SplitPointFileParser
public: {
public:
struct Entry { struct Entry {
std::vector<SplitPoint> splitPoints; std::vector<SplitPoint> splitPoints;
}; };
@ -22,8 +23,12 @@ class SplitPointFileParser {
SplitPointFileParser(); SplitPointFileParser();
SplitPointFileParser(std::istream &); SplitPointFileParser(std::istream &);
const Entry &operator*() const { return m_entry; } const Entry &operator*() const {
const Entry *operator->() const { return &m_entry; } return m_entry;
}
const Entry *operator->() const {
return &m_entry;
}
SplitPointFileParser &operator++(); SplitPointFileParser &operator++();
@ -33,7 +38,7 @@ class SplitPointFileParser {
friend bool operator!=(const SplitPointFileParser &, friend bool operator!=(const SplitPointFileParser &,
const SplitPointFileParser &); const SplitPointFileParser &);
private: private:
void ParseLine(const std::string &, std::vector<SplitPoint> &); void ParseLine(const std::string &, std::vector<SplitPoint> &);
Entry m_entry; Entry m_entry;

View File

@ -30,14 +30,14 @@ inline bool operator==(const Symbol &s, const Symbol &t)
} }
struct SymbolHasher { struct SymbolHasher {
public: public:
std::size_t operator()(const Symbol &s) const { std::size_t operator()(const Symbol &s) const {
return hash_value(s); return hash_value(s);
} }
}; };
struct SymbolEqualityPred { struct SymbolEqualityPred {
public: public:
bool operator()(const Symbol &s, const Symbol &t) const { bool operator()(const Symbol &s, const Symbol &t) const {
return s.value == t.value && s.isNonTerminal == t.isNonTerminal; return s.value == t.value && s.isNonTerminal == t.isNonTerminal;
} }

View File

@ -16,10 +16,10 @@ namespace PostprocessEgretForests
class TopologicalSorter class TopologicalSorter
{ {
public: public:
void Sort(const Forest &, std::vector<const Forest::Vertex *> &); void Sort(const Forest &, std::vector<const Forest::Vertex *> &);
private: private:
typedef boost::unordered_set<const Forest::Vertex *> VertexSet; typedef boost::unordered_set<const Forest::Vertex *> VertexSet;
void BuildPredSets(const Forest &); void BuildPredSets(const Forest &);

View File

@ -900,7 +900,7 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
} }
if (nonTermContext && !inverseFlag) { if (nonTermContext && !inverseFlag) {
std::string propValue = phrasePair.CollectAllPropertyValues("NonTermContext"); std::string propValue = phrasePair.CollectAllPropertyValues("NonTermContext");
if (!propValue.empty() && propValue.size() < 50000) { if (!propValue.empty() && propValue.size() < 50000) {
size_t nNTs = NumNonTerminal(phraseSource); size_t nNTs = NumNonTerminal(phraseSource);
phraseTableFile << " {{NonTermContext " << nNTs << " " << propValue << "}}"; phraseTableFile << " {{NonTermContext " << nNTs << " " << propValue << "}}";
@ -908,7 +908,7 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
} }
if (nonTermContextTarget && !inverseFlag) { if (nonTermContextTarget && !inverseFlag) {
std::string propValue = phrasePair.CollectAllPropertyValues("NonTermContextTarget"); std::string propValue = phrasePair.CollectAllPropertyValues("NonTermContextTarget");
if (!propValue.empty() && propValue.size() < 50000) { if (!propValue.empty() && propValue.size() < 50000) {
size_t nNTs = NumNonTerminal(phraseSource); size_t nNTs = NumNonTerminal(phraseSource);
phraseTableFile << " {{NonTermContextTarget " << nNTs << " " << propValue << "}}"; phraseTableFile << " {{NonTermContextTarget " << nNTs << " " << propValue << "}}";

View File

@ -427,7 +427,7 @@ int main(int argc, char** argv)
ostream *out = &std::cout; ostream *out = &std::cout;
if (input) { if (input) {
fstream *fin = new fstream(input,ios::in); fstream *fin = new fstream(input,ios::in);
if (!fin->is_open()) { if (!fin->is_open()) {
cerr << "cannot open " << input << "\n"; cerr << "cannot open " << input << "\n";
exit(1); exit(1);
@ -436,7 +436,7 @@ int main(int argc, char** argv)
} }
if (output) { if (output) {
fstream *fout = new fstream(output,ios::out); fstream *fout = new fstream(output,ios::out);
if (!fout->is_open()) { if (!fout->is_open()) {
cerr << "cannot open " << output << "\n"; cerr << "cannot open " << output << "\n";
exit(1); exit(1);
@ -506,12 +506,12 @@ int main(int argc, char** argv)
for (int i=1; i<=MAX_N; i++) delete [] A[i]; for (int i=1; i<=MAX_N; i++) delete [] A[i];
delete [] A; delete [] A;
if (inp != &std::cin) { if (inp != &std::cin) {
delete inp; delete inp;
} }
if (out != &std::cout) { if (out != &std::cout) {
delete inp; delete inp;
} }
exit(0); exit(0);
} }